8th day of python challenges 111-117

2019-08-04 15:26:35 +03:00
parent b04c1b055f
commit 627802c383
3215 changed files with 760227 additions and 491 deletions
--- a/venv/lib/python3.6/site-packages/pandas/tests/extension/decimal/init.py
+++ b/venv/lib/python3.6/site-packages/pandas/tests/extension/decimal/init.py
@@ -0,0 +1,3 @@
+from .array import DecimalArray, DecimalDtype, make_data, to_decimal
+
+__all__ = ["DecimalArray", "DecimalDtype", "to_decimal", "make_data"]
--- a/venv/lib/python3.6/site-packages/pandas/tests/extension/decimal/array.py
+++ b/venv/lib/python3.6/site-packages/pandas/tests/extension/decimal/array.py
@@ -0,0 +1,189 @@
+import decimal
+import numbers
+import random
+import sys
+
+import numpy as np
+
+from pandas.core.dtypes.base import ExtensionDtype
+
+import pandas as pd
+from pandas.api.extensions import register_extension_dtype
+from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin
+
+
+@register_extension_dtype
+class DecimalDtype(ExtensionDtype):
+    type = decimal.Decimal
+    name = "decimal"
+    na_value = decimal.Decimal("NaN")
+    _metadata = ("context",)
+
+    def __init__(self, context=None):
+        self.context = context or decimal.getcontext()
+
+    def __repr__(self):
+        return "DecimalDtype(context={})".format(self.context)
+
+    @classmethod
+    def construct_array_type(cls):
+        """Return the array type associated with this dtype
+
+        Returns
+        -------
+        type
+        """
+        return DecimalArray
+
+    @classmethod
+    def construct_from_string(cls, string):
+        if string == cls.name:
+            return cls()
+        else:
+            raise TypeError("Cannot construct a '{}' from '{}'".format(cls, string))
+
+    @property
+    def _is_numeric(self):
+        return True
+
+
+class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin):
+    __array_priority__ = 1000
+
+    def __init__(self, values, dtype=None, copy=False, context=None):
+        for val in values:
+            if not isinstance(val, decimal.Decimal):
+                raise TypeError("All values must be of type " + str(decimal.Decimal))
+        values = np.asarray(values, dtype=object)
+
+        self._data = values
+        # Some aliases for common attribute names to ensure pandas supports
+        # these
+        self._items = self.data = self._data
+        # those aliases are currently not working due to assumptions
+        # in internal code (GH-20735)
+        # self._values = self.values = self.data
+        self._dtype = DecimalDtype(context)
+
+    @property
+    def dtype(self):
+        return self._dtype
+
+    @classmethod
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
+        return cls(scalars)
+
+    @classmethod
+    def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
+        return cls._from_sequence([decimal.Decimal(x) for x in strings], dtype, copy)
+
+    @classmethod
+    def _from_factorized(cls, values, original):
+        return cls(values)
+
+    _HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray)
+
+    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+        #
+        if not all(
+            isinstance(t, self._HANDLED_TYPES + (DecimalArray,)) for t in inputs
+        ):
+            return NotImplemented
+
+        inputs = tuple(x._data if isinstance(x, DecimalArray) else x for x in inputs)
+        result = getattr(ufunc, method)(*inputs, **kwargs)
+
+        def reconstruct(x):
+            if isinstance(x, (decimal.Decimal, numbers.Number)):
+                return x
+            else:
+                return DecimalArray._from_sequence(x)
+
+        if isinstance(result, tuple):
+            return tuple(reconstruct(x) for x in result)
+        else:
+            return reconstruct(result)
+
+    def __getitem__(self, item):
+        if isinstance(item, numbers.Integral):
+            return self._data[item]
+        else:
+            return type(self)(self._data[item])
+
+    def take(self, indexer, allow_fill=False, fill_value=None):
+        from pandas.api.extensions import take
+
+        data = self._data
+        if allow_fill and fill_value is None:
+            fill_value = self.dtype.na_value
+
+        result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill)
+        return self._from_sequence(result)
+
+    def copy(self):
+        return type(self)(self._data.copy())
+
+    def astype(self, dtype, copy=True):
+        if isinstance(dtype, type(self.dtype)):
+            return type(self)(self._data, context=dtype.context)
+        return np.asarray(self, dtype=dtype)
+
+    def __setitem__(self, key, value):
+        if pd.api.types.is_list_like(value):
+            if pd.api.types.is_scalar(key):
+                raise ValueError("setting an array element with a sequence.")
+            value = [decimal.Decimal(v) for v in value]
+        else:
+            value = decimal.Decimal(value)
+        self._data[key] = value
+
+    def __len__(self):
+        return len(self._data)
+
+    @property
+    def nbytes(self):
+        n = len(self)
+        if n:
+            return n * sys.getsizeof(self[0])
+        return 0
+
+    def isna(self):
+        return np.array([x.is_nan() for x in self._data], dtype=bool)
+
+    @property
+    def _na_value(self):
+        return decimal.Decimal("NaN")
+
+    def _formatter(self, boxed=False):
+        if boxed:
+            return "Decimal: {0}".format
+        return repr
+
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        return cls(np.concatenate([x._data for x in to_concat]))
+
+    def _reduce(self, name, skipna=True, **kwargs):
+
+        if skipna:
+            raise NotImplementedError("decimal does not support skipna=True")
+
+        try:
+            op = getattr(self.data, name)
+        except AttributeError:
+            raise NotImplementedError(
+                "decimal does not support the {} operation".format(name)
+            )
+        return op(axis=0)
+
+
+def to_decimal(values, context=None):
+    return DecimalArray([decimal.Decimal(x) for x in values], context=context)
+
+
+def make_data():
+    return [decimal.Decimal(random.random()) for _ in range(100)]
+
+
+DecimalArray._add_arithmetic_ops()
+DecimalArray._add_comparison_ops()
--- a/venv/lib/python3.6/site-packages/pandas/tests/extension/decimal/test_decimal.py
+++ b/venv/lib/python3.6/site-packages/pandas/tests/extension/decimal/test_decimal.py
@@ -0,0 +1,439 @@
+import decimal
+import math
+import operator
+
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas.tests.extension import base
+import pandas.util.testing as tm
+
+from .array import DecimalArray, DecimalDtype, make_data, to_decimal
+
+
+@pytest.fixture
+def dtype():
+    return DecimalDtype()
+
+
+@pytest.fixture
+def data():
+    return DecimalArray(make_data())
+
+
+@pytest.fixture
+def data_for_twos():
+    return DecimalArray([decimal.Decimal(2) for _ in range(100)])
+
+
+@pytest.fixture
+def data_missing():
+    return DecimalArray([decimal.Decimal("NaN"), decimal.Decimal(1)])
+
+
+@pytest.fixture
+def data_for_sorting():
+    return DecimalArray(
+        [decimal.Decimal("1"), decimal.Decimal("2"), decimal.Decimal("0")]
+    )
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    return DecimalArray(
+        [decimal.Decimal("1"), decimal.Decimal("NaN"), decimal.Decimal("0")]
+    )
+
+
+@pytest.fixture
+def na_cmp():
+    return lambda x, y: x.is_nan() and y.is_nan()
+
+
+@pytest.fixture
+def na_value():
+    return decimal.Decimal("NaN")
+
+
+@pytest.fixture
+def data_for_grouping():
+    b = decimal.Decimal("1.0")
+    a = decimal.Decimal("0.0")
+    c = decimal.Decimal("2.0")
+    na = decimal.Decimal("NaN")
+    return DecimalArray([b, b, na, na, a, a, b, c])
+
+
+class BaseDecimal:
+    def assert_series_equal(self, left, right, *args, **kwargs):
+        def convert(x):
+            # need to convert array([Decimal(NaN)], dtype='object') to np.NaN
+            # because Series[object].isnan doesn't recognize decimal(NaN) as
+            # NA.
+            try:
+                return math.isnan(x)
+            except TypeError:
+                return False
+
+        if left.dtype == "object":
+            left_na = left.apply(convert)
+        else:
+            left_na = left.isna()
+        if right.dtype == "object":
+            right_na = right.apply(convert)
+        else:
+            right_na = right.isna()
+
+        tm.assert_series_equal(left_na, right_na)
+        return tm.assert_series_equal(left[~left_na], right[~right_na], *args, **kwargs)
+
+    def assert_frame_equal(self, left, right, *args, **kwargs):
+        # TODO(EA): select_dtypes
+        tm.assert_index_equal(
+            left.columns,
+            right.columns,
+            exact=kwargs.get("check_column_type", "equiv"),
+            check_names=kwargs.get("check_names", True),
+            check_exact=kwargs.get("check_exact", False),
+            check_categorical=kwargs.get("check_categorical", True),
+            obj="{obj}.columns".format(obj=kwargs.get("obj", "DataFrame")),
+        )
+
+        decimals = (left.dtypes == "decimal").index
+
+        for col in decimals:
+            self.assert_series_equal(left[col], right[col], *args, **kwargs)
+
+        left = left.drop(columns=decimals)
+        right = right.drop(columns=decimals)
+        tm.assert_frame_equal(left, right, *args, **kwargs)
+
+
+class TestDtype(BaseDecimal, base.BaseDtypeTests):
+    def test_hashable(self, dtype):
+        pass
+
+
+class TestInterface(BaseDecimal, base.BaseInterfaceTests):
+    pass
+
+
+class TestConstructors(BaseDecimal, base.BaseConstructorsTests):
+    @pytest.mark.skip(reason="not implemented constructor from dtype")
+    def test_from_dtype(self, data):
+        # construct from our dtype & string dtype
+        pass
+
+
+class TestReshaping(BaseDecimal, base.BaseReshapingTests):
+    pass
+
+
+class TestGetitem(BaseDecimal, base.BaseGetitemTests):
+    def test_take_na_value_other_decimal(self):
+        arr = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
+        result = arr.take([0, -1], allow_fill=True, fill_value=decimal.Decimal("-1.0"))
+        expected = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("-1.0")])
+        self.assert_extension_array_equal(result, expected)
+
+
+class TestMissing(BaseDecimal, base.BaseMissingTests):
+    pass
+
+
+class Reduce:
+    def check_reduce(self, s, op_name, skipna):
+
+        if skipna or op_name in ["median", "skew", "kurt"]:
+            with pytest.raises(NotImplementedError):
+                getattr(s, op_name)(skipna=skipna)
+
+        else:
+            result = getattr(s, op_name)(skipna=skipna)
+            expected = getattr(np.asarray(s), op_name)()
+            tm.assert_almost_equal(result, expected)
+
+
+class TestNumericReduce(Reduce, base.BaseNumericReduceTests):
+    pass
+
+
+class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests):
+    pass
+
+
+class TestMethods(BaseDecimal, base.BaseMethodsTests):
+    @pytest.mark.parametrize("dropna", [True, False])
+    @pytest.mark.xfail(reason="value_counts not implemented yet.")
+    def test_value_counts(self, all_data, dropna):
+        all_data = all_data[:10]
+        if dropna:
+            other = np.array(all_data[~all_data.isna()])
+        else:
+            other = all_data
+
+        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
+        expected = pd.Series(other).value_counts(dropna=dropna).sort_index()
+
+        tm.assert_series_equal(result, expected)
+
+
+class TestCasting(BaseDecimal, base.BaseCastingTests):
+    pass
+
+
+class TestGroupby(BaseDecimal, base.BaseGroupbyTests):
+    @pytest.mark.xfail(
+        reason="needs to correctly define __eq__ to handle nans, xref #27081."
+    )
+    def test_groupby_apply_identity(self, data_for_grouping):
+        super().test_groupby_apply_identity(data_for_grouping)
+
+
+class TestSetitem(BaseDecimal, base.BaseSetitemTests):
+    pass
+
+
+class TestPrinting(BaseDecimal, base.BasePrintingTests):
+    def test_series_repr(self, data):
+        # Overriding this base test to explicitly test that
+        # the custom _formatter is used
+        ser = pd.Series(data)
+        assert data.dtype.name in repr(ser)
+        assert "Decimal: " in repr(ser)
+
+
+# TODO(extension)
+@pytest.mark.xfail(
+    reason=(
+        "raising AssertionError as this is not implemented, though easy enough to do"
+    )
+)
+def test_series_constructor_coerce_data_to_extension_dtype_raises():
+    xpr = (
+        "Cannot cast data to extension dtype 'decimal'. Pass the "
+        "extension array directly."
+    )
+    with pytest.raises(ValueError, match=xpr):
+        pd.Series([0, 1, 2], dtype=DecimalDtype())
+
+
+def test_series_constructor_with_dtype():
+    arr = DecimalArray([decimal.Decimal("10.0")])
+    result = pd.Series(arr, dtype=DecimalDtype())
+    expected = pd.Series(arr)
+    tm.assert_series_equal(result, expected)
+
+    result = pd.Series(arr, dtype="int64")
+    expected = pd.Series([10])
+    tm.assert_series_equal(result, expected)
+
+
+def test_dataframe_constructor_with_dtype():
+    arr = DecimalArray([decimal.Decimal("10.0")])
+
+    result = pd.DataFrame({"A": arr}, dtype=DecimalDtype())
+    expected = pd.DataFrame({"A": arr})
+    tm.assert_frame_equal(result, expected)
+
+    arr = DecimalArray([decimal.Decimal("10.0")])
+    result = pd.DataFrame({"A": arr}, dtype="int64")
+    expected = pd.DataFrame({"A": [10]})
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("frame", [True, False])
+def test_astype_dispatches(frame):
+    # This is a dtype-specific test that ensures Series[decimal].astype
+    # gets all the way through to ExtensionArray.astype
+    # Designing a reliable smoke test that works for arbitrary data types
+    # is difficult.
+    data = pd.Series(DecimalArray([decimal.Decimal(2)]), name="a")
+    ctx = decimal.Context()
+    ctx.prec = 5
+
+    if frame:
+        data = data.to_frame()
+
+    result = data.astype(DecimalDtype(ctx))
+
+    if frame:
+        result = result["a"]
+
+    assert result.dtype.context.prec == ctx.prec
+
+
+class TestArithmeticOps(BaseDecimal, base.BaseArithmeticOpsTests):
+    def check_opname(self, s, op_name, other, exc=None):
+        super().check_opname(s, op_name, other, exc=None)
+
+    def test_arith_series_with_array(self, data, all_arithmetic_operators):
+        op_name = all_arithmetic_operators
+        s = pd.Series(data)
+
+        context = decimal.getcontext()
+        divbyzerotrap = context.traps[decimal.DivisionByZero]
+        invalidoptrap = context.traps[decimal.InvalidOperation]
+        context.traps[decimal.DivisionByZero] = 0
+        context.traps[decimal.InvalidOperation] = 0
+
+        # Decimal supports ops with int, but not float
+        other = pd.Series([int(d * 100) for d in data])
+        self.check_opname(s, op_name, other)
+
+        if "mod" not in op_name:
+            self.check_opname(s, op_name, s * 2)
+
+        self.check_opname(s, op_name, 0)
+        self.check_opname(s, op_name, 5)
+        context.traps[decimal.DivisionByZero] = divbyzerotrap
+        context.traps[decimal.InvalidOperation] = invalidoptrap
+
+    def _check_divmod_op(self, s, op, other, exc=NotImplementedError):
+        # We implement divmod
+        super()._check_divmod_op(s, op, other, exc=None)
+
+    def test_error(self):
+        pass
+
+
+class TestComparisonOps(BaseDecimal, base.BaseComparisonOpsTests):
+    def check_opname(self, s, op_name, other, exc=None):
+        super().check_opname(s, op_name, other, exc=None)
+
+    def _compare_other(self, s, data, op_name, other):
+        self.check_opname(s, op_name, other)
+
+    def test_compare_scalar(self, data, all_compare_operators):
+        op_name = all_compare_operators
+        s = pd.Series(data)
+        self._compare_other(s, data, op_name, 0.5)
+
+    def test_compare_array(self, data, all_compare_operators):
+        op_name = all_compare_operators
+        s = pd.Series(data)
+
+        alter = np.random.choice([-1, 0, 1], len(data))
+        # Randomly double, halve or keep same value
+        other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter]
+        self._compare_other(s, data, op_name, other)
+
+
+class DecimalArrayWithoutFromSequence(DecimalArray):
+    """Helper class for testing error handling in _from_sequence."""
+
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
+        raise KeyError("For the test")
+
+
+class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence):
+    @classmethod
+    def _create_arithmetic_method(cls, op):
+        return cls._create_method(op, coerce_to_dtype=False)
+
+
+DecimalArrayWithoutCoercion._add_arithmetic_ops()
+
+
+def test_combine_from_sequence_raises():
+    # https://github.com/pandas-dev/pandas/issues/22850
+    ser = pd.Series(
+        DecimalArrayWithoutFromSequence(
+            [decimal.Decimal("1.0"), decimal.Decimal("2.0")]
+        )
+    )
+    result = ser.combine(ser, operator.add)
+
+    # note: object dtype
+    expected = pd.Series(
+        [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object"
+    )
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "class_", [DecimalArrayWithoutFromSequence, DecimalArrayWithoutCoercion]
+)
+def test_scalar_ops_from_sequence_raises(class_):
+    # op(EA, EA) should return an EA, or an ndarray if it's not possible
+    # to return an EA with the return values.
+    arr = class_([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
+    result = arr + arr
+    expected = np.array(
+        [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object"
+    )
+    tm.assert_numpy_array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "reverse, expected_div, expected_mod",
+    [(False, [0, 1, 1, 2], [1, 0, 1, 0]), (True, [2, 1, 0, 0], [0, 0, 2, 2])],
+)
+def test_divmod_array(reverse, expected_div, expected_mod):
+    # https://github.com/pandas-dev/pandas/issues/22930
+    arr = to_decimal([1, 2, 3, 4])
+    if reverse:
+        div, mod = divmod(2, arr)
+    else:
+        div, mod = divmod(arr, 2)
+    expected_div = to_decimal(expected_div)
+    expected_mod = to_decimal(expected_mod)
+
+    tm.assert_extension_array_equal(div, expected_div)
+    tm.assert_extension_array_equal(mod, expected_mod)
+
+
+def test_ufunc_fallback(data):
+    a = data[:5]
+    s = pd.Series(a, index=range(3, 8))
+    result = np.abs(s)
+    expected = pd.Series(np.abs(a), index=range(3, 8))
+    tm.assert_series_equal(result, expected)
+
+
+def test_formatting_values_deprecated():
+    class DecimalArray2(DecimalArray):
+        def _formatting_values(self):
+            return np.array(self)
+
+    ser = pd.Series(DecimalArray2([decimal.Decimal("1.0")]))
+
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+        repr(ser)
+
+
+def test_array_ufunc():
+    a = to_decimal([1, 2, 3])
+    result = np.exp(a)
+    expected = to_decimal(np.exp(a._data))
+    tm.assert_extension_array_equal(result, expected)
+
+
+def test_array_ufunc_series():
+    a = to_decimal([1, 2, 3])
+    s = pd.Series(a)
+    result = np.exp(s)
+    expected = pd.Series(to_decimal(np.exp(a._data)))
+    tm.assert_series_equal(result, expected)
+
+
+def test_array_ufunc_series_scalar_other():
+    # check _HANDLED_TYPES
+    a = to_decimal([1, 2, 3])
+    s = pd.Series(a)
+    result = np.add(s, decimal.Decimal(1))
+    expected = pd.Series(np.add(a, decimal.Decimal(1)))
+    tm.assert_series_equal(result, expected)
+
+
+def test_array_ufunc_series_defer():
+    a = to_decimal([1, 2, 3])
+    s = pd.Series(a)
+
+    expected = pd.Series(to_decimal([2, 4, 6]))
+    r1 = np.add(s, a)
+    r2 = np.add(a, s)
+
+    tm.assert_series_equal(r1, expected)
+    tm.assert_series_equal(r2, expected)