8th day of python challenges 111-117
This commit is contained in:
@@ -0,0 +1,144 @@
|
||||
"""Rudimentary Apache Arrow-backed ExtensionArray.
|
||||
|
||||
At the moment, just a boolean array / type is implemented.
|
||||
Eventually, we'll want to parametrize the type and support
|
||||
multiple dtypes. Not all methods are implemented yet, and the
|
||||
current implementation is not efficient.
|
||||
"""
|
||||
import copy
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
|
||||
import pandas as pd
|
||||
from pandas.api.extensions import (
|
||||
ExtensionArray,
|
||||
ExtensionDtype,
|
||||
register_extension_dtype,
|
||||
take,
|
||||
)
|
||||
|
||||
|
||||
@register_extension_dtype
class ArrowBoolDtype(ExtensionDtype):
    """Extension dtype for boolean data held in a pyarrow array.

    Registered with pandas via ``register_extension_dtype`` under the
    string name ``"arrow_bool"``.
    """

    type = np.bool_
    kind = "b"
    name = "arrow_bool"
    na_value = pa.NULL

    @classmethod
    def construct_from_string(cls, string):
        """Return an instance of this dtype when ``string`` equals its name.

        Raises
        ------
        TypeError
            If ``string`` is anything other than ``cls.name``.
        """
        if string == cls.name:
            return cls()
        raise TypeError("Cannot construct a '{}' from '{}'".format(cls, string))

    @classmethod
    def construct_array_type(cls):
        """Return the array class associated with this dtype."""
        return ArrowBoolArray

    @property
    def _is_boolean(self):
        """Whether this dtype should be treated as boolean (always True).

        Consumers read this as a plain attribute (``dtype._is_boolean``), so
        it is exposed as a property: the value is the bool ``True`` rather
        than a bound method that merely happens to be truthy.
        """
        return True
|
||||
|
||||
|
||||
class ArrowBoolArray(ExtensionArray):
    """Boolean ExtensionArray backed by a ``pyarrow.ChunkedArray``.

    Most operations convert the data with ``ChunkedArray.to_pandas()`` and
    rebuild a new array from the result, so they are not efficient.
    """

    def __init__(self, values):
        """Wrap ``values``, a ``pa.ChunkedArray`` of arrow type ``bool``.

        Raises
        ------
        ValueError
            If ``values`` is not a ``pa.ChunkedArray`` or its arrow type is
            not ``pa.bool_()``.
        """
        if not isinstance(values, pa.ChunkedArray):
            raise ValueError(
                "values must be a pyarrow.ChunkedArray, got '{}'".format(
                    type(values).__name__
                )
            )

        # Explicit raise rather than ``assert`` so the check is not stripped
        # when Python runs with ``-O``.
        if not values.type == pa.bool_():
            raise ValueError(
                "values must have arrow type bool, got '{}'".format(values.type)
            )
        self._data = values
        self._dtype = ArrowBoolDtype()

    def __repr__(self):
        return "ArrowBoolArray({})".format(repr(self._data))

    @classmethod
    def from_scalars(cls, values):
        """Build an array from a sequence of scalars (via ``np.asarray``)."""
        arr = pa.chunked_array([pa.array(np.asarray(values))])
        return cls(arr)

    @classmethod
    def from_array(cls, arr):
        """Build an array from a single ``pa.Array`` (one chunk).

        Raises
        ------
        TypeError
            If ``arr`` is not a ``pa.Array``.
        """
        if not isinstance(arr, pa.Array):
            raise TypeError(
                "arr must be a pyarrow.Array, got '{}'".format(type(arr).__name__)
            )
        return cls(pa.chunked_array([arr]))

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """Build an array from scalars; ``dtype`` and ``copy`` are ignored."""
        return cls.from_scalars(scalars)

    def __getitem__(self, item):
        """Return a scalar for scalar ``item``, otherwise a new array."""
        selected = self._data.to_pandas()[item]
        if pd.api.types.is_scalar(item):
            return selected
        return type(self).from_scalars(selected)

    def __len__(self):
        return len(self._data)

    def astype(self, dtype, copy=True):
        """Cast to ``dtype``; casting to our own dtype short-circuits."""
        # needed to fix this astype for the Series constructor.
        if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
            if copy:
                return self.copy()
            return self
        return super().astype(dtype, copy)

    @property
    def dtype(self):
        """The ``ArrowBoolDtype`` instance created in ``__init__``."""
        return self._dtype

    @property
    def nbytes(self):
        """Sum of the sizes of all non-None buffers across all chunks."""
        return sum(
            x.size
            for chunk in self._data.chunks
            for x in chunk.buffers()
            if x is not None
        )

    def isna(self):
        """Return an ``ArrowBoolArray`` mask built from ``pd.isna``."""
        nas = pd.isna(self._data.to_pandas())
        return type(self).from_scalars(nas)

    def take(self, indices, allow_fill=False, fill_value=None):
        """Take elements by position using ``pandas.api.extensions.take``.

        When ``allow_fill`` is true and no ``fill_value`` is given, the
        dtype's ``na_value`` is used as the fill value.
        """
        data = self._data.to_pandas()

        if allow_fill and fill_value is None:
            fill_value = self.dtype.na_value

        result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill)
        return self._from_sequence(result, dtype=self.dtype)

    def copy(self):
        """Return a new array wrapping ``copy.copy`` of the underlying data."""
        return type(self)(copy.copy(self._data))

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Concatenate arrays by chaining their chunks into one ChunkedArray."""
        chunks = list(itertools.chain.from_iterable(x._data.chunks for x in to_concat))
        arr = pa.chunked_array(chunks)
        return cls(arr)

    def __invert__(self):
        return type(self).from_scalars(~self._data.to_pandas())

    def _reduce(self, method, skipna=True, **kwargs):
        """Dispatch a reduction to the same-named method on this array.

        Raises
        ------
        TypeError
            If the array has no attribute called ``method``.
        """
        if skipna:
            arr = self[~self.isna()]
        else:
            arr = self

        try:
            op = getattr(arr, method)
        except AttributeError as err:
            raise TypeError(
                "cannot perform '{}' with type '{}'".format(
                    method, type(self).__name__
                )
            ) from err
        return op(**kwargs)

    def any(self, axis=0, out=None):
        """Return ``any()`` of the pandas-converted data; args are ignored."""
        return self._data.to_pandas().any()

    def all(self, axis=0, out=None):
        """Return ``all()`` of the pandas-converted data; args are ignored."""
        return self._data.to_pandas().all()
|
@@ -0,0 +1,70 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.tests.extension import base
|
||||
import pandas.util.testing as tm
|
||||
|
||||
pytest.importorskip("pyarrow", minversion="0.10.0")
|
||||
|
||||
from .bool import ArrowBoolArray, ArrowBoolDtype # isort:skip
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Provide a fresh ``ArrowBoolDtype`` instance."""
    arrow_dtype = ArrowBoolDtype()
    return arrow_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Provide 100 random booleans wrapped in an ``ArrowBoolArray``."""
    bools = np.random.randint(0, 2, size=100, dtype=bool)
    # Force the second element to be the negation of the first.
    bools[1] = ~bools[0]
    return ArrowBoolArray.from_scalars(bools)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Provide a length-2 ``ArrowBoolArray`` built from ``[None, True]``."""
    scalars = [None, True]
    return ArrowBoolArray.from_scalars(scalars)
|
||||
|
||||
|
||||
class BaseArrowTests:
    """Empty mixin shared by the Arrow extension test classes."""
|
||||
|
||||
|
||||
class TestDtype(BaseArrowTests, base.BaseDtypeTests):
    """Run the base dtype tests against the Arrow bool dtype."""

    def test_array_type_with_arg(self, data, dtype):
        """Unconditionally skipped; tracked as GH-22666."""
        pytest.skip("GH-22666")
|
||||
|
||||
|
||||
class TestInterface(BaseArrowTests, base.BaseInterfaceTests):
    """Run the base interface tests against the Arrow bool array."""

    def test_copy(self, data):
        """Smoke-test ``copy`` only, because ``__setitem__`` does not work."""
        data.copy()
|
||||
|
||||
|
||||
class TestConstructors(BaseArrowTests, base.BaseConstructorsTests):
    """Run the base constructor tests against the Arrow bool array."""

    def test_from_dtype(self, data):
        """Unconditionally skipped; tracked as GH-22666."""
        pytest.skip("GH-22666")

    # seems like some bug in isna on empty BoolArray returning floats.
    @pytest.mark.xfail(reason="bad is-na for empty data")
    def test_from_sequence_from_cls(self, data):
        """Delegate to the base implementation; marked as expected failure."""
        super().test_from_sequence_from_cls(data)
|
||||
|
||||
|
||||
class TestReduce(base.BaseNoReduceTests):
    """Run the base no-reduce tests, with the boolean case disabled."""

    def test_reduce_series_boolean(self):
        """Deliberately empty override of the inherited test."""
|
||||
|
||||
|
||||
class TestReduceBoolean(base.BaseBooleanReduceTests):
    """Run the base boolean-reduction tests unchanged."""
|
||||
|
||||
|
||||
def test_is_bool_dtype(data):
    """The array is recognised as boolean and works as a boolean indexer."""
    assert pd.api.types.is_bool_dtype(data)
    assert pd.core.common.is_bool_indexer(data)

    ser = pd.Series(range(len(data)))
    via_extension = ser[data]
    via_numpy = ser[np.asarray(data)]
    tm.assert_series_equal(via_extension, via_numpy)
|
@@ -0,0 +1,60 @@
|
||||
"""Base test suite for extension arrays.
|
||||
|
||||
These tests are intended for third-party libraries to subclass to validate
|
||||
that their extension arrays and dtypes satisfy the interface. Moving or
|
||||
renaming the tests should not be done lightly.
|
||||
|
||||
Libraries are expected to implement a few pytest fixtures to provide data
|
||||
for the tests. The fixtures may be located in either
|
||||
|
||||
* The same module as your test class.
|
||||
* A ``conftest.py`` in the same directory as your test class.
|
||||
|
||||
The full list of fixtures may be found in the ``conftest.py`` next to this
|
||||
file.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import pytest
|
||||
from pandas.tests.extension.base import BaseDtypeTests
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def dtype():
|
||||
return MyDtype()
|
||||
|
||||
|
||||
class TestMyDtype(BaseDtypeTests):
|
||||
pass
|
||||
|
||||
|
||||
Your class ``TestMyDtype`` will inherit all the tests defined on
|
||||
``BaseDtypeTests``. pytest's fixture discovery will supply your ``dtype``
|
||||
wherever the test requires it. You're free to implement additional tests.
|
||||
|
||||
All the tests in these modules use ``self.assert_frame_equal`` or
|
||||
``self.assert_series_equal`` for dataframe or series comparisons. By default,
|
||||
they use the usual ``pandas.testing.assert_frame_equal`` and
|
||||
``pandas.testing.assert_series_equal``. You can override the checks used
|
||||
by defining the staticmethods ``assert_frame_equal`` and
|
||||
``assert_series_equal`` on your base test class.
|
||||
|
||||
"""
|
||||
from .casting import BaseCastingTests # noqa
|
||||
from .constructors import BaseConstructorsTests # noqa
|
||||
from .dtype import BaseDtypeTests # noqa
|
||||
from .getitem import BaseGetitemTests # noqa
|
||||
from .groupby import BaseGroupbyTests # noqa
|
||||
from .interface import BaseInterfaceTests # noqa
|
||||
from .io import BaseParsingTests # noqa
|
||||
from .methods import BaseMethodsTests # noqa
|
||||
from .missing import BaseMissingTests # noqa
|
||||
from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil # noqa
|
||||
from .printing import BasePrintingTests # noqa
|
||||
from .reduce import ( # noqa
|
||||
BaseBooleanReduceTests,
|
||||
BaseNoReduceTests,
|
||||
BaseNumericReduceTests,
|
||||
)
|
||||
from .reshaping import BaseReshapingTests # noqa
|
||||
from .setitem import BaseSetitemTests # noqa
|
@@ -0,0 +1,9 @@
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class BaseExtensionTests:
    """Root of the extension test classes; exposes the comparison helpers.

    Each helper is a ``staticmethod`` wrapper around the matching ``tm``
    function, so subclasses can replace a check by redefining the attribute.
    """

    assert_extension_array_equal = staticmethod(tm.assert_extension_array_equal)
    assert_frame_equal = staticmethod(tm.assert_frame_equal)
    assert_series_equal = staticmethod(tm.assert_series_equal)
    assert_equal = staticmethod(tm.assert_equal)
|
@@ -0,0 +1,23 @@
|
||||
import pandas as pd
|
||||
from pandas.core.internals import ObjectBlock
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseCastingTests(BaseExtensionTests):
    """Casting to and from ExtensionDtypes"""

    def test_astype_object_series(self, all_data):
        """Casting a Series to ``object`` stores it in an ``ObjectBlock``."""
        as_object = pd.Series({"A": all_data}).astype(object)
        first_block = as_object._data.blocks[0]
        assert isinstance(first_block, ObjectBlock)

    def test_tolist(self, data):
        """``Series.tolist`` matches ``list`` applied to the raw array."""
        from_series = pd.Series(data).tolist()
        from_array = list(data)
        assert from_series == from_array

    def test_astype_str(self, data):
        """Casting to ``str`` gives the same Series before or after wrapping."""
        cast_after_wrapping = pd.Series(data[:5]).astype(str)
        cast_before_wrapping = pd.Series(data[:5].astype(str))
        self.assert_series_equal(cast_after_wrapping, cast_before_wrapping)
|
@@ -0,0 +1,76 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.internals import ExtensionBlock
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseConstructorsTests(BaseExtensionTests):
    """Tests for constructing arrays, Series and DataFrames from the data."""

    def test_from_sequence_from_cls(self, data):
        """``_from_sequence`` round-trips the array and its empty slice."""
        rebuilt = type(data)._from_sequence(data, dtype=data.dtype)
        self.assert_extension_array_equal(rebuilt, data)

        empty = data[:0]
        rebuilt_empty = type(empty)._from_sequence(empty, dtype=empty.dtype)
        self.assert_extension_array_equal(rebuilt_empty, empty)

    def test_array_from_scalars(self, data):
        """``_from_sequence`` on a list of scalars returns the array type."""
        first_three = [data[i] for i in range(3)]
        built = data._from_sequence(first_three)
        assert isinstance(built, type(data))

    def test_series_constructor(self, data):
        """A Series keeps the dtype, length and the very same array object."""
        ser = pd.Series(data)
        assert ser.dtype == data.dtype
        assert len(ser) == len(data)
        assert isinstance(ser._data.blocks[0], ExtensionBlock)
        assert ser._data.blocks[0].values is data

        # Series[EA] is unboxed / boxed correctly
        rewrapped = pd.Series(ser)
        assert rewrapped.dtype == data.dtype
        assert isinstance(rewrapped._data.blocks[0], ExtensionBlock)

    @pytest.mark.parametrize("from_series", [True, False])
    def test_dataframe_constructor_from_dict(self, data, from_series):
        """A dict of the array (or a Series of it) yields one extension column."""
        values = pd.Series(data) if from_series else data
        frame = pd.DataFrame({"A": values})
        assert frame.dtypes["A"] == values.dtype
        assert frame.shape == (len(values), 1)
        assert isinstance(frame._data.blocks[0], ExtensionBlock)

    def test_dataframe_from_series(self, data):
        """A DataFrame built from a Series keeps the extension dtype."""
        frame = pd.DataFrame(pd.Series(data))
        assert frame.dtypes[0] == data.dtype
        assert frame.shape == (len(data), 1)
        assert isinstance(frame._data.blocks[0], ExtensionBlock)

    def test_series_given_mismatched_index_raises(self, data):
        """Three values with a five-label index raise ``ValueError``."""
        msg = "Length of passed values is 3, index implies 5"
        five_labels = [0, 1, 2, 3, 4]
        with pytest.raises(ValueError, match=msg):
            pd.Series(data[:3], index=five_labels)

    def test_from_dtype(self, data):
        """Construction works from both the dtype object and its string."""
        # construct from our dtype & string dtype
        dtype = data.dtype
        expected = pd.Series(data)

        from_dtype = pd.Series(list(data), dtype=dtype)
        self.assert_series_equal(from_dtype, expected)

        from_string = pd.Series(list(data), dtype=str(dtype))
        self.assert_series_equal(from_string, expected)

    def test_pandas_array(self, data):
        """``pd.array`` without a dtype returns an equal extension array."""
        # pd.array(extension_array) should be idempotent...
        self.assert_extension_array_equal(pd.array(data), data)

    def test_pandas_array_dtype(self, data):
        """``pd.array`` with an object dtype returns a ``PandasArray``."""
        # ... but specifying dtype will override idempotency
        converted = pd.array(data, dtype=np.dtype(object))
        wanted = pd.arrays.PandasArray(np.asarray(data, dtype=object))
        self.assert_equal(converted, wanted)
|
@@ -0,0 +1,102 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseDtypeTests(BaseExtensionTests):
    """Base class for ExtensionDtype classes"""

    def test_name(self, dtype):
        """``dtype.name`` is a string."""
        assert isinstance(dtype.name, str)

    def test_kind(self, dtype):
        """``dtype.kind`` is either None or a single valid kind character."""
        valid = set("biufcmMOSUV")
        if dtype.kind is not None:
            assert dtype.kind in valid

    def test_construct_from_string_own_name(self, dtype):
        """The dtype can be rebuilt from its own name."""
        result = dtype.construct_from_string(dtype.name)
        assert type(result) is type(dtype)

        # check OK as classmethod
        result = type(dtype).construct_from_string(dtype.name)
        assert type(result) is type(dtype)

    def test_is_dtype_from_name(self, dtype):
        """``is_dtype`` accepts the dtype's name."""
        result = type(dtype).is_dtype(dtype.name)
        assert result is True

    def test_is_dtype_unboxes_dtype(self, data, dtype):
        """``is_dtype`` accepts an array carrying the dtype."""
        assert dtype.is_dtype(data) is True

    def test_is_dtype_from_self(self, dtype):
        """``is_dtype`` accepts the dtype instance itself."""
        result = type(dtype).is_dtype(dtype)
        assert result is True

    def test_is_not_string_type(self, dtype):
        """The dtype is not reported as a string dtype.

        The condition is asserted rather than returned: a returned value is
        not checked by the test runner, so the test could never fail.
        Suites for dtypes that genuinely are string dtypes should override
        this test.
        """
        assert not pd.api.types.is_string_dtype(dtype)

    def test_is_not_object_type(self, dtype):
        """The dtype is not reported as an object dtype.

        Asserted rather than returned, for the same reason as
        ``test_is_not_string_type``; override for object-like dtypes.
        """
        assert not pd.api.types.is_object_dtype(dtype)

    def test_eq_with_str(self, dtype):
        """The dtype equals its name and differs from a suffixed name."""
        assert dtype == dtype.name
        assert dtype != dtype.name + "-suffix"

    def test_eq_with_numpy_object(self, dtype):
        """The dtype differs from numpy's object dtype."""
        assert dtype != np.dtype("object")

    def test_eq_with_self(self, dtype):
        """The dtype equals itself and differs from an arbitrary object."""
        assert dtype == dtype
        assert dtype != object()

    def test_array_type(self, data, dtype):
        """``construct_array_type`` returns the class of the data fixture."""
        assert dtype.construct_array_type() is type(data)

    def test_check_dtype(self, data):
        """Comparing ``DataFrame.dtypes`` with the dtype string works."""
        dtype = data.dtype

        # check equivalency for using .dtypes
        df = pd.DataFrame(
            {"A": pd.Series(data, dtype=dtype), "B": data, "C": "foo", "D": 1}
        )

        # np.dtype('int64') == 'Int64' == 'int64'
        # so can't distinguish
        if dtype.name == "Int64":
            expected = pd.Series([True, True, False, True], index=list("ABCD"))
        else:
            expected = pd.Series([True, True, False, False], index=list("ABCD"))

        # XXX: This should probably be *fixed* not ignored.
        # See libops.scalar_compare
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            result = df.dtypes == str(dtype)

        self.assert_series_equal(result, expected)

        expected = pd.Series([True, True, False, False], index=list("ABCD"))
        result = df.dtypes.apply(str) == str(dtype)
        self.assert_series_equal(result, expected)

    def test_hashable(self, dtype):
        """Hashing the dtype does not raise."""
        hash(dtype)  # no error

    def test_str(self, dtype):
        """``str(dtype)`` is the dtype's name."""
        assert str(dtype) == dtype.name

    def test_eq(self, dtype):
        """The dtype equals its name and differs from another name."""
        assert dtype == dtype.name
        assert dtype != "anonther_type"

    def test_construct_from_string(self, dtype):
        """Construction from the name works; another name raises TypeError."""
        dtype_instance = dtype.__class__.construct_from_string(dtype.name)
        assert isinstance(dtype_instance, dtype.__class__)
        with pytest.raises(TypeError):
            dtype.__class__.construct_from_string("another_type")
|
@@ -0,0 +1,262 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseGetitemTests(BaseExtensionTests):
    """Tests for ExtensionArray.__getitem__."""

    def test_iloc_series(self, data):
        """``Series.iloc`` with a slice and with a list give the first four."""
        ser = pd.Series(data)
        result = ser.iloc[:4]
        expected = pd.Series(data[:4])
        self.assert_series_equal(result, expected)

        result = ser.iloc[[0, 1, 2, 3]]
        self.assert_series_equal(result, expected)

    def test_iloc_frame(self, data):
        """``DataFrame.iloc`` with slice and list row indexers."""
        df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")})
        expected = pd.DataFrame({"A": data[:4]})

        # slice -> frame
        result = df.iloc[:4, [0]]
        self.assert_frame_equal(result, expected)

        # sequence -> frame
        result = df.iloc[[0, 1, 2, 3], [0]]
        self.assert_frame_equal(result, expected)

        expected = pd.Series(data[:4], name="A")

        # slice -> series
        result = df.iloc[:4, 0]
        self.assert_series_equal(result, expected)

        # sequence -> series
        # Use a list-like row indexer here; repeating the slice would leave
        # the sequence path untested.
        result = df.iloc[[0, 1, 2, 3], 0]
        self.assert_series_equal(result, expected)

    def test_loc_series(self, data):
        """``Series.loc`` with a slice and with a list give the first four."""
        ser = pd.Series(data)
        result = ser.loc[:3]
        expected = pd.Series(data[:4])
        self.assert_series_equal(result, expected)

        result = ser.loc[[0, 1, 2, 3]]
        self.assert_series_equal(result, expected)

    def test_loc_frame(self, data):
        """``DataFrame.loc`` with slice and list row indexers."""
        df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")})
        expected = pd.DataFrame({"A": data[:4]})

        # slice -> frame
        result = df.loc[:3, ["A"]]
        self.assert_frame_equal(result, expected)

        # sequence -> frame
        result = df.loc[[0, 1, 2, 3], ["A"]]
        self.assert_frame_equal(result, expected)

        expected = pd.Series(data[:4], name="A")

        # slice -> series
        result = df.loc[:3, "A"]
        self.assert_series_equal(result, expected)

        # sequence -> series
        # Use a list-like row indexer here; repeating the slice would leave
        # the sequence path untested.
        result = df.loc[[0, 1, 2, 3], "A"]
        self.assert_series_equal(result, expected)

    def test_loc_iloc_frame_single_dtype(self, data):
        """Row selection on a single-column frame returns a Series."""
        # GH#27110 bug in ExtensionBlock.iget caused df.iloc[n] to incorrectly
        # return a scalar
        df = pd.DataFrame({"A": data})
        expected = pd.Series([data[2]], index=["A"], name=2, dtype=data.dtype)

        result = df.loc[2]
        self.assert_series_equal(result, expected)

        expected = pd.Series(
            [data[-1]], index=["A"], name=len(data) - 1, dtype=data.dtype
        )
        result = df.iloc[-1]
        self.assert_series_equal(result, expected)

    def test_getitem_scalar(self, data):
        """Scalar indexing returns an instance of ``dtype.type``."""
        result = data[0]
        assert isinstance(result, data.dtype.type)

        result = pd.Series(data)[0]
        assert isinstance(result, data.dtype.type)

    def test_getitem_scalar_na(self, data_missing, na_cmp, na_value):
        """Scalar indexing of a missing element returns the NA value."""
        result = data_missing[0]
        assert na_cmp(result, na_value)

    def test_getitem_mask(self, data):
        """Boolean-mask indexing on the raw array and inside a Series."""
        # Empty mask, raw array
        mask = np.zeros(len(data), dtype=bool)
        result = data[mask]
        assert len(result) == 0
        assert isinstance(result, type(data))

        # Empty mask, in series
        mask = np.zeros(len(data), dtype=bool)
        result = pd.Series(data)[mask]
        assert len(result) == 0
        assert result.dtype == data.dtype

        # non-empty mask, raw array
        mask[0] = True
        result = data[mask]
        assert len(result) == 1
        assert isinstance(result, type(data))

        # non-empty mask, in series
        result = pd.Series(data)[mask]
        assert len(result) == 1
        assert result.dtype == data.dtype

    def test_getitem_slice(self, data):
        """Slicing always returns an array of the same type."""
        # getitem[slice] should return an array
        result = data[slice(0)]  # empty
        assert isinstance(result, type(data))

        result = data[slice(1)]  # scalar
        assert isinstance(result, type(data))

    def test_get(self, data):
        """``Series.get`` with labels, lists, slices and missing keys."""
        # GH 20882
        s = pd.Series(data, index=[2 * i for i in range(len(data))])
        assert s.get(4) == s.iloc[2]

        result = s.get([4, 6])
        expected = s.iloc[[2, 3]]
        self.assert_series_equal(result, expected)

        result = s.get(slice(2))
        expected = s.iloc[[0, 1]]
        self.assert_series_equal(result, expected)

        assert s.get(-1) is None
        assert s.get(s.index.max() + 1) is None

        s = pd.Series(data[:6], index=list("abcdef"))
        assert s.get("c") == s.iloc[2]

        result = s.get(slice("b", "d"))
        expected = s.iloc[[1, 2, 3]]
        self.assert_series_equal(result, expected)

        result = s.get("Z")
        assert result is None

        assert s.get(4) == s.iloc[4]
        assert s.get(-1) == s.iloc[-1]
        assert s.get(len(s)) is None

        # GH 21257
        s = pd.Series(data)
        s2 = s[::2]
        assert s2.get(1) is None

    def test_take_sequence(self, data):
        """List indexing on a Series picks the requested elements."""
        result = pd.Series(data)[[0, 1, 3]]
        assert result.iloc[0] == data[0]
        assert result.iloc[1] == data[1]
        assert result.iloc[2] == data[3]

    def test_take(self, data, na_value, na_cmp):
        """``take`` with and without ``allow_fill``, plus out-of-bounds."""
        result = data.take([0, -1])
        assert result.dtype == data.dtype
        assert result[0] == data[0]
        assert result[1] == data[-1]

        result = data.take([0, -1], allow_fill=True, fill_value=na_value)
        assert result[0] == data[0]
        assert na_cmp(result[1], na_value)

        with pytest.raises(IndexError, match="out of bounds"):
            data.take([len(data) + 1])

    def test_take_empty(self, data, na_value, na_cmp):
        """``take`` on an empty array only succeeds when filling with NA."""
        empty = data[:0]

        result = empty.take([-1], allow_fill=True)
        assert na_cmp(result[0], na_value)

        with pytest.raises(IndexError):
            empty.take([-1])

        with pytest.raises(IndexError, match="cannot do a non-empty take"):
            empty.take([0, 1])

    def test_take_negative(self, data):
        """Negative indices count from the end when not filling."""
        # https://github.com/pandas-dev/pandas/issues/20640
        n = len(data)
        result = data.take([0, -n, n - 1, -1])
        expected = data.take([0, 0, n - 1, n - 1])
        self.assert_extension_array_equal(result, expected)

    def test_take_non_na_fill_value(self, data_missing):
        """``take`` fills ``-1`` positions with a supplied valid value."""
        fill_value = data_missing[1]  # valid
        na = data_missing[0]

        array = data_missing._from_sequence([na, fill_value, na])
        result = array.take([-1, 1], fill_value=fill_value, allow_fill=True)
        expected = array.take([1, 1])
        self.assert_extension_array_equal(result, expected)

    def test_take_pandas_style_negative_raises(self, data, na_value):
        """With ``allow_fill=True`` an index below ``-1`` raises."""
        with pytest.raises(ValueError):
            data.take([0, -2], fill_value=na_value, allow_fill=True)

    @pytest.mark.parametrize("allow_fill", [True, False])
    def test_take_out_of_bounds_raises(self, data, allow_fill):
        """An index equal to the length raises ``IndexError``."""
        arr = data[:3]
        with pytest.raises(IndexError):
            arr.take(np.asarray([0, 3]), allow_fill=allow_fill)

    def test_take_series(self, data):
        """``Series.take`` returns the selected values with their labels."""
        s = pd.Series(data)
        result = s.take([0, -1])
        expected = pd.Series(
            data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype),
            index=[0, len(data) - 1],
        )
        self.assert_series_equal(result, expected)

    def test_reindex(self, data, na_value):
        """``Series.reindex`` fills labels that are not present with NA."""
        s = pd.Series(data)
        result = s.reindex([0, 1, 3])
        expected = pd.Series(data.take([0, 1, 3]), index=[0, 1, 3])
        self.assert_series_equal(result, expected)

        n = len(data)
        result = s.reindex([-1, 0, n])
        expected = pd.Series(
            data._from_sequence([na_value, data[0], na_value], dtype=s.dtype),
            index=[-1, 0, n],
        )
        self.assert_series_equal(result, expected)

        result = s.reindex([n, n + 1])
        expected = pd.Series(
            data._from_sequence([na_value, na_value], dtype=s.dtype), index=[n, n + 1]
        )
        self.assert_series_equal(result, expected)

    def test_reindex_non_na_fill_value(self, data_missing):
        """``Series.reindex`` honours a non-NA ``fill_value``."""
        valid = data_missing[1]
        na = data_missing[0]

        array = data_missing._from_sequence([na, valid])
        ser = pd.Series(array)
        result = ser.reindex([0, 1, 2], fill_value=valid)
        expected = pd.Series(data_missing._from_sequence([na, valid, valid]))

        self.assert_series_equal(result, expected)
|
@@ -0,0 +1,91 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseGroupbyTests(BaseExtensionTests):
    """Groupby-specific tests."""

    def test_grouping_grouper(self, data_for_grouping):
        """The groupers hold the original column values."""
        labels = ["B", "B", None, None, "A", "A", "B", "C"]
        df = pd.DataFrame({"A": labels, "B": data_for_grouping})
        grouping_a = df.groupby("A").grouper.groupings[0]
        grouping_b = df.groupby("B").grouper.groupings[0]

        tm.assert_numpy_array_equal(grouping_a.grouper, df.A.values)
        tm.assert_extension_array_equal(grouping_b.grouper, data_for_grouping)

    @pytest.mark.parametrize("as_index", [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        """Mean of ``A`` grouped by the extension column, sorted groups."""
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
        result = df.groupby("B", as_index=as_index).A.mean()
        _, uniques = pd.factorize(data_for_grouping, sort=True)

        group_index = pd.Index(uniques, name="B")
        expected = pd.Series([3, 1, 4], index=group_index, name="A")
        if not as_index:
            # With as_index=False the result is a frame with "B" as a column.
            self.assert_frame_equal(result, expected.reset_index())
            return
        self.assert_series_equal(result, expected)

    def test_groupby_extension_no_sort(self, data_for_grouping):
        """Mean of ``A`` grouped by the extension column, first-seen order."""
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
        result = df.groupby("B", sort=False).A.mean()
        _, uniques = pd.factorize(data_for_grouping, sort=False)

        group_index = pd.Index(uniques, name="B")
        expected = pd.Series([1, 3, 4], index=group_index, name="A")
        self.assert_series_equal(result, expected)

    def test_groupby_extension_transform(self, data_for_grouping):
        """``transform(len)`` over the non-missing grouping values."""
        not_missing = data_for_grouping[~data_for_grouping.isna()]
        df = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4], "B": not_missing})

        group_sizes = df.groupby("B").A.transform(len)
        wanted = pd.Series([3, 3, 2, 2, 3, 1], name="A")

        self.assert_series_equal(group_sizes, wanted)

    def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
        """``apply`` runs on frame and column groupbys keyed by B, then A."""
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
        # Results are not inspected; each groupby is built afresh per call.
        for key, other in (("B", "A"), ("A", "B")):
            df.groupby(key).apply(groupby_apply_op)
            getattr(df.groupby(key), other).apply(groupby_apply_op)

    def test_groupby_apply_identity(self, data_for_grouping):
        """Applying ``lambda x: x.array`` returns each group's array."""
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
        result = df.groupby("A").B.apply(lambda x: x.array)
        per_group = [
            df.B.iloc[[0, 1, 6]].array,
            df.B.iloc[[2, 3]].array,
            df.B.iloc[[4, 5]].array,
            df.B.iloc[[7]].array,
        ]
        expected = pd.Series(
            per_group, index=pd.Index([1, 2, 3, 4], name="A"), name="B"
        )
        self.assert_series_equal(result, expected)

    def test_in_numeric_groupby(self, data_for_grouping):
        """``sum`` keeps the extension column only if the dtype is numeric."""
        df = pd.DataFrame(
            {
                "A": [1, 1, 2, 2, 3, 3, 1, 4],
                "B": data_for_grouping,
                "C": [1, 1, 1, 1, 1, 1, 1, 1],
            }
        )
        summed_columns = df.groupby("A").sum().columns

        kept = ["B", "C"] if data_for_grouping.dtype._is_numeric else ["C"]
        expected = pd.Index(kept)

        tm.assert_index_equal(summed_columns, expected)
|
@@ -0,0 +1,77 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.common import is_extension_array_dtype
|
||||
from pandas.core.dtypes.dtypes import ExtensionDtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseInterfaceTests(BaseExtensionTests):
    """Tests that the basic interface is satisfied."""

    # ------------------------------------------------------------------------
    # Interface
    # ------------------------------------------------------------------------

    def test_len(self, data):
        """The ``data`` fixture has exactly 100 elements."""
        expected_length = 100
        assert len(data) == expected_length

    def test_ndim(self, data):
        """The array is one-dimensional."""
        assert data.ndim == 1

    def test_can_hold_na_valid(self, data):
        """``_can_hold_na`` is exactly ``True``."""
        # GH-20761
        assert data._can_hold_na is True

    def test_memory_usage(self, data):
        """``Series.memory_usage(index=False)`` equals ``Series.nbytes``."""
        ser = pd.Series(data)
        usage = ser.memory_usage(index=False)
        assert usage == ser.nbytes

    def test_array_interface(self, data):
        """``np.array`` conversion works with and without an object dtype."""
        converted = np.array(data)
        assert converted[0] == data[0]

        as_object = np.array(data, dtype=object)
        from_list = np.array(list(data), dtype=object)
        tm.assert_numpy_array_equal(as_object, from_list)

    def test_is_extension_array_dtype(self, data):
        """Array, dtype and Series are all recognised as extension-typed."""
        for candidate in (data, data.dtype, pd.Series(data)):
            assert is_extension_array_dtype(candidate)
        assert isinstance(data.dtype, ExtensionDtype)

    def test_no_values_attribute(self, data):
        """The array exposes neither ``values`` nor ``_values``."""
        # GH-20735: EA's with .values attribute give problems with internal
        # code, disallowing this for now until solved
        assert not hasattr(data, "values")
        assert not hasattr(data, "_values")

    def test_is_numeric_honored(self, data):
        """The block's ``is_numeric`` is the dtype's ``_is_numeric``."""
        block = pd.Series(data)._data.blocks[0]
        assert block.is_numeric is data.dtype._is_numeric

    def test_isna_extension_array(self, data_missing):
        """An extension-array ``isna`` result supports ``any`` and ``all``."""
        # If your `isna` returns an ExtensionArray, you must also implement
        # _reduce. At the *very* least, you must implement any and all
        mask = data_missing.isna()
        if not is_extension_array_dtype(mask):
            return

        assert mask._reduce("any")
        assert mask.any()

        assert not mask._reduce("all")
        assert not mask.all()

        assert mask.dtype._is_boolean

    def test_copy(self, data):
        """Mutating the original after ``copy`` leaves the copy unchanged."""
        # GH#27083 removing deep keyword from EA.copy
        assert data[0] != data[1]
        duplicate = data.copy()

        data[1] = data[0]
        assert duplicate[1] != duplicate[0]
|
@@ -0,0 +1,20 @@
|
||||
from io import StringIO
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseParsingTests(BaseExtensionTests):
    """CSV round-trip tests for extension dtypes."""

    @pytest.mark.parametrize("engine", ["c", "python"])
    def test_EA_types(self, engine, data):
        """Writing to CSV and reading back with the dtype string round-trips."""
        dtype_name = str(data.dtype)
        df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=dtype_name)})
        csv_output = df.to_csv(index=False, na_rep=np.nan)
        buffer = StringIO(csv_output)
        result = pd.read_csv(buffer, dtype={"with_dtype": dtype_name}, engine=engine)
        self.assert_frame_equal(result, df)
|
@@ -0,0 +1,360 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.sorting import nargsort
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseMethodsTests(BaseExtensionTests):
    """Various Series and DataFrame methods."""

    @pytest.mark.parametrize("dropna", [True, False])
    def test_value_counts(self, all_data, dropna):
        """``value_counts`` on the first ten elements matches counting the plain values."""
        all_data = all_data[:10]
        if dropna:
            other = np.array(all_data[~all_data.isna()])
        else:
            other = all_data

        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
        expected = pd.Series(other).value_counts(dropna=dropna).sort_index()

        self.assert_series_equal(result, expected)

    def test_count(self, data_missing):
        """Row-wise ``DataFrame.count`` yields 0 for the first row and 1 for the second."""
        df = pd.DataFrame({"A": data_missing})
        result = df.count(axis="columns")
        expected = pd.Series([0, 1])
        self.assert_series_equal(result, expected)

    def test_series_count(self, data_missing):
        """``Series.count`` on ``data_missing`` is 1."""
        # GH#26835
        ser = pd.Series(data_missing)
        result = ser.count()
        expected = 1
        assert result == expected

    def test_apply_simple_series(self, data):
        """``Series.apply`` with a scalar function returns a Series."""
        result = pd.Series(data).apply(id)
        assert isinstance(result, pd.Series)

    def test_argsort(self, data_for_sorting):
        """``Series.argsort`` on ``data_for_sorting`` gives positions ``[2, 0, 1]``."""
        result = pd.Series(data_for_sorting).argsort()
        expected = pd.Series(np.array([2, 0, 1], dtype=np.int64))
        self.assert_series_equal(result, expected)

    def test_argsort_missing_array(self, data_missing_for_sorting):
        """Array-level ``argsort`` with a missing value gives ``[2, 0, 1]``."""
        result = data_missing_for_sorting.argsort()
        expected = np.array([2, 0, 1], dtype=np.dtype("int"))
        # we don't care whether it's int32 or int64
        result = result.astype("int64", casting="safe")
        expected = expected.astype("int64", casting="safe")
        tm.assert_numpy_array_equal(result, expected)

    def test_argsort_missing(self, data_missing_for_sorting):
        """``Series.argsort`` with a missing value gives ``[1, -1, 0]``."""
        result = pd.Series(data_missing_for_sorting).argsort()
        expected = pd.Series(np.array([1, -1, 0], dtype=np.int64))
        self.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "na_position, expected",
        [
            ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))),
            ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))),
        ],
    )
    def test_nargsort(self, data_missing_for_sorting, na_position, expected):
        """``nargsort`` places the missing value according to ``na_position``."""
        # GH 25439
        result = nargsort(data_missing_for_sorting, na_position=na_position)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values(self, data_for_sorting, ascending):
        """``Series.sort_values`` orders the rows as ``[2, 0, 1]`` (reversed if descending)."""
        ser = pd.Series(data_for_sorting)
        result = ser.sort_values(ascending=ascending)
        expected = ser.iloc[[2, 0, 1]]
        if not ascending:
            expected = expected[::-1]

        self.assert_series_equal(result, expected)

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values_missing(self, data_missing_for_sorting, ascending):
        """``Series.sort_values`` keeps the missing value (row 1) last in both directions."""
        ser = pd.Series(data_missing_for_sorting)
        result = ser.sort_values(ascending=ascending)
        if ascending:
            expected = ser.iloc[[2, 0, 1]]
        else:
            expected = ser.iloc[[0, 2, 1]]
        self.assert_series_equal(result, expected)

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values_frame(self, data_for_sorting, ascending):
        """Sorting a frame by an integer column then the extension column honours ``ascending``."""
        df = pd.DataFrame({"A": [1, 2, 1], "B": data_for_sorting})
        # Pass the parametrized direction through; without it both
        # parametrizations exercise the very same ascending sort.
        result = df.sort_values(["A", "B"], ascending=ascending)
        expected = pd.DataFrame(
            {"A": [1, 1, 2], "B": data_for_sorting.take([2, 0, 1])}, index=[2, 0, 1]
        )
        if not ascending:
            # No two rows share the same (A, B) pair, so the descending
            # order is the exact reverse of the ascending one.
            expected = expected.iloc[::-1]
        self.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("box", [pd.Series, lambda x: x])
    @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique])
    def test_unique(self, data, box, method):
        """``unique`` collapses two equal elements to one and returns the array type."""
        duplicated = box(data._from_sequence([data[0], data[0]]))

        result = method(duplicated)

        assert len(result) == 1
        assert isinstance(result, type(data))
        assert result[0] == duplicated[0]

    @pytest.mark.parametrize("na_sentinel", [-1, -2])
    def test_factorize(self, data_for_grouping, na_sentinel):
        """``pd.factorize`` labels missing entries with ``na_sentinel``."""
        labels, uniques = pd.factorize(data_for_grouping, na_sentinel=na_sentinel)
        expected_labels = np.array(
            [0, 0, na_sentinel, na_sentinel, 1, 1, 0, 2], dtype=np.intp
        )
        expected_uniques = data_for_grouping.take([0, 4, 7])

        tm.assert_numpy_array_equal(labels, expected_labels)
        self.assert_extension_array_equal(uniques, expected_uniques)

    @pytest.mark.parametrize("na_sentinel", [-1, -2])
    def test_factorize_equivalence(self, data_for_grouping, na_sentinel):
        """``pd.factorize`` and the array's own ``factorize`` agree."""
        l1, u1 = pd.factorize(data_for_grouping, na_sentinel=na_sentinel)
        l2, u2 = data_for_grouping.factorize(na_sentinel=na_sentinel)

        tm.assert_numpy_array_equal(l1, l2)
        self.assert_extension_array_equal(u1, u2)

    def test_factorize_empty(self, data):
        """Factorizing an empty slice yields empty labels and empty uniques."""
        labels, uniques = pd.factorize(data[:0])
        expected_labels = np.array([], dtype=np.intp)
        expected_uniques = type(data)._from_sequence([], dtype=data[:0].dtype)

        tm.assert_numpy_array_equal(labels, expected_labels)
        self.assert_extension_array_equal(uniques, expected_uniques)

    def test_fillna_copy_frame(self, data_missing):
        """``DataFrame.fillna`` returns new values even when nothing is missing."""
        arr = data_missing.take([1, 1])
        df = pd.DataFrame({"A": arr})

        filled_val = df.iloc[0, 0]
        result = df.fillna(filled_val)

        assert df.A.values is not result.A.values

    def test_fillna_copy_series(self, data_missing):
        """``Series.fillna`` returns new values and leaves the original array in place."""
        arr = data_missing.take([1, 1])
        ser = pd.Series(arr)

        filled_val = ser[0]
        result = ser.fillna(filled_val)

        assert ser._values is not result._values
        assert ser._values is arr

    def test_fillna_length_mismatch(self, data_missing):
        """Filling with an array of a different length raises ``ValueError``."""
        msg = "Length of 'value' does not match."
        with pytest.raises(ValueError, match=msg):
            data_missing.fillna(data_missing.take([1]))

    def test_combine_le(self, data_repeated):
        """``Series.combine`` with ``<=`` works against a Series and against a scalar."""
        # GH 20825
        # Test that combine works when doing a <= (le) comparison
        orig_data1, orig_data2 = data_repeated(2)
        s1 = pd.Series(orig_data1)
        s2 = pd.Series(orig_data2)
        result = s1.combine(s2, lambda x1, x2: x1 <= x2)
        expected = pd.Series(
            [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))]
        )
        self.assert_series_equal(result, expected)

        val = s1.iloc[0]
        result = s1.combine(val, lambda x1, x2: x1 <= x2)
        expected = pd.Series([a <= val for a in list(orig_data1)])
        self.assert_series_equal(result, expected)

    def test_combine_add(self, data_repeated):
        """``Series.combine`` with ``+`` works against a Series and against a scalar."""
        # GH 20825
        orig_data1, orig_data2 = data_repeated(2)
        s1 = pd.Series(orig_data1)
        s2 = pd.Series(orig_data2)
        result = s1.combine(s2, lambda x1, x2: x1 + x2)
        with np.errstate(over="ignore"):
            expected = pd.Series(
                orig_data1._from_sequence(
                    [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))]
                )
            )
        self.assert_series_equal(result, expected)

        val = s1.iloc[0]
        result = s1.combine(val, lambda x1, x2: x1 + x2)
        expected = pd.Series(
            orig_data1._from_sequence([a + val for a in list(orig_data1)])
        )
        self.assert_series_equal(result, expected)

    def test_combine_first(self, data):
        """``combine_first`` of two overlapping slices rebuilds the first five elements."""
        # https://github.com/pandas-dev/pandas/issues/24147
        a = pd.Series(data[:3])
        b = pd.Series(data[2:5], index=[2, 3, 4])
        result = a.combine_first(b)
        expected = pd.Series(data[:5])
        self.assert_series_equal(result, expected)

    @pytest.mark.parametrize("frame", [True, False])
    @pytest.mark.parametrize(
        "periods, indices",
        [(-2, [2, 3, 4, -1, -1]), (0, [0, 1, 2, 3, 4]), (2, [-1, -1, 0, 1, 2])],
    )
    def test_container_shift(self, data, frame, periods, indices):
        """``shift`` on a Series or DataFrame matches ``take`` with fill positions."""
        # https://github.com/pandas-dev/pandas/issues/22386
        subset = data[:5]
        data = pd.Series(subset, name="A")
        expected = pd.Series(subset.take(indices, allow_fill=True), name="A")

        if frame:
            result = data.to_frame(name="A").assign(B=1).shift(periods)
            expected = pd.concat(
                [expected, pd.Series([1] * 5, name="B").shift(periods)], axis=1
            )
            compare = self.assert_frame_equal
        else:
            result = data.shift(periods)
            compare = self.assert_series_equal

        compare(result, expected)

    @pytest.mark.parametrize(
        "periods, indices",
        [[-4, [-1, -1]], [-1, [1, -1]], [0, [0, 1]], [1, [-1, 0]], [4, [-1, -1]]],
    )
    def test_shift_non_empty_array(self, data, periods, indices):
        """Array-level ``shift`` on two elements matches ``take`` with fill positions."""
        # https://github.com/pandas-dev/pandas/issues/23911
        subset = data[:2]
        result = subset.shift(periods)
        expected = subset.take(indices, allow_fill=True)
        self.assert_extension_array_equal(result, expected)

    @pytest.mark.parametrize("periods", [-4, -1, 0, 1, 4])
    def test_shift_empty_array(self, data, periods):
        """Shifting an empty array returns an equal empty array."""
        # https://github.com/pandas-dev/pandas/issues/23911
        empty = data[:0]
        result = empty.shift(periods)
        expected = empty
        self.assert_extension_array_equal(result, expected)

    def test_shift_fill_value(self, data):
        """``shift`` fills the vacated positions with the given ``fill_value``."""
        arr = data[:4]
        fill_value = data[0]
        result = arr.shift(1, fill_value=fill_value)
        expected = data.take([0, 0, 1, 2])
        self.assert_extension_array_equal(result, expected)

        result = arr.shift(-2, fill_value=fill_value)
        expected = data.take([2, 3, 0, 0])
        self.assert_extension_array_equal(result, expected)

    def test_hash_pandas_object_works(self, data, as_frame):
        """Hashing the same Series or DataFrame twice gives equal results."""
        # https://github.com/pandas-dev/pandas/issues/23066
        data = pd.Series(data)
        if as_frame:
            data = data.to_frame()
        a = pd.util.hash_pandas_object(data)
        b = pd.util.hash_pandas_object(data)
        self.assert_equal(a, b)

    def test_searchsorted(self, data_for_sorting, as_series):
        """``searchsorted`` finds scalar and array values, with and without ``sorter``."""
        b, c, a = data_for_sorting
        arr = type(data_for_sorting)._from_sequence([a, b, c])

        if as_series:
            arr = pd.Series(arr)
        assert arr.searchsorted(a) == 0
        assert arr.searchsorted(a, side="right") == 1

        assert arr.searchsorted(b) == 1
        assert arr.searchsorted(b, side="right") == 2

        assert arr.searchsorted(c) == 2
        assert arr.searchsorted(c, side="right") == 3

        result = arr.searchsorted(arr.take([0, 2]))
        expected = np.array([0, 2], dtype=np.intp)

        tm.assert_numpy_array_equal(result, expected)

        # sorter
        # data_for_sorting is laid out as [b, c, a], so the indices that
        # sort it into ascending order are [2, 0, 1].
        sorter = np.array([2, 0, 1])
        assert data_for_sorting.searchsorted(a, sorter=sorter) == 0

    def test_where_series(self, data, na_value, as_frame):
        """``where`` masks with ``na_value`` by default and with ``other`` when given."""
        assert data[0] != data[1]
        cls = type(data)
        a, b = data[:2]

        ser = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype))
        cond = np.array([True, True, False, False])

        if as_frame:
            ser = ser.to_frame(name="a")
            cond = cond.reshape(-1, 1)

        result = ser.where(cond)
        expected = pd.Series(
            cls._from_sequence([a, a, na_value, na_value], dtype=data.dtype)
        )

        if as_frame:
            expected = expected.to_frame(name="a")
        self.assert_equal(result, expected)

        # array other
        cond = np.array([True, False, True, True])
        other = cls._from_sequence([a, b, a, b], dtype=data.dtype)
        if as_frame:
            other = pd.DataFrame({"a": other})
            cond = pd.DataFrame({"a": cond})
        result = ser.where(cond, other)
        expected = pd.Series(cls._from_sequence([a, b, b, b], dtype=data.dtype))
        if as_frame:
            expected = expected.to_frame(name="a")
        self.assert_equal(result, expected)

    @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]])
    def test_repeat(self, data, repeats, as_series, use_numpy):
        """``repeat`` (method or ``np.repeat``) repeats each element the requested number of times."""
        arr = type(data)._from_sequence(data[:3], dtype=data.dtype)
        if as_series:
            arr = pd.Series(arr)

        result = np.repeat(arr, repeats) if use_numpy else arr.repeat(repeats)

        repeats = [repeats] * 3 if isinstance(repeats, int) else repeats
        expected = [x for x, n in zip(arr, repeats) for _ in range(n)]
        expected = type(data)._from_sequence(expected, dtype=data.dtype)
        if as_series:
            expected = pd.Series(expected, index=arr.index.repeat(repeats))

        self.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "repeats, kwargs, error, msg",
        [
            (2, dict(axis=1), ValueError, "'axis"),
            (-1, dict(), ValueError, "negative"),
            ([1, 2], dict(), ValueError, "shape"),
            (2, dict(foo="bar"), TypeError, "'foo'"),
        ],
    )
    def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy):
        """Invalid ``repeat`` arguments raise the expected error and message."""
        with pytest.raises(error, match=msg):
            if use_numpy:
                np.repeat(data, repeats, **kwargs)
            else:
                data.repeat(repeats, **kwargs)
|
@@ -0,0 +1,129 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseMissingTests(BaseExtensionTests):
    """Missing-value handling: ``isna``, ``dropna`` and ``fillna``.

    The ``data_missing`` fixture is treated throughout as a length-two
    array whose first element is missing and whose second is valid
    (``test_isna`` asserts exactly that mask).
    """

    def test_isna(self, data_missing):
        """``isna`` reports ``[True, False]`` for the array, a Series, and an emptied Series."""
        expected = np.array([True, False])

        result = pd.isna(data_missing)
        tm.assert_numpy_array_equal(result, expected)

        result = pd.Series(data_missing).isna()
        expected = pd.Series(expected)
        self.assert_series_equal(result, expected)

        # GH 21189
        result = pd.Series(data_missing).drop([0, 1]).isna()
        expected = pd.Series([], dtype=bool)
        self.assert_series_equal(result, expected)

    def test_dropna_array(self, data_missing):
        """Array-level ``dropna`` keeps only the valid element."""
        result = data_missing.dropna()
        expected = data_missing[[1]]
        self.assert_extension_array_equal(result, expected)

    def test_dropna_series(self, data_missing):
        """``Series.dropna`` keeps only the valid row."""
        ser = pd.Series(data_missing)
        result = ser.dropna()
        expected = ser.iloc[[1]]
        self.assert_series_equal(result, expected)

    def test_dropna_frame(self, data_missing):
        """``DataFrame.dropna`` by rows, by columns, and with a second column."""
        df = pd.DataFrame({"A": data_missing})

        # defaults
        result = df.dropna()
        expected = df.iloc[[1]]
        self.assert_frame_equal(result, expected)

        # axis = 1
        result = df.dropna(axis="columns")
        expected = pd.DataFrame(index=[0, 1])
        self.assert_frame_equal(result, expected)

        # multiple
        df = pd.DataFrame({"A": data_missing, "B": [1, np.nan]})
        result = df.dropna()
        expected = df.iloc[:0]
        self.assert_frame_equal(result, expected)

    def test_fillna_scalar(self, data_missing):
        """Filling with the valid scalar yields an array of two valid elements."""
        valid = data_missing[1]
        result = data_missing.fillna(valid)
        # Build the expectation independently of ``fillna``; comparing
        # ``fillna`` with itself could never fail.
        expected = data_missing._from_sequence(
            [valid, valid], dtype=data_missing.dtype
        )
        self.assert_extension_array_equal(result, expected)

    def test_fillna_limit_pad(self, data_missing):
        """Forward fill with ``limit=2`` fills only the first two of three gaps."""
        arr = data_missing.take([1, 0, 0, 0, 1])
        result = pd.Series(arr).fillna(method="ffill", limit=2)
        expected = pd.Series(data_missing.take([1, 1, 1, 0, 1]))
        self.assert_series_equal(result, expected)

    def test_fillna_limit_backfill(self, data_missing):
        """Backward fill with ``limit=2`` fills only the last two of three gaps."""
        arr = data_missing.take([1, 0, 0, 0, 1])
        result = pd.Series(arr).fillna(method="backfill", limit=2)
        expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
        self.assert_series_equal(result, expected)

    def test_fillna_series(self, data_missing):
        """``Series.fillna`` with a scalar, with a filled Series, and with itself."""
        fill_value = data_missing[1]
        ser = pd.Series(data_missing)

        result = ser.fillna(fill_value)
        expected = pd.Series(
            data_missing._from_sequence(
                [fill_value, fill_value], dtype=data_missing.dtype
            )
        )
        self.assert_series_equal(result, expected)

        # Fill with a series
        result = ser.fillna(expected)
        self.assert_series_equal(result, expected)

        # Fill with a series not affecting the missing values
        result = ser.fillna(ser)
        self.assert_series_equal(result, ser)

    def test_fillna_series_method(self, data_missing, fillna_method):
        """``Series.fillna(method=...)`` fills the single gap from its neighbour."""
        fill_value = data_missing[1]

        if fillna_method == "ffill":
            # Reverse so the valid element comes first and can be carried forward.
            data_missing = data_missing[::-1]

        result = pd.Series(data_missing).fillna(method=fillna_method)
        expected = pd.Series(
            data_missing._from_sequence(
                [fill_value, fill_value], dtype=data_missing.dtype
            )
        )

        self.assert_series_equal(result, expected)

    def test_fillna_frame(self, data_missing):
        """``DataFrame.fillna`` fills the extension column and leaves the integer column alone."""
        fill_value = data_missing[1]

        result = pd.DataFrame({"A": data_missing, "B": [1, 2]}).fillna(fill_value)

        expected = pd.DataFrame(
            {
                "A": data_missing._from_sequence(
                    [fill_value, fill_value], dtype=data_missing.dtype
                ),
                "B": [1, 2],
            }
        )

        self.assert_frame_equal(result, expected)

    def test_fillna_fill_other(self, data):
        """Filling another column via a dict leaves the extension column unchanged."""
        result = pd.DataFrame({"A": data, "B": [np.nan] * len(data)}).fillna({"B": 0.0})

        expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)})

        self.assert_frame_equal(result, expected)
|
@@ -0,0 +1,173 @@
|
||||
import operator
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core import ops
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseOpsUtil(BaseExtensionTests):
    """Shared helpers for the arithmetic and comparison operator tests."""

    def get_op_from_name(self, op_name):
        """Return a two-argument callable for a dunder operator name.

        Parameters
        ----------
        op_name : str
            Operator name such as ``"__add__"`` or ``"__radd__"``.

        Returns
        -------
        callable
            The function of that name from the ``operator`` module, or, when
            no such function exists, the operator named without its first
            letter applied with its arguments swapped.
        """
        short_opname = op_name.strip("_")
        try:
            op = getattr(operator, short_opname)
        except AttributeError:
            # Assume it is the reverse operator
            rop = getattr(operator, short_opname[1:])
            op = lambda x, y: rop(y, x)

        return op

    def check_opname(self, s, op_name, other, exc=Exception):
        """Resolve ``op_name`` to a callable and delegate to ``_check_op``."""
        op = self.get_op_from_name(op_name)

        self._check_op(s, op, other, op_name, exc)

    def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
        """Check ``op(s, other)`` against ``s.combine``, or that it raises ``exc``.

        When ``exc`` is ``None`` the operation must succeed and equal the
        element-wise ``s.combine(other, op)``; otherwise it must raise ``exc``.
        """
        if exc is None:
            result = op(s, other)
            expected = s.combine(other, op)
            self.assert_series_equal(result, expected)
        else:
            with pytest.raises(exc):
                op(s, other)

    def _check_divmod_op(self, s, op, other, exc=Exception):
        """Check a divmod-style ``op`` against ``//`` and ``%``, or that it raises ``exc``.

        ``op`` is either the builtin ``divmod`` or a reflected variant; for
        any other ``op`` the expectation is computed with the operands swapped.
        """
        # divmod has multiple return values, so check separately
        if exc is None:
            result_div, result_mod = op(s, other)
            if op is divmod:
                expected_div, expected_mod = s // other, s % other
            else:
                expected_div, expected_mod = other // s, other % s
            self.assert_series_equal(result_div, expected_div)
            self.assert_series_equal(result_mod, expected_mod)
        else:
            with pytest.raises(exc):
                # Call the operator under test, not the builtin, so that the
                # reflected variant is really the one exercised.
                op(s, other)
|
||||
|
||||
|
||||
class BaseArithmeticOpsTests(BaseOpsUtil):
    """Arithmetic operator checks for Series and DataFrame holding the array.

    Each class variable below names the exception passed to the checking
    helpers for one group of operations.  Subclasses supporting a group
    of operations should override the matching variable:

    * series_scalar_exc = TypeError
    * frame_scalar_exc = TypeError
    * series_array_exc = TypeError
    * divmod_exc = TypeError
    """

    series_scalar_exc = TypeError
    frame_scalar_exc = TypeError
    series_array_exc = TypeError
    divmod_exc = TypeError

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        """Series combined with its own first element."""
        # series & scalar
        ser = pd.Series(data)
        self.check_opname(
            ser, all_arithmetic_operators, ser.iloc[0], exc=self.series_scalar_exc
        )

    @pytest.mark.xfail(run=False, reason="_reduce needs implementation")
    def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
        """Single-column DataFrame combined with the first element of the array."""
        # frame & scalar
        frame = pd.DataFrame({"A": data})
        self.check_opname(
            frame, all_arithmetic_operators, data[0], exc=self.frame_scalar_exc
        )

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Series combined with a Series repeating its first element."""
        # ndarray & other series
        ser = pd.Series(data)
        repeated = pd.Series([ser.iloc[0]] * len(ser))
        self.check_opname(
            ser, all_arithmetic_operators, repeated, exc=self.series_array_exc
        )

    def test_divmod(self, data):
        """``divmod`` and its reflected form between a Series and the scalar 1."""
        ser = pd.Series(data)
        self._check_divmod_op(ser, divmod, 1, exc=self.divmod_exc)
        self._check_divmod_op(1, ops.rdivmod, ser, exc=self.divmod_exc)

    def test_divmod_series_array(self, data, data_for_twos):
        """``divmod`` between a Series and extension arrays / Series of them."""
        ser = pd.Series(data)
        self._check_divmod_op(ser, divmod, data)

        twos = data_for_twos
        self._check_divmod_op(twos, ops.rdivmod, ser)
        self._check_divmod_op(pd.Series(twos), ops.rdivmod, ser)

    def test_add_series_with_extension_array(self, data):
        """``Series + array`` equals a Series wrapping ``array + array``."""
        summed = pd.Series(data) + data
        self.assert_series_equal(summed, pd.Series(data + data))

    def test_error(self, data, all_arithmetic_operators):
        """Looking up an arithmetic dunder on the array raises ``AttributeError``."""
        # invalid ops
        with pytest.raises(AttributeError):
            getattr(data, all_arithmetic_operators)

    def test_direct_arith_with_series_returns_not_implemented(self, data):
        """Calling ``__add__`` directly with a Series returns ``NotImplemented``."""
        # EAs should return NotImplemented for ops with Series.
        # Pandas takes care of unboxing the series and calling the EA's op.
        other = pd.Series(data)
        if not hasattr(data, "__add__"):
            raise pytest.skip(
                "{} does not implement add".format(data.__class__.__name__)
            )
        assert data.__add__(other) is NotImplemented
|
||||
|
||||
|
||||
class BaseComparisonOpsTests(BaseOpsUtil):
    """Comparison operator checks for Series holding the array."""

    def _compare_other(self, s, data, op_name, other):
        """Check one comparison operator at the array level and the Series level."""
        op = self.get_op_from_name(op_name)

        # array: the dunder called directly must defer in every case
        assert getattr(data, op_name)(other) is NotImplemented

        if op_name == "__eq__":
            assert not op(s, other).all()
        elif op_name == "__ne__":
            assert op(s, other).all()
        else:
            # series: a fresh Series is built from the data for this check
            with pytest.raises(TypeError):
                op(pd.Series(data), other)

    def test_compare_scalar(self, data, all_compare_operators):
        """Compare against the scalar 0."""
        self._compare_other(pd.Series(data), data, all_compare_operators, 0)

    def test_compare_array(self, data, all_compare_operators):
        """Compare against a Series repeating the first element."""
        ser = pd.Series(data)
        repeated = pd.Series([data[0]] * len(data))
        self._compare_other(ser, data, all_compare_operators, repeated)

    def test_direct_arith_with_series_returns_not_implemented(self, data):
        """Calling ``__eq__`` directly with a Series returns ``NotImplemented``."""
        # EAs should return NotImplemented for ops with Series.
        # Pandas takes care of unboxing the series and calling the EA's op.
        other = pd.Series(data)
        if not hasattr(data, "__eq__"):
            raise pytest.skip(
                "{} does not implement __eq__".format(data.__class__.__name__)
            )
        assert data.__eq__(other) is NotImplemented
|
@@ -0,0 +1,43 @@
|
||||
import io
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BasePrintingTests(BaseExtensionTests):
    """Checks on how the extension array is rendered as text."""

    @pytest.mark.parametrize("size", ["big", "small"])
    def test_array_repr(self, data, size):
        """The repr names the class, the length and the dtype; big arrays are truncated."""
        if size == "small":
            arr = data[:5]
        else:
            arr = type(data)._concat_same_type([data] * 5)

        text = repr(arr)
        assert arr.__class__.__name__ in text
        assert "Length: {}".format(len(arr)) in text
        assert str(arr.dtype) in text
        if size == "big":
            assert "..." in text

    def test_array_repr_unicode(self, data):
        """``str`` of the array is a ``str``."""
        assert isinstance(str(data), str)

    def test_series_repr(self, data):
        """The dtype name appears in the repr of a Series."""
        assert data.dtype.name in repr(pd.Series(data))

    def test_dataframe_repr(self, data):
        """Building the repr of a DataFrame holding the array does not raise."""
        repr(pd.DataFrame({"A": data}))

    def test_dtype_name_in_info(self, data):
        """The dtype name appears in the output of ``DataFrame.info``."""
        sink = io.StringIO()
        pd.DataFrame({"A": data}).info(buf=sink)
        assert data.dtype.name in sink.getvalue()
|
@@ -0,0 +1,60 @@
|
||||
import warnings
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseReduceTests(BaseExtensionTests):
    """
    Tests that are specific to reductions.  These generally only
    make sense for numeric/boolean operations.
    """

    def check_reduce(self, s, op_name, skipna):
        """Compare a reduction on ``s`` with the same reduction on its float64 cast."""

        def reduce(obj):
            return getattr(obj, op_name)(skipna=skipna)

        tm.assert_almost_equal(reduce(s), reduce(s.astype("float64")))
|
||||
|
||||
|
||||
class BaseNoReduceTests(BaseReduceTests):
    """Reduction tests for arrays that define no reductions at all."""

    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
        """Every numeric reduction on the Series raises ``TypeError``."""
        ser = pd.Series(data)

        with pytest.raises(TypeError):
            getattr(ser, all_numeric_reductions)(skipna=skipna)

    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
        """Every boolean reduction on the Series raises ``TypeError``."""
        ser = pd.Series(data)

        with pytest.raises(TypeError):
            getattr(ser, all_boolean_reductions)(skipna=skipna)
|
||||
|
||||
|
||||
class BaseNumericReduceTests(BaseReduceTests):
    """Numeric reductions, checked through ``check_reduce``."""

    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_series(self, data, all_numeric_reductions, skipna):
        """Run one numeric reduction with ``RuntimeWarning`` silenced."""
        ser = pd.Series(data)

        # min/max with empty produce numpy warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            self.check_reduce(ser, all_numeric_reductions, skipna)
|
||||
|
||||
|
||||
class BaseBooleanReduceTests(BaseReduceTests):
    """Boolean reductions, checked through ``check_reduce``."""

    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_series(self, data, all_boolean_reductions, skipna):
        """Run one boolean reduction on a Series built from the data."""
        self.check_reduce(pd.Series(data), all_boolean_reductions, skipna)
|
@@ -0,0 +1,297 @@
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.internals import ExtensionBlock
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseReshapingTests(BaseExtensionTests):
|
||||
"""Tests for reshaping and concatenation."""
|
||||
|
||||
@pytest.mark.parametrize("in_frame", [True, False])
def test_concat(self, data, in_frame):
    """Concatenating a container with itself doubles the length and keeps the dtype."""
    piece = pd.Series(data)
    if in_frame:
        piece = pd.DataFrame(piece)
    combined = pd.concat([piece, piece], ignore_index=True)

    assert len(combined) == len(data) * 2

    dtype = combined.dtypes[0] if in_frame else combined.dtype

    assert dtype == data.dtype
    assert isinstance(combined._data.blocks[0], ExtensionBlock)
|
||||
|
||||
@pytest.mark.parametrize("in_frame", [True, False])
def test_concat_all_na_block(self, data_missing, in_frame):
    """Concatenating a block taken from position 1 with one taken from position 0 keeps the extension data."""
    valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1])
    na_block = pd.Series(data_missing.take([0, 0]), index=[2, 3])
    if in_frame:
        valid_block = pd.DataFrame({"a": valid_block})
        na_block = pd.DataFrame({"a": na_block})
    result = pd.concat([valid_block, na_block])

    if not in_frame:
        expected = pd.Series(data_missing.take([1, 1, 0, 0]))
        self.assert_series_equal(result, expected)
        return

    expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])})
    self.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_mixed_dtypes(self, data):
    """Concatenating with other dtypes matches concatenating the object casts."""
    # https://github.com/pandas-dev/pandas/issues/20762
    ext_frame = pd.DataFrame({"A": data[:3]})
    int_frame = pd.DataFrame({"A": [1, 2, 3]})
    cat_frame = pd.DataFrame({"A": ["a", "b", "c"]}).astype("category")

    # First all three frames, then the simple case of just the EA frame
    # and one other.
    for frames in ([ext_frame, int_frame, cat_frame], [ext_frame, int_frame]):
        # dataframes
        result = pd.concat(frames)
        expected = pd.concat([frame.astype(object) for frame in frames])
        self.assert_frame_equal(result, expected)

        # series
        result = pd.concat([frame["A"] for frame in frames])
        expected = pd.concat([frame["A"].astype(object) for frame in frames])
        self.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_columns(self, data, na_value):
    """Column-wise concat with aligned and with non-aligned indexes."""

    def check_both(left, right, expected):
        # Concatenate the frames first, then the bare columns.
        self.assert_frame_equal(pd.concat([left, right], axis=1), expected)
        self.assert_frame_equal(
            pd.concat([left["A"], right["B"]], axis=1), expected
        )

    ext_frame = pd.DataFrame({"A": data[:3]})
    int_frame = pd.DataFrame({"B": [1, 2, 3]})

    aligned = pd.DataFrame({"A": data[:3], "B": [1, 2, 3]})
    check_both(ext_frame, int_frame, aligned)

    # non-aligned
    shifted_frame = pd.DataFrame({"B": [1, 2, 3]}, index=[1, 2, 3])
    padded = data._from_sequence(list(data[:3]) + [na_value], dtype=data.dtype)
    unaligned = pd.DataFrame({"A": padded, "B": [np.nan, 1, 2, 3]})

    check_both(ext_frame, shifted_frame, unaligned)
|
||||
|
||||
def test_align(self, data, na_value):
    """Aligning two offset Series pads each side with ``na_value``."""
    head = data[:3]
    tail = data[2:5]
    left, right = pd.Series(head).align(pd.Series(tail, index=[1, 2, 3]))

    # Assumes that the ctor can take a list of scalars of the type
    expected_left = pd.Series(
        data._from_sequence(list(head) + [na_value], dtype=data.dtype)
    )
    expected_right = pd.Series(
        data._from_sequence([na_value] + list(tail), dtype=data.dtype)
    )
    self.assert_series_equal(left, expected_left)
    self.assert_series_equal(right, expected_right)
|
||||
|
||||
def test_align_frame(self, data, na_value):
    """Aligning two offset single-column frames pads each side with ``na_value``."""
    head = data[:3]
    tail = data[2:5]
    left_frame = pd.DataFrame({"A": head})
    right_frame = pd.DataFrame({"A": tail}, index=[1, 2, 3])
    left, right = left_frame.align(right_frame)

    # Assumes that the ctor can take a list of scalars of the type
    padded_head = data._from_sequence(list(head) + [na_value], dtype=data.dtype)
    expected_left = pd.DataFrame({"A": padded_head})
    padded_tail = data._from_sequence([na_value] + list(tail), dtype=data.dtype)
    expected_right = pd.DataFrame({"A": padded_tail})
    self.assert_frame_equal(left, expected_left)
    self.assert_frame_equal(right, expected_right)
|
||||
|
||||
def test_align_series_frame(self, data, na_value):
    """Align a Series against a frame that has one extra row."""
    # https://github.com/pandas-dev/pandas/issues/20576
    ser = pd.Series(data, name="a")
    frame = pd.DataFrame({"col": np.arange(len(ser) + 1)})
    aligned_ser, aligned_frame = ser.align(frame)

    padded = data._from_sequence(list(data) + [na_value], dtype=data.dtype)
    expected_ser = pd.Series(padded, name=ser.name)

    self.assert_series_equal(aligned_ser, expected_ser)
    self.assert_frame_equal(aligned_frame, frame)
|
||||
|
||||
def test_set_frame_expand_regular_with_extension(self, data):
    """Add an extension column to a frame that starts with an int column."""
    size = len(data)
    frame = pd.DataFrame({"A": [1] * size})
    frame["B"] = data
    self.assert_frame_equal(frame, pd.DataFrame({"A": [1] * size, "B": data}))
|
||||
|
||||
def test_set_frame_expand_extension_with_regular(self, data):
    """Add an int column to a frame that starts with an extension column."""
    size = len(data)
    frame = pd.DataFrame({"A": data})
    frame["B"] = [1] * size
    self.assert_frame_equal(frame, pd.DataFrame({"A": data, "B": [1] * size}))
|
||||
|
||||
def test_set_frame_overwrite_object(self, data):
    """Overwriting an object column with ``data`` adopts the extension dtype."""
    # https://github.com/pandas-dev/pandas/issues/20555
    frame = pd.DataFrame({"A": [1] * len(data)}, dtype=object)
    frame["A"] = data
    column_dtype = frame.dtypes["A"]
    assert column_dtype == data.dtype
|
||||
|
||||
def test_merge(self, data, na_value):
    """Merge on an int key while carrying an extension column (inner, outer)."""
    # GH-20743
    left = pd.DataFrame({"ext": data[:3], "int1": [1, 2, 3], "key": [0, 1, 2]})
    right = pd.DataFrame({"int2": [1, 2, 3, 4], "key": [0, 0, 1, 3]})
    column_order = ["ext", "int1", "key", "int2"]

    # Default (inner) merge.
    merged = pd.merge(left, right)
    inner_ext = data._from_sequence([data[0], data[0], data[1]], dtype=data.dtype)
    expected = pd.DataFrame(
        {
            "int1": [1, 1, 2],
            "int2": [1, 2, 3],
            "key": [0, 0, 1],
            "ext": inner_ext,
        }
    )
    self.assert_frame_equal(merged, expected[column_order])

    # Outer merge: the unmatched right-hand row gets na_value in "ext".
    merged = pd.merge(left, right, how="outer")
    outer_ext = data._from_sequence(
        [data[0], data[0], data[1], data[2], na_value], dtype=data.dtype
    )
    expected = pd.DataFrame(
        {
            "int1": [1, 1, 2, 3, np.nan],
            "int2": [1, 2, 3, np.nan, 4],
            "key": [0, 0, 1, 2, 3],
            "ext": outer_ext,
        }
    )
    self.assert_frame_equal(merged, expected[column_order])
|
||||
|
||||
def test_merge_on_extension_array(self, data):
    """Self-merge a frame using an extension-array column as the key."""
    # GH 23020
    first, second = data[:2]
    key = type(data)._from_sequence([first, second], dtype=data.dtype)
    frame = pd.DataFrame({"key": key, "val": [1, 2]})

    expected = pd.DataFrame({"key": key, "val_x": [1, 2], "val_y": [1, 2]})
    self.assert_frame_equal(pd.merge(frame, frame, on="key"), expected)

    # order: reversing the left frame reverses the expected rows
    swapped = pd.merge(frame.iloc[[1, 0]], frame, on="key")
    expected = expected.iloc[[1, 0]].reset_index(drop=True)
    self.assert_frame_equal(swapped, expected)
|
||||
|
||||
def test_merge_on_extension_array_duplicates(self, data):
    """Merge on an extension-array key that contains a repeated value."""
    # GH 23020
    first, second = data[:2]
    key = type(data)._from_sequence([first, second, first], dtype=data.dtype)
    left = pd.DataFrame({"key": key, "val": [1, 2, 3]})
    right = pd.DataFrame({"key": key, "val": [1, 2, 3]})

    merged = pd.merge(left, right, on="key")

    expected_columns = {
        "key": key.take([0, 0, 0, 0, 1]),
        "val_x": [1, 1, 3, 3, 2],
        "val_y": [1, 3, 1, 3, 2],
    }
    self.assert_frame_equal(merged, pd.DataFrame(expected_columns))
|
||||
|
||||
@pytest.mark.parametrize(
    "columns",
    [
        ["A", "B"],
        pd.MultiIndex.from_tuples(
            [("A", "a"), ("A", "b")], names=["outer", "inner"]
        ),
    ],
)
def test_stack(self, data, columns):
    """Stacking keeps the extension dtype and matches the object-dtype result."""
    frame = pd.DataFrame({"A": data[:5], "B": data[:5]})
    frame.columns = columns
    result = frame.stack()

    # The trailing astype(object) is needed in case the constructor inferred
    # object -> specialized, as is done for period.
    expected = frame.astype(object).stack().astype(object)

    column_dtype_matches = None
    if isinstance(expected, pd.Series):
        column_dtype_matches = result.dtype == frame.iloc[:, 0].dtype
    else:
        column_dtype_matches = all(result.dtypes == frame.iloc[:, 0].dtype)
    assert column_dtype_matches

    self.assert_equal(result.astype(object), expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        # Two levels, uniform.
        pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]),
        # non-uniform
        pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "b")]),
        # three levels, non-uniform
        pd.MultiIndex.from_product([("A", "B"), ("a", "b", "c"), (0, 1, 2)]),
        pd.MultiIndex.from_tuples(
            [
                ("A", "a", 1),
                ("A", "b", 0),
                ("A", "a", 0),
                ("B", "a", 0),
                ("B", "c", 1),
            ]
        ),
    ],
)
@pytest.mark.parametrize("obj", ["series", "frame"])
def test_unstack(self, data, index, obj):
    """Unstack every ordered selection of fewer than all index levels.

    Each resulting column must still be backed by ``type(data)``, and the
    values must match unstacking the object-dtype equivalent.
    """
    data = data[: len(index)]
    if obj == "series":
        boxed = pd.Series(data, index=index)
    else:
        boxed = pd.DataFrame({"A": data, "B": data}, index=index)

    n_levels = index.nlevels
    level_numbers = list(range(n_levels))
    # e.g. for [0, 1, 2] this visits
    # (0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)
    for size in range(1, n_levels):
        for level in itertools.permutations(level_numbers, size):
            result = boxed.unstack(level=level)
            assert all(
                isinstance(result[col].array, type(data))
                for col in result.columns
            )
            expected = boxed.astype(object).unstack(level=level)
            self.assert_frame_equal(result.astype(object), expected)
|
||||
|
||||
def test_ravel(self, data):
    """``ravel`` returns the same array type and shares memory with ``data``."""
    # as long as EA is 1D-only, ravel is a no-op
    flattened = data.ravel()
    assert type(flattened) == type(data)

    # A write through the result must be visible in the source (view, not copy).
    flattened[0] = flattened[1]
    assert data[0] == data[1]
|
@@ -0,0 +1,188 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseSetitemTests(BaseExtensionTests):
    """Shared ``__setitem__`` tests for extension arrays and objects boxing them."""

    def test_setitem_scalar_series(self, data, box_in_series):
        """Assign one element to another position."""
        if box_in_series:
            data = pd.Series(data)
        data[0] = data[1]
        assert data[0] == data[1]

    def test_setitem_sequence(self, data, box_in_series):
        """Swap the first two elements with a list indexer and list value."""
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()

        data[[0, 1]] = [data[1], data[0]]
        assert data[0] == original[1]
        assert data[1] == original[0]

    def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
        """A value shorter than the indexer raises and leaves the Series intact."""
        ser = pd.Series(data)
        original = ser.copy()
        value = [data[0]]
        if as_array:
            value = data._from_sequence(value)

        xpr = "cannot set using a {} indexer with a different length"
        with pytest.raises(ValueError, match=xpr.format("list-like")):
            ser[[0, 1]] = value
        # Ensure no modifications made before the exception
        self.assert_series_equal(ser, original)

        with pytest.raises(ValueError, match=xpr.format("slice")):
            ser[slice(3)] = value
        self.assert_series_equal(ser, original)

    def test_setitem_empty_indxer(self, data, box_in_series):
        """Setting with an empty integer indexer changes nothing."""
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()
        data[np.array([], dtype=int)] = []
        self.assert_equal(data, original)

    def test_setitem_sequence_broadcasts(self, data, box_in_series):
        """A scalar value is broadcast over a list indexer."""
        if box_in_series:
            data = pd.Series(data)
        data[[0, 1]] = data[2]
        assert data[0] == data[2]
        assert data[1] == data[2]

    @pytest.mark.parametrize("setter", ["loc", "iloc"])
    def test_setitem_scalar(self, data, setter):
        """Scalar assignment through ``Series.loc`` / ``Series.iloc``."""
        arr = pd.Series(data)
        setter = getattr(arr, setter)
        operator.setitem(setter, 0, data[1])
        assert arr[0] == data[1]

    def test_setitem_loc_scalar_mixed(self, data):
        """``loc`` scalar set in a frame that also has an int column."""
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
        df.loc[0, "B"] = data[1]
        assert df.loc[0, "B"] == data[1]

    def test_setitem_loc_scalar_single(self, data):
        """``loc`` scalar set in a frame with a single extension column."""
        df = pd.DataFrame({"B": data})
        df.loc[10, "B"] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_loc_scalar_multiple_homogoneous(self, data):
        """``loc`` scalar set in a frame with two extension columns."""
        df = pd.DataFrame({"A": data, "B": data})
        df.loc[10, "B"] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_iloc_scalar_mixed(self, data):
        """``iloc`` scalar set in a frame that also has an int column."""
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
        df.iloc[0, 1] = data[1]
        assert df.loc[0, "B"] == data[1]

    def test_setitem_iloc_scalar_single(self, data):
        """``iloc`` scalar set in a frame with a single extension column."""
        df = pd.DataFrame({"B": data})
        df.iloc[10, 0] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
        """``iloc`` scalar set in a frame with two extension columns."""
        df = pd.DataFrame({"A": data, "B": data})
        df.iloc[10, 1] = data[1]
        assert df.loc[10, "B"] == data[1]

    @pytest.mark.parametrize("as_callable", [True, False])
    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_aligned(self, data, as_callable, setter):
        """Boolean-mask assignment of an equally long slice via the given setter."""
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if as_callable:
            mask2 = lambda x: mask
        else:
            mask2 = mask

        if setter:
            # loc
            target = getattr(ser, setter)
        else:
            # Series.__setitem__
            target = ser

        # Assign only through the parametrized target. A second, unconditional
        # ``ser[mask2] = ...`` used to follow and would hide a broken setter.
        operator.setitem(target, mask2, data[5:7])

        assert ser[0] == data[5]
        assert ser[1] == data[6]

    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_broadcast(self, data, setter):
        """Boolean-mask assignment of a scalar via the given setter."""
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if setter:  # loc
            target = getattr(ser, setter)
        else:  # __setitem__
            target = ser

        operator.setitem(target, mask, data[10])
        assert ser[0] == data[10]
        assert ser[1] == data[10]

    def test_setitem_expand_columns(self, data):
        """Add a scalar column, then overwrite it with the extension data."""
        df = pd.DataFrame({"A": data})
        result = df.copy()
        result["B"] = 1
        expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
        self.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, "B"] = 1
        self.assert_frame_equal(result, expected)

        # overwrite with new type
        result["B"] = data
        expected = pd.DataFrame({"A": data, "B": data})
        self.assert_frame_equal(result, expected)

    def test_setitem_expand_with_extension(self, data):
        """Add an extension column via ``[]`` and via ``.loc``."""
        df = pd.DataFrame({"A": [1] * len(data)})
        result = df.copy()
        result["B"] = data
        expected = pd.DataFrame({"A": [1] * len(data), "B": data})
        self.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, "B"] = data
        self.assert_frame_equal(result, expected)

    def test_setitem_frame_invalid_length(self, data):
        """Assigning a too-short column raises ``ValueError``."""
        df = pd.DataFrame({"A": [1] * len(data)})
        xpr = "Length of values does not match length of index"
        with pytest.raises(ValueError, match=xpr):
            df["B"] = data[:5]

    @pytest.mark.xfail(reason="GH#20441: setitem on extension types.")
    def test_setitem_tuple_index(self, data):
        """Set by a tuple label on a Series indexed by tuples (expected failure)."""
        s = pd.Series(data[:2], index=[(0, 0), (0, 1)])
        expected = pd.Series(data.take([1, 1]), index=s.index)
        s[(0, 1)] = data[1]
        self.assert_series_equal(s, expected)

    def test_setitem_slice_mismatch_length_raises(self, data):
        """A slice target shorter than the value raises ``ValueError``."""
        arr = data[:5]
        with pytest.raises(ValueError):
            arr[:1] = arr[:2]

    def test_setitem_slice_array(self, data):
        """Replace a full slice with another array of the same length."""
        arr = data[:5].copy()
        arr[:5] = data[-5:]
        self.assert_extension_array_equal(arr, data[-5:])

    def test_setitem_scalar_key_sequence_raise(self, data):
        """Assigning a sequence to a scalar position raises ``ValueError``."""
        arr = data[:5].copy()
        with pytest.raises(ValueError):
            arr[0] = arr[[0, 1]]
|
@@ -0,0 +1,178 @@
|
||||
import operator
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import Series
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Fixture placeholder: the ExtensionDtype under test (must be overridden)."""
    raise NotImplementedError()
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Fixture placeholder: a length-100 array of the type under test.

    Requirements on the overriding fixture:

    * data[0] and data[1] are both non-missing
    * data[0] and data[1] are not equal
    """
    raise NotImplementedError()
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_twos():
    """Fixture placeholder: a length-100 array where every element is two."""
    raise NotImplementedError()
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Fixture placeholder: a length-2 array laid out as [NA, Valid]."""
    raise NotImplementedError()
|
||||
|
||||
|
||||
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture returning ``data`` and then ``data_missing``."""
    by_name = {"data": data, "data_missing": data_missing}
    # An unrecognised param yields None, like falling off the if/elif chain.
    return by_name.get(request.param)
|
||||
|
||||
|
||||
@pytest.fixture
def data_repeated(data):
    """
    Provide a factory for repeated copies of the ``data`` fixture value.

    Parameters
    ----------
    data : fixture implementing `data`

    Returns
    -------
    Callable[[int], Generator]:
        Called with ``count``, gives a generator that yields the same
        ``data`` object ``count`` times.
    """

    def gen(count):
        produced = 0
        for produced in range(count):
            yield data

    return gen
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """Fixture placeholder: a length-3 array with a known sort order.

    The overriding fixture should supply [B, C, A] where A < B < C.
    """
    raise NotImplementedError()
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """Fixture placeholder: a length-3 array with a known sort order and an NA.

    The overriding fixture should supply [B, NA, A] where A < B and NA is
    missing.
    """
    raise NotImplementedError()
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Two-argument predicate used to compare NA scalars.

    The returned callable should be truthy when both arguments are the
    (scalar) NA of the type under test.

    The default is ``operator.is_`` (identity comparison).
    """
    compare = operator.is_
    return compare
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """Scalar missing value for the type under test; ``None`` by default."""
    missing = None
    return missing
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """Fixture placeholder: data for factorization, grouping and unique tests.

    The overriding fixture should look like [B, B, NA, NA, A, A, B, C],
    where A < B < C and NA is missing.
    """
    raise NotImplementedError()
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def box_in_series(request):
    """Boolean switch: should the test wrap the data in a Series?"""
    should_box = request.param
    return should_box
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        lambda group: 1,
        lambda group: [1] * len(group),
        lambda group: Series([1] * len(group)),
        lambda group: group,
    ],
    ids=["scalar", "list", "series", "object"],
)
def groupby_apply_op(request):
    """
    One-argument callables for exercising groupby.apply().

    In order they return a scalar, a list, a Series, and the input itself.
    """
    func = request.param
    return func
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def as_frame(request):
    """
    Boolean switch for comparing a Series against Series.to_frame().
    """
    flag = request.param
    return flag
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def as_series(request):
    """
    Boolean switch for comparing an array against Series(array).
    """
    flag = request.param
    return flag
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def use_numpy(request):
    """
    Boolean switch for comparing an ExtensionDtype array against a numpy
    array.
    """
    flag = request.param
    return flag
|
||||
|
||||
|
||||
@pytest.fixture(params=["ffill", "bfill"])
def fillna_method(request):
    """
    Parametrized fixture supplying 'ffill' and 'bfill' as the method for
    Series.fillna(method=<method>) tests.
    """
    method = request.param
    return method
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def as_array(request):
    """
    Boolean switch used by tests of the ExtensionDtype _from_sequence method.
    """
    flag = request.param
    return flag
|
@@ -0,0 +1,3 @@
|
||||
from .array import DecimalArray, DecimalDtype, make_data, to_decimal
|
||||
|
||||
__all__ = ["DecimalArray", "DecimalDtype", "to_decimal", "make_data"]
|
@@ -0,0 +1,189 @@
|
||||
import decimal
|
||||
import numbers
|
||||
import random
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.base import ExtensionDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.api.extensions import register_extension_dtype
|
||||
from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin
|
||||
|
||||
|
||||
@register_extension_dtype
class DecimalDtype(ExtensionDtype):
    """Extension dtype whose scalar type is ``decimal.Decimal``."""

    type = decimal.Decimal
    name = "decimal"
    na_value = decimal.Decimal("NaN")
    # Instance attributes declared as this dtype's metadata.
    _metadata = ("context",)

    def __init__(self, context=None):
        # Without an explicit context, use the currently active one.
        if not context:
            context = decimal.getcontext()
        self.context = context

    def __repr__(self):
        template = "DecimalDtype(context={})"
        return template.format(self.context)

    @classmethod
    def construct_array_type(cls):
        """Return the array type associated with this dtype

        Returns
        -------
        type
        """
        array_type = DecimalArray
        return array_type

    @classmethod
    def construct_from_string(cls, string):
        """Build the dtype from its name; raise ``TypeError`` for anything else."""
        if string != cls.name:
            raise TypeError("Cannot construct a '{}' from '{}'".format(cls, string))
        return cls()

    @property
    def _is_numeric(self):
        # Report this dtype as numeric.
        return True
|
||||
|
||||
|
||||
class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin):
    """ExtensionArray that stores ``decimal.Decimal`` scalars in an object ndarray."""

    __array_priority__ = 1000

    def __init__(self, values, dtype=None, copy=False, context=None):
        """
        Parameters
        ----------
        values : iterable of decimal.Decimal
            Every element must already be a ``Decimal``.
        dtype : ignored
            Accepted for signature compatibility; not used.
        copy : bool, default False
            If True and ``values`` is an ndarray, store a copy instead of
            (possibly) sharing its buffer.
        context : decimal.Context, optional
            Forwarded to ``DecimalDtype``.

        Raises
        ------
        TypeError
            If any element is not a ``decimal.Decimal``.
        """
        for val in values:
            if not isinstance(val, decimal.Decimal):
                raise TypeError("All values must be of type " + str(decimal.Decimal))
        data = np.asarray(values, dtype=object)
        if copy and isinstance(values, np.ndarray):
            # np.asarray may hand back the caller's buffer; detach from it.
            data = data.copy()

        self._data = data
        # Some aliases for common attribute names to ensure pandas supports
        # these
        self._items = self.data = self._data
        # those aliases are currently not working due to assumptions
        # in internal code (GH-20735)
        # self._values = self.values = self.data
        self._dtype = DecimalDtype(context)

    @property
    def dtype(self):
        """The ``DecimalDtype`` instance created for this array."""
        return self._dtype

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """Build an array from ``scalars``; ``dtype`` is ignored, ``copy`` honoured."""
        return cls(scalars, copy=copy)

    @classmethod
    def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
        """Parse each string with ``decimal.Decimal`` and build an array."""
        return cls._from_sequence([decimal.Decimal(x) for x in strings], dtype, copy)

    @classmethod
    def _from_factorized(cls, values, original):
        """Rebuild an array from factorized ``values``; ``original`` is unused."""
        return cls(values)

    _HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        """Apply a NumPy ufunc to the underlying object arrays.

        Returns ``NotImplemented`` when any input is of an unhandled type.
        Array-like results are wrapped back into ``DecimalArray``; scalar
        results are returned as they are.
        """
        if not all(
            isinstance(t, self._HANDLED_TYPES + (DecimalArray,)) for t in inputs
        ):
            return NotImplemented

        # Unwrap DecimalArray operands to their raw ndarrays.
        inputs = tuple(x._data if isinstance(x, DecimalArray) else x for x in inputs)
        result = getattr(ufunc, method)(*inputs, **kwargs)

        def reconstruct(x):
            if isinstance(x, (decimal.Decimal, numbers.Number)):
                return x
            else:
                return DecimalArray._from_sequence(x)

        if isinstance(result, tuple):
            return tuple(reconstruct(x) for x in result)
        else:
            return reconstruct(result)

    def __getitem__(self, item):
        """Return a scalar for an integer ``item``, otherwise a new array."""
        if isinstance(item, numbers.Integral):
            return self._data[item]
        else:
            return type(self)(self._data[item])

    def take(self, indexer, allow_fill=False, fill_value=None):
        """Take elements by position via ``pandas.api.extensions.take``.

        When ``allow_fill`` is set and no ``fill_value`` is given, the dtype's
        ``na_value`` is used as the fill.
        """
        from pandas.api.extensions import take

        data = self._data
        if allow_fill and fill_value is None:
            fill_value = self.dtype.na_value

        result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill)
        return self._from_sequence(result)

    def copy(self):
        """Return a new array backed by a copy of the data."""
        return type(self)(self._data.copy())

    def astype(self, dtype, copy=True):
        """Cast to ``dtype``.

        For a ``DecimalDtype`` target a new ``DecimalArray`` carrying that
        dtype's context is returned; it owns a copy of the data unless
        ``copy=False``. Any other target is delegated to ``np.asarray``.
        """
        if isinstance(dtype, type(self.dtype)):
            return type(self)(self._data, copy=copy, context=dtype.context)
        return np.asarray(self, dtype=dtype)

    def __setitem__(self, key, value):
        """Set ``key`` to ``value``, coercing the value(s) to ``Decimal``.

        Raises
        ------
        ValueError
            If ``value`` is list-like while ``key`` is a scalar.
        """
        if pd.api.types.is_list_like(value):
            if pd.api.types.is_scalar(key):
                raise ValueError("setting an array element with a sequence.")
            value = [decimal.Decimal(v) for v in value]
        else:
            value = decimal.Decimal(value)
        self._data[key] = value

    def __len__(self):
        return len(self._data)

    @property
    def nbytes(self):
        """Size estimate: length times ``sys.getsizeof`` of the first element."""
        n = len(self)
        if n:
            return n * sys.getsizeof(self[0])
        return 0

    def isna(self):
        """Boolean ndarray marking the elements for which ``is_nan()`` is true."""
        return np.array([x.is_nan() for x in self._data], dtype=bool)

    @property
    def _na_value(self):
        return decimal.Decimal("NaN")

    def _formatter(self, boxed=False):
        """Return the callable used to render each element."""
        if boxed:
            return "Decimal: {0}".format
        return repr

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Concatenate the underlying arrays of ``to_concat`` into one array."""
        return cls(np.concatenate([x._data for x in to_concat]))

    def _reduce(self, name, skipna=True, **kwargs):
        """Run the ndarray method called ``name`` over the data along axis 0.

        Raises
        ------
        NotImplementedError
            If ``skipna`` is true, or the ndarray has no method ``name``.
        """
        if skipna:
            raise NotImplementedError("decimal does not support skipna=True")

        try:
            op = getattr(self.data, name)
        except AttributeError as err:
            # Keep the original lookup failure attached for debugging.
            raise NotImplementedError(
                "decimal does not support the {} operation".format(name)
            ) from err
        return op(axis=0)
|
||||
|
||||
|
||||
def to_decimal(values, context=None):
    """Convert each item of ``values`` to ``Decimal`` and wrap them in a DecimalArray."""
    converted = [decimal.Decimal(item) for item in values]
    return DecimalArray(converted, context=context)
|
||||
|
||||
|
||||
def make_data():
    """Return a list of 100 ``Decimal`` values built from ``random.random()``."""
    sample = []
    for _ in range(100):
        sample.append(decimal.Decimal(random.random()))
    return sample
|
||||
|
||||
|
||||
# Register the arithmetic and comparison operator methods on DecimalArray.
# NOTE(review): presumably supplied by ExtensionScalarOpsMixin — confirm.
DecimalArray._add_arithmetic_ops()
|
||||
DecimalArray._add_comparison_ops()
|
@@ -0,0 +1,439 @@
|
||||
import decimal
|
||||
import math
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.tests.extension import base
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .array import DecimalArray, DecimalDtype, make_data, to_decimal
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """A default-constructed ``DecimalDtype``."""
    decimal_dtype = DecimalDtype()
    return decimal_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """A ``DecimalArray`` wrapping the values from ``make_data()``."""
    values = make_data()
    return DecimalArray(values)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_twos():
    """A ``DecimalArray`` of 100 elements, each ``Decimal(2)``."""
    twos = [decimal.Decimal(2) for _position in range(100)]
    return DecimalArray(twos)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """A length-2 ``DecimalArray``: [NaN, 1]."""
    missing = decimal.Decimal("NaN")
    valid = decimal.Decimal(1)
    return DecimalArray([missing, valid])
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """A ``DecimalArray`` holding [1, 2, 0]."""
    return DecimalArray([decimal.Decimal(text) for text in ("1", "2", "0")])
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """A ``DecimalArray`` holding [1, NaN, 0]."""
    return DecimalArray([decimal.Decimal(text) for text in ("1", "NaN", "0")])
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Predicate that holds when both arguments report ``is_nan()``."""

    def both_nan(x, y):
        return x.is_nan() and y.is_nan()

    return both_nan
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """The missing-value scalar used by these tests: ``Decimal("NaN")``."""
    missing = decimal.Decimal("NaN")
    return missing
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """A ``DecimalArray`` laid out as [B, B, NA, NA, A, A, B, C].

    Here A = 0.0, B = 1.0, C = 2.0 and NA = Decimal("NaN").
    """
    low, mid, high = (decimal.Decimal(text) for text in ("0.0", "1.0", "2.0"))
    missing = decimal.Decimal("NaN")
    return DecimalArray([mid, mid, missing, missing, low, low, mid, high])
|
||||
|
||||
|
||||
class BaseDecimal:
    """Mixin providing NaN-aware series/frame assertions for the decimal tests."""

    def assert_series_equal(self, left, right, *args, **kwargs):
        """Compare the NA masks first, then the non-NA values."""

        def is_nan_scalar(value):
            # array([Decimal(NaN)], dtype='object') has to be treated as NaN
            # by hand, because Series[object].isnan doesn't recognize
            # decimal(NaN) as NA.
            try:
                return math.isnan(value)
            except TypeError:
                return False

        def na_mask(ser):
            if ser.dtype == "object":
                return ser.apply(is_nan_scalar)
            return ser.isna()

        left_na = na_mask(left)
        right_na = na_mask(right)

        tm.assert_series_equal(left_na, right_na)
        return tm.assert_series_equal(left[~left_na], right[~right_na], *args, **kwargs)

    def assert_frame_equal(self, left, right, *args, **kwargs):
        """Compare column labels, then columns one by one, then the remainder."""
        # TODO(EA): select_dtypes
        column_options = dict(
            exact=kwargs.get("check_column_type", "equiv"),
            check_names=kwargs.get("check_names", True),
            check_exact=kwargs.get("check_exact", False),
            check_categorical=kwargs.get("check_categorical", True),
            obj="{obj}.columns".format(obj=kwargs.get("obj", "DataFrame")),
        )
        tm.assert_index_equal(left.columns, right.columns, **column_options)

        # NOTE(review): ``.index`` of the boolean mask is every column label,
        # not only the decimal ones, so all columns take the series path.
        # Presumably relied on for object columns holding Decimal("NaN") —
        # confirm before narrowing the selection.
        decimals = (left.dtypes == "decimal").index

        for label in decimals:
            self.assert_series_equal(left[label], right[label], *args, **kwargs)

        remaining_left = left.drop(columns=decimals)
        remaining_right = right.drop(columns=decimals)
        tm.assert_frame_equal(remaining_left, remaining_right, *args, **kwargs)
|
||||
|
||||
|
||||
class TestDtype(BaseDecimal, base.BaseDtypeTests):
    """Dtype tests run with the decimal fixtures."""

    def test_hashable(self, dtype):
        """Deliberately a no-op: no hashability check is made for this dtype."""
|
||||
|
||||
|
||||
class TestInterface(BaseDecimal, base.BaseInterfaceTests):
    """Interface tests run unchanged with the decimal fixtures."""
|
||||
|
||||
|
||||
class TestConstructors(BaseDecimal, base.BaseConstructorsTests):
    """Constructor tests run with the decimal fixtures."""

    @pytest.mark.skip(reason="not implemented constructor from dtype")
    def test_from_dtype(self, data):
        """Skipped: construct from our dtype & string dtype."""
|
||||
|
||||
|
||||
class TestReshaping(BaseDecimal, base.BaseReshapingTests):
    """Reshaping tests run unchanged with the decimal fixtures."""
|
||||
|
||||
|
||||
class TestGetitem(BaseDecimal, base.BaseGetitemTests):
    """Getitem tests plus a decimal-specific ``take`` fill-value check."""

    def test_take_na_value_other_decimal(self):
        """``take`` with -1 and ``allow_fill`` inserts the given fill value."""
        one = decimal.Decimal("1.0")
        fill = decimal.Decimal("-1.0")
        source = DecimalArray([one, decimal.Decimal("2.0")])
        taken = source.take([0, -1], allow_fill=True, fill_value=fill)
        wanted = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("-1.0")])
        self.assert_extension_array_equal(taken, wanted)
|
||||
|
||||
|
||||
class TestMissing(BaseDecimal, base.BaseMissingTests):
    """Missing-data tests run unchanged with the decimal fixtures."""
|
||||
|
||||
|
||||
class Reduce:
    """Mixin supplying the ``check_reduce`` helper for the reduction tests."""

    def check_reduce(self, s, op_name, skipna):
        """Expect NotImplementedError for unsupported cases, else match NumPy."""
        expect_failure = skipna or op_name in ["median", "skew", "kurt"]
        if expect_failure:
            with pytest.raises(NotImplementedError):
                getattr(s, op_name)(skipna=skipna)
            return

        result = getattr(s, op_name)(skipna=skipna)
        expected = getattr(np.asarray(s), op_name)()
        tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
class TestNumericReduce(Reduce, base.BaseNumericReduceTests):
    """Numeric reduction tests using ``Reduce.check_reduce``."""
|
||||
|
||||
|
||||
class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests):
    """Boolean reduction tests using ``Reduce.check_reduce``."""
|
||||
|
||||
|
||||
class TestMethods(BaseDecimal, base.BaseMethodsTests):
    """Method tests; ``value_counts`` is marked as an expected failure."""

    @pytest.mark.parametrize("dropna", [True, False])
    @pytest.mark.xfail(reason="value_counts not implemented yet.")
    def test_value_counts(self, all_data, dropna):
        """Compare ``value_counts`` on the array with the reference input."""
        all_data = all_data[:10]
        other = np.array(all_data[~all_data.isna()]) if dropna else all_data

        counted = pd.Series(all_data).value_counts(dropna=dropna)
        result = counted.sort_index()
        reference = pd.Series(other).value_counts(dropna=dropna)
        expected = reference.sort_index()

        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestCasting(BaseDecimal, base.BaseCastingTests):
    """Casting tests run unchanged with the decimal fixtures."""
|
||||
|
||||
|
||||
class TestGroupby(BaseDecimal, base.BaseGroupbyTests):
    """Groupby tests; the apply-identity case is an expected failure."""

    @pytest.mark.xfail(
        reason="needs to correctly define __eq__ to handle nans, xref #27081."
    )
    def test_groupby_apply_identity(self, data_for_grouping):
        """Delegate to the inherited test so the xfail marker applies to it."""
        parent = super()
        parent.test_groupby_apply_identity(data_for_grouping)
|
||||
|
||||
|
||||
class TestSetitem(BaseDecimal, base.BaseSetitemTests):
    """Setitem tests run unchanged with the decimal fixtures."""
|
||||
|
||||
|
||||
class TestPrinting(BaseDecimal, base.BasePrintingTests):
    """Printing tests with a decimal-specific Series repr check."""

    def test_series_repr(self, data):
        """The Series repr shows the dtype name and the custom element prefix."""
        # Overrides the base test to explicitly check that the custom
        # _formatter is used.
        rendered = repr(pd.Series(data))
        assert data.dtype.name in rendered
        assert "Decimal: " in rendered
|
||||
|
||||
|
||||
# TODO(extension)
|
||||
@pytest.mark.xfail(
    reason=(
        "raising AssertionError as this is not implemented, though easy enough to do"
    )
)
def test_series_constructor_coerce_data_to_extension_dtype_raises():
    """Building a decimal Series from plain ints should raise ``ValueError``."""
    message = (
        "Cannot cast data to extension dtype 'decimal'. Pass the "
        "extension array directly."
    )
    with pytest.raises(ValueError, match=message):
        pd.Series([0, 1, 2], dtype=DecimalDtype())
|
||||
|
||||
|
||||
def test_series_constructor_with_dtype():
    """Series construction from a DecimalArray with an explicit dtype."""
    decimals = DecimalArray([decimal.Decimal("10.0")])

    # Same dtype: identical to constructing without a dtype.
    tm.assert_series_equal(
        pd.Series(decimals, dtype=DecimalDtype()), pd.Series(decimals)
    )

    # int64: the value is converted to an integer.
    tm.assert_series_equal(pd.Series(decimals, dtype="int64"), pd.Series([10]))
|
||||
|
||||
|
||||
def test_dataframe_constructor_with_dtype():
    """DataFrame construction from a DecimalArray with an explicit dtype."""
    decimals = DecimalArray([decimal.Decimal("10.0")])

    # Same dtype: identical to constructing without a dtype.
    same_dtype = pd.DataFrame({"A": decimals}, dtype=DecimalDtype())
    tm.assert_frame_equal(same_dtype, pd.DataFrame({"A": decimals}))

    # int64: the value is converted to an integer.
    decimals = DecimalArray([decimal.Decimal("10.0")])
    as_int = pd.DataFrame({"A": decimals}, dtype="int64")
    tm.assert_frame_equal(as_int, pd.DataFrame({"A": [10]}))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("frame", [True, False])
def test_astype_dispatches(frame):
    """``astype`` on a Series/DataFrame must reach ``DecimalArray.astype``."""
    # This is a dtype-specific test that ensures Series[decimal].astype
    # gets all the way through to ExtensionArray.astype
    # Designing a reliable smoke test that works for arbitrary data types
    # is difficult.
    obj = pd.Series(DecimalArray([decimal.Decimal(2)]), name="a")
    ctx = decimal.Context()
    ctx.prec = 5
    target_dtype = DecimalDtype(ctx)

    if frame:
        converted = obj.to_frame().astype(target_dtype)["a"]
    else:
        converted = obj.astype(target_dtype)

    # The requested context only survives if the array's own astype ran.
    assert converted.dtype.context.prec == ctx.prec
|
||||
|
||||
|
||||
class TestArithmeticOps(BaseDecimal, base.BaseArithmeticOpsTests):
    """Arithmetic operator tests run with the decimal fixtures."""

    def check_opname(self, s, op_name, other, exc=None):
        """Forward to the inherited check, always with ``exc=None``."""
        super().check_opname(s, op_name, other, exc=None)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Check each operator against ints, a scaled copy of the Series, 0 and 5.

        The DivisionByZero and InvalidOperation traps of the active decimal
        context are switched off for the duration of the checks and are
        restored afterwards even when a check fails, so a failure here does
        not change the decimal behaviour seen by later tests.
        """
        op_name = all_arithmetic_operators
        s = pd.Series(data)

        context = decimal.getcontext()
        relaxed_signals = (decimal.DivisionByZero, decimal.InvalidOperation)
        saved_traps = [(sig, context.traps[sig]) for sig in relaxed_signals]
        for sig in relaxed_signals:
            context.traps[sig] = 0

        try:
            # Decimal supports ops with int, but not float
            other = pd.Series([int(d * 100) for d in data])
            self.check_opname(s, op_name, other)

            if "mod" not in op_name:
                self.check_opname(s, op_name, s * 2)

            self.check_opname(s, op_name, 0)
            self.check_opname(s, op_name, 5)
        finally:
            for sig, enabled in saved_traps:
                context.traps[sig] = enabled

    def _check_divmod_op(self, s, op, other, exc=NotImplementedError):
        """Forward to the inherited check with ``exc=None``."""
        # We implement divmod
        super()._check_divmod_op(s, op, other, exc=None)

    def test_error(self):
        """Deliberately a no-op override."""
        pass
|
||||
|
||||
|
||||
class TestComparisonOps(BaseDecimal, base.BaseComparisonOpsTests):
    """Comparison-operator tests specialised for the decimal extension array."""

    def check_opname(self, s, op_name, other, exc=None):
        # Always delegate with exc=None, whatever the caller supplied.
        super().check_opname(s, op_name, other, exc=None)

    def _compare_other(self, s, data, op_name, other):
        # ``data`` is accepted for signature compatibility but not used here.
        self.check_opname(s, op_name, other)

    def test_compare_scalar(self, data, all_compare_operators):
        """Compare a decimal Series with the float scalar 0.5."""
        series = pd.Series(data)
        self._compare_other(series, data, all_compare_operators, 0.5)

    def test_compare_array(self, data, all_compare_operators):
        """Compare a decimal Series with a randomly rescaled copy of itself."""
        series = pd.Series(data)

        exponents = np.random.choice([-1, 0, 1], len(data))
        # Randomly double, halve or keep same value
        factors = [decimal.Decimal(pow(2.0, exponent)) for exponent in exponents]
        other = pd.Series(data) * factors
        self._compare_other(series, data, all_compare_operators, other)
|
||||
|
||||
|
||||
class DecimalArrayWithoutFromSequence(DecimalArray):
    """Helper class for testing error handling in _from_sequence."""

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        # Declared as a classmethod so that the first parameter really is the
        # class for both ``instance._from_sequence(...)`` and
        # ``Class._from_sequence(...)`` calls; in either case construction
        # always fails with the KeyError below.
        raise KeyError("For the test")
|
||||
|
||||
|
||||
class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence):
    """Variant whose arithmetic methods are created with coerce_to_dtype=False."""

    @classmethod
    def _create_arithmetic_method(cls, op):
        method = cls._create_method(op, coerce_to_dtype=False)
        return method
|
||||
|
||||
|
||||
# Attach the arithmetic operators to the class at import time (presumably
# built through the _create_arithmetic_method hook overridden on the class --
# confirm against the mixin that provides _add_arithmetic_ops).
DecimalArrayWithoutCoercion._add_arithmetic_ops()
|
||||
|
||||
|
||||
def test_combine_from_sequence_raises():
    """Series.combine yields an object-dtype Series when _from_sequence raises."""
    # https://github.com/pandas-dev/pandas/issues/22850
    values = [decimal.Decimal("1.0"), decimal.Decimal("2.0")]
    ser = pd.Series(DecimalArrayWithoutFromSequence(values))

    result = ser.combine(ser, operator.add)

    # note: object dtype
    doubled = [decimal.Decimal("2.0"), decimal.Decimal("4.0")]
    expected = pd.Series(doubled, dtype="object")
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "class_", [DecimalArrayWithoutFromSequence, DecimalArrayWithoutCoercion]
)
def test_scalar_ops_from_sequence_raises(class_):
    """Adding two arrays gives an object ndarray for both helper classes."""
    # op(EA, EA) should return an EA, or an ndarray if it's not possible
    # to return an EA with the return values.
    operand = class_([decimal.Decimal("1.0"), decimal.Decimal("2.0")])

    result = operand + operand

    doubled = [decimal.Decimal("2.0"), decimal.Decimal("4.0")]
    expected = np.array(doubled, dtype="object")
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "reverse, expected_div, expected_mod",
    [(False, [0, 1, 1, 2], [1, 0, 1, 0]), (True, [2, 1, 0, 0], [0, 0, 2, 2])],
)
def test_divmod_array(reverse, expected_div, expected_mod):
    """divmod between a decimal array and the scalar 2, in either order."""
    # https://github.com/pandas-dev/pandas/issues/22930
    arr = to_decimal([1, 2, 3, 4])

    # ``reverse`` puts the scalar on the left-hand side.
    operands = (2, arr) if reverse else (arr, 2)
    div, mod = divmod(*operands)

    tm.assert_extension_array_equal(div, to_decimal(expected_div))
    tm.assert_extension_array_equal(mod, to_decimal(expected_mod))
|
||||
|
||||
|
||||
def test_ufunc_fallback(data):
    """np.abs on a Series matches np.abs on the array, keeping the index."""
    head = data[:5]
    index = range(3, 8)

    result = np.abs(pd.Series(head, index=index))

    expected = pd.Series(np.abs(head), index=range(3, 8))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_formatting_values_deprecated():
    """repr of a Series backed by an array defining _formatting_values warns."""

    class DecimalArray2(DecimalArray):
        def _formatting_values(self):
            return np.array(self)

    values = DecimalArray2([decimal.Decimal("1.0")])
    ser = pd.Series(values)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        repr(ser)
|
||||
|
||||
|
||||
def test_array_ufunc():
    """np.exp on a decimal array equals to_decimal of np.exp on its _data."""
    arr = to_decimal([1, 2, 3])

    result = np.exp(arr)

    raw_exp = np.exp(arr._data)
    expected = to_decimal(raw_exp)
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_array_ufunc_series():
    """np.exp on a decimal Series gives a Series of the exponentiated array."""
    arr = to_decimal([1, 2, 3])
    ser = pd.Series(arr)

    result = np.exp(ser)

    raw_exp = np.exp(arr._data)
    expected = pd.Series(to_decimal(raw_exp))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_array_ufunc_series_scalar_other():
    """np.add of a decimal Series and a Decimal scalar matches the array op."""
    # check _HANDLED_TYPES
    arr = to_decimal([1, 2, 3])
    ser = pd.Series(arr)
    one = decimal.Decimal(1)

    result = np.add(ser, one)

    expected = pd.Series(np.add(arr, decimal.Decimal(1)))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_array_ufunc_series_defer():
    """np.add of Series and array returns a Series whichever operand is first."""
    arr = to_decimal([1, 2, 3])
    ser = pd.Series(arr)

    expected = pd.Series(to_decimal([2, 4, 6]))
    series_first = np.add(ser, arr)
    array_first = np.add(arr, ser)

    tm.assert_series_equal(series_first, expected)
    tm.assert_series_equal(array_first, expected)
|
@@ -0,0 +1,3 @@
|
||||
from .array import JSONArray, JSONDtype, make_data
|
||||
|
||||
__all__ = ["JSONArray", "JSONDtype", "make_data"]
|
@@ -0,0 +1,198 @@
|
||||
"""Test extension array for storing nested data in a pandas container.
|
||||
|
||||
The JSONArray stores lists of dictionaries. The storage mechanism is a list,
|
||||
not an ndarray.
|
||||
|
||||
Note:
|
||||
|
||||
We currently store lists of UserDicts. Pandas has a few places
|
||||
internally that specifically check for dicts, and does non-scalar things
|
||||
in that case. We *want* the dictionaries to be treated as scalars, so we
|
||||
hack around pandas by using UserDicts.
|
||||
"""
|
||||
from collections import UserDict, abc
|
||||
import itertools
|
||||
import numbers
|
||||
import random
|
||||
import string
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.base import ExtensionDtype
|
||||
|
||||
from pandas.core.arrays import ExtensionArray
|
||||
|
||||
|
||||
class JSONDtype(ExtensionDtype):
    """Extension dtype named "json" whose scalars are mappings.

    An empty UserDict is used as the missing-value marker.
    """

    type = abc.Mapping
    name = "json"
    na_value = UserDict()

    @classmethod
    def construct_array_type(cls):
        """Return the array type associated with this dtype

        Returns
        -------
        type
        """
        return JSONArray

    @classmethod
    def construct_from_string(cls, string):
        """Return a new instance when ``string`` equals the dtype name.

        Raises
        ------
        TypeError
            If ``string`` is anything other than ``cls.name``.
        """
        if string != cls.name:
            raise TypeError("Cannot construct a '{}' from '{}'".format(cls, string))
        return cls()
|
||||
|
||||
|
||||
class JSONArray(ExtensionArray):
    """Extension array that keeps its mapping scalars in ``self.data``."""

    dtype = JSONDtype()
    __array_priority__ = 1000

    def __init__(self, values, dtype=None, copy=False):
        # ``dtype`` and ``copy`` are accepted but not used.
        scalar_type = self.dtype.type
        for val in values:
            if not isinstance(val, scalar_type):
                raise TypeError("All values must be of type " + str(self.dtype.type))
        self.data = values

        # Some aliases for common attribute names to ensure pandas supports
        # these
        self._items = self._data = self.data
        # those aliases are currently not working due to assumptions
        # in internal code (GH-20735)
        # self._values = self.values = self.data

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        # ``dtype`` and ``copy`` are ignored; the scalars are wrapped as-is.
        return cls(scalars)

    @classmethod
    def _from_factorized(cls, values, original):
        # Empty tuples are dropped; every other entry becomes a UserDict.
        rebuilt = []
        for x in values:
            if x != ():
                rebuilt.append(UserDict(x))
        return cls(rebuilt)

    def __getitem__(self, item):
        if isinstance(item, numbers.Integral):
            # scalar access
            return self.data[item]
        if isinstance(item, np.ndarray) and item.dtype == "bool":
            # boolean mask
            kept = [x for x, m in zip(self, item) if m]
            return self._from_sequence(kept)
        if isinstance(item, abc.Iterable):
            # fancy indexing
            return type(self)([self.data[i] for i in item])
        # slice
        return type(self)(self.data[item])

    def __setitem__(self, key, value):
        if isinstance(key, numbers.Integral):
            self.data[key] = value
            return

        if not isinstance(value, (type(self), abc.Sequence)):
            # broadcast value
            value = itertools.repeat(value)

        if isinstance(key, np.ndarray) and key.dtype == "bool":
            # masking
            for position, (flag, v) in enumerate(zip(key, value)):
                if flag:
                    assert isinstance(v, self.dtype.type)
                    self.data[position] = v
        else:
            for position, v in zip(key, value):
                assert isinstance(v, self.dtype.type)
                self.data[position] = v

    def __len__(self):
        return len(self.data)

    @property
    def nbytes(self):
        # Size of the container object itself, as reported by sys.getsizeof.
        return sys.getsizeof(self.data)

    def isna(self):
        """Return a boolean ndarray marking entries equal to the dtype's NA value."""
        flags = [x == self.dtype.na_value for x in self.data]
        return np.array(flags, dtype=bool)

    def take(self, indexer, allow_fill=False, fill_value=None):
        """Return a new array with the elements at ``indexer``.

        With ``allow_fill`` true, ``-1`` selects ``fill_value`` (the dtype's
        NA value when ``fill_value`` is None) and any index below ``-1``
        raises ValueError. Out-of-bounds positions raise IndexError.
        """
        # re-implement here, since NumPy has trouble setting
        # sized objects like UserDicts into scalar slots of
        # an ndarray.
        indexer = np.asarray(indexer)
        msg = (
            "Index is out of bounds or cannot do a "
            "non-empty take from an empty array."
        )

        if allow_fill:
            if fill_value is None:
                fill_value = self.dtype.na_value
            # bounds check
            if (indexer < -1).any():
                raise ValueError

            def pick(loc):
                return self.data[loc] if loc != -1 else fill_value

        else:

            def pick(loc):
                return self.data[loc]

        try:
            output = [pick(loc) for loc in indexer]
        except IndexError:
            raise IndexError(msg)

        return self._from_sequence(output)

    def copy(self):
        # Slice-copy of the underlying container; the scalars are shared.
        return type(self)(self.data[:])

    def astype(self, dtype, copy=True):
        # NumPy has issues when all the dicts are the same length.
        # np.array([UserDict(...), UserDict(...)]) fails,
        # but np.array([{...}, {...}]) works, so cast.

        # needed to add this check for the Series constructor
        same_dtype = isinstance(dtype, type(self.dtype)) and dtype == self.dtype
        if same_dtype:
            return self.copy() if copy else self
        plain_dicts = [dict(x) for x in self]
        return np.array(plain_dicts, dtype=dtype, copy=copy)

    def unique(self):
        # Parent method doesn't work since np.array will try to infer
        # a 2-dim object.
        distinct = {tuple(d.items()) for d in self.data}
        return type(self)([dict(x) for x in distinct])

    @classmethod
    def _concat_same_type(cls, to_concat):
        merged = []
        for arr in to_concat:
            merged.extend(arr.data)
        return cls(merged)

    def _values_for_factorize(self):
        frozen = self._values_for_argsort()
        if len(frozen) == 0:
            # _factorize_array expects 1-d array, this is a len-0 2-d array.
            frozen = frozen.ravel()
        return frozen, ()

    def _values_for_argsort(self):
        # Disable NumPy's shape inference by including an empty tuple...
        # If all the elements of self are the same size P, NumPy will
        # cast them to an (N, P) array, instead of an (N,) array of tuples.
        frozen = [()]
        frozen.extend(tuple(x.items()) for x in self)
        return np.array(frozen, dtype=object)[1:]
|
||||
|
||||
|
||||
def make_data():
    """Return a list of 100 UserDicts with random letter keys and int values.

    Each UserDict is built from 0 to 10 (key, value) pairs; keys are single
    ASCII letters and values are integers between 0 and 100 inclusive.
    """
    # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer

    def random_record():
        pair_count = random.randint(0, 10)
        pairs = []
        for _ in range(pair_count):
            key = random.choice(string.ascii_letters)
            value = random.randint(0, 100)
            pairs.append((key, value))
        return UserDict(pairs)

    return [random_record() for _ in range(100)]
|
@@ -0,0 +1,312 @@
|
||||
import collections
|
||||
import operator
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PY36
|
||||
|
||||
import pandas as pd
|
||||
from pandas.tests.extension import base
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .array import JSONArray, JSONDtype, make_data
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Provide a fresh JSONDtype instance."""
    json_dtype = JSONDtype()
    return json_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """JSONArray built from make_data() whose first two elements differ in length."""
    records = make_data()

    # Why the while loop? NumPy is unable to construct an ndarray from
    # equal-length ndarrays. Many of our operations involve coercing the
    # EA to an ndarray of objects. To avoid random test failures, we ensure
    # that our data is coercible to an ndarray. Several tests deal with only
    # the first two elements, so that's what we'll check.
    while len(records[0]) == len(records[1]):
        records = make_data()

    return JSONArray(records)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Length 2 array with [NA, Valid]"""
    missing, valid = {}, {"a": 10}
    return JSONArray([missing, valid])
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """Three-element JSONArray used by the sorting tests."""
    records = [{"b": 1}, {"c": 4}, {"a": 2, "c": 3}]
    return JSONArray(records)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """Three-element JSONArray with an empty dict in the middle position."""
    records = [{"b": 1}, {}, {"a": 4}]
    return JSONArray(records)
|
||||
|
||||
|
||||
@pytest.fixture
def na_value(dtype):
    """Expose the NA marker of the ``dtype`` fixture."""
    marker = dtype.na_value
    return marker
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Comparator for NA values: plain equality (operator.eq)."""
    comparator = operator.eq
    return comparator
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """Eight-element JSONArray with repeated and empty dicts for groupby tests."""
    records = [
        {"b": 1},
        {"b": 1},
        {},
        {},
        {"a": 0, "c": 2},
        {"a": 0, "c": 2},
        {"b": 1},
        {"c": 2},
    ]
    return JSONArray(records)
|
||||
|
||||
|
||||
class BaseJSON:
    """Mixin providing equality assertions that cope with JSONArray data."""

    # NumPy doesn't handle an array of equal-length UserDicts.
    # The default assert_series_equal eventually does a
    # Series.values, which raises. We work around it by
    # converting the UserDicts to dicts.
    def assert_series_equal(self, left, right, **kwargs):
        """Assert two Series are equal, rebuilding json Series from dicts first."""
        if left.dtype.name == "json":
            assert left.dtype == right.dtype
            left = pd.Series(
                JSONArray(left.values.astype(object)), index=left.index, name=left.name
            )
            right = pd.Series(
                JSONArray(right.values.astype(object)),
                index=right.index,
                name=right.name,
            )
        tm.assert_series_equal(left, right, **kwargs)

    def assert_frame_equal(self, left, right, *args, **kwargs):
        """Assert two DataFrames are equal.

        The column indexes are compared first. Columns of dtype "json" are
        then compared one by one through ``self.assert_series_equal`` and
        whatever remains is handed to ``tm.assert_frame_equal``.
        """
        tm.assert_index_equal(
            left.columns,
            right.columns,
            exact=kwargs.get("check_column_type", "equiv"),
            check_names=kwargs.get("check_names", True),
            check_exact=kwargs.get("check_exact", False),
            check_categorical=kwargs.get("check_categorical", True),
            obj="{obj}.columns".format(obj=kwargs.get("obj", "DataFrame")),
        )

        # Select only the labels whose dtype is "json". Taking ``.index`` of
        # the boolean mask directly would return every column label.
        is_json = left.dtypes == "json"
        jsons = is_json[is_json].index

        for col in jsons:
            self.assert_series_equal(left[col], right[col], *args, **kwargs)

        left = left.drop(columns=jsons)
        right = right.drop(columns=jsons)
        tm.assert_frame_equal(left, right, *args, **kwargs)
|
||||
|
||||
|
||||
class TestDtype(BaseJSON, base.BaseDtypeTests):
    """Runs the tests inherited from base.BaseDtypeTests with no overrides."""
|
||||
|
||||
|
||||
class TestInterface(BaseJSON, base.BaseInterfaceTests):
    """Interface tests plus a check of the custom BaseJSON assertions."""

    def test_custom_asserts(self):
        # This would always trigger the KeyError from trying to put
        # an array of equal-length UserDicts inside an ndarray.
        records = [
            collections.UserDict({"a": 1}),
            collections.UserDict({"b": 2}),
            collections.UserDict({"c": 3}),
        ]
        data = JSONArray(records)
        a = pd.Series(data)

        # Equal inputs must pass for both Series and DataFrame.
        self.assert_series_equal(a, a)
        self.assert_frame_equal(a.to_frame(), a.to_frame())

        # Different inputs must be reported for both Series and DataFrame.
        b = pd.Series(data.take([0, 0, 1]))
        with pytest.raises(AssertionError):
            self.assert_series_equal(a, b)

        with pytest.raises(AssertionError):
            self.assert_frame_equal(a.to_frame(), b.to_frame())
|
||||
|
||||
|
||||
class TestConstructors(BaseJSON, base.BaseConstructorsTests):
    """Constructor tests; construction from a dtype is skipped."""

    @pytest.mark.skip(reason="not implemented constructor from dtype")
    def test_from_dtype(self, data):
        """Skipped: construct from our dtype & string dtype."""
|
||||
|
||||
|
||||
class TestReshaping(BaseJSON, base.BaseReshapingTests):
    """Reshaping tests; stack is skipped and unstack is expected to fail."""

    @pytest.mark.skip(reason="Different definitions of NA")
    def test_stack(self):
        """
        The test does .astype(object).stack(). If we happen to have
        any missing values in `data`, then we'll end up with different
        rows since we consider `{}` NA, but `.astype(object)` doesn't.
        """

    @pytest.mark.xfail(reason="dict for NA")
    def test_unstack(self, data, index):
        # The base test has NaN for the expected NA value.
        # this matches otherwise
        outcome = super().test_unstack(data, index)
        return outcome
|
||||
|
||||
|
||||
class TestGetitem(BaseJSON, base.BaseGetitemTests):
    """Runs the tests inherited from base.BaseGetitemTests with no overrides."""
|
||||
|
||||
|
||||
class TestMissing(BaseJSON, base.BaseMissingTests):
    """Missing-data tests; both fillna variants are skipped."""

    @pytest.mark.skip(reason="Setting a dict as a scalar")
    def test_fillna_series(self):
        """We treat dictionaries as a mapping in fillna, not a scalar."""

    @pytest.mark.skip(reason="Setting a dict as a scalar")
    def test_fillna_frame(self):
        """We treat dictionaries as a mapping in fillna, not a scalar."""
|
||||
|
||||
|
||||
# Unconditional skip marker with reason "Unhashable".
unhashable = pytest.mark.skip(reason="Unhashable")
# Skip marker applied when not running on Python 3.6 or higher.
unstable = pytest.mark.skipif(not PY36, reason="Dictionary order unstable")
|
||||
|
||||
|
||||
class TestReduce(base.BaseNoReduceTests):
    """Runs the tests inherited from base.BaseNoReduceTests with no overrides."""
|
||||
|
||||
|
||||
class TestMethods(BaseJSON, base.BaseMethodsTests):
    """Method tests for JSONArray; several are skipped or version-gated."""

    # --- skipped with the "Unhashable" marker ---

    @unhashable
    def test_value_counts(self, all_data, dropna):
        pass

    @unhashable
    def test_sort_values_frame(self):
        # TODO (EA.factorize): see if _values_for_factorize allows this.
        pass

    # --- run only when the "Dictionary order unstable" marker allows it ---

    @unstable
    def test_argsort(self, data_for_sorting):
        super().test_argsort(data_for_sorting)

    @unstable
    def test_argsort_missing(self, data_missing_for_sorting):
        super().test_argsort_missing(data_missing_for_sorting)

    @unstable
    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values(self, data_for_sorting, ascending):
        super().test_sort_values(data_for_sorting, ascending)

    @unstable
    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values_missing(self, data_missing_for_sorting, ascending):
        super().test_sort_values_missing(data_missing_for_sorting, ascending)

    # --- combine is skipped for JSONArray ---

    @pytest.mark.skip(reason="combine for JSONArray not supported")
    def test_combine_le(self, data_repeated):
        pass

    @pytest.mark.skip(reason="combine for JSONArray not supported")
    def test_combine_add(self, data_repeated):
        pass

    @pytest.mark.skip(reason="combine for JSONArray not supported")
    def test_combine_first(self, data):
        pass

    # --- remaining skips ---

    @unhashable
    def test_hash_pandas_object_works(self, data, kind):
        super().test_hash_pandas_object_works(data, kind)

    @pytest.mark.skip(reason="broadcasting error")
    def test_where_series(self, data, na_value):
        # Fails with
        # *** ValueError: operands could not be broadcast together
        # with shapes (4,) (4,) (0,)
        super().test_where_series(data, na_value)

    @pytest.mark.skip(reason="Can't compare dicts.")
    def test_searchsorted(self, data_for_sorting):
        super().test_searchsorted(data_for_sorting)
|
||||
|
||||
|
||||
class TestCasting(BaseJSON, base.BaseCastingTests):
    """Casting tests; casting to str is skipped."""

    @pytest.mark.skip(reason="failing on np.array(self, dtype=str)")
    def test_astype_str(self):
        """This currently fails in NumPy on np.array(self, dtype=str) with

        *** ValueError: setting an array element with a sequence
        """
|
||||
|
||||
|
||||
# We intentionally don't run base.BaseSetitemTests because pandas'
|
||||
# internals has trouble setting sequences of values into scalar positions.
|
||||
|
||||
|
||||
class TestGroupby(BaseJSON, base.BaseGroupbyTests):
    """Groupby tests; transform and apply are skipped, agg is version-gated."""

    @unhashable
    def test_groupby_extension_transform(self):
        """
        This currently fails in Series.name.setter, since the
        name must be hashable, but the value is a dictionary.
        I think this is what we want, i.e. `.name` should be the original
        values, and not the values for factorization.
        """

    @unhashable
    def test_groupby_extension_apply(self):
        """
        This fails in Index._do_unique_check with

        >   hash(val)
        E   TypeError: unhashable type: 'UserDict' with

        I suspect that once we support Index[ExtensionArray],
        we'll be able to dispatch unique.
        """

    @unstable
    @pytest.mark.parametrize("as_index", [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        # Delegates unchanged to the inherited implementation.
        super().test_groupby_extension_agg(as_index, data_for_grouping)
|
||||
|
||||
|
||||
class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests):
    """Arithmetic tests for JSONArray, where arithmetic raises TypeError."""

    def test_error(self, data, all_arithmetic_operators):
        # Intentionally a no-op.
        pass

    def test_add_series_with_extension_array(self, data):
        series = pd.Series(data)
        with pytest.raises(TypeError, match="unsupported"):
            series + data

    def test_divmod_series_array(self):
        # GH 23287
        # skipping because it is not implemented
        pass

    def _check_divmod_op(self, s, op, other, exc=NotImplementedError):
        # The caller's ``exc`` is replaced: TypeError is always expected.
        outcome = super()._check_divmod_op(s, op, other, exc=TypeError)
        return outcome
|
||||
|
||||
|
||||
class TestComparisonOps(BaseJSON, base.BaseComparisonOpsTests):
    """Runs the tests inherited from base.BaseComparisonOpsTests with no overrides."""
|
||||
|
||||
|
||||
class TestPrinting(BaseJSON, base.BasePrintingTests):
    """Runs the tests inherited from base.BasePrintingTests with no overrides."""
|
@@ -0,0 +1,245 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
"""
|
||||
import string
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical
|
||||
from pandas.api.types import CategoricalDtype
|
||||
from pandas.tests.extension import base
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def make_data():
    """Return 100 random ASCII letters whose first two entries differ.

    Samples are redrawn until the requirements are met:
    1. first two not null
    2. first and second are different
    """
    letters = list(string.ascii_letters)
    values = np.random.choice(letters, size=100)
    while values[0] == values[1]:
        values = np.random.choice(list(string.ascii_letters), size=100)
    return values
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Provide a CategoricalDtype created with no arguments."""
    categorical_dtype = CategoricalDtype()
    return categorical_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Length-100 array for this type.

    * data[0] and data[1] should both be non missing
    * data[0] and data[1] should not be equal
    """
    letters = make_data()
    return Categorical(letters)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Length 2 array with [NA, Valid]"""
    values = [np.nan, "A"]
    return Categorical(values)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """Ordered Categorical ["A", "B", "C"] with category order C, A, B."""
    values = ["A", "B", "C"]
    category_order = ["C", "A", "B"]
    return Categorical(values, categories=category_order, ordered=True)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """Ordered Categorical ["A", None, "B"] with category order B, A."""
    values = ["A", None, "B"]
    category_order = ["B", "A"]
    return Categorical(values, categories=category_order, ordered=True)
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """The NA marker used by these tests: np.nan."""
    marker = np.nan
    return marker
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """Eight-element Categorical with repeated values and two None entries."""
    values = ["a", "a", None, None, "b", "b", "a", "c"]
    return Categorical(values)
|
||||
|
||||
|
||||
class TestDtype(base.BaseDtypeTests):
    """Runs the tests inherited from base.BaseDtypeTests with no overrides."""
|
||||
|
||||
|
||||
class TestInterface(base.BaseInterfaceTests):
    """Interface tests; the memory-usage check is skipped."""

    @pytest.mark.skip(reason="Memory usage doesn't match")
    def test_memory_usage(self, data):
        # Is this deliberate?
        super().test_memory_usage(data)
|
||||
|
||||
|
||||
class TestConstructors(base.BaseConstructorsTests):
    """Runs the tests inherited from base.BaseConstructorsTests with no overrides."""
|
||||
|
||||
|
||||
class TestReshaping(base.BaseReshapingTests):
    """Reshaping tests; ravel is expected to emit a FutureWarning."""

    def test_ravel(self, data):
        # GH#27199 Categorical.ravel returns self until after deprecation cycle
        warning_check = tm.assert_produces_warning(FutureWarning)
        with warning_check:
            data.ravel()
|
||||
|
||||
|
||||
class TestGetitem(base.BaseGetitemTests):
    """Getitem/take tests; most take-related tests are skipped."""

    # Shared skip marker for the take-related tests below.
    skip_take = pytest.mark.skip(reason="GH-20664.")

    @pytest.mark.skip(reason="Backwards compatibility")
    def test_getitem_scalar(self, data):
        # CategoricalDtype.type isn't "correct" since it should
        # be a parent of the elements (object). But don't want
        # to break things by changing.
        super().test_getitem_scalar(data)

    # --- tests skipped through ``skip_take`` ---

    @skip_take
    def test_take(self, data, na_value, na_cmp):
        # TODO remove this once Categorical.take is fixed
        super().test_take(data, na_value, na_cmp)

    @skip_take
    def test_take_negative(self, data):
        super().test_take_negative(data)

    @skip_take
    def test_take_pandas_style_negative_raises(self, data, na_value):
        super().test_take_pandas_style_negative_raises(data, na_value)

    @skip_take
    def test_take_non_na_fill_value(self, data_missing):
        super().test_take_non_na_fill_value(data_missing)

    @skip_take
    def test_take_out_of_bounds_raises(self, data, allow_fill):
        outcome = super().test_take_out_of_bounds_raises(data, allow_fill)
        return outcome

    @pytest.mark.skip(reason="GH-20747. Unobserved categories.")
    def test_take_series(self, data):
        super().test_take_series(data)

    @skip_take
    def test_reindex_non_na_fill_value(self, data_missing):
        super().test_reindex_non_na_fill_value(data_missing)

    # --- tests skipped for other reasons ---

    @pytest.mark.skip(reason="Categorical.take buggy")
    def test_take_empty(self, data, na_value, na_cmp):
        super().test_take_empty(data, na_value, na_cmp)

    @pytest.mark.skip(reason="test not written correctly for categorical")
    def test_reindex(self, data, na_value):
        super().test_reindex(data, na_value)
|
||||
|
||||
|
||||
class TestSetitem(base.BaseSetitemTests):
    """Runs the tests inherited from base.BaseSetitemTests with no overrides."""
|
||||
|
||||
|
||||
class TestMissing(base.BaseMissingTests):
    """Missing-data tests; the fillna ``limit`` variants are skipped."""

    @pytest.mark.skip(reason="Not implemented")
    def test_fillna_limit_pad(self, data_missing):
        # Skipped; body kept so the delegation is ready once the mark goes.
        super().test_fillna_limit_pad(data_missing)

    @pytest.mark.skip(reason="Not implemented")
    def test_fillna_limit_backfill(self, data_missing):
        # Skipped; body kept so the delegation is ready once the mark goes.
        super().test_fillna_limit_backfill(data_missing)
|
||||
|
||||
|
||||
class TestReduce(base.BaseNoReduceTests):
    """Runs the tests inherited from base.BaseNoReduceTests with no overrides."""
|
||||
|
||||
|
||||
class TestMethods(base.BaseMethodsTests):
    """Method tests for Categorical with a few overrides and skips."""

    @pytest.mark.skip(reason="Unobserved categories included")
    def test_value_counts(self, all_data, dropna):
        return super().test_value_counts(all_data, dropna)

    def test_combine_add(self, data_repeated):
        """Series.combine with ``+`` on categoricals yields plain strings."""
        # GH 20825
        # When adding categoricals in combine, result is a string
        orig_data1, orig_data2 = data_repeated(2)
        s1 = pd.Series(orig_data1)
        s2 = pd.Series(orig_data2)
        result = s1.combine(s2, lambda x1, x2: x1 + x2)
        expected = pd.Series(
            ([a + b for (a, b) in zip(list(orig_data1), list(orig_data2))])
        )
        self.assert_series_equal(result, expected)

        # Same check with a scalar as the second operand.
        val = s1.iloc[0]
        result = s1.combine(val, lambda x1, x2: x1 + x2)
        expected = pd.Series([a + val for a in list(orig_data1)])
        self.assert_series_equal(result, expected)

    @pytest.mark.skip(reason="Not Applicable")
    def test_fillna_length_mismatch(self, data_missing):
        super().test_fillna_length_mismatch(data_missing)

    def test_searchsorted(self, data_for_sorting):
        """Skip when the data is unordered; otherwise nothing is checked here."""
        if not data_for_sorting.ordered:
            # pytest.skip raises on its own, so no ``raise`` is needed. The
            # reason is passed positionally so the call works whether the
            # installed pytest names that parameter ``msg`` or ``reason``.
            pytest.skip("searchsorted requires ordered data.")
|
||||
|
||||
|
||||
class TestCasting(base.BaseCastingTests):
    """Runs the tests inherited from base.BaseCastingTests with no overrides."""
|
||||
|
||||
|
||||
class TestArithmeticOps(base.BaseArithmeticOpsTests):
    """Arithmetic tests for Categorical, where arithmetic raises TypeError."""

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        op_name = all_arithmetic_operators
        if op_name == "__rmod__":
            # pytest.skip raises, so nothing below runs for this operator.
            pytest.skip("rmod never called when string is first argument")
        super().test_arith_series_with_scalar(data, op_name)

    def test_add_series_with_extension_array(self, data):
        series = pd.Series(data)
        with pytest.raises(TypeError, match="cannot perform"):
            series + data

    def test_divmod_series_array(self):
        # GH 23287
        # skipping because it is not implemented
        pass

    def _check_divmod_op(self, s, op, other, exc=NotImplementedError):
        # The caller's ``exc`` is replaced: TypeError is always expected.
        outcome = super()._check_divmod_op(s, op, other, exc=TypeError)
        return outcome
|
||||
|
||||
|
||||
class TestComparisonOps(base.BaseComparisonOpsTests):
    """Comparison tests: == and != are checked, other operators must raise."""

    def _compare_other(self, s, data, op_name, other):
        op = self.get_op_from_name(op_name)

        # Element-wise reference implementations for the supported operators.
        elementwise = {
            "__eq__": lambda x, y: x == y,
            "__ne__": lambda x, y: x != y,
        }

        if op_name not in elementwise:
            with pytest.raises(TypeError):
                op(data, other)
            return

        result = op(s, other)
        expected = s.combine(other, elementwise[op_name])
        assert (result == expected).all()
|
||||
|
||||
|
||||
class TestParsing(base.BaseParsingTests):
    """Runs the tests inherited from base.BaseParsingTests with no overrides."""
|
@@ -0,0 +1,81 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes import dtypes
|
||||
from pandas.core.dtypes.common import is_extension_array_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import ExtensionArray
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class DummyDtype(dtypes.ExtensionDtype):
    """Minimal ExtensionDtype subclass that adds nothing of its own."""
|
||||
|
||||
|
||||
class DummyArray(ExtensionArray):
    """Minimal ExtensionArray wrapper around the object passed as ``data``."""

    def __init__(self, data):
        self.data = data

    def __array__(self, dtype=None, copy=None):
        # NumPy's ``__array__`` protocol treats ``dtype`` (and, in newer
        # releases, ``copy``) as optional, so both need defaults for
        # ``np.asarray(arr)`` to work. Neither is applied here: the wrapped
        # data is returned unchanged.
        return self.data

    @property
    def dtype(self):
        # A new DummyDtype instance is created on every access.
        return DummyDtype()

    def astype(self, dtype, copy=True):
        """Cast to ``dtype``.

        For a DummyDtype target, return ``self`` when ``copy`` is false and
        otherwise a new wrapper around the same ``data`` object. Any other
        target is handed to ``np.array``.
        """
        # we don't support anything but a single dtype
        if isinstance(dtype, DummyDtype):
            if copy:
                return type(self)(self.data)
            return self

        return np.array(self, dtype=dtype, copy=copy)
|
||||
|
||||
|
||||
class TestExtensionArrayDtype:
    """Checks for is_extension_array_dtype on extension and plain inputs."""

    @pytest.mark.parametrize(
        "values",
        [
            pd.Categorical([]),
            pd.Categorical([]).dtype,
            pd.Series(pd.Categorical([])),
            DummyDtype(),
            DummyArray(np.array([1, 2])),
        ],
    )
    def test_is_extension_array_dtype(self, values):
        recognised = is_extension_array_dtype(values)
        assert recognised

    @pytest.mark.parametrize("values", [np.array([]), pd.Series(np.array([]))])
    def test_is_not_extension_array_dtype(self, values):
        recognised = is_extension_array_dtype(values)
        assert not recognised
|
||||
|
||||
|
||||
def test_astype():
    """astype to object works for both the type and its string alias."""
    arr = DummyArray(np.array([1, 2, 3]))
    expected = np.array([1, 2, 3], dtype=object)

    for target in (object, "object"):
        result = arr.astype(target)
        tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_no_copy():
    """``astype`` to the array's own dtype returns ``self`` only when ``copy=False``."""
    arr = DummyArray(np.array([1, 2, 3], dtype=np.int64))

    uncopied = arr.astype(arr.dtype, copy=False)
    assert arr is uncopied

    copied = arr.astype(arr.dtype)
    assert arr is not copied
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [dtypes.CategoricalDtype(), dtypes.IntervalDtype()])
def test_is_extension_array_dtype(dtype):
    """Categorical and Interval dtypes are ``ExtensionDtype`` instances and are recognized."""
    is_extension_instance = isinstance(dtype, dtypes.ExtensionDtype)
    assert is_extension_instance
    assert is_extension_array_dtype(dtype)
|
@@ -0,0 +1,230 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import DatetimeArray
|
||||
from pandas.tests.extension import base
|
||||
|
||||
|
||||
@pytest.fixture(params=["US/Central"])
def dtype(request):
    """Nanosecond ``DatetimeTZDtype`` in the parametrized time zone."""
    zone = request.param
    return DatetimeTZDtype(unit="ns", tz=zone)
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """``DatetimeArray`` of 100 periods starting at "2000" in ``dtype``'s time zone."""
    stamps = pd.date_range("2000", periods=100, tz=dtype.tz)
    return DatetimeArray(stamps, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """Length-2 ``DatetimeArray``: a missing value followed by a valid one."""
    raw = np.array(["NaT", "2000-01-01"], dtype="datetime64[ns]")
    return DatetimeArray(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting(dtype):
    """Length-3 ``DatetimeArray`` laid out as [B, C, A] with A < B < C."""
    earliest, middle, latest = (
        pd.Timestamp(day) for day in ("2000-01-01", "2000-01-02", "2000-01-03")
    )
    raw = np.array([middle, latest, earliest], dtype="datetime64[ns]")
    return DatetimeArray(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting(dtype):
    """Length-3 ``DatetimeArray`` laid out as [B, NaT, A] with A < B."""
    earlier = pd.Timestamp("2000-01-01")
    later = pd.Timestamp("2000-01-02")
    raw = np.array([later, "NaT", earlier], dtype="datetime64[ns]")
    return DatetimeArray(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping(dtype):
    """
    Length-8 ``DatetimeArray`` laid out as [B, B, NA, NA, A, A, B, C].

    A < B < C are consecutive days and NA is ``NaT``.
    """
    missing = "NaT"
    low, mid, high = (
        pd.Timestamp(day) for day in ("2000-01-01", "2000-01-02", "2000-01-03")
    )
    layout = [mid, mid, missing, missing, low, low, mid, high]
    return DatetimeArray(np.array(layout, dtype="datetime64[ns]"), dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Return a comparator that is true only when both arguments are ``pd.NaT``."""

    def both_nat(a, b):
        # Identity checks: ``a`` must be the NaT singleton and ``b`` the same object.
        return a is pd.NaT and a is b

    return both_nat
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """Missing-value marker used by these tests: ``pd.NaT``."""
    missing = pd.NaT
    return missing
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
class BaseDatetimeTests:
    """Empty mixin shared by every datetime test class in this module."""
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Tests
|
||||
class TestDatetimeDtype(BaseDatetimeTests, base.BaseDtypeTests):
    """Run the shared dtype tests unchanged."""
|
||||
|
||||
|
||||
class TestConstructors(BaseDatetimeTests, base.BaseConstructorsTests):
    """Run the shared constructor tests unchanged."""
|
||||
|
||||
|
||||
class TestGetitem(BaseDatetimeTests, base.BaseGetitemTests):
    """Run the shared getitem tests unchanged."""
|
||||
|
||||
|
||||
class TestMethods(BaseDatetimeTests, base.BaseMethodsTests):
    """Shared method tests with two cases disabled."""

    @pytest.mark.skip(reason="Incorrect expected")
    def test_value_counts(self, all_data, dropna):
        """Skipped; see the skip reason."""

    def test_combine_add(self, data_repeated):
        """Override with an empty body: Timestamp.__add__(Timestamp) not defined."""
|
||||
|
||||
|
||||
class TestInterface(BaseDatetimeTests, base.BaseInterfaceTests):
    """Shared interface tests; the array-interface check is skipped for tz-aware data."""

    def test_array_interface(self, data):
        """Delegate to the shared test unless ``data`` carries a time zone."""
        if not data.tz:
            super().test_array_interface(data)
            return

        # np.asarray(DTA) is currently always tz-naive.
        pytest.skip("GH-23569")
|
||||
|
||||
|
||||
class TestArithmeticOps(BaseDatetimeTests, base.BaseArithmeticOpsTests):
    """Arithmetic tests for ``DatetimeArray``.

    Operators listed in ``implements`` are checked with ``exc=None``; every
    other operator is delegated to the matching shared base test.
    """

    # Operators checked with ``exc=None`` instead of going through the base test.
    implements = {"__sub__", "__rsub__"}

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        """Series-with-scalar arithmetic, using the first element as the scalar."""
        if all_arithmetic_operators in self.implements:
            s = pd.Series(data)
            self.check_opname(s, all_arithmetic_operators, s.iloc[0], exc=None)
        else:
            # ... but not the rest.
            super().test_arith_series_with_scalar(data, all_arithmetic_operators)

    def test_add_series_with_extension_array(self, data):
        """Adding a DatetimeArray to a datetime Series raises ``TypeError``."""
        # Datetime + Datetime not implemented
        s = pd.Series(data)
        msg = "cannot add DatetimeArray and DatetimeArray"
        with pytest.raises(TypeError, match=msg):
            s + data

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Series-with-array arithmetic; unimplemented operators use the base test."""
        if all_arithmetic_operators in self.implements:
            s = pd.Series(data)
            self.check_opname(s, all_arithmetic_operators, s.iloc[0], exc=None)
        else:
            # ... but not the rest. Delegate to the *array* variant of the
            # parent test; the scalar variant has its own override above.
            super().test_arith_series_with_array(data, all_arithmetic_operators)

    def test_error(self, data, all_arithmetic_operators):
        """Override the inherited test with an empty body."""

    def test_divmod_series_array(self):
        """Override with an empty body: not implemented (GH 23287)."""

    @pytest.mark.xfail(reason="different implementation", strict=False)
    def test_direct_arith_with_series_returns_not_implemented(self, data):
        """Run the shared test, marked as a non-strict expected failure."""
        # Right now, we have trouble with this. Returning NotImplemented
        # fails other tests like
        # tests/arithmetic/test_datetime64::TestTimestampSeriesArithmetic::
        # test_dt64_seris_add_intlike
        return super().test_direct_arith_with_series_returns_not_implemented(data)
|
||||
|
||||
|
||||
class TestCasting(BaseDatetimeTests, base.BaseCastingTests):
    """Run the shared casting tests unchanged."""
|
||||
|
||||
|
||||
class TestComparisonOps(BaseDatetimeTests, base.BaseComparisonOpsTests):
    """Shared comparison tests with the generic comparison helper disabled."""

    def _compare_other(self, s, data, op_name, other):
        """Do nothing.

        The base test is not appropriate for us. We raise on comparison
        with (some) integers, depending on the value.
        """

    @pytest.mark.xfail(reason="different implementation", strict=False)
    def test_direct_arith_with_series_returns_not_implemented(self, data):
        """Run the shared test, marked as a non-strict expected failure."""
        parent = super()
        return parent.test_direct_arith_with_series_returns_not_implemented(data)
|
||||
|
||||
|
||||
class TestMissing(BaseDatetimeTests, base.BaseMissingTests):
    """Run the shared missing-data tests unchanged."""
|
||||
|
||||
|
||||
class TestReshaping(BaseDatetimeTests, base.BaseReshapingTests):
    """Shared reshaping tests with a hand-built expectation for ``unstack``."""

    @pytest.mark.skip(reason="We have DatetimeTZBlock")
    def test_concat(self, data, in_frame):
        """Skipped; see the skip reason."""

    def test_concat_mixed_dtypes(self, data):
        """Delegate to the shared test.

        concat(Series[datetimetz], Series[category]) uses a plain
        np.array(values) on the DatetimeArray, which drops the tz.
        """
        super().test_concat_mixed_dtypes(data)

    @pytest.mark.parametrize("obj", ["series", "frame"])
    def test_unstack(self, obj):
        """Unstack level 0 of a two-level index for a Series or a DataFrame."""
        # GH-13287: can't use base test, since building the expected fails.
        data = DatetimeArray._from_sequence(
            ["2000", "2001", "2002", "2003"], tz="US/Central"
        )
        index = pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"])
        expected_index = pd.Index(["a", "b"], name="b")

        if obj == "series":
            to_unstack = pd.Series(data, index=index)
            expected = pd.DataFrame(
                {"A": data.take([0, 1]), "B": data.take([2, 3])},
                index=expected_index,
            )
            expected.columns.name = "a"
        else:
            to_unstack = pd.DataFrame({"A": data, "B": data}, index=index)
            # Every original column contributes one sub-column per outer
            # index label, in ("A", "A"), ("A", "B"), ("B", "A"), ("B", "B") order.
            expected_columns = {
                (column, label): data.take(positions)
                for column in ("A", "B")
                for label, positions in (("A", [0, 1]), ("B", [2, 3]))
            }
            expected = pd.DataFrame(expected_columns, index=expected_index)
            expected.columns.names = [None, "a"]

        result = to_unstack.unstack(0)
        self.assert_equal(result, expected)
|
||||
|
||||
|
||||
class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests):
    """Run the shared setitem tests unchanged."""
|
||||
|
||||
|
||||
class TestGroupby(BaseDatetimeTests, base.BaseGroupbyTests):
    """Run the shared groupby tests unchanged."""
|
||||
|
||||
|
||||
class TestPrinting(BaseDatetimeTests, base.BasePrintingTests):
    """Run the shared printing tests unchanged."""
|
@@ -0,0 +1,74 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.internals import BlockManager, SingleBlockManager
|
||||
from pandas.core.internals.blocks import Block, NonConsolidatableMixIn
|
||||
|
||||
|
||||
class CustomBlock(NonConsolidatableMixIn, Block):
    """Externally defined block type backed by a plain ``np.ndarray``."""

    _holder = np.ndarray

    def formatting_values(self):
        """Return the values rendered as ``"Val: <value>"`` strings."""
        labels = list(map("Val: {}".format, self.values))
        return np.array(labels)

    def concat_same_type(self, to_concat, placement=None):
        """
        Concatenate the values of ``to_concat`` into a new block of this class.

        ``self.ndim`` is disregarded because the values are always 1D in this
        custom Block. A falsy ``placement`` is replaced by a slice spanning
        the concatenated values.
        """
        values = np.concatenate([blk.values for blk in to_concat])
        if not placement:
            placement = slice(0, len(values), 1)
        return self.make_block_same_class(values, placement=placement)
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """DataFrame with a regular column "a" and a ``CustomBlock`` column "b"."""
    base_frame = pd.DataFrame({"a": [1, 2, 3]})
    extra_block = CustomBlock(np.arange(3, dtype="int64"), placement=slice(1, 2))
    all_blocks = base_frame._data.blocks + (extra_block,)
    axes = [pd.Index(["a", "b"]), base_frame.index]
    return pd.DataFrame(BlockManager(all_blocks, axes))
|
||||
|
||||
|
||||
def test_custom_repr():
    """``CustomBlock.formatting_values`` drives the Series and DataFrame reprs."""
    values = np.arange(3, dtype="int64")

    # series
    block = CustomBlock(values, placement=slice(0, 3))

    s = pd.Series(SingleBlockManager(block, pd.RangeIndex(3)))
    # The repr pads columns with spaces; the padding is part of the expected text.
    assert repr(s) == "0    Val: 0\n1    Val: 1\n2    Val: 2\ndtype: int64"

    # dataframe
    block = CustomBlock(values, placement=slice(0, 1))
    blk_mgr = BlockManager([block], [["col"], range(3)])
    df = pd.DataFrame(blk_mgr)
    # Header "col" is right-aligned over the six-character values.
    assert repr(df) == "      col\n0  Val: 0\n1  Val: 1\n2  Val: 2"
|
||||
|
||||
|
||||
def test_concat_series():
    """Concatenating Series backed by a ``CustomBlock`` keeps the block type (GH17728)."""
    custom = CustomBlock(np.arange(3, dtype="int64"), placement=slice(0, 3))
    ser = pd.Series(custom, pd.RangeIndex(3), fastpath=True)

    combined = pd.concat([ser, ser])
    first_block = combined._data.blocks[0]
    assert isinstance(first_block, CustomBlock)
|
||||
|
||||
|
||||
def test_concat_dataframe(df):
    """Row-wise concat of the frame with itself keeps the ``CustomBlock`` (GH17728)."""
    combined = pd.concat([df, df])
    second_block = combined._data.blocks[1]
    assert isinstance(second_block, CustomBlock)
|
||||
|
||||
|
||||
def test_concat_axis1(df):
    """Column-wise concat with a float frame keeps the ``CustomBlock`` (GH17954)."""
    float_frame = pd.DataFrame({"c": [0.1, 0.2, 0.3]})
    combined = pd.concat([df, float_frame], axis=1)
    second_block = combined._data.blocks[1]
    assert isinstance(second_block, CustomBlock)
|
@@ -0,0 +1,237 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_extension_array_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import integer_array
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
Int16Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
UInt8Dtype,
|
||||
UInt16Dtype,
|
||||
UInt32Dtype,
|
||||
UInt64Dtype,
|
||||
)
|
||||
from pandas.tests.extension import base
|
||||
|
||||
|
||||
def make_data():
    """Return a 100-element list of the integers 1..100 with NaN at positions 8 and 97."""
    return [*range(1, 9), np.nan, *range(10, 98), np.nan, 99, 100]
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        Int8Dtype,
        Int16Dtype,
        Int32Dtype,
        Int64Dtype,
        UInt8Dtype,
        UInt16Dtype,
        UInt32Dtype,
        UInt64Dtype,
    ]
)
def dtype(request):
    """Instance of each signed and unsigned integer extension dtype in turn."""
    dtype_class = request.param
    return dtype_class()
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """Integer array built from ``make_data()`` with the parametrized dtype."""
    raw = make_data()
    return integer_array(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_twos(dtype):
    """Length-100 integer array in which every element is 2."""
    twos = np.full(100, 2.0)
    return integer_array(twos, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """Length-2 integer array: a missing value followed by 1."""
    raw = [np.nan, 1]
    return integer_array(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting(dtype):
    """Length-3 integer array [1, 2, 0]."""
    unsorted = [1, 2, 0]
    return integer_array(unsorted, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting(dtype):
    """Length-3 integer array [1, NA, 0]."""
    unsorted = [1, np.nan, 0]
    return integer_array(unsorted, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Return a comparator that is truthy when both arguments are NaN."""

    # we are np.nan
    def both_nan(x, y):
        return np.isnan(x) and np.isnan(y)

    return both_nan
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """Missing-value marker used by these tests: ``np.nan``."""
    missing = np.nan
    return missing
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping(dtype):
    """Length-8 integer array laid out as [B, B, NA, NA, A, A, B, C] with A=0, B=1, C=2."""
    low, mid, high = 0, 1, 2
    missing = np.nan
    layout = [mid, mid, missing, missing, low, low, mid, high]
    return integer_array(layout, dtype=dtype)
|
||||
|
||||
|
||||
class TestDtype(base.BaseDtypeTests):
    """Shared dtype tests with one case skipped."""

    @pytest.mark.skip(reason="using multiple dtypes")
    def test_is_dtype_unboxes_dtype(self):
        """Skipped: we have multiple dtypes."""
|
||||
|
||||
|
||||
class TestArithmeticOps(base.BaseArithmeticOpsTests):
    """Arithmetic tests in which integer-array operations are checked with ``exc=None``."""

    def check_opname(self, s, op_name, other, exc=None):
        """Forward to the base helper, always with ``exc=None``."""
        # overwriting to indicate ops don't raise an error
        super().check_opname(s, op_name, other, exc=None)

    def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
        """Compare ``op(s, other)`` with ``s.combine(other, op)`` after dtype fix-ups.

        When ``exc`` is not None the operation is instead expected to raise it.
        """
        if exc is not None:
            with pytest.raises(exc):
                op(s, other)
            return

        if s.dtype.is_unsigned_integer and (op_name == "__rsub__"):
            # TODO see https://github.com/pandas-dev/pandas/issues/22023
            pytest.skip("unsigned subtraction gives negative values")

        other_is_numpy_integer = (
            hasattr(other, "dtype")
            and not is_extension_array_dtype(other.dtype)
            and pd.api.types.is_integer_dtype(other.dtype)
        )
        if other_is_numpy_integer:
            # other is np.int64 and would therefore always result in
            # upcasting, so keeping other as same numpy_dtype
            other = other.astype(s.dtype.numpy_dtype)

        result = op(s, other)
        expected = s.combine(other, op)

        if op_name in ("__rtruediv__", "__truediv__", "__div__"):
            expected = expected.astype(float)
            if op_name == "__rtruediv__":
                # TODO reverse operators result in object dtype
                result = result.astype(float)
        elif op_name.startswith("__r"):
            # TODO reverse operators result in object dtype
            # see https://github.com/pandas-dev/pandas/issues/22024
            expected = expected.astype(s.dtype)
            result = result.astype(s.dtype)
        else:
            # combine method result in 'biggest' (int64) dtype
            expected = expected.astype(s.dtype)

        if (op_name == "__rpow__") and isinstance(other, pd.Series):
            # TODO pow on Int arrays gives different result with NA
            # see https://github.com/pandas-dev/pandas/issues/22022
            result = result.fillna(1)

        self.assert_series_equal(result, expected)

    def _check_divmod_op(self, s, op, other, exc=None):
        """Forward to the base helper, always passing ``None`` as the exception."""
        super()._check_divmod_op(s, op, other, None)

    @pytest.mark.skip(reason="intNA does not error on ops")
    def test_error(self, data, all_arithmetic_operators):
        """Skipped: other specific errors tested in the integer array specific tests."""
|
||||
|
||||
|
||||
class TestComparisonOps(base.BaseComparisonOpsTests):
    """Comparison tests in which operations are checked with ``exc=None``."""

    def check_opname(self, s, op_name, other, exc=None):
        """Forward to the base helper, always with ``exc=None``."""
        super().check_opname(s, op_name, other, exc=None)

    def _compare_other(self, s, data, op_name, other):
        """Check the comparison through ``check_opname``; ``data`` is unused."""
        self.check_opname(s, op_name, other)
|
||||
|
||||
|
||||
class TestInterface(base.BaseInterfaceTests):
    """Run the shared interface tests unchanged."""
|
||||
|
||||
|
||||
class TestConstructors(base.BaseConstructorsTests):
    """Run the shared constructor tests unchanged."""
|
||||
|
||||
|
||||
class TestReshaping(base.BaseReshapingTests):
    """Run the shared reshaping tests unchanged."""

    # NOTE: for the test_concat_mixed_dtypes test, concat of an Integer and
    # Int coerces to object dtype.
    # TODO(jreback): revisit once integrated (the original note is incomplete).
|
||||
|
||||
|
||||
class TestGetitem(base.BaseGetitemTests):
    """Run the shared getitem tests unchanged."""
|
||||
|
||||
|
||||
class TestSetitem(base.BaseSetitemTests):
    """Run the shared setitem tests unchanged."""
|
||||
|
||||
|
||||
class TestMissing(base.BaseMissingTests):
    """Run the shared missing-data tests unchanged."""
|
||||
|
||||
|
||||
class TestMethods(base.BaseMethodsTests):
    """Shared method tests with a custom ``value_counts`` check."""

    @pytest.mark.parametrize("dropna", [True, False])
    def test_value_counts(self, all_data, dropna):
        """``value_counts`` on the first ten elements matches a reference Series."""
        all_data = all_data[:10]
        # With dropna the reference is built from the non-missing values
        # converted to an ndarray; otherwise from the array itself.
        other = np.array(all_data[~all_data.isna()]) if dropna else all_data

        expected = pd.Series(other).value_counts(dropna=dropna).sort_index()
        expected.index = expected.index.astype(all_data.dtype)
        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()

        self.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestCasting(base.BaseCastingTests):
    """Run the shared casting tests unchanged."""
|
||||
|
||||
|
||||
class TestGroupby(base.BaseGroupbyTests):
    """Run the shared groupby tests unchanged."""
|
||||
|
||||
|
||||
class TestNumericReduce(base.BaseNumericReduceTests):
    """Run the shared numeric-reduction tests unchanged."""
|
||||
|
||||
|
||||
class TestBooleanReduce(base.BaseBooleanReduceTests):
    """Run the shared boolean-reduction tests unchanged."""
|
||||
|
||||
|
||||
class TestPrinting(base.BasePrintingTests):
    """Run the shared printing tests unchanged."""
|
||||
|
||||
|
||||
class TestParsing(base.BaseParsingTests):
    """Run the shared parsing tests unchanged."""
|
@@ -0,0 +1,161 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import IntervalDtype
|
||||
|
||||
from pandas import Interval
|
||||
from pandas.core.arrays import IntervalArray
|
||||
from pandas.tests.extension import base
|
||||
|
||||
|
||||
def make_data():
    """Return 100 random ``Interval`` objects with increasing left edges.

    Left edges are the running sum of uniform draws; each right edge is its
    left edge plus another uniform draw.
    """
    count = 100
    lefts = np.random.uniform(size=count).cumsum()
    rights = lefts + np.random.uniform(size=count)
    return list(map(Interval, lefts, rights))
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Default-constructed ``IntervalDtype``."""
    interval_dtype = IntervalDtype()
    return interval_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Length-100 IntervalArray for semantics test."""
    intervals = make_data()
    return IntervalArray(intervals)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Length-2 IntervalArray laid out as [NA, Valid]."""
    bounds = [None, (0, 1)]
    return IntervalArray.from_tuples(bounds)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """Length-3 IntervalArray built from (1, 2), (2, 3), (0, 1)."""
    bounds = [(1, 2), (2, 3), (0, 1)]
    return IntervalArray.from_tuples(bounds)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """Length-3 IntervalArray built from (1, 2), a missing value, (0, 1)."""
    bounds = [(1, 2), None, (0, 1)]
    return IntervalArray.from_tuples(bounds)
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """Missing-value marker used by these tests: ``np.nan``."""
    missing = np.nan
    return missing
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """Length-8 IntervalArray laid out as [B, B, NA, NA, A, A, B, C]."""
    low, mid, high = (0, 1), (1, 2), (2, 3)
    layout = [mid, mid, None, None, low, low, mid, high]
    return IntervalArray.from_tuples(layout)
|
||||
|
||||
|
||||
class BaseInterval:
    """Empty mixin shared by the interval test classes in this module."""
|
||||
|
||||
|
||||
class TestDtype(BaseInterval, base.BaseDtypeTests):
    """Run the shared dtype tests unchanged."""
|
||||
|
||||
|
||||
class TestCasting(BaseInterval, base.BaseCastingTests):
    """Run the shared casting tests unchanged."""
|
||||
|
||||
|
||||
class TestConstructors(BaseInterval, base.BaseConstructorsTests):
    """Run the shared constructor tests unchanged."""
|
||||
|
||||
|
||||
class TestGetitem(BaseInterval, base.BaseGetitemTests):
    """Run the shared getitem tests unchanged."""
|
||||
|
||||
|
||||
class TestGrouping(BaseInterval, base.BaseGroupbyTests):
    """Run the shared groupby tests unchanged."""
|
||||
|
||||
|
||||
class TestInterface(BaseInterval, base.BaseInterfaceTests):
    """Run the shared interface tests unchanged."""
|
||||
|
||||
|
||||
class TestReduce(base.BaseNoReduceTests):
    """Run the shared no-reduction tests unchanged."""
|
||||
|
||||
|
||||
class TestMethods(BaseInterval, base.BaseMethodsTests):
    """Shared method tests with two cases skipped."""

    @pytest.mark.skip(reason="addition is not defined for intervals")
    def test_combine_add(self, data_repeated):
        """Skipped; see the skip reason."""

    @pytest.mark.skip(reason="Not Applicable")
    def test_fillna_length_mismatch(self, data_missing):
        """Skipped; see the skip reason."""
|
||||
|
||||
|
||||
class TestMissing(BaseInterval, base.BaseMissingTests):
    """Shared missing-data tests, minus the non-scalar ``fillna`` cases."""

    # Index.fillna only accepts scalar `value`, so we have to skip all
    # non-scalar fill tests.
    unsupported_fill = pytest.mark.skip("Unsupported fillna option.")

    @unsupported_fill
    def test_fillna_limit_pad(self):
        """Skipped via ``unsupported_fill``."""

    @unsupported_fill
    def test_fillna_series_method(self):
        """Skipped via ``unsupported_fill``."""

    @unsupported_fill
    def test_fillna_limit_backfill(self):
        """Skipped via ``unsupported_fill``."""

    @unsupported_fill
    def test_fillna_series(self):
        """Skipped via ``unsupported_fill``."""

    def test_non_scalar_raises(self, data_missing):
        """Filling with a list raises ``TypeError``."""
        fill_value = [1, 1]
        with pytest.raises(TypeError, match="Got a 'list' instead."):
            data_missing.fillna(fill_value)
|
||||
|
||||
|
||||
class TestReshaping(BaseInterval, base.BaseReshapingTests):
    """Run the shared reshaping tests unchanged."""
|
||||
|
||||
|
||||
class TestSetitem(BaseInterval, base.BaseSetitemTests):
    """Run the shared setitem tests unchanged."""
|
||||
|
||||
|
||||
class TestPrinting(BaseInterval, base.BasePrintingTests):
    """Shared printing tests with the array-repr case skipped."""

    @pytest.mark.skip(reason="custom repr")
    def test_array_repr(self, data, size):
        """Skipped; see the skip reason."""
|
||||
|
||||
|
||||
class TestParsing(BaseInterval, base.BaseParsingTests):
    """Parsing tests: the shared test is expected to raise for this array type."""

    @pytest.mark.parametrize("engine", ["c", "python"])
    def test_EA_types(self, engine, data):
        """The shared test raises ``NotImplementedError`` for both engines."""
        pattern = r".*must implement _from_sequence_of_strings.*"
        with pytest.raises(NotImplementedError, match=pattern):
            super().test_EA_types(engine, data)
|
@@ -0,0 +1,392 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat.numpy import _np_version_under1p16
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays.numpy_ import PandasArray, PandasDtype
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from . import base
|
||||
|
||||
|
||||
@pytest.fixture(params=["float", "object"])
def dtype(request):
    """``PandasDtype`` wrapping the parametrized NumPy dtype."""
    numpy_dtype = np.dtype(request.param)
    return PandasDtype(numpy_dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def allow_in_pandas(monkeypatch):
    """
    A monkeypatch that tells pandas to let us in.

    By default, passing a PandasArray to an index / series / frame
    constructor will unbox that PandasArray to an ndarray, and treat
    it as a non-EA column. We don't want people using EAs without
    reason.

    The mechanism for this is a check against ABCPandasArray
    in each constructor.

    But, for testing, we need to allow them in pandas. So we patch
    the _typ of PandasArray, so that we evade the ABCPandasArray
    check.
    """
    with monkeypatch.context() as patcher:
        patcher.setattr(PandasArray, "_typ", "extension")
        yield
|
||||
|
||||
|
||||
@pytest.fixture
def data(allow_in_pandas, dtype):
    """Length-100 array: one-element tuples for object dtype, else the numbers 1..100."""
    if dtype.numpy_dtype == "object":
        nested = pd.Series([(i,) for i in range(100)])
        return nested.array

    numbers = np.arange(1, 101, dtype=dtype._dtype)
    return PandasArray(numbers)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(allow_in_pandas, dtype):
    """Length-2 array laid out as [NaN, valid]; skipped for object dtype on NumPy <1.16."""
    if dtype.numpy_dtype == "object":
        # For NumPy <1.16, np.array([np.nan, (1,)]) raises
        # ValueError: setting an array element with a sequence.
        if _np_version_under1p16:
            pytest.skip("Skipping for NumPy <1.16")
        raw = np.array([np.nan, (1,)])
    else:
        raw = np.array([np.nan, 1.0])
    return PandasArray(raw)
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """Missing-value marker used by these tests: ``np.nan``."""
    missing = np.nan
    return missing
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Return a comparator that is truthy when both arguments are NaN."""

    def both_nan(a, b):
        return np.isnan(a) and np.isnan(b)

    return both_nan
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting(allow_in_pandas, dtype):
    """Length-3 array with a known sort order.

    The three items are laid out as [B, C, A] with A < B < C.
    """
    if dtype.numpy_dtype == "object":
        # Use an empty tuple for first element, then remove,
        # to disable np.array's shape inference.
        padded = np.array([(), (2,), (3,), (1,)])
        raw = padded[1:]
    else:
        raw = np.array([1, 2, 0])
    return PandasArray(raw)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting(allow_in_pandas, dtype):
    """Length-3 array with a known sort order.

    The three items are laid out as [B, NA, A] with A < B and NA missing.
    """
    if dtype.numpy_dtype == "object":
        raw = np.array([(1,), np.nan, (0,)])
    else:
        raw = np.array([1, np.nan, 0])
    return PandasArray(raw)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping(allow_in_pandas, dtype):
    """Data for factorization, grouping, and unique tests.

    Laid out as [B, B, NA, NA, A, A, B, C], where A < B < C and NA is missing.
    """
    if dtype.numpy_dtype == "object":
        a, b, c = (1,), (2,), (3,)
    else:
        a, b, c = np.arange(3)

    missing = np.nan
    layout = [b, b, missing, missing, a, a, b, c]
    return PandasArray(np.array(layout))
|
||||
|
||||
|
||||
@pytest.fixture
def skip_numpy_object(dtype):
    """
    Skip the requesting test when ``dtype`` equals "object".

    Tests for PandasArray with nested data. Users typically won't create
    these objects via `pd.array`, but they can show up through `.array`
    on a Series with nested data. Many of the base tests fail, as they aren't
    appropriate for nested data.

    Apply it through a ``usefixtures`` marker on an individual test or on a
    whole test class.
    """
    is_object = dtype == "object"
    if is_object:
        pytest.skip("Skipping for object dtype.")
|
||||
|
||||
|
||||
# Marker that attaches the ``skip_numpy_object`` fixture to a test or test class.
skip_nested = pytest.mark.usefixtures("skip_numpy_object")
|
||||
|
||||
|
||||
class BaseNumPyTests:
    """Empty mixin shared by the PandasArray test classes in this module."""
|
||||
|
||||
|
||||
class TestCasting(BaseNumPyTests, base.BaseCastingTests):
    """Shared casting tests; the string cast is skipped for object dtype."""

    @skip_nested
    def test_astype_str(self, data):
        """Skipped for object dtype (ValueError: setting an array element with a sequence)."""
        super().test_astype_str(data)
|
||||
|
||||
|
||||
class TestConstructors(BaseNumPyTests, base.BaseConstructorsTests):
    """Shared constructor tests with dtype-registration and nested cases excluded."""

    # We don't want to register. This test should probably be split in two.
    @pytest.mark.skip(reason="We don't register our dtype")
    def test_from_dtype(self, data):
        """Skipped; see the skip reason."""

    @skip_nested
    def test_array_from_scalars(self, data):
        """Skipped for object dtype (ValueError: PandasArray must be 1-dimensional.)."""
        super().test_array_from_scalars(data)
|
||||
|
||||
|
||||
class TestDtype(BaseNumPyTests, base.BaseDtypeTests):
    """Shared dtype tests with the dtype-name check skipped."""

    # we unsurprisingly clash with a NumPy name.
    @pytest.mark.skip(reason="Incorrect expected.")
    def test_check_dtype(self, data):
        """Skipped; see the skip reason."""
|
||||
|
||||
|
||||
class TestGetitem(BaseNumPyTests, base.BaseGetitemTests):
    """Shared getitem tests with nested-data and known-failing cases marked."""

    @skip_nested
    def test_getitem_scalar(self, data):
        """Skipped for object dtype (AssertionError)."""
        super().test_getitem_scalar(data)

    @skip_nested
    def test_take_series(self, data):
        """Skipped for object dtype (ValueError: PandasArray must be 1-dimensional.)."""
        super().test_take_series(data)

    @pytest.mark.xfail(reason="astype doesn't recognize data.dtype")
    def test_loc_iloc_frame_single_dtype(self, data):
        """Run the shared test, marked as an expected failure."""
        super().test_loc_iloc_frame_single_dtype(data)
|
||||
|
||||
|
||||
class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests):
    """Shared groupby tests; the apply case is skipped for object dtype."""

    @skip_nested
    def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
        """Skipped for object dtype (ValueError: Names should be list-like for a MultiIndex)."""
        super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
|
||||
|
||||
|
||||
class TestInterface(BaseNumPyTests, base.BaseInterfaceTests):
    """Shared interface tests; the array-interface case is skipped for object dtype."""

    @skip_nested
    def test_array_interface(self, data):
        """Skipped for object dtype (NumPy array shape inference)."""
        super().test_array_interface(data)
|
||||
|
||||
|
||||
class TestMethods(BaseNumPyTests, base.BaseMethodsTests):
    """Shared method tests; most overrides only add a skip for object dtype."""

    @pytest.mark.skip(reason="TODO: remove?")
    def test_value_counts(self, all_data, dropna):
        """Skipped; see the skip reason."""

    # We have a bool dtype, so the result is an ExtensionArray
    # but expected is not
    @pytest.mark.skip(reason="Incorrect expected")
    def test_combine_le(self, data_repeated):
        """Skipped; see the skip reason."""
        super().test_combine_le(data_repeated)

    @skip_nested
    def test_combine_add(self, data_repeated):
        """Skipped for object dtype (not numeric)."""
        super().test_combine_add(data_repeated)

    @skip_nested
    def test_shift_fill_value(self, data):
        """Skipped for object dtype (np.array shape inference; shift implementation fails)."""
        super().test_shift_fill_value(data)

    @skip_nested
    @pytest.mark.parametrize("box", [pd.Series, lambda x: x])
    @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique])
    def test_unique(self, data, box, method):
        """Skipped for object dtype (fails creating expected)."""
        super().test_unique(data, box, method)

    @skip_nested
    def test_fillna_copy_frame(self, data_missing):
        """Skipped for object dtype (the "scalar" for this array isn't a scalar)."""
        super().test_fillna_copy_frame(data_missing)

    @skip_nested
    def test_fillna_copy_series(self, data_missing):
        """Skipped for object dtype (the "scalar" for this array isn't a scalar)."""
        super().test_fillna_copy_series(data_missing)

    @skip_nested
    def test_hash_pandas_object_works(self, data, as_frame):
        """Skipped for object dtype (ndarray of tuples not hashable)."""
        super().test_hash_pandas_object_works(data, as_frame)

    @skip_nested
    def test_searchsorted(self, data_for_sorting, as_series):
        """Skipped for object dtype (test setup fails)."""
        super().test_searchsorted(data_for_sorting, as_series)

    @skip_nested
    def test_where_series(self, data, na_value, as_frame):
        """Skipped for object dtype (test setup fails)."""
        super().test_where_series(data, na_value, as_frame)

    @skip_nested
    @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]])
    def test_repeat(self, data, repeats, as_series, use_numpy):
        """Skipped for object dtype (fails creating expected)."""
        super().test_repeat(data, repeats, as_series, use_numpy)
|
||||
|
||||
|
||||
@skip_nested
class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests):
    """Arithmetic tests with every expected-exception attribute set to None."""

    divmod_exc = series_scalar_exc = frame_scalar_exc = series_array_exc = None

    def test_divmod_series_array(self, data):
        """``divmod`` of a Series with its own backing array, checked with ``exc=None``."""
        ser = pd.Series(data)
        self._check_divmod_op(ser, divmod, data, exc=None)

    @pytest.mark.skip("We implement ops")
    def test_error(self, data, all_arithmetic_operators):
        """Skipped; see the skip reason."""

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        """Delegate to the shared test."""
        super().test_arith_series_with_scalar(data, all_arithmetic_operators)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Delegate to the shared test."""
        super().test_arith_series_with_array(data, all_arithmetic_operators)
|
||||
|
||||
|
||||
class TestPrinting(BaseNumPyTests, base.BasePrintingTests):
    """Run the shared printing tests unchanged."""
|
||||
|
||||
|
||||
@skip_nested
class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests):
    """Numeric-reduction tests compared against the plain NumPy-dtype Series."""

    def check_reduce(self, s, op_name, skipna):
        """Reduce ``s`` and the same data cast to its NumPy dtype; results must be almost equal."""
        # avoid coercing int -> float. Just cast to the actual numpy type.
        reference = s.astype(s.dtype._dtype)

        result = getattr(s, op_name)(skipna=skipna)
        expected = getattr(reference, op_name)(skipna=skipna)
        tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
@skip_nested
class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests):
    """Boolean reduction tests, inherited without any overrides."""
|
||||
|
||||
|
||||
class TestMissing(BaseNumPyTests, base.BaseMissingTests):
    """Missing-data tests.

    The ``fillna`` overrides below only add ``skip_nested``; the stated
    reason for each is non-scalar "scalar" values.
    """

    @skip_nested
    def test_fillna_scalar(self, data_missing):
        """Delegate to the parent ``test_fillna_scalar``."""
        super().test_fillna_scalar(data_missing)

    @skip_nested
    def test_fillna_series_method(self, data_missing, fillna_method):
        """Delegate to the parent ``test_fillna_series_method``."""
        super().test_fillna_series_method(data_missing, fillna_method)

    @skip_nested
    def test_fillna_series(self, data_missing):
        """Delegate to the parent ``test_fillna_series``."""
        super().test_fillna_series(data_missing)

    @skip_nested
    def test_fillna_frame(self, data_missing):
        """Delegate to the parent ``test_fillna_frame``."""
        super().test_fillna_frame(data_missing)
|
||||
|
||||
|
||||
class TestReshaping(BaseNumPyTests, base.BaseReshapingTests):
    """Reshaping tests with selected parent tests skipped."""

    # not actually a mixed concat, since we concat int and int.
    @pytest.mark.skip("Incorrect parent test")
    def test_concat_mixed_dtypes(self, data):
        """Skipped unconditionally; would delegate to the parent test."""
        super().test_concat_mixed_dtypes(data)

    @skip_nested
    def test_merge(self, data, na_value):
        """Delegate to the parent test (noted: fails creating expected)."""
        super().test_merge(data, na_value)

    @skip_nested
    def test_merge_on_extension_array(self, data):
        """Delegate to the parent test (noted: fails creating expected)."""
        super().test_merge_on_extension_array(data)

    @skip_nested
    def test_merge_on_extension_array_duplicates(self, data):
        """Delegate to the parent test (noted: fails creating expected)."""
        super().test_merge_on_extension_array_duplicates(data)
|
||||
|
||||
|
||||
class TestSetitem(BaseNumPyTests, base.BaseSetitemTests):
    """Setitem tests.

    Every override delegates to the parent test of the same name and only
    adds ``skip_nested``; each docstring records the failure noted for it.
    """

    @skip_nested
    def test_setitem_scalar_series(self, data, box_in_series):
        """Noted failure: AssertionError."""
        super().test_setitem_scalar_series(data, box_in_series)

    @skip_nested
    def test_setitem_sequence(self, data, box_in_series):
        """Noted failure: ValueError: shape mismatch: value array of shape
        (2,1) could not be broadcast to indexing result of shape (2,).
        """
        super().test_setitem_sequence(data, box_in_series)

    @skip_nested
    def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
        """Noted failure: ValueError: PandasArray must be 1-dimensional."""
        super().test_setitem_sequence_mismatched_length_raises(data, as_array)

    @skip_nested
    def test_setitem_sequence_broadcasts(self, data, box_in_series):
        """Noted failure: ValueError: cannot set using a list-like indexer
        with a different length than the value.
        """
        super().test_setitem_sequence_broadcasts(data, box_in_series)

    @skip_nested
    def test_setitem_loc_scalar_mixed(self, data):
        """Noted failure: AssertionError."""
        super().test_setitem_loc_scalar_mixed(data)

    @skip_nested
    def test_setitem_loc_scalar_multiple_homogoneous(self, data):
        """Noted failure: AssertionError."""
        super().test_setitem_loc_scalar_multiple_homogoneous(data)

    @skip_nested
    def test_setitem_iloc_scalar_mixed(self, data):
        """Noted failure: AssertionError."""
        super().test_setitem_iloc_scalar_mixed(data)

    @skip_nested
    def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
        """Noted failure: AssertionError."""
        super().test_setitem_iloc_scalar_multiple_homogoneous(data)

    @skip_nested
    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_broadcast(self, data, setter):
        """Noted failure: ValueError: cannot set using a list-like indexer
        with a different length than the value.
        """
        super().test_setitem_mask_broadcast(data, setter)

    @skip_nested
    def test_setitem_scalar_key_sequence_raise(self, data):
        """Noted failure: Failed: DID NOT RAISE <class 'ValueError'>."""
        super().test_setitem_scalar_key_sequence_raise(data)
|
||||
|
||||
|
||||
@skip_nested
class TestParsing(BaseNumPyTests, base.BaseParsingTests):
    """Parsing tests, inherited without any overrides."""
|
@@ -0,0 +1,161 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslib import iNaT
|
||||
|
||||
from pandas.core.dtypes.dtypes import PeriodDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import PeriodArray
|
||||
from pandas.tests.extension import base
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Return a ``PeriodDtype`` with frequency ``"D"``."""
    period_dtype = PeriodDtype(freq="D")
    return period_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """Return a PeriodArray built from the integers 1970..2069 at ``dtype.freq``."""
    ordinals = np.arange(1970, 2070)
    return PeriodArray(ordinals, freq=dtype.freq)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_twos(dtype):
    """Return a length-100 PeriodArray whose input values are all 2."""
    twos = np.ones(100) * 2
    return PeriodArray(twos, freq=dtype.freq)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting(dtype):
    """Return a three-element PeriodArray built from ``[2018, 2019, 2017]``."""
    values = [2018, 2019, 2017]
    return PeriodArray(values, freq=dtype.freq)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """Return a two-element PeriodArray built from ``[iNaT, 2017]``."""
    values = [iNaT, 2017]
    return PeriodArray(values, freq=dtype.freq)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting(dtype):
    """Return a PeriodArray built from ``[2018, iNaT, 2017]``."""
    values = [2018, iNaT, 2017]
    return PeriodArray(values, freq=dtype.freq)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping(dtype):
    """Return an eight-element PeriodArray laid out as [B, B, NA, NA, A, A, B, C].

    Here A=2017, B=2018, C=2019 and NA is ``iNaT``.
    """
    low, mid, high, missing = 2017, 2018, 2019, iNaT
    layout = [mid, mid, missing, missing, low, low, mid, high]
    return PeriodArray(layout, freq=dtype.freq)
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """Return ``pd.NaT``, the missing-value marker used by these tests."""
    missing = pd.NaT
    return missing
|
||||
|
||||
|
||||
class BasePeriodTests:
    """Empty mixin shared by the Period test classes in this module."""
|
||||
|
||||
|
||||
class TestPeriodDtype(BasePeriodTests, base.BaseDtypeTests):
    """Dtype tests, inherited without any overrides."""
|
||||
|
||||
|
||||
class TestConstructors(BasePeriodTests, base.BaseConstructorsTests):
    """Constructor tests, inherited without any overrides."""
|
||||
|
||||
|
||||
class TestGetitem(BasePeriodTests, base.BaseGetitemTests):
    """Getitem tests, inherited without any overrides."""
|
||||
|
||||
|
||||
class TestMethods(BasePeriodTests, base.BaseMethodsTests):
    """Method tests; ``test_combine_add`` is replaced by a no-op."""

    def test_combine_add(self, data_repeated):
        """Do nothing: Period + Period is not defined."""
|
||||
|
||||
|
||||
class TestInterface(BasePeriodTests, base.BaseInterfaceTests):
    """Interface tests, inherited without any overrides."""
|
||||
|
||||
|
||||
class TestArithmeticOps(BasePeriodTests, base.BaseArithmeticOpsTests):
    """Arithmetic-operator tests for the Period array.

    Operators listed in ``implements`` are checked with ``exc=None``; every
    other operator is handed to the corresponding parent test.
    """

    # Operator names for which the overrides below expect no exception.
    implements = {"__sub__", "__rsub__"}

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        """Check Series-with-scalar arithmetic for one operator name."""
        # we implement subtraction...
        if all_arithmetic_operators in self.implements:
            s = pd.Series(data)
            self.check_opname(s, all_arithmetic_operators, s.iloc[0], exc=None)
        else:
            # ... but not the rest.
            super().test_arith_series_with_scalar(data, all_arithmetic_operators)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Check Series-with-array arithmetic for one operator name.

        Unimplemented operators fall back to the parent *array* test (the
        previous code fell back to the parent scalar test by mistake).
        """
        if all_arithmetic_operators in self.implements:
            s = pd.Series(data)
            self.check_opname(s, all_arithmetic_operators, s.iloc[0], exc=None)
        else:
            # ... but not the rest.
            super().test_arith_series_with_array(data, all_arithmetic_operators)

    def _check_divmod_op(self, s, op, other, exc=NotImplementedError):
        """Run the parent divmod check, always passing ``exc=TypeError``.

        The incoming ``exc`` argument is accepted for signature
        compatibility but is not forwarded.
        """
        super()._check_divmod_op(s, op, other, exc=TypeError)

    def test_add_series_with_extension_array(self, data):
        """Assert that ``Series + PeriodArray`` raises ``TypeError``."""
        # we don't implement + for Period
        s = pd.Series(data)
        msg = (
            r"unsupported operand type\(s\) for \+: "
            r"\'PeriodArray\' and \'PeriodArray\'"
        )
        with pytest.raises(TypeError, match=msg):
            s + data

    def test_error(self):
        """Replace the parent ``test_error`` with a no-op."""
        pass

    def test_direct_arith_with_series_returns_not_implemented(self, data):
        """Assert ``data.__sub__(Series)`` returns ``NotImplemented``."""
        # Override to use __sub__ instead of __add__
        other = pd.Series(data)
        result = data.__sub__(other)
        assert result is NotImplemented
|
||||
|
||||
|
||||
class TestCasting(BasePeriodTests, base.BaseCastingTests):
    """Casting tests, inherited without any overrides."""
|
||||
|
||||
|
||||
class TestComparisonOps(BasePeriodTests, base.BaseComparisonOpsTests):
    """Comparison-operator tests with ``_compare_other`` disabled."""

    def _compare_other(self, s, data, op_name, other):
        """Do nothing.

        The base test is not appropriate for us. We raise on comparison
        with (some) integers, depending on the value.
        """
|
||||
|
||||
|
||||
class TestMissing(BasePeriodTests, base.BaseMissingTests):
    """Missing-data tests, inherited without any overrides."""
|
||||
|
||||
|
||||
class TestReshaping(BasePeriodTests, base.BaseReshapingTests):
    """Reshaping tests, inherited without any overrides."""
|
||||
|
||||
|
||||
class TestSetitem(BasePeriodTests, base.BaseSetitemTests):
    """Setitem tests, inherited without any overrides."""
|
||||
|
||||
|
||||
class TestGroupby(BasePeriodTests, base.BaseGroupbyTests):
    """Groupby tests, inherited without any overrides."""
|
||||
|
||||
|
||||
class TestPrinting(BasePeriodTests, base.BasePrintingTests):
    """Printing tests, inherited without any overrides."""
|
||||
|
||||
|
||||
class TestParsing(BasePeriodTests, base.BaseParsingTests):
    """Parsing tests; ``test_EA_types`` is parametrized over both engines."""

    @pytest.mark.parametrize("engine", ["c", "python"])
    def test_EA_types(self, engine, data):
        """Delegate to the parent ``test_EA_types`` for the given engine."""
        super().test_EA_types(engine, data)
|
@@ -0,0 +1,370 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import SparseArray, SparseDtype
|
||||
from pandas.tests.extension import base
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def make_data(fill_value):
    """Build a length-100 random array with every third slot set to ``fill_value``.

    When ``fill_value`` is NaN the values come from ``np.random.uniform``;
    otherwise they come from ``np.random.randint(1, 100)`` and the first
    entry is bumped by one if it equals the second. Positions 2, 5, 8, ...
    are then overwritten with ``fill_value``.
    """
    if not np.isnan(fill_value):
        values = np.random.randint(1, 100, size=100)
        # Make sure the first two entries differ.
        if values[0] == values[1]:
            values[0] += 1
    else:
        values = np.random.uniform(size=100)

    values[2::3] = fill_value
    return values
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Return a ``SparseDtype`` constructed with default arguments."""
    sparse_dtype = SparseDtype()
    return sparse_dtype
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data(request):
    """Length-100 SparseArray for semantics test.

    Parametrized over the fill value (``0`` and ``np.nan``); the values come
    from ``make_data`` called with that same fill value.
    """
    return SparseArray(make_data(request.param), fill_value=request.param)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_twos(request):
    """Return a length-100 SparseArray whose values are all 2."""
    twos = np.ones(100) * 2
    return SparseArray(twos)
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_missing(request):
    """Two-element SparseArray ``[NaN, 1]``, once per fill value (0 and NaN)."""
    values = [np.nan, 1]
    return SparseArray(values, fill_value=request.param)
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_repeated(request):
    """Yield a generator factory producing ``count`` fresh SparseArrays.

    Each array is built from a new ``make_data`` call using the fixture's
    fill value.
    """
    fill = request.param

    def gen(count):
        produced = 0
        while produced < count:
            yield SparseArray(make_data(fill), fill_value=fill)
            produced += 1

    yield gen
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_for_sorting(request):
    """SparseArray ``[2, 3, 1]``, once per fill value (0 and NaN)."""
    values = [2, 3, 1]
    return SparseArray(values, fill_value=request.param)
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_missing_for_sorting(request):
    """SparseArray ``[2, NaN, 1]``, once per fill value (0 and NaN)."""
    values = [2, np.nan, 1]
    return SparseArray(values, fill_value=request.param)
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """Return ``np.nan``, the missing-value marker used by these tests."""
    missing = np.nan
    return missing
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Return a two-argument comparator that is truthy when both are NA."""

    def both_na(left, right):
        return pd.isna(left) and pd.isna(right)

    return both_na
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_for_grouping(request):
    """SparseArray ``[1, 1, NaN, NaN, 2, 2, 1, 3]``, once per fill value."""
    layout = [1, 1, np.nan, np.nan, 2, 2, 1, 3]
    return SparseArray(layout, fill_value=request.param)
|
||||
|
||||
|
||||
class BaseSparseTests:
    """Mixin with helpers and overrides shared by the Sparse test classes."""

    def _check_unsupported(self, data):
        """Skip the current test when ``data`` has dtype ``SparseDtype(int, 0)``."""
        int_zero_fill = SparseDtype(int, 0)
        if data.dtype == int_zero_fill:
            pytest.skip("Can't store nan in int array.")

    @pytest.mark.xfail(reason="SparseArray does not support setitem")
    def test_ravel(self, data):
        """Delegate to the next ``test_ravel`` in the MRO; expected to fail."""
        super().test_ravel(data)
|
||||
|
||||
|
||||
class TestDtype(BaseSparseTests, base.BaseDtypeTests):
    """Dtype tests with a Sparse-specific array-type check."""

    def test_array_type_with_arg(self, data, dtype):
        """Assert that the dtype constructs ``SparseArray``."""
        array_type = dtype.construct_array_type()
        assert array_type is SparseArray
|
||||
|
||||
|
||||
class TestInterface(BaseSparseTests, base.BaseInterfaceTests):
    """Interface tests with two Sparse-specific overrides."""

    def test_no_values_attribute(self, data):
        """Skip unconditionally with the message "We have values"."""
        pytest.skip("We have values")

    def test_copy(self, data):
        """Smoke-test ``copy``; __setitem__ does not work, so nothing more."""
        data.copy()
|
||||
|
||||
|
||||
class TestConstructors(BaseSparseTests, base.BaseConstructorsTests):
    """Constructor tests, inherited without any overrides."""
|
||||
|
||||
|
||||
class TestReshaping(BaseSparseTests, base.BaseReshapingTests):
    """Reshaping tests.

    Most overrides call ``_check_unsupported`` first and then delegate to
    the parent test of the same name.
    """

    def test_concat_mixed_dtypes(self, data):
        """Concat sparse, int and categorical frames; compare to object concat."""
        # https://github.com/pandas-dev/pandas/issues/20762
        # This should be the same, aside from concat([sparse, float])
        sparse_df = pd.DataFrame({"A": data[:3]})
        int_df = pd.DataFrame({"A": [1, 2, 3]})
        cat_df = pd.DataFrame({"A": ["a", "b", "c"]}).astype("category")
        dfs = [sparse_df, int_df, cat_df]

        def as_object(col):
            return np.asarray(col).astype(object)

        # dataframes
        result = pd.concat(dfs)
        object_frames = [frame.apply(as_object) for frame in dfs]
        expected = pd.concat(object_frames)
        self.assert_frame_equal(result, expected)

    def test_concat_columns(self, data, na_value):
        """Guard with ``_check_unsupported``, then run the parent test."""
        self._check_unsupported(data)
        super().test_concat_columns(data, na_value)

    def test_align(self, data, na_value):
        """Guard with ``_check_unsupported``, then run the parent test."""
        self._check_unsupported(data)
        super().test_align(data, na_value)

    def test_align_frame(self, data, na_value):
        """Guard with ``_check_unsupported``, then run the parent test."""
        self._check_unsupported(data)
        super().test_align_frame(data, na_value)

    def test_align_series_frame(self, data, na_value):
        """Guard with ``_check_unsupported``, then run the parent test."""
        self._check_unsupported(data)
        super().test_align_series_frame(data, na_value)

    def test_merge(self, data, na_value):
        """Guard with ``_check_unsupported``, then run the parent test."""
        self._check_unsupported(data)
        super().test_merge(data, na_value)
|
||||
|
||||
|
||||
class TestGetitem(BaseSparseTests, base.BaseGetitemTests):
    """Getitem tests with Sparse-specific ``test_get`` and ``test_reindex``."""

    def test_get(self, data):
        """Check ``Series.get`` against ``iloc`` on an even-integer index."""
        even_index = [2 * i for i in range(len(data))]
        ser = pd.Series(data, index=even_index)
        if not np.isnan(ser.values.fill_value):
            assert ser.get(4) == ser.iloc[2]
        else:
            # NaN never compares equal, so check both sides for NaN instead.
            assert np.isnan(ser.get(4)) and np.isnan(ser.iloc[2])
        assert ser.get(2) == ser.iloc[1]

    def test_reindex(self, data, na_value):
        """Guard with ``_check_unsupported``, then run the parent test."""
        self._check_unsupported(data)
        super().test_reindex(data, na_value)
|
||||
|
||||
|
||||
# Skipping TestSetitem, since we don't implement it.
|
||||
|
||||
|
||||
class TestMissing(BaseSparseTests, base.BaseMissingTests):
    """Missing-data tests with Sparse-specific expectations."""

    def test_isna(self, data_missing):
        """Check ``isna`` on the array, on a Series, and on an emptied Series."""
        fill_is_na = pd.isna(data_missing.dtype.fill_value)
        expected_dtype = SparseDtype(bool, fill_is_na)
        expected = SparseArray([True, False], dtype=expected_dtype)

        result = pd.isna(data_missing)
        self.assert_equal(result, expected)

        result = pd.Series(data_missing).isna()
        expected = pd.Series(expected)
        self.assert_series_equal(result, expected)

        # GH 21189
        emptied = pd.Series(data_missing).drop([0, 1])
        result = emptied.isna()
        expected = pd.Series([], dtype=expected_dtype)
        self.assert_series_equal(result, expected)

    def test_fillna_limit_pad(self, data_missing):
        """Run the parent test while asserting a ``PerformanceWarning``."""
        with tm.assert_produces_warning(PerformanceWarning):
            super().test_fillna_limit_pad(data_missing)

    def test_fillna_limit_backfill(self, data_missing):
        """Run the parent test while asserting a ``PerformanceWarning``."""
        with tm.assert_produces_warning(PerformanceWarning):
            super().test_fillna_limit_backfill(data_missing)

    def test_fillna_series_method(self, data_missing):
        """Run the parent *limit-backfill* test under a warning assertion."""
        # NOTE(review): this delegates to test_fillna_limit_backfill, not to
        # the parent test_fillna_series_method -- looks like a copy-paste;
        # confirm whether the parent test of the same name should run here.
        with tm.assert_produces_warning(PerformanceWarning):
            super().test_fillna_limit_backfill(data_missing)

    @pytest.mark.skip(reason="Unsupported")
    def test_fillna_series(self):
        """Skipped unconditionally (original note: this one looks doable)."""

    def test_fillna_frame(self, data_missing):
        """Fill a two-column frame and compare against the expected dtype."""
        # Have to override to specify that fill_value will change.
        fill_value = data_missing[1]

        frame = pd.DataFrame({"A": data_missing, "B": [1, 2]})
        result = frame.fillna(fill_value)

        dtype = (
            SparseDtype(data_missing.dtype, fill_value)
            if pd.isna(data_missing.fill_value)
            else data_missing.dtype
        )

        filled = data_missing._from_sequence([fill_value, fill_value], dtype=dtype)
        expected = pd.DataFrame({"A": filled, "B": [1, 2]})

        self.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestMethods(BaseSparseTests, base.BaseMethodsTests):
    """Method tests with Sparse-specific expectations."""

    def test_combine_le(self, data_repeated):
        """Check ``Series.combine`` with ``<=`` against Series and scalar."""
        # We return a Series[SparseArray].__le__ returns a
        # Series[Sparse[bool]]
        # rather than Series[bool]
        orig_data1, orig_data2 = data_repeated(2)
        s1 = pd.Series(orig_data1)
        s2 = pd.Series(orig_data2)
        result = s1.combine(s2, lambda x1, x2: x1 <= x2)
        pairwise = [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))]
        expected = pd.Series(pd.SparseArray(pairwise, fill_value=False))
        self.assert_series_equal(result, expected)

        val = s1.iloc[0]
        result = s1.combine(val, lambda x1, x2: x1 <= x2)
        against_val = [a <= val for a in list(orig_data1)]
        expected = pd.Series(pd.SparseArray(against_val, fill_value=False))
        self.assert_series_equal(result, expected)

    def test_fillna_copy_frame(self, data_missing):
        """Check identity relations after ``DataFrame.fillna``."""
        arr = data_missing.take([1, 1])
        df = pd.DataFrame({"A": arr})

        filled_val = df.iloc[0, 0]
        result = df.fillna(filled_val)

        original_base = df.values.base
        assert original_base is not result.values.base
        assert df.A._values.to_dense() is arr.to_dense()

    def test_fillna_copy_series(self, data_missing):
        """Check identity relations after ``Series.fillna``."""
        arr = data_missing.take([1, 1])
        ser = pd.Series(arr)

        filled_val = ser[0]
        result = ser.fillna(filled_val)

        original_values = ser._values
        assert original_values is not result._values
        assert ser._values.to_dense() is arr.to_dense()

    @pytest.mark.skip(reason="Not Applicable")
    def test_fillna_length_mismatch(self, data_missing):
        """Skipped unconditionally; the body is intentionally empty."""

    def test_where_series(self, data, na_value):
        """Check ``Series.where`` with and without an ``other`` argument."""
        assert data[0] != data[1]
        cls = type(data)
        a, b = data[:2]

        ser = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype))

        cond = np.array([True, True, False, False])
        result = ser.where(cond)

        new_dtype = SparseDtype("float", 0.0)
        masked = cls._from_sequence([a, a, na_value, na_value], dtype=new_dtype)
        expected = pd.Series(masked)
        self.assert_series_equal(result, expected)

        other = cls._from_sequence([a, b, a, b], dtype=data.dtype)
        cond = np.array([True, False, True, True])
        result = ser.where(cond, other)
        replaced = cls._from_sequence([a, b, b, b], dtype=data.dtype)
        expected = pd.Series(replaced)
        self.assert_series_equal(result, expected)

    def test_combine_first(self, data):
        """Run the parent test unless the subtype compares equal to "int"."""
        subtype_is_int = data.dtype.subtype == "int"
        if subtype_is_int:
            # Right now this is upcasted to float, just like combine_first
            # for Series[int]
            pytest.skip("TODO(SparseArray.__setitem__ will preserve dtype.")
        super().test_combine_first(data)

    def test_searchsorted(self, data_for_sorting, as_series):
        """Run the parent test while asserting a ``PerformanceWarning``."""
        with tm.assert_produces_warning(PerformanceWarning):
            super().test_searchsorted(data_for_sorting, as_series)
|
||||
|
||||
|
||||
class TestCasting(BaseSparseTests, base.BaseCastingTests):
    """Casting tests, inherited without any overrides."""
|
||||
|
||||
|
||||
class TestArithmeticOps(BaseSparseTests, base.BaseArithmeticOpsTests):
    """Arithmetic-operator tests for SparseArray.

    All ``*_exc`` hooks are ``None``; the scalar and array tests are skipped
    when the array's fill value is 0.
    """

    series_scalar_exc = None
    frame_scalar_exc = None
    divmod_exc = None
    series_array_exc = None

    def _skip_if_different_combine(self, data):
        """Skip the current test when ``data.fill_value == 0``."""
        if data.fill_value == 0:
            # arith ops call on dtype.fill_value so that the sparsity
            # is maintained. Combine can't be called on a dtype in
            # general, so we can't make the expected. This is tested elsewhere
            # pytest.skip raises on its own, so no explicit ``raise`` is needed.
            pytest.skip("Incorrect expected from Series.combine")

    def test_error(self, data, all_arithmetic_operators):
        """Replace the parent ``test_error`` with a no-op."""
        pass

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        """Run the parent scalar test unless the fill value is 0."""
        self._skip_if_different_combine(data)
        super().test_arith_series_with_scalar(data, all_arithmetic_operators)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Run the parent array test unless the fill value is 0."""
        self._skip_if_different_combine(data)
        super().test_arith_series_with_array(data, all_arithmetic_operators)
|
||||
|
||||
|
||||
class TestComparisonOps(BaseSparseTests, base.BaseComparisonOpsTests):
    """Comparison-operator tests with a Sparse-specific ``_compare_other``."""

    def _compare_other(self, s, data, op_name, other):
        """Compare ``op(data, other)`` and ``op(Series(data), other)`` against
        the same operator applied to the dense NumPy conversions.
        """
        op = self.get_op_from_name(op_name)

        # array
        result = pd.Series(op(data, other))
        # hard to test the fill value, since we don't know what expected
        # is in general.
        # Rely on tests in `tests/sparse` to validate that.
        result_dtype = result.dtype
        assert isinstance(result_dtype, SparseDtype)
        assert result_dtype.subtype == np.dtype("bool")

        with np.errstate(all="ignore"):
            dense_result = op(np.asarray(data), np.asarray(other))
            sparse_expected = pd.SparseArray(
                dense_result, fill_value=result.values.fill_value
            )
            expected = pd.Series(sparse_expected)

        tm.assert_series_equal(result, expected)

        # series
        s = pd.Series(data)
        result = op(s, other)
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestPrinting(BaseSparseTests, base.BasePrintingTests):
    """Printing tests; the array-repr test is a strict expected failure."""

    @pytest.mark.xfail(reason="Different repr", strict=True)
    def test_array_repr(self, data, size):
        """Delegate to the parent ``test_array_repr``; expected to fail."""
        super().test_array_repr(data, size)
|
||||
|
||||
|
||||
class TestParsing(BaseSparseTests, base.BaseParsingTests):
    """Parsing tests; the parent test is expected to raise for SparseArray."""

    @pytest.mark.parametrize("engine", ["c", "python"])
    def test_EA_types(self, engine, data):
        """Assert the parent test raises ``NotImplementedError`` with a
        message mentioning ``_from_sequence_of_strings``.
        """
        expected_msg = r".*must implement _from_sequence_of_strings.*"
        raises_not_implemented = pytest.raises(NotImplementedError, match=expected_msg)
        with raises_not_implemented:
            super().test_EA_types(engine, data)
|
Reference in New Issue
Block a user