8th day of Python challenges 111-117
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
from pandas import Categorical
|
||||
|
||||
|
||||
class TestCategorical:
    """Shared base class providing an ordered ``Categorical`` fixture.

    ``setup_method`` runs before each test and exposes the fixture as
    ``self.factor``.
    """

    def setup_method(self, method):
        # Eight values over three categories ("a" < "b" < "c");
        # ordered=True so comparison-based operations are defined.
        values = list("abbaaccc")
        self.factor = Categorical(values, ordered=True)
|
||||
@@ -0,0 +1,7 @@
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def allow_fill(request):
    """Boolean 'allow_fill' parameter for Categorical.take.

    Parametrized over both True and False so every test requesting this
    fixture runs once per value.
    """
    return request.param
|
||||
@@ -0,0 +1,142 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [True, False])
@pytest.mark.parametrize("categories", [["b", "a", "c"], ["a", "b", "c", "d"]])
def test_factorize(categories, ordered):
    """factorize() codes categorical values by first appearance.

    The resulting uniques keep the original categories/ordered metadata.
    """
    values = ["b", "b", "a", "c", None]
    cat = pd.Categorical(values, categories=categories, ordered=ordered)

    codes, uniques = pd.factorize(cat)

    # "b" appears first -> 0, then "a" -> 1, "c" -> 2; missing maps to -1.
    exp_codes = np.array([0, 0, 1, 2, -1], dtype=np.intp)
    exp_uniques = pd.Categorical(
        ["b", "a", "c"], categories=categories, ordered=ordered
    )

    tm.assert_numpy_array_equal(codes, exp_codes)
    tm.assert_categorical_equal(uniques, exp_uniques)
|
||||
|
||||
|
||||
def test_factorized_sort():
    """factorize(sort=True) sorts the uniques and remaps the codes."""
    cat = pd.Categorical(["b", "b", None, "a"])

    codes, uniques = pd.factorize(cat, sort=True)

    # After sorting, "a" -> 0 and "b" -> 1; the missing entry stays -1.
    exp_codes = np.array([1, 1, -1, 0], dtype=np.intp)
    exp_uniques = pd.Categorical(["a", "b"])

    tm.assert_numpy_array_equal(codes, exp_codes)
    tm.assert_categorical_equal(uniques, exp_uniques)
|
||||
|
||||
|
||||
def test_factorized_sort_ordered():
    """For an ordered Categorical, sort=True follows the category order."""
    cat = pd.Categorical(
        ["b", "b", None, "a"], categories=["c", "b", "a"], ordered=True
    )

    codes, uniques = pd.factorize(cat, sort=True)

    # Category order is c < b < a, so "b" sorts before "a".
    exp_codes = np.array([0, 0, -1, 1], dtype=np.intp)
    exp_uniques = pd.Categorical(
        ["b", "a"], categories=["c", "b", "a"], ordered=True
    )

    tm.assert_numpy_array_equal(codes, exp_codes)
    tm.assert_categorical_equal(uniques, exp_uniques)
|
||||
|
||||
|
||||
def test_isin_cats():
    """Categorical.isin matches NaN and ignores out-of-category values."""
    # GH2003
    cat = pd.Categorical(["a", "b", np.nan])

    # A NaN in the lookup values matches the missing entry.
    mask = cat.isin(["a", np.nan])
    tm.assert_numpy_array_equal(np.array([True, False, True], dtype=bool), mask)

    # "c" is not a category here, so only "a" matches.
    mask = cat.isin(["a", "c"])
    tm.assert_numpy_array_equal(np.array([True, False, False], dtype=bool), mask)
|
||||
|
||||
|
||||
# Use an explicit dtype for the empty Series: the bare ``pd.Series()``
# default dtype is deprecated and warns at collection time.
@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])])
def test_isin_empty(empty):
    """isin against an empty container of any type matches nothing."""
    s = pd.Categorical(["a", "b"])
    expected = np.array([False, False], dtype=bool)

    result = s.isin(empty)
    tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
|
||||
class TestTake:
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
|
||||
def test_take_warns(self):
|
||||
cat = pd.Categorical(["a", "b"])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
cat.take([0, -1])
|
||||
|
||||
def test_take_positive_no_warning(self):
|
||||
cat = pd.Categorical(["a", "b"])
|
||||
with tm.assert_produces_warning(None):
|
||||
cat.take([0, 0])
|
||||
|
||||
def test_take_bounds(self, allow_fill):
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
cat = pd.Categorical(["a", "b", "a"])
|
||||
with pytest.raises(IndexError):
|
||||
cat.take([4, 5], allow_fill=allow_fill)
|
||||
|
||||
def test_take_empty(self, allow_fill):
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
cat = pd.Categorical([], categories=["a", "b"])
|
||||
with pytest.raises(IndexError):
|
||||
cat.take([0], allow_fill=allow_fill)
|
||||
|
||||
def test_positional_take(self, ordered_fixture):
|
||||
cat = pd.Categorical(
|
||||
["a", "a", "b", "b"], categories=["b", "a"], ordered=ordered_fixture
|
||||
)
|
||||
result = cat.take([0, 1, 2], allow_fill=False)
|
||||
expected = pd.Categorical(
|
||||
["a", "a", "b"], categories=cat.categories, ordered=ordered_fixture
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_positional_take_unobserved(self, ordered_fixture):
|
||||
cat = pd.Categorical(
|
||||
["a", "b"], categories=["a", "b", "c"], ordered=ordered_fixture
|
||||
)
|
||||
result = cat.take([1, 0], allow_fill=False)
|
||||
expected = pd.Categorical(
|
||||
["b", "a"], categories=cat.categories, ordered=ordered_fixture
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_allow_fill(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23296
|
||||
cat = pd.Categorical(["a", "a", "b"])
|
||||
result = cat.take([0, -1, -1], allow_fill=True)
|
||||
expected = pd.Categorical(["a", np.nan, np.nan], categories=["a", "b"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_fill_with_negative_one(self):
|
||||
# -1 was a category
|
||||
cat = pd.Categorical([-1, 0, 1])
|
||||
result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1)
|
||||
expected = pd.Categorical([-1, -1, 0], categories=[-1, 0, 1])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_fill_value(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23296
|
||||
cat = pd.Categorical(["a", "b", "c"])
|
||||
result = cat.take([0, 1, -1], fill_value="a", allow_fill=True)
|
||||
expected = pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_fill_value_new_raises(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23296
|
||||
cat = pd.Categorical(["a", "b", "c"])
|
||||
xpr = r"'fill_value' \('d'\) is not in this Categorical's categories."
|
||||
with pytest.raises(TypeError, match=xpr):
|
||||
cat.take([0, 1, -1], fill_value="d", allow_fill=True)
|
||||
@@ -0,0 +1,316 @@
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PYPY
|
||||
|
||||
from pandas import Categorical, Index, Series
|
||||
from pandas.api.types import is_scalar
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalAnalytics:
    """Analytics behavior of Categorical: min/max, mode, searchsorted,
    unique, shift, memory accounting, map, inplace validation, and isna.

    NOTE(review): several tests use version-specific APIs
    (``numeric_only=`` on min/max, ``inplace=`` kwargs,
    ``pandas.util.testing``) — behavior is tied to the pandas version
    this suite was written against.
    """

    def test_min_max(self):
        """min/max require ordered=True and follow category order, not
        lexical order."""

        # unordered cats have no min/max
        cat = Categorical(["a", "b", "c", "d"], ordered=False)
        msg = "Categorical is not ordered for operation {}"
        with pytest.raises(TypeError, match=msg.format("min")):
            cat.min()
        with pytest.raises(TypeError, match=msg.format("max")):
            cat.max()

        cat = Categorical(["a", "b", "c", "d"], ordered=True)
        _min = cat.min()
        _max = cat.max()
        assert _min == "a"
        assert _max == "d"

        # Reversed category order flips which value is min/max.
        cat = Categorical(
            ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True
        )
        _min = cat.min()
        _max = cat.max()
        assert _min == "d"
        assert _max == "a"

        # NaN present: plain min is NaN, max is the largest non-missing.
        cat = Categorical(
            [np.nan, "b", "c", np.nan], categories=["d", "c", "b", "a"], ordered=True
        )
        _min = cat.min()
        _max = cat.max()
        assert np.isnan(_min)
        assert _max == "b"

        # numeric_only=True skips the NaN entries here
        # (old-pandas kwarg — verify against the installed version).
        _min = cat.min(numeric_only=True)
        assert _min == "c"
        _max = cat.max(numeric_only=True)
        assert _max == "b"

        cat = Categorical(
            [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True
        )
        _min = cat.min()
        _max = cat.max()
        assert np.isnan(_min)
        assert _max == 1

        _min = cat.min(numeric_only=True)
        assert _min == 2
        _max = cat.max(numeric_only=True)
        assert _max == 1

    @pytest.mark.parametrize(
        "values,categories,exp_mode",
        [
            ([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]),
            ([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]),
            ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]),
            ([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]),
            ([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]),
            ([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]),
        ],
    )
    def test_mode(self, values, categories, exp_mode):
        """mode() returns the most frequent value(s); NaN never counts."""
        s = Categorical(values, categories=categories, ordered=True)
        res = s.mode()
        exp = Categorical(exp_mode, categories=categories, ordered=True)
        tm.assert_categorical_equal(res, exp)

    def test_searchsorted(self):
        """searchsorted uses category order, rejects values outside the
        categories, and requires ordered=True."""
        # https://github.com/pandas-dev/pandas/issues/8420
        # https://github.com/pandas-dev/pandas/issues/14522

        c1 = Categorical(
            ["cheese", "milk", "apple", "bread", "bread"],
            categories=["cheese", "milk", "apple", "bread"],
            ordered=True,
        )
        s1 = Series(c1)
        c2 = Categorical(
            ["cheese", "milk", "apple", "bread", "bread"],
            categories=["cheese", "milk", "apple", "bread"],
            ordered=False,
        )
        s2 = Series(c2)

        # Searching for single item argument, side='left' (default)
        res_cat = c1.searchsorted("apple")
        assert res_cat == 2
        assert is_scalar(res_cat)

        res_ser = s1.searchsorted("apple")
        assert res_ser == 2
        assert is_scalar(res_ser)

        # Searching for single item array, side='left' (default)
        res_cat = c1.searchsorted(["bread"])
        res_ser = s1.searchsorted(["bread"])
        exp = np.array([3], dtype=np.intp)
        tm.assert_numpy_array_equal(res_cat, exp)
        tm.assert_numpy_array_equal(res_ser, exp)

        # Searching for several items array, side='right'
        res_cat = c1.searchsorted(["apple", "bread"], side="right")
        res_ser = s1.searchsorted(["apple", "bread"], side="right")
        exp = np.array([3, 5], dtype=np.intp)
        tm.assert_numpy_array_equal(res_cat, exp)
        tm.assert_numpy_array_equal(res_ser, exp)

        # Searching for a single value that is not from the Categorical
        msg = r"Value\(s\) to be inserted must be in categories"
        with pytest.raises(KeyError, match=msg):
            c1.searchsorted("cucumber")
        with pytest.raises(KeyError, match=msg):
            s1.searchsorted("cucumber")

        # Searching for multiple values one of each is not from the Categorical
        with pytest.raises(KeyError, match=msg):
            c1.searchsorted(["bread", "cucumber"])
        with pytest.raises(KeyError, match=msg):
            s1.searchsorted(["bread", "cucumber"])

        # searchsorted call for unordered Categorical
        msg = "Categorical not ordered"
        with pytest.raises(ValueError, match=msg):
            c2.searchsorted("apple")
        with pytest.raises(ValueError, match=msg):
            s2.searchsorted("apple")

    def test_unique(self):
        """unique() on an unordered Categorical reorders the categories
        by first appearance and drops unused ones."""
        # categories are reordered based on value when ordered=False
        cat = Categorical(["a", "b"])
        exp = Index(["a", "b"])
        res = cat.unique()
        tm.assert_index_equal(res.categories, exp)
        tm.assert_categorical_equal(res, cat)

        cat = Categorical(["a", "b", "a", "a"], categories=["a", "b", "c"])
        res = cat.unique()
        tm.assert_index_equal(res.categories, exp)
        tm.assert_categorical_equal(res, Categorical(exp))

        cat = Categorical(["c", "a", "b", "a", "a"], categories=["a", "b", "c"])
        exp = Index(["c", "a", "b"])
        res = cat.unique()
        tm.assert_index_equal(res.categories, exp)
        exp_cat = Categorical(exp, categories=["c", "a", "b"])
        tm.assert_categorical_equal(res, exp_cat)

        # nan must be removed
        cat = Categorical(["b", np.nan, "b", np.nan, "a"], categories=["a", "b", "c"])
        res = cat.unique()
        exp = Index(["b", "a"])
        tm.assert_index_equal(res.categories, exp)
        exp_cat = Categorical(["b", np.nan, "a"], categories=["b", "a"])
        tm.assert_categorical_equal(res, exp_cat)

    def test_unique_ordered(self):
        """unique() on an ordered Categorical keeps the category order,
        dropping only the unused categories."""
        # keep categories order when ordered=True
        cat = Categorical(["b", "a", "b"], categories=["a", "b"], ordered=True)
        res = cat.unique()
        exp_cat = Categorical(["b", "a"], categories=["a", "b"], ordered=True)
        tm.assert_categorical_equal(res, exp_cat)

        cat = Categorical(
            ["c", "b", "a", "a"], categories=["a", "b", "c"], ordered=True
        )
        res = cat.unique()
        exp_cat = Categorical(["c", "b", "a"], categories=["a", "b", "c"], ordered=True)
        tm.assert_categorical_equal(res, exp_cat)

        cat = Categorical(["b", "a", "a"], categories=["a", "b", "c"], ordered=True)
        res = cat.unique()
        exp_cat = Categorical(["b", "a"], categories=["a", "b"], ordered=True)
        tm.assert_categorical_equal(res, exp_cat)

        cat = Categorical(
            ["b", "b", np.nan, "a"], categories=["a", "b", "c"], ordered=True
        )
        res = cat.unique()
        exp_cat = Categorical(["b", np.nan, "a"], categories=["a", "b"], ordered=True)
        tm.assert_categorical_equal(res, exp_cat)

    def test_unique_index_series(self):
        """Categorical.unique agrees with Index.unique and Series.unique."""
        c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1])
        # Categorical.unique sorts categories by appearance order
        # if ordered=False
        exp = Categorical([3, 1, 2], categories=[3, 1, 2])
        tm.assert_categorical_equal(c.unique(), exp)

        tm.assert_index_equal(Index(c).unique(), Index(exp))
        tm.assert_categorical_equal(Series(c).unique(), exp)

        c = Categorical([1, 1, 2, 2], categories=[3, 2, 1])
        exp = Categorical([1, 2], categories=[1, 2])
        tm.assert_categorical_equal(c.unique(), exp)
        tm.assert_index_equal(Index(c).unique(), Index(exp))
        tm.assert_categorical_equal(Series(c).unique(), exp)

        c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1], ordered=True)
        # Categorical.unique keeps categories order if ordered=True
        exp = Categorical([3, 1, 2], categories=[3, 2, 1], ordered=True)
        tm.assert_categorical_equal(c.unique(), exp)

        tm.assert_index_equal(Index(c).unique(), Index(exp))
        tm.assert_categorical_equal(Series(c).unique(), exp)

    def test_shift(self):
        """shift() moves values positionally and fills the gap with NaN,
        keeping the categories unchanged."""
        # GH 9416
        cat = Categorical(["a", "b", "c", "d", "a"])

        # shift forward
        sp1 = cat.shift(1)
        xp1 = Categorical([np.nan, "a", "b", "c", "d"])
        tm.assert_categorical_equal(sp1, xp1)
        tm.assert_categorical_equal(cat[:-1], sp1[1:])

        # shift back
        sn2 = cat.shift(-2)
        xp2 = Categorical(
            ["c", "d", "a", np.nan, np.nan], categories=["a", "b", "c", "d"]
        )
        tm.assert_categorical_equal(sn2, xp2)
        tm.assert_categorical_equal(cat[2:], sn2[:-2])

        # shift by zero
        tm.assert_categorical_equal(cat, cat.shift(0))

    def test_nbytes(self):
        """nbytes counts the codes array plus the categories' values."""
        cat = Categorical([1, 2, 3])
        exp = 3 + 3 * 8  # 3 int8s for values + 3 int64s for categories
        assert cat.nbytes == exp

    def test_memory_usage(self):
        """memory_usage is at least nbytes; deep=True also counts the
        categories' hashtable and object payloads."""
        cat = Categorical([1, 2, 3])

        # .categories is an index, so we include the hashtable
        assert 0 < cat.nbytes <= cat.memory_usage()
        assert 0 < cat.nbytes <= cat.memory_usage(deep=True)

        cat = Categorical(["foo", "foo", "bar"])
        assert cat.memory_usage(deep=True) > cat.nbytes

        if not PYPY:
            # sys.getsizeof will call the .memory_usage with
            # deep=True, and add on some GC overhead
            diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
            assert abs(diff) < 100

    def test_map(self):
        """map() applies a function to the categories; a non-unique
        result falls back to an Index instead of a Categorical."""
        c = Categorical(list("ABABC"), categories=list("CBA"), ordered=True)
        result = c.map(lambda x: x.lower())
        exp = Categorical(list("ababc"), categories=list("cba"), ordered=True)
        tm.assert_categorical_equal(result, exp)

        c = Categorical(list("ABABC"), categories=list("ABC"), ordered=False)
        result = c.map(lambda x: x.lower())
        exp = Categorical(list("ababc"), categories=list("abc"), ordered=False)
        tm.assert_categorical_equal(result, exp)

        result = c.map(lambda x: 1)
        # GH 12766: Return an index not an array
        tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64)))

    def test_validate_inplace(self):
        """Every mutating method rejects a non-boolean ``inplace``."""
        cat = Categorical(["A", "B", "B", "C", "A"])
        invalid_values = [1, "True", [1, 2, 3], 5.0]

        for value in invalid_values:
            with pytest.raises(ValueError):
                cat.set_ordered(value=True, inplace=value)

            with pytest.raises(ValueError):
                cat.as_ordered(inplace=value)

            with pytest.raises(ValueError):
                cat.as_unordered(inplace=value)

            with pytest.raises(ValueError):
                cat.set_categories(["X", "Y", "Z"], rename=True, inplace=value)

            with pytest.raises(ValueError):
                cat.rename_categories(["X", "Y", "Z"], inplace=value)

            with pytest.raises(ValueError):
                cat.reorder_categories(["X", "Y", "Z"], ordered=True, inplace=value)

            with pytest.raises(ValueError):
                cat.add_categories(new_categories=["D", "E", "F"], inplace=value)

            with pytest.raises(ValueError):
                cat.remove_categories(removals=["D", "E", "F"], inplace=value)

            with pytest.raises(ValueError):
                cat.remove_unused_categories(inplace=value)

            with pytest.raises(ValueError):
                cat.sort_values(inplace=value)

    def test_isna(self):
        """isna() flags missing entries as True."""
        exp = np.array([False, False, True])
        c = Categorical(["a", "b", np.nan])
        res = c.isna()

        tm.assert_numpy_array_equal(res, exp)
|
||||
@@ -0,0 +1,506 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Categorical, CategoricalIndex, DataFrame, Index, Series
|
||||
from pandas.core.arrays.categorical import _recode_for_categories
|
||||
from pandas.tests.arrays.categorical.common import TestCategorical
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalAPI:
|
||||
def test_ordered_api(self):
|
||||
# GH 9347
|
||||
cat1 = Categorical(list("acb"), ordered=False)
|
||||
tm.assert_index_equal(cat1.categories, Index(["a", "b", "c"]))
|
||||
assert not cat1.ordered
|
||||
|
||||
cat2 = Categorical(list("acb"), categories=list("bca"), ordered=False)
|
||||
tm.assert_index_equal(cat2.categories, Index(["b", "c", "a"]))
|
||||
assert not cat2.ordered
|
||||
|
||||
cat3 = Categorical(list("acb"), ordered=True)
|
||||
tm.assert_index_equal(cat3.categories, Index(["a", "b", "c"]))
|
||||
assert cat3.ordered
|
||||
|
||||
cat4 = Categorical(list("acb"), categories=list("bca"), ordered=True)
|
||||
tm.assert_index_equal(cat4.categories, Index(["b", "c", "a"]))
|
||||
assert cat4.ordered
|
||||
|
||||
def test_set_ordered(self):
|
||||
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
cat2 = cat.as_unordered()
|
||||
assert not cat2.ordered
|
||||
cat2 = cat.as_ordered()
|
||||
assert cat2.ordered
|
||||
cat2.as_unordered(inplace=True)
|
||||
assert not cat2.ordered
|
||||
cat2.as_ordered(inplace=True)
|
||||
assert cat2.ordered
|
||||
|
||||
assert cat2.set_ordered(True).ordered
|
||||
assert not cat2.set_ordered(False).ordered
|
||||
cat2.set_ordered(True, inplace=True)
|
||||
assert cat2.ordered
|
||||
cat2.set_ordered(False, inplace=True)
|
||||
assert not cat2.ordered
|
||||
|
||||
# removed in 0.19.0
|
||||
msg = "can't set attribute"
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
cat.ordered = True
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
cat.ordered = False
|
||||
|
||||
def test_rename_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"])
|
||||
|
||||
# inplace=False: the old one must not be changed
|
||||
res = cat.rename_categories([1, 2, 3])
|
||||
tm.assert_numpy_array_equal(
|
||||
res.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)
|
||||
)
|
||||
tm.assert_index_equal(res.categories, Index([1, 2, 3]))
|
||||
|
||||
exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(cat.__array__(), exp_cat)
|
||||
|
||||
exp_cat = Index(["a", "b", "c"])
|
||||
tm.assert_index_equal(cat.categories, exp_cat)
|
||||
|
||||
# GH18862 (let rename_categories take callables)
|
||||
result = cat.rename_categories(lambda x: x.upper())
|
||||
expected = Categorical(["A", "B", "C", "A"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
# and now inplace
|
||||
res = cat.rename_categories([1, 2, 3], inplace=True)
|
||||
assert res is None
|
||||
tm.assert_numpy_array_equal(
|
||||
cat.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)
|
||||
)
|
||||
tm.assert_index_equal(cat.categories, Index([1, 2, 3]))
|
||||
|
||||
# Lengthen
|
||||
with pytest.raises(ValueError):
|
||||
cat.rename_categories([1, 2, 3, 4])
|
||||
|
||||
# Shorten
|
||||
with pytest.raises(ValueError):
|
||||
cat.rename_categories([1, 2])
|
||||
|
||||
def test_rename_categories_series(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/17981
|
||||
c = Categorical(["a", "b"])
|
||||
result = c.rename_categories(Series([0, 1], index=["a", "b"]))
|
||||
expected = Categorical([0, 1])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_rename_categories_dict(self):
|
||||
# GH 17336
|
||||
cat = Categorical(["a", "b", "c", "d"])
|
||||
res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1})
|
||||
expected = Index([4, 3, 2, 1])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for inplace
|
||||
res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1}, inplace=True)
|
||||
assert res is None
|
||||
tm.assert_index_equal(cat.categories, expected)
|
||||
|
||||
# Test for dicts of smaller length
|
||||
cat = Categorical(["a", "b", "c", "d"])
|
||||
res = cat.rename_categories({"a": 1, "c": 3})
|
||||
|
||||
expected = Index([1, "b", 3, "d"])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for dicts with bigger length
|
||||
cat = Categorical(["a", "b", "c", "d"])
|
||||
res = cat.rename_categories({"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6})
|
||||
expected = Index([1, 2, 3, 4])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for dicts with no items from old categories
|
||||
cat = Categorical(["a", "b", "c", "d"])
|
||||
res = cat.rename_categories({"f": 1, "g": 3})
|
||||
|
||||
expected = Index(["a", "b", "c", "d"])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
def test_reorder_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(
|
||||
["a", "b", "c", "a"], categories=["c", "b", "a"], ordered=True
|
||||
)
|
||||
|
||||
# first inplace == False
|
||||
res = cat.reorder_categories(["c", "b", "a"])
|
||||
# cat must be the same as before
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
# only res is changed
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
# inplace == True
|
||||
res = cat.reorder_categories(["c", "b", "a"], inplace=True)
|
||||
assert res is None
|
||||
tm.assert_categorical_equal(cat, new)
|
||||
|
||||
# not all "old" included in "new"
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.reorder_categories(["a"])
|
||||
|
||||
# still not all "old" in "new"
|
||||
with pytest.raises(ValueError):
|
||||
cat.reorder_categories(["a", "b", "d"])
|
||||
|
||||
# all "old" included in "new", but too long
|
||||
with pytest.raises(ValueError):
|
||||
cat.reorder_categories(["a", "b", "c", "d"])
|
||||
|
||||
def test_add_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(
|
||||
["a", "b", "c", "a"], categories=["a", "b", "c", "d"], ordered=True
|
||||
)
|
||||
|
||||
# first inplace == False
|
||||
res = cat.add_categories("d")
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
res = cat.add_categories(["d"])
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
# inplace == True
|
||||
res = cat.add_categories("d", inplace=True)
|
||||
tm.assert_categorical_equal(cat, new)
|
||||
assert res is None
|
||||
|
||||
# new is in old categories
|
||||
with pytest.raises(ValueError):
|
||||
cat.add_categories(["d"])
|
||||
|
||||
# GH 9927
|
||||
cat = Categorical(list("abc"), ordered=True)
|
||||
expected = Categorical(list("abc"), categories=list("abcde"), ordered=True)
|
||||
# test with Series, np.array, index, list
|
||||
res = cat.add_categories(Series(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(np.array(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(Index(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(["d", "e"])
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
|
||||
def test_set_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
exp_categories = Index(["c", "b", "a"])
|
||||
exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
|
||||
|
||||
res = cat.set_categories(["c", "b", "a"], inplace=True)
|
||||
tm.assert_index_equal(cat.categories, exp_categories)
|
||||
tm.assert_numpy_array_equal(cat.__array__(), exp_values)
|
||||
assert res is None
|
||||
|
||||
res = cat.set_categories(["a", "b", "c"])
|
||||
# cat must be the same as before
|
||||
tm.assert_index_equal(cat.categories, exp_categories)
|
||||
tm.assert_numpy_array_equal(cat.__array__(), exp_values)
|
||||
# only res is changed
|
||||
exp_categories_back = Index(["a", "b", "c"])
|
||||
tm.assert_index_equal(res.categories, exp_categories_back)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_values)
|
||||
|
||||
# not all "old" included in "new" -> all not included ones are now
|
||||
# np.nan
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
res = cat.set_categories(["a"])
|
||||
tm.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0], dtype=np.int8))
|
||||
|
||||
# still not all "old" in "new"
|
||||
res = cat.set_categories(["a", "b", "d"])
|
||||
tm.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0], dtype=np.int8))
|
||||
tm.assert_index_equal(res.categories, Index(["a", "b", "d"]))
|
||||
|
||||
# all "old" included in "new"
|
||||
cat = cat.set_categories(["a", "b", "c", "d"])
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_index_equal(cat.categories, exp_categories)
|
||||
|
||||
# internals...
|
||||
c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True)
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0], dtype=np.int8))
|
||||
tm.assert_index_equal(c.categories, Index([1, 2, 3, 4]))
|
||||
|
||||
exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(c.to_dense(), exp)
|
||||
|
||||
# all "pointers" to '4' must be changed from 3 to 0,...
|
||||
c = c.set_categories([4, 3, 2, 1])
|
||||
|
||||
# positions are changed
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3], dtype=np.int8))
|
||||
|
||||
# categories are now in new order
|
||||
tm.assert_index_equal(c.categories, Index([4, 3, 2, 1]))
|
||||
|
||||
# output is the same
|
||||
exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(c.to_dense(), exp)
|
||||
assert c.min() == 4
|
||||
assert c.max() == 1
|
||||
|
||||
# set_categories should set the ordering if specified
|
||||
c2 = c.set_categories([4, 3, 2, 1], ordered=False)
|
||||
assert not c2.ordered
|
||||
|
||||
tm.assert_numpy_array_equal(c.to_dense(), c2.to_dense())
|
||||
|
||||
# set_categories should pass thru the ordering
|
||||
c2 = c.set_ordered(False).set_categories([4, 3, 2, 1])
|
||||
assert not c2.ordered
|
||||
|
||||
tm.assert_numpy_array_equal(c.to_dense(), c2.to_dense())
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, categories, new_categories",
|
||||
[
|
||||
# No NaNs, same cats, same order
|
||||
(["a", "b", "a"], ["a", "b"], ["a", "b"]),
|
||||
# No NaNs, same cats, different order
|
||||
(["a", "b", "a"], ["a", "b"], ["b", "a"]),
|
||||
# Same, unsorted
|
||||
(["b", "a", "a"], ["a", "b"], ["a", "b"]),
|
||||
# No NaNs, same cats, different order
|
||||
(["b", "a", "a"], ["a", "b"], ["b", "a"]),
|
||||
# NaNs
|
||||
(["a", "b", "c"], ["a", "b"], ["a", "b"]),
|
||||
(["a", "b", "c"], ["a", "b"], ["b", "a"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a", "b"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a", "b"]),
|
||||
# Introduce NaNs
|
||||
(["a", "b", "c"], ["a", "b"], ["a"]),
|
||||
(["a", "b", "c"], ["a", "b"], ["b"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a"]),
|
||||
# No overlap
|
||||
(["a", "b", "c"], ["a", "b"], ["d", "e"]),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("ordered", [True, False])
|
||||
def test_set_categories_many(self, values, categories, new_categories, ordered):
|
||||
c = Categorical(values, categories)
|
||||
expected = Categorical(values, new_categories, ordered)
|
||||
result = c.set_categories(new_categories, ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_categories_rename_less(self):
|
||||
# GH 24675
|
||||
cat = Categorical(["A", "B"])
|
||||
result = cat.set_categories(["A"], rename=True)
|
||||
expected = Categorical(["A", np.nan])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_categories_private(self):
|
||||
cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"])
|
||||
cat._set_categories(["a", "c", "d", "e"])
|
||||
expected = Categorical(["a", "c", "d"], categories=list("acde"))
|
||||
tm.assert_categorical_equal(cat, expected)
|
||||
|
||||
# fastpath
|
||||
cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"])
|
||||
cat._set_categories(["a", "c", "d", "e"], fastpath=True)
|
||||
expected = Categorical(["a", "c", "d"], categories=list("acde"))
|
||||
tm.assert_categorical_equal(cat, expected)
|
||||
|
||||
def test_remove_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"], ordered=True)
|
||||
|
||||
# first inplace == False
|
||||
res = cat.remove_categories("c")
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
res = cat.remove_categories(["c"])
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
# inplace == True
|
||||
res = cat.remove_categories("c", inplace=True)
|
||||
tm.assert_categorical_equal(cat, new)
|
||||
assert res is None
|
||||
|
||||
# removal is not in categories
|
||||
with pytest.raises(ValueError):
|
||||
cat.remove_categories(["c"])
|
||||
|
||||
    def test_remove_unused_categories(self):
        """remove_unused_categories drops categories no value points at.

        Covers: out-of-place vs. inplace, NaN handling, code re-numbering,
        and a randomized round-trip.
        """
        c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"])
        exp_categories_all = Index(["a", "b", "c", "d", "e"])
        exp_categories_dropped = Index(["a", "b", "c", "d"])

        tm.assert_index_equal(c.categories, exp_categories_all)

        # out-of-place: result drops "e", original is untouched
        res = c.remove_unused_categories()
        tm.assert_index_equal(res.categories, exp_categories_dropped)
        tm.assert_index_equal(c.categories, exp_categories_all)

        # inplace: mutates c and returns None
        res = c.remove_unused_categories(inplace=True)
        tm.assert_index_equal(c.categories, exp_categories_dropped)
        assert res is None

        # with NaN values (GH11599): NaN is a -1 code, not a category, so it
        # survives while the unused categories "d"/"e" are dropped
        c = Categorical(["a", "b", "c", np.nan], categories=["a", "b", "c", "d", "e"])
        res = c.remove_unused_categories()
        tm.assert_index_equal(res.categories, Index(np.array(["a", "b", "c"])))
        exp_codes = np.array([0, 1, 2, -1], dtype=np.int8)
        tm.assert_numpy_array_equal(res.codes, exp_codes)
        tm.assert_index_equal(c.categories, exp_categories_all)

        # codes are renumbered against the compacted categories ["B","D","F"]
        val = ["F", np.nan, "D", "B", "D", "F", np.nan]
        cat = Categorical(values=val, categories=list("ABCDEFG"))
        out = cat.remove_unused_categories()
        tm.assert_index_equal(out.categories, Index(["B", "D", "F"]))
        exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8)
        tm.assert_numpy_array_equal(out.codes, exp_codes)
        # list equality works despite NaN because np.nan is a singleton here
        assert out.tolist() == val

        # randomized round-trip: values must be unchanged after compaction
        alpha = list("abcdefghijklmnopqrstuvwxyz")
        val = np.random.choice(alpha[::2], 10000).astype("object")
        val[np.random.choice(len(val), 100)] = np.nan

        cat = Categorical(values=val, categories=alpha)
        out = cat.remove_unused_categories()
        assert out.tolist() == val.tolist()
class TestCategoricalAPIWithFactor(TestCategorical):
    # Inherits ``self.factor`` from TestCategorical.setup_method: an ordered
    # Categorical of ["a", "b", "b", "a", "a", "c", "c", "c"] (8 values).

    def test_describe(self):
        """describe() returns a counts/freqs frame indexed by category."""
        # string type: 3 a's, 2 b's, 3 c's out of 8
        desc = self.factor.describe()
        assert self.factor.ordered
        exp_index = CategoricalIndex(
            ["a", "b", "c"], name="categories", ordered=self.factor.ordered
        )
        expected = DataFrame(
            {"counts": [3, 2, 3], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0]}, index=exp_index
        )
        tm.assert_frame_equal(desc, expected)

        # check unused categories: "d" still appears, with zero count/freq
        cat = self.factor.copy()
        cat.set_categories(["a", "b", "c", "d"], inplace=True)
        desc = cat.describe()

        exp_index = CategoricalIndex(
            list("abcd"), ordered=self.factor.ordered, name="categories"
        )
        expected = DataFrame(
            {"counts": [3, 2, 3, 0], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0, 0]},
            index=exp_index,
        )
        tm.assert_frame_equal(desc, expected)

        # check an integer one
        cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1])
        desc = cat.describe()
        exp_index = CategoricalIndex([1, 2, 3], ordered=cat.ordered, name="categories")
        expected = DataFrame(
            {"counts": [5, 3, 3], "freqs": [5 / 11.0, 3 / 11.0, 3 / 11.0]},
            index=exp_index,
        )
        tm.assert_frame_equal(desc, expected)

        # https://github.com/pandas-dev/pandas/issues/3678
        # describe should work with NaN: NaN gets its own row in the result
        # index but is not one of the dtype's categories
        cat = Categorical([np.nan, 1, 2, 2])
        desc = cat.describe()
        expected = DataFrame(
            {"counts": [1, 2, 1], "freqs": [1 / 4.0, 2 / 4.0, 1 / 4.0]},
            index=CategoricalIndex(
                [1, 2, np.nan], categories=[1, 2], name="categories"
            ),
        )
        tm.assert_frame_equal(desc, expected)

    def test_set_categories_inplace(self):
        """set_categories(..., inplace=True) mutates the categories index."""
        cat = self.factor.copy()
        cat.set_categories(["a", "b", "c", "d"], inplace=True)
        tm.assert_index_equal(cat.categories, Index(["a", "b", "c", "d"]))
class TestPrivateCategoricalAPI:
    """Tests for private Categorical internals: codes mutability and recoding."""

    def test_codes_immutable(self):
        # ``.codes`` is exposed as a read-only view; the private ``_codes``
        # backing array stays writeable.

        # Codes should be read only
        c = Categorical(["a", "b", "c", "a", np.nan])
        exp = np.array([0, 1, 2, 0, -1], dtype="int8")
        tm.assert_numpy_array_equal(c.codes, exp)

        # Assignments to codes should raise
        with pytest.raises(ValueError):
            c.codes = np.array([0, 1, 2, 0, 1], dtype="int8")

        # changes in the codes array should raise
        codes = c.codes

        with pytest.raises(ValueError):
            codes[4] = 1

        # But even after getting the codes, the original array should still be
        # writeable!
        c[4] = "a"
        exp = np.array([0, 1, 2, 0, 0], dtype="int8")
        tm.assert_numpy_array_equal(c.codes, exp)
        c._codes[4] = 2
        exp = np.array([0, 1, 2, 0, 2], dtype="int8")
        tm.assert_numpy_array_equal(c.codes, exp)

    # each row: input codes, old categories, new categories, expected codes;
    # labels absent from ``new`` map to -1 (NaN)
    @pytest.mark.parametrize(
        "codes, old, new, expected",
        [
            ([0, 1], ["a", "b"], ["a", "b"], [0, 1]),
            ([0, 1], ["b", "a"], ["b", "a"], [0, 1]),
            ([0, 1], ["a", "b"], ["b", "a"], [1, 0]),
            ([0, 1], ["b", "a"], ["a", "b"], [1, 0]),
            ([0, 1, 0, 1], ["a", "b"], ["a", "b", "c"], [0, 1, 0, 1]),
            ([0, 1, 2, 2], ["a", "b", "c"], ["a", "b"], [0, 1, -1, -1]),
            ([0, 1, -1], ["a", "b", "c"], ["a", "b", "c"], [0, 1, -1]),
            ([0, 1, -1], ["a", "b", "c"], ["b"], [-1, 0, -1]),
            ([0, 1, -1], ["a", "b", "c"], ["d"], [-1, -1, -1]),
            ([0, 1, -1], ["a", "b", "c"], [], [-1, -1, -1]),
            ([-1, -1], [], ["a", "b"], [-1, -1]),
            ([1, 0], ["b", "a"], ["a", "b"], [0, 1]),
        ],
    )
    def test_recode_to_categories(self, codes, old, new, expected):
        # _recode_for_categories remaps integer codes from ``old`` onto ``new``
        codes = np.asanyarray(codes, dtype=np.int8)
        expected = np.asanyarray(expected, dtype=np.int8)
        old = Index(old)
        new = Index(new)
        result = _recode_for_categories(codes, old, new)
        tm.assert_numpy_array_equal(result, expected)

    def test_recode_to_categories_large(self):
        # many categories: new order is the exact reverse of the old one
        N = 1000
        codes = np.arange(N)
        old = Index(codes)
        expected = np.arange(N - 1, -1, -1, dtype=np.int16)
        new = Index(expected)
        result = _recode_for_categories(codes, old, new)
        tm.assert_numpy_array_equal(result, expected)

    def test_deprecated_get_values(self):
        # get_values() warns but still returns the dense ndarray
        cat = Categorical(["a", "b", "c", "a"])
        with tm.assert_produces_warning(FutureWarning):
            res = cat.get_values()
        tm.assert_numpy_array_equal(res, np.array(cat))
# --- @@ -0,0 +1,603 @@ : start of next file in this diff (Categorical constructor tests) ---
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
NaT,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalConstructors:
    """Construction of Categorical from lists, arrays, Series, Index types,
    codes (``from_codes``), inferred categories, and ``CategoricalDtype``."""

    def test_validate_ordered(self):
        # see gh-14058
        exp_msg = "'ordered' must either be 'True' or 'False'"
        exp_err = TypeError

        # This should be a boolean.
        ordered = np.array([0, 1, 2])

        with pytest.raises(exp_err, match=exp_msg):
            Categorical([1, 2, 3], ordered=ordered)

        with pytest.raises(exp_err, match=exp_msg):
            Categorical.from_codes(
                [0, 0, 1], categories=["a", "b", "c"], ordered=ordered
            )

    def test_constructor_empty(self):
        # GH 17248: empty input yields empty categories (typed when given)
        c = Categorical([])
        expected = Index([])
        tm.assert_index_equal(c.categories, expected)

        c = Categorical([], categories=[1, 2, 3])
        expected = pd.Int64Index([1, 2, 3])
        tm.assert_index_equal(c.categories, expected)

    def test_constructor_empty_boolean(self):
        # see gh-22702
        cat = pd.Categorical([], categories=[True, False])
        categories = sorted(cat.categories.tolist())
        assert categories == [False, True]

    def test_constructor_tuples(self):
        # tuples are treated as scalar labels, not unpacked into a MultiIndex
        values = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object)
        result = Categorical(values)
        expected = Index([(1,), (1, 2)], tupleize_cols=False)
        tm.assert_index_equal(result.categories, expected)
        assert result.ordered is False

    def test_constructor_tuples_datetimes(self):
        # numpy will auto reshape when all of the tuples are the
        # same len, so add an extra one with 2 items and slice it off
        values = np.array(
            [
                (Timestamp("2010-01-01"),),
                (Timestamp("2010-01-02"),),
                (Timestamp("2010-01-01"),),
                (Timestamp("2010-01-02"),),
                ("a", "b"),
            ],
            dtype=object,
        )[:-1]
        result = Categorical(values)
        expected = Index(
            [(Timestamp("2010-01-01"),), (Timestamp("2010-01-02"),)],
            tupleize_cols=False,
        )
        tm.assert_index_equal(result.categories, expected)

    def test_constructor_unsortable(self):

        # it works! mixed int/datetime is fine while unordered
        arr = np.array([1, 2, 3, datetime.now()], dtype="O")
        factor = Categorical(arr, ordered=False)
        assert not factor.ordered

        # this however will raise as cannot be sorted
        msg = (
            "'values' is not ordered, please explicitly specify the "
            "categories order by passing in a categories argument."
        )
        with pytest.raises(TypeError, match=msg):
            Categorical(arr, ordered=True)

    def test_constructor_interval(self):
        # Interval scalars produce IntervalIndex categories
        result = Categorical(
            [Interval(1, 2), Interval(2, 3), Interval(3, 6)], ordered=True
        )
        ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)])
        exp = Categorical(ii, ordered=True)
        tm.assert_categorical_equal(result, exp)
        tm.assert_index_equal(result.categories, ii)

    def test_constructor(self):
        """Grab-bag of core constructor behaviors: array round-trip,
        uniqueness validation, defaults, Categorical/Series inputs, dtype
        inference, corner cases, and legacy two-array forms."""

        exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_)
        c1 = Categorical(exp_arr)
        tm.assert_numpy_array_equal(c1.__array__(), exp_arr)
        c2 = Categorical(exp_arr, categories=["a", "b", "c"])
        tm.assert_numpy_array_equal(c2.__array__(), exp_arr)
        c2 = Categorical(exp_arr, categories=["c", "b", "a"])
        tm.assert_numpy_array_equal(c2.__array__(), exp_arr)

        # categories must be unique
        msg = "Categorical categories must be unique"
        with pytest.raises(ValueError, match=msg):
            Categorical([1, 2], [1, 2, 2])

        with pytest.raises(ValueError, match=msg):
            Categorical(["a", "b"], ["a", "b", "b"])

        # The default should be unordered
        c1 = Categorical(["a", "b", "c", "a"])
        assert not c1.ordered

        # Categorical as input
        c1 = Categorical(["a", "b", "c", "a"])
        c2 = Categorical(c1)
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        c2 = Categorical(c1)
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
        c2 = Categorical(c1)
        tm.assert_categorical_equal(c1, c2)

        # explicit categories override the input Categorical's ordering
        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
        c2 = Categorical(c1, categories=["a", "b", "c"])
        tm.assert_numpy_array_equal(c1.__array__(), c2.__array__())
        tm.assert_index_equal(c2.categories, Index(["a", "b", "c"]))

        # Series of dtype category
        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        c2 = Categorical(Series(c1))
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
        c2 = Categorical(Series(c1))
        tm.assert_categorical_equal(c1, c2)

        # Series
        c1 = Categorical(["a", "b", "c", "a"])
        c2 = Categorical(Series(["a", "b", "c", "a"]))
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        c2 = Categorical(Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"])
        tm.assert_categorical_equal(c1, c2)

        # This should result in integer categories, not float!
        cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
        assert is_integer_dtype(cat.categories)

        # https://github.com/pandas-dev/pandas/issues/3678
        cat = Categorical([np.nan, 1, 2, 3])
        assert is_integer_dtype(cat.categories)

        # this should result in floats
        cat = Categorical([np.nan, 1, 2.0, 3])
        assert is_float_dtype(cat.categories)

        cat = Categorical([np.nan, 1.0, 2.0, 3.0])
        assert is_float_dtype(cat.categories)

        # This doesn't work -> this would probably need some kind of "remember
        # the original type" feature to try to cast the array interface result
        # to...

        # vals = np.asarray(cat[cat.notna()])
        # assert is_integer_dtype(vals)

        # corner cases
        cat = Categorical([1])
        assert len(cat.categories) == 1
        assert cat.categories[0] == 1
        assert len(cat.codes) == 1
        assert cat.codes[0] == 0

        cat = Categorical(["a"])
        assert len(cat.categories) == 1
        assert cat.categories[0] == "a"
        assert len(cat.codes) == 1
        assert cat.codes[0] == 0

        # Scalars should be converted to lists
        cat = Categorical(1)
        assert len(cat.categories) == 1
        assert cat.categories[0] == 1
        assert len(cat.codes) == 1
        assert cat.codes[0] == 0

        # two arrays
        # - when the first is an integer dtype and the second is not
        # - when the resulting codes are all -1/NaN
        # (these must construct without emitting any warning)
        with tm.assert_produces_warning(None):
            c_old = Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"])  # noqa

        with tm.assert_produces_warning(None):
            c_old = Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5])  # noqa

        # the next one are from the old docs
        with tm.assert_produces_warning(None):
            c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3])  # noqa
            cat = Categorical([1, 2], categories=[1, 2, 3])

        # this is a legitimate constructor
        with tm.assert_produces_warning(None):
            c = Categorical(  # noqa
                np.array([], dtype="int64"), categories=[3, 2, 1], ordered=True
            )

    def test_constructor_with_existing_categories(self):
        # GH25318: constructing with pd.Series used to bogusly skip recoding
        # categories
        c0 = Categorical(["a", "b", "c", "a"])
        c1 = Categorical(["a", "b", "c", "a"], categories=["b", "c"])

        c2 = Categorical(c0, categories=c1.categories)
        tm.assert_categorical_equal(c1, c2)

        c3 = Categorical(Series(c0), categories=c1.categories)
        tm.assert_categorical_equal(c1, c3)

    def test_constructor_not_sequence(self):
        # https://github.com/pandas-dev/pandas/issues/16022
        msg = r"^Parameter 'categories' must be list-like, was"
        with pytest.raises(TypeError, match=msg):
            Categorical(["a", "b"], categories="a")

    def test_constructor_with_null(self):

        # Cannot have NaN in categories
        # NOTE: "Categorial" matches the (misspelled) message raised by the
        # pandas version under test — do not "fix" the spelling here.
        msg = "Categorial categories cannot be null"
        with pytest.raises(ValueError, match=msg):
            Categorical([np.nan, "a", "b", "c"], categories=[np.nan, "a", "b", "c"])

        with pytest.raises(ValueError, match=msg):
            Categorical([None, "a", "b", "c"], categories=[None, "a", "b", "c"])

        with pytest.raises(ValueError, match=msg):
            Categorical(
                DatetimeIndex(["nat", "20160101"]),
                categories=[NaT, Timestamp("20160101")],
            )

    def test_constructor_with_index(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        tm.assert_categorical_equal(ci.values, Categorical(ci))

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        tm.assert_categorical_equal(
            ci.values, Categorical(ci.astype(object), categories=ci.categories)
        )

    def test_constructor_with_generator(self):
        # This was raising an Error in isna(single_val).any() because isna
        # returned a scalar for a generator
        xrange = range  # legacy alias kept from the py2-era test

        exp = Categorical([0, 1, 2])
        cat = Categorical((x for x in [0, 1, 2]))
        tm.assert_categorical_equal(cat, exp)
        cat = Categorical(xrange(3))
        tm.assert_categorical_equal(cat, exp)

        # This uses xrange internally
        from pandas.core.index import MultiIndex

        MultiIndex.from_product([range(5), ["a", "b", "c"]])

        # check that categories accept generators and sequences
        cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2]))
        tm.assert_categorical_equal(cat, exp)
        cat = Categorical([0, 1, 2], categories=xrange(3))
        tm.assert_categorical_equal(cat, exp)

    @pytest.mark.parametrize(
        "dtl",
        [
            date_range("1995-01-01 00:00:00", periods=5, freq="s"),
            date_range("1995-01-01 00:00:00", periods=5, freq="s", tz="US/Eastern"),
            timedelta_range("1 day", periods=5, freq="s"),
        ],
    )
    def test_constructor_with_datetimelike(self, dtl):
        # see gh-12077
        # constructor with a datetimelike and NaT

        s = Series(dtl)
        c = Categorical(s)

        # categories don't carry the original freq, so clear it on the
        # expected index before comparing
        expected = type(dtl)(s)
        expected.freq = None

        tm.assert_index_equal(c.categories, expected)
        tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype="int8"))

        # with NaT: NaT becomes code -1, not a category
        s2 = s.copy()
        s2.iloc[-1] = NaT
        c = Categorical(s2)

        expected = type(dtl)(s2.dropna())
        expected.freq = None

        tm.assert_index_equal(c.categories, expected)

        exp = np.array([0, 1, 2, 3, -1], dtype=np.int8)
        tm.assert_numpy_array_equal(c.codes, exp)

        result = repr(c)
        assert "NaT" in result

    def test_constructor_from_index_series_datetimetz(self):
        idx = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern")
        result = Categorical(idx)
        tm.assert_index_equal(result.categories, idx)

        result = Categorical(Series(idx))
        tm.assert_index_equal(result.categories, idx)

    def test_constructor_from_index_series_timedelta(self):
        idx = timedelta_range("1 days", freq="D", periods=3)
        result = Categorical(idx)
        tm.assert_index_equal(result.categories, idx)

        result = Categorical(Series(idx))
        tm.assert_index_equal(result.categories, idx)

    def test_constructor_from_index_series_period(self):
        idx = period_range("2015-01-01", freq="D", periods=3)
        result = Categorical(idx)
        tm.assert_index_equal(result.categories, idx)

        result = Categorical(Series(idx))
        tm.assert_index_equal(result.categories, idx)

    def test_constructor_invariant(self):
        # GH 14190: Categorical(Categorical(x)) round-trips for many dtypes
        vals = [
            np.array([1.0, 1.2, 1.8, np.nan]),
            np.array([1, 2, 3], dtype="int64"),
            ["a", "b", "c", np.nan],
            [pd.Period("2014-01"), pd.Period("2014-02"), NaT],
            [Timestamp("2014-01-01"), Timestamp("2014-01-02"), NaT],
            [
                Timestamp("2014-01-01", tz="US/Eastern"),
                Timestamp("2014-01-02", tz="US/Eastern"),
                NaT,
            ],
        ]
        for val in vals:
            c = Categorical(val)
            c2 = Categorical(c)
            tm.assert_categorical_equal(c, c2)

    @pytest.mark.parametrize("ordered", [True, False])
    def test_constructor_with_dtype(self, ordered):
        categories = ["b", "a", "c"]
        dtype = CategoricalDtype(categories, ordered=ordered)
        result = Categorical(["a", "b", "a", "c"], dtype=dtype)
        expected = Categorical(
            ["a", "b", "a", "c"], categories=categories, ordered=ordered
        )
        tm.assert_categorical_equal(result, expected)
        assert result.ordered is ordered

    def test_constructor_dtype_and_others_raises(self):
        # dtype is mutually exclusive with categories/ordered
        dtype = CategoricalDtype(["a", "b"], ordered=True)
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=msg):
            Categorical(["a", "b"], categories=["a", "b"], dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            Categorical(["a", "b"], ordered=True, dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            Categorical(["a", "b"], ordered=False, dtype=dtype)

    @pytest.mark.parametrize("categories", [None, ["a", "b"], ["a", "c"]])
    @pytest.mark.parametrize("ordered", [True, False])
    def test_constructor_str_category(self, categories, ordered):
        # dtype="category" is allowed alongside categories/ordered
        result = Categorical(
            ["a", "b"], categories=categories, ordered=ordered, dtype="category"
        )
        expected = Categorical(["a", "b"], categories=categories, ordered=ordered)
        tm.assert_categorical_equal(result, expected)

    def test_constructor_str_unknown(self):
        with pytest.raises(ValueError, match="Unknown dtype"):
            Categorical([1, 2], dtype="foo")

    def test_constructor_from_categorical_with_dtype(self):
        dtype = CategoricalDtype(["a", "b", "c"], ordered=True)
        values = Categorical(["a", "b", "d"])
        result = Categorical(values, dtype=dtype)
        # We use dtype.categories, not values.categories
        expected = Categorical(
            ["a", "b", "d"], categories=["a", "b", "c"], ordered=True
        )
        tm.assert_categorical_equal(result, expected)

    def test_constructor_from_categorical_with_unknown_dtype(self):
        dtype = CategoricalDtype(None, ordered=True)
        values = Categorical(["a", "b", "d"])
        result = Categorical(values, dtype=dtype)
        # We use values.categories, not dtype.categories
        expected = Categorical(
            ["a", "b", "d"], categories=["a", "b", "d"], ordered=True
        )
        tm.assert_categorical_equal(result, expected)

    def test_constructor_from_categorical_string(self):
        values = Categorical(["a", "b", "d"])
        # use categories, ordered
        result = Categorical(
            values, categories=["a", "b", "c"], ordered=True, dtype="category"
        )
        expected = Categorical(
            ["a", "b", "d"], categories=["a", "b", "c"], ordered=True
        )
        tm.assert_categorical_equal(result, expected)

        # No string
        result = Categorical(values, categories=["a", "b", "c"], ordered=True)
        tm.assert_categorical_equal(result, expected)

    def test_constructor_with_categorical_categories(self):
        # GH17884: categories may themselves be Categorical/CategoricalIndex
        expected = Categorical(["a", "b"], categories=["a", "b", "c"])

        result = Categorical(["a", "b"], categories=Categorical(["a", "b", "c"]))
        tm.assert_categorical_equal(result, expected)

        result = Categorical(["a", "b"], categories=CategoricalIndex(["a", "b", "c"]))
        tm.assert_categorical_equal(result, expected)

    def test_from_codes(self):
        """from_codes validation: code range, integer-ness, unique/non-null
        categories — then the happy path for both call styles."""

        # too few categories
        dtype = CategoricalDtype(categories=[1, 2])
        msg = "codes need to be between "
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([1, 2], categories=dtype.categories)
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([1, 2], dtype=dtype)

        # no int codes
        msg = "codes need to be array-like integers"
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(["a"], categories=dtype.categories)
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(["a"], dtype=dtype)

        # no unique categories
        with pytest.raises(ValueError, match="Categorical categories must be unique"):
            Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])

        # NaN categories included ("Categorial" mirrors pandas' message typo)
        with pytest.raises(ValueError, match="Categorial categories cannot be null"):
            Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])

        # too negative
        dtype = CategoricalDtype(categories=["a", "b", "c"])
        msg = r"codes need to be between -1 and len\(categories\)-1"
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([-2, 1, 2], dtype=dtype)

        exp = Categorical(["a", "b", "c"], ordered=False)
        res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
        tm.assert_categorical_equal(exp, res)

        res = Categorical.from_codes([0, 1, 2], dtype=dtype)
        tm.assert_categorical_equal(exp, res)

    def test_from_codes_with_categorical_categories(self):
        # GH17884
        expected = Categorical(["a", "b"], categories=["a", "b", "c"])

        result = Categorical.from_codes([0, 1], categories=Categorical(["a", "b", "c"]))
        tm.assert_categorical_equal(result, expected)

        result = Categorical.from_codes(
            [0, 1], categories=CategoricalIndex(["a", "b", "c"])
        )
        tm.assert_categorical_equal(result, expected)

        # non-unique Categorical still raises
        with pytest.raises(ValueError, match="Categorical categories must be unique"):
            Categorical.from_codes([0, 1], Categorical(["a", "b", "a"]))

    def test_from_codes_with_nan_code(self):
        # GH21767: NaN is not a valid code (use -1 for missing)
        codes = [1, 2, np.nan]
        dtype = CategoricalDtype(categories=["a", "b", "c"])
        with pytest.raises(ValueError, match="codes need to be array-like integers"):
            Categorical.from_codes(codes, categories=dtype.categories)
        with pytest.raises(ValueError, match="codes need to be array-like integers"):
            Categorical.from_codes(codes, dtype=dtype)

    def test_from_codes_with_float(self):
        # GH21767: integral floats are accepted with a deprecation warning;
        # non-integral floats raise
        codes = [1.0, 2.0, 0]  # integer, but in float dtype
        dtype = CategoricalDtype(categories=["a", "b", "c"])

        with tm.assert_produces_warning(FutureWarning):
            cat = Categorical.from_codes(codes, dtype.categories)
        tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype="i1"))

        with tm.assert_produces_warning(FutureWarning):
            cat = Categorical.from_codes(codes, dtype=dtype)
        tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype="i1"))

        codes = [1.1, 2.0, 0]  # non-integer
        with pytest.raises(ValueError, match="codes need to be array-like integers"):
            Categorical.from_codes(codes, dtype.categories)
        with pytest.raises(ValueError, match="codes need to be array-like integers"):
            Categorical.from_codes(codes, dtype=dtype)

    def test_from_codes_with_dtype_raises(self):
        # dtype= is mutually exclusive with categories=/ordered=
        msg = "Cannot specify"
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(
                [0, 1], categories=["a", "b"], dtype=CategoricalDtype(["a", "b"])
            )

        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(
                [0, 1], ordered=True, dtype=CategoricalDtype(["a", "b"])
            )

    def test_from_codes_neither(self):
        # one of categories= or dtype= is required
        msg = "Both were None"
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([0, 1])

    @pytest.mark.parametrize("dtype", [None, "category"])
    def test_from_inferred_categories(self, dtype):
        cats = ["a", "b"]
        codes = np.array([0, 0, 1, 1], dtype="i8")
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical.from_codes(codes, cats)
        tm.assert_categorical_equal(result, expected)

    @pytest.mark.parametrize("dtype", [None, "category"])
    def test_from_inferred_categories_sorts(self, dtype):
        # inferred categories come out sorted, with codes remapped to match
        cats = ["b", "a"]
        codes = np.array([0, 1, 1, 1], dtype="i8")
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical.from_codes([1, 0, 0, 0], ["a", "b"])
        tm.assert_categorical_equal(result, expected)

    def test_from_inferred_categories_dtype(self):
        # an explicit CategoricalDtype wins over the inferred categories;
        # "d" is not in the dtype and becomes NaN
        cats = ["a", "b", "d"]
        codes = np.array([0, 1, 0, 2], dtype="i8")
        dtype = CategoricalDtype(["c", "b", "a"], ordered=True)
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical(
            ["a", "b", "a", "d"], categories=["c", "b", "a"], ordered=True
        )
        tm.assert_categorical_equal(result, expected)

    def test_from_inferred_categories_coerces(self):
        # string labels are coerced to the dtype's category type; failures
        # ("bad") become NaN
        cats = ["1", "2", "bad"]
        codes = np.array([0, 0, 1, 2], dtype="i8")
        dtype = CategoricalDtype([1, 2])
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical([1, 1, 2, np.nan])
        tm.assert_categorical_equal(result, expected)

    @pytest.mark.parametrize("ordered", [None, True, False])
    def test_construction_with_ordered(self, ordered):
        # GH 9347, 9190: ordered=None defaults to False
        cat = Categorical([0, 1, 2], ordered=ordered)
        assert cat.ordered == bool(ordered)

    @pytest.mark.xfail(reason="Imaginary values not supported in Categorical")
    def test_constructor_imaginary(self):
        values = [1, 2, 3 + 1j]
        c1 = Categorical(values)
        tm.assert_index_equal(c1.categories, Index(values))
        tm.assert_numpy_array_equal(np.array(c1), np.array(values))
# --- @@ -0,0 +1,181 @@ : start of next file in this diff (Categorical dtype tests) ---
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
from pandas import Categorical, CategoricalIndex, Index, Series, Timestamp
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalDtypes:
|
||||
    def test_is_equal_dtype(self):
        """is_dtype_equal: same category set + orderedness are equal (category
        order is ignored); ordered vs. unordered and non-categoricals are not."""

        # test dtype comparisons between cats

        c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
        c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False)
        c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True)
        assert c1.is_dtype_equal(c1)
        assert c2.is_dtype_equal(c2)
        assert c3.is_dtype_equal(c3)
        assert c1.is_dtype_equal(c2)  # same categories, different order
        assert not c1.is_dtype_equal(c3)  # orderedness differs
        assert not c1.is_dtype_equal(Index(list("aabca")))
        assert not c1.is_dtype_equal(c1.astype(object))
        assert c1.is_dtype_equal(CategoricalIndex(c1))
        assert c1.is_dtype_equal(CategoricalIndex(c1, categories=list("cab")))
        assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))

        # GH 16659: also works against Series of categorical dtype
        s1 = Series(c1)
        s2 = Series(c2)
        s3 = Series(c3)
        assert c1.is_dtype_equal(s1)
        assert c2.is_dtype_equal(s2)
        assert c3.is_dtype_equal(s3)
        assert c1.is_dtype_equal(s2)
        assert not c1.is_dtype_equal(s3)
        assert not c1.is_dtype_equal(s1.astype(object))
def test_set_dtype_same(self):
|
||||
c = Categorical(["a", "b", "c"])
|
||||
result = c._set_dtype(CategoricalDtype(["a", "b", "c"]))
|
||||
tm.assert_categorical_equal(result, c)
|
||||
|
||||
def test_set_dtype_new_categories(self):
|
||||
c = Categorical(["a", "b", "c"])
|
||||
result = c._set_dtype(CategoricalDtype(list("abcd")))
|
||||
tm.assert_numpy_array_equal(result.codes, c.codes)
|
||||
tm.assert_index_equal(result.dtype.categories, Index(list("abcd")))
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, categories, new_categories",
|
||||
[
|
||||
# No NaNs, same cats, same order
|
||||
(["a", "b", "a"], ["a", "b"], ["a", "b"]),
|
||||
# No NaNs, same cats, different order
|
||||
(["a", "b", "a"], ["a", "b"], ["b", "a"]),
|
||||
# Same, unsorted
|
||||
(["b", "a", "a"], ["a", "b"], ["a", "b"]),
|
||||
# No NaNs, same cats, different order
|
||||
(["b", "a", "a"], ["a", "b"], ["b", "a"]),
|
||||
# NaNs
|
||||
(["a", "b", "c"], ["a", "b"], ["a", "b"]),
|
||||
(["a", "b", "c"], ["a", "b"], ["b", "a"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a", "b"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a", "b"]),
|
||||
# Introduce NaNs
|
||||
(["a", "b", "c"], ["a", "b"], ["a"]),
|
||||
(["a", "b", "c"], ["a", "b"], ["b"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a"]),
|
||||
# No overlap
|
||||
(["a", "b", "c"], ["a", "b"], ["d", "e"]),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("ordered", [True, False])
|
||||
def test_set_dtype_many(self, values, categories, new_categories, ordered):
|
||||
c = Categorical(values, categories)
|
||||
expected = Categorical(values, new_categories, ordered)
|
||||
result = c._set_dtype(expected.dtype)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_dtype_no_overlap(self):
|
||||
c = Categorical(["a", "b", "c"], ["d", "e"])
|
||||
result = c._set_dtype(CategoricalDtype(["a", "b"]))
|
||||
expected = Categorical([None, None, None], categories=["a", "b"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_codes_dtypes(self):
|
||||
|
||||
# GH 8453
|
||||
result = Categorical(["foo", "bar", "baz"])
|
||||
assert result.codes.dtype == "int8"
|
||||
|
||||
result = Categorical(["foo{i:05d}".format(i=i) for i in range(400)])
|
||||
assert result.codes.dtype == "int16"
|
||||
|
||||
result = Categorical(["foo{i:05d}".format(i=i) for i in range(40000)])
|
||||
assert result.codes.dtype == "int32"
|
||||
|
||||
# adding cats
|
||||
result = Categorical(["foo", "bar", "baz"])
|
||||
assert result.codes.dtype == "int8"
|
||||
result = result.add_categories(["foo{i:05d}".format(i=i) for i in range(400)])
|
||||
assert result.codes.dtype == "int16"
|
||||
|
||||
# removing cats
|
||||
result = result.remove_categories(
|
||||
["foo{i:05d}".format(i=i) for i in range(300)]
|
||||
)
|
||||
assert result.codes.dtype == "int8"
|
||||
|
||||
@pytest.mark.parametrize("ordered", [True, False])
|
||||
def test_astype(self, ordered):
|
||||
# string
|
||||
cat = Categorical(list("abbaaccc"), ordered=ordered)
|
||||
result = cat.astype(object)
|
||||
expected = np.array(cat)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
msg = "could not convert string to float"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
cat.astype(float)
|
||||
|
||||
# numeric
|
||||
cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
|
||||
result = cat.astype(object)
|
||||
expected = np.array(cat, dtype=object)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = cat.astype(int)
|
||||
expected = np.array(cat, dtype=np.int)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = cat.astype(float)
|
||||
expected = np.array(cat, dtype=np.float)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype_ordered", [True, False])
|
||||
@pytest.mark.parametrize("cat_ordered", [True, False])
|
||||
def test_astype_category(self, dtype_ordered, cat_ordered):
|
||||
# GH 10696/18593
|
||||
data = list("abcaacbab")
|
||||
cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)
|
||||
|
||||
# standard categories
|
||||
dtype = CategoricalDtype(ordered=dtype_ordered)
|
||||
result = cat.astype(dtype)
|
||||
expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
# non-standard categories
|
||||
dtype = CategoricalDtype(list("adc"), dtype_ordered)
|
||||
result = cat.astype(dtype)
|
||||
expected = Categorical(data, dtype=dtype)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
if dtype_ordered is False:
|
||||
# dtype='category' can't specify ordered, so only test once
|
||||
result = cat.astype("category")
|
||||
expected = cat
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_astype_category_ordered_none_deprecated(self):
|
||||
# GH 26336
|
||||
cdt1 = CategoricalDtype(categories=list("cdab"), ordered=True)
|
||||
cdt2 = CategoricalDtype(categories=list("cedafb"))
|
||||
cat = Categorical(list("abcdaba"), dtype=cdt1)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
cat.astype(cdt2)
|
||||
|
||||
def test_iter_python_types(self):
|
||||
# GH-19909
|
||||
cat = Categorical([1, 2])
|
||||
assert isinstance(list(cat)[0], int)
|
||||
assert isinstance(cat.tolist()[0], int)
|
||||
|
||||
def test_iter_python_types_datetime(self):
|
||||
cat = Categorical([Timestamp("2017-01-01"), Timestamp("2017-01-02")])
|
||||
assert isinstance(list(cat)[0], Timestamp)
|
||||
assert isinstance(cat.tolist()[0], Timestamp)
|
||||
@@ -0,0 +1,279 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
|
||||
import pandas.core.common as com
|
||||
from pandas.tests.arrays.categorical.common import TestCategorical
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalIndexingWithFactor(TestCategorical):
    """Indexing/assignment tests against the shared ordered ``self.factor``
    fixture defined on ``TestCategorical``."""

    def test_getitem(self):
        # scalar access
        assert self.factor[0] == "a"
        assert self.factor[-1] == "c"

        # list-of-positions indexer
        subf = self.factor[[0, 1, 2]]
        tm.assert_numpy_array_equal(subf._codes, np.array([0, 1, 1], dtype=np.int8))

        # boolean-mask indexer
        subf = self.factor[np.asarray(self.factor) == "c"]
        tm.assert_numpy_array_equal(subf._codes, np.array([2, 2, 2], dtype=np.int8))

    def test_setitem(self):

        # int/positional
        c = self.factor.copy()
        c[0] = "b"
        assert c[0] == "b"
        c[-1] = "a"
        assert c[-1] == "a"

        # boolean
        c = self.factor.copy()
        indexer = np.zeros(len(c), dtype="bool")
        indexer[0] = True
        indexer[-1] = True
        c[indexer] = "c"
        expected = Categorical(["c", "b", "b", "a", "a", "c", "c", "c"], ordered=True)

        tm.assert_categorical_equal(c, expected)

    @pytest.mark.parametrize(
        "other",
        [pd.Categorical(["b", "a"]), pd.Categorical(["b", "a"], categories=["b", "a"])],
    )
    def test_setitem_same_but_unordered(self, other):
        # GH-24142: assignment from an unordered categorical with the same
        # category set is allowed regardless of category order
        target = pd.Categorical(["a", "b"], categories=["a", "b"])
        mask = np.array([True, False])
        target[mask] = other[mask]
        expected = pd.Categorical(["b", "b"], categories=["a", "b"])
        tm.assert_categorical_equal(target, expected)

    @pytest.mark.parametrize(
        "other",
        [
            pd.Categorical(["b", "a"], categories=["b", "a", "c"]),
            pd.Categorical(["b", "a"], categories=["a", "b", "c"]),
            pd.Categorical(["a", "a"], categories=["a"]),
            pd.Categorical(["b", "b"], categories=["b"]),
        ],
    )
    def test_setitem_different_unordered_raises(self, other):
        # GH-24142: differing category sets raise even when unordered
        target = pd.Categorical(["a", "b"], categories=["a", "b"])
        mask = np.array([True, False])
        with pytest.raises(ValueError):
            target[mask] = other[mask]

    @pytest.mark.parametrize(
        "other",
        [
            pd.Categorical(["b", "a"]),
            pd.Categorical(["b", "a"], categories=["b", "a"], ordered=True),
            pd.Categorical(["b", "a"], categories=["a", "b", "c"], ordered=True),
        ],
    )
    def test_setitem_same_ordered_raises(self, other):
        # GH-24142: for ordered targets, the assigned categorical must match
        # categories exactly (same set, same order, same orderedness)
        target = pd.Categorical(["a", "b"], categories=["a", "b"], ordered=True)
        mask = np.array([True, False])

        with pytest.raises(ValueError):
            target[mask] = other[mask]
|
||||
|
||||
|
||||
class TestCategoricalIndexing:
    """Indexing, category assignment, get_indexer and ``Series.where``
    behavior for categorical data."""

    def test_getitem_listlike(self):

        # GH 9469
        # properly coerce the input indexers
        np.random.seed(1)
        c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8))
        result = c.codes[np.array([100000]).astype(np.int64)]
        expected = c[np.array([100000]).astype(np.int64)].codes
        tm.assert_numpy_array_equal(result, expected)

    def test_periodindex(self):
        # categories from a PeriodIndex are deduplicated and codes assigned
        # in first-seen order (unordered) ...
        idx1 = PeriodIndex(
            ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M"
        )

        cat1 = Categorical(idx1)
        str(cat1)
        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8)
        exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")
        tm.assert_numpy_array_equal(cat1._codes, exp_arr)
        tm.assert_index_equal(cat1.categories, exp_idx)

        # ... and in sorted order when ordered=True
        idx2 = PeriodIndex(
            ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M"
        )
        cat2 = Categorical(idx2, ordered=True)
        str(cat2)
        exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8)
        exp_idx2 = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")
        tm.assert_numpy_array_equal(cat2._codes, exp_arr)
        tm.assert_index_equal(cat2.categories, exp_idx2)

        idx3 = PeriodIndex(
            [
                "2013-12",
                "2013-11",
                "2013-10",
                "2013-09",
                "2013-08",
                "2013-07",
                "2013-05",
            ],
            freq="M",
        )
        cat3 = Categorical(idx3, ordered=True)
        exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8)
        exp_idx = PeriodIndex(
            [
                "2013-05",
                "2013-07",
                "2013-08",
                "2013-09",
                "2013-10",
                "2013-11",
                "2013-12",
            ],
            freq="M",
        )
        tm.assert_numpy_array_equal(cat3._codes, exp_arr)
        tm.assert_index_equal(cat3.categories, exp_idx)

    def test_categories_assignments(self):
        # renaming categories in place remaps the values, not the codes
        s = Categorical(["a", "b", "c", "a"])
        exp = np.array([1, 2, 3, 1], dtype=np.int64)
        s.categories = [1, 2, 3]
        tm.assert_numpy_array_equal(s.__array__(), exp)
        tm.assert_index_equal(s.categories, Index([1, 2, 3]))

        # lengthen
        with pytest.raises(ValueError):
            s.categories = [1, 2, 3, 4]

        # shorten
        with pytest.raises(ValueError):
            s.categories = [1, 2]

    # Combinations of sorted/unique:
    @pytest.mark.parametrize(
        "idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], [1, 3, 3, 4], [1, 2, 2, 4]]
    )
    # Combinations of missing/unique
    @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
    @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
    def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
        # GH 21448
        key = key_class(key_values, categories=range(1, 5))
        # Test for flat index and CategoricalIndex with same/different cats:
        for dtype in None, "category", key.dtype:
            idx = Index(idx_values, dtype=dtype)
            expected, exp_miss = idx.get_indexer_non_unique(key_values)
            result, res_miss = idx.get_indexer_non_unique(key)

            tm.assert_numpy_array_equal(expected, result)
            tm.assert_numpy_array_equal(exp_miss, res_miss)

    def test_where_unobserved_nan(self):
        # masked-out positions become NaN; categories are preserved
        ser = pd.Series(pd.Categorical(["a", "b"]))
        result = ser.where([True, False])
        expected = pd.Series(pd.Categorical(["a", None], categories=["a", "b"]))
        tm.assert_series_equal(result, expected)

        # all NA
        ser = pd.Series(pd.Categorical(["a", "b"]))
        result = ser.where([False, False])
        expected = pd.Series(pd.Categorical([None, None], categories=["a", "b"]))
        tm.assert_series_equal(result, expected)

    def test_where_unobserved_categories(self):
        # filling with a value that is already a category keeps the dtype
        ser = pd.Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"]))
        result = ser.where([True, True, False], other="b")
        expected = pd.Series(
            Categorical(["a", "b", "b"], categories=ser.cat.categories)
        )
        tm.assert_series_equal(result, expected)

    def test_where_other_categorical(self):
        ser = pd.Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"]))
        other = Categorical(["b", "c", "a"], categories=["a", "c", "b", "d"])
        result = ser.where([True, False, True], other)
        expected = pd.Series(Categorical(["a", "c", "c"], dtype=ser.dtype))
        tm.assert_series_equal(result, expected)

    def test_where_warns(self):
        # filling with a non-category value coerces to object and warns
        ser = pd.Series(Categorical(["a", "b", "c"]))
        with tm.assert_produces_warning(FutureWarning):
            result = ser.where([True, False, True], "d")

        expected = pd.Series(np.array(["a", "d", "c"], dtype="object"))
        tm.assert_series_equal(result, expected)

    def test_where_ordered_differs_raises(self):
        # differing ordered categoricals coerce to object and warn
        ser = pd.Series(
            Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"], ordered=True)
        )
        other = Categorical(
            ["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True
        )
        with tm.assert_produces_warning(FutureWarning):
            result = ser.where([True, False, True], other)

        expected = pd.Series(np.array(["a", "c", "c"], dtype=object))
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [True, False])
def test_mask_with_boolean(index):
    # A Categorical (or CategoricalIndex) of booleans is a valid boolean
    # indexer and selects the same rows as its object-dtype equivalent.
    s = Series(range(3))
    idx = Categorical([True, False, True])
    if index:
        idx = CategoricalIndex(idx)

    assert com.is_bool_indexer(idx)
    result = s[idx]
    expected = s[idx.astype("object")]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [True, False])
def test_mask_with_boolean_raises(index):
    # A boolean categorical mask containing a missing value cannot be used
    # as an indexer and must raise.
    s = Series(range(3))
    idx = Categorical([True, False, None])
    if index:
        idx = CategoricalIndex(idx)

    with pytest.raises(ValueError, match="NA / NaN"):
        s[idx]
|
||||
|
||||
|
||||
@pytest.fixture
def non_coercible_categorical(monkeypatch):
    """
    Monkeypatch Categorical.__array__ to ensure no implicit conversion.

    Raises
    ------
    ValueError
        When Categorical.__array__ is called.
    """
    # TODO(Categorical): identify other places where this may be
    # useful and move to a conftest.py
    def array(self, dtype=None):
        raise ValueError("I cannot be converted.")

    # the patch is scoped to the context manager, so it is undone as soon
    # as the using test finishes (the fixture yields inside the context)
    with monkeypatch.context() as m:
        m.setattr(Categorical, "__array__", array)
        yield
|
||||
|
||||
|
||||
def test_series_at(non_coercible_categorical):
|
||||
arr = Categorical(["a", "b", "c"])
|
||||
ser = Series(arr)
|
||||
result = ser.at[0]
|
||||
assert result == "a"
|
||||
@@ -0,0 +1,82 @@
|
||||
import collections
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
from pandas import Categorical, Index, isna
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalMissing:
    """Missing-value semantics of Categorical: NaN is stored as code -1 and
    never becomes a category itself."""

    def test_na_flags_int_categories(self):
        # #1457

        categories = list(range(10))
        labels = np.random.randint(0, 10, 20)
        labels[::5] = -1  # -1 codes mark missing values

        cat = Categorical(labels, categories, fastpath=True)
        repr(cat)

        tm.assert_numpy_array_equal(isna(cat), labels == -1)

    def test_nan_handling(self):

        # Nans are represented as -1 in codes
        c = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))
        c[1] = np.nan
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8))

        # Adding nan to categories should make assigned nan point to the
        # category!
        c = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))

    def test_set_dtype_nans(self):
        # values absent from the new categories become missing (-1)
        c = Categorical(["a", "b", np.nan])
        result = c._set_dtype(CategoricalDtype(["a", "c"]))
        tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8"))

    def test_set_item_nan(self):
        cat = Categorical([1, 2, 3])
        cat[1] = np.nan

        exp = Categorical([1, np.nan, 3], categories=[1, 2, 3])
        tm.assert_categorical_equal(cat, exp)

    @pytest.mark.parametrize(
        "fillna_kwargs, msg",
        [
            (
                dict(value=1, method="ffill"),
                "Cannot specify both 'value' and 'method'.",
            ),
            (dict(), "Must specify a fill 'value' or 'method'."),
            (dict(method="bad"), "Invalid fill method. Expecting .* bad"),
        ],
    )
    def test_fillna_raises(self, fillna_kwargs, msg):
        # https://github.com/pandas-dev/pandas/issues/19682
        cat = Categorical([1, 2, 3])

        with pytest.raises(ValueError, match=msg):
            cat.fillna(**fillna_kwargs)

    @pytest.mark.parametrize("named", [True, False])
    def test_fillna_iterable_category(self, named):
        # https://github.com/pandas-dev/pandas/issues/21097
        if named:
            Point = collections.namedtuple("Point", "x y")
        else:
            # plain-tuple stand-in for the namedtuple; a ``def`` instead of
            # assigning a lambda (PEP 8 / flake8 E731), same behavior
            def Point(*args):
                return args

        cat = Categorical([Point(0, 0), Point(0, 1), None])
        result = cat.fillna(Point(0, 0))
        expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])

        tm.assert_categorical_equal(result, expected)
|
||||
@@ -0,0 +1,431 @@
|
||||
import operator
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical, DataFrame, Series, date_range
|
||||
from pandas.tests.arrays.categorical.common import TestCategorical
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalOpsWithFactor(TestCategorical):
    """Comparison semantics against the shared ordered ``self.factor``."""

    def test_categories_none_comparisons(self):
        # inferred categories equal the explicit fixture categories
        factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
        tm.assert_categorical_equal(factor, self.factor)

    def test_comparisons(self):
        # comparisons against a scalar agree with the dense object version
        result = self.factor[self.factor == "a"]
        expected = self.factor[np.asarray(self.factor) == "a"]
        tm.assert_categorical_equal(result, expected)

        result = self.factor[self.factor != "a"]
        expected = self.factor[np.asarray(self.factor) != "a"]
        tm.assert_categorical_equal(result, expected)

        result = self.factor[self.factor < "c"]
        expected = self.factor[np.asarray(self.factor) < "c"]
        tm.assert_categorical_equal(result, expected)

        result = self.factor[self.factor > "a"]
        expected = self.factor[np.asarray(self.factor) > "a"]
        tm.assert_categorical_equal(result, expected)

        result = self.factor[self.factor >= "b"]
        expected = self.factor[np.asarray(self.factor) >= "b"]
        tm.assert_categorical_equal(result, expected)

        result = self.factor[self.factor <= "b"]
        expected = self.factor[np.asarray(self.factor) <= "b"]
        tm.assert_categorical_equal(result, expected)

        n = len(self.factor)

        other = self.factor[np.random.permutation(n)]
        result = self.factor == other
        expected = np.asarray(self.factor) == np.asarray(other)
        tm.assert_numpy_array_equal(result, expected)

        # equality with a non-category scalar is all-False, not an error
        result = self.factor == "d"
        expected = np.repeat(False, len(self.factor))
        tm.assert_numpy_array_equal(result, expected)

        # comparisons with categoricals
        cat_rev = Categorical(["a", "b", "c"], categories=["c", "b", "a"], ordered=True)
        cat_rev_base = Categorical(
            ["b", "b", "b"], categories=["c", "b", "a"], ordered=True
        )
        cat = Categorical(["a", "b", "c"], ordered=True)
        cat_base = Categorical(["b", "b", "b"], categories=cat.categories, ordered=True)

        # comparisons need to take categories ordering into account
        res_rev = cat_rev > cat_rev_base
        exp_rev = np.array([True, False, False])
        tm.assert_numpy_array_equal(res_rev, exp_rev)

        res_rev = cat_rev < cat_rev_base
        exp_rev = np.array([False, False, True])
        tm.assert_numpy_array_equal(res_rev, exp_rev)

        res = cat > cat_base
        exp = np.array([False, False, True])
        tm.assert_numpy_array_equal(res, exp)

        # Only categories with same categories can be compared
        with pytest.raises(TypeError):
            cat > cat_rev

        cat_rev_base2 = Categorical(["b", "b", "b"], categories=["c", "b", "a", "d"])

        with pytest.raises(TypeError):
            cat_rev > cat_rev_base2

        # Only categories with same ordering information can be compared
        cat_unordered = cat.set_ordered(False)
        assert not (cat > cat).any()

        with pytest.raises(TypeError):
            cat > cat_unordered

        # comparison (in both directions) with Series will raise
        s = Series(["b", "b", "b"])
        msg = (
            "Cannot compare a Categorical for op __gt__ with type"
            r" <class 'numpy\.ndarray'>"
        )
        with pytest.raises(TypeError, match=msg):
            cat > s
        with pytest.raises(TypeError, match=msg):
            cat_rev > s
        with pytest.raises(TypeError, match=msg):
            s < cat
        with pytest.raises(TypeError, match=msg):
            s < cat_rev

        # comparison with numpy.array will raise in both direction, but only on
        # newer numpy versions
        a = np.array(["b", "b", "b"])
        with pytest.raises(TypeError, match=msg):
            cat > a
        with pytest.raises(TypeError, match=msg):
            cat_rev > a

        # Make sure that unequal comparison take the categories order in
        # account
        cat_rev = Categorical(list("abc"), categories=list("cba"), ordered=True)
        exp = np.array([True, False, False])
        res = cat_rev > "b"
        tm.assert_numpy_array_equal(res, exp)

        # check that zero-dim array gets unboxed
        res = cat_rev > np.array("b")
        tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
|
||||
class TestCategoricalOps:
|
||||
def test_compare_frame(self):
|
||||
# GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame
|
||||
data = ["a", "b", 2, "a"]
|
||||
cat = Categorical(data)
|
||||
|
||||
df = DataFrame(cat)
|
||||
|
||||
for op in [
|
||||
operator.eq,
|
||||
operator.ne,
|
||||
operator.ge,
|
||||
operator.gt,
|
||||
operator.le,
|
||||
operator.lt,
|
||||
]:
|
||||
with pytest.raises(ValueError):
|
||||
# alignment raises unless we transpose
|
||||
op(cat, df)
|
||||
|
||||
result = cat == df.T
|
||||
expected = DataFrame([[True, True, True, True]])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = cat[::-1] != df.T
|
||||
expected = DataFrame([[False, True, True, False]])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_datetime_categorical_comparison(self):
|
||||
dt_cat = Categorical(date_range("2014-01-01", periods=3), ordered=True)
|
||||
tm.assert_numpy_array_equal(dt_cat > dt_cat[0], np.array([False, True, True]))
|
||||
tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, np.array([False, True, True]))
|
||||
|
||||
def test_reflected_comparison_with_scalars(self):
|
||||
# GH8658
|
||||
cat = Categorical([1, 2, 3], ordered=True)
|
||||
tm.assert_numpy_array_equal(cat > cat[0], np.array([False, True, True]))
|
||||
tm.assert_numpy_array_equal(cat[0] < cat, np.array([False, True, True]))
|
||||
|
||||
def test_comparison_with_unknown_scalars(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
|
||||
# and following comparisons with scalars not in categories should raise
|
||||
# for unequal comps, but not for equal/not equal
|
||||
cat = Categorical([1, 2, 3], ordered=True)
|
||||
|
||||
msg = (
|
||||
"Cannot compare a Categorical for op __{}__ with a scalar,"
|
||||
" which is not a category"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg.format("lt")):
|
||||
cat < 4
|
||||
with pytest.raises(TypeError, match=msg.format("gt")):
|
||||
cat > 4
|
||||
with pytest.raises(TypeError, match=msg.format("gt")):
|
||||
4 < cat
|
||||
with pytest.raises(TypeError, match=msg.format("lt")):
|
||||
4 > cat
|
||||
|
||||
tm.assert_numpy_array_equal(cat == 4, np.array([False, False, False]))
|
||||
tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True]))
|
||||
|
||||
def test_comparison_of_ordered_categorical_with_nan_to_scalar(
|
||||
self, compare_operators_no_eq_ne
|
||||
):
|
||||
# https://github.com/pandas-dev/pandas/issues/26504
|
||||
# BUG: fix ordered categorical comparison with missing values (#26504 )
|
||||
# and following comparisons with scalars in categories with missing
|
||||
# values should be evaluated as False
|
||||
|
||||
cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True)
|
||||
scalar = 2
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", RuntimeWarning)
|
||||
expected = getattr(np.array(cat), compare_operators_no_eq_ne)(scalar)
|
||||
actual = getattr(cat, compare_operators_no_eq_ne)(scalar)
|
||||
tm.assert_numpy_array_equal(actual, expected)
|
||||
|
||||
def test_comparison_of_ordered_categorical_with_nan_to_listlike(
|
||||
self, compare_operators_no_eq_ne
|
||||
):
|
||||
# https://github.com/pandas-dev/pandas/issues/26504
|
||||
# and following comparisons of missing values in ordered Categorical
|
||||
# with listlike should be evaluated as False
|
||||
|
||||
cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True)
|
||||
other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True)
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", RuntimeWarning)
|
||||
expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2)
|
||||
actual = getattr(cat, compare_operators_no_eq_ne)(other)
|
||||
tm.assert_numpy_array_equal(actual, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data,reverse,base",
|
||||
[(list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])],
|
||||
)
|
||||
def test_comparisons(self, data, reverse, base):
|
||||
cat_rev = Series(Categorical(data, categories=reverse, ordered=True))
|
||||
cat_rev_base = Series(Categorical(base, categories=reverse, ordered=True))
|
||||
cat = Series(Categorical(data, ordered=True))
|
||||
cat_base = Series(
|
||||
Categorical(base, categories=cat.cat.categories, ordered=True)
|
||||
)
|
||||
s = Series(base)
|
||||
a = np.array(base)
|
||||
|
||||
# comparisons need to take categories ordering into account
|
||||
res_rev = cat_rev > cat_rev_base
|
||||
exp_rev = Series([True, False, False])
|
||||
tm.assert_series_equal(res_rev, exp_rev)
|
||||
|
||||
res_rev = cat_rev < cat_rev_base
|
||||
exp_rev = Series([False, False, True])
|
||||
tm.assert_series_equal(res_rev, exp_rev)
|
||||
|
||||
res = cat > cat_base
|
||||
exp = Series([False, False, True])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
scalar = base[1]
|
||||
res = cat > scalar
|
||||
exp = Series([False, False, True])
|
||||
exp2 = cat.values > scalar
|
||||
tm.assert_series_equal(res, exp)
|
||||
tm.assert_numpy_array_equal(res.values, exp2)
|
||||
res_rev = cat_rev > scalar
|
||||
exp_rev = Series([True, False, False])
|
||||
exp_rev2 = cat_rev.values > scalar
|
||||
tm.assert_series_equal(res_rev, exp_rev)
|
||||
tm.assert_numpy_array_equal(res_rev.values, exp_rev2)
|
||||
|
||||
# Only categories with same categories can be compared
|
||||
with pytest.raises(TypeError):
|
||||
cat > cat_rev
|
||||
|
||||
# categorical cannot be compared to Series or numpy array, and also
|
||||
# not the other way around
|
||||
msg = (
|
||||
"Cannot compare a Categorical for op __gt__ with type"
|
||||
r" <class 'numpy\.ndarray'>"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > s
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat_rev > s
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > a
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat_rev > a
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s < cat
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s < cat_rev
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
a < cat
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
a < cat_rev
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ctor",
|
||||
[
|
||||
lambda *args, **kwargs: Categorical(*args, **kwargs),
|
||||
lambda *args, **kwargs: Series(Categorical(*args, **kwargs)),
|
||||
],
|
||||
)
|
||||
def test_unordered_different_order_equal(self, ctor):
|
||||
# https://github.com/pandas-dev/pandas/issues/16014
|
||||
c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False)
|
||||
c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False)
|
||||
assert (c1 == c2).all()
|
||||
|
||||
c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False)
|
||||
c2 = ctor(["b", "a"], categories=["b", "a"], ordered=False)
|
||||
assert (c1 != c2).all()
|
||||
|
||||
c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False)
|
||||
c2 = ctor(["b", "b"], categories=["b", "a"], ordered=False)
|
||||
assert (c1 != c2).all()
|
||||
|
||||
c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False)
|
||||
c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False)
|
||||
result = c1 == c2
|
||||
tm.assert_numpy_array_equal(np.array(result), np.array([True, False]))
|
||||
|
||||
def test_unordered_different_categories_raises(self):
|
||||
c1 = Categorical(["a", "b"], categories=["a", "b"], ordered=False)
|
||||
c2 = Categorical(["a", "c"], categories=["c", "a"], ordered=False)
|
||||
|
||||
with pytest.raises(TypeError, match=("Categoricals can only be compared")):
|
||||
c1 == c2
|
||||
|
||||
def test_compare_different_lengths(self):
|
||||
c1 = Categorical([], categories=["a", "b"])
|
||||
c2 = Categorical([], categories=["a"])
|
||||
|
||||
msg = "Categories are different lengths"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
c1 == c2
|
||||
|
||||
def test_compare_unordered_different_order(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/16603#issuecomment-
|
||||
# 349290078
|
||||
a = pd.Categorical(["a"], categories=["a", "b"])
|
||||
b = pd.Categorical(["b"], categories=["b", "a"])
|
||||
assert not a.equals(b)
|
||||
|
||||
def test_numeric_like_ops(self):
|
||||
|
||||
df = DataFrame({"value": np.random.randint(0, 10000, 100)})
|
||||
labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
|
||||
cat_labels = Categorical(labels, labels)
|
||||
|
||||
df = df.sort_values(by=["value"], ascending=True)
|
||||
df["value_group"] = pd.cut(
|
||||
df.value, range(0, 10500, 500), right=False, labels=cat_labels
|
||||
)
|
||||
|
||||
# numeric ops should not succeed
|
||||
for op, str_rep in [
|
||||
("__add__", r"\+"),
|
||||
("__sub__", "-"),
|
||||
("__mul__", r"\*"),
|
||||
("__truediv__", "/"),
|
||||
]:
|
||||
msg = r"Series cannot perform the operation {}".format(str_rep)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(df, op)(df)
|
||||
|
||||
# reduction ops should not succeed (unless specifically defined, e.g.
|
||||
# min/max)
|
||||
s = df["value_group"]
|
||||
for op in ["kurt", "skew", "var", "std", "mean", "sum", "median"]:
|
||||
msg = "Categorical cannot perform the operation {}".format(op)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(s, op)(numeric_only=False)
|
||||
|
||||
# mad technically works because it takes always the numeric data
|
||||
|
||||
# numpy ops
|
||||
s = Series(Categorical([1, 2, 3, 4]))
|
||||
with pytest.raises(TypeError):
|
||||
np.sum(s)
|
||||
|
||||
# numeric ops on a Series
|
||||
for op, str_rep in [
|
||||
("__add__", r"\+"),
|
||||
("__sub__", "-"),
|
||||
("__mul__", r"\*"),
|
||||
("__truediv__", "/"),
|
||||
]:
|
||||
msg = r"Series cannot perform the operation {}".format(str_rep)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(s, op)(2)
|
||||
|
||||
# invalid ufunc
|
||||
with pytest.raises(TypeError):
|
||||
np.log(s)
|
||||
|
||||
def test_contains(self):
|
||||
# GH21508
|
||||
c = pd.Categorical(list("aabbca"), categories=list("cab"))
|
||||
|
||||
assert "b" in c
|
||||
assert "z" not in c
|
||||
assert np.nan not in c
|
||||
with pytest.raises(TypeError):
|
||||
assert [1] in c
|
||||
|
||||
# assert codes NOT in index
|
||||
assert 0 not in c
|
||||
assert 1 not in c
|
||||
|
||||
c = pd.Categorical(list("aabbca") + [np.nan], categories=list("cab"))
|
||||
assert np.nan in c
|
||||
|
||||
    @pytest.mark.parametrize(
        "item, expected",
        [
            (pd.Interval(0, 1), True),
            (1.5, True),
            (pd.Interval(0.5, 1.5), False),
            ("a", False),
            (pd.Timestamp(1), False),
            (pd.Timedelta(1), False),
        ],
        ids=str,
    )
    def test_contains_interval(self, item, expected):
        """``in`` on an interval-dtyped Categorical: exact Interval matches
        and scalars covered by an interval are True; non-matching intervals
        and foreign scalar types are False.

        GH 23705.
        """
        # GH 23705
        cat = Categorical(pd.IntervalIndex.from_breaks(range(3)))
        result = item in cat
        # ``is`` (not ==) to require a real bool, not a truthy array.
        assert result is expected
|
||||
|
||||
def test_contains_list(self):
|
||||
# GH#21729
|
||||
cat = Categorical([1, 2, 3])
|
||||
|
||||
assert "a" not in cat
|
||||
|
||||
with pytest.raises(TypeError, match="unhashable type"):
|
||||
["a"] in cat
|
||||
|
||||
with pytest.raises(TypeError, match="unhashable type"):
|
||||
["a", "b"] in cat
|
||||
@@ -0,0 +1,527 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
Series,
|
||||
date_range,
|
||||
option_context,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
from pandas.tests.arrays.categorical.common import TestCategorical
|
||||
|
||||
|
||||
class TestCategoricalReprWithFactor(TestCategorical):
    """Repr tests that rely on the shared ordered ``self.factor`` fixture."""

    def test_print(self):
        """repr shows the values followed by the ordered category listing."""
        lines = [
            "[a, b, b, a, a, c, c, c]",
            "Categories (3, object): [a < b < c]",
        ]
        assert repr(self.factor) == "\n".join(lines)
|
||||
|
||||
|
||||
class TestCategoricalRepr:
    """Golden-string tests pinning ``repr`` output of Categorical and
    CategoricalIndex across dtypes (int, datetime w/ and w/o tz, period,
    timedelta), ordered and unordered, short and truncated.

    NOTE(review): the expected strings below are exact golden values — every
    space inside them is significant.  The alignment padding on wrapped
    "Categories (...)" continuation lines looks collapsed in this copy of the
    file; verify it against the renderer's actual output before editing.
    """

    def test_big_print(self):
        """Long categoricals truncate with an ellipsis and add a Length line."""
        factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ["a", "b", "c"], fastpath=True)
        expected = [
            "[a, b, c, a, b, ..., b, c, a, b, c]",
            "Length: 600",
            "Categories (3, object): [a, b, c]",
        ]
        expected = "\n".join(expected)

        actual = repr(factor)

        assert actual == expected

    def test_empty_print(self):
        """Empty categoricals render '[]' plus their categories."""
        factor = Categorical([], ["a", "b", "c"])
        expected = "[], Categories (3, object): [a, b, c]"
        actual = repr(factor)
        assert actual == expected

        # NOTE(review): duplicate of the assertion above — harmless but redundant.
        assert expected == actual
        factor = Categorical([], ["a", "b", "c"], ordered=True)
        expected = "[], Categories (3, object): [a < b < c]"
        actual = repr(factor)
        assert expected == actual

        # No categories at all.
        factor = Categorical([], [])
        expected = "[], Categories (0, object): []"
        assert expected == repr(factor)

    def test_print_none_width(self):
        """GH10087: repr must work when display.width is None."""
        # GH10087
        a = Series(Categorical([1, 2, 3, 4]))
        exp = (
            "0 1\n1 2\n2 3\n3 4\n"
            "dtype: category\nCategories (4, int64): [1, 2, 3, 4]"
        )

        with option_context("display.width", None):
            assert exp == repr(a)

    def test_unicode_print(self):
        """East-Asian-width display option must not change Categorical repr."""
        c = Categorical(["aaaaa", "bb", "cccc"] * 20)
        expected = """\
[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc]
Length: 60
Categories (3, object): [aaaaa, bb, cccc]"""

        assert repr(c) == expected

        c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20)
        expected = """\
[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
Length: 60
Categories (3, object): [ああああ, いいいいい, ううううううう]"""  # noqa

        assert repr(c) == expected

        # unicode option should not affect to Categorical, as it doesn't care
        # the repr width
        with option_context("display.unicode.east_asian_width", True):

            c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20)
            expected = """[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
Length: 60
Categories (3, object): [ああああ, いいいいい, ううううううう]"""  # noqa

            assert repr(c) == expected

    def test_categorical_repr(self):
        """Unordered int categoricals: short, explicit-categories, and truncated."""
        c = Categorical([1, 2, 3])
        exp = """[1, 2, 3]
Categories (3, int64): [1, 2, 3]"""

        assert repr(c) == exp

        c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
        exp = """[1, 2, 3, 1, 2, 3]
Categories (3, int64): [1, 2, 3]"""

        assert repr(c) == exp

        c = Categorical([1, 2, 3, 4, 5] * 10)
        exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5]
Length: 50
Categories (5, int64): [1, 2, 3, 4, 5]"""

        assert repr(c) == exp

        c = Categorical(np.arange(20))
        exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19]
Length: 20
Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]"""

        assert repr(c) == exp

    def test_categorical_repr_ordered(self):
        """Ordered int categoricals use ' < ' between categories."""
        c = Categorical([1, 2, 3], ordered=True)
        exp = """[1, 2, 3]
Categories (3, int64): [1 < 2 < 3]"""

        assert repr(c) == exp

        c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True)
        exp = """[1, 2, 3, 1, 2, 3]
Categories (3, int64): [1 < 2 < 3]"""

        assert repr(c) == exp

        c = Categorical([1, 2, 3, 4, 5] * 10, ordered=True)
        exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5]
Length: 50
Categories (5, int64): [1 < 2 < 3 < 4 < 5]"""

        assert repr(c) == exp

        c = Categorical(np.arange(20), ordered=True)
        exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19]
Length: 20
Categories (20, int64): [0 < 1 < 2 < 3 ... 16 < 17 < 18 < 19]"""

        assert repr(c) == exp

    def test_categorical_repr_datetime(self):
        """Datetime categoricals, naive and tz-aware, single and appended."""
        idx = date_range("2011-01-01 09:00", freq="H", periods=5)
        c = Categorical(idx)

        # TODO(wesm): exceeding 80 characters in the console is not good
        # behavior
        exp = (
            "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, "
            "2011-01-01 12:00:00, 2011-01-01 13:00:00]\n"
            "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, "
            "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n"
            " 2011-01-01 12:00:00, "
            "2011-01-01 13:00:00]"
            ""
        )
        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx)
        exp = (
            "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, "
            "2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, "
            "2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, "
            "2011-01-01 13:00:00]\n"
            "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, "
            "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n"
            " 2011-01-01 12:00:00, "
            "2011-01-01 13:00:00]"
        )

        assert repr(c) == exp

        idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern")
        c = Categorical(idx)
        exp = (
            "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, "
            "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, "
            "2011-01-01 13:00:00-05:00]\n"
            "Categories (5, datetime64[ns, US/Eastern]): "
            "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n"
            " "
            "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n"
            " "
            "2011-01-01 13:00:00-05:00]"
        )

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx)
        exp = (
            "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, "
            "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, "
            "2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, "
            "2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, "
            "2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n"
            "Categories (5, datetime64[ns, US/Eastern]): "
            "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n"
            " "
            "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n"
            " "
            "2011-01-01 13:00:00-05:00]"
        )

        assert repr(c) == exp

    def test_categorical_repr_datetime_ordered(self):
        """Ordered datetime categoricals use ' < ' between categories."""
        idx = date_range("2011-01-01 09:00", freq="H", periods=5)
        c = Categorical(idx, ordered=True)
        exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
2011-01-01 12:00:00 < 2011-01-01 13:00:00]"""  # noqa

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx, ordered=True)
        exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
2011-01-01 12:00:00 < 2011-01-01 13:00:00]"""  # noqa

        assert repr(c) == exp

        idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern")
        c = Categorical(idx, ordered=True)
        exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
2011-01-01 13:00:00-05:00]"""  # noqa

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx, ordered=True)
        exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
2011-01-01 13:00:00-05:00]"""  # noqa

        assert repr(c) == exp

    def test_categorical_repr_int_with_nan(self):
        """NaN in an int categorical renders as NaN without widening dtype."""
        c = Categorical([1, 2, np.nan])
        c_exp = """[1, 2, NaN]\nCategories (2, int64): [1, 2]"""
        assert repr(c) == c_exp

        s = Series([1, 2, np.nan], dtype="object").astype("category")
        s_exp = """0 1\n1 2\n2 NaN
dtype: category
Categories (2, int64): [1, 2]"""
        assert repr(s) == s_exp

    def test_categorical_repr_period(self):
        """Unordered period categoricals (hourly and monthly frequencies)."""
        idx = period_range("2011-01-01 09:00", freq="H", periods=5)
        c = Categorical(idx)
        exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2011-01-01 13:00]"""  # noqa

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx)
        exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2011-01-01 13:00]"""  # noqa

        assert repr(c) == exp

        idx = period_range("2011-01", freq="M", periods=5)
        c = Categorical(idx)
        exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx)
        exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""  # noqa

        assert repr(c) == exp

    def test_categorical_repr_period_ordered(self):
        """Ordered period categoricals use ' < ' between categories."""
        idx = period_range("2011-01-01 09:00", freq="H", periods=5)
        c = Categorical(idx, ordered=True)
        exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2011-01-01 13:00]"""  # noqa

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx, ordered=True)
        exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2011-01-01 13:00]"""  # noqa

        assert repr(c) == exp

        idx = period_range("2011-01", freq="M", periods=5)
        c = Categorical(idx, ordered=True)
        exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx, ordered=True)
        exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""  # noqa

        assert repr(c) == exp

    def test_categorical_repr_timedelta(self):
        """Unordered timedelta categoricals, short and truncated."""
        idx = timedelta_range("1 days", periods=5)
        c = Categorical(idx)
        exp = """[1 days, 2 days, 3 days, 4 days, 5 days]
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx)
        exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days]
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""  # noqa

        assert repr(c) == exp

        idx = timedelta_range("1 hours", periods=20)
        c = Categorical(idx)
        exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
Length: 20
Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,
18 days 01:00:00, 19 days 01:00:00]"""  # noqa

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx)
        exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
Length: 40
Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,
18 days 01:00:00, 19 days 01:00:00]"""  # noqa

        assert repr(c) == exp

    def test_categorical_repr_timedelta_ordered(self):
        """Ordered timedelta categoricals use ' < ' between categories."""
        idx = timedelta_range("1 days", periods=5)
        c = Categorical(idx, ordered=True)
        exp = """[1 days, 2 days, 3 days, 4 days, 5 days]
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""  # noqa

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx, ordered=True)
        exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days]
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""  # noqa

        assert repr(c) == exp

        idx = timedelta_range("1 hours", periods=20)
        c = Categorical(idx, ordered=True)
        exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
Length: 20
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
18 days 01:00:00 < 19 days 01:00:00]"""  # noqa

        assert repr(c) == exp

        c = Categorical(idx.append(idx), categories=idx, ordered=True)
        exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
Length: 40
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
18 days 01:00:00 < 19 days 01:00:00]"""  # noqa

        assert repr(c) == exp

    def test_categorical_index_repr(self):
        """CategoricalIndex repr: short listing and truncated categories."""
        idx = CategoricalIndex(Categorical([1, 2, 3]))
        exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')"""  # noqa
        assert repr(idx) == exp

        i = CategoricalIndex(Categorical(np.arange(10)))
        exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')"""  # noqa
        assert repr(i) == exp

    def test_categorical_index_repr_ordered(self):
        """CategoricalIndex repr carries ordered=True through."""
        i = CategoricalIndex(Categorical([1, 2, 3], ordered=True))
        exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')"""  # noqa
        assert repr(i) == exp

        i = CategoricalIndex(Categorical(np.arange(10), ordered=True))
        exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')"""  # noqa
        assert repr(i) == exp

    def test_categorical_index_repr_datetime(self):
        """CategoricalIndex over naive and tz-aware datetimes."""
        idx = date_range("2011-01-01 09:00", freq="H", periods=5)
        i = CategoricalIndex(Categorical(idx))
        exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00',
'2011-01-01 11:00:00', '2011-01-01 12:00:00',
'2011-01-01 13:00:00'],
categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')"""  # noqa

        assert repr(i) == exp

        idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern")
        i = CategoricalIndex(Categorical(idx))
        exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
'2011-01-01 13:00:00-05:00'],
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')"""  # noqa

        assert repr(i) == exp

    def test_categorical_index_repr_datetime_ordered(self):
        """Ordered datetime CategoricalIndex, including an appended (duplicated) index."""
        idx = date_range("2011-01-01 09:00", freq="H", periods=5)
        i = CategoricalIndex(Categorical(idx, ordered=True))
        exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00',
'2011-01-01 11:00:00', '2011-01-01 12:00:00',
'2011-01-01 13:00:00'],
categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')"""  # noqa

        assert repr(i) == exp

        idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern")
        i = CategoricalIndex(Categorical(idx, ordered=True))
        exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
'2011-01-01 13:00:00-05:00'],
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')"""  # noqa

        assert repr(i) == exp

        i = CategoricalIndex(Categorical(idx.append(idx), ordered=True))
        exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
'2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00',
'2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00',
'2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'],
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')"""  # noqa

        assert repr(i) == exp

    def test_categorical_index_repr_period(self):
        """Period CategoricalIndex at every short length plus appended/monthly."""
        # test all length
        idx = period_range("2011-01-01 09:00", freq="H", periods=1)
        i = CategoricalIndex(Categorical(idx))
        exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')"""  # noqa
        assert repr(i) == exp

        idx = period_range("2011-01-01 09:00", freq="H", periods=2)
        i = CategoricalIndex(Categorical(idx))
        exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')"""  # noqa
        assert repr(i) == exp

        idx = period_range("2011-01-01 09:00", freq="H", periods=3)
        i = CategoricalIndex(Categorical(idx))
        exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')"""  # noqa
        assert repr(i) == exp

        idx = period_range("2011-01-01 09:00", freq="H", periods=5)
        i = CategoricalIndex(Categorical(idx))
        exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
'2011-01-01 12:00', '2011-01-01 13:00'],
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')"""  # noqa

        assert repr(i) == exp

        i = CategoricalIndex(Categorical(idx.append(idx)))
        exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
'2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00',
'2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00',
'2011-01-01 13:00'],
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')"""  # noqa

        assert repr(i) == exp

        idx = period_range("2011-01", freq="M", periods=5)
        i = CategoricalIndex(Categorical(idx))
        exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')"""  # noqa
        assert repr(i) == exp

    def test_categorical_index_repr_period_ordered(self):
        """Ordered period CategoricalIndex (hourly and monthly)."""
        idx = period_range("2011-01-01 09:00", freq="H", periods=5)
        i = CategoricalIndex(Categorical(idx, ordered=True))
        exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
'2011-01-01 12:00', '2011-01-01 13:00'],
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')"""  # noqa

        assert repr(i) == exp

        idx = period_range("2011-01", freq="M", periods=5)
        i = CategoricalIndex(Categorical(idx, ordered=True))
        exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')"""  # noqa
        assert repr(i) == exp

    def test_categorical_index_repr_timedelta(self):
        """Timedelta CategoricalIndex, short and with truncated categories."""
        idx = timedelta_range("1 days", periods=5)
        i = CategoricalIndex(Categorical(idx))
        exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')"""  # noqa
        assert repr(i) == exp

        idx = timedelta_range("1 hours", periods=10)
        i = CategoricalIndex(Categorical(idx))
        exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00',
'3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00',
'6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00',
'9 days 01:00:00'],
categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')"""  # noqa

        assert repr(i) == exp

    def test_categorical_index_repr_timedelta_ordered(self):
        """Ordered timedelta CategoricalIndex mirrors the unordered cases."""
        idx = timedelta_range("1 days", periods=5)
        i = CategoricalIndex(Categorical(idx, ordered=True))
        exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')"""  # noqa
        assert repr(i) == exp

        idx = timedelta_range("1 hours", periods=10)
        i = CategoricalIndex(Categorical(idx, ordered=True))
        exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00',
'3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00',
'6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00',
'9 days 01:00:00'],
categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')"""  # noqa

        assert repr(i) == exp
|
||||
@@ -0,0 +1,124 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Categorical, Index
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalSort:
    """Tests for Categorical.argsort / sort_values, including NaN placement."""

    def test_argsort(self):
        """argsort yields positions that would sort the values; for unique
        values the descending order is the exact reverse of ascending."""
        c = Categorical([5, 3, 1, 4, 2], ordered=True)

        expected = np.array([2, 4, 1, 3, 0])
        tm.assert_numpy_array_equal(
            c.argsort(ascending=True), expected, check_dtype=False
        )

        expected = expected[::-1]
        tm.assert_numpy_array_equal(
            c.argsort(ascending=False), expected, check_dtype=False
        )

    def test_numpy_argsort(self):
        """np.argsort dispatches to Categorical.argsort; unsupported numpy
        keyword arguments (axis, order) raise ValueError."""
        c = Categorical([5, 3, 1, 4, 2], ordered=True)

        expected = np.array([2, 4, 1, 3, 0])
        tm.assert_numpy_array_equal(np.argsort(c), expected, check_dtype=False)

        tm.assert_numpy_array_equal(
            np.argsort(c, kind="mergesort"), expected, check_dtype=False
        )

        msg = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(c, axis=0)

        msg = "the 'order' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(c, order="C")

    def test_sort_values(self):
        """sort_values works for ordered and unordered categoricals, honours
        ``ascending`` and ``inplace``, and never reorders the categories."""

        # unordered cats are sortable
        cat = Categorical(["a", "b", "b", "a"], ordered=False)
        cat.sort_values()

        cat = Categorical(["a", "c", "b", "d"], ordered=True)

        # sort_values
        res = cat.sort_values()
        exp = np.array(["a", "b", "c", "d"], dtype=object)
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, cat.categories)

        cat = Categorical(
            ["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True
        )
        res = cat.sort_values()
        exp = np.array(["a", "b", "c", "d"], dtype=object)
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, cat.categories)

        res = cat.sort_values(ascending=False)
        exp = np.array(["d", "c", "b", "a"], dtype=object)
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, cat.categories)

        # sort (inplace order)
        cat1 = cat.copy()
        cat1.sort_values(inplace=True)
        exp = np.array(["a", "b", "c", "d"], dtype=object)
        tm.assert_numpy_array_equal(cat1.__array__(), exp)
        # FIX: check the in-place result's categories; the original asserted
        # ``res.categories`` here, which merely re-tested the previous
        # out-of-place result (equivalent values, wrong object under test).
        tm.assert_index_equal(cat1.categories, cat.categories)

        # reverse
        cat = Categorical(["a", "c", "c", "b", "d"], ordered=True)
        res = cat.sort_values(ascending=False)
        exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object)
        exp_categories = Index(["a", "b", "c", "d"])
        tm.assert_numpy_array_equal(res.__array__(), exp_val)
        tm.assert_index_equal(res.categories, exp_categories)

    def test_sort_values_na_position(self):
        """NaNs sort to the end by default and obey ``na_position``;
        the categories themselves are never reordered.

        see gh-12882.
        """
        # see gh-12882
        cat = Categorical([5, 2, np.nan, 2, np.nan], ordered=True)
        exp_categories = Index([2, 5])

        exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan])
        res = cat.sort_values()  # default arguments
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, exp_categories)

        exp = np.array([np.nan, np.nan, 2.0, 2.0, 5.0])
        res = cat.sort_values(ascending=True, na_position="first")
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, exp_categories)

        exp = np.array([np.nan, np.nan, 5.0, 2.0, 2.0])
        res = cat.sort_values(ascending=False, na_position="first")
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, exp_categories)

        exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan])
        res = cat.sort_values(ascending=True, na_position="last")
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, exp_categories)

        exp = np.array([5.0, 2.0, 2.0, np.nan, np.nan])
        res = cat.sort_values(ascending=False, na_position="last")
        tm.assert_numpy_array_equal(res.__array__(), exp)
        tm.assert_index_equal(res.categories, exp_categories)

        cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True)
        res = cat.sort_values(ascending=False, na_position="last")
        exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object)
        exp_categories = Index(["a", "b", "c", "d"])
        tm.assert_numpy_array_equal(res.__array__(), exp_val)
        tm.assert_index_equal(res.categories, exp_categories)

        cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True)
        res = cat.sort_values(ascending=False, na_position="first")
        exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object)
        exp_categories = Index(["a", "b", "c", "d"])
        tm.assert_numpy_array_equal(res.__array__(), exp_val)
        tm.assert_index_equal(res.categories, exp_categories)
|
||||
@@ -0,0 +1,22 @@
|
||||
from pandas import Categorical
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalSubclassing:
    """Subclasses of Categorical must survive construction, from_codes, and map."""

    def test_constructor(self):
        """The plain constructor preserves the subclass type and the data."""
        subclassed = tm.SubclassedCategorical(["a", "b", "c"])
        assert isinstance(subclassed, tm.SubclassedCategorical)
        tm.assert_categorical_equal(subclassed, Categorical(["a", "b", "c"]))

    def test_from_codes(self):
        """from_codes on a subclass returns that subclass with equal contents."""
        codes, cats = [1, 0, 2], ["a", "b", "c"]
        subclassed = tm.SubclassedCategorical.from_codes(codes, cats)
        assert isinstance(subclassed, tm.SubclassedCategorical)
        tm.assert_categorical_equal(
            subclassed, Categorical.from_codes(codes, cats)
        )

    def test_map(self):
        """map keeps the subclass type while transforming the values."""
        subclassed = tm.SubclassedCategorical(["a", "b", "c"])
        mapped = subclassed.map(lambda value: value.upper())
        assert isinstance(mapped, tm.SubclassedCategorical)
        tm.assert_categorical_equal(mapped, Categorical(["A", "B", "C"]))
|
||||
@@ -0,0 +1,29 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalWarnings:
    """Warning-emission checks for Categorical and the Series.cat accessor."""

    @staticmethod
    def _check_cat_attr_warns(attr):
        # Shared check: touching a deprecated accessor attribute must warn.
        with tm.assert_produces_warning(FutureWarning):
            getattr(pd.Series(["a", "b"], dtype="category").cat, attr)

    def test_tab_complete_warning(self, ip):
        # Tab-completing on a Categorical must not emit any warning.
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip("IPython", minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        ip.run_code("import pandas as pd; c = Categorical([])")
        with tm.assert_produces_warning(None), provisionalcompleter("ignore"):
            list(ip.Completer.completions("c.", 1))

    def test_CategoricalAccessor_categorical_deprecation(self):
        self._check_cat_attr_warns("categorical")

    def test_CategoricalAccessor_name_deprecation(self):
        self._check_cat_attr_warns("name")

    def test_CategoricalAccessor_index_deprecation(self):
        self._check_cat_attr_warns("index")
|
||||
@@ -0,0 +1,101 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
timedelta_range,
|
||||
)
|
||||
from pandas.core.arrays import IntervalArray
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
# Each param is a (left, right) pair of endpoint containers; the pairs cover
# the endpoint dtypes IntervalArray supports: int64, float64, timedelta64,
# datetime64, and tz-aware datetime64.
@pytest.fixture(
    params=[
        (Index([0, 2, 4]), Index([1, 3, 5])),
        (Index([0.0, 1.0, 2.0]), Index([1.0, 2.0, 3.0])),
        (timedelta_range("0 days", periods=3), timedelta_range("1 day", periods=3)),
        (date_range("20170101", periods=3), date_range("20170102", periods=3)),
        (
            date_range("20170101", periods=3, tz="US/Eastern"),
            date_range("20170102", periods=3, tz="US/Eastern"),
        ),
    ],
    # Label each case by the dtype of its left endpoints for readable test IDs.
    ids=lambda x: str(x[0].dtype),
)
def left_right_dtypes(request):
    """
    Fixture for building an IntervalArray from various dtypes
    """
    return request.param
|
||||
|
||||
|
||||
class TestAttributes:
    """Attribute checks shared by IntervalArray and IntervalIndex."""

    @pytest.mark.parametrize(
        "left, right",
        [
            (0, 1),
            (Timedelta("0 days"), Timedelta("1 day")),
            (Timestamp("2018-01-01"), Timestamp("2018-01-02")),
            pytest.param(
                Timestamp("2018-01-01", tz="US/Eastern"),
                Timestamp("2018-01-02", tz="US/Eastern"),
                marks=pytest.mark.xfail(strict=True, reason="GH 27011"),
            ),
        ],
    )
    @pytest.mark.parametrize("constructor", [IntervalArray, IntervalIndex])
    def test_is_empty(self, constructor, left, right, closed):
        # GH27219: an interval with equal endpoints is empty unless closed
        # on both sides; a proper interval and NA are never empty.
        endpoint_pairs = [(left, left), (left, right), np.nan]
        container = constructor.from_tuples(endpoint_pairs, closed=closed)
        expected = np.array([closed != "both", False, False])
        tm.assert_numpy_array_equal(container.is_empty, expected)
|
||||
|
||||
|
||||
class TestMethods:
    """Method behavior checks for IntervalArray."""

    @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
    def test_set_closed(self, closed, new_closed):
        # GH 21670: set_closed should be equivalent to rebuilding the array
        # with the new closed side.
        original = IntervalArray.from_breaks(range(10), closed=closed)
        expected = IntervalArray.from_breaks(range(10), closed=new_closed)
        tm.assert_extension_array_equal(original.set_closed(new_closed), expected)

    @pytest.mark.parametrize(
        "other",
        [
            Interval(0, 1, closed="right"),
            IntervalArray.from_breaks([1, 2, 3, 4], closed="right"),
        ],
    )
    def test_where_raises(self, other):
        # Filling with intervals of a mismatched closed side is an error.
        ser = pd.Series(IntervalArray.from_breaks([1, 2, 3, 4], closed="left"))
        match = "'value.closed' is 'right', expected 'left'."
        with pytest.raises(ValueError, match=match):
            ser.where([True, False, True], other=other)
|
||||
|
||||
|
||||
class TestSetitem:
    """__setitem__ behavior for IntervalArray."""

    def test_set_na(self, left_right_dtypes):
        # Assigning NaN to a position must replace both endpoints with the
        # dtype-appropriate NA value.
        left, right = left_right_dtypes
        result = IntervalArray.from_arrays(left, right)
        result[0] = np.nan

        na_left = Index([left._na_value] + list(left[1:]))
        na_right = Index([right._na_value] + list(right[1:]))
        expected = IntervalArray.from_arrays(na_left, na_right)

        tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_repr_matches():
    """IntervalIndex and its backing IntervalArray share one repr format."""
    index = IntervalIndex.from_breaks([1, 2, 3])
    index_repr = repr(index)
    array_repr = repr(index.values)
    # The two reprs are expected to differ only by the class name.
    assert index_repr.replace("Index", "Array") == array_repr
|
||||
@@ -0,0 +1,90 @@
|
||||
"""Tests for Interval-Interval operations, such as overlaps, contains, etc."""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Interval, IntervalIndex, Timedelta, Timestamp
|
||||
from pandas.core.arrays import IntervalArray
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
# Parametrized over the two interval containers so every test in this module
# runs against both the array and the index implementation.
@pytest.fixture(params=[IntervalArray, IntervalIndex])
def constructor(request):
    """
    Fixture for testing both interval container classes.
    """
    return request.param
|
||||
|
||||
|
||||
# (start, shift) pairs: timedelta/timedelta, timestamp/timedelta, and int/int.
# In every case start + k * shift is a valid endpoint, so tests can build
# interval ladders generically across dtypes.
@pytest.fixture(
    params=[
        (Timedelta("0 days"), Timedelta("1 day")),
        (Timestamp("2018-01-01"), Timedelta("1 day")),
        (0, 1),
    ],
    # Label each case by the type of its start value.
    ids=lambda x: type(x[0]).__name__,
)
def start_shift(request):
    """
    Fixture for generating intervals of different types from a start value
    and a shift value that can be added to start to generate an endpoint.
    """
    return request.param
|
||||
|
||||
|
||||
class TestOverlaps:
    """Tests for IntervalArray/IntervalIndex.overlaps."""

    def test_overlaps_interval(self, constructor, start_shift, closed, other_closed):
        start, shift = start_shift
        query = Interval(start, start + 3 * shift, other_closed)

        # intervals: identical, nested, spanning, partial, adjacent, disjoint
        endpoint_pairs = [
            (start, start + 3 * shift),
            (start + shift, start + 2 * shift),
            (start - shift, start + 4 * shift),
            (start + 2 * shift, start + 4 * shift),
            (start + 3 * shift, start + 4 * shift),
            (start + 4 * shift, start + 5 * shift),
        ]
        container = constructor.from_tuples(endpoint_pairs, closed)

        # An adjacent interval only overlaps when both touching endpoints
        # are closed.
        adjacent = query.closed_right and container.closed_left
        expected = np.array([True, True, True, True, adjacent, False])
        tm.assert_numpy_array_equal(container.overlaps(query), expected)

    @pytest.mark.parametrize("other_constructor", [IntervalArray, IntervalIndex])
    def test_overlaps_interval_container(self, constructor, other_constructor):
        # TODO: modify this test when implemented
        container = constructor.from_breaks(range(5))
        other = other_constructor.from_breaks(range(5))
        with pytest.raises(NotImplementedError):
            container.overlaps(other)

    def test_overlaps_na(self, constructor, start_shift):
        """NA values are marked as False"""
        start, shift = start_shift
        query = Interval(start, start + shift)

        endpoint_pairs = [
            (start, start + shift),
            np.nan,
            (start + 2 * shift, start + 3 * shift),
        ]
        container = constructor.from_tuples(endpoint_pairs)

        expected = np.array([True, False, False])
        tm.assert_numpy_array_equal(container.overlaps(query), expected)

    @pytest.mark.parametrize(
        "other",
        [10, True, "foo", Timedelta("1 day"), Timestamp("2018-01-01")],
        ids=lambda x: type(x).__name__,
    )
    def test_overlaps_invalid_type(self, constructor, other):
        container = constructor.from_breaks(range(5))
        msg = "`other` must be Interval-like, got {other}".format(
            other=type(other).__name__
        )
        with pytest.raises(TypeError, match=msg):
            container.overlaps(other)
|
||||
@@ -0,0 +1,119 @@
|
||||
import string
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSeriesAccessor:
    """Tests for the Series.sparse accessor."""

    # TODO: collect other Series accessor tests
    def test_to_dense(self):
        # A sparse-backed Series densifies back to its plain equivalent.
        values = [0, 1, 0, 10]
        sparse_ser = pd.Series(values, dtype="Sparse[int64]")
        dense = sparse_ser.sparse.to_dense()
        tm.assert_series_equal(dense, pd.Series(values))
|
||||
|
||||
|
||||
class TestFrameAccessor:
    """Tests for the DataFrame.sparse accessor and scipy interop."""

    def test_accessor_raises(self):
        # The accessor is only available when all columns are sparse.
        df = pd.DataFrame({"A": [0, 1]})
        with pytest.raises(AttributeError, match="sparse"):
            df.sparse

    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
    @pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])])
    @pytest.mark.parametrize("dtype", ["float64", "int64"])
    @td.skip_if_no_scipy
    def test_from_spmatrix(self, format, labels, dtype):
        """from_spmatrix round-trips a scipy identity matrix for each format."""
        import scipy.sparse

        # Fill value is the zero of the requested dtype.
        sp_dtype = pd.SparseDtype(dtype, np.array(0, dtype=dtype).item())

        mat = scipy.sparse.eye(10, format=format, dtype=dtype)
        result = pd.DataFrame.sparse.from_spmatrix(mat, index=labels, columns=labels)
        expected = pd.DataFrame(
            np.eye(10, dtype=dtype), index=labels, columns=labels
        ).astype(sp_dtype)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "columns",
        [["a", "b"], pd.MultiIndex.from_product([["A"], ["a", "b"]]), ["a", "a"]],
    )
    @td.skip_if_no_scipy
    def test_from_spmatrix_columns(self, columns):
        """from_spmatrix accepts list, MultiIndex, and duplicate column labels."""
        import scipy.sparse

        dtype = pd.SparseDtype("float64", 0.0)

        mat = scipy.sparse.random(10, 2, density=0.5)
        result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns)
        expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype)
        tm.assert_frame_equal(result, expected)

    @td.skip_if_no_scipy
    def test_to_coo(self):
        """to_coo produces a coo_matrix equal to the dense frame values."""
        import scipy.sparse

        df = pd.DataFrame({"A": [0, 1, 0], "B": [1, 0, 0]}, dtype="Sparse[int64, 0]")
        result = df.sparse.to_coo()
        expected = scipy.sparse.coo_matrix(np.asarray(df))
        # Sparse equality: no nonzero entries in the elementwise difference.
        assert (result != expected).nnz == 0

    def test_to_dense(self):
        """to_dense drops sparseness regardless of each column's fill value."""
        df = pd.DataFrame(
            {
                "A": pd.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 0)),
                "B": pd.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 1)),
                "C": pd.SparseArray([1.0, 0.0], dtype=pd.SparseDtype("float64", 0.0)),
            },
            index=["b", "a"],
        )
        result = df.sparse.to_dense()
        expected = pd.DataFrame(
            {"A": [1, 0], "B": [1, 0], "C": [1.0, 0.0]}, index=["b", "a"]
        )
        tm.assert_frame_equal(result, expected)

    def test_density(self):
        """density is the fraction of non-fill values across all columns."""
        df = pd.DataFrame(
            {
                "A": pd.SparseArray([1, 0, 2, 1], fill_value=0),
                "B": pd.SparseArray([0, 1, 1, 1], fill_value=0),
            }
        )
        res = df.sparse.density
        # 6 stored values out of 8 entries.
        expected = 0.75
        assert res == expected

    @pytest.mark.parametrize("dtype", ["int64", "float64"])
    @pytest.mark.parametrize("dense_index", [True, False])
    @td.skip_if_no_scipy
    def test_series_from_coo(self, dtype, dense_index):
        """Series.sparse.from_coo builds a MultiIndexed sparse Series."""
        import scipy.sparse

        A = scipy.sparse.eye(3, format="coo", dtype=dtype)
        result = pd.Series.sparse.from_coo(A, dense_index=dense_index)
        index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
        expected = pd.Series(
            pd.SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index
        )
        if dense_index:
            # dense_index=True fills in the full cartesian product of labels.
            expected = expected.reindex(pd.MultiIndex.from_product(index.levels))

        tm.assert_series_equal(result, expected)

    @td.skip_if_no_scipy
    def test_series_from_coo_incorrect_format_raises(self):
        # gh-26554: only COO input is accepted; CSR must raise.
        import scipy.sparse

        m = scipy.sparse.csr_matrix(np.array([[0, 1], [0, 0]]))
        with pytest.raises(
            TypeError, match="Expected coo_matrix. Got csr_matrix instead."
        ):
            pd.Series.sparse.from_coo(m)
|
||||
@@ -0,0 +1,506 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core import ops
|
||||
from pandas.core.sparse.api import SparseDtype
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
# Run every test against both internal sparse index representations.
@pytest.fixture(params=["integer", "block"])
def kind(request):
    """kind kwarg to pass to SparseArray/SparseSeries"""
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def mix(request):
    # whether to operate op(sparse, dense) instead of op(sparse, sparse)
    return request.param
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
|
||||
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
|
||||
class TestSparseArrayArithmetics:
|
||||
|
||||
_base = np.array
|
||||
_klass = pd.SparseArray
|
||||
|
||||
def _assert(self, a, b):
|
||||
tm.assert_numpy_array_equal(a, b)
|
||||
|
||||
def _check_numeric_ops(self, a, b, a_dense, b_dense, mix, op):
|
||||
with np.errstate(invalid="ignore", divide="ignore"):
|
||||
if op in [operator.floordiv, ops.rfloordiv]:
|
||||
# FIXME: GH#13843
|
||||
if self._base == pd.Series and a.dtype.subtype == np.dtype("int64"):
|
||||
pytest.xfail("Not defined/working. See GH#13843")
|
||||
|
||||
if mix:
|
||||
result = op(a, b_dense).to_dense()
|
||||
else:
|
||||
result = op(a, b).to_dense()
|
||||
|
||||
if op in [operator.truediv, ops.rtruediv]:
|
||||
# pandas uses future division
|
||||
expected = op(a_dense * 1.0, b_dense)
|
||||
else:
|
||||
expected = op(a_dense, b_dense)
|
||||
|
||||
if op in [operator.floordiv, ops.rfloordiv]:
|
||||
# Series sets 1//0 to np.inf, which SparseArray does not do (yet)
|
||||
mask = np.isinf(expected)
|
||||
if mask.any():
|
||||
expected[mask] = np.nan
|
||||
|
||||
self._assert(result, expected)
|
||||
|
||||
def _check_bool_result(self, res):
|
||||
assert isinstance(res, self._klass)
|
||||
assert isinstance(res.dtype, SparseDtype)
|
||||
assert res.dtype.subtype == np.bool
|
||||
assert isinstance(res.fill_value, bool)
|
||||
|
||||
def _check_comparison_ops(self, a, b, a_dense, b_dense):
|
||||
with np.errstate(invalid="ignore"):
|
||||
# Unfortunately, trying to wrap the computation of each expected
|
||||
# value is with np.errstate() is too tedious.
|
||||
#
|
||||
# sparse & sparse
|
||||
self._check_bool_result(a == b)
|
||||
self._assert((a == b).to_dense(), a_dense == b_dense)
|
||||
|
||||
self._check_bool_result(a != b)
|
||||
self._assert((a != b).to_dense(), a_dense != b_dense)
|
||||
|
||||
self._check_bool_result(a >= b)
|
||||
self._assert((a >= b).to_dense(), a_dense >= b_dense)
|
||||
|
||||
self._check_bool_result(a <= b)
|
||||
self._assert((a <= b).to_dense(), a_dense <= b_dense)
|
||||
|
||||
self._check_bool_result(a > b)
|
||||
self._assert((a > b).to_dense(), a_dense > b_dense)
|
||||
|
||||
self._check_bool_result(a < b)
|
||||
self._assert((a < b).to_dense(), a_dense < b_dense)
|
||||
|
||||
# sparse & dense
|
||||
self._check_bool_result(a == b_dense)
|
||||
self._assert((a == b_dense).to_dense(), a_dense == b_dense)
|
||||
|
||||
self._check_bool_result(a != b_dense)
|
||||
self._assert((a != b_dense).to_dense(), a_dense != b_dense)
|
||||
|
||||
self._check_bool_result(a >= b_dense)
|
||||
self._assert((a >= b_dense).to_dense(), a_dense >= b_dense)
|
||||
|
||||
self._check_bool_result(a <= b_dense)
|
||||
self._assert((a <= b_dense).to_dense(), a_dense <= b_dense)
|
||||
|
||||
self._check_bool_result(a > b_dense)
|
||||
self._assert((a > b_dense).to_dense(), a_dense > b_dense)
|
||||
|
||||
self._check_bool_result(a < b_dense)
|
||||
self._assert((a < b_dense).to_dense(), a_dense < b_dense)
|
||||
|
||||
def _check_logical_ops(self, a, b, a_dense, b_dense):
|
||||
# sparse & sparse
|
||||
self._check_bool_result(a & b)
|
||||
self._assert((a & b).to_dense(), a_dense & b_dense)
|
||||
|
||||
self._check_bool_result(a | b)
|
||||
self._assert((a | b).to_dense(), a_dense | b_dense)
|
||||
# sparse & dense
|
||||
self._check_bool_result(a & b_dense)
|
||||
self._assert((a & b_dense).to_dense(), a_dense & b_dense)
|
||||
|
||||
self._check_bool_result(a | b_dense)
|
||||
self._assert((a | b_dense).to_dense(), a_dense | b_dense)
|
||||
|
||||
    @pytest.mark.parametrize("scalar", [0, 1, 3])
    @pytest.mark.parametrize("fill_value", [None, 0, 2])
    def test_float_scalar(
        self, kind, mix, all_arithmetic_functions, fill_value, scalar
    ):
        """Arithmetic between a float sparse container and a scalar."""
        op = all_arithmetic_functions
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

        a = self._klass(values, kind=kind, fill_value=fill_value)
        self._check_numeric_ops(a, scalar, values, scalar, mix, op)

    def test_float_scalar_comparison(self, kind):
        """Comparisons between a float sparse container and a scalar."""
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

        # Default (NaN) fill value.
        a = self._klass(values, kind=kind)
        self._check_comparison_ops(a, 1, values, 1)
        self._check_comparison_ops(a, 0, values, 0)
        self._check_comparison_ops(a, 3, values, 3)

        # Explicit fill value of 0.
        a = self._klass(values, kind=kind, fill_value=0)
        self._check_comparison_ops(a, 1, values, 1)
        self._check_comparison_ops(a, 0, values, 0)
        self._check_comparison_ops(a, 3, values, 3)

        # Explicit fill value of 2.
        a = self._klass(values, kind=kind, fill_value=2)
        self._check_comparison_ops(a, 1, values, 1)
        self._check_comparison_ops(a, 0, values, 0)
        self._check_comparison_ops(a, 3, values, 3)

    def test_float_same_index(self, kind, mix, all_arithmetic_functions):
        """Arithmetic between two float containers sharing the same sp_index."""
        # when sp_index are the same
        op = all_arithmetic_functions
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])

        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        values = self._base([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0])
        rvalues = self._base([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0])

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind, fill_value=0)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_same_index_comparison(self, kind):
        """Comparisons between two float containers sharing the same sp_index."""
        # when sp_index are the same
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])

        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)

        values = self._base([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0])
        rvalues = self._base([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0])

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind, fill_value=0)
        self._check_comparison_ops(a, b, values, rvalues)

    def test_float_array(self, kind, mix, all_arithmetic_functions):
        """Arithmetic between float containers with differing fill values."""
        op = all_arithmetic_functions

        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind, fill_value=0)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, kind=kind, fill_value=1)
        b = self._klass(rvalues, kind=kind, fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_array_different_kind(self, mix, all_arithmetic_functions):
        """Arithmetic between containers using different sp_index kinds."""
        op = all_arithmetic_functions

        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        a = self._klass(values, kind="integer")
        b = self._klass(rvalues, kind="block")
        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = self._klass(values, kind="integer", fill_value=0)
        b = self._klass(rvalues, kind="block")
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, kind="integer", fill_value=0)
        b = self._klass(rvalues, kind="block", fill_value=0)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, kind="integer", fill_value=1)
        b = self._klass(rvalues, kind="block", fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_array_comparison(self, kind):
        """Comparisons between float containers with differing fill values."""
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind, fill_value=0)
        self._check_comparison_ops(a, b, values, rvalues)

        a = self._klass(values, kind=kind, fill_value=1)
        b = self._klass(rvalues, kind=kind, fill_value=2)
        self._check_comparison_ops(a, b, values, rvalues)

    def test_int_array(self, kind, mix, all_arithmetic_functions):
        """Arithmetic between int64 containers; also checks resulting dtypes."""
        op = all_arithmetic_functions

        # have to specify dtype explicitly until fixing GH 667
        dtype = np.int64

        values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)

        a = self._klass(values, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype)
        b = self._klass(rvalues, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype)

        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype)
        b = self._klass(rvalues, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype)

        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype)
        b = self._klass(rvalues, fill_value=0, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, fill_value=1, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype, fill_value=1)
        b = self._klass(rvalues, fill_value=2, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype, fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_int_array_comparison(self, kind):
        """Comparisons between int64 containers with differing fill values."""
        dtype = "int64"
        # int32 NI ATM

        values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)

        a = self._klass(values, dtype=dtype, kind=kind)
        b = self._klass(rvalues, dtype=dtype, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        a = self._klass(values, dtype=dtype, kind=kind, fill_value=0)
        b = self._klass(rvalues, dtype=dtype, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)

        a = self._klass(values, dtype=dtype, kind=kind, fill_value=0)
        b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=0)
        self._check_comparison_ops(a, b, values, rvalues)

        a = self._klass(values, dtype=dtype, kind=kind, fill_value=1)
        b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=2)
        self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [True, False, np.nan])
|
||||
def test_bool_same_index(self, kind, fill_value):
|
||||
# GH 14000
|
||||
# when sp_index are the same
|
||||
values = self._base([True, False, True, True], dtype=np.bool)
|
||||
rvalues = self._base([True, False, True, True], dtype=np.bool)
|
||||
|
||||
a = self._klass(values, kind=kind, dtype=np.bool, fill_value=fill_value)
|
||||
b = self._klass(rvalues, kind=kind, dtype=np.bool, fill_value=fill_value)
|
||||
self._check_logical_ops(a, b, values, rvalues)
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [True, False, np.nan])
|
||||
def test_bool_array_logical(self, kind, fill_value):
|
||||
# GH 14000
|
||||
# when sp_index are the same
|
||||
values = self._base([True, False, True, False, True, True], dtype=np.bool)
|
||||
rvalues = self._base([True, False, False, True, False, True], dtype=np.bool)
|
||||
|
||||
a = self._klass(values, kind=kind, dtype=np.bool, fill_value=fill_value)
|
||||
b = self._klass(rvalues, kind=kind, dtype=np.bool, fill_value=fill_value)
|
||||
self._check_logical_ops(a, b, values, rvalues)
|
||||
|
||||
    def test_mixed_array_float_int(self, kind, mix, all_arithmetic_functions):
        """Arithmetic between a float container and an int64 container."""
        op = all_arithmetic_functions

        rdtype = "int64"

        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)

        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)

        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind, fill_value=0)
        assert b.dtype == SparseDtype(rdtype)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, kind=kind, fill_value=1)
        b = self._klass(rvalues, kind=kind, fill_value=2)
        assert b.dtype == SparseDtype(rdtype, fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_mixed_array_comparison(self, kind):
        """Comparisons between a float container and an int64 container."""
        rdtype = "int64"
        # int32 NI ATM

        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)

        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)

        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)
        self._check_comparison_ops(a, b, values, rvalues)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind, fill_value=0)
        assert b.dtype == SparseDtype(rdtype)
        self._check_comparison_ops(a, b, values, rvalues)

        a = self._klass(values, kind=kind, fill_value=1)
        b = self._klass(rvalues, kind=kind, fill_value=2)
        assert b.dtype == SparseDtype(rdtype, fill_value=2)
        self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
|
||||
class TestSparseSeriesArithmetic(TestSparseArrayArithmetics):
    """Re-runs the array arithmetic suite against SparseSeries, plus
    index-alignment cases that only apply to Series."""

    # Swap the container types; all inherited tests run against these.
    _base = pd.Series
    _klass = pd.SparseSeries

    def _assert(self, a, b):
        # Series results are compared as Series, not raw ndarrays.
        tm.assert_series_equal(a, b)

    def test_alignment(self, mix, all_arithmetic_functions):
        """Arithmetic must align on the index, for overlapping and
        non-overlapping index pairs, with fill_value 0 and NaN."""
        op = all_arithmetic_functions

        # Partially overlapping indexes ([0..3] vs [1..4]).
        da = pd.Series(np.arange(4))
        db = pd.Series(np.arange(4), index=[1, 2, 3, 4])

        sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=0)
        sb = pd.SparseSeries(
            np.arange(4), index=[1, 2, 3, 4], dtype=np.int64, fill_value=0
        )
        self._check_numeric_ops(sa, sb, da, db, mix, op)

        sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=np.nan)
        sb = pd.SparseSeries(
            np.arange(4), index=[1, 2, 3, 4], dtype=np.int64, fill_value=np.nan
        )
        self._check_numeric_ops(sa, sb, da, db, mix, op)

        # Fully disjoint indexes ([0..3] vs [10..13]).
        da = pd.Series(np.arange(4))
        db = pd.Series(np.arange(4), index=[10, 11, 12, 13])

        sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=0)
        sb = pd.SparseSeries(
            np.arange(4), index=[10, 11, 12, 13], dtype=np.int64, fill_value=0
        )
        self._check_numeric_ops(sa, sb, da, db, mix, op)

        sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=np.nan)
        sb = pd.SparseSeries(
            np.arange(4), index=[10, 11, 12, 13], dtype=np.int64, fill_value=np.nan
        )
        self._check_numeric_ops(sa, sb, da, db, mix, op)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", [operator.eq, operator.add])
def test_with_list(op):
    """Ops against a plain list behave as if the list were a SparseArray."""
    sparse = pd.SparseArray([0, 1], fill_value=0)
    from_list = op(sparse, [0, 1])
    from_array = op(sparse, pd.SparseArray([0, 1]))
    tm.assert_sp_array_equal(from_list, from_array)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.abs, np.exp])
@pytest.mark.parametrize(
    "arr", [pd.SparseArray([0, 0, -1, 1]), pd.SparseArray([None, None, -1, 1])]
)
def test_ufuncs(ufunc, arr):
    """A unary ufunc maps both the stored values and the fill value."""
    result = ufunc(arr)
    # The fill value goes through the same ufunc as the dense values.
    expected = pd.SparseArray(
        ufunc(np.asarray(arr)), fill_value=ufunc(arr.fill_value)
    )
    tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b",
    [
        (pd.SparseArray([0, 0, 0]), np.array([0, 1, 2])),
        # NOTE: the original parametrization repeated the fill_value=1 case
        # four times verbatim — a copy-paste slip that multiplied runtime
        # without adding coverage; the duplicates are removed.
        (pd.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
    ],
)
@pytest.mark.parametrize("ufunc", [np.add, np.greater])
def test_binary_ufuncs(ufunc, a, b):
    """Binary ufuncs on (sparse, dense) return a SparseArray whose values
    match the all-dense computation."""
    # can't say anything about fill value here.
    result = ufunc(a, b)
    expected = ufunc(np.asarray(a), np.asarray(b))
    assert isinstance(result, pd.SparseArray)
    tm.assert_numpy_array_equal(np.asarray(result), expected)
|
||||
|
||||
|
||||
def test_ndarray_inplace():
    """In-place ``+=`` on an ndarray with a SparseArray operand stays dense."""
    sparse_operand = pd.SparseArray([0, 2, 0, 0])
    dense = np.array([0, 1, 2, 3])
    dense += sparse_operand
    tm.assert_numpy_array_equal(dense, np.array([0, 3, 2, 3]))
|
||||
|
||||
|
||||
def test_sparray_inplace():
    """In-place ``+=`` on a SparseArray with an ndarray operand stays sparse."""
    sparse = pd.SparseArray([0, 2, 0, 0])
    dense_operand = np.array([0, 1, 2, 3])
    sparse += dense_operand
    tm.assert_sp_array_equal(sparse, pd.SparseArray([0, 3, 2, 3], fill_value=0))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [True, False])
def test_invert(fill_value):
    """``~`` inverts the stored booleans and the boolean fill value."""
    dense = np.array([True, False, False, True])
    sparse = pd.SparseArray(dense, fill_value=fill_value)
    inverted = ~sparse
    expected = pd.SparseArray(~dense, fill_value=not fill_value)
    tm.assert_sp_array_equal(inverted, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [0, np.nan])
@pytest.mark.parametrize("op", [operator.pos, operator.neg])
def test_unary_op(op, fill_value):
    """Unary ``+``/``-`` maps over the dense values and the fill value."""
    dense = np.array([0, 1, np.nan, 2])
    sparse = pd.SparseArray(dense, fill_value=fill_value)
    expected = pd.SparseArray(op(dense), fill_value=op(fill_value))
    tm.assert_sp_array_equal(op(sparse), expected)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,183 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.sparse.api import SparseDtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype, fill_value",
|
||||
[
|
||||
("int", 0),
|
||||
("float", np.nan),
|
||||
("bool", False),
|
||||
("object", np.nan),
|
||||
("datetime64[ns]", pd.NaT),
|
||||
("timedelta64[ns]", pd.NaT),
|
||||
],
|
||||
)
|
||||
def test_inferred_dtype(dtype, fill_value):
|
||||
sparse_dtype = SparseDtype(dtype)
|
||||
result = sparse_dtype.fill_value
|
||||
if pd.isna(fill_value):
|
||||
assert pd.isna(result) and type(result) == type(fill_value)
|
||||
else:
|
||||
assert result == fill_value
|
||||
|
||||
|
||||
def test_from_sparse_dtype():
    """Constructing from an existing SparseDtype keeps its fill value."""
    original = SparseDtype("float", 0)
    rebuilt = SparseDtype(original)
    assert rebuilt.fill_value == 0
|
||||
|
||||
|
||||
def test_from_sparse_dtype_fill_value():
    """An explicit fill_value overrides the one on the source dtype."""
    base = SparseDtype("int", 1)
    overridden = SparseDtype(base, fill_value=2)
    assert overridden == SparseDtype("int", 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, fill_value",
    [
        ("int", None),
        ("float", None),
        ("bool", None),
        ("object", None),
        ("datetime64[ns]", None),
        ("timedelta64[ns]", None),
        ("int", np.nan),
        ("float", 0),
    ],
)
def test_equal(dtype, fill_value):
    """Identically constructed dtypes compare equal, symmetrically."""
    left = SparseDtype(dtype, fill_value)
    right = SparseDtype(dtype, fill_value)
    assert left == right
    assert right == left
|
||||
|
||||
|
||||
def test_nans_equal():
    """Different NaN spellings produce equal dtypes."""
    python_nan = SparseDtype(float, float("nan"))
    numpy_nan = SparseDtype(float, np.nan)
    assert python_nan == numpy_nan
    assert numpy_nan == python_nan
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b",
    [
        (SparseDtype("float64"), SparseDtype("float32")),
        (SparseDtype("float64"), SparseDtype("float64", 0)),
        (SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)),
        (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)),
        (SparseDtype("float64"), np.dtype("float64")),
    ],
)
def test_not_equal(a, b):
    """Dtypes differing in subtype or fill value compare unequal."""
    assert a != b
|
||||
|
||||
|
||||
def test_construct_from_string_raises():
    """A string that is not a dtype spec is rejected with TypeError."""
    with pytest.raises(TypeError):
        SparseDtype.construct_from_string("not a dtype")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, expected",
    [
        (SparseDtype(int), True),
        (SparseDtype(float), True),
        (SparseDtype(bool), True),
        (SparseDtype(object), False),
        (SparseDtype(str), False),
    ],
)
def test_is_numeric(dtype, expected):
    """``_is_numeric`` reflects the subtype (bool counts as numeric)."""
    flag = dtype._is_numeric
    assert flag is expected
|
||||
|
||||
|
||||
def test_str_uses_object():
    """A ``str`` subtype is stored as NumPy's object dtype."""
    subtype = SparseDtype(str).subtype
    assert subtype == np.dtype("object")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "string, expected",
    [
        ("Sparse[float64]", SparseDtype(np.dtype("float64"))),
        ("Sparse[float32]", SparseDtype(np.dtype("float32"))),
        ("Sparse[int]", SparseDtype(np.dtype("int"))),
        ("Sparse[str]", SparseDtype(np.dtype("str"))),
        ("Sparse[datetime64[ns]]", SparseDtype(np.dtype("datetime64[ns]"))),
        ("Sparse", SparseDtype(np.dtype("float"), np.nan)),
    ],
)
def test_construct_from_string(string, expected):
    """A 'Sparse[...]' string parses to the matching dtype."""
    parsed = SparseDtype.construct_from_string(string)
    assert parsed == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b, expected",
    [
        (SparseDtype(float, 0.0), SparseDtype(np.dtype("float"), 0.0), True),
        (SparseDtype(int, 0), SparseDtype(int, 0), True),
        (SparseDtype(float, float("nan")), SparseDtype(float, np.nan), True),
        (SparseDtype(float, 0), SparseDtype(float, np.nan), False),
        (SparseDtype(int, 0.0), SparseDtype(float, 0.0), False),
    ],
)
def test_hash_equal(a, b, expected):
    """Equality and hash equality must agree for SparseDtype pairs."""
    assert (a == b) is expected
    assert (hash(a) == hash(b)) is expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "string, expected",
    [
        ("Sparse[int]", "int"),
        ("Sparse[int, 0]", "int"),
        ("Sparse[int64]", "int64"),
        ("Sparse[int64, 0]", "int64"),
        ("Sparse[datetime64[ns], 0]", "datetime64[ns]"),
    ],
)
def test_parse_subtype(string, expected):
    """``_parse_subtype`` extracts the subtype name, ignoring the fill value."""
    parsed, _ = SparseDtype._parse_subtype(string)
    assert parsed == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "string", ["Sparse[int, 1]", "Sparse[float, 0.0]", "Sparse[bool, True]"]
)
def test_construct_from_string_fill_value_raises(string):
    """Non-default fill values cannot be round-tripped through a string."""
    with pytest.raises(TypeError, match="fill_value in the string is not"):
        SparseDtype.construct_from_string(string)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "original, dtype, expected",
    [
        (SparseDtype(int, 0), float, SparseDtype(float, 0.0)),
        (SparseDtype(int, 1), float, SparseDtype(float, 1.0)),
        (SparseDtype(int, 1), str, SparseDtype(object, "1")),
        (SparseDtype(float, 1.5), int, SparseDtype(int, 1)),
    ],
)
def test_update_dtype(original, dtype, expected):
    """``update_dtype`` converts the subtype and casts the fill value along."""
    updated = original.update_dtype(dtype)
    assert updated == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "original, dtype",
    [(SparseDtype(float, np.nan), int), (SparseDtype(str, "abc"), int)],
)
def test_update_dtype_raises(original, dtype):
    """Lossy fill-value casts (NaN->int, str->int) raise ValueError."""
    with pytest.raises(ValueError):
        original.update_dtype(dtype)
|
||||
@@ -0,0 +1,601 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._libs.sparse as splib
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import Series
|
||||
from pandas.core.arrays.sparse import BlockIndex, IntIndex, _make_index
|
||||
import pandas.util.testing as tm
|
||||
|
||||
TEST_LENGTH = 20

# Canned block-index scenarios: x block locations/lengths, y block
# locations/lengths, and the expected intersection locations/lengths.
plain_case = dict(
    xloc=[0, 7, 15],
    xlen=[3, 5, 5],
    yloc=[2, 9, 14],
    ylen=[2, 3, 5],
    intersect_loc=[2, 9, 15],
    intersect_len=[1, 3, 4],
)
delete_blocks = dict(
    xloc=[0, 5], xlen=[4, 4], yloc=[1], ylen=[4], intersect_loc=[1], intersect_len=[3]
)
split_blocks = dict(
    xloc=[0],
    xlen=[10],
    yloc=[0, 5],
    ylen=[3, 7],
    intersect_loc=[0, 5],
    intersect_len=[3, 5],
)
skip_block = dict(
    xloc=[10],
    xlen=[5],
    yloc=[0, 12],
    ylen=[5, 3],
    intersect_loc=[12],
    intersect_len=[3],
)

no_intersect = dict(
    xloc=[0, 10],
    xlen=[4, 6],
    yloc=[5, 17],
    ylen=[4, 2],
    intersect_loc=[],
    intersect_len=[],
)


def check_cases(_check_case):
    """Invoke *_check_case* on every canned scenario plus two empty corner cases.

    The callback receives ``(xloc, xlen, yloc, ylen, eloc, elen)`` for each
    scenario, in a fixed order.
    """
    scenarios = (plain_case, delete_blocks, split_blocks, skip_block, no_intersect)
    for scenario in scenarios:
        _check_case(
            scenario["xloc"],
            scenario["xlen"],
            scenario["yloc"],
            scenario["ylen"],
            scenario["intersect_loc"],
            scenario["intersect_len"],
        )

    # one or both is empty
    _check_case([0], [5], [], [], [], [])
    _check_case([], [], [], [], [], [])
|
||||
|
||||
|
||||
class TestSparseIndexUnion:
    # Tests for make_union on BlockIndex/IntIndex: the union of two sparse
    # indexes over the same underlying length.

    def test_index_make_union(self):
        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            # Block-index union must yield the expected block locs/lengths.
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
            bresult = xindex.make_union(yindex)
            assert isinstance(bresult, BlockIndex)
            tm.assert_numpy_array_equal(bresult.blocs, np.array(eloc, dtype=np.int32))
            tm.assert_numpy_array_equal(
                bresult.blengths, np.array(elen, dtype=np.int32)
            )

            # The integer-index union must cover the same set of points.
            ixindex = xindex.to_int_index()
            iyindex = yindex.to_int_index()
            iresult = ixindex.make_union(iyindex)
            assert isinstance(iresult, IntIndex)
            tm.assert_numpy_array_equal(iresult.indices, bresult.to_int_index().indices)

        # Adjacent blocks merge into one.
        """
        x: ----
        y: ----
        r: --------
        """
        xloc = [0]
        xlen = [5]
        yloc = [5]
        ylen = [4]
        eloc = [0]
        elen = [9]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        # Overlapping leading blocks; the disjoint trailing blocks survive.
        """
        x: ----- -----
        y: ----- --
        """
        xloc = [0, 10]
        xlen = [5, 5]
        yloc = [2, 17]
        ylen = [5, 2]
        eloc = [0, 10, 17]
        elen = [7, 5, 2]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        # Partially overlapping single blocks merge.
        """
        x: ------
        y: -------
        r: ----------
        """
        xloc = [1]
        xlen = [5]
        yloc = [3]
        ylen = [5]
        eloc = [1]
        elen = [7]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        # One y block bridges two x blocks.
        """
        x: ------ -----
        y: -------
        r: -------------
        """
        xloc = [2, 10]
        xlen = [4, 4]
        yloc = [4]
        ylen = [8]
        eloc = [2]
        elen = [12]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        # y covers the gap between the two x blocks.
        """
        x: --- -----
        y: -------
        r: -------------
        """
        xloc = [0, 5]
        xlen = [3, 5]
        yloc = [0]
        ylen = [7]
        eloc = [0]
        elen = [10]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        # Interleaved blocks collapse into one contiguous run.
        """
        x: ------ -----
        y: ------- ---
        r: -------------
        """
        xloc = [2, 10]
        xlen = [4, 4]
        yloc = [4, 13]
        ylen = [8, 4]
        eloc = [2]
        elen = [15]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        # x already covers everything y contributes.
        """
        x: ----------------------
        y: ---- ---- ---
        r: ----------------------
        """
        xloc = [2]
        xlen = [15]
        yloc = [4, 9, 14]
        ylen = [3, 2, 2]
        eloc = [2]
        elen = [15]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        # Fully disjoint blocks are all preserved, in order.
        """
        x: ---- ---
        y: --- ---
        """
        xloc = [0, 10]
        xlen = [3, 3]
        yloc = [5, 15]
        ylen = [2, 2]
        eloc = [0, 5, 10, 15]
        elen = [3, 2, 3, 2]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)

    def test_int_index_make_union(self):
        # Union is a set-union of the indices.
        a = IntIndex(5, np.array([0, 3, 4], dtype=np.int32))
        b = IntIndex(5, np.array([0, 2], dtype=np.int32))
        res = a.make_union(b)
        exp = IntIndex(5, np.array([0, 2, 3, 4], np.int32))
        assert res.equals(exp)

        # One side empty: result is the other side.
        a = IntIndex(5, np.array([], dtype=np.int32))
        b = IntIndex(5, np.array([0, 2], dtype=np.int32))
        res = a.make_union(b)
        exp = IntIndex(5, np.array([0, 2], np.int32))
        assert res.equals(exp)

        # Both sides empty: result is empty.
        a = IntIndex(5, np.array([], dtype=np.int32))
        b = IntIndex(5, np.array([], dtype=np.int32))
        res = a.make_union(b)
        exp = IntIndex(5, np.array([], np.int32))
        assert res.equals(exp)

        # Fully dense on both sides: result is unchanged.
        a = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32))
        b = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32))
        res = a.make_union(b)
        exp = IntIndex(5, np.array([0, 1, 2, 3, 4], np.int32))
        assert res.equals(exp)

        # Mismatched underlying lengths are rejected.
        a = IntIndex(5, np.array([0, 1], dtype=np.int32))
        b = IntIndex(4, np.array([0, 1], dtype=np.int32))

        msg = "Indices must reference same underlying length"
        with pytest.raises(ValueError, match=msg):
            a.make_union(b)
|
||||
|
||||
|
||||
class TestSparseIndexIntersect:
    # Tests for intersect on BlockIndex/IntIndex.

    @td.skip_if_windows
    def test_intersect(self):
        def _check_correct(a, b, expected):
            result = a.intersect(b)
            assert result.equals(expected)

        def _check_length_exc(a, longer):
            # Mismatched underlying lengths must raise.
            msg = "Indices must reference same underlying length"
            with pytest.raises(Exception, match=msg):
                a.intersect(longer)

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
            expected = BlockIndex(TEST_LENGTH, eloc, elen)
            longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen)

            # Block and integer representations must agree on the result.
            _check_correct(xindex, yindex, expected)
            _check_correct(
                xindex.to_int_index(), yindex.to_int_index(), expected.to_int_index()
            )

            _check_length_exc(xindex, longer_index)
            _check_length_exc(xindex.to_int_index(), longer_index.to_int_index())

        check_cases(_check_case)

    def test_intersect_empty(self):
        # Intersection with an empty index is empty, from either side.
        xindex = IntIndex(4, np.array([], dtype=np.int32))
        yindex = IntIndex(4, np.array([2, 3], dtype=np.int32))
        assert xindex.intersect(yindex).equals(xindex)
        assert yindex.intersect(xindex).equals(xindex)

        # Same behavior for the block representation.
        xindex = xindex.to_block_index()
        yindex = yindex.to_block_index()
        assert xindex.intersect(yindex).equals(xindex)
        assert yindex.intersect(xindex).equals(xindex)

    def test_intersect_identical(self):
        # An index intersected with itself is itself.
        cases = [
            IntIndex(5, np.array([1, 2], dtype=np.int32)),
            IntIndex(5, np.array([0, 2, 4], dtype=np.int32)),
            IntIndex(0, np.array([], dtype=np.int32)),
            IntIndex(5, np.array([], dtype=np.int32)),
        ]

        for case in cases:
            assert case.intersect(case).equals(case)
            case = case.to_block_index()
            assert case.intersect(case).equals(case)
|
||||
|
||||
|
||||
class TestSparseIndexCommon:
    """Behavior shared by IntIndex and BlockIndex, built via ``_make_index``."""

    def test_int_internal(self):
        idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind="integer")
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 2
        tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32))

        idx = _make_index(4, np.array([], dtype=np.int32), kind="integer")
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 0
        tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32))

        idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer")
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 4
        tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32))

    def test_block_internal(self):
        # Consecutive points collapse into blocks (locations + lengths).
        idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind="block")
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 2
        tm.assert_numpy_array_equal(idx.blocs, np.array([2], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths, np.array([2], dtype=np.int32))

        idx = _make_index(4, np.array([], dtype=np.int32), kind="block")
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 0
        tm.assert_numpy_array_equal(idx.blocs, np.array([], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths, np.array([], dtype=np.int32))

        idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="block")
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 4
        tm.assert_numpy_array_equal(idx.blocs, np.array([0], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths, np.array([4], dtype=np.int32))

        idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind="block")
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 3
        tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32))

    def test_lookup(self):
        # lookup maps a dense position to its sparse-storage slot, -1 if absent.
        for kind in ["integer", "block"]:
            idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind)
            assert idx.lookup(-1) == -1
            assert idx.lookup(0) == -1
            assert idx.lookup(1) == -1
            assert idx.lookup(2) == 0
            assert idx.lookup(3) == 1
            assert idx.lookup(4) == -1

            idx = _make_index(4, np.array([], dtype=np.int32), kind=kind)

            for i in range(-1, 5):
                assert idx.lookup(i) == -1

            idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind)
            assert idx.lookup(-1) == -1
            assert idx.lookup(0) == 0
            assert idx.lookup(1) == 1
            assert idx.lookup(2) == 2
            assert idx.lookup(3) == 3
            assert idx.lookup(4) == -1

            idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind)
            assert idx.lookup(-1) == -1
            assert idx.lookup(0) == 0
            assert idx.lookup(1) == -1
            assert idx.lookup(2) == 1
            assert idx.lookup(3) == 2
            assert idx.lookup(4) == -1

    def test_lookup_array(self):
        # Vectorized form of test_lookup.
        for kind in ["integer", "block"]:
            idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind)

            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
            exp = np.array([-1, -1, 0], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
            exp = np.array([-1, 0, -1, 1], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            idx = _make_index(4, np.array([], dtype=np.int32), kind=kind)
            res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32))
            exp = np.array([-1, -1, -1, -1], dtype=np.int32)
            # BUG FIX: this comparison was missing, so the empty-index case
            # computed res/exp but never actually checked them.
            tm.assert_numpy_array_equal(res, exp)

            idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind)
            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
            exp = np.array([-1, 0, 2], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
            exp = np.array([-1, 2, 1, 3], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind)
            res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32))
            exp = np.array([1, -1, 2, 0], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32))
            exp = np.array([-1, -1, 1, -1], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

    def test_lookup_basics(self):
        def _check(index):
            assert index.lookup(0) == -1
            assert index.lookup(5) == 0
            assert index.lookup(7) == 2
            assert index.lookup(8) == -1
            assert index.lookup(9) == -1
            assert index.lookup(10) == -1
            assert index.lookup(11) == -1
            assert index.lookup(12) == 3
            assert index.lookup(17) == 8
            assert index.lookup(18) == -1

        bindex = BlockIndex(20, [5, 12], [3, 6])
        iindex = bindex.to_int_index()

        # Both representations of the same index must agree.
        _check(bindex)
        _check(iindex)

        # corner cases
|
||||
|
||||
|
||||
class TestBlockIndex:
    # NOTE(review): test_block_internal duplicates
    # TestSparseIndexCommon.test_block_internal above — candidate for removal.
    def test_block_internal(self):
        # Consecutive points collapse into blocks (locations + lengths).
        idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind="block")
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 2
        tm.assert_numpy_array_equal(idx.blocs, np.array([2], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths, np.array([2], dtype=np.int32))

        idx = _make_index(4, np.array([], dtype=np.int32), kind="block")
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 0
        tm.assert_numpy_array_equal(idx.blocs, np.array([], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths, np.array([], dtype=np.int32))

        idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="block")
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 4
        tm.assert_numpy_array_equal(idx.blocs, np.array([0], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths, np.array([4], dtype=np.int32))

        idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind="block")
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 3
        tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32))

    def test_make_block_boundary(self):
        # Every-other-point patterns produce length-1 blocks at each point,
        # across a few sizes including off-by-one boundaries (100, 101).
        for i in [5, 10, 100, 101]:
            idx = _make_index(i, np.arange(0, i, 2, dtype=np.int32), kind="block")

            exp = np.arange(0, i, 2, dtype=np.int32)
            tm.assert_numpy_array_equal(idx.blocs, exp)
            tm.assert_numpy_array_equal(idx.blengths, np.ones(len(exp), dtype=np.int32))

    def test_equals(self):
        index = BlockIndex(10, [0, 4], [2, 5])

        assert index.equals(index)
        assert not index.equals(BlockIndex(10, [0, 4], [2, 6]))

    def test_check_integrity(self):
        locs = []
        lengths = []

        # 0-length OK
        # TODO: index variables are not used...is that right?
        index = BlockIndex(0, locs, lengths)  # noqa

        # also OK even though empty
        index = BlockIndex(1, locs, lengths)  # noqa

        # A block running past the underlying length is rejected.
        msg = "Block 0 extends beyond end"
        with pytest.raises(ValueError, match=msg):
            BlockIndex(10, [5], [10])

        # Overlapping blocks are rejected.
        msg = "Block 0 overlaps"
        with pytest.raises(ValueError, match=msg):
            BlockIndex(10, [2, 5], [5, 3])

    def test_to_int_index(self):
        # Blocks expand to the explicit list of covered positions.
        locs = [0, 10]
        lengths = [4, 6]
        exp_inds = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15]

        block = BlockIndex(20, locs, lengths)
        dense = block.to_int_index()

        tm.assert_numpy_array_equal(dense.indices, np.array(exp_inds, dtype=np.int32))

    def test_to_block_index(self):
        # Converting a BlockIndex to block form is the identity.
        index = BlockIndex(10, [0, 5], [4, 5])
        assert index.to_block_index() is index
|
||||
|
||||
|
||||
class TestIntIndex:
    def test_check_integrity(self):
        """IntIndex validates its indices on construction."""

        # Too many indices than specified in self.length
        msg = "Too many indices"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=1, indices=[1, 2, 3])

        # No index can be negative.
        # NOTE: this check was previously duplicated verbatim; the second
        # identical pytest.raises block has been removed.
        msg = "No index can be less than zero"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, -2, 3])

        # All indices must be less than the length.
        msg = "All indices must be less than the length"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 2, 5])

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 2, 6])

        # Indices must be strictly ascending.
        msg = "Indices must be strictly increasing"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 3, 2])

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 3, 3])

    def test_int_internal(self):
        idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind="integer")
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 2
        tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32))

        idx = _make_index(4, np.array([], dtype=np.int32), kind="integer")
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 0
        tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32))

        idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer")
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 4
        tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32))

    def test_equals(self):
        index = IntIndex(10, [0, 1, 2, 3, 4])
        assert index.equals(index)
        assert not index.equals(IntIndex(10, [0, 1, 2, 3]))

    def test_to_block_index(self):
        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

            # see if survive the round trip
            xbindex = xindex.to_int_index().to_block_index()
            ybindex = yindex.to_int_index().to_block_index()
            assert isinstance(xbindex, BlockIndex)
            assert xbindex.equals(xindex)
            assert ybindex.equals(yindex)

        check_cases(_check_case)

    def test_to_int_index(self):
        # Converting an IntIndex to integer form is the identity.
        index = IntIndex(10, [2, 3, 4, 5, 6])
        assert index.to_int_index() is index
|
||||
|
||||
|
||||
class TestSparseOperators:
    # Cross-checks the Cython sparse kernels against the equivalent dense
    # Series computation.

    def _op_tests(self, sparse_op, python_op):
        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

            xdindex = xindex.to_int_index()
            ydindex = yindex.to_int_index()

            # Distinct value patterns so mismatches are detectable.
            x = np.arange(xindex.npoints) * 10.0 + 1
            y = np.arange(yindex.npoints) * 100.0 + 1

            xfill = 0
            yfill = 2

            # Run the kernel on both index representations.
            result_block_vals, rb_index, bfill = sparse_op(
                x, xindex, xfill, y, yindex, yfill
            )
            result_int_vals, ri_index, ifill = sparse_op(
                x, xdindex, xfill, y, ydindex, yfill
            )

            # Both representations must agree on index, values, and fill.
            assert rb_index.to_int_index().equals(ri_index)
            tm.assert_numpy_array_equal(result_block_vals, result_int_vals)
            assert bfill == ifill

            # check versus Series...
            xseries = Series(x, xdindex.indices)
            xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill)

            yseries = Series(y, ydindex.indices)
            yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill)

            series_result = python_op(xseries, yseries)
            series_result = series_result.reindex(ri_index.indices)

            tm.assert_numpy_array_equal(result_block_vals, series_result.values)
            tm.assert_numpy_array_equal(result_int_vals, series_result.values)

        check_cases(_check_case)

    @pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"])
    def test_op(self, opname):
        # Pair each Cython kernel with its pure-Python operator counterpart.
        sparse_op = getattr(splib, "sparse_{opname}_float64".format(opname=opname))
        python_op = getattr(operator, opname)
        self._op_tests(sparse_op, python_op)
|
||||
@@ -0,0 +1,360 @@
|
||||
import datetime
|
||||
import decimal
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas.core.dtypes.dtypes import registry
|
||||
|
||||
import pandas as pd
|
||||
from pandas.api.extensions import register_extension_dtype
|
||||
from pandas.api.types import is_scalar
|
||||
from pandas.core.arrays import PandasArray, integer_array, period_array
|
||||
from pandas.tests.extension.decimal import DecimalArray, DecimalDtype, to_decimal
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, dtype, expected",
    [
        # Basic NumPy defaults.
        ([1, 2], None, PandasArray(np.array([1, 2]))),
        ([1, 2], object, PandasArray(np.array([1, 2], dtype=object))),
        (
            [1, 2],
            np.dtype("float32"),
            PandasArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))),
        ),
        (np.array([1, 2]), None, PandasArray(np.array([1, 2]))),
        # String alias passes through to NumPy
        ([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))),
        # Period alias
        (
            [pd.Period("2000", "D"), pd.Period("2001", "D")],
            "Period[D]",
            period_array(["2000", "2001"], freq="D"),
        ),
        # Period dtype
        (
            [pd.Period("2000", "D")],
            pd.PeriodDtype("D"),
            period_array(["2000"], freq="D"),
        ),
        # Datetime (naive)
        (
            [1, 2],
            np.dtype("datetime64[ns]"),
            pd.arrays.DatetimeArray._from_sequence(
                np.array([1, 2], dtype="datetime64[ns]")
            ),
        ),
        (
            np.array([1, 2], dtype="datetime64[ns]"),
            None,
            pd.arrays.DatetimeArray._from_sequence(
                np.array([1, 2], dtype="datetime64[ns]")
            ),
        ),
        (
            pd.DatetimeIndex(["2000", "2001"]),
            np.dtype("datetime64[ns]"),
            pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
        ),
        (
            pd.DatetimeIndex(["2000", "2001"]),
            None,
            pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
        ),
        (
            ["2000", "2001"],
            np.dtype("datetime64[ns]"),
            pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
        ),
        # Datetime (tz-aware)
        (
            ["2000", "2001"],
            pd.DatetimeTZDtype(tz="CET"),
            pd.arrays.DatetimeArray._from_sequence(
                ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET")
            ),
        ),
        # Timedelta
        (
            ["1H", "2H"],
            np.dtype("timedelta64[ns]"),
            pd.arrays.TimedeltaArray._from_sequence(["1H", "2H"]),
        ),
        (
            pd.TimedeltaIndex(["1H", "2H"]),
            np.dtype("timedelta64[ns]"),
            pd.arrays.TimedeltaArray._from_sequence(["1H", "2H"]),
        ),
        (
            pd.TimedeltaIndex(["1H", "2H"]),
            None,
            pd.arrays.TimedeltaArray._from_sequence(["1H", "2H"]),
        ),
        # Category
        (["a", "b"], "category", pd.Categorical(["a", "b"])),
        (
            ["a", "b"],
            pd.CategoricalDtype(None, ordered=True),
            pd.Categorical(["a", "b"], ordered=True),
        ),
        # Interval
        (
            [pd.Interval(1, 2), pd.Interval(3, 4)],
            "interval",
            pd.arrays.IntervalArray.from_tuples([(1, 2), (3, 4)]),
        ),
        # Sparse
        ([0, 1], "Sparse[int64]", pd.SparseArray([0, 1], dtype="int64")),
        # IntegerNA
        ([1, None], "Int16", integer_array([1, None], dtype="Int16")),
        (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
        # Index
        (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
        # Series[EA] returns the EA
        (
            pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])),
            None,
            pd.Categorical(["a", "b"], categories=["a", "b", "c"]),
        ),
        # "3rd party" EAs work
        ([decimal.Decimal(0), decimal.Decimal(1)], "decimal", to_decimal([0, 1])),
        # pass an ExtensionArray, but a different dtype
        (
            period_array(["2000", "2001"], freq="D"),
            "category",
            pd.Categorical([pd.Period("2000", "D"), pd.Period("2001", "D")]),
        ),
    ],
)
def test_array(data, dtype, expected):
    """``pd.array(data, dtype)`` infers/honors the dtype and returns the
    matching extension array for every parametrized input."""
    result = pd.array(data, dtype=dtype)
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_array_copy():
    """pd.array copies its input by default; copy=False shares the buffer."""
    source = np.array([1, 2])

    # no keyword behaves the same as copy=True
    default_result = pd.array(source)
    assert np.shares_memory(source, default_result._ndarray) is False

    # explicit copy=True also gives an independent buffer
    copied = pd.array(source, copy=True)
    assert np.shares_memory(source, copied._ndarray) is False

    # copy=False reuses the original ndarray's memory
    shared = pd.array(source, copy=False)
    assert np.shares_memory(source, shared._ndarray) is True
|
||||
|
||||
|
||||
# Shared tz instance used by the parametrization lists below so that the
# tz-aware expected values are built with the exact same timezone object.
cet = pytz.timezone("CET")
|
||||
|
||||
|
||||
# Each case is (input data, expected extension array) with no dtype passed,
# so pd.array must infer the correct array type from the values alone.
@pytest.mark.parametrize(
    "data, expected",
    [
        # period
        (
            [pd.Period("2000", "D"), pd.Period("2001", "D")],
            period_array(["2000", "2001"], freq="D"),
        ),
        # interval
        (
            [pd.Interval(0, 1), pd.Interval(1, 2)],
            pd.arrays.IntervalArray.from_breaks([0, 1, 2]),
        ),
        # datetime
        (
            [pd.Timestamp("2000"), pd.Timestamp("2001")],
            pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
        ),
        (
            [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)],
            pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
        ),
        (
            np.array([1, 2], dtype="M8[ns]"),
            pd.arrays.DatetimeArray(np.array([1, 2], dtype="M8[ns]")),
        ),
        (
            # non-ns datetime64 input is converted to ns resolution
            np.array([1, 2], dtype="M8[us]"),
            pd.arrays.DatetimeArray(np.array([1000, 2000], dtype="M8[ns]")),
        ),
        # datetimetz
        (
            [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")],
            pd.arrays.DatetimeArray._from_sequence(
                ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET")
            ),
        ),
        (
            [
                datetime.datetime(2000, 1, 1, tzinfo=cet),
                datetime.datetime(2001, 1, 1, tzinfo=cet),
            ],
            pd.arrays.DatetimeArray._from_sequence(["2000", "2001"], tz=cet),
        ),
        # timedelta
        (
            [pd.Timedelta("1H"), pd.Timedelta("2H")],
            pd.arrays.TimedeltaArray._from_sequence(["1H", "2H"]),
        ),
        (
            np.array([1, 2], dtype="m8[ns]"),
            pd.arrays.TimedeltaArray(np.array([1, 2], dtype="m8[ns]")),
        ),
        (
            # non-ns timedelta64 input is converted to ns resolution
            np.array([1, 2], dtype="m8[us]"),
            pd.arrays.TimedeltaArray(np.array([1000, 2000], dtype="m8[ns]")),
        ),
    ],
)
def test_array_inference(data, expected):
    """pd.array infers the appropriate extension array when no dtype is given."""
    result = pd.array(data)
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
# Inputs whose elements are mutually inconsistent, so no single extension
# type can be inferred; pd.array should fall back to an object-dtype array.
@pytest.mark.parametrize(
    "data",
    [
        # mix of frequencies
        [pd.Period("2000", "D"), pd.Period("2001", "A")],
        # mix of closed
        [pd.Interval(0, 1, closed="left"), pd.Interval(1, 2, closed="right")],
        # Mix of timezones
        [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000", tz="UTC")],
        # Mix of tz-aware and tz-naive
        [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000")],
        np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]),
    ],
)
def test_array_inference_fails(data):
    """When inference is impossible, pd.array returns object-dtype PandasArray."""
    result = pd.array(data)
    expected = PandasArray(np.array(data, dtype=object))
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data", [np.array([[1, 2], [3, 4]]), [[1, 2], [3, 4]]])
def test_nd_raises(data):
    """pd.array rejects any input with more than one dimension."""
    expected_msg = "PandasArray must be 1-dimensional"
    with pytest.raises(ValueError, match=expected_msg):
        pd.array(data)
|
||||
|
||||
|
||||
def test_scalar_raises():
    """pd.array only accepts sequences; a bare scalar is an error."""
    msg = "Cannot pass scalar '1'"
    with pytest.raises(ValueError, match=msg):
        pd.array(1)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# A couple dummy classes to ensure that Series and Indexes are unboxed before
|
||||
# getting to the EA classes.
|
||||
|
||||
|
||||
@register_extension_dtype
class DecimalDtype2(DecimalDtype):
    """Copy of DecimalDtype registered under the distinct name 'decimal2'."""

    # registry lookups by string resolve through this name
    name = "decimal2"

    @classmethod
    def construct_array_type(cls):
        """Return the array class paired with this dtype (DecimalArray2)."""
        return DecimalArray2
|
||||
|
||||
|
||||
class DecimalArray2(DecimalArray):
    """DecimalArray variant whose _from_sequence rejects boxed pandas objects."""

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        # Raise on Series/Index so tests can prove that pd.array unboxes
        # them before the data ever reaches the extension array.
        if isinstance(scalars, (pd.Series, pd.Index)):
            raise TypeError

        return super()._from_sequence(scalars, dtype=dtype, copy=copy)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [pd.Series, pd.Index])
def test_array_unboxes(box):
    """pd.array must unwrap Series/Index before calling _from_sequence."""
    boxed = box([decimal.Decimal("1"), decimal.Decimal("2")])

    # sanity check: DecimalArray2 refuses boxed input outright
    with pytest.raises(TypeError):
        DecimalArray2._from_sequence(boxed)

    result = pd.array(boxed, dtype="decimal2")
    expected = DecimalArray2._from_sequence(boxed.values)
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.fixture
def registry_without_decimal():
    """Temporarily remove DecimalDtype from the extension dtype registry."""
    registry.dtypes.remove(DecimalDtype)
    yield
    registry.dtypes.append(DecimalDtype)
|
||||
|
||||
|
||||
def test_array_not_registered(registry_without_decimal):
    """An explicit dtype still works even when it is not in the registry."""
    # the fixture removed DecimalDtype, so lookup by name must fail
    assert registry.find("decimal") is None
    values = [decimal.Decimal("1"), decimal.Decimal("2")]

    result = pd.array(values, dtype=DecimalDtype)
    expected = DecimalArray._from_sequence(values)
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
class TestArrayAnalytics:
    """searchsorted behavior on arrays produced by pd.array."""

    def test_searchsorted(self, string_dtype):
        """Scalar searchsorted on a string array returns scalar positions."""
        arr = pd.array(["a", "b", "c"], dtype=string_dtype)

        result = arr.searchsorted("a", side="left")
        assert is_scalar(result)
        assert result == 0

        result = arr.searchsorted("a", side="right")
        assert is_scalar(result)
        assert result == 1

    def test_searchsorted_numeric_dtypes_scalar(self, any_real_dtype):
        """Scalar and length-1 list probes against a numeric array."""
        arr = pd.array([1, 3, 90], dtype=any_real_dtype)
        result = arr.searchsorted(30)
        assert is_scalar(result)
        assert result == 2

        # a list probe returns an intp position array, not a scalar
        result = arr.searchsorted([30])
        expected = np.array([2], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    def test_searchsorted_numeric_dtypes_vector(self, any_real_dtype):
        """Vector probes return one position per probe value."""
        arr = pd.array([1, 3, 90], dtype=any_real_dtype)
        result = arr.searchsorted([2, 30])
        expected = np.array([1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "arr, val",
        [
            [
                pd.date_range("20120101", periods=10, freq="2D"),
                pd.Timestamp("20120102"),
            ],
            [
                pd.date_range("20120101", periods=10, freq="2D", tz="Asia/Hong_Kong"),
                pd.Timestamp("20120102", tz="Asia/Hong_Kong"),
            ],
            [
                pd.timedelta_range(start="1 day", end="10 days", periods=10),
                pd.Timedelta("2 days"),
            ],
        ],
    )
    def test_search_sorted_datetime64_scalar(self, arr, val):
        """Scalar searchsorted works for datetime64/timedelta64-backed arrays."""
        arr = pd.array(arr)
        result = arr.searchsorted(val)
        assert is_scalar(result)
        assert result == 1

    def test_searchsorted_sorter(self, any_real_dtype):
        """The sorter argument is honored for an unsorted underlying array."""
        arr = pd.array([3, 1, 2], dtype=any_real_dtype)
        result = arr.searchsorted([0, 3], sorter=np.argsort(arr))
        expected = np.array([0, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)
|
||||
@@ -0,0 +1,701 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
# TODO: more freq variants
@pytest.fixture(params=["D", "B", "W", "M", "Q", "Y"])
def period_index(request):
    """
    A fixture to provide PeriodIndex objects with different frequencies.

    Most PeriodArray behavior is already tested in PeriodIndex tests,
    so here we just test that the PeriodArray behavior matches
    the PeriodIndex behavior.
    """
    # TODO: non-monotone indexes; NaTs, different start dates
    return pd.period_range(
        start=pd.Timestamp("2000-01-01"), periods=100, freq=request.param
    )
|
||||
|
||||
|
||||
@pytest.fixture(params=["D", "B", "W", "M", "Q", "Y"])
def datetime_index(request):
    """
    A fixture to provide DatetimeIndex objects with different frequencies.

    Most DatetimeArray behavior is already tested in DatetimeIndex tests,
    so here we just test that the DatetimeArray behavior matches
    the DatetimeIndex behavior.
    """
    # TODO: non-monotone indexes; NaTs, different start dates, timezones
    return pd.date_range(
        start=pd.Timestamp("2000-01-01"), periods=100, freq=request.param
    )
|
||||
|
||||
|
||||
@pytest.fixture
def timedelta_index():
    """
    A fixture to provide TimedeltaIndex objects with different frequencies.

    Most TimedeltaArray behavior is already tested in TimedeltaIndex tests,
    so here we just test that the TimedeltaArray behavior matches
    the TimedeltaIndex behavior.
    """
    # The fixture is not parametrized, so the unused ``request`` parameter
    # was dropped; pytest injects it only when a fixture actually needs it.
    # TODO: flesh this out
    return pd.TimedeltaIndex(["1 Day", "3 Hours", "NaT"])
|
||||
|
||||
|
||||
class SharedTests:
    """
    Tests shared by the datetime-like arrays (DatetimeArray, TimedeltaArray,
    PeriodArray).  Subclasses supply the concrete pair via ``index_cls`` and
    ``array_cls``.
    """

    # overridden in subclasses with the matching Index class
    index_cls = None

    def test_compare_len1_raises(self):
        # make sure we raise when comparing with different lengths, specific
        # to the case where one has length-1, which numpy would broadcast
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9

        idx = self.index_cls._simple_new(data, freq="D")
        arr = self.array_cls(idx)

        with pytest.raises(ValueError, match="Lengths must match"):
            arr == arr[:1]

        # test the index classes while we're at it, GH#23078
        with pytest.raises(ValueError, match="Lengths must match"):
            idx <= idx[[0]]

    def test_take(self):
        """take with list and ndarray indexers matches the Index result."""
        data = np.arange(100, dtype="i8") * 24 * 3600 * 10 ** 9
        np.random.shuffle(data)

        idx = self.index_cls._simple_new(data, freq="D")
        arr = self.array_cls(idx)

        takers = [1, 4, 94]
        result = arr.take(takers)
        expected = idx.take(takers)

        tm.assert_index_equal(self.index_cls(result), expected)

        takers = np.array([1, 4, 94])
        result = arr.take(takers)
        expected = idx.take(takers)

        tm.assert_index_equal(self.index_cls(result), expected)

    def test_take_fill(self):
        """take(allow_fill=True) accepts NaT-like fills, rejects others."""
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9

        idx = self.index_cls._simple_new(data, freq="D")
        arr = self.array_cls(idx)

        # None, np.nan, and pd.NaT all fill as NaT
        result = arr.take([-1, 1], allow_fill=True, fill_value=None)
        assert result[0] is pd.NaT

        result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan)
        assert result[0] is pd.NaT

        result = arr.take([-1, 1], allow_fill=True, fill_value=pd.NaT)
        assert result[0] is pd.NaT

        # incompatible fill values must raise
        with pytest.raises(ValueError):
            arr.take([0, 1], allow_fill=True, fill_value=2)

        with pytest.raises(ValueError):
            arr.take([0, 1], allow_fill=True, fill_value=2.0)

        with pytest.raises(ValueError):
            arr.take([0, 1], allow_fill=True, fill_value=pd.Timestamp.now().time)

    def test_concat_same_type(self):
        """_concat_same_type matches Index._concat_same_dtype, NaT included."""
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9

        idx = self.index_cls._simple_new(data, freq="D").insert(0, pd.NaT)
        arr = self.array_cls(idx)

        result = arr._concat_same_type([arr[:-1], arr[1:], arr])
        expected = idx._concat_same_dtype([idx[:-1], idx[1:], idx], None)

        tm.assert_index_equal(self.index_cls(result), expected)

    def test_unbox_scalar(self):
        """_unbox_scalar returns a plain int for scalars and NaT."""
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
        arr = self.array_cls(data, freq="D")
        result = arr._unbox_scalar(arr[0])
        assert isinstance(result, int)

        result = arr._unbox_scalar(pd.NaT)
        assert isinstance(result, int)

        with pytest.raises(ValueError):
            arr._unbox_scalar("foo")

    def test_check_compatible_with(self):
        """_check_compatible_with accepts own scalars, slices, and NaT."""
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
        arr = self.array_cls(data, freq="D")

        arr._check_compatible_with(arr[0])
        arr._check_compatible_with(arr[:1])
        arr._check_compatible_with(pd.NaT)

    def test_scalar_from_string(self):
        """_scalar_from_string round-trips str(scalar) back to the scalar."""
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
        arr = self.array_cls(data, freq="D")
        result = arr._scalar_from_string(str(arr[0]))
        assert result == arr[0]

    def test_reduce_invalid(self):
        """_reduce raises TypeError for an unknown reduction name."""
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
        arr = self.array_cls(data, freq="D")

        with pytest.raises(TypeError, match="cannot perform"):
            arr._reduce("not a method")

    @pytest.mark.parametrize("method", ["pad", "backfill"])
    def test_fillna_method_doesnt_change_orig(self, method):
        """fillna returns a new array; the original keeps its NaT."""
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
        arr = self.array_cls(data, freq="D")
        arr[4] = pd.NaT

        fill_value = arr[3] if method == "pad" else arr[5]

        result = arr.fillna(method=method)
        assert result[4] == fill_value

        # check that the original was not changed
        assert arr[4] is pd.NaT

    def test_searchsorted(self):
        """searchsorted handles scalars, own-type arrays, and NaT."""
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
        arr = self.array_cls(data, freq="D")

        # scalar
        result = arr.searchsorted(arr[1])
        assert result == 1

        result = arr.searchsorted(arr[2], side="right")
        assert result == 3

        # own-type
        result = arr.searchsorted(arr[1:3])
        expected = np.array([1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        result = arr.searchsorted(arr[1:3], side="right")
        expected = np.array([2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        # Following numpy convention, NaT goes at the beginning
        # (unlike NaN which goes at the end)
        result = arr.searchsorted(pd.NaT)
        assert result == 0

    def test_setitem(self):
        """Scalar and slice __setitem__ are reflected in the i8 backing data."""
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
        arr = self.array_cls(data, freq="D")

        arr[0] = arr[1]
        expected = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
        expected[0] = expected[1]

        tm.assert_numpy_array_equal(arr.asi8, expected)

        arr[:2] = arr[-2:]
        expected[:2] = expected[-2:]
        tm.assert_numpy_array_equal(arr.asi8, expected)

    def test_setitem_raises(self):
        """__setitem__ rejects out-of-bounds indices and foreign value types."""
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
        arr = self.array_cls(data, freq="D")
        val = arr[0]

        with pytest.raises(IndexError, match="index 12 is out of bounds"):
            arr[12] = val

        with pytest.raises(TypeError, match="'value' should be a.* 'object'"):
            arr[0] = object()
|
||||
|
||||
|
||||
class TestDatetimeArray(SharedTests):
    """DatetimeArray-specific tests, plus the SharedTests suite."""

    index_cls = pd.DatetimeIndex
    array_cls = DatetimeArray

    def test_round(self, tz_naive_fixture):
        # GH#24064
        tz = tz_naive_fixture
        dti = pd.date_range("2016-01-01 01:01:00", periods=3, freq="H", tz=tz)

        result = dti.round(freq="2T")
        expected = dti - pd.Timedelta(minutes=1)
        tm.assert_index_equal(result, expected)

    def test_array_interface(self, datetime_index):
        """np.asarray/np.array views vs copies of the underlying M8[ns] data."""
        arr = DatetimeArray(datetime_index)

        # default asarray gives the same underlying data (for tz naive)
        result = np.asarray(arr)
        expected = arr._data
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)
        result = np.array(arr, copy=False)
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)

        # specifying M8[ns] gives the same result as default
        result = np.asarray(arr, dtype="datetime64[ns]")
        expected = arr._data
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)
        result = np.array(arr, dtype="datetime64[ns]", copy=False)
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)
        result = np.array(arr, dtype="datetime64[ns]")
        assert result is not expected
        tm.assert_numpy_array_equal(result, expected)

        # to object dtype
        result = np.asarray(arr, dtype=object)
        expected = np.array(list(arr), dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # to other dtype always copies
        result = np.asarray(arr, dtype="int64")
        assert result is not arr.asi8
        assert not np.may_share_memory(arr, result)
        expected = arr.asi8.copy()
        tm.assert_numpy_array_equal(result, expected)

        # other dtypes handled by numpy
        for dtype in ["float64", str]:
            result = np.asarray(arr, dtype=dtype)
            expected = np.asarray(arr).astype(dtype)
            tm.assert_numpy_array_equal(result, expected)

    def test_array_object_dtype(self, tz_naive_fixture):
        # GH#23524
        tz = tz_naive_fixture
        dti = pd.date_range("2016-01-01", periods=3, tz=tz)
        arr = DatetimeArray(dti)

        expected = np.array(list(dti))

        result = np.array(arr, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # also test the DatetimeIndex method while we're at it
        result = np.array(dti, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

    def test_array_tz(self, tz_naive_fixture):
        # GH#23524
        tz = tz_naive_fixture
        dti = pd.date_range("2016-01-01", periods=3, tz=tz)
        arr = DatetimeArray(dti)

        expected = dti.asi8.view("M8[ns]")
        result = np.array(arr, dtype="M8[ns]")
        tm.assert_numpy_array_equal(result, expected)

        result = np.array(arr, dtype="datetime64[ns]")
        tm.assert_numpy_array_equal(result, expected)

        # check that we are not making copies when setting copy=False
        result = np.array(arr, dtype="M8[ns]", copy=False)
        assert result.base is expected.base
        assert result.base is not None
        result = np.array(arr, dtype="datetime64[ns]", copy=False)
        assert result.base is expected.base
        assert result.base is not None

    def test_array_i8_dtype(self, tz_naive_fixture):
        """Conversion to i8 always copies, even with copy=False."""
        tz = tz_naive_fixture
        dti = pd.date_range("2016-01-01", periods=3, tz=tz)
        arr = DatetimeArray(dti)

        expected = dti.asi8
        result = np.array(arr, dtype="i8")
        tm.assert_numpy_array_equal(result, expected)

        result = np.array(arr, dtype=np.int64)
        tm.assert_numpy_array_equal(result, expected)

        # check that we are still making copies when setting copy=False
        result = np.array(arr, dtype="i8", copy=False)
        assert result.base is not expected.base
        assert result.base is None

    def test_from_array_keeps_base(self):
        # Ensure that DatetimeArray._data.base isn't lost.
        arr = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
        dta = DatetimeArray(arr)

        assert dta._data is arr
        dta = DatetimeArray(arr[:0])
        assert dta._data.base is arr

    def test_from_dti(self, tz_naive_fixture):
        """DatetimeArray built from a DatetimeIndex preserves the values."""
        tz = tz_naive_fixture
        dti = pd.date_range("2016-01-01", periods=3, tz=tz)
        arr = DatetimeArray(dti)
        assert list(dti) == list(arr)

        # Check that Index.__new__ knows what to do with DatetimeArray
        dti2 = pd.Index(arr)
        assert isinstance(dti2, pd.DatetimeIndex)
        assert list(dti2) == list(arr)

    def test_astype_object(self, tz_naive_fixture):
        """astype('O') yields an object ndarray of Timestamps."""
        tz = tz_naive_fixture
        dti = pd.date_range("2016-01-01", periods=3, tz=tz)
        arr = DatetimeArray(dti)
        asobj = arr.astype("O")
        assert isinstance(asobj, np.ndarray)
        assert asobj.dtype == "O"
        assert list(asobj) == list(dti)

    @pytest.mark.parametrize("freqstr", ["D", "B", "W", "M", "Q", "Y"])
    def test_to_perioddelta(self, datetime_index, freqstr):
        # GH#23113
        dti = datetime_index
        arr = DatetimeArray(dti)

        expected = dti.to_perioddelta(freq=freqstr)
        result = arr.to_perioddelta(freq=freqstr)
        assert isinstance(result, TimedeltaArray)

        # placeholder until these become actual EA subclasses and we can use
        # an EA-specific tm.assert_ function
        tm.assert_index_equal(pd.Index(result), pd.Index(expected))

    @pytest.mark.parametrize("freqstr", ["D", "B", "W", "M", "Q", "Y"])
    def test_to_period(self, datetime_index, freqstr):
        """to_period on the array matches the DatetimeIndex method."""
        dti = datetime_index
        arr = DatetimeArray(dti)

        expected = dti.to_period(freq=freqstr)
        result = arr.to_period(freq=freqstr)
        assert isinstance(result, PeriodArray)

        # placeholder until these become actual EA subclasses and we can use
        # an EA-specific tm.assert_ function
        tm.assert_index_equal(pd.Index(result), pd.Index(expected))

    @pytest.mark.parametrize("propname", pd.DatetimeIndex._bool_ops)
    def test_bool_properties(self, datetime_index, propname):
        # in this case _bool_ops is just `is_leap_year`
        dti = datetime_index
        arr = DatetimeArray(dti)
        assert dti.freq == arr.freq

        result = getattr(arr, propname)
        expected = np.array(getattr(dti, propname), dtype=result.dtype)

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("propname", pd.DatetimeIndex._field_ops)
    def test_int_properties(self, datetime_index, propname):
        """Integer field accessors match their DatetimeIndex counterparts."""
        dti = datetime_index
        arr = DatetimeArray(dti)

        result = getattr(arr, propname)
        expected = np.array(getattr(dti, propname), dtype=result.dtype)

        tm.assert_numpy_array_equal(result, expected)

    def test_take_fill_valid(self, datetime_index, tz_naive_fixture):
        """Valid/invalid fill_value handling in take for tz-aware data."""
        dti = datetime_index.tz_localize(tz_naive_fixture)
        arr = DatetimeArray(dti)

        now = pd.Timestamp.now().tz_localize(dti.tz)
        result = arr.take([-1, 1], allow_fill=True, fill_value=now)
        assert result[0] == now

        with pytest.raises(ValueError):
            # fill_value Timedelta invalid
            arr.take([-1, 1], allow_fill=True, fill_value=now - now)

        with pytest.raises(ValueError):
            # fill_value Period invalid
            arr.take([-1, 1], allow_fill=True, fill_value=pd.Period("2014Q1"))

        tz = None if dti.tz is not None else "US/Eastern"
        now = pd.Timestamp.now().tz_localize(tz)
        with pytest.raises(TypeError):
            # Timestamp with mismatched tz-awareness
            arr.take([-1, 1], allow_fill=True, fill_value=now)

        with pytest.raises(ValueError):
            # require NaT, not iNaT, as it could be confused with an integer
            arr.take([-1, 1], allow_fill=True, fill_value=pd.NaT.value)

    def test_concat_same_type_invalid(self, datetime_index):
        # different timezones
        dti = datetime_index
        arr = DatetimeArray(dti)

        if arr.tz is None:
            other = arr.tz_localize("UTC")
        else:
            other = arr.tz_localize(None)

        with pytest.raises(AssertionError):
            arr._concat_same_type([arr, other])

    def test_concat_same_type_different_freq(self):
        # we *can* concatenate DTI with different freqs.
        a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central"))
        b = DatetimeArray(pd.date_range("2000", periods=2, freq="H", tz="US/Central"))
        result = DatetimeArray._concat_same_type([a, b])
        expected = DatetimeArray(
            pd.to_datetime(
                [
                    "2000-01-01 00:00:00",
                    "2000-01-02 00:00:00",
                    "2000-01-01 00:00:00",
                    "2000-01-01 01:00:00",
                ]
            ).tz_localize("US/Central")
        )

        tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestTimedeltaArray(SharedTests):
    """TimedeltaArray-specific tests, plus the SharedTests suite."""

    index_cls = pd.TimedeltaIndex
    array_cls = TimedeltaArray

    def test_from_tdi(self):
        """TimedeltaArray built from a TimedeltaIndex preserves the values."""
        tdi = pd.TimedeltaIndex(["1 Day", "3 Hours"])
        arr = TimedeltaArray(tdi)
        assert list(arr) == list(tdi)

        # Check that Index.__new__ knows what to do with TimedeltaArray
        tdi2 = pd.Index(arr)
        assert isinstance(tdi2, pd.TimedeltaIndex)
        assert list(tdi2) == list(arr)

    def test_astype_object(self):
        """astype('O') yields an object ndarray of Timedeltas."""
        tdi = pd.TimedeltaIndex(["1 Day", "3 Hours"])
        arr = TimedeltaArray(tdi)
        asobj = arr.astype("O")
        assert isinstance(asobj, np.ndarray)
        assert asobj.dtype == "O"
        assert list(asobj) == list(tdi)

    def test_to_pytimedelta(self, timedelta_index):
        """to_pytimedelta matches the TimedeltaIndex method."""
        tdi = timedelta_index
        arr = TimedeltaArray(tdi)

        expected = tdi.to_pytimedelta()
        result = arr.to_pytimedelta()

        tm.assert_numpy_array_equal(result, expected)

    def test_total_seconds(self, timedelta_index):
        """total_seconds matches the TimedeltaIndex method's values."""
        tdi = timedelta_index
        arr = TimedeltaArray(tdi)

        expected = tdi.total_seconds()
        result = arr.total_seconds()

        tm.assert_numpy_array_equal(result, expected.values)

    @pytest.mark.parametrize("propname", pd.TimedeltaIndex._field_ops)
    def test_int_properties(self, timedelta_index, propname):
        """Integer field accessors match their TimedeltaIndex counterparts."""
        tdi = timedelta_index
        arr = TimedeltaArray(tdi)

        result = getattr(arr, propname)
        expected = np.array(getattr(tdi, propname), dtype=result.dtype)

        tm.assert_numpy_array_equal(result, expected)

    def test_array_interface(self, timedelta_index):
        """np.asarray/np.array views vs copies of the underlying m8[ns] data."""
        arr = TimedeltaArray(timedelta_index)

        # default asarray gives the same underlying data
        result = np.asarray(arr)
        expected = arr._data
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)
        result = np.array(arr, copy=False)
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)

        # specifying m8[ns] gives the same result as default
        result = np.asarray(arr, dtype="timedelta64[ns]")
        expected = arr._data
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)
        result = np.array(arr, dtype="timedelta64[ns]", copy=False)
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)
        result = np.array(arr, dtype="timedelta64[ns]")
        assert result is not expected
        tm.assert_numpy_array_equal(result, expected)

        # to object dtype
        result = np.asarray(arr, dtype=object)
        expected = np.array(list(arr), dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # to other dtype always copies
        result = np.asarray(arr, dtype="int64")
        assert result is not arr.asi8
        assert not np.may_share_memory(arr, result)
        expected = arr.asi8.copy()
        tm.assert_numpy_array_equal(result, expected)

        # other dtypes handled by numpy
        for dtype in ["float64", str]:
            result = np.asarray(arr, dtype=dtype)
            expected = np.asarray(arr).astype(dtype)
            tm.assert_numpy_array_equal(result, expected)

    def test_take_fill_valid(self, timedelta_index):
        """Valid/invalid fill_value handling in take for timedelta data."""
        tdi = timedelta_index
        arr = TimedeltaArray(tdi)

        td1 = pd.Timedelta(days=1)
        result = arr.take([-1, 1], allow_fill=True, fill_value=td1)
        assert result[0] == td1

        now = pd.Timestamp.now()
        with pytest.raises(ValueError):
            # fill_value Timestamp invalid
            arr.take([0, 1], allow_fill=True, fill_value=now)

        with pytest.raises(ValueError):
            # fill_value Period invalid
            arr.take([0, 1], allow_fill=True, fill_value=now.to_period("D"))
|
||||
|
||||
|
||||
class TestPeriodArray(SharedTests):
    """PeriodArray-specific tests, plus the SharedTests suite."""

    index_cls = pd.PeriodIndex
    array_cls = PeriodArray

    def test_from_pi(self, period_index):
        """PeriodArray built from a PeriodIndex preserves the values."""
        pi = period_index
        arr = PeriodArray(pi)
        assert list(arr) == list(pi)

        # Check that Index.__new__ knows what to do with PeriodArray
        pi2 = pd.Index(arr)
        assert isinstance(pi2, pd.PeriodIndex)
        assert list(pi2) == list(arr)

    def test_astype_object(self, period_index):
        """astype('O') yields an object ndarray of Periods."""
        pi = period_index
        arr = PeriodArray(pi)
        asobj = arr.astype("O")
        assert isinstance(asobj, np.ndarray)
        assert asobj.dtype == "O"
        assert list(asobj) == list(pi)

    @pytest.mark.parametrize("how", ["S", "E"])
    def test_to_timestamp(self, how, period_index):
        """to_timestamp matches the PeriodIndex method for start/end anchors."""
        pi = period_index
        arr = PeriodArray(pi)

        expected = DatetimeArray(pi.to_timestamp(how=how))
        result = arr.to_timestamp(how=how)
        assert isinstance(result, DatetimeArray)

        # placeholder until these become actual EA subclasses and we can use
        # an EA-specific tm.assert_ function
        tm.assert_index_equal(pd.Index(result), pd.Index(expected))

    @pytest.mark.parametrize("propname", PeriodArray._bool_ops)
    def test_bool_properties(self, period_index, propname):
        # in this case _bool_ops is just `is_leap_year`
        pi = period_index
        arr = PeriodArray(pi)

        result = getattr(arr, propname)
        expected = np.array(getattr(pi, propname))

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("propname", PeriodArray._field_ops)
    def test_int_properties(self, period_index, propname):
        """Integer field accessors match their PeriodIndex counterparts."""
        pi = period_index
        arr = PeriodArray(pi)

        result = getattr(arr, propname)
        expected = np.array(getattr(pi, propname))

        tm.assert_numpy_array_equal(result, expected)

    def test_array_interface(self, period_index):
        """np.asarray conversions; periods only support object/str targets."""
        arr = PeriodArray(period_index)

        # default asarray gives objects
        result = np.asarray(arr)
        expected = np.array(list(arr), dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # to object dtype (same as default)
        result = np.asarray(arr, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # to other dtypes
        with pytest.raises(TypeError):
            np.asarray(arr, dtype="int64")

        with pytest.raises(TypeError):
            np.asarray(arr, dtype="float64")

        result = np.asarray(arr, dtype="S20")
        expected = np.asarray(arr).astype("S20")
        tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
# Each case pairs an array with the NaT flavors that may legally be assigned
# into it (i.e. those that cast to the array's own NaT representation).
@pytest.mark.parametrize(
    "array,casting_nats",
    [
        (
            pd.TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data,
            (pd.NaT, np.timedelta64("NaT", "ns")),
        ),
        (
            pd.date_range("2000-01-01", periods=3, freq="D")._data,
            (pd.NaT, np.datetime64("NaT", "ns")),
        ),
        (pd.period_range("2000-01-01", periods=3, freq="D")._data, (pd.NaT,)),
    ],
    ids=lambda x: type(x).__name__,
)
def test_casting_nat_setitem_array(array, casting_nats):
    """Assigning a compatible NaT flavor sets the element to missing."""
    expected = type(array)._from_sequence([pd.NaT, array[1], array[2]])

    for nat in casting_nats:
        # work on a copy so each NaT flavor starts from the original data
        arr = array.copy()
        arr[0] = nat
        tm.assert_equal(arr, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "array,non_casting_nats",
    [
        (
            pd.TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data,
            (np.datetime64("NaT", "ns"),),
        ),
        (
            pd.date_range("2000-01-01", periods=3, freq="D")._data,
            (np.timedelta64("NaT", "ns"),),
        ),
        (
            pd.period_range("2000-01-01", periods=3, freq="D")._data,
            (np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns")),
        ),
    ],
    ids=lambda x: type(x).__name__,
)
def test_invalid_nat_setitem_array(array, non_casting_nats):
    """NaT flavours of the wrong kind for the array must be rejected on setitem."""
    for bad_nat in non_casting_nats:
        with pytest.raises(TypeError):
            array[0] = bad_nat
|
||||
@@ -0,0 +1,314 @@
|
||||
"""
|
||||
Tests for DatetimeArray
|
||||
"""
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import DatetimeArray
|
||||
from pandas.core.arrays.datetimes import sequence_to_dt64ns
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestDatetimeArrayConstructor:
    """Validation performed by the public DatetimeArray constructor."""

    def test_only_1dim_accepted(self):
        """2-dim and 0-dim ndarray inputs both raise ValueError."""
        arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]")

        with pytest.raises(ValueError, match="Only 1-dimensional"):
            # 2-dim
            DatetimeArray(arr.reshape(2, 2))

        with pytest.raises(ValueError, match="Only 1-dimensional"):
            # 0-dim
            DatetimeArray(arr[[0]].squeeze())

    def test_freq_validation(self):
        """An explicit freq that contradicts the inferred freq raises."""
        # GH#24623 check that invalid instances cannot be created with the
        # public constructor
        arr = np.arange(5, dtype=np.int64) * 3600 * 10 ** 9

        msg = (
            "Inferred frequency H from passed values does not "
            "conform to passed frequency W-SUN"
        )
        with pytest.raises(ValueError, match=msg):
            DatetimeArray(arr, freq="W")

    @pytest.mark.parametrize(
        "meth",
        [
            DatetimeArray._from_sequence,
            sequence_to_dt64ns,
            pd.to_datetime,
            pd.DatetimeIndex,
        ],
    )
    def test_mixing_naive_tzaware_raises(self, meth):
        """Mixing tz-naive and tz-aware timestamps raises for every entry point."""
        # GH#24569
        arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")])

        msg = (
            "Cannot mix tz-aware with tz-naive values|"
            "Tz-aware datetime.datetime cannot be converted "
            "to datetime64 unless utc=True"
        )

        for obj in [arr, arr[::-1]]:
            # check that we raise regardless of whether naive is found
            # before aware or vice-versa
            with pytest.raises(ValueError, match=msg):
                meth(obj)

    def test_from_pandas_array(self):
        """An integer ExtensionArray of epoch-ns converts with freq inference."""
        arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10 ** 9

        result = DatetimeArray._from_sequence(arr, freq="infer")

        expected = pd.date_range("1970-01-01", periods=5, freq="H")._data
        tm.assert_datetime_array_equal(result, expected)

    def test_mismatched_timezone_raises(self):
        """Re-wrapping a tz-aware array with a different tz dtype raises."""
        arr = DatetimeArray(
            np.array(["2000-01-01T06:00:00"], dtype="M8[ns]"),
            dtype=DatetimeTZDtype(tz="US/Central"),
        )
        dtype = DatetimeTZDtype(tz="US/Eastern")
        with pytest.raises(TypeError, match="Timezone of the array"):
            DatetimeArray(arr, dtype=dtype)

    def test_non_array_raises(self):
        """Plain lists are not accepted by the public constructor."""
        with pytest.raises(ValueError, match="list"):
            DatetimeArray([1, 2, 3])

    def test_other_type_raises(self):
        """ndarrays of a non-datetime dtype (here bool) are rejected."""
        with pytest.raises(
            ValueError, match="The dtype of 'values' is incorrect.*bool"
        ):
            DatetimeArray(np.array([1, 2, 3], dtype="bool"))

    def test_incorrect_dtype_raises(self):
        """A non-datetime target dtype (e.g. 'category') is rejected."""
        with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
            DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="category")

    def test_freq_infer_raises(self):
        """freq='infer' is only valid in _from_sequence, not the constructor."""
        with pytest.raises(ValueError, match="Frequency inference"):
            DatetimeArray(np.array([1, 2, 3], dtype="i8"), freq="infer")

    def test_copy(self):
        """copy=False aliases the input buffer; copy=True duplicates it."""
        data = np.array([1, 2, 3], dtype="M8[ns]")
        arr = DatetimeArray(data, copy=False)
        assert arr._data is data

        arr = DatetimeArray(data, copy=True)
        assert arr._data is not data
|
||||
|
||||
|
||||
class TestDatetimeArrayComparisons:
    """Comparison-operator behavior of tz-naive DatetimeArray."""

    # TODO: merge this into tests/arithmetic/test_datetime64 once it is
    # sufficiently robust

    def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators):
        """Comparing an array with itself (and equivalent array-likes) gives
        all-True for ==/<=/>= and all-False for !=/</>, in both orders."""
        # arbitrary tz-naive DatetimeIndex
        opname = all_compare_operators.strip("_")
        op = getattr(operator, opname)

        dti = pd.date_range("2016-01-1", freq="MS", periods=9, tz=None)
        arr = DatetimeArray(dti)
        assert arr.freq == dti.freq
        assert arr.tz == dti.tz

        right = dti

        expected = np.ones(len(arr), dtype=bool)
        if opname in ["ne", "gt", "lt"]:
            # for these the comparisons should be all-False
            expected = ~expected

        result = op(arr, arr)
        tm.assert_numpy_array_equal(result, expected)
        for other in [right, np.array(right)]:
            # TODO: add list and tuple, and object-dtype once those
            # are fixed in the constructor
            result = op(arr, other)
            tm.assert_numpy_array_equal(result, expected)

            # reflected comparison must agree (operands are equal element-wise)
            result = op(other, arr)
            tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestDatetimeArray:
    """Behavioral tests for DatetimeArray methods (astype, setitem, etc.)."""

    def test_astype_to_same(self):
        """astype to the identical tz dtype with copy=False returns self."""
        arr = DatetimeArray._from_sequence(["2000"], tz="US/Central")
        result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False)
        assert result is arr

    @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
    def test_astype_int(self, dtype):
        """Integer casts always widen to 64-bit (int64 or uint64)."""
        arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")])
        result = arr.astype(dtype)

        if np.dtype(dtype).kind == "u":
            expected_dtype = np.dtype("uint64")
        else:
            expected_dtype = np.dtype("int64")
        expected = arr.astype(expected_dtype)

        assert result.dtype == expected_dtype
        tm.assert_numpy_array_equal(result, expected)

    def test_tz_setter_raises(self):
        """tz is read-only; the error message points at tz_localize."""
        arr = DatetimeArray._from_sequence(["2000"], tz="US/Central")
        with pytest.raises(AttributeError, match="tz_localize"):
            arr.tz = "UTC"

    def test_setitem_different_tz_raises(self):
        """Assigning a naive or differently-localized Timestamp raises."""
        data = np.array([1, 2, 3], dtype="M8[ns]")
        arr = DatetimeArray(data, copy=False, dtype=DatetimeTZDtype(tz="US/Central"))
        with pytest.raises(ValueError, match="None"):
            arr[0] = pd.Timestamp("2000")

        with pytest.raises(ValueError, match="US/Central"):
            arr[0] = pd.Timestamp("2000", tz="US/Eastern")

    def test_setitem_clears_freq(self):
        """Any setitem invalidates the cached freq."""
        a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central"))
        a[0] = pd.Timestamp("2000", tz="US/Central")
        assert a.freq is None

    def test_repeat_preserves_tz(self):
        """repeat keeps tz and values but drops freq."""
        dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
        arr = DatetimeArray(dti)

        repeated = arr.repeat([1, 1])

        # preserves tz and values, but not freq
        expected = DatetimeArray(arr.asi8, freq=None, dtype=arr.dtype)
        tm.assert_equal(repeated, expected)

    def test_value_counts_preserves_tz(self):
        """value_counts keeps the tz in its index and counts NaT separately."""
        dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
        arr = DatetimeArray(dti).repeat([4, 3])

        result = arr.value_counts()

        # Note: not tm.assert_index_equal, since `freq`s do not match
        assert result.index.equals(dti)

        arr[-2] = pd.NaT
        result = arr.value_counts()
        expected = pd.Series([1, 4, 2], index=[pd.NaT, dti[0], dti[1]])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("method", ["pad", "backfill"])
    def test_fillna_preserves_tz(self, method):
        """ffill/bfill keep the tz and do not mutate the source array."""
        dti = pd.date_range("2000-01-01", periods=5, freq="D", tz="US/Central")
        arr = DatetimeArray(dti, copy=True)
        arr[2] = pd.NaT

        fill_val = dti[1] if method == "pad" else dti[3]
        expected = DatetimeArray._from_sequence(
            [dti[0], dti[1], fill_val, dti[3], dti[4]], freq=None, tz="US/Central"
        )

        result = arr.fillna(method=method)
        tm.assert_extension_array_equal(result, expected)

        # assert that arr and dti were not modified in-place
        assert arr[2] is pd.NaT
        assert dti[2] == pd.Timestamp("2000-01-03", tz="US/Central")

    def test_array_interface_tz(self):
        """tz-aware arrays convert to object Timestamps by default and to
        UTC-based M8[ns] when that dtype is requested explicitly."""
        tz = "US/Central"
        data = DatetimeArray(pd.date_range("2017", periods=2, tz=tz))
        result = np.asarray(data)

        expected = np.array(
            [
                pd.Timestamp("2017-01-01T00:00:00", tz=tz),
                pd.Timestamp("2017-01-02T00:00:00", tz=tz),
            ],
            dtype=object,
        )
        tm.assert_numpy_array_equal(result, expected)

        result = np.asarray(data, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # M8[ns] conversion drops the tz and shifts the wall time to UTC
        # (06:00 = midnight US/Central expressed in UTC).
        result = np.asarray(data, dtype="M8[ns]")

        expected = np.array(
            ["2017-01-01T06:00:00", "2017-01-02T06:00:00"], dtype="M8[ns]"
        )
        tm.assert_numpy_array_equal(result, expected)

    def test_array_interface(self):
        """tz-naive arrays convert directly to M8[ns], or object Timestamps."""
        data = DatetimeArray(pd.date_range("2017", periods=2))
        expected = np.array(
            ["2017-01-01T00:00:00", "2017-01-02T00:00:00"], dtype="datetime64[ns]"
        )

        result = np.asarray(data)
        tm.assert_numpy_array_equal(result, expected)

        result = np.asarray(data, dtype=object)
        expected = np.array(
            [pd.Timestamp("2017-01-01T00:00:00"), pd.Timestamp("2017-01-02T00:00:00")],
            dtype=object,
        )
        tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestSequenceToDT64NS:
    """Tests for the sequence_to_dt64ns conversion helper."""

    def test_tz_dtype_mismatch_raises(self):
        """A tz-aware input paired with a different-tz dtype is an error."""
        central = DatetimeArray._from_sequence(["2000"], tz="US/Central")
        with pytest.raises(TypeError, match="data is already tz-aware"):
            sequence_to_dt64ns(central, dtype=DatetimeTZDtype(tz="UTC"))

    def test_tz_dtype_matches(self):
        """A matching tz dtype passes the underlying data through unchanged."""
        central = DatetimeArray._from_sequence(["2000"], tz="US/Central")
        converted, _, _ = sequence_to_dt64ns(
            central, dtype=DatetimeTZDtype(tz="US/Central")
        )
        tm.assert_numpy_array_equal(central._data, converted)
|
||||
|
||||
|
||||
class TestReductions:
    """min/max reductions on DatetimeArray, with and without NaT/skipna."""

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_min_max(self, tz):
        """min/max skip NaT by default; skipna=False propagates NaT."""
        arr = DatetimeArray._from_sequence(
            [
                "2000-01-03",
                "2000-01-03",
                "NaT",
                "2000-01-02",
                "2000-01-05",
                "2000-01-04",
            ],
            tz=tz,
        )

        result = arr.min()
        expected = pd.Timestamp("2000-01-02", tz=tz)
        assert result == expected

        result = arr.max()
        expected = pd.Timestamp("2000-01-05", tz=tz)
        assert result == expected

        result = arr.min(skipna=False)
        assert result is pd.NaT

        result = arr.max(skipna=False)
        assert result is pd.NaT

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    @pytest.mark.parametrize("skipna", [True, False])
    def test_min_max_empty(self, skipna, tz):
        """Reductions over an empty array are NaT regardless of skipna/tz."""
        arr = DatetimeArray._from_sequence([], tz=tz)
        result = arr.min(skipna=skipna)
        assert result is pd.NaT

        result = arr.max(skipna=skipna)
        assert result is pd.NaT
|
||||
@@ -0,0 +1,816 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.generic import ABCIndexClass
|
||||
|
||||
import pandas as pd
|
||||
from pandas.api.types import is_float, is_float_dtype, is_integer, is_scalar
|
||||
from pandas.core.arrays import IntegerArray, integer_array
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
Int16Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
UInt8Dtype,
|
||||
UInt16Dtype,
|
||||
UInt32Dtype,
|
||||
UInt64Dtype,
|
||||
)
|
||||
from pandas.tests.extension.base import BaseOpsUtil
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def make_data():
    """Build a 100-element list of ints with NaN holes at positions 8 and 97."""
    values = list(range(8))
    values.append(np.nan)
    values.extend(range(10, 98))
    values.append(np.nan)
    values.extend([99, 100])
    return values
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        Int8Dtype,
        Int16Dtype,
        Int32Dtype,
        Int64Dtype,
        UInt8Dtype,
        UInt16Dtype,
        UInt32Dtype,
        UInt64Dtype,
    ]
)
def dtype(request):
    """Instantiate each nullable-integer dtype class in turn."""
    dtype_class = request.param
    return dtype_class()
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """An IntegerArray built from make_data() with the requested dtype."""
    raw = make_data()
    return integer_array(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """A two-element IntegerArray whose first entry is missing."""
    raw = [np.nan, 1]
    return integer_array(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture giving 'data' and 'data_missing'"""
    if request.param == "data_missing":
        return data_missing
    if request.param == "data":
        return data
|
||||
|
||||
|
||||
def test_dtypes(dtype):
    """Smoke test: numpy kind matches signedness, and a name exists."""
    expected_kind = "i" if dtype.is_signed_integer else "u"
    assert np.dtype(dtype.type).kind == expected_kind
    assert dtype.name is not None
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, expected",
    [
        (Int8Dtype(), "Int8Dtype()"),
        (Int16Dtype(), "Int16Dtype()"),
        (Int32Dtype(), "Int32Dtype()"),
        (Int64Dtype(), "Int64Dtype()"),
        (UInt8Dtype(), "UInt8Dtype()"),
        (UInt16Dtype(), "UInt16Dtype()"),
        (UInt32Dtype(), "UInt32Dtype()"),
        (UInt64Dtype(), "UInt64Dtype()"),
    ],
)
def test_repr_dtype(dtype, expected):
    """repr of each dtype object is its class name followed by parens."""
    observed = repr(dtype)
    assert observed == expected
|
||||
|
||||
|
||||
def test_repr_array():
    """A short IntegerArray repr shows the values, length and dtype."""
    observed = repr(integer_array([1, None, 3]))
    assert observed == "<IntegerArray>\n[1, NaN, 3]\nLength: 3, dtype: Int64"
|
||||
|
||||
|
||||
def test_repr_array_long():
    """A long IntegerArray repr is truncated with an ellipsis line."""
    data = integer_array([1, 2, None] * 1000)
    # NOTE(review): the alignment spaces in this expected string look collapsed
    # (value columns are normally padded) — confirm against the repr output.
    expected = (
        "<IntegerArray>\n"
        "[ 1, 2, NaN, 1, 2, NaN, 1, 2, NaN, 1,\n"
        " ...\n"
        " NaN, 1, 2, NaN, 1, 2, NaN, 1, 2, NaN]\n"
        "Length: 3000, dtype: Int64"
    )
    result = repr(data)
    assert result == expected
|
||||
|
||||
|
||||
class TestConstructors:
    """Constructing nullable-integer Series from various source dtypes."""

    def test_from_dtype_from_float(self, data):
        """A Series built from float/list/int ndarray views of `data`, using
        the string form of the dtype, round-trips to the same Series."""
        # construct from our dtype & string dtype
        dtype = data.dtype

        # from float
        expected = pd.Series(data)
        result = pd.Series(np.array(data).astype("float"), dtype=str(dtype))
        tm.assert_series_equal(result, expected)

        # from int / list
        expected = pd.Series(data)
        result = pd.Series(np.array(data).tolist(), dtype=str(dtype))
        tm.assert_series_equal(result, expected)

        # from int / array (NaNs dropped first so the int cast is valid)
        expected = pd.Series(data).dropna().reset_index(drop=True)
        dropped = np.array(data.dropna()).astype(np.dtype((dtype.type)))
        result = pd.Series(dropped, dtype=str(dtype))
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestArithmeticOps(BaseOpsUtil):
    """Arithmetic operators on IntegerArray, checked against the equivalent
    float/int Series computation with NA locations masked afterwards."""

    def _check_divmod_op(self, s, op, other, exc=None):
        """Delegate to the base helper; divmod is expected not to raise.

        The ``exc`` parameter is accepted for signature compatibility but
        deliberately ignored (None is always passed through).
        """
        super()._check_divmod_op(s, op, other, None)

    def _check_op(self, s, op_name, other, exc=None):
        """Run ``op_name`` on ``s`` and ``other`` and compare with the result
        computed on plain float/int data, NA-masked after the fact.

        ``exc`` is accepted for signature compatibility but unused here.
        """
        op = self.get_op_from_name(op_name)
        result = op(s, other)

        # compute expected
        mask = s.isna()

        # if s is a DataFrame, squeeze to a Series
        # for comparison
        if isinstance(s, pd.DataFrame):
            result = result.squeeze()
            s = s.squeeze()
            mask = mask.squeeze()

        # other array is an Integer
        if isinstance(other, IntegerArray):
            omask = getattr(other, "mask", None)
            # NOTE(review): this rebinds `mask` to other's data (or `other`
            # itself), discarding s.isna() computed above — looks suspicious;
            # confirm whether `mask = getattr(mask, "values", mask)` was meant.
            mask = getattr(other, "data", other)
            if omask is not None:
                mask |= omask

        # 1 ** na is na, so need to unmask those
        if op_name == "__pow__":
            mask = np.where(s == 1, False, mask)

        elif op_name == "__rpow__":
            mask = np.where(other == 1, False, mask)

        # float result type or float op
        if (
            is_float_dtype(other)
            or is_float(other)
            or op_name in ["__rtruediv__", "__truediv__", "__rdiv__", "__div__"]
        ):
            rs = s.astype("float")
            expected = op(rs, other)
            self._check_op_float(result, expected, mask, s, op_name, other)

        # integer result type
        else:
            rs = pd.Series(s.values._data, name=s.name)
            expected = op(rs, other)
            self._check_op_integer(result, expected, mask, s, op_name, other)

    def _check_op_float(self, result, expected, mask, s, op_name, other):
        """Compare a float-dtype result: NA-mask the expected values, with a
        special case for floordiv's inf-vs-nan discrepancy."""
        expected[mask] = np.nan
        if "floordiv" in op_name:
            # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)
            mask2 = np.isinf(expected) & np.isnan(result)
            expected[mask2] = np.nan
        tm.assert_series_equal(result, expected)

    def _check_op_integer(self, result, expected, mask, s, op_name, other):
        """Compare an integer-dtype result against the plain-int expected.

        To compare properly, we convert the expected to float, mask to nans
        and convert infs; if we have uints then we process as uints, then
        convert to float, and we ultimately want to create an IntegerArray
        for comparisons.
        """
        fill_value = 0

        # mod/rmod turn floating 0 into NaN while
        # integer works as expected (no nan)
        if op_name in ["__mod__", "__rmod__"]:
            if is_scalar(other):
                if other == 0:
                    expected[s.values == 0] = 0
                else:
                    expected = expected.fillna(0)
            else:
                expected[(s.values == 0) & ((expected == 0) | expected.isna())] = 0
        try:
            # replace +/-inf by the fill value, then cast back to the
            # nullable integer dtype
            expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
            original = expected
            expected = expected.astype(s.dtype)

        except ValueError:
            # the direct cast failed: go through float first
            expected = expected.astype(float)
            expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
            original = expected
            expected = expected.astype(s.dtype)

        expected[mask] = np.nan

        # assert that the expected astype is ok
        # (skip for unsigned as they have wrap around)
        if not s.dtype.is_unsigned_integer:
            original = pd.Series(original)

            # we need to fill with 0's to emulate what an astype('int') does
            # (truncation) for certain ops
            if op_name in ["__rtruediv__", "__rdiv__"]:
                mask |= original.isna()
                original = original.fillna(0).astype("int")

            original = original.astype("float")
            original[mask] = np.nan
            tm.assert_series_equal(original, expected.astype("float"))

        # assert our expected result
        tm.assert_series_equal(result, expected)

    def test_arith_integer_array(self, data, all_arithmetic_operators):
        """Operate with a rhs that is itself an integer array (with an NA)."""
        op = all_arithmetic_operators

        s = pd.Series(data)
        rhs = pd.Series([1] * len(data), dtype=data.dtype)
        rhs.iloc[-1] = np.nan

        self._check_op(s, op, rhs)

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        """Series op scalar."""
        op = all_arithmetic_operators

        s = pd.Series(data)
        self._check_op(s, op, 1, exc=TypeError)

    def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
        """Single-column DataFrame op scalar."""
        op = all_arithmetic_operators

        df = pd.DataFrame({"A": data})
        self._check_op(df, op, 1, exc=TypeError)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Series op plain ndarray of the matching numpy dtype."""
        op = all_arithmetic_operators

        s = pd.Series(data)
        other = np.ones(len(s), dtype=s.dtype.type)
        self._check_op(s, op, other, exc=TypeError)

    def test_arith_coerce_scalar(self, data, all_arithmetic_operators):
        """A float scalar operand coerces the computation to float."""
        op = all_arithmetic_operators
        s = pd.Series(data)

        other = 0.01
        self._check_op(s, op, other)

    # NOTE(review): the first two parameters are duplicates (1.0, 1.0) —
    # possibly an int 1 was intended for one of them; confirm.
    @pytest.mark.parametrize("other", [1.0, 1.0, np.array(1.0), np.array([1.0])])
    def test_arithmetic_conversion(self, all_arithmetic_operators, other):
        # if we have a float operand we should have a float result
        # if that is equal to an integer
        op = self.get_op_from_name(all_arithmetic_operators)

        s = pd.Series([1, 2, 3], dtype="Int64")
        result = op(s, other)
        assert result.dtype is np.dtype("float")

    @pytest.mark.parametrize("other", [0, 0.5])
    def test_arith_zero_dim_ndarray(self, other):
        """A 0-d ndarray operand behaves like the equivalent scalar."""
        arr = integer_array([1, None, 2])
        result = arr + np.array(other)
        expected = arr + other
        tm.assert_equal(result, expected)

    def test_error(self, data, all_arithmetic_operators):
        """Invalid operands (strings, timestamps, 2-d inputs) raise."""
        op = all_arithmetic_operators
        s = pd.Series(data)
        ops = getattr(s, op)
        opa = getattr(data, op)

        # invalid scalars
        with pytest.raises(TypeError):
            ops("foo")
        with pytest.raises(TypeError):
            ops(pd.Timestamp("20180101"))

        # invalid array-likes
        with pytest.raises(TypeError):
            ops(pd.Series("foo", index=s.index))

        if op != "__rpow__":
            # TODO(extension)
            # rpow with a datetimelike coerces the integer array incorrectly
            with pytest.raises(TypeError):
                ops(pd.Series(pd.date_range("20180101", periods=len(s))))

        # 2d
        with pytest.raises(NotImplementedError):
            opa(pd.DataFrame({"A": s}))
        with pytest.raises(NotImplementedError):
            opa(np.arange(len(s)).reshape(-1, len(s)))

    def test_pow(self):
        """1 ** NA and NA ** 1 are defined (== 1 or NA per the issue)."""
        # https://github.com/pandas-dev/pandas/issues/22022
        a = integer_array([1, np.nan, np.nan, 1])
        b = integer_array([1, np.nan, 1, np.nan])
        result = a ** b
        expected = pd.core.arrays.integer_array([1, np.nan, np.nan, 1])
        tm.assert_extension_array_equal(result, expected)

    def test_rpow_one_to_na(self):
        """1 ** NA is 1; any other base ** NA is NaN."""
        # https://github.com/pandas-dev/pandas/issues/22022
        arr = integer_array([np.nan, np.nan])
        result = np.array([1.0, 2.0]) ** arr
        expected = np.array([1.0, np.nan])
        tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestComparisonOps(BaseOpsUtil):
    """Comparison operators on IntegerArray: NA slots compare False,
    except for != which compares True."""

    def _compare_other(self, data, op_name, other):
        """Compare ``data`` against ``other`` as a raw array and as a Series,
        checking against the same op on the underlying ndarray."""
        op = self.get_op_from_name(op_name)

        # array
        result = pd.Series(op(data, other))
        expected = pd.Series(op(data._data, other))

        # fill the nan locations: False for all ops except __ne__
        expected[data._mask] = op_name == "__ne__"

        tm.assert_series_equal(result, expected)

        # series
        s = pd.Series(data)
        result = op(s, other)

        expected = pd.Series(data._data)
        expected = op(expected, other)

        # fill the nan locations
        expected[data._mask] = op_name == "__ne__"

        tm.assert_series_equal(result, expected)

    def test_compare_scalar(self, data, all_compare_operators):
        """Comparison against a scalar."""
        op_name = all_compare_operators
        self._compare_other(data, op_name, 0)

    def test_compare_array(self, data, all_compare_operators):
        """Comparison against an equal-length Series."""
        op_name = all_compare_operators
        other = pd.Series([0] * len(data))
        self._compare_other(data, op_name, other)
|
||||
|
||||
|
||||
class TestCasting:
    """Casting nullable-integer data to/from Index, numpy dtypes and object."""
    # Fix: removed a stray `pass` statement — the class body is not empty,
    # so the placeholder was dead code.

    @pytest.mark.parametrize("dropna", [True, False])
    def test_construct_index(self, all_data, dropna):
        """Building an Index from an IntegerArray must not coerce to
        Float64Index; it stays an object-dtype Index."""
        all_data = all_data[:10]
        if dropna:
            other = np.array(all_data[~all_data.isna()])
        else:
            other = all_data

        result = pd.Index(integer_array(other, dtype=all_data.dtype))
        expected = pd.Index(other, dtype=object)

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("dropna", [True, False])
    def test_astype_index(self, all_data, dropna):
        """astype from a plain int/uint Index to the nullable dtype matches
        going through object dtype."""
        all_data = all_data[:10]
        if dropna:
            other = all_data[~all_data.isna()]
        else:
            other = all_data

        dtype = all_data.dtype
        idx = pd.Index(np.array(other))
        assert isinstance(idx, ABCIndexClass)

        result = idx.astype(dtype)
        expected = idx.astype(object).astype(dtype)
        tm.assert_index_equal(result, expected)

    def test_astype(self, all_data):
        """astype between nullable dtypes, numpy dtypes and object.

        Casting to a numpy dtype only works when there are no NAs; with NAs
        present it raises ValueError.
        """
        all_data = all_data[:10]

        ints = all_data[~all_data.isna()]
        mixed = all_data
        dtype = Int8Dtype()

        # coerce to same type - ints
        s = pd.Series(ints)
        result = s.astype(all_data.dtype)
        expected = pd.Series(ints)
        tm.assert_series_equal(result, expected)

        # coerce to same other - ints
        s = pd.Series(ints)
        result = s.astype(dtype)
        expected = pd.Series(ints, dtype=dtype)
        tm.assert_series_equal(result, expected)

        # coerce to same numpy_dtype - ints
        s = pd.Series(ints)
        result = s.astype(all_data.dtype.numpy_dtype)
        expected = pd.Series(ints._data.astype(all_data.dtype.numpy_dtype))
        tm.assert_series_equal(result, expected)

        # coerce to same type - mixed
        s = pd.Series(mixed)
        result = s.astype(all_data.dtype)
        expected = pd.Series(mixed)
        tm.assert_series_equal(result, expected)

        # coerce to same other - mixed
        s = pd.Series(mixed)
        result = s.astype(dtype)
        expected = pd.Series(mixed, dtype=dtype)
        tm.assert_series_equal(result, expected)

        # coerce to same numpy_dtype - mixed: NAs cannot be represented
        s = pd.Series(mixed)
        with pytest.raises(ValueError):
            s.astype(all_data.dtype.numpy_dtype)

        # coerce to object
        s = pd.Series(mixed)
        result = s.astype("object")
        expected = pd.Series(np.asarray(mixed))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
    def test_astype_specific_casting(self, dtype):
        """astype accepts both dtype objects and their string aliases,
        with and without missing values."""
        s = pd.Series([1, 2, 3], dtype="Int64")
        result = s.astype(dtype)
        expected = pd.Series([1, 2, 3], dtype=dtype)
        tm.assert_series_equal(result, expected)

        s = pd.Series([1, 2, 3, None], dtype="Int64")
        result = s.astype(dtype)
        expected = pd.Series([1, 2, 3, None], dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_construct_cast_invalid(self, dtype):
        """Fractional floats cannot be safely cast to an integer dtype."""
        msg = "cannot safely"
        arr = [1.2, 2.3, 3.7]
        with pytest.raises(TypeError, match=msg):
            integer_array(arr, dtype=dtype)

        with pytest.raises(TypeError, match=msg):
            pd.Series(arr).astype(dtype)

        arr = [1.2, 2.3, 3.7, np.nan]
        with pytest.raises(TypeError, match=msg):
            integer_array(arr, dtype=dtype)

        with pytest.raises(TypeError, match=msg):
            pd.Series(arr).astype(dtype)
|
||||
|
||||
|
||||
def test_frame_repr(data_missing):
    """A DataFrame holding an IntegerArray column reprs with NaN for NA."""
    df = pd.DataFrame({"A": data_missing})
    result = repr(df)
    # NOTE(review): column-alignment spaces in this expected string look
    # collapsed — confirm against the actual DataFrame repr output.
    expected = " A\n0 NaN\n1 1"
    assert result == expected
|
||||
|
||||
|
||||
def test_conversions(data_missing):
    """Converting an IntegerArray column to object dtype yields exact
    np.nan/int scalars, element for element."""
    # astype to object series
    df = pd.DataFrame({"A": data_missing})
    result = df["A"].astype("object")
    expected = pd.Series(np.array([np.nan, 1], dtype=object), name="A")
    tm.assert_series_equal(result, expected)

    # convert to object ndarray
    # we assert that we are exactly equal
    # including type conversions of scalars
    result = df["A"].astype("object").values
    expected = np.array([np.nan, 1], dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    for r, e in zip(result, expected):
        if pd.isnull(r):
            assert pd.isnull(e)
        elif is_integer(r):
            assert r == e
            assert is_integer(e)
        else:
            # non-NA, non-integer scalars must match in value and exact type
            assert r == e
            assert type(r) == type(e)
|
||||
|
||||
|
||||
def test_integer_array_constructor():
    """IntegerArray requires an int64 ndarray of values and a bool ndarray mask."""
    data = np.array([1, 2, 3, 4], dtype="int64")
    na_mask = np.array([False, False, False, True], dtype="bool")

    # The happy path matches the integer_array helper.
    built = IntegerArray(data, na_mask)
    expected = integer_array([1, 2, 3, np.nan], dtype="int64")
    tm.assert_extension_array_equal(built, expected)

    # Lists, float values, or a missing mask are all rejected.
    with pytest.raises(TypeError):
        IntegerArray(data.tolist(), na_mask)
    with pytest.raises(TypeError):
        IntegerArray(data, na_mask.tolist())
    with pytest.raises(TypeError):
        IntegerArray(data.astype(float), na_mask)
    with pytest.raises(TypeError):
        IntegerArray(data)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b",
    [
        ([1, None], [1, np.nan]),
        ([None], [np.nan]),
        ([None, np.nan], [np.nan, np.nan]),
        ([np.nan, np.nan], [np.nan, np.nan]),
    ],
)
def test_integer_array_constructor_none_is_nan(a, b):
    """None and np.nan are interchangeable missing-value markers."""
    from_none = integer_array(a)
    from_nan = integer_array(b)
    tm.assert_extension_array_equal(from_none, from_nan)
|
||||
|
||||
|
||||
def test_integer_array_constructor_copy():
    """By default the inputs are aliased; copy=True duplicates them."""
    data = np.array([1, 2, 3, 4], dtype="int64")
    na_mask = np.array([False, False, False, True], dtype="bool")

    aliased = IntegerArray(data, na_mask)
    assert aliased._data is data
    assert aliased._mask is na_mask

    copied = IntegerArray(data, na_mask, copy=True)
    assert copied._data is not data
    assert copied._mask is not na_mask
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        ["1", "2"],
        "foo",
        1,
        1.0,
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        [[1, 2], [3, 4]],
        [np.nan, {"a": 1}],
    ],
)
def test_to_integer_array_error(values):
    """Inputs that cannot represent integers raise TypeError."""
    with pytest.raises(TypeError):
        integer_array(values)
|
||||
|
||||
|
||||
def test_to_integer_array_inferred_dtype():
    """The input's numpy dtype is respected; dtype-less input defaults to Int64."""
    assert integer_array(np.array([1, 2], dtype="int8")).dtype == Int8Dtype()
    assert integer_array(np.array([1, 2], dtype="int32")).dtype == Int32Dtype()

    # A plain list has no dtype of its own -> always Int64.
    assert integer_array([1, 2]).dtype == Int64Dtype()
|
||||
|
||||
|
||||
def test_to_integer_array_dtype_keyword():
    """An explicit dtype keyword wins, even over the values' own dtype."""
    assert integer_array([1, 2], dtype="int8").dtype == Int8Dtype()

    overridden = integer_array(np.array([1, 2], dtype="int8"), dtype="int32")
    assert overridden.dtype == Int32Dtype()
|
||||
|
||||
|
||||
def test_to_integer_array_float():
    """Whole floats convert; fractional floats raise; itemsize widens to 64."""
    whole = integer_array([1.0, 2.0])
    tm.assert_extension_array_equal(whole, integer_array([1, 2]))

    with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
        integer_array([1.5, 2.0])

    # For float inputs the itemsize is not preserved: float32 -> Int64.
    assert integer_array(np.array([1.0, 2.0], dtype="float32")).dtype == Int64Dtype()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "bool_values, int_values, target_dtype, expected_dtype",
    [
        ([False, True], [0, 1], Int64Dtype(), Int64Dtype()),
        ([False, True], [0, 1], "Int64", Int64Dtype()),
        ([False, True, np.nan], [0, 1, np.nan], Int64Dtype(), Int64Dtype()),
    ],
)
def test_to_integer_array_bool(bool_values, int_values, target_dtype, expected_dtype):
    """Booleans (with or without NaN) coerce to the requested masked int dtype."""
    result = integer_array(bool_values, dtype=target_dtype)
    assert result.dtype == expected_dtype
    expected = integer_array(int_values, dtype=target_dtype)
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, to_dtype, result_dtype",
    [
        (np.array([1], dtype="int64"), None, Int64Dtype),
        (np.array([1, np.nan]), None, Int64Dtype),
        (np.array([1, np.nan]), "int8", Int8Dtype),
    ],
)
def test_to_integer_array(values, to_dtype, result_dtype):
    """Existing ndarrays convert to IntegerArray with the expected dtype."""
    # convert existing arrays to IntegerArrays
    result = integer_array(values, dtype=to_dtype)
    assert result.dtype == result_dtype()
    expected = integer_array(values, dtype=result_dtype())
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_cross_type_arithmetic():
    """Arithmetic across Int64 / UInt8 / plain-int columns propagates NA."""
    df = pd.DataFrame(
        {
            "A": pd.Series([1, 2, np.nan], dtype="Int64"),
            "B": pd.Series([1, np.nan, 3], dtype="UInt8"),
            "C": [1, 2, 3],
        }
    )

    # masked + plain int -> masked result with NA preserved
    tm.assert_series_equal(df.A + df.C, pd.Series([2, 4, np.nan], dtype="Int64"))

    # comparisons involving NA evaluate to False
    tm.assert_series_equal((df.A + df.C) * 3 == 12, pd.Series([False, True, False]))

    # two masked columns: NA in either operand poisons the result
    tm.assert_series_equal(df.A + df.B, pd.Series([2, np.nan, np.nan], dtype="Int64"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
def test_preserve_dtypes(op):
    """sum/min/max/prod return plain ints, and groupby keeps the Int64 dtype."""
    # TODO(#22346): preserve Int64 dtype
    # for ops that enable (mean would actually work here
    # but generally it is a float return value)
    df = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": integer_array([1, None, 3], dtype="Int64"),
        }
    )

    # op: reduction over the masked column gives a Python int
    result = getattr(df.C, op)()
    assert isinstance(result, int)

    # groupby: the Int64 column keeps its dtype through the reduction
    result = getattr(df.groupby("A"), op)()

    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": integer_array([1, 3], dtype="Int64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["mean"])
def test_reduce_to_float(op):
    """Reductions like mean return float even when the value is round."""
    # some reduce ops always return float, even if the result
    # is a rounded number
    df = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": integer_array([1, None, 3], dtype="Int64"),
        }
    )

    # op: direct reduction yields a Python float
    result = getattr(df.C, op)()
    assert isinstance(result, float)

    # groupby: the Int64 column keeps its dtype through the reduction
    result = getattr(df.groupby("A"), op)()

    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": integer_array([1, 3], dtype="Int64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_nansafe():
    """Casting a masked Int8 array containing NA to uint32 must raise (gh-22343)."""
    arr = integer_array([np.nan, 1, 2], dtype="Int8")

    with pytest.raises(ValueError, match="cannot convert float NaN to integer"):
        arr.astype("uint32")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.abs, np.sign])
def test_ufuncs_single_int(ufunc):
    """Unary int-valued ufuncs work on IntegerArray and on a wrapping Series."""
    arr = integer_array([1, 2, -3, np.nan])
    tm.assert_extension_array_equal(
        ufunc(arr), integer_array(ufunc(arr.astype(float)))
    )

    # same ufunc applied through a Series wrapper
    ser = pd.Series(arr)
    tm.assert_series_equal(
        ufunc(ser), pd.Series(integer_array(ufunc(arr.astype(float))))
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
def test_ufuncs_single_float(ufunc):
    """Float-valued ufuncs fall back to plain float results (no re-masking)."""
    a = integer_array([1, 2, -3, np.nan])
    # errstate: log/sqrt of the negative element warns "invalid" otherwise
    with np.errstate(invalid="ignore"):
        result = ufunc(a)
        expected = ufunc(a.astype(float))
    tm.assert_numpy_array_equal(result, expected)

    s = pd.Series(a)
    with np.errstate(invalid="ignore"):
        result = ufunc(s)
        expected = ufunc(s.astype(float))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.add, np.subtract])
def test_ufuncs_binary_int(ufunc):
    """Binary int ufuncs work array/array, array/ndarray and array/scalar."""
    # two IntegerArrays
    a = integer_array([1, 2, -3, np.nan])
    result = ufunc(a, a)
    expected = integer_array(ufunc(a.astype(float), a.astype(float)))
    tm.assert_extension_array_equal(result, expected)

    # IntegerArray with numpy array (both operand orders)
    arr = np.array([1, 2, 3, 4])
    result = ufunc(a, arr)
    expected = integer_array(ufunc(a.astype(float), arr))
    tm.assert_extension_array_equal(result, expected)

    result = ufunc(arr, a)
    expected = integer_array(ufunc(arr, a.astype(float)))
    tm.assert_extension_array_equal(result, expected)

    # IntegerArray with scalar (both operand orders)
    result = ufunc(a, 1)
    expected = integer_array(ufunc(a.astype(float), 1))
    tm.assert_extension_array_equal(result, expected)

    result = ufunc(1, a)
    expected = integer_array(ufunc(1, a.astype(float)))
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[0, 1], [0, None]])
def test_ufunc_reduce_raises(values):
    """ufunc.reduce (e.g. np.add.reduce) is not implemented for IntegerArray."""
    masked = integer_array(values)
    with pytest.raises(NotImplementedError):
        np.add.reduce(masked)
|
||||
|
||||
|
||||
# TODO(jreback) - these need testing / are broken
|
||||
|
||||
# shift
|
||||
|
||||
# set_index (destroys type)
|
||||
@@ -0,0 +1,213 @@
|
||||
"""
|
||||
Additional tests for PandasArray that aren't covered by
|
||||
the interface tests.
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.arrays import PandasArray
|
||||
from pandas.core.arrays.numpy_ import PandasDtype
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        np.array(["a", "b"], dtype=object),
        np.array([0, 1], dtype=float),
        np.array([0, 1], dtype=int),
        np.array([0, 1 + 2j], dtype=complex),
        np.array([True, False], dtype=bool),
        np.array([0, 1], dtype="datetime64[ns]"),
        np.array([0, 1], dtype="timedelta64[ns]"),
    ]
)
def any_numpy_array(request):
    """
    Parametrized fixture for NumPy arrays with different dtypes
    (object, float, int, complex, bool, datetime64[ns], timedelta64[ns]).

    This excludes string and bytes.
    """
    return request.param
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# PandasDtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, expected",
    [
        ("bool", True),
        ("int", True),
        ("uint", True),
        ("float", True),
        ("complex", True),
        ("str", False),
        ("bytes", False),
        ("datetime64[ns]", False),
        ("object", False),
        ("void", False),
    ],
)
def test_is_numeric(dtype, expected):
    """_is_numeric is True exactly for bool/int/uint/float/complex dtypes."""
    dtype = PandasDtype(dtype)
    assert dtype._is_numeric is expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, expected",
    [
        ("bool", True),
        ("int", False),
        ("uint", False),
        ("float", False),
        ("complex", False),
        ("str", False),
        ("bytes", False),
        ("datetime64[ns]", False),
        ("object", False),
        ("void", False),
    ],
)
def test_is_boolean(dtype, expected):
    """_is_boolean is True only for the bool dtype."""
    dtype = PandasDtype(dtype)
    assert dtype._is_boolean is expected
|
||||
|
||||
|
||||
def test_repr():
    """repr shows the wrapped numpy dtype name."""
    assert repr(PandasDtype(np.dtype("int64"))) == "PandasDtype('int64')"
|
||||
|
||||
|
||||
def test_constructor_from_string():
    """construct_from_string parses a numpy dtype name into a PandasDtype."""
    assert PandasDtype.construct_from_string("int64") == PandasDtype(np.dtype("int64"))
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Construction
|
||||
|
||||
|
||||
def test_constructor_no_coercion():
    """PandasArray refuses non-ndarray input rather than coercing it."""
    with pytest.raises(ValueError, match="NumPy array"):
        PandasArray([1, 2, 3])
|
||||
|
||||
|
||||
def test_series_constructor_with_copy():
    """copy=True in the Series constructor must not alias the backing ndarray."""
    backing = np.array([1, 2, 3])
    ser = pd.Series(PandasArray(backing), copy=True)

    assert ser.values is not backing
|
||||
|
||||
|
||||
def test_series_constructor_with_astype():
    """dtype= in the Series constructor casts the PandasArray values."""
    data = np.array([1, 2, 3])
    outcome = pd.Series(PandasArray(data), dtype="float64")
    tm.assert_series_equal(outcome, pd.Series([1.0, 2.0, 3.0], dtype="float64"))
|
||||
|
||||
|
||||
def test_from_sequence_dtype():
    """_from_sequence honours an explicit target dtype."""
    source = np.array([1, 2, 3], dtype="int64")
    outcome = PandasArray._from_sequence(source, dtype="uint64")
    tm.assert_extension_array_equal(
        outcome, PandasArray(np.array([1, 2, 3], dtype="uint64"))
    )
|
||||
|
||||
|
||||
def test_constructor_copy():
    """copy=True must allocate fresh storage rather than aliasing the input."""
    arr = np.array([0, 1])
    result = PandasArray(arr, copy=True)

    # idiomatic truthiness check instead of comparing the bool with `is False`
    assert not np.shares_memory(result._ndarray, arr)
|
||||
|
||||
|
||||
def test_constructor_with_data(any_numpy_array):
    """The wrapped numpy dtype is exposed unchanged for every supported dtype."""
    backing = any_numpy_array
    wrapped = PandasArray(backing)
    assert wrapped.dtype.numpy_dtype == backing.dtype
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Conversion
|
||||
|
||||
|
||||
def test_to_numpy():
    """to_numpy returns the backing array, a copy, or a cast as requested."""
    arr = PandasArray(np.array([1, 2, 3]))

    # default: hand back the backing ndarray itself
    assert arr.to_numpy() is arr._ndarray

    # copy=True: a fresh object
    assert arr.to_numpy(copy=True) is not arr._ndarray

    # dtype=: cast to the requested numpy dtype
    tm.assert_numpy_array_equal(
        arr.to_numpy(dtype="f8"), np.array([1, 2, 3], dtype="f8")
    )
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Setitem
|
||||
|
||||
|
||||
def test_setitem_series():
    """Writing through .array mutates the owning Series in place."""
    ser = pd.Series([1, 2, 3])
    ser.array[0] = 10
    tm.assert_series_equal(ser, pd.Series([10, 2, 3]))
|
||||
|
||||
|
||||
def test_setitem(any_numpy_array):
    """__setitem__ mirrors plain ndarray assignment for every dtype."""
    template = any_numpy_array
    wrapped = PandasArray(template, copy=True)

    # apply the same assignment to both containers...
    wrapped[0] = wrapped[1]
    template[0] = template[1]

    # ...and they must end up equal
    tm.assert_numpy_array_equal(wrapped.to_numpy(), template)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Reductions
|
||||
|
||||
|
||||
def test_bad_reduce_raises():
    """_reduce with an unknown op name raises TypeError."""
    arr = np.array([1, 2, 3], dtype="int64")
    arr = PandasArray(arr)
    msg = "cannot perform not_a_method with type int"
    # NOTE(review): the full message string is reused as the (invalid) op name
    # passed to _reduce; the match still succeeds because pytest.raises
    # searches for the pattern anywhere in the raised message.
    with pytest.raises(TypeError, match=msg):
        arr._reduce(msg)
|
||||
|
||||
|
||||
def test_validate_reduction_keyword_args():
    """Unsupported numpy-compat keywords such as keepdims are rejected."""
    arr = PandasArray(np.array([1, 2, 3]))
    expected_msg = "the 'keepdims' parameter is not supported .*all"
    with pytest.raises(ValueError, match=expected_msg):
        arr.all(keepdims=True)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Ops
|
||||
|
||||
|
||||
def test_ufunc():
    """Unary and multi-output ufuncs return PandasArray-wrapped results."""
    arr = PandasArray(np.array([-1.0, 0.0, 1.0]))

    # single-output ufunc
    tm.assert_extension_array_equal(np.abs(arr), PandasArray(np.abs(arr._ndarray)))

    # multi-output ufunc: each output is wrapped separately
    quot, rem = np.divmod(arr, np.add(arr, 2))
    raw_quot, raw_rem = np.divmod(arr._ndarray, np.add(arr._ndarray, 2))
    tm.assert_extension_array_equal(quot, PandasArray(raw_quot))
    tm.assert_extension_array_equal(rem, PandasArray(raw_rem))
|
||||
|
||||
|
||||
def test_basic_binop():
    """Smoke test for element-wise addition; the EA interface tests cover more."""
    values = PandasArray(np.array([1, 2, 3]))
    tm.assert_extension_array_equal(values + values, PandasArray(np.array([2, 4, 6])))
|
||||
@@ -0,0 +1,325 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import iNaT
|
||||
from pandas._libs.tslibs.period import IncompatibleFrequency
|
||||
|
||||
from pandas.core.dtypes.dtypes import PeriodDtype, registry
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import PeriodArray, period_array
|
||||
import pandas.util.testing as tm
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Dtype
|
||||
|
||||
|
||||
def test_registered():
    """PeriodDtype is registered and findable via its string alias."""
    assert PeriodDtype in registry.dtypes
    assert registry.find("Period[D]") == PeriodDtype("D")
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# period_array
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, freq, expected",
    [
        ([pd.Period("2017", "D")], None, [17167]),
        ([pd.Period("2017", "D")], "D", [17167]),
        ([2017], "D", [17167]),
        (["2017"], "D", [17167]),
        ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]),
        ([pd.Period("2017", "D"), None], None, [17167, iNaT]),
        (pd.Series(pd.date_range("2017", periods=3)), None, [17167, 17168, 17169]),
        (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]),
    ],
)
def test_period_array_ok(data, freq, expected):
    """period_array accepts Periods, ints, strings, datetimes and offsets,
    producing the expected day ordinals (NaT encodes as iNaT)."""
    result = period_array(data, freq=freq).asi8
    expected = np.asarray(expected, dtype=np.int64)
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_period_array_readonly_object():
    """Read-only object ndarrays of Periods can still be converted (gh-25403)."""
    # https://github.com/pandas-dev/pandas/issues/25403
    pa = period_array([pd.Period("2019-01-01")])
    arr = np.asarray(pa, dtype="object")
    # make the object array immutable to reproduce the original bug
    arr.setflags(write=False)

    result = period_array(arr)
    tm.assert_period_array_equal(result, pa)

    result = pd.Series(arr)
    tm.assert_series_equal(result, pd.Series(pa))

    result = pd.DataFrame({"A": arr})
    tm.assert_frame_equal(result, pd.DataFrame({"A": pa}))
|
||||
|
||||
|
||||
def test_from_datetime64_freq_changes():
    """_from_datetime64 may coarsen daily stamps to monthly periods (gh-23438)."""
    stamps = pd.date_range("2017", periods=3, freq="D")
    outcome = PeriodArray._from_datetime64(stamps, freq="M")
    tm.assert_period_array_equal(
        outcome, period_array(["2017-01-01", "2017-01-01", "2017-01-01"], freq="M")
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, freq, msg",
    [
        (
            [pd.Period("2017", "D"), pd.Period("2017", "A")],
            None,
            "Input has different freq",
        ),
        ([pd.Period("2017", "D")], "A", "Input has different freq"),
    ],
)
def test_period_array_raises(data, freq, msg):
    """Mixed or mismatched frequencies raise IncompatibleFrequency."""
    with pytest.raises(IncompatibleFrequency, match=msg):
        period_array(data, freq)
|
||||
|
||||
|
||||
def test_period_array_non_period_series_raies():
    """PeriodArray rejects a plain integer Series (dtype mismatch)."""
    # NOTE(review): "raies" in the function name is a typo for "raises";
    # renaming would change the public test id, so it is only flagged here.
    ser = pd.Series([1, 2, 3])
    with pytest.raises(TypeError, match="dtype"):
        PeriodArray(ser, freq="D")
|
||||
|
||||
|
||||
def test_period_array_freq_mismatch():
    """Re-wrapping a PeriodArray with a different freq raises, for both
    string and offset frequency specifications."""
    daily = period_array(["2000", "2001"], freq="D")

    for monthly_freq in ("M", pd.tseries.offsets.MonthEnd()):
        with pytest.raises(IncompatibleFrequency, match="freq"):
            PeriodArray(daily, freq=monthly_freq)
|
||||
|
||||
|
||||
def test_asi8():
    """asi8 exposes the int64 ordinals, with NaT encoded as iNaT."""
    result = period_array(["2000", "2001", None], freq="D").asi8
    # Pin int64 explicitly: np.array's default integer dtype is platform
    # dependent (int32 on Windows), while asi8 is always int64.
    expected = np.array([10957, 11323, iNaT], dtype=np.int64)
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_take_raises():
    """take validates fill_value: wrong freq and non-period fills both raise."""
    arr = period_array(["2000", "2001"], freq="D")

    # fill value with an incompatible frequency
    with pytest.raises(IncompatibleFrequency, match="freq"):
        arr.take([0, -1], allow_fill=True, fill_value=pd.Period("2000", freq="W"))

    # fill value that is not a Period at all
    with pytest.raises(ValueError, match="foo"):
        arr.take([0, -1], allow_fill=True, fill_value="foo")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
def test_astype(dtype):
    """Any integer target dtype collapses to int64/uint64 ordinals."""
    # We choose to ignore the sign and size of integers for
    # Period/Datetime/Timedelta astype
    arr = period_array(["2000", "2001", None], freq="D")
    result = arr.astype(dtype)

    # unsigned requests map to uint64, everything else to int64
    if np.dtype(dtype).kind == "u":
        expected_dtype = np.dtype("uint64")
    else:
        expected_dtype = np.dtype("int64")
    expected = arr.astype(expected_dtype)

    assert result.dtype == expected_dtype
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_copies():
    """astype(int64) views the data when copy=False and copies when copy=True."""
    arr = period_array(["2000", "2001", None], freq="D")
    result = arr.astype(np.int64, copy=False)
    # Add the `.base`, since we now use `.asi8` which returns a view.
    # We could maybe override it in PeriodArray to return ._data directly.
    assert result.base is arr._data

    result = arr.astype(np.int64, copy=True)
    # The original `result is not arr._data` was vacuously true (a freshly
    # cast int64 ndarray can never be the period data object itself); assert
    # no shared memory instead, which is what copy=True actually guarantees.
    assert not np.shares_memory(result, arr._data)
    tm.assert_numpy_array_equal(result, arr._data.view("i8"))
|
||||
|
||||
|
||||
def test_astype_categorical():
    """astype('category') buckets repeated periods and maps NaT to code -1."""
    arr = period_array(["2000", "2001", "2001", None], freq="D")
    outcome = arr.astype("category")
    cats = pd.PeriodIndex(["2000", "2001"], freq="D")
    tm.assert_categorical_equal(
        outcome, pd.Categorical.from_codes([0, 1, 1, -1], categories=cats)
    )
|
||||
|
||||
|
||||
def test_astype_period():
    """astype to another PeriodDtype resamples the frequency, keeping NaT."""
    daily = period_array(["2000", "2001", None], freq="D")
    monthly = daily.astype(PeriodDtype("M"))
    tm.assert_period_array_equal(monthly, period_array(["2000", "2001", None], freq="M"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("other", ["datetime64[ns]", "timedelta64[ns]"])
def test_astype_datetime(other):
    """Casting periods to datetime64/timedelta64 is not allowed."""
    arr = period_array(["2000", "2001", None], freq="D")
    # slice off the [ns] so that the regex matches.
    with pytest.raises(TypeError, match=other[:-4]):
        arr.astype(other)
|
||||
|
||||
|
||||
def test_fillna_raises():
    """fillna with a wrong-length fill array is rejected."""
    arr = period_array(["2000", "2001", "2002"], freq="D")
    with pytest.raises(ValueError, match="Length"):
        arr.fillna(arr[:2])
|
||||
|
||||
|
||||
def test_fillna_copies():
    """fillna returns a new array and leaves the source untouched."""
    arr = period_array(["2000", "2001", "2002"], freq="D")
    result = arr.fillna(pd.Period("2000", "D"))
    assert result is not arr
    # Strengthened: also verify fillna did not mutate the source in place
    # (there is no NaT here, so the data must be byte-for-byte unchanged).
    tm.assert_period_array_equal(
        arr, period_array(["2000", "2001", "2002"], freq="D")
    )
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# setitem
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "key, value, expected",
    [
        ([0], pd.Period("2000", "D"), [10957, 1, 2]),
        ([0], None, [iNaT, 1, 2]),
        ([0], np.nan, [iNaT, 1, 2]),
        ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3),
        (
            [0, 1, 2],
            [pd.Period("2000", "D"), pd.Period("2001", "D"), pd.Period("2002", "D")],
            [10957, 11323, 11688],
        ),
    ],
)
def test_setitem(key, value, expected):
    """__setitem__ accepts Periods, lists of Periods, and None/NaN (-> NaT)."""
    arr = PeriodArray(np.arange(3), freq="D")
    expected = PeriodArray(expected, freq="D")
    arr[key] = value
    tm.assert_period_array_equal(arr, expected)
|
||||
|
||||
|
||||
def test_setitem_raises_incompatible_freq():
    """Scalar and array assignments with a mismatched freq both raise."""
    arr = PeriodArray(np.arange(3), freq="D")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        arr[0] = pd.Period("2000", freq="A")

    annual = period_array(["2000", "2001"], freq="A")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        arr[[0, 1]] = annual
|
||||
|
||||
|
||||
def test_setitem_raises_length():
    """Assigning a value list shorter than the key list raises."""
    arr = PeriodArray(np.arange(3), freq="D")
    with pytest.raises(ValueError, match="length"):
        arr[[0, 1]] = [pd.Period("2000", freq="D")]
|
||||
|
||||
|
||||
def test_setitem_raises_type():
    """Assigning a plain integer into a PeriodArray raises TypeError."""
    arr = PeriodArray(np.arange(3), freq="D")
    with pytest.raises(TypeError, match="int"):
        arr[0] = 1
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Ops
|
||||
|
||||
|
||||
def test_sub_period():
    """Subtracting a Period of a different frequency raises."""
    daily = period_array(["2000", "2001"], freq="D")
    monthly = pd.Period("2000", freq="M")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        daily - monthly
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Methods
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "other",
    [pd.Period("2000", freq="H"), period_array(["2000", "2001", "2000"], freq="H")],
)
def test_where_different_freq_raises(other):
    """Series.where with a scalar or array replacement of another freq raises."""
    ser = pd.Series(period_array(["2000", "2001", "2002"], freq="D"))
    cond = np.array([True, False, True])
    with pytest.raises(IncompatibleFrequency, match="freq"):
        ser.where(cond, other)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Printing
|
||||
|
||||
|
||||
def test_repr_small():
    """repr of a short PeriodArray lists every element."""
    arr = period_array(["2000", "2001"], freq="D")
    expected = (
        "<PeriodArray>\n['2000-01-01', '2001-01-01']\nLength: 2, dtype: period[D]"
    )
    assert str(arr) == expected
|
||||
|
||||
|
||||
def test_repr_large():
    """repr of a long PeriodArray truncates the middle with '...'."""
    arr = period_array(["2000", "2001"] * 500, freq="D")
    result = str(arr)
    expected = (
        "<PeriodArray>\n"
        "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
        "'2000-01-01',\n"
        " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
        "'2001-01-01',\n"
        " ...\n"
        " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
        "'2000-01-01',\n"
        " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
        "'2001-01-01']\n"
        "Length: 1000, dtype: period[D]"
    )
    assert result == expected
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Reductions
|
||||
|
||||
|
||||
class TestReductions:
    """min/max reductions on PeriodArray, including NaT and empty handling."""

    def test_min_max(self):
        """NaT is skipped by default; skipna=False propagates it."""
        arr = period_array(
            [
                "2000-01-03",
                "2000-01-03",
                "NaT",
                "2000-01-02",
                "2000-01-05",
                "2000-01-04",
            ],
            freq="D",
        )

        result = arr.min()
        expected = pd.Period("2000-01-02", freq="D")
        assert result == expected

        result = arr.max()
        expected = pd.Period("2000-01-05", freq="D")
        assert result == expected

        # with skipna=False the present NaT dominates both reductions
        result = arr.min(skipna=False)
        assert result is pd.NaT

        result = arr.max(skipna=False)
        assert result is pd.NaT

    @pytest.mark.parametrize("skipna", [True, False])
    def test_min_max_empty(self, skipna):
        """Reductions over an empty array return NaT regardless of skipna."""
        arr = period_array([], freq="D")
        result = arr.min(skipna=skipna)
        assert result is pd.NaT

        result = arr.max(skipna=skipna)
        assert result is pd.NaT
|
||||
@@ -0,0 +1,154 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import TimedeltaArray
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestTimedeltaArrayConstructor:
    """Validation performed by the public TimedeltaArray constructor."""

    def test_only_1dim_accepted(self):
        """2-dim and 0-dim inputs are rejected (GH#25282)."""
        # GH#25282
        arr = np.array([0, 1, 2, 3], dtype="m8[h]").astype("m8[ns]")

        with pytest.raises(ValueError, match="Only 1-dimensional"):
            # 2-dim
            TimedeltaArray(arr.reshape(2, 2))

        with pytest.raises(ValueError, match="Only 1-dimensional"):
            # 0-dim
            TimedeltaArray(arr[[0]].squeeze())

    def test_freq_validation(self):
        """A freq that does not match the inferred frequency raises."""
        # ensure that the public constructor cannot create an invalid instance
        arr = np.array([0, 0, 1], dtype=np.int64) * 3600 * 10 ** 9

        msg = (
            "Inferred frequency None from passed values does not "
            "conform to passed frequency D"
        )
        with pytest.raises(ValueError, match=msg):
            TimedeltaArray(arr.view("timedelta64[ns]"), freq="D")

    def test_non_array_raises(self):
        """Plain Python lists are not accepted; an ndarray is required."""
        with pytest.raises(ValueError, match="list"):
            TimedeltaArray([1, 2, 3])

    def test_other_type_raises(self):
        """Non-timedelta ndarray dtypes (e.g. bool) are rejected."""
        with pytest.raises(ValueError, match="dtype bool cannot be converted"):
            TimedeltaArray(np.array([1, 2, 3], dtype="bool"))

    def test_incorrect_dtype_raises(self):
        """An explicit dtype= that is not timedelta64[ns] is rejected."""
        # TODO: why TypeError for 'category' but ValueError for i8?
        with pytest.raises(
            ValueError, match=r"category cannot be converted " r"to timedelta64\[ns\]"
        ):
            TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype="category")

        with pytest.raises(
            ValueError,
            match=r"dtype int64 cannot be converted " r"to timedelta64\[ns\]",
        ):
            TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype=np.dtype("int64"))

    def test_copy(self):
        """copy=False aliases the input; copy=True allocates fresh storage."""
        data = np.array([1, 2, 3], dtype="m8[ns]")
        arr = TimedeltaArray(data, copy=False)
        assert arr._data is data

        arr = TimedeltaArray(data, copy=True)
        assert arr._data is not data
        # the copy must not even be a view on the original buffer
        assert arr._data.base is not data
|
||||
|
||||
|
||||
class TestTimedeltaArray:
    """Behavioral tests for TimedeltaArray: numpy interop, unary ops, astype."""

    def test_np_sum(self):
        """np.sum works on both the array and a TimedeltaIndex (GH#25282)."""
        # GH#25282
        vals = np.arange(5, dtype=np.int64).view("m8[h]").astype("m8[ns]")
        arr = TimedeltaArray(vals)
        result = np.sum(arr)
        assert result == vals.sum()

        result = np.sum(pd.TimedeltaIndex(arr))
        assert result == vals.sum()

    def test_from_sequence_dtype(self):
        """_from_sequence rejects an explicit object dtype."""
        msg = "dtype .*object.* cannot be converted to timedelta64"
        with pytest.raises(ValueError, match=msg):
            TimedeltaArray._from_sequence([], dtype=object)

    def test_abs(self):
        """abs() negates negative deltas and leaves NaT in place."""
        vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]")
        arr = TimedeltaArray(vals)

        evals = np.array([3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]")
        expected = TimedeltaArray(evals)

        result = abs(arr)
        tm.assert_timedelta_array_equal(result, expected)

    def test_neg(self):
        """Unary minus flips the sign of every delta; NaT stays NaT."""
        vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]")
        arr = TimedeltaArray(vals)

        evals = np.array([3600 * 10 ** 9, "NaT", -7200 * 10 ** 9], dtype="m8[ns]")
        expected = TimedeltaArray(evals)

        result = -arr
        tm.assert_timedelta_array_equal(result, expected)

    def test_neg_freq(self):
        """Negation of a freq-carrying array also negates the freq."""
        tdi = pd.timedelta_range("2 Days", periods=4, freq="H")
        arr = TimedeltaArray(tdi, freq=tdi.freq)

        expected = TimedeltaArray(-tdi._data, freq=-tdi.freq)

        result = -arr
        tm.assert_timedelta_array_equal(result, expected)

    @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
    def test_astype_int(self, dtype):
        """Any integer target dtype collapses to int64/uint64 nanoseconds."""
        arr = TimedeltaArray._from_sequence([pd.Timedelta("1H"), pd.Timedelta("2H")])
        result = arr.astype(dtype)

        # unsigned requests map to uint64, everything else to int64
        if np.dtype(dtype).kind == "u":
            expected_dtype = np.dtype("uint64")
        else:
            expected_dtype = np.dtype("int64")
        expected = arr.astype(expected_dtype)

        assert result.dtype == expected_dtype
        tm.assert_numpy_array_equal(result, expected)

    def test_setitem_clears_freq(self):
        """Any element assignment invalidates the cached frequency."""
        a = TimedeltaArray(pd.timedelta_range("1H", periods=2, freq="H"))
        a[0] = pd.Timedelta("1H")
        assert a.freq is None
|
||||
|
||||
|
||||
class TestReductions:
    """min/max reductions on TimedeltaArray, including NaT and empty handling."""

    def test_min_max(self):
        """NaT is skipped by default; skipna=False propagates it."""
        arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"])

        result = arr.min()
        expected = pd.Timedelta("2H")
        assert result == expected

        result = arr.max()
        expected = pd.Timedelta("5H")
        assert result == expected

        # with skipna=False the present NaT dominates both reductions
        result = arr.min(skipna=False)
        assert result is pd.NaT

        result = arr.max(skipna=False)
        assert result is pd.NaT

    @pytest.mark.parametrize("skipna", [True, False])
    def test_min_max_empty(self, skipna):
        """Reductions over an empty array return NaT regardless of skipna."""
        arr = TimedeltaArray._from_sequence([])
        result = arr.min(skipna=skipna)
        assert result is pd.NaT

        result = arr.max(skipna=skipna)
        assert result is pd.NaT
|
||||
Reference in New Issue
Block a user