Day 8 of Python challenges 111-117
@@ -0,0 +1,30 @@
from pandas.util._decorators import cache_readonly

import pandas as pd
import pandas.util.testing as tm

_ts = tm.makeTimeSeries()


class TestData:
    @cache_readonly
    def ts(self):
        ts = _ts.copy()
        ts.name = "ts"
        return ts

    @cache_readonly
    def series(self):
        series = tm.makeStringSeries()
        series.name = "series"
        return series

    @cache_readonly
    def objSeries(self):
        objSeries = tm.makeObjectSeries()
        objSeries.name = "objects"
        return objSeries

    @cache_readonly
    def empty(self):
        return pd.Series([], index=[])
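
cache_readonly memoizes a property on first access, so each TestData attribute above builds its Series once per instance and then returns the cached object. A minimal sketch of that behavior (the Demo class is illustrative, not part of the commit):

    from pandas.util._decorators import cache_readonly

    class Demo:
        calls = 0

        @cache_readonly
        def value(self):
            Demo.calls += 1  # runs only on the first access
            return [1, 2, 3]

    d = Demo()
    assert d.value is d.value  # the cached object is returned, not rebuilt
    assert Demo.calls == 1
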
@@ -0,0 +1,33 @@
import pytest

import pandas.util.testing as tm


@pytest.fixture
def datetime_series():
    """
    Fixture for Series of floats with DatetimeIndex
    """
    s = tm.makeTimeSeries()
    s.name = "ts"
    return s


@pytest.fixture
def string_series():
    """
    Fixture for Series of floats with Index of unique strings
    """
    s = tm.makeStringSeries()
    s.name = "series"
    return s


@pytest.fixture
def object_series():
    """
    Fixture for Series of dtype datetime64[ns] with Index of unique strings
    """
    s = tm.makeObjectSeries()
    s.name = "objects"
    return s
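
These fixtures are function-scoped, so pytest rebuilds the Series for every test that names one of them as a parameter; mutations cannot leak between tests. A hypothetical consumer, for illustration only:

    def test_datetime_series_basics(datetime_series):
        # injected by parameter name; the fixture already set .name
        assert datetime_series.name == "ts"
        assert datetime_series.dtype == "float64"
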
@@ -0,0 +1,8 @@
import pytest

from pandas.tests.series.common import TestData


@pytest.fixture(scope="module")
def test_data():
    return TestData()
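
Unlike the function-scoped fixtures above, scope="module" hands every test in a module the same TestData instance, and cache_readonly means each attribute is built exactly once. Tests that assign into the data therefore copy first, as the indexing tests below do. An illustrative sketch (assumed usage, not from the commit):

    def test_uses_shared_data(test_data):
        ts = test_data.ts.copy()  # copy before mutating the shared, cached Series
        ts[:5] = 0
        assert (test_data.ts[:5] != 0).all()
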
@@ -0,0 +1,558 @@
from datetime import datetime

import numpy as np
from numpy import nan
import pytest

import pandas as pd
from pandas import Categorical, Series, date_range, isna
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal


@pytest.mark.parametrize(
    "first_slice,second_slice",
    [
        [[2, None], [None, -5]],
        [[None, 0], [None, -5]],
        [[None, -5], [None, 0]],
        [[None, 0], [None, 0]],
    ],
)
@pytest.mark.parametrize("fill", [None, -1])
def test_align(test_data, first_slice, second_slice, join_type, fill):
    a = test_data.ts[slice(*first_slice)]
    b = test_data.ts[slice(*second_slice)]

    aa, ab = a.align(b, join=join_type, fill_value=fill)

    join_index = a.index.join(b.index, how=join_type)
    if fill is not None:
        diff_a = aa.index.difference(join_index)
        diff_b = ab.index.difference(join_index)
        if len(diff_a) > 0:
            assert (aa.reindex(diff_a) == fill).all()
        if len(diff_b) > 0:
            assert (ab.reindex(diff_b) == fill).all()

    ea = a.reindex(join_index)
    eb = b.reindex(join_index)

    if fill is not None:
        ea = ea.fillna(fill)
        eb = eb.fillna(fill)

    assert_series_equal(aa, ea)
    assert_series_equal(ab, eb)
    assert aa.name == "ts"
    assert ea.name == "ts"
    assert ab.name == "ts"
    assert eb.name == "ts"
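

# Added sketch (not from the original commit): align() reindexes both operands
# to the joined index so they can be combined positionally, and fill_value
# fills the holes the join introduces.
def _align_sketch():
    left = Series([1.0, 2.0], index=["a", "b"])
    right = Series([10.0, 20.0], index=["b", "c"])
    la, ra = left.align(right, join="outer", fill_value=0)
    assert la.index.equals(ra.index)  # identical indexes after aligning
    assert la.tolist() == [1.0, 2.0, 0.0]
    assert ra.tolist() == [0.0, 10.0, 20.0]

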
@pytest.mark.parametrize(
    "first_slice,second_slice",
    [
        [[2, None], [None, -5]],
        [[None, 0], [None, -5]],
        [[None, -5], [None, 0]],
        [[None, 0], [None, 0]],
    ],
)
@pytest.mark.parametrize("method", ["pad", "bfill"])
@pytest.mark.parametrize("limit", [None, 1])
def test_align_fill_method(
    test_data, first_slice, second_slice, join_type, method, limit
):
    a = test_data.ts[slice(*first_slice)]
    b = test_data.ts[slice(*second_slice)]

    aa, ab = a.align(b, join=join_type, method=method, limit=limit)

    join_index = a.index.join(b.index, how=join_type)
    ea = a.reindex(join_index)
    eb = b.reindex(join_index)

    ea = ea.fillna(method=method, limit=limit)
    eb = eb.fillna(method=method, limit=limit)

    assert_series_equal(aa, ea)
    assert_series_equal(ab, eb)


def test_align_nocopy(test_data):
    b = test_data.ts[:5].copy()

    # do copy
    a = test_data.ts.copy()
    ra, _ = a.align(b, join="left")
    ra[:5] = 5
    assert not (a[:5] == 5).any()

    # do not copy
    a = test_data.ts.copy()
    ra, _ = a.align(b, join="left", copy=False)
    ra[:5] = 5
    assert (a[:5] == 5).all()

    # do copy
    a = test_data.ts.copy()
    b = test_data.ts[:5].copy()
    _, rb = a.align(b, join="right")
    rb[:3] = 5
    assert not (b[:3] == 5).any()

    # do not copy
    a = test_data.ts.copy()
    b = test_data.ts[:5].copy()
    _, rb = a.align(b, join="right", copy=False)
    rb[:2] = 5
    assert (b[:2] == 5).all()


def test_align_same_index(test_data):
    a, b = test_data.ts.align(test_data.ts, copy=False)
    assert a.index is test_data.ts.index
    assert b.index is test_data.ts.index

    a, b = test_data.ts.align(test_data.ts, copy=True)
    assert a.index is not test_data.ts.index
    assert b.index is not test_data.ts.index


def test_align_multiindex():
    # GH 10665

    midx = pd.MultiIndex.from_product(
        [range(2), range(3), range(2)], names=("a", "b", "c")
    )
    idx = pd.Index(range(2), name="b")
    s1 = pd.Series(np.arange(12, dtype="int64"), index=midx)
    s2 = pd.Series(np.arange(2, dtype="int64"), index=idx)

    # these must be the same results (but flipped)
    res1l, res1r = s1.align(s2, join="left")
    res2l, res2r = s2.align(s1, join="right")

    expl = s1
    tm.assert_series_equal(expl, res1l)
    tm.assert_series_equal(expl, res2r)
    expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
    tm.assert_series_equal(expr, res1r)
    tm.assert_series_equal(expr, res2l)

    res1l, res1r = s1.align(s2, join="right")
    res2l, res2r = s2.align(s1, join="left")

    exp_idx = pd.MultiIndex.from_product(
        [range(2), range(2), range(2)], names=("a", "b", "c")
    )
    expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
    tm.assert_series_equal(expl, res1l)
    tm.assert_series_equal(expl, res2r)
    expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx)
    tm.assert_series_equal(expr, res1r)
    tm.assert_series_equal(expr, res2l)


def test_reindex(test_data):
    identity = test_data.series.reindex(test_data.series.index)

    # __array_interface__ is not defined for older numpies
    # and on some pythons
    try:
        assert np.may_share_memory(test_data.series.index, identity.index)
    except AttributeError:
        pass

    assert identity.index.is_(test_data.series.index)
    assert identity.index.identical(test_data.series.index)

    subIndex = test_data.series.index[10:20]
    subSeries = test_data.series.reindex(subIndex)

    for idx, val in subSeries.items():
        assert val == test_data.series[idx]

    subIndex2 = test_data.ts.index[10:20]
    subTS = test_data.ts.reindex(subIndex2)

    for idx, val in subTS.items():
        assert val == test_data.ts[idx]
    stuffSeries = test_data.ts.reindex(subIndex)

    assert np.isnan(stuffSeries).all()

    # This is extremely important for the Cython code to not screw up
    nonContigIndex = test_data.ts.index[::2]
    subNonContig = test_data.ts.reindex(nonContigIndex)
    for idx, val in subNonContig.items():
        assert val == test_data.ts[idx]

    # return a copy the same index here
    result = test_data.ts.reindex()
    assert not (result is test_data.ts)


def test_reindex_nan():
    ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8])

    i, j = [nan, 1, nan, 8, 4, nan], [2, 0, 2, 3, 1, 2]
    assert_series_equal(ts.reindex(i), ts.iloc[j])

    ts.index = ts.index.astype("object")

    # reindex coerces index.dtype to float, loc/iloc doesn't
    assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False)


def test_reindex_series_add_nat():
    rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s")
    series = Series(rng)

    result = series.reindex(range(15))
    assert np.issubdtype(result.dtype, np.dtype("M8[ns]"))

    mask = result.isna()
    assert mask[-5:].all()
    assert not mask[:-5].any()


def test_reindex_with_datetimes():
    rng = date_range("1/1/2000", periods=20)
    ts = Series(np.random.randn(20), index=rng)

    result = ts.reindex(list(ts.index[5:10]))
    expected = ts[5:10]
    tm.assert_series_equal(result, expected)

    result = ts[list(ts.index[5:10])]
    tm.assert_series_equal(result, expected)


def test_reindex_corner(test_data):
    # (don't forget to fix this) I think it's fixed
    test_data.empty.reindex(test_data.ts.index, method="pad")  # it works

    # corner case: pad empty series
    reindexed = test_data.empty.reindex(test_data.ts.index, method="pad")

    # pass non-Index
    reindexed = test_data.ts.reindex(list(test_data.ts.index))
    assert_series_equal(test_data.ts, reindexed)

    # bad fill method
    ts = test_data.ts[::2]
    msg = (
        r"Invalid fill method\. Expecting pad \(ffill\), backfill"
        r" \(bfill\) or nearest\. Got foo"
    )
    with pytest.raises(ValueError, match=msg):
        ts.reindex(test_data.ts.index, method="foo")


def test_reindex_pad():
    s = Series(np.arange(10), dtype="int64")
    s2 = s[::2]

    reindexed = s2.reindex(s.index, method="pad")
    reindexed2 = s2.reindex(s.index, method="ffill")
    assert_series_equal(reindexed, reindexed2)

    expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
    assert_series_equal(reindexed, expected)

    # GH4604
    s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
    new_index = ["a", "g", "c", "f"]
    expected = Series([1, 1, 3, 3], index=new_index)

    # this changes dtype because the ffill happens after
    result = s.reindex(new_index).ffill()
    assert_series_equal(result, expected.astype("float64"))

    result = s.reindex(new_index).ffill(downcast="infer")
    assert_series_equal(result, expected)

    expected = Series([1, 5, 3, 5], index=new_index)
    result = s.reindex(new_index, method="ffill")
    assert_series_equal(result, expected)

    # inference of new dtype
    s = Series([True, False, False, True], index=list("abcd"))
    new_index = "agc"
    result = s.reindex(list(new_index)).ffill()
    expected = Series([True, True, False], index=list(new_index))
    assert_series_equal(result, expected)

    # GH4618 shifted series downcasting
    s = Series(False, index=range(0, 5))
    result = s.shift(1).fillna(method="bfill")
    expected = Series(False, index=range(0, 5))
    assert_series_equal(result, expected)


def test_reindex_nearest():
    s = Series(np.arange(10, dtype="int64"))
    target = [0.1, 0.9, 1.5, 2.0]
    actual = s.reindex(target, method="nearest")
    expected = Series(np.around(target).astype("int64"), target)
    assert_series_equal(expected, actual)

    actual = s.reindex_like(actual, method="nearest")
    assert_series_equal(expected, actual)

    actual = s.reindex_like(actual, method="nearest", tolerance=1)
    assert_series_equal(expected, actual)
    actual = s.reindex_like(actual, method="nearest", tolerance=[1, 2, 3, 4])
    assert_series_equal(expected, actual)

    actual = s.reindex(target, method="nearest", tolerance=0.2)
    expected = Series([0, 1, np.nan, 2], target)
    assert_series_equal(expected, actual)

    actual = s.reindex(target, method="nearest", tolerance=[0.3, 0.01, 0.4, 3])
    expected = Series([0, np.nan, np.nan, 2], target)
    assert_series_equal(expected, actual)
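

# Added note (not from the original commit): with method="nearest", each target
# label takes the value of the closest existing label; tolerance caps how far
# that match may be, either as a scalar or as a per-target list. Sketch:
def _reindex_nearest_sketch():
    s = Series([10, 20, 30], index=[0, 1, 2])
    out = s.reindex([0.4, 2.9], method="nearest")
    assert out.tolist() == [10, 30]
    # 2.9 is 0.9 away from the nearest label 2, beyond tolerance=0.5 -> NaN
    out = s.reindex([0.4, 2.9], method="nearest", tolerance=0.5)
    assert isna(out.iloc[1])

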
def test_reindex_backfill():
    pass


def test_reindex_int(test_data):
    ts = test_data.ts[::2]
    int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)

    # this should work fine
    reindexed_int = int_ts.reindex(test_data.ts.index)

    # if NaNs introduced
    assert reindexed_int.dtype == np.float_

    # NO NaNs introduced
    reindexed_int = int_ts.reindex(int_ts.index[::2])
    assert reindexed_int.dtype == np.int_


def test_reindex_bool(test_data):
    # A series other than float, int, string, or object
    ts = test_data.ts[::2]
    bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)

    # this should work fine
    reindexed_bool = bool_ts.reindex(test_data.ts.index)

    # if NaNs introduced
    assert reindexed_bool.dtype == np.object_

    # NO NaNs introduced
    reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
    assert reindexed_bool.dtype == np.bool_


def test_reindex_bool_pad(test_data):
    # fail
    ts = test_data.ts[5:]
    bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
    filled_bool = bool_ts.reindex(test_data.ts.index, method="pad")
    assert isna(filled_bool[:5]).all()


def test_reindex_categorical():
    index = date_range("20000101", periods=3)

    # reindexing to an invalid Categorical
    s = Series(["a", "b", "c"], dtype="category")
    result = s.reindex(index)
    expected = Series(
        Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"])
    )
    expected.index = index
    tm.assert_series_equal(result, expected)

    # partial reindexing
    expected = Series(Categorical(values=["b", "c"], categories=["a", "b", "c"]))
    expected.index = [1, 2]
    result = s.reindex([1, 2])
    tm.assert_series_equal(result, expected)

    expected = Series(Categorical(values=["c", np.nan], categories=["a", "b", "c"]))
    expected.index = [2, 3]
    result = s.reindex([2, 3])
    tm.assert_series_equal(result, expected)


def test_reindex_like(test_data):
    other = test_data.ts[::2]
    assert_series_equal(
        test_data.ts.reindex(other.index), test_data.ts.reindex_like(other)
    )

    # GH 7179
    day1 = datetime(2013, 3, 5)
    day2 = datetime(2013, 5, 5)
    day3 = datetime(2014, 3, 5)

    series1 = Series([5, None, None], [day1, day2, day3])
    series2 = Series([None, None], [day1, day3])

    result = series1.reindex_like(series2, method="pad")
    expected = Series([5, np.nan], index=[day1, day3])
    assert_series_equal(result, expected)


def test_reindex_fill_value():
    # -----------------------------------------------------------
    # floats
    floats = Series([1.0, 2.0, 3.0])
    result = floats.reindex([1, 2, 3])
    expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3])
    assert_series_equal(result, expected)

    result = floats.reindex([1, 2, 3], fill_value=0)
    expected = Series([2.0, 3.0, 0], index=[1, 2, 3])
    assert_series_equal(result, expected)

    # -----------------------------------------------------------
    # ints
    ints = Series([1, 2, 3])

    result = ints.reindex([1, 2, 3])
    expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3])
    assert_series_equal(result, expected)

    # don't upcast
    result = ints.reindex([1, 2, 3], fill_value=0)
    expected = Series([2, 3, 0], index=[1, 2, 3])
    assert issubclass(result.dtype.type, np.integer)
    assert_series_equal(result, expected)

    # -----------------------------------------------------------
    # objects
    objects = Series([1, 2, 3], dtype=object)

    result = objects.reindex([1, 2, 3])
    expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
    assert_series_equal(result, expected)

    result = objects.reindex([1, 2, 3], fill_value="foo")
    expected = Series([2, 3, "foo"], index=[1, 2, 3], dtype=object)
    assert_series_equal(result, expected)

    # ------------------------------------------------------------
    # bools
    bools = Series([True, False, True])

    result = bools.reindex([1, 2, 3])
    expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
    assert_series_equal(result, expected)

    result = bools.reindex([1, 2, 3], fill_value=False)
    expected = Series([False, True, False], index=[1, 2, 3])
    assert_series_equal(result, expected)
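

# Added note (not from the original commit): reindexing that introduces missing
# labels has to upcast dtypes that cannot hold NaN -- int becomes float64 and
# bool becomes object -- while a fill_value of the right type avoids the upcast.
def _reindex_upcast_sketch():
    ints = Series([1, 2, 3])
    assert ints.reindex([0, 9]).dtype == np.float64  # NaN forced an upcast
    assert ints.reindex([0, 9], fill_value=0).dtype == ints.dtype

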
def test_reindex_datetimeindexes_tz_naive_and_aware():
    # GH 8306
    idx = date_range("20131101", tz="America/Chicago", periods=7)
    newidx = date_range("20131103", periods=10, freq="H")
    s = Series(range(7), index=idx)
    with pytest.raises(TypeError):
        s.reindex(newidx, method="ffill")


def test_reindex_empty_series_tz_dtype():
    # GH 20869
    result = Series(dtype="datetime64[ns, UTC]").reindex([0, 1])
    expected = Series([pd.NaT] * 2, dtype="datetime64[ns, UTC]")
    tm.assert_equal(result, expected)


def test_rename():
    # GH 17407
    s = Series(range(1, 6), index=pd.Index(range(2, 7), name="IntIndex"))
    result = s.rename(str)
    expected = s.rename(lambda i: str(i))
    assert_series_equal(result, expected)

    assert result.name == expected.name


@pytest.mark.parametrize(
    "data, index, drop_labels, axis, expected_data, expected_index",
    [
        # Unique Index
        ([1, 2], ["one", "two"], ["two"], 0, [1], ["one"]),
        ([1, 2], ["one", "two"], ["two"], "rows", [1], ["one"]),
        ([1, 1, 2], ["one", "two", "one"], ["two"], 0, [1, 2], ["one", "one"]),
        # GH 5248 Non-Unique Index
        ([1, 1, 2], ["one", "two", "one"], "two", 0, [1, 2], ["one", "one"]),
        ([1, 1, 2], ["one", "two", "one"], ["one"], 0, [1], ["two"]),
        ([1, 1, 2], ["one", "two", "one"], "one", 0, [1], ["two"]),
    ],
)
def test_drop_unique_and_non_unique_index(
    data, index, axis, drop_labels, expected_data, expected_index
):

    s = Series(data=data, index=index)
    result = s.drop(drop_labels, axis=axis)
    expected = Series(data=expected_data, index=expected_index)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "data, index, drop_labels, axis, error_type, error_desc",
    [
        # single string/tuple-like
        (range(3), list("abc"), "bc", 0, KeyError, "not found in axis"),
        # bad axis
        (range(3), list("abc"), ("a",), 0, KeyError, "not found in axis"),
        (range(3), list("abc"), "one", "columns", ValueError, "No axis named columns"),
    ],
)
def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc):

    with pytest.raises(error_type, match=error_desc):
        Series(data, index=index).drop(drop_labels, axis=axis)


def test_drop_with_ignore_errors():
    # errors='ignore'
    s = Series(range(3), index=list("abc"))
    result = s.drop("bc", errors="ignore")
    tm.assert_series_equal(result, s)
    result = s.drop(["a", "d"], errors="ignore")
    expected = s.iloc[1:]
    tm.assert_series_equal(result, expected)

    # GH 8522
    s = Series([2, 3], index=[True, False])
    assert s.index.is_object()
    result = s.drop(True)
    expected = Series([3], index=[False])
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 3]])
@pytest.mark.parametrize("drop_labels", [[], [1], [3]])
def test_drop_empty_list(index, drop_labels):
    # GH 21494
    expected_index = [i for i in index if i not in drop_labels]
    series = pd.Series(index=index).drop(drop_labels)
    tm.assert_series_equal(series, pd.Series(index=expected_index))


@pytest.mark.parametrize(
    "data, index, drop_labels",
    [
        (None, [1, 2, 3], [1, 4]),
        (None, [1, 2, 2], [1, 4]),
        ([2, 3], [0, 1], [False, True]),
    ],
)
def test_drop_non_empty_list(data, index, drop_labels):
    # GH 21494 and GH 16877
    with pytest.raises(KeyError, match="not found in axis"):
        pd.Series(data=data, index=index).drop(drop_labels)
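
Two behaviors drive the drop tests above: dropping a label removes every row that carries it, so a duplicated index can lose several rows at once, and errors="ignore" skips missing labels instead of raising KeyError. A quick hedged sketch:

    import pandas as pd

    s = pd.Series([1, 1, 2], index=["one", "two", "one"])
    assert s.drop("one").to_dict() == {"two": 1}  # both "one" rows removed
    assert s.drop("missing", errors="ignore").equals(s)  # no KeyError
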
@@ -0,0 +1,628 @@
import numpy as np
import pytest

from pandas.core.dtypes.common import is_integer

import pandas as pd
from pandas import Index, Series, Timestamp, date_range, isna
from pandas.core.indexing import IndexingError
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal

from pandas.tseries.offsets import BDay


def test_getitem_boolean(test_data):
    s = test_data.series
    mask = s > s.median()

    # passing list is OK
    result = s[list(mask)]
    expected = s[mask]
    assert_series_equal(result, expected)
    tm.assert_index_equal(result.index, s.index[mask])


def test_getitem_boolean_empty():
    s = Series([], dtype=np.int64)
    s.index.name = "index_name"
    s = s[s.isna()]
    assert s.index.name == "index_name"
    assert s.dtype == np.int64

    # GH5877
    # indexing with empty series
    s = Series(["A", "B"])
    expected = Series(np.nan, index=["C"], dtype=object)
    result = s[Series(["C"], dtype=object)]
    assert_series_equal(result, expected)

    s = Series(["A", "B"])
    expected = Series(dtype=object, index=Index([], dtype="int64"))
    result = s[Series([], dtype=object)]
    assert_series_equal(result, expected)

    # invalid because of the boolean indexer
    # that's empty or not-aligned
    msg = (
        r"Unalignable boolean Series provided as indexer \(index of"
        r" the boolean Series and of the indexed object do not match"
    )
    with pytest.raises(IndexingError, match=msg):
        s[Series([], dtype=bool)]

    with pytest.raises(IndexingError, match=msg):
        s[Series([True], dtype=bool)]


def test_getitem_boolean_object(test_data):
    # using column from DataFrame

    s = test_data.series
    mask = s > s.median()
    omask = mask.astype(object)

    # getitem
    result = s[omask]
    expected = s[mask]
    assert_series_equal(result, expected)

    # setitem
    s2 = s.copy()
    cop = s.copy()
    cop[omask] = 5
    s2[mask] = 5
    assert_series_equal(cop, s2)

    # nans raise exception
    omask[5:10] = np.nan
    msg = "cannot index with vector containing NA / NaN values"
    with pytest.raises(ValueError, match=msg):
        s[omask]
    with pytest.raises(ValueError, match=msg):
        s[omask] = 5


def test_getitem_setitem_boolean_corner(test_data):
    ts = test_data.ts
    mask_shifted = ts.shift(1, freq=BDay()) > ts.median()

    # these used to raise...??

    msg = (
        r"Unalignable boolean Series provided as indexer \(index of"
        r" the boolean Series and of the indexed object do not match"
    )
    with pytest.raises(IndexingError, match=msg):
        ts[mask_shifted]
    with pytest.raises(IndexingError, match=msg):
        ts[mask_shifted] = 1

    with pytest.raises(IndexingError, match=msg):
        ts.loc[mask_shifted]
    with pytest.raises(IndexingError, match=msg):
        ts.loc[mask_shifted] = 1


def test_setitem_boolean(test_data):
    mask = test_data.series > test_data.series.median()

    # similar indexed series
    result = test_data.series.copy()
    result[mask] = test_data.series * 2
    expected = test_data.series * 2
    assert_series_equal(result[mask], expected[mask])

    # needs alignment
    result = test_data.series.copy()
    result[mask] = (test_data.series * 2)[0:5]
    expected = (test_data.series * 2)[0:5].reindex_like(test_data.series)
    expected[-mask] = test_data.series[mask]
    assert_series_equal(result[mask], expected[mask])


def test_get_set_boolean_different_order(test_data):
    ordered = test_data.series.sort_values()

    # setting
    copy = test_data.series.copy()
    copy[ordered > 0] = 0

    expected = test_data.series.copy()
    expected[expected > 0] = 0

    assert_series_equal(copy, expected)

    # getting
    sel = test_data.series[ordered > 0]
    exp = test_data.series[test_data.series > 0]
    assert_series_equal(sel, exp)


def test_where_unsafe_int(sint_dtype):
    s = Series(np.arange(10), dtype=sint_dtype)
    mask = s < 5

    s[mask] = range(2, 7)
    expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype=sint_dtype)

    assert_series_equal(s, expected)


def test_where_unsafe_float(float_dtype):
    s = Series(np.arange(10), dtype=float_dtype)
    mask = s < 5

    s[mask] = range(2, 7)
    data = list(range(2, 7)) + list(range(5, 10))
    expected = Series(data, dtype=float_dtype)

    assert_series_equal(s, expected)


@pytest.mark.parametrize(
    "dtype,expected_dtype",
    [
        (np.int8, np.float64),
        (np.int16, np.float64),
        (np.int32, np.float64),
        (np.int64, np.float64),
        (np.float32, np.float32),
        (np.float64, np.float64),
    ],
)
def test_where_unsafe_upcast(dtype, expected_dtype):
    # see gh-9743
    s = Series(np.arange(10), dtype=dtype)
    values = [2.5, 3.5, 4.5, 5.5, 6.5]
    mask = s < 5
    expected = Series(values + list(range(5, 10)), dtype=expected_dtype)
    s[mask] = values
    assert_series_equal(s, expected)


def test_where_unsafe():
    # see gh-9731
    s = Series(np.arange(10), dtype="int64")
    values = [2.5, 3.5, 4.5, 5.5]

    mask = s > 5
    expected = Series(list(range(6)) + values, dtype="float64")

    s[mask] = values
    assert_series_equal(s, expected)

    # see gh-3235
    s = Series(np.arange(10), dtype="int64")
    mask = s < 5
    s[mask] = range(2, 7)
    expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype="int64")
    assert_series_equal(s, expected)
    assert s.dtype == expected.dtype

    s = Series(np.arange(10), dtype="int64")
    mask = s > 5
    s[mask] = [0] * 4
    expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64")
    assert_series_equal(s, expected)

    s = Series(np.arange(10))
    mask = s > 5

    msg = "cannot assign mismatch length to masked array"
    with pytest.raises(ValueError, match=msg):
        s[mask] = [5, 4, 3, 2, 1]

    with pytest.raises(ValueError, match=msg):
        s[mask] = [0] * 5

    # dtype changes
    s = Series([1, 2, 3, 4])
    result = s.where(s > 2, np.nan)
    expected = Series([np.nan, np.nan, 3, 4])
    assert_series_equal(result, expected)

    # GH 4667
    # setting with None changes dtype
    s = Series(range(10)).astype(float)
    s[8] = None
    result = s[8]
    assert isna(result)

    s = Series(range(10)).astype(float)
    s[s > 8] = None
    result = s[isna(s)]
    expected = Series(np.nan, index=[9])
    assert_series_equal(result, expected)
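

# Added note (not from the original commit): masked assignment only upcasts
# when it must. Assigning floats into an int Series rebuilds it as float64,
# while assigning ints keeps the original dtype, as gh-9743/gh-3235 verify.
def _masked_assignment_upcast_sketch():
    s = Series(np.arange(3), dtype="int64")
    s[s < 2] = [0.5, 1.5]  # float values force an int64 -> float64 upcast
    assert s.dtype == np.float64

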
def test_where():
    s = Series(np.random.randn(5))
    cond = s > 0

    rs = s.where(cond).dropna()
    rs2 = s[cond]
    assert_series_equal(rs, rs2)

    rs = s.where(cond, -s)
    assert_series_equal(rs, s.abs())

    rs = s.where(cond)
    assert s.shape == rs.shape
    assert rs is not s

    # test alignment
    cond = Series([True, False, False, True, False], index=s.index)
    s2 = -(s.abs())

    expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index)
    rs = s2.where(cond[:3])
    assert_series_equal(rs, expected)

    expected = s2.abs()
    expected.iloc[0] = s2[0]
    rs = s2.where(cond[:3], -s2)
    assert_series_equal(rs, expected)
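

# Added note (not from the original commit): where() keeps values when the
# condition is True and fills elsewhere; mask() is its complement, so
# s.mask(cond) is equivalent to s.where(~cond). Minimal sketch:
def _where_mask_sketch():
    s = Series([1.0, -2.0, 3.0])
    kept = s.where(s > 0)  # negatives become NaN
    dropped = s.mask(s > 0)  # positives become NaN
    assert isna(kept.iloc[1]) and isna(dropped.iloc[0])
    assert_series_equal(s.mask(s > 0), s.where(~(s > 0)))

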
def test_where_error():
    s = Series(np.random.randn(5))
    cond = s > 0

    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        s.where(1)
    with pytest.raises(ValueError, match=msg):
        s.where(cond[:3].values, -s)

    # GH 2745
    s = Series([1, 2])
    s[[True, False]] = [0, 1]
    expected = Series([0, 2])
    assert_series_equal(s, expected)

    # failures
    msg = "cannot assign mismatch length to masked array"
    with pytest.raises(ValueError, match=msg):
        s[[True, False]] = [0, 2, 3]
    msg = (
        "NumPy boolean array indexing assignment cannot assign 0 input"
        " values to the 1 output values where the mask is true"
    )
    with pytest.raises(ValueError, match=msg):
        s[[True, False]] = []


@pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
def test_where_array_like(klass):
    # see gh-15414
    s = Series([1, 2, 3])
    cond = [False, True, True]
    expected = Series([np.nan, 2, 3])

    result = s.where(klass(cond))
    assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "cond",
    [
        [1, 0, 1],
        Series([2, 5, 7]),
        ["True", "False", "True"],
        [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")],
    ],
)
def test_where_invalid_input(cond):
    # see gh-15414: only boolean arrays accepted
    s = Series([1, 2, 3])
    msg = "Boolean array expected for the condition"

    with pytest.raises(ValueError, match=msg):
        s.where(cond)

    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        s.where([True])


def test_where_ndframe_align():
    msg = "Array conditional must be same shape as self"
    s = Series([1, 2, 3])

    cond = [True]
    with pytest.raises(ValueError, match=msg):
        s.where(cond)

    expected = Series([1, np.nan, np.nan])

    out = s.where(Series(cond))
    tm.assert_series_equal(out, expected)

    cond = np.array([False, True, False, True])
    with pytest.raises(ValueError, match=msg):
        s.where(cond)

    expected = Series([np.nan, 2, np.nan])

    out = s.where(Series(cond))
    tm.assert_series_equal(out, expected)


def test_where_setitem_invalid():
    # GH 2702
    # make sure correct exceptions are raised on invalid list assignment

    msg = "cannot set using a {} indexer with a different length than the value"

    # slice
    s = Series(list("abc"))

    with pytest.raises(ValueError, match=msg.format("slice")):
        s[0:3] = list(range(27))

    s[0:3] = list(range(3))
    expected = Series([0, 1, 2])
    assert_series_equal(s.astype(np.int64), expected)

    # slice with step
    s = Series(list("abcdef"))

    with pytest.raises(ValueError, match=msg.format("slice")):
        s[0:4:2] = list(range(27))

    s = Series(list("abcdef"))
    s[0:4:2] = list(range(2))
    expected = Series([0, "b", 1, "d", "e", "f"])
    assert_series_equal(s, expected)

    # neg slices
    s = Series(list("abcdef"))

    with pytest.raises(ValueError, match=msg.format("slice")):
        s[:-1] = list(range(27))

    s[-3:-1] = list(range(2))
    expected = Series(["a", "b", "c", 0, 1, "f"])
    assert_series_equal(s, expected)

    # list
    s = Series(list("abc"))

    with pytest.raises(ValueError, match=msg.format("list-like")):
        s[[0, 1, 2]] = list(range(27))

    s = Series(list("abc"))

    with pytest.raises(ValueError, match=msg.format("list-like")):
        s[[0, 1, 2]] = list(range(2))

    # scalar
    s = Series(list("abc"))
    s[0] = list(range(10))
    expected = Series([list(range(10)), "b", "c"])
    assert_series_equal(s, expected)


@pytest.mark.parametrize("size", range(2, 6))
@pytest.mark.parametrize(
    "mask", [[True, False, False, False, False], [True, False], [False]]
)
@pytest.mark.parametrize(
    "item", [2.0, np.nan, np.finfo(np.float).max, np.finfo(np.float).min]
)
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize(
    "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)]
)
def test_broadcast(size, mask, item, box):
    selection = np.resize(mask, size)

    data = np.arange(size, dtype=float)

    # Construct the expected series by taking the source
    # data or item based on the selection
    expected = Series(
        [item if use_item else data[i] for i, use_item in enumerate(selection)]
    )

    s = Series(data)
    s[selection] = box(item)
    assert_series_equal(s, expected)

    s = Series(data)
    result = s.where(~selection, box(item))
    assert_series_equal(result, expected)

    s = Series(data)
    result = s.mask(selection, box(item))
    assert_series_equal(result, expected)


def test_where_inplace():
    s = Series(np.random.randn(5))
    cond = s > 0

    rs = s.copy()

    rs.where(cond, inplace=True)
    assert_series_equal(rs.dropna(), s[cond])
    assert_series_equal(rs, s.where(cond))

    rs = s.copy()
    rs.where(cond, -s, inplace=True)
    assert_series_equal(rs, s.where(cond, -s))


def test_where_dups():
    # GH 4550
    # where crashes with dups in index
    s1 = Series(list(range(3)))
    s2 = Series(list(range(3)))
    comb = pd.concat([s1, s2])
    result = comb.where(comb < 2)
    expected = Series([0, 1, np.nan, 0, 1, np.nan], index=[0, 1, 2, 0, 1, 2])
    assert_series_equal(result, expected)

    # GH 4548
    # inplace updating not working with dups
    comb[comb < 1] = 5
    expected = Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2])
    assert_series_equal(comb, expected)

    comb[comb < 2] += 10
    expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2])
    assert_series_equal(comb, expected)


def test_where_numeric_with_string():
    # GH 9280
    s = pd.Series([1, 2, 3])
    w = s.where(s > 1, "X")

    assert not is_integer(w[0])
    assert is_integer(w[1])
    assert is_integer(w[2])
    assert isinstance(w[0], str)
    assert w.dtype == "object"

    w = s.where(s > 1, ["X", "Y", "Z"])
    assert not is_integer(w[0])
    assert is_integer(w[1])
    assert is_integer(w[2])
    assert isinstance(w[0], str)
    assert w.dtype == "object"

    w = s.where(s > 1, np.array(["X", "Y", "Z"]))
    assert not is_integer(w[0])
    assert is_integer(w[1])
    assert is_integer(w[2])
    assert isinstance(w[0], str)
    assert w.dtype == "object"


def test_where_timedelta_coerce():
    s = Series([1, 2], dtype="timedelta64[ns]")
    expected = Series([10, 10])
    mask = np.array([False, False])

    rs = s.where(mask, [10, 10])
    assert_series_equal(rs, expected)

    rs = s.where(mask, 10)
    assert_series_equal(rs, expected)

    rs = s.where(mask, 10.0)
    assert_series_equal(rs, expected)

    rs = s.where(mask, [10.0, 10.0])
    assert_series_equal(rs, expected)

    rs = s.where(mask, [10.0, np.nan])
    expected = Series([10, None], dtype="object")
    assert_series_equal(rs, expected)


def test_where_datetime_conversion():
    s = Series(date_range("20130102", periods=2))
    expected = Series([10, 10])
    mask = np.array([False, False])

    rs = s.where(mask, [10, 10])
    assert_series_equal(rs, expected)

    rs = s.where(mask, 10)
    assert_series_equal(rs, expected)

    rs = s.where(mask, 10.0)
    assert_series_equal(rs, expected)

    rs = s.where(mask, [10.0, 10.0])
    assert_series_equal(rs, expected)

    rs = s.where(mask, [10.0, np.nan])
    expected = Series([10, None], dtype="object")
    assert_series_equal(rs, expected)

    # GH 15701
    timestamps = ["2016-12-31 12:00:04+00:00", "2016-12-31 12:00:04.010000+00:00"]
    s = Series([pd.Timestamp(t) for t in timestamps])
    rs = s.where(Series([False, True]))
    expected = Series([pd.NaT, s[1]])
    assert_series_equal(rs, expected)


def test_where_dt_tz_values(tz_naive_fixture):
    ser1 = pd.Series(
        pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture)
    )
    ser2 = pd.Series(
        pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture)
    )
    mask = pd.Series([True, True, False])
    result = ser1.where(mask, ser2)
    exp = pd.Series(
        pd.DatetimeIndex(["20150101", "20150102", "20160516"], tz=tz_naive_fixture)
    )
    assert_series_equal(exp, result)


def test_mask():
    # compare with tested results in test_where
    s = Series(np.random.randn(5))
    cond = s > 0

    rs = s.where(~cond, np.nan)
    assert_series_equal(rs, s.mask(cond))

    rs = s.where(~cond)
    rs2 = s.mask(cond)
    assert_series_equal(rs, rs2)

    rs = s.where(~cond, -s)
    rs2 = s.mask(cond, -s)
    assert_series_equal(rs, rs2)

    cond = Series([True, False, False, True, False], index=s.index)
    s2 = -(s.abs())
    rs = s2.where(~cond[:3])
    rs2 = s2.mask(cond[:3])
    assert_series_equal(rs, rs2)

    rs = s2.where(~cond[:3], -s2)
    rs2 = s2.mask(cond[:3], -s2)
    assert_series_equal(rs, rs2)

    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        s.mask(1)
    with pytest.raises(ValueError, match=msg):
        s.mask(cond[:3].values, -s)

    # dtype changes
    s = Series([1, 2, 3, 4])
    result = s.mask(s > 2, np.nan)
    expected = Series([1, 2, np.nan, np.nan])
    assert_series_equal(result, expected)

    # see gh-21891
    s = Series([1, 2])
    res = s.mask([True, False])

    exp = Series([np.nan, 2])
    tm.assert_series_equal(res, exp)


def test_mask_inplace():
    s = Series(np.random.randn(5))
    cond = s > 0

    rs = s.copy()
    rs.mask(cond, inplace=True)
    assert_series_equal(rs.dropna(), s[~cond])
    assert_series_equal(rs, s.mask(cond))

    rs = s.copy()
    rs.mask(cond, -s, inplace=True)
    assert_series_equal(rs, s.mask(cond, -s))
@@ -0,0 +1,33 @@
import pandas as pd
import pandas.util.testing as tm


def test_getitem_callable():
    # GH 12533
    s = pd.Series(4, index=list("ABCD"))
    result = s[lambda x: "A"]
    assert result == s.loc["A"]

    result = s[lambda x: ["A", "B"]]
    tm.assert_series_equal(result, s.loc[["A", "B"]])

    result = s[lambda x: [True, False, True, True]]
    tm.assert_series_equal(result, s.iloc[[0, 2, 3]])


def test_setitem_callable():
    # GH 12533
    s = pd.Series([1, 2, 3, 4], index=list("ABCD"))
    s[lambda x: "A"] = -1
    tm.assert_series_equal(s, pd.Series([-1, 2, 3, 4], index=list("ABCD")))


def test_setitem_other_callable():
    # GH 13299
    inc = lambda x: x + 1

    s = pd.Series([1, 2, -1, 4])
    s[s < 0] = inc

    expected = pd.Series([1, 2, inc, 4])
    tm.assert_series_equal(s, expected)
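
Callable indexing evaluates the function against the Series being indexed and then indexes with whatever it returns, so s[f] behaves like s[f(s)]. Note the last test: a callable on the right-hand side of a boolean setitem is stored as an object value, not called. An illustrative sketch:

    import pandas as pd

    s = pd.Series([1, 2, 3], index=list("abc"))
    assert s[lambda x: x > 1].tolist() == [2, 3]  # same as s[s > 1]
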
@@ -0,0 +1,778 @@
from datetime import datetime, timedelta

import numpy as np
import pytest

from pandas._libs import iNaT
import pandas._libs.index as _index

import pandas as pd
from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range
import pandas.util.testing as tm
from pandas.util.testing import (
    assert_almost_equal,
    assert_frame_equal,
    assert_series_equal,
)


"""
Also test support for datetime64[ns] in Series / DataFrame
"""


def test_fancy_getitem():
    dti = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )

    s = Series(np.arange(len(dti)), index=dti)

    assert s[48] == 48
    assert s["1/2/2009"] == 48
    assert s["2009-1-2"] == 48
    assert s[datetime(2009, 1, 2)] == 48
    assert s[Timestamp(datetime(2009, 1, 2))] == 48
    with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
        s["2009-1-3"]
    assert_series_equal(
        s["3/6/2009":"2009-06-05"], s[datetime(2009, 3, 6) : datetime(2009, 6, 5)]
    )


def test_fancy_setitem():
    dti = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )

    s = Series(np.arange(len(dti)), index=dti)
    s[48] = -1
    assert s[48] == -1
    s["1/2/2009"] = -2
    assert s[48] == -2
    s["1/2/2009":"2009-06-05"] = -3
    assert (s[48:54] == -3).all()


@pytest.mark.filterwarnings("ignore::DeprecationWarning")
@pytest.mark.parametrize("tz", [None, "Asia/Shanghai", "Europe/Berlin"])
@pytest.mark.parametrize("name", [None, "my_dti"])
def test_dti_snap(name, tz):
    dti = DatetimeIndex(
        [
            "1/1/2002",
            "1/2/2002",
            "1/3/2002",
            "1/4/2002",
            "1/5/2002",
            "1/6/2002",
            "1/7/2002",
        ],
        name=name,
        tz=tz,
        freq="D",
    )

    result = dti.snap(freq="W-MON")
    expected = date_range("12/31/2001", "1/7/2002", name=name, tz=tz, freq="w-mon")
    expected = expected.repeat([3, 4])
    tm.assert_index_equal(result, expected)
    assert result.tz == expected.tz

    result = dti.snap(freq="B")

    expected = date_range("1/1/2002", "1/7/2002", name=name, tz=tz, freq="b")
    expected = expected.repeat([1, 1, 1, 2, 2])
    tm.assert_index_equal(result, expected)
    assert result.tz == expected.tz


def test_dti_reset_index_round_trip():
    dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")
    d1 = DataFrame({"v": np.random.rand(len(dti))}, index=dti)
    d2 = d1.reset_index()
    assert d2.dtypes[0] == np.dtype("M8[ns]")
    d3 = d2.set_index("index")
    assert_frame_equal(d1, d3, check_names=False)

    # #2329
    stamp = datetime(2012, 11, 22)
    df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"])
    df = df.set_index("Date")

    assert df.index[0] == stamp
    assert df.reset_index()["Date"][0] == stamp


def test_series_set_value():
    # #1561

    dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)]
    index = DatetimeIndex(dates)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        s = Series().set_value(dates[0], 1.0)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        s2 = s.set_value(dates[1], np.nan)

    exp = Series([1.0, np.nan], index=index)

    assert_series_equal(s2, exp)

    # s = Series(index[:1], index[:1])
    # s2 = s.set_value(dates[1], index[1])
    # assert s2.values.dtype == 'M8[ns]'


@pytest.mark.slow
def test_slice_locs_indexerror():
    times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) for i in range(100000)]
    s = Series(range(100000), times)
    s.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)]


def test_slicing_datetimes():
    # GH 7523

    # unique
    df = DataFrame(
        np.arange(4.0, dtype="float64"),
        index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 3, 4]],
    )
    result = df.loc[datetime(2001, 1, 1, 10) :]
    assert_frame_equal(result, df)
    result = df.loc[: datetime(2001, 1, 4, 10)]
    assert_frame_equal(result, df)
    result = df.loc[datetime(2001, 1, 1, 10) : datetime(2001, 1, 4, 10)]
    assert_frame_equal(result, df)

    result = df.loc[datetime(2001, 1, 1, 11) :]
    expected = df.iloc[1:]
    assert_frame_equal(result, expected)
    result = df.loc["20010101 11":]
    assert_frame_equal(result, expected)

    # duplicates
    df = pd.DataFrame(
        np.arange(5.0, dtype="float64"),
        index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 2, 3, 4]],
    )

    result = df.loc[datetime(2001, 1, 1, 10) :]
    assert_frame_equal(result, df)
    result = df.loc[: datetime(2001, 1, 4, 10)]
    assert_frame_equal(result, df)
    result = df.loc[datetime(2001, 1, 1, 10) : datetime(2001, 1, 4, 10)]
    assert_frame_equal(result, df)

    result = df.loc[datetime(2001, 1, 1, 11) :]
    expected = df.iloc[1:]
    assert_frame_equal(result, expected)
    result = df.loc["20010101 11":]
    assert_frame_equal(result, expected)


def test_frame_datetime64_duplicated():
    dates = date_range("2010-07-01", end="2010-08-05")

    tst = DataFrame({"symbol": "AAA", "date": dates})
    result = tst.duplicated(["date", "symbol"])
    assert (-result).all()

    tst = DataFrame({"date": dates})
    result = tst.duplicated()
    assert (-result).all()


def test_getitem_setitem_datetime_tz_pytz():
    from pytz import timezone as tz
    from pandas import date_range

    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern")
    ts = Series(np.random.randn(N), index=rng)

    # also test Timestamp tz handling, GH #2789
    result = ts.copy()
    result["1990-01-01 09:00:00+00:00"] = 0
    result["1990-01-01 09:00:00+00:00"] = ts[4]
    assert_series_equal(result, ts)

    result = ts.copy()
    result["1990-01-01 03:00:00-06:00"] = 0
    result["1990-01-01 03:00:00-06:00"] = ts[4]
    assert_series_equal(result, ts)

    # repeat with datetimes
    result = ts.copy()
    result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = 0
    result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = ts[4]
    assert_series_equal(result, ts)

    result = ts.copy()

    # comparison dates with datetime MUST be localized!
    date = tz("US/Central").localize(datetime(1990, 1, 1, 3))
    result[date] = 0
    result[date] = ts[4]
    assert_series_equal(result, ts)


def test_getitem_setitem_datetime_tz_dateutil():
    from dateutil.tz import tzutc
    from pandas._libs.tslibs.timezones import dateutil_gettz as gettz

    tz = (
        lambda x: tzutc() if x == "UTC" else gettz(x)
    )  # handle special case for utc in dateutil

    from pandas import date_range

    N = 50

    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="H", tz="America/New_York")
    ts = Series(np.random.randn(N), index=rng)

    # also test Timestamp tz handling, GH #2789
    result = ts.copy()
    result["1990-01-01 09:00:00+00:00"] = 0
    result["1990-01-01 09:00:00+00:00"] = ts[4]
    assert_series_equal(result, ts)

    result = ts.copy()
    result["1990-01-01 03:00:00-06:00"] = 0
    result["1990-01-01 03:00:00-06:00"] = ts[4]
    assert_series_equal(result, ts)

    # repeat with datetimes
    result = ts.copy()
    result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = 0
    result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = ts[4]
    assert_series_equal(result, ts)

    result = ts.copy()
    result[datetime(1990, 1, 1, 3, tzinfo=tz("America/Chicago"))] = 0
    result[datetime(1990, 1, 1, 3, tzinfo=tz("America/Chicago"))] = ts[4]
    assert_series_equal(result, ts)


def test_getitem_setitem_datetimeindex():
    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern")
    ts = Series(np.random.randn(N), index=rng)

    result = ts["1990-01-01 04:00:00"]
    expected = ts[4]
    assert result == expected

    result = ts.copy()
    result["1990-01-01 04:00:00"] = 0
    result["1990-01-01 04:00:00"] = ts[4]
    assert_series_equal(result, ts)

    result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
    assert_series_equal(result, ts)

    lb = "1990-01-01 04:00:00"
    rb = "1990-01-01 07:00:00"
    # GH#18435 strings get a pass from tzawareness compat
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    lb = "1990-01-01 04:00:00-0500"
    rb = "1990-01-01 07:00:00-0500"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    # repeat all the above with naive datetimes
    result = ts[datetime(1990, 1, 1, 4)]
    expected = ts[4]
    assert result == expected

    result = ts.copy()
    result[datetime(1990, 1, 1, 4)] = 0
    result[datetime(1990, 1, 1, 4)] = ts[4]
    assert_series_equal(result, ts)

    result = ts[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts.copy()
    result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = 0
    result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = ts[4:8]
    assert_series_equal(result, ts)

    lb = datetime(1990, 1, 1, 4)
    rb = datetime(1990, 1, 1, 7)
    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    with pytest.raises(TypeError, match=msg):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        ts[(ts.index >= lb) & (ts.index <= rb)]

    lb = pd.Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo)
    rb = pd.Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts[ts.index[4]]
    expected = ts[4]
    assert result == expected

    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts.copy()
    result[ts.index[4:8]] = 0
    result[4:8] = ts[4:8]
    assert_series_equal(result, ts)

    # also test partial date slicing
    result = ts["1990-01-02"]
    expected = ts[24:48]
    assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-02"] = 0
    result["1990-01-02"] = ts[24:48]
    assert_series_equal(result, ts)
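

# Added note (not from the original commit): the partial-date slices above rely
# on DatetimeIndex resolving a string like "1990-01-02" to the whole span it
# covers, so a day-level string selects all 24 hourly rows of that day. Sketch:
def _partial_string_indexing_sketch():
    idx = date_range("1990-01-01", periods=48, freq="H")
    ts = Series(np.arange(48), index=idx)
    assert len(ts["1990-01-02"]) == 24  # one calendar day of hourly data

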
def test_getitem_setitem_periodindex():
|
||||
from pandas import period_range
|
||||
|
||||
N = 50
|
||||
rng = period_range("1/1/1990", periods=N, freq="H")
|
||||
ts = Series(np.random.randn(N), index=rng)
|
||||
|
||||
result = ts["1990-01-01 04"]
|
||||
expected = ts[4]
|
||||
assert result == expected
|
||||
|
||||
result = ts.copy()
|
||||
result["1990-01-01 04"] = 0
|
||||
result["1990-01-01 04"] = ts[4]
|
||||
assert_series_equal(result, ts)
|
||||
|
||||
result = ts["1990-01-01 04":"1990-01-01 07"]
|
||||
expected = ts[4:8]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = ts.copy()
|
||||
result["1990-01-01 04":"1990-01-01 07"] = 0
|
||||
result["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
|
||||
assert_series_equal(result, ts)
|
||||
|
||||
lb = "1990-01-01 04"
|
||||
rb = "1990-01-01 07"
|
||||
result = ts[(ts.index >= lb) & (ts.index <= rb)]
|
||||
expected = ts[4:8]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# GH 2782
|
||||
result = ts[ts.index[4]]
|
||||
expected = ts[4]
|
||||
assert result == expected
|
||||
|
||||
result = ts[ts.index[4:8]]
|
||||
expected = ts[4:8]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = ts.copy()
|
||||
result[ts.index[4:8]] = 0
|
||||
result[4:8] = ts[4:8]
|
||||
assert_series_equal(result, ts)
|
||||
|
||||
|
||||
# FutureWarning from NumPy.
|
||||
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
|
||||
def test_getitem_median_slice_bug():
|
||||
index = date_range("20090415", "20090519", freq="2B")
|
||||
s = Series(np.random.randn(13), index=index)
|
||||
|
||||
indexer = [slice(6, 7, None)]
|
||||
result = s[indexer]
|
||||
expected = s[indexer[0]]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_datetime_indexing():
|
||||
from pandas import date_range
|
||||
|
||||
index = date_range("1/1/2000", "1/7/2000")
|
||||
index = index.repeat(3)
|
||||
|
||||
s = Series(len(index), index=index)
|
||||
stamp = Timestamp("1/8/2000")
|
||||
|
||||
with pytest.raises(KeyError, match=r"^947289600000000000$"):
|
||||
s[stamp]
|
||||
s[stamp] = 0
|
||||
assert s[stamp] == 0
|
||||
|
||||
# not monotonic
|
||||
s = Series(len(index), index=index)
|
||||
s = s[::-1]
|
||||
|
||||
with pytest.raises(KeyError, match=r"^947289600000000000$"):
|
||||
s[stamp]
|
||||
s[stamp] = 0
|
||||
assert s[stamp] == 0
|
||||
|
||||
|
||||
"""
|
||||
test duplicates in time series
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def dups():
|
||||
dates = [
|
||||
datetime(2000, 1, 2),
|
||||
datetime(2000, 1, 2),
|
||||
datetime(2000, 1, 2),
|
||||
datetime(2000, 1, 3),
|
||||
datetime(2000, 1, 3),
|
||||
datetime(2000, 1, 3),
|
||||
datetime(2000, 1, 4),
|
||||
datetime(2000, 1, 4),
|
||||
datetime(2000, 1, 4),
|
||||
datetime(2000, 1, 5),
|
||||
]
|
||||
|
||||
return Series(np.random.randn(len(dates)), index=dates)
|
||||
|
||||
|
||||
def test_constructor(dups):
|
||||
assert isinstance(dups, Series)
|
||||
assert isinstance(dups.index, DatetimeIndex)
|
||||
|
||||
|
||||
def test_is_unique_monotonic(dups):
|
||||
assert not dups.index.is_unique
|
||||
|
||||
|
||||
def test_index_unique(dups):
    uniques = dups.index.unique()
    expected = DatetimeIndex(
        [
            datetime(2000, 1, 2),
            datetime(2000, 1, 3),
            datetime(2000, 1, 4),
            datetime(2000, 1, 5),
        ]
    )
    assert uniques.dtype == "M8[ns]"  # sanity
    tm.assert_index_equal(uniques, expected)
    assert dups.index.nunique() == 4

    # #2563
    assert isinstance(uniques, DatetimeIndex)

    dups_local = dups.index.tz_localize("US/Eastern")
    dups_local.name = "foo"
    result = dups_local.unique()
    expected = DatetimeIndex(expected, name="foo")
    expected = expected.tz_localize("US/Eastern")
    assert result.tz is not None
    assert result.name == "foo"
    tm.assert_index_equal(result, expected)

    # NaT, note this is excluded
    arr = [1370745748 + t for t in range(20)] + [iNaT]
    idx = DatetimeIndex(arr * 3)
    tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
    assert idx.nunique() == 20
    assert idx.nunique(dropna=False) == 21

    arr = [
        Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)
    ] + [NaT]
    idx = DatetimeIndex(arr * 3)
    tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
    assert idx.nunique() == 20
    assert idx.nunique(dropna=False) == 21


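# Editor's sketch (not part of the original commit): unique() keeps NaT while
# nunique() drops it unless dropna=False, which the assertions above rely on.
def test_nunique_nat_sketch():
    idx = DatetimeIndex(["2013-06-09", "2013-06-09", None])
    assert len(idx.unique()) == 2  # one timestamp plus NaT
    assert idx.nunique() == 1  # NaT dropped by default
    assert idx.nunique(dropna=False) == 2

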
def test_index_dupes_contains():
    d = datetime(2011, 12, 5, 20, 30)
    ix = DatetimeIndex([d, d])
    assert d in ix


def test_duplicate_dates_indexing(dups):
    ts = dups

    uniques = ts.index.unique()
    for date in uniques:
        result = ts[date]

        mask = ts.index == date
        total = (ts.index == date).sum()
        expected = ts[mask]
        if total > 1:
            assert_series_equal(result, expected)
        else:
            assert_almost_equal(result, expected[0])

        cp = ts.copy()
        cp[date] = 0
        expected = Series(np.where(mask, 0, ts), index=ts.index)
        assert_series_equal(cp, expected)

    with pytest.raises(KeyError, match=r"^947116800000000000$"):
        ts[datetime(2000, 1, 6)]

    # new index
    ts[datetime(2000, 1, 6)] = 0
    assert ts[datetime(2000, 1, 6)] == 0


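# Editor's sketch (not part of the original commit): with duplicate labels a
# scalar lookup returns a sub-Series, while a unique label returns a scalar,
# which is why the loop above branches on `total`.
def test_duplicate_label_return_type_sketch():
    idx = [datetime(2000, 1, 2), datetime(2000, 1, 2), datetime(2000, 1, 3)]
    s = Series([1.0, 2.0, 3.0], index=idx)
    assert isinstance(s[datetime(2000, 1, 2)], Series)
    assert s[datetime(2000, 1, 3)] == 3.0

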
def test_range_slice():
    idx = DatetimeIndex(["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"])

    ts = Series(np.random.randn(len(idx)), index=idx)

    result = ts["1/2/2000":]
    expected = ts[1:]
    assert_series_equal(result, expected)

    result = ts["1/2/2000":"1/3/2000"]
    expected = ts[1:4]
    assert_series_equal(result, expected)


def test_groupby_average_dup_values(dups):
    result = dups.groupby(level=0).mean()
    expected = dups.groupby(dups.index).mean()
    assert_series_equal(result, expected)


def test_indexing_over_size_cutoff():
    import datetime

    # #1821

    old_cutoff = _index._SIZE_CUTOFF
    try:
        _index._SIZE_CUTOFF = 1000

        # create a large list of non-periodic datetimes
        dates = []
        sec = datetime.timedelta(seconds=1)
        half_sec = datetime.timedelta(microseconds=500000)
        d = datetime.datetime(2011, 12, 5, 20, 30)
        n = 1100
        for i in range(n):
            dates.append(d)
            dates.append(d + sec)
            dates.append(d + sec + half_sec)
            dates.append(d + sec + sec + half_sec)
            d += 3 * sec

        # duplicate some values in the list
        duplicate_positions = np.random.randint(0, len(dates) - 1, 20)
        for p in duplicate_positions:
            dates[p + 1] = dates[p]

        df = DataFrame(
            np.random.randn(len(dates), 4), index=dates, columns=list("ABCD")
        )

        pos = n * 3
        timestamp = df.index[pos]
        assert timestamp in df.index

        # it works!
        df.loc[timestamp]
        assert len(df.loc[[timestamp]]) > 0
    finally:
        _index._SIZE_CUTOFF = old_cutoff


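# Editor's note (not part of the original commit): _SIZE_CUTOFF appears to be
# the point at which the index engine flips to its "over size threshold" mode
# (see the over_size_threshold assertion below), answering lookups without
# materializing a full hash table; lowering it to 1000 lets a small index
# exercise that code path cheaply.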
def test_indexing_over_size_cutoff_period_index(monkeypatch):
    # GH 27136

    monkeypatch.setattr(_index, "_SIZE_CUTOFF", 1000)

    n = 1100
    idx = pd.period_range("1/1/2000", freq="T", periods=n)
    assert idx._engine.over_size_threshold

    s = pd.Series(np.random.randn(len(idx)), index=idx)

    pos = n - 1
    timestamp = idx[pos]
    assert timestamp in s.index

    # it works!
    s[timestamp]
    assert len(s.loc[[timestamp]]) > 0


def test_indexing_unordered():
    # GH 2437
    rng = date_range(start="2011-01-01", end="2011-01-15")
    ts = Series(np.random.rand(len(rng)), index=rng)
    ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])

    for t in ts.index:
        # TODO: unused?
        s = str(t)  # noqa

        expected = ts[t]
        result = ts2[t]
        assert expected == result

    # GH 3448 (ranges)
    def compare(slobj):
        result = ts2[slobj].copy()
        result = result.sort_index()
        expected = ts[slobj]
        assert_series_equal(result, expected)

    compare(slice("2011-01-01", "2011-01-15"))
    compare(slice("2010-12-30", "2011-01-15"))
    compare(slice("2011-01-01", "2011-01-16"))

    # partial ranges
    compare(slice("2011-01-01", "2011-01-6"))
    compare(slice("2011-01-06", "2011-01-8"))
    compare(slice("2011-01-06", "2011-01-12"))

    # single values
    result = ts2["2011"].sort_index()
    expected = ts["2011"]
    assert_series_equal(result, expected)

    # diff freq
    rng = date_range(datetime(2005, 1, 1), periods=20, freq="M")
    ts = Series(np.arange(len(rng)), index=rng)
    ts = ts.take(np.random.permutation(20))

    result = ts["2005"]
    for t in result.index:
        assert t.year == 2005


def test_indexing():
    idx = date_range("2001-1-1", periods=20, freq="M")
    ts = Series(np.random.rand(len(idx)), index=idx)

    # getting

    # GH 3070, make sure semantics work on Series/Frame
    expected = ts["2001"]
    expected.name = "A"

    df = DataFrame(dict(A=ts))
    result = df["2001"]["A"]
    assert_series_equal(expected, result)

    # setting
    ts["2001"] = 1
    expected = ts["2001"]
    expected.name = "A"

    df.loc["2001", "A"] = 1

    result = df["2001"]["A"]
    assert_series_equal(expected, result)

    # GH3546 (not including times on the last day)
    idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:00", freq="H")
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013-05"]
    assert_series_equal(expected, ts)

    idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:59", freq="S")
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013-05"]
    assert_series_equal(expected, ts)

    idx = [
        Timestamp("2013-05-31 00:00"),
        Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)),
    ]
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013"]
    assert_series_equal(expected, ts)

    # GH14826, indexing with a seconds resolution string / datetime object
    df = DataFrame(
        np.random.rand(5, 5),
        columns=["open", "high", "low", "close", "volume"],
        index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"),
    )
    expected = df.loc[[df.index[2]]]

    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        df["2012-01-02 18:01:02"]
    msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)"
    with pytest.raises(KeyError, match=msg):
        df[df.index[2]]


"""
|
||||
test NaT support
|
||||
"""
|
||||
|
||||
|
||||
def test_set_none_nan():
    series = Series(date_range("1/1/2000", periods=10))
    series[3] = None
    assert series[3] is NaT

    series[3:5] = None
    assert series[4] is NaT

    series[5] = np.nan
    assert series[5] is NaT

    series[5:7] = np.nan
    assert series[6] is NaT


def test_nat_operations():
    # GH 8617
    s = Series([0, pd.NaT], dtype="m8[ns]")
    exp = s[0]
    assert s.median() == exp
    assert s.min() == exp
    assert s.max() == exp


@pytest.mark.parametrize("method", ["round", "floor", "ceil"])
@pytest.mark.parametrize("freq", ["s", "5s", "min", "5min", "h", "5h"])
def test_round_nat(method, freq):
    # GH14940
    s = Series([pd.NaT])
    expected = Series(pd.NaT)
    round_method = getattr(s.dt, method)
    assert_series_equal(round_method(freq), expected)


def test_setitem_tuple_with_datetimetz():
    # GH 20441
    arr = date_range("2017", periods=4, tz="US/Eastern")
    index = [(0, 1), (0, 2), (0, 3), (0, 4)]
    result = Series(arr, index=index)
    expected = result.copy()
    result[(0, 1)] = np.nan
    expected.iloc[0] = np.nan
    assert_series_equal(result, expected)
@@ -0,0 +1,32 @@
import numpy as np

from pandas import Series
from pandas.util.testing import assert_almost_equal, assert_series_equal


def test_iloc():
    s = Series(np.random.randn(10), index=list(range(0, 20, 2)))

    for i in range(len(s)):
        result = s.iloc[i]
        exp = s[s.index[i]]
        assert_almost_equal(result, exp)

    # pass a slice
    result = s.iloc[slice(1, 3)]
    expected = s.loc[2:4]
    assert_series_equal(result, expected)

    # test slice is a view
    result[:] = 0
    assert (s[1:3] == 0).all()

    # list of integers
    result = s.iloc[[0, 2, 3, 4, 5]]
    expected = s.reindex(s.index[[0, 2, 3, 4, 5]])
    assert_series_equal(result, expected)


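# Editor's sketch (not part of the original commit): an .iloc slice is a view
# on the parent's data, so writing through the slice mutates the parent, which
# is what the "slice is a view" assertion above depends on.
def test_iloc_slice_view_sketch():
    s = Series(np.arange(5.0))
    view = s.iloc[1:3]
    view[:] = -1.0
    assert (s.iloc[1:3] == -1.0).all()

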
def test_iloc_nonunique():
    s = Series([0, 1, 2], index=[0, 1, 0])
    assert s.iloc[2] == 2
@@ -0,0 +1,879 @@
""" test get/set & misc """

from datetime import timedelta

import numpy as np
import pytest

from pandas.core.dtypes.common import is_scalar

import pandas as pd
from pandas import Categorical, DataFrame, MultiIndex, Series, Timedelta, Timestamp
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal

from pandas.tseries.offsets import BDay


def test_basic_indexing():
    s = Series(np.random.randn(5), index=["a", "b", "a", "a", "b"])

    msg = "index out of bounds"
    with pytest.raises(IndexError, match=msg):
        s[5]
    msg = "index 5 is out of bounds for axis 0 with size 5"
    with pytest.raises(IndexError, match=msg):
        s[5] = 0

    with pytest.raises(KeyError, match=r"^'c'$"):
        s["c"]

    s = s.sort_index()

    msg = r"index out of bounds|^5$"
    with pytest.raises(IndexError, match=msg):
        s[5]
    msg = r"index 5 is out of bounds for axis (0|1) with size 5|^5$"
    with pytest.raises(IndexError, match=msg):
        s[5] = 0


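# Editor's sketch (not part of the original commit): on a non-integer index an
# integer key falls back to positional access, so an out-of-range position
# raises IndexError rather than KeyError, matching the assertions above.
def test_positional_fallback_sketch():
    s = Series([1.0, 2.0], index=["a", "b"])
    assert s[1] == 2.0  # positional fallback on a label index
    with pytest.raises(IndexError):
        s[5]

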
def test_basic_getitem_with_labels(test_data):
    indices = test_data.ts.index[[5, 10, 15]]

    result = test_data.ts[indices]
    expected = test_data.ts.reindex(indices)
    assert_series_equal(result, expected)

    result = test_data.ts[indices[0] : indices[2]]
    expected = test_data.ts.loc[indices[0] : indices[2]]
    assert_series_equal(result, expected)

    # integer indexes, be careful
    s = Series(np.random.randn(10), index=list(range(0, 20, 2)))
    inds = [0, 2, 5, 7, 8]
    arr_inds = np.array([0, 2, 5, 7, 8])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = s[inds]
    expected = s.reindex(inds)
    assert_series_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = s[arr_inds]
    expected = s.reindex(arr_inds)
    assert_series_equal(result, expected)

    # GH12089
    # with tz for values
    s = Series(
        pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"]
    )
    expected = Timestamp("2011-01-01", tz="US/Eastern")
    result = s.loc["a"]
    assert result == expected
    result = s.iloc[0]
    assert result == expected
    result = s["a"]
    assert result == expected


def test_getitem_setitem_ellipsis():
    s = Series(np.random.randn(10))

    np.fix(s)

    result = s[...]
    assert_series_equal(result, s)

    s[...] = 5
    assert (result == 5).all()


def test_getitem_get(test_data):
    test_series = test_data.series
    test_obj_series = test_data.objSeries

    idx1 = test_series.index[5]
    idx2 = test_obj_series.index[5]

    assert test_series[idx1] == test_series.get(idx1)
    assert test_obj_series[idx2] == test_obj_series.get(idx2)

    assert test_series[idx1] == test_series[5]
    assert test_obj_series[idx2] == test_obj_series[5]

    assert test_series.get(-1) == test_series.get(test_series.index[-1])
    assert test_series[5] == test_series.get(test_series.index[5])

    # missing
    d = test_data.ts.index[0] - BDay()
    msg = r"Timestamp\('1999-12-31 00:00:00', freq='B'\)"
    with pytest.raises(KeyError, match=msg):
        test_data.ts[d]

    # None
    # GH 5652
    for s in [Series(), Series(index=list("abc"))]:
        result = s.get(None)
        assert result is None


def test_getitem_fancy(test_data):
    slice1 = test_data.series[[1, 2, 3]]
    slice2 = test_data.objSeries[[1, 2, 3]]
    assert test_data.series.index[2] == slice1.index[1]
    assert test_data.objSeries.index[2] == slice2.index[1]
    assert test_data.series[2] == slice1[1]
    assert test_data.objSeries[2] == slice2[1]


def test_getitem_generator(test_data):
    gen = (x > 0 for x in test_data.series)
    result = test_data.series[gen]
    result2 = test_data.series[iter(test_data.series > 0)]
    expected = test_data.series[test_data.series > 0]
    assert_series_equal(result, expected)
    assert_series_equal(result2, expected)


def test_type_promotion():
    # GH12599
    s = pd.Series()
    s["a"] = pd.Timestamp("2016-01-01")
    s["b"] = 3.0
    s["c"] = "foo"
    expected = Series([pd.Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"])
    assert_series_equal(s, expected)


@pytest.mark.parametrize(
    "result_1, duplicate_item, expected_1",
    [
        [
            pd.Series({1: 12, 2: [1, 2, 2, 3]}),
            pd.Series({1: 313}),
            pd.Series({1: 12}, dtype=object),
        ],
        [
            pd.Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
            pd.Series({1: [1, 2, 3]}),
            pd.Series({1: [1, 2, 3]}),
        ],
    ],
)
def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1):
    # GH 17610
    result = result_1.append(duplicate_item)
    expected = expected_1.append(duplicate_item)
    assert_series_equal(result[1], expected)
    assert result[2] == result_1[2]


def test_getitem_out_of_bounds(test_data):
    # don't segfault, GH #495
    msg = "index out of bounds"
    with pytest.raises(IndexError, match=msg):
        test_data.ts[len(test_data.ts)]

    # GH #917
    s = Series([])
    with pytest.raises(IndexError, match=msg):
        s[-1]


def test_getitem_setitem_integers():
    # caused bug without test
    s = Series([1, 2, 3], ["a", "b", "c"])

    assert s.iloc[0] == s["a"]
    s.iloc[0] = 5
    tm.assert_almost_equal(s["a"], 5)


def test_getitem_box_float64(test_data):
    value = test_data.ts[5]
    assert isinstance(value, np.float64)


@pytest.mark.parametrize(
    "arr",
    [np.random.randn(10), tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern")],
)
def test_get(arr):
    # GH 21260
    s = Series(arr, index=[2 * i for i in range(len(arr))])
    assert s.get(4) == s.iloc[2]

    result = s.get([4, 6])
    expected = s.iloc[[2, 3]]
    tm.assert_series_equal(result, expected)

    result = s.get(slice(2))
    expected = s.iloc[[0, 1]]
    tm.assert_series_equal(result, expected)

    assert s.get(-1) is None
    assert s.get(s.index.max() + 1) is None

    s = Series(arr[:6], index=list("abcdef"))
    assert s.get("c") == s.iloc[2]

    result = s.get(slice("b", "d"))
    expected = s.iloc[[1, 2, 3]]
    tm.assert_series_equal(result, expected)

    result = s.get("Z")
    assert result is None

    assert s.get(4) == s.iloc[4]
    assert s.get(-1) == s.iloc[-1]
    assert s.get(len(s)) is None

    # GH 21257
    s = pd.Series(arr)
    s2 = s[::2]
    assert s2.get(1) is None


def test_series_box_timestamp():
    rng = pd.date_range("20090415", "20090519", freq="B")
    ser = Series(rng)

    assert isinstance(ser[5], pd.Timestamp)

    rng = pd.date_range("20090415", "20090519", freq="B")
    ser = Series(rng, index=rng)
    assert isinstance(ser[5], pd.Timestamp)

    assert isinstance(ser.iat[5], pd.Timestamp)


def test_getitem_ambiguous_keyerror():
    s = Series(range(10), index=list(range(0, 20, 2)))
    with pytest.raises(KeyError, match=r"^1$"):
        s[1]
    with pytest.raises(KeyError, match=r"^1$"):
        s.loc[1]


def test_getitem_unordered_dup():
    obj = Series(range(5), index=["c", "a", "a", "b", "b"])
    assert is_scalar(obj["c"])
    assert obj["c"] == 0


def test_getitem_dups_with_missing():
    # breaks reindex, so need to use .loc internally
    # GH 4246
    s = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        expected = s.loc[["foo", "bar", "bah", "bam"]]

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = s[["foo", "bar", "bah", "bam"]]
    assert_series_equal(result, expected)


def test_getitem_dups():
    s = Series(range(5), index=["A", "A", "B", "C", "C"], dtype=np.int64)
    expected = Series([3, 4], index=["C", "C"], dtype=np.int64)
    result = s["C"]
    assert_series_equal(result, expected)


def test_setitem_ambiguous_keyerror():
    s = Series(range(10), index=list(range(0, 20, 2)))

    # equivalent of an append
    s2 = s.copy()
    s2[1] = 5
    expected = s.append(Series([5], index=[1]))
    assert_series_equal(s2, expected)

    s2 = s.copy()
    s2.loc[1] = 5
    expected = s.append(Series([5], index=[1]))
    assert_series_equal(s2, expected)


def test_getitem_dataframe():
    rng = list(range(10))
    s = pd.Series(10, index=rng)
    df = pd.DataFrame(rng, index=rng)
    msg = (
        "Indexing a Series with DataFrame is not supported,"
        " use the appropriate DataFrame column"
    )
    with pytest.raises(TypeError, match=msg):
        s[df > 5]


def test_setitem(test_data):
    test_data.ts[test_data.ts.index[5]] = np.NaN
    test_data.ts[[1, 2, 17]] = np.NaN
    test_data.ts[6] = np.NaN
    assert np.isnan(test_data.ts[6])
    assert np.isnan(test_data.ts[2])
    test_data.ts[np.isnan(test_data.ts)] = 5
    assert not np.isnan(test_data.ts[2])

    # caught this bug when writing tests
    series = Series(tm.makeIntIndex(20).astype(float), index=tm.makeIntIndex(20))

    series[::2] = 0
    assert (series[::2] == 0).all()

    # set item that's not contained
    s = test_data.series.copy()
    s["foobar"] = 1

    app = Series([1], index=["foobar"], name="series")
    expected = test_data.series.append(app)
    assert_series_equal(s, expected)

    # Test for issue #10193
    key = pd.Timestamp("2012-01-01")
    series = pd.Series()
    series[key] = 47
    expected = pd.Series(47, [key])
    assert_series_equal(series, expected)

    series = pd.Series([], pd.DatetimeIndex([], freq="D"))
    series[key] = 47
    expected = pd.Series(47, pd.DatetimeIndex([key], freq="D"))
    assert_series_equal(series, expected)


def test_setitem_dtypes():
    # change dtypes
    # GH 4463
    expected = Series([np.nan, 2, 3])

    s = Series([1, 2, 3])
    s.iloc[0] = np.nan
    assert_series_equal(s, expected)

    s = Series([1, 2, 3])
    s.loc[0] = np.nan
    assert_series_equal(s, expected)

    s = Series([1, 2, 3])
    s[0] = np.nan
    assert_series_equal(s, expected)

    s = Series([False])
    s.loc[0] = np.nan
    assert_series_equal(s, Series([np.nan]))

    s = Series([False, True])
    s.loc[0] = np.nan
    assert_series_equal(s, Series([np.nan, 1.0]))


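# Editor's sketch (not part of the original commit): assigning np.nan into an
# integer Series upcasts the whole Series to float64, since int64 has no
# missing-value representation; that upcast is what the cases above assert.
def test_setitem_nan_upcast_sketch():
    s = Series([1, 2, 3])
    assert s.dtype == np.int64
    s[0] = np.nan
    assert s.dtype == np.float64

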
def test_set_value(test_data):
    idx = test_data.ts.index[10]
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        res = test_data.ts.set_value(idx, 0)
    assert res is test_data.ts
    assert test_data.ts[idx] == 0

    # equiv
    s = test_data.series.copy()
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        res = s.set_value("foobar", 0)
    assert res is s
    assert res.index[-1] == "foobar"
    assert res["foobar"] == 0

    s = test_data.series.copy()
    s.loc["foobar"] = 0
    assert s.index[-1] == "foobar"
    assert s["foobar"] == 0


def test_setslice(test_data):
    sl = test_data.ts[5:20]
    assert len(sl) == len(sl.index)
    assert sl.index.is_unique is True


# FutureWarning from NumPy about [slice(None, 5)].
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_basic_getitem_setitem_corner(test_data):
    # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
    msg = "Can only tuple-index with a MultiIndex"
    with pytest.raises(ValueError, match=msg):
        test_data.ts[:, 2]
    with pytest.raises(ValueError, match=msg):
        test_data.ts[:, 2] = 2

    # weird lists. [slice(0, 5)] will work but not two slices
    result = test_data.ts[[slice(None, 5)]]
    expected = test_data.ts[:5]
    assert_series_equal(result, expected)

    # a list mixing a scalar and a slice is unhashable and raises
    msg = r"unhashable type(: 'slice')?"
    with pytest.raises(TypeError, match=msg):
        test_data.ts[[5, slice(None, None)]]
    with pytest.raises(TypeError, match=msg):
        test_data.ts[[5, slice(None, None)]] = 2


@pytest.mark.parametrize("tz", ["US/Eastern", "UTC", "Asia/Tokyo"])
|
||||
def test_setitem_with_tz(tz):
|
||||
orig = pd.Series(pd.date_range("2016-01-01", freq="H", periods=3, tz=tz))
|
||||
assert orig.dtype == "datetime64[ns, {0}]".format(tz)
|
||||
|
||||
# scalar
|
||||
s = orig.copy()
|
||||
s[1] = pd.Timestamp("2011-01-01", tz=tz)
|
||||
exp = pd.Series(
|
||||
[
|
||||
pd.Timestamp("2016-01-01 00:00", tz=tz),
|
||||
pd.Timestamp("2011-01-01 00:00", tz=tz),
|
||||
pd.Timestamp("2016-01-01 02:00", tz=tz),
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
s = orig.copy()
|
||||
s.loc[1] = pd.Timestamp("2011-01-01", tz=tz)
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
s = orig.copy()
|
||||
s.iloc[1] = pd.Timestamp("2011-01-01", tz=tz)
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
# vector
|
||||
vals = pd.Series(
|
||||
[pd.Timestamp("2011-01-01", tz=tz), pd.Timestamp("2012-01-01", tz=tz)],
|
||||
index=[1, 2],
|
||||
)
|
||||
assert vals.dtype == "datetime64[ns, {0}]".format(tz)
|
||||
|
||||
s[[1, 2]] = vals
|
||||
exp = pd.Series(
|
||||
[
|
||||
pd.Timestamp("2016-01-01 00:00", tz=tz),
|
||||
pd.Timestamp("2011-01-01 00:00", tz=tz),
|
||||
pd.Timestamp("2012-01-01 00:00", tz=tz),
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
s = orig.copy()
|
||||
s.loc[[1, 2]] = vals
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
s = orig.copy()
|
||||
s.iloc[[1, 2]] = vals
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
|
||||
def test_setitem_with_tz_dst():
    # GH XXX
    tz = "US/Eastern"
    orig = pd.Series(pd.date_range("2016-11-06", freq="H", periods=3, tz=tz))
    assert orig.dtype == "datetime64[ns, {0}]".format(tz)

    # scalar
    s = orig.copy()
    s[1] = pd.Timestamp("2011-01-01", tz=tz)
    exp = pd.Series(
        [
            pd.Timestamp("2016-11-06 00:00-04:00", tz=tz),
            pd.Timestamp("2011-01-01 00:00-05:00", tz=tz),
            pd.Timestamp("2016-11-06 01:00-05:00", tz=tz),
        ]
    )
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[1] = pd.Timestamp("2011-01-01", tz=tz)
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[1] = pd.Timestamp("2011-01-01", tz=tz)
    tm.assert_series_equal(s, exp)

    # vector
    vals = pd.Series(
        [pd.Timestamp("2011-01-01", tz=tz), pd.Timestamp("2012-01-01", tz=tz)],
        index=[1, 2],
    )
    assert vals.dtype == "datetime64[ns, {0}]".format(tz)

    s[[1, 2]] = vals
    exp = pd.Series(
        [
            pd.Timestamp("2016-11-06 00:00", tz=tz),
            pd.Timestamp("2011-01-01 00:00", tz=tz),
            pd.Timestamp("2012-01-01 00:00", tz=tz),
        ]
    )
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)


def test_categorical_assigning_ops():
    orig = Series(Categorical(["b", "b"], categories=["a", "b"]))
    s = orig.copy()
    s[:] = "a"
    exp = Series(Categorical(["a", "a"], categories=["a", "b"]))
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s[1] = "a"
    exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s[s.index > 0] = "a"
    exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s[[False, True]] = "a"
    exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.index = ["x", "y"]
    s["y"] = "a"
    exp = Series(Categorical(["b", "a"], categories=["a", "b"]), index=["x", "y"])
    tm.assert_series_equal(s, exp)

    # ensure that one can set something to np.nan
    s = Series(Categorical([1, 2, 3]))
    exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3]))
    s[1] = np.nan
    tm.assert_series_equal(s, exp)


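# Editor's sketch (not part of the original commit): while np.nan can always be
# assigned, assigning a value that is not among the declared categories raises
# instead of silently extending the categories.
def test_categorical_assign_unknown_category_sketch():
    s = Series(Categorical(["a", "b"], categories=["a", "b"]))
    with pytest.raises(ValueError):
        s[0] = "c"

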
def test_slice(test_data):
    numSlice = test_data.series[10:20]
    numSliceEnd = test_data.series[-10:]
    objSlice = test_data.objSeries[10:20]

    assert test_data.series.index[9] not in numSlice.index
    assert test_data.objSeries.index[9] not in objSlice.index

    assert len(numSlice) == len(numSlice.index)
    assert test_data.series[numSlice.index[0]] == numSlice[numSlice.index[0]]

    assert numSlice.index[1] == test_data.series.index[11]
    assert tm.equalContents(numSliceEnd, np.array(test_data.series)[-10:])

    # Test return view.
    sl = test_data.series[10:20]
    sl[:] = 0

    assert (test_data.series[10:20] == 0).all()


def test_slice_can_reorder_not_uniquely_indexed():
    s = Series(1, index=["a", "a", "b", "b", "c"])
    s[::-1]  # it works!


def test_loc_setitem(test_data):
    inds = test_data.series.index[[3, 4, 7]]

    result = test_data.series.copy()
    result.loc[inds] = 5

    expected = test_data.series.copy()
    expected[[3, 4, 7]] = 5
    assert_series_equal(result, expected)

    result.iloc[5:10] = 10
    expected[5:10] = 10
    assert_series_equal(result, expected)

    # set slice with indices
    d1, d2 = test_data.series.index[[5, 15]]
    result.loc[d1:d2] = 6
    expected[5:16] = 6  # because it's inclusive
    assert_series_equal(result, expected)

    # set index value
    test_data.series.loc[d1] = 4
    test_data.series.loc[d2] = 6
    assert test_data.series[d1] == 4
    assert test_data.series[d2] == 6


def test_setitem_na():
    # these induce dtype changes
    expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan])
    s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])
    s[::2] = np.nan
    assert_series_equal(s, expected)

    # booleans get coerced to float once nan is introduced
    expected = Series([np.nan, 1, np.nan, 0])
    s = Series([True, True, False, False])
    s[::2] = np.nan
    assert_series_equal(s, expected)

    expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9])
    s = Series(np.arange(10))
    s[:5] = np.nan
    assert_series_equal(s, expected)


def test_timedelta_assignment():
    # GH 8209
    s = Series([])
    s.loc["B"] = timedelta(1)
    tm.assert_series_equal(s, Series(Timedelta("1 days"), index=["B"]))

    s = s.reindex(s.index.insert(0, "A"))
    tm.assert_series_equal(s, Series([np.nan, Timedelta("1 days")], index=["A", "B"]))

    result = s.fillna(timedelta(1))
    expected = Series(Timedelta("1 days"), index=["A", "B"])
    tm.assert_series_equal(result, expected)

    s.loc["A"] = timedelta(1)
    tm.assert_series_equal(s, expected)

    # GH 14155
    s = Series(10 * [np.timedelta64(10, "m")])
    s.loc[[1, 2, 3]] = np.timedelta64(20, "m")
    expected = pd.Series(10 * [np.timedelta64(10, "m")])
    expected.loc[[1, 2, 3]] = pd.Timedelta(np.timedelta64(20, "m"))
    tm.assert_series_equal(s, expected)


@pytest.mark.parametrize(
    "nat_val,should_cast",
    [
        (pd.NaT, True),
        (np.timedelta64("NaT", "ns"), True),
        (np.datetime64("NaT", "ns"), False),
    ],
)
def test_td64_series_assign_nat(nat_val, should_cast):
    # some nat-like values should be cast to timedelta64 when inserting
    # into a timedelta64 series. Others should coerce to object
    # and retain their dtypes.
    base = pd.Series([0, 1, 2], dtype="m8[ns]")
    expected = pd.Series([pd.NaT, 1, 2], dtype="m8[ns]")
    if not should_cast:
        expected = expected.astype(object)

    ser = base.copy(deep=True)
    ser[0] = nat_val
    tm.assert_series_equal(ser, expected)

    ser = base.copy(deep=True)
    ser.loc[0] = nat_val
    tm.assert_series_equal(ser, expected)

    ser = base.copy(deep=True)
    ser.iloc[0] = nat_val
    tm.assert_series_equal(ser, expected)


@pytest.mark.parametrize(
    "td",
    [
        pd.Timedelta("9 days"),
        pd.Timedelta("9 days").to_timedelta64(),
        pd.Timedelta("9 days").to_pytimedelta(),
    ],
)
def test_append_timedelta_does_not_cast(td):
    # GH#22717 inserting a Timedelta should _not_ cast to int64
    expected = pd.Series(["x", td], index=[0, "td"], dtype=object)

    ser = pd.Series(["x"])
    ser["td"] = td
    tm.assert_series_equal(ser, expected)
    assert isinstance(ser["td"], pd.Timedelta)

    ser = pd.Series(["x"])
    ser.loc["td"] = pd.Timedelta("9 days")
    tm.assert_series_equal(ser, expected)
    assert isinstance(ser["td"], pd.Timedelta)


def test_underlying_data_conversion():
    # GH 4080
    df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]})
    df.set_index(["a", "b", "c"], inplace=True)
    s = Series([1], index=[(2, 2, 2)])
    df["val"] = 0
    df
    df["val"].update(s)

    expected = DataFrame(dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0]))
    expected.set_index(["a", "b", "c"], inplace=True)
    tm.assert_frame_equal(df, expected)

    # GH 3970
    # these are chained assignments as well
    pd.set_option("chained_assignment", None)
    df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
    df["cc"] = 0.0

    ck = [True] * len(df)

    df["bb"].iloc[0] = 0.13

    # TODO: unused
    df_tmp = df.iloc[ck]  # noqa

    df["bb"].iloc[0] = 0.15
    assert df["bb"].iloc[0] == 0.15
    pd.set_option("chained_assignment", "raise")

    # GH 3217
    df = DataFrame(dict(a=[1, 3], b=[np.nan, 2]))
    df["c"] = np.nan
    df["c"].update(pd.Series(["foo"], index=[0]))

    expected = DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=["foo", np.nan]))
    tm.assert_frame_equal(df, expected)


def test_preserve_refs(test_data):
    seq = test_data.ts[[5, 10, 15]]
    seq[1] = np.NaN
    assert not np.isnan(test_data.ts[10])


def test_cast_on_putmask():
    # GH 2746

    # need to upcast
    s = Series([1, 2], index=[1, 2], dtype="int64")
    s[[True, False]] = Series([0], index=[1], dtype="int64")
    expected = Series([0, 2], index=[1, 2], dtype="int64")

    assert_series_equal(s, expected)


def test_type_promote_putmask():
    # GH8387: test that changing types does not break alignment
    ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5)
    left, mask = ts.copy(), ts > 0
    right = ts[mask].copy().map(str)
    left[mask] = right
    assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t))

    s = Series([0, 1, 2, 0])
    mask = s > 0
    s2 = s[mask].map(str)
    s[mask] = s2
    assert_series_equal(s, Series([0, "1", "2", 0]))

    s = Series([0, "foo", "bar", 0])
    mask = Series([False, True, True, False])
    s2 = s[mask]
    s[mask] = s2
    assert_series_equal(s, Series([0, "foo", "bar", 0]))


def test_multilevel_preserve_name():
    index = MultiIndex(
        levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    s = Series(np.random.randn(len(index)), index=index, name="sth")

    result = s["foo"]
    result2 = s.loc["foo"]
    assert result.name == s.name
    assert result2.name == s.name


def test_setitem_scalar_into_readonly_backing_data():
    # GH14359: test that you cannot mutate a read only buffer

    array = np.zeros(5)
    array.flags.writeable = False  # make the array immutable
    series = Series(array)

    for n in range(len(series)):
        msg = "assignment destination is read-only"
        with pytest.raises(ValueError, match=msg):
            series[n] = 1

        assert array[n] == 0


def test_setitem_slice_into_readonly_backing_data():
    # GH14359: test that you cannot mutate a read only buffer

    array = np.zeros(5)
    array.flags.writeable = False  # make the array immutable
    series = Series(array)

    msg = "assignment destination is read-only"
    with pytest.raises(ValueError, match=msg):
        series[1:3] = 1

    assert not array.any()


"""
miscellaneous methods
"""


def test_pop():
    # GH 6600
    df = DataFrame({"A": 0, "B": np.arange(5, dtype="int64"), "C": 0})
    k = df.iloc[4]

    result = k.pop("B")
    assert result == 4

    expected = Series([0, 0], index=["A", "C"], name=4)
    assert_series_equal(k, expected)


def test_take():
    s = Series([-1, 5, 6, 2, 4])

    actual = s.take([1, 3, 4])
    expected = Series([5, 2, 4], index=[1, 3, 4])
    tm.assert_series_equal(actual, expected)

    actual = s.take([-1, 3, 4])
    expected = Series([4, 2, 4], index=[4, 3, 4])
    tm.assert_series_equal(actual, expected)

    msg = "index {} is out of bounds for size 5"
    with pytest.raises(IndexError, match=msg.format(10)):
        s.take([1, 10])
    with pytest.raises(IndexError, match=msg.format(5)):
        s.take([2, 5])


def test_take_categorical():
    # https://github.com/pandas-dev/pandas/issues/20664
    s = Series(pd.Categorical(["a", "b", "c"]))
    result = s.take([-2, -2, 0])
    expected = Series(
        pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"]), index=[1, 1, 0]
    )
    assert_series_equal(result, expected)


def test_head_tail(test_data):
    assert_series_equal(test_data.series.head(), test_data.series[:5])
    assert_series_equal(test_data.series.head(0), test_data.series[0:0])
    assert_series_equal(test_data.series.tail(), test_data.series[-5:])
    assert_series_equal(test_data.series.tail(0), test_data.series[0:0])
@@ -0,0 +1,159 @@
import numpy as np
import pytest

import pandas as pd
from pandas import Series, Timestamp
from pandas.util.testing import assert_series_equal


@pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)])
|
||||
def test_loc_uint64(val, expected):
|
||||
# see gh-19399
|
||||
s = Series({2 ** 63 - 1: 3, 2 ** 63: 4})
|
||||
assert s.loc[val] == expected
|
||||
|
||||
|
||||
def test_loc_getitem(test_data):
|
||||
inds = test_data.series.index[[3, 4, 7]]
|
||||
assert_series_equal(test_data.series.loc[inds], test_data.series.reindex(inds))
|
||||
assert_series_equal(test_data.series.iloc[5::2], test_data.series[5::2])
|
||||
|
||||
# slice with indices
|
||||
d1, d2 = test_data.ts.index[[5, 15]]
|
||||
result = test_data.ts.loc[d1:d2]
|
||||
expected = test_data.ts.truncate(d1, d2)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# boolean
|
||||
mask = test_data.series > test_data.series.median()
|
||||
assert_series_equal(test_data.series.loc[mask], test_data.series[mask])
|
||||
|
||||
# ask for index value
|
||||
assert test_data.ts.loc[d1] == test_data.ts[d1]
|
||||
assert test_data.ts.loc[d2] == test_data.ts[d2]
|
||||
|
||||
|
||||
def test_loc_getitem_not_monotonic(test_data):
|
||||
d1, d2 = test_data.ts.index[[5, 15]]
|
||||
|
||||
ts2 = test_data.ts[::2][[1, 2, 0]]
|
||||
|
||||
msg = r"Timestamp\('2000-01-10 00:00:00'\)"
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
ts2.loc[d1:d2]
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
ts2.loc[d1:d2] = 0
|
||||
|
||||
|
||||
def test_loc_getitem_setitem_integer_slice_keyerrors():
    s = Series(np.random.randn(10), index=list(range(0, 20, 2)))

    # this is OK
    cp = s.copy()
    cp.iloc[4:10] = 0
    assert (cp.iloc[4:10] == 0).all()

    # so is this
    cp = s.copy()
    cp.iloc[3:11] = 0
    assert (cp.iloc[3:11] == 0).values.all()

    result = s.iloc[2:6]
    result2 = s.loc[3:11]
    expected = s.reindex([4, 6, 8, 10])

    assert_series_equal(result, expected)
    assert_series_equal(result2, expected)

    # non-monotonic, raise KeyError
    s2 = s.iloc[list(range(5)) + list(range(9, 4, -1))]
    with pytest.raises(KeyError, match=r"^3$"):
        s2.loc[3:11]
    with pytest.raises(KeyError, match=r"^3$"):
        s2.loc[3:11] = 0


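# Editor's sketch (not part of the original commit): .loc slices by label and
# includes both endpoints, while .iloc slices by position and excludes the
# stop -- which is why iloc[2:6] and loc[3:11] above select the same rows.
def test_loc_vs_iloc_slice_sketch():
    s = Series([10, 20, 30], index=[2, 4, 6])
    assert list(s.loc[2:4]) == [10, 20]  # labels 2 and 4, inclusive
    assert list(s.iloc[0:1]) == [10]  # position 0 only

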
def test_loc_getitem_iterator(test_data):
    idx = iter(test_data.series.index[:10])
    result = test_data.series.loc[idx]
    assert_series_equal(result, test_data.series[:10])


def test_loc_setitem_boolean(test_data):
    mask = test_data.series > test_data.series.median()

    result = test_data.series.copy()
    result.loc[mask] = 0
    expected = test_data.series
    expected[mask] = 0
    assert_series_equal(result, expected)


def test_loc_setitem_corner(test_data):
    inds = list(test_data.series.index[[5, 8, 12]])
    test_data.series.loc[inds] = 5
    msg = r"\['foo'\] not in index"
    with pytest.raises(KeyError, match=msg):
        test_data.series.loc[inds + ["foo"]] = 5


def test_basic_setitem_with_labels(test_data):
    indices = test_data.ts.index[[5, 10, 15]]

    cp = test_data.ts.copy()
    exp = test_data.ts.copy()
    cp[indices] = 0
    exp.loc[indices] = 0
    assert_series_equal(cp, exp)

    cp = test_data.ts.copy()
    exp = test_data.ts.copy()
    cp[indices[0] : indices[2]] = 0
    exp.loc[indices[0] : indices[2]] = 0
    assert_series_equal(cp, exp)

    # integer indexes, be careful
    s = Series(np.random.randn(10), index=list(range(0, 20, 2)))
    inds = [0, 4, 6]
    arr_inds = np.array([0, 4, 6])

    # mutate the copies, not `s`, so the comparison actually tests something
    cp = s.copy()
    exp = s.copy()
    cp[inds] = 0
    exp.loc[inds] = 0
    assert_series_equal(cp, exp)

    cp = s.copy()
    exp = s.copy()
    cp[arr_inds] = 0
    exp.loc[arr_inds] = 0
    assert_series_equal(cp, exp)

    inds_notfound = [0, 4, 5, 6]
    arr_inds_notfound = np.array([0, 4, 5, 6])
    msg = r"\[5\] not contained in the index"
    with pytest.raises(ValueError, match=msg):
        s[inds_notfound] = 0
    with pytest.raises(Exception, match=msg):
        s[arr_inds_notfound] = 0

    # GH12089
    # with tz for values
    s = Series(
        pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"]
    )
    s2 = s.copy()
    expected = Timestamp("2011-01-03", tz="US/Eastern")
    s2.loc["a"] = expected
    result = s2.loc["a"]
    assert result == expected

    s2 = s.copy()
    s2.iloc[0] = expected
    result = s2.iloc[0]
    assert result == expected

    s2 = s.copy()
    s2["a"] = expected
    result = s2["a"]
    assert result == expected
@@ -0,0 +1,317 @@
import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Index, Series
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal


def test_get():
    # GH 6383
    s = Series(
        np.array(
            [43, 48, 60, 48, 50, 51, 50, 45, 57, 48,
             56, 45, 51, 39, 55, 43, 54, 52, 51, 54]
        )
    )

    result = s.get(25, 0)
    expected = 0
    assert result == expected

    s = Series(
        np.array(
            [43, 48, 60, 48, 50, 51, 50, 45, 57, 48,
             56, 45, 51, 39, 55, 43, 54, 52, 51, 54]
        ),
        index=pd.Float64Index(
            [25.0, 36.0, 49.0, 64.0, 81.0, 100.0, 121.0, 144.0, 169.0, 196.0,
             1225.0, 1296.0, 1369.0, 1444.0, 1521.0, 1600.0, 1681.0, 1764.0,
             1849.0, 1936.0],
            dtype="object",
        ),
    )

    result = s.get(25, 0)
    expected = 43
    assert result == expected

    # GH 7407
    # with a boolean accessor
    df = pd.DataFrame({"i": [0] * 3, "b": [False] * 3})
    vc = df.i.value_counts()
    result = vc.get(99, default="Missing")
    assert result == "Missing"

    vc = df.b.value_counts()
    result = vc.get(False, default="Missing")
    assert result == 3

    result = vc.get(True, default="Missing")
    assert result == "Missing"


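# Editor's sketch (not part of the original commit): Series.get is the
# dict-style accessor -- a missing label returns the default instead of
# raising, which is what the value_counts lookups above exploit.
def test_get_default_sketch():
    s = Series([10, 20], index=["a", "b"])
    assert s.get("a") == 10
    assert s.get("z") is None
    assert s.get("z", -1) == -1

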
def test_get_nan():
    # GH 8569
    s = pd.Float64Index(range(10)).to_series()
    assert s.get(np.nan) is None
    assert s.get(np.nan, default="Missing") == "Missing"


def test_get_nan_multiple():
    # GH 8569
    # ensure that fixing "test_get_nan" above hasn't broken get
    # with multiple elements
    s = pd.Float64Index(range(10)).to_series()

    idx = [2, 30]
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        assert_series_equal(s.get(idx), Series([2, np.nan], index=idx))

    idx = [2, np.nan]
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        assert_series_equal(s.get(idx), Series([2, np.nan], index=idx))

    # GH 17295 - all missing keys
    idx = [20, 30]
    assert s.get(idx) is None

    idx = [np.nan, np.nan]
    assert s.get(idx) is None


def test_delitem():
    # GH 5542
    # should delete the item inplace
    s = Series(range(5))
    del s[0]

    expected = Series(range(1, 5), index=range(1, 5))
    assert_series_equal(s, expected)

    del s[1]
    expected = Series(range(2, 5), index=range(2, 5))
    assert_series_equal(s, expected)

    # empty
    s = Series()

    with pytest.raises(KeyError, match=r"^0$"):
        del s[0]

    # only 1 left, del, add, del
    s = Series(1)
    del s[0]
    assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))
    s[0] = 1
    assert_series_equal(s, Series(1))
    del s[0]
    assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))

    # Index(dtype=object)
    s = Series(1, index=["a"])
    del s["a"]
    assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="object")))
    s["a"] = 1
    assert_series_equal(s, Series(1, index=["a"]))
    del s["a"]
    assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="object")))


def test_slice_float64():
    values = np.arange(10.0, 50.0, 2)
    index = Index(values)

    start, end = values[[5, 15]]

    s = Series(np.random.randn(20), index=index)

    result = s[start:end]
    expected = s.iloc[5:16]
    assert_series_equal(result, expected)

    result = s.loc[start:end]
    assert_series_equal(result, expected)

    df = DataFrame(np.random.randn(20, 3), index=index)

    result = df[start:end]
    expected = df.iloc[5:16]
    tm.assert_frame_equal(result, expected)

    result = df.loc[start:end]
    tm.assert_frame_equal(result, expected)


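# Editor's note (not part of the original commit): on a float index a plain
# [start:end] slice is label-based and inclusive of the end label, which is
# why the positional equivalent above is iloc[5:16] rather than iloc[5:15].

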
def test_getitem_negative_out_of_bounds():
    s = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))

    msg = "index out of bounds"
    with pytest.raises(IndexError, match=msg):
        s[-11]
    msg = "index -11 is out of bounds for axis 0 with size 10"
    with pytest.raises(IndexError, match=msg):
        s[-11] = "foo"


def test_getitem_regression():
    s = Series(range(5), index=list(range(5)))
    result = s[list(range(5))]
    assert_series_equal(result, s)


def test_getitem_setitem_slice_bug():
    s = Series(range(10), index=list(range(10)))
    result = s[-12:]
    assert_series_equal(result, s)

    result = s[-7:]
    assert_series_equal(result, s[3:])

    result = s[:-12]
    assert_series_equal(result, s[:0])

    s = Series(range(10), index=list(range(10)))
    s[-12:] = 0
    assert (s == 0).all()

    s[:-12] = 5
    assert (s == 0).all()


def test_getitem_setitem_slice_integers():
    s = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])

    result = s[:4]
    expected = s.reindex([2, 4, 6, 8])
    assert_series_equal(result, expected)

    s[:4] = 0
    assert (s[:4] == 0).all()
    assert not (s[4:] == 0).any()


def test_setitem_float_labels():
    # note labels are floats
    s = Series(["a", "b", "c"], index=[0, 0.5, 1])
    tmp = s.copy()

    s.loc[1] = "zoo"
    tmp.iloc[2] = "zoo"

    assert_series_equal(s, tmp)


def test_slice_float_get_set(test_data):
    msg = (
        r"cannot do slice indexing on <class 'pandas\.core\.indexes"
        r"\.datetimes\.DatetimeIndex'> with these indexers \[{key}\]"
        r" of <class 'float'>"
    )
    with pytest.raises(TypeError, match=msg.format(key=r"4\.0")):
        test_data.ts[4.0:10.0]

    with pytest.raises(TypeError, match=msg.format(key=r"4\.0")):
        test_data.ts[4.0:10.0] = 0

    with pytest.raises(TypeError, match=msg.format(key=r"4\.5")):
        test_data.ts[4.5:10.0]
    with pytest.raises(TypeError, match=msg.format(key=r"4\.5")):
        test_data.ts[4.5:10.0] = 0


def test_slice_floats2():
    s = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float))

    assert len(s.loc[12.0:]) == 8
    assert len(s.loc[12.5:]) == 7

    i = np.arange(10, 20, dtype=float)
    i[2] = 12.2
    s.index = i
    assert len(s.loc[12.0:]) == 8
    assert len(s.loc[12.5:]) == 7


def test_int_indexing():
    s = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2])

    with pytest.raises(KeyError, match=r"^5$"):
        s[5]

    with pytest.raises(KeyError, match=r"^'c'$"):
        s["c"]

    # not monotonic
    s = Series(np.random.randn(6), index=[2, 2, 0, 0, 1, 1])

    with pytest.raises(KeyError, match=r"^5$"):
        s[5]

    with pytest.raises(KeyError, match=r"^'c'$"):
        s["c"]


def test_getitem_int64(test_data):
    idx = np.int64(5)
    assert test_data.ts[idx] == test_data.ts[5]
@@ -0,0 +1,346 @@
from datetime import datetime

import numpy as np
import pytest

from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series
import pandas.util.testing as tm


class TestSeriesAlterAxes:
    def test_setindex(self, string_series):
        # wrong type
        msg = (
            r"Index\(\.\.\.\) must be called with a collection of some"
            r" kind, None was passed"
        )
        with pytest.raises(TypeError, match=msg):
            string_series.index = None

        # wrong length
        msg = (
            "Length mismatch: Expected axis has 30 elements, new"
            " values have 29 elements"
        )
        with pytest.raises(ValueError, match=msg):
            string_series.index = np.arange(len(string_series) - 1)

        # works
        string_series.index = np.arange(len(string_series))
        assert isinstance(string_series.index, Index)

    # Renaming

    def test_rename(self, datetime_series):
        ts = datetime_series
        renamer = lambda x: x.strftime("%Y%m%d")
        renamed = ts.rename(renamer)
        assert renamed.index[0] == renamer(ts.index[0])

        # dict
        rename_dict = dict(zip(ts.index, renamed.index))
        renamed2 = ts.rename(rename_dict)
        tm.assert_series_equal(renamed, renamed2)

        # partial dict
        s = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64")
        renamed = s.rename({"b": "foo", "d": "bar"})
        tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"]))

        # index with name
        renamer = Series(
            np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64"
        )
        renamed = renamer.rename({})
        assert renamed.index.name == renamer.index.name

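    # Editor's sketch (not part of the original commit): rename with a dict
    # maps only the labels that are present and silently ignores missing keys.
    def test_rename_partial_dict_sketch(self):
        s = Series([1, 2], index=["a", "b"])
        result = s.rename({"a": "x", "zzz": "ignored"})
        assert list(result.index) == ["x", "b"]
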
    def test_rename_by_series(self):
        s = Series(range(5), name="foo")
        renamer = Series({1: 10, 2: 20})
        result = s.rename(renamer)
        expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo")
        tm.assert_series_equal(result, expected)

    def test_rename_set_name(self):
        s = Series(range(4), index=list("abcd"))
        for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
            result = s.rename(name)
            assert result.name == name
            tm.assert_numpy_array_equal(result.index.values, s.index.values)
            assert s.name is None

    def test_rename_set_name_inplace(self):
        s = Series(range(3), index=list("abc"))
        for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
            s.rename(name, inplace=True)
            assert s.name == name

            exp = np.array(["a", "b", "c"], dtype=np.object_)
            tm.assert_numpy_array_equal(s.index.values, exp)

    def test_rename_axis_supported(self):
        # Supporting axis for compatibility, detailed in GH-18589
        s = Series(range(5))
        s.rename({}, axis=0)
        s.rename({}, axis="index")
        with pytest.raises(ValueError, match="No axis named 5"):
            s.rename({}, axis=5)

    def test_set_name_attribute(self):
        s = Series([1, 2, 3])
        s2 = Series([1, 2, 3], name="bar")
        for name in [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]:
            s.name = name
            assert s.name == name
            s2.name = name
            assert s2.name == name

    def test_set_name(self):
        s = Series([1, 2, 3])
        s2 = s._set_name("foo")
        assert s2.name == "foo"
        assert s.name is None
        assert s is not s2

    def test_rename_inplace(self, datetime_series):
        renamer = lambda x: x.strftime("%Y%m%d")
        expected = renamer(datetime_series.index[0])

        datetime_series.rename(renamer, inplace=True)
        assert datetime_series.index[0] == expected

    def test_set_index_makes_timeseries(self):
        idx = tm.makeDateIndex(10)

        s = Series(range(10))
        s.index = idx
        assert s.index.is_all_dates

    def test_reset_index(self):
        df = tm.makeDataFrame()[:5]
        ser = df.stack()
        ser.index.names = ["hash", "category"]

        ser.name = "value"
        df = ser.reset_index()
        assert "value" in df

        df = ser.reset_index(name="value2")
        assert "value2" in df

        # check inplace
        s = ser.reset_index(drop=True)
        s2 = ser
        s2.reset_index(drop=True, inplace=True)
        tm.assert_series_equal(s, s2)

        # level
        index = MultiIndex(
            levels=[["bar"], ["one", "two", "three"], [0, 1]],
            codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        )
        s = Series(np.random.randn(6), index=index)
        rs = s.reset_index(level=1)
        assert len(rs.columns) == 2

        rs = s.reset_index(level=[0, 2], drop=True)
        tm.assert_index_equal(rs.index, Index(index.get_level_values(1)))
        assert isinstance(rs, Series)

    def test_reset_index_name(self):
        s = Series([1, 2, 3], index=Index(range(3), name="x"))
        assert s.reset_index().index.name is None
        assert s.reset_index(drop=True).index.name is None

    def test_reset_index_level(self):
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])

        for levels in ["A", "B"], [0, 1]:
            # With MultiIndex
            s = df.set_index(["A", "B"])["C"]

            result = s.reset_index(level=levels[0])
            tm.assert_frame_equal(result, df.set_index("B"))

            result = s.reset_index(level=levels[:1])
            tm.assert_frame_equal(result, df.set_index("B"))

            result = s.reset_index(level=levels)
            tm.assert_frame_equal(result, df)

            result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True)
            tm.assert_frame_equal(result, df[["C"]])

            with pytest.raises(KeyError, match="Level E "):
                s.reset_index(level=["A", "E"])

            # With single-level Index
            s = df.set_index("A")["B"]

            result = s.reset_index(level=levels[0])
            tm.assert_frame_equal(result, df[["A", "B"]])

            result = s.reset_index(level=levels[:1])
            tm.assert_frame_equal(result, df[["A", "B"]])

            result = s.reset_index(level=levels[0], drop=True)
            tm.assert_series_equal(result, df["B"])

            with pytest.raises(IndexError, match="Too many levels"):
                s.reset_index(level=[0, 1, 2])

        # Check that .reset_index([], drop=True) doesn't fail
        result = Series(range(4)).reset_index([], drop=True)
        expected = Series(range(4))
        tm.assert_series_equal(result, expected)

def test_reset_index_range(self):
|
||||
# GH 12071
|
||||
s = Series(range(2), name="A", dtype="int64")
|
||||
series_result = s.reset_index()
|
||||
assert isinstance(series_result.index, RangeIndex)
|
||||
series_expected = DataFrame(
|
||||
[[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2)
|
||||
)
|
||||
tm.assert_frame_equal(series_result, series_expected)
|
||||
|
||||
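
    # A note on the assertions above: Series.reset_index() returns a
    # DataFrame whose rows get a fresh RangeIndex, while the old index is
    # moved into a regular column ("index" here, since the index is unnamed).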

    def test_reorder_levels(self):
        index = MultiIndex(
            levels=[["bar"], ["one", "two", "three"], [0, 1]],
            codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
            names=["L0", "L1", "L2"],
        )
        s = Series(np.arange(6), index=index)

        # no change, position
        result = s.reorder_levels([0, 1, 2])
        tm.assert_series_equal(s, result)

        # no change, labels
        result = s.reorder_levels(["L0", "L1", "L2"])
        tm.assert_series_equal(s, result)

        # rotate, position
        result = s.reorder_levels([1, 2, 0])
        e_idx = MultiIndex(
            levels=[["one", "two", "three"], [0, 1], ["bar"]],
            codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0]],
            names=["L1", "L2", "L0"],
        )
        expected = Series(np.arange(6), index=e_idx)
        tm.assert_series_equal(result, expected)

    def test_rename_axis_mapper(self):
        # GH 19978
        mi = MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["ll", "nn"])
        s = Series([i for i in range(len(mi))], index=mi)

        result = s.rename_axis(index={"ll": "foo"})
        assert result.index.names == ["foo", "nn"]

        result = s.rename_axis(index=str.upper, axis=0)
        assert result.index.names == ["LL", "NN"]

        result = s.rename_axis(index=["foo", "goo"])
        assert result.index.names == ["foo", "goo"]

        with pytest.raises(TypeError, match="unexpected"):
            s.rename_axis(columns="wrong")

    def test_rename_axis_inplace(self, datetime_series):
        # GH 15704
        expected = datetime_series.rename_axis("foo")
        result = datetime_series
        no_return = result.rename_axis("foo", inplace=True)

        assert no_return is None
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("kwargs", [{"mapper": None}, {"index": None}, {}])
    def test_rename_axis_none(self, kwargs):
        # GH 25034
        index = Index(list("abc"), name="foo")
        df = Series([1, 2, 3], index=index)

        result = df.rename_axis(**kwargs)
        expected_index = index.rename(None) if kwargs else index
        expected = Series([1, 2, 3], index=expected_index)
        tm.assert_series_equal(result, expected)

    def test_set_axis_inplace_axes(self, axis_series):
        # GH14636
        ser = Series(np.arange(4), index=[1, 3, 5, 7], dtype="int64")

        expected = ser.copy()
        expected.index = list("abcd")

        # inplace=True
        # The FutureWarning comes from the fact that we would like to have
        # inplace default to False some day
        for inplace, warn in [(None, FutureWarning), (True, None)]:
            result = ser.copy()
            kwargs = {"inplace": inplace}
            with tm.assert_produces_warning(warn):
                result.set_axis(list("abcd"), axis=axis_series, **kwargs)
            tm.assert_series_equal(result, expected)

    def test_set_axis_inplace(self):
        # GH14636

        s = Series(np.arange(4), index=[1, 3, 5, 7], dtype="int64")

        expected = s.copy()
        expected.index = list("abcd")

        # inplace=False
        result = s.set_axis(list("abcd"), axis=0, inplace=False)
        tm.assert_series_equal(expected, result)

        # omitting the "axis" parameter
        with tm.assert_produces_warning(None):
            result = s.set_axis(list("abcd"), inplace=False)
        tm.assert_series_equal(result, expected)

        # wrong values for the "axis" parameter
        for axis in [2, "foo"]:
            with pytest.raises(ValueError, match="No axis named"):
                s.set_axis(list("abcd"), axis=axis, inplace=False)

    def test_set_axis_prior_to_deprecation_signature(self):
        s = Series(np.arange(4), index=[1, 3, 5, 7], dtype="int64")

        expected = s.copy()
        expected.index = list("abcd")

        for axis in [0, "index"]:
            with tm.assert_produces_warning(FutureWarning):
                result = s.set_axis(axis, list("abcd"), inplace=False)
            tm.assert_series_equal(result, expected)
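
    # Background for the test above: set_axis originally took the axis as the
    # first positional argument, set_axis(axis, labels); that ordering now
    # raises a FutureWarning in favor of set_axis(labels, axis=..., inplace=...).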

    def test_reset_index_drop_errors(self):
        # GH 20925

        # KeyError raised for series index when passed level name is missing
        s = Series(range(4))
        with pytest.raises(KeyError, match="must be same as name"):
            s.reset_index("wrong", drop=True)
        with pytest.raises(KeyError, match="must be same as name"):
            s.reset_index("wrong")

        # KeyError raised for series when level to be dropped is missing
        s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2))
        with pytest.raises(KeyError, match="not found"):
            s.reset_index("wrong", drop=True)

    def test_droplevel(self):
        # GH20342
        ser = Series([1, 2, 3, 4])
        ser.index = MultiIndex.from_arrays(
            [(1, 2, 3, 4), (5, 6, 7, 8)], names=["a", "b"]
        )
        expected = ser.reset_index("b", drop=True)
        result = ser.droplevel("b", axis="index")
        tm.assert_series_equal(result, expected)
        # test that droplevel raises ValueError on axis != 0
        with pytest.raises(ValueError):
            ser.droplevel(1, axis="columns")

File diff suppressed because it is too large

venv/lib/python3.6/site-packages/pandas/tests/series/test_api.py (new file, 759 lines)
@@ -0,0 +1,759 @@
from collections import OrderedDict
import pydoc
import warnings

import numpy as np
import pytest

import pandas as pd
from pandas import (
    Categorical,
    DataFrame,
    DatetimeIndex,
    Index,
    Series,
    TimedeltaIndex,
    date_range,
    period_range,
    timedelta_range,
)
from pandas.core.arrays import PeriodArray
from pandas.core.indexes.datetimes import Timestamp
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal, ensure_clean

import pandas.io.formats.printing as printing

from .common import TestData


class SharedWithSparse:
    """
    A collection of tests Series and SparseSeries can share.

    In generic tests on this class, use ``self._assert_series_equal()``
    which is implemented in sub-classes.
    """

    def _assert_series_equal(self, left, right):
        """Dispatch to series class dependent assertion"""
        raise NotImplementedError

    def test_scalarop_preserve_name(self):
        result = self.ts * 2
        assert result.name == self.ts.name

    def test_copy_name(self):
        result = self.ts.copy()
        assert result.name == self.ts.name

    def test_copy_index_name_checking(self):
        # don't want to be able to modify the index stored elsewhere after
        # making a copy

        self.ts.index.name = None
        assert self.ts.index.name is None
        assert self.ts is self.ts

        cp = self.ts.copy()
        cp.index.name = "foo"
        printing.pprint_thing(self.ts.index.name)
        assert self.ts.index.name is None

    def test_append_preserve_name(self):
        result = self.ts[:5].append(self.ts[5:])
        assert result.name == self.ts.name

    def test_binop_maybe_preserve_name(self):
        # names match, preserve
        result = self.ts * self.ts
        assert result.name == self.ts.name
        result = self.ts.mul(self.ts)
        assert result.name == self.ts.name

        result = self.ts * self.ts[:-2]
        assert result.name == self.ts.name

        # names don't match, don't preserve
        cp = self.ts.copy()
        cp.name = "something else"
        result = self.ts + cp
        assert result.name is None
        result = self.ts.add(cp)
        assert result.name is None

        ops = ["add", "sub", "mul", "div", "truediv", "floordiv", "mod", "pow"]
        ops = ops + ["r" + op for op in ops]
        for op in ops:
            # names match, preserve
            s = self.ts.copy()
            result = getattr(s, op)(s)
            assert result.name == self.ts.name

            # names don't match, don't preserve
            cp = self.ts.copy()
            cp.name = "changed"
            result = getattr(s, op)(cp)
            assert result.name is None

    def test_combine_first_name(self):
        result = self.ts.combine_first(self.ts[:5])
        assert result.name == self.ts.name

    def test_getitem_preserve_name(self):
        result = self.ts[self.ts > 0]
        assert result.name == self.ts.name

        result = self.ts[[0, 2, 4]]
        assert result.name == self.ts.name

        result = self.ts[5:10]
        assert result.name == self.ts.name

    def test_pickle(self):
        unp_series = self._pickle_roundtrip(self.series)
        unp_ts = self._pickle_roundtrip(self.ts)
        assert_series_equal(unp_series, self.series)
        assert_series_equal(unp_ts, self.ts)

    def _pickle_roundtrip(self, obj):

        with ensure_clean() as path:
            obj.to_pickle(path)
            unpickled = pd.read_pickle(path)
            return unpickled
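
    # tm.ensure_clean() used above yields a temporary file path and deletes
    # the file on exit, so every pickle round-trip runs against a fresh,
    # self-cleaning location on disk.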

    def test_argsort_preserve_name(self):
        result = self.ts.argsort()
        assert result.name == self.ts.name

    def test_sort_index_name(self):
        result = self.ts.sort_index(ascending=False)
        assert result.name == self.ts.name

    @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
    @pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
    def test_to_sparse_pass_name(self):
        result = self.ts.to_sparse()
        assert result.name == self.ts.name

    def test_constructor_dict(self):
        d = {"a": 0.0, "b": 1.0, "c": 2.0}
        result = self.series_klass(d)
        expected = self.series_klass(d, index=sorted(d.keys()))
        self._assert_series_equal(result, expected)

        result = self.series_klass(d, index=["b", "c", "d", "a"])
        expected = self.series_klass([1, 2, np.nan, 0], index=["b", "c", "d", "a"])
        self._assert_series_equal(result, expected)

    def test_constructor_subclass_dict(self):
        data = tm.TestSubDict((x, 10.0 * x) for x in range(10))
        series = self.series_klass(data)
        expected = self.series_klass(dict(data.items()))
        self._assert_series_equal(series, expected)

    def test_constructor_ordereddict(self):
        # GH3283
        data = OrderedDict(
            ("col{i}".format(i=i), np.random.random()) for i in range(12)
        )

        series = self.series_klass(data)
        expected = self.series_klass(list(data.values()), list(data.keys()))
        self._assert_series_equal(series, expected)

        # Test with subclass
        class A(OrderedDict):
            pass

        series = self.series_klass(A(data))
        self._assert_series_equal(series, expected)

    def test_constructor_dict_multiindex(self):
        d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}
        _d = sorted(d.items())
        result = self.series_klass(d)
        expected = self.series_klass(
            [x[1] for x in _d], index=pd.MultiIndex.from_tuples([x[0] for x in _d])
        )
        self._assert_series_equal(result, expected)

        d["z"] = 111.0
        _d.insert(0, ("z", d["z"]))
        result = self.series_klass(d)
        expected = self.series_klass(
            [x[1] for x in _d], index=pd.Index([x[0] for x in _d], tupleize_cols=False)
        )
        result = result.reindex(index=expected.index)
        self._assert_series_equal(result, expected)

    def test_constructor_dict_timedelta_index(self):
        # GH #12169 : Resample category data with timedelta index
        # construct Series from dict as data and TimedeltaIndex as index
        # will result NaN in result Series data
        expected = self.series_klass(
            data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], unit="s")
        )

        result = self.series_klass(
            data={
                pd.to_timedelta(0, unit="s"): "A",
                pd.to_timedelta(10, unit="s"): "B",
                pd.to_timedelta(20, unit="s"): "C",
            },
            index=pd.to_timedelta([0, 10, 20], unit="s"),
        )
        self._assert_series_equal(result, expected)

    @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
    def test_from_array_deprecated(self):

        # multiple FutureWarnings, so can't assert stacklevel
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            self.series_klass.from_array([1, 2, 3])

    def test_sparse_accessor_updates_on_inplace(self):
        s = pd.Series([1, 1, 2, 3], dtype="Sparse[int]")
        s.drop([0, 1], inplace=True)
        assert s.sparse.density == 1.0


class TestSeriesMisc(TestData, SharedWithSparse):

    series_klass = Series
    # SharedWithSparse tests use generic, series_klass-agnostic assertion
    _assert_series_equal = staticmethod(tm.assert_series_equal)

    def test_tab_completion(self):
        # GH 9910
        s = Series(list("abcd"))
        # Series of str values should have .str but not .dt/.cat in __dir__
        assert "str" in dir(s)
        assert "dt" not in dir(s)
        assert "cat" not in dir(s)

        # similarly for .dt
        s = Series(date_range("1/1/2015", periods=5))
        assert "dt" in dir(s)
        assert "str" not in dir(s)
        assert "cat" not in dir(s)

        # Similarly for .cat, but with the twist that str and dt should be
        # there if the categories are of that type first cat and str.
        s = Series(list("abbcd"), dtype="category")
        assert "cat" in dir(s)
        assert "str" in dir(s)  # as it is a string categorical
        assert "dt" not in dir(s)

        # similar to cat and str
        s = Series(date_range("1/1/2015", periods=5)).astype("category")
        assert "cat" in dir(s)
        assert "str" not in dir(s)
        assert "dt" in dir(s)  # as it is a datetime categorical

    def test_tab_completion_with_categorical(self):
        # test the tab completion display
        ok_for_cat = [
            "name",
            "index",
            "categorical",
            "categories",
            "codes",
            "ordered",
            "set_categories",
            "add_categories",
            "remove_categories",
            "rename_categories",
            "reorder_categories",
            "remove_unused_categories",
            "as_ordered",
            "as_unordered",
        ]

        def get_dir(s):
            results = [r for r in s.cat.__dir__() if not r.startswith("_")]
            return list(sorted(set(results)))

        s = Series(list("aabbcde")).astype("category")
        results = get_dir(s)
        tm.assert_almost_equal(results, list(sorted(set(ok_for_cat))))

    @pytest.mark.parametrize(
        "index",
        [
            tm.makeUnicodeIndex(10),
            tm.makeStringIndex(10),
            tm.makeCategoricalIndex(10),
            Index(["foo", "bar", "baz"] * 2),
            tm.makeDateIndex(10),
            tm.makePeriodIndex(10),
            tm.makeTimedeltaIndex(10),
            tm.makeIntIndex(10),
            tm.makeUIntIndex(10),
            tm.makeIntIndex(10),
            tm.makeFloatIndex(10),
            Index([True, False]),
            Index(["a{}".format(i) for i in range(101)]),
            pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
            pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
        ],
    )
    def test_index_tab_completion(self, index):
        # dir contains string-like values of the Index.
        s = pd.Series(index=index)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                assert not isinstance(x, str) or not x.isidentifier() or x in dir_s
            else:
                assert x not in dir_s
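
    # The 100-entry cutoff checked above comes from pandas only advertising
    # the first 100 unique, identifier-like index values through __dir__;
    # later entries are intentionally excluded from tab completion.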

    def test_not_hashable(self):
        s_empty = Series()
        s = Series([1])
        msg = "'Series' objects are mutable, thus they cannot be hashed"
        with pytest.raises(TypeError, match=msg):
            hash(s_empty)
        with pytest.raises(TypeError, match=msg):
            hash(s)

    def test_contains(self):
        tm.assert_contains_all(self.ts.index, self.ts)

    def test_iter(self):
        for i, val in enumerate(self.series):
            assert val == self.series[i]

        for i, val in enumerate(self.ts):
            assert val == self.ts[i]

    def test_keys(self):
        # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
        # to .keys() in a list()
        getkeys = self.ts.keys
        assert getkeys() is self.ts.index

    def test_values(self):
        tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False)

    def test_iteritems(self):
        for idx, val in self.series.iteritems():
            assert val == self.series[idx]

        for idx, val in self.ts.iteritems():
            assert val == self.ts[idx]

        # assert is lazy (generators don't define reverse, lists do)
        assert not hasattr(self.series.iteritems(), "reverse")

    def test_items(self):
        for idx, val in self.series.items():
            assert val == self.series[idx]

        for idx, val in self.ts.items():
            assert val == self.ts[idx]

        # assert is lazy (generators don't define reverse, lists do)
        assert not hasattr(self.series.items(), "reverse")

    def test_raise_on_info(self):
        s = Series(np.random.randn(10))
        msg = "'Series' object has no attribute 'info'"
        with pytest.raises(AttributeError, match=msg):
            s.info()

    def test_copy(self):

        for deep in [None, False, True]:
            s = Series(np.arange(10), dtype="float64")

            # default deep is True
            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[::2] = np.NaN

            if deep is None or deep is True:
                # Did not modify original Series
                assert np.isnan(s2[0])
                assert not np.isnan(s[0])
            else:
                # we DID modify the original Series
                assert np.isnan(s2[0])
                assert np.isnan(s[0])

    def test_copy_tzaware(self):
        # GH#11794
        # copy of tz-aware
        expected = Series([Timestamp("2012/01/01", tz="UTC")])
        expected2 = Series([Timestamp("1999/01/01", tz="UTC")])

        for deep in [None, False, True]:

            s = Series([Timestamp("2012/01/01", tz="UTC")])

            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[0] = pd.Timestamp("1999/01/01", tz="UTC")

            # default deep is True
            if deep is None or deep is True:
                # Did not modify original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected)
            else:
                # we DID modify the original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected2)

    def test_axis_alias(self):
        s = Series([1, 2, np.nan])
        assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index"))
        assert s.dropna().sum("rows") == 3
        assert s._get_axis_number("rows") == 0
        assert s._get_axis_name("rows") == "index"

    def test_class_axis(self):
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        assert pydoc.getdoc(Series.index)

    def test_numpy_unique(self):
        # it works!
        np.unique(self.ts)

    def test_ndarray_compat(self):

        # test numpy compat with Series as sub-class of NDFrame
        tsdf = DataFrame(
            np.random.randn(1000, 3),
            columns=["A", "B", "C"],
            index=date_range("1/1/2000", periods=1000),
        )

        def f(x):
            return x[x.idxmax()]

        result = tsdf.apply(f)
        expected = tsdf.max()
        tm.assert_series_equal(result, expected)

        # .item()
        with tm.assert_produces_warning(FutureWarning):
            s = Series([1])
            result = s.item()
            assert result == 1
            assert s.item() == s.iloc[0]

        # using an ndarray like function
        s = Series(np.random.randn(10))
        result = Series(np.ones_like(s))
        expected = Series(1, index=range(10), dtype="float64")
        tm.assert_series_equal(result, expected)

        # ravel
        s = Series(np.random.randn(10))
        tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F"))

        # compress
        # GH 6658
        s = Series([0, 1.0, -1], index=list("abc"))
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.0], index=["b"]))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Index(dtype=object) as the same as original
        exp = Series([], dtype="float64", index=Index([], dtype="object"))
        tm.assert_series_equal(result, exp)

        s = Series([0, 1.0, -1], index=[0.1, 0.2, 0.3])
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.0], index=[0.2]))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Float64Index as the same as original
        exp = Series([], dtype="float64", index=Index([], dtype="float64"))
        tm.assert_series_equal(result, exp)

    def test_str_accessor_updates_on_inplace(self):
        s = pd.Series(list("abc"))
        s.drop([0], inplace=True)
        assert len(s.str.lower()) == 2

    def test_str_attribute(self):
        # GH9068
        methods = ["strip", "rstrip", "lstrip"]
        s = Series([" jack", "jill ", " jesse ", "frank"])
        for method in methods:
            expected = Series([getattr(str, method)(x) for x in s.values])
            assert_series_equal(getattr(Series.str, method)(s.str), expected)

        # str accessor only valid with string values
        s = Series(range(5))
        with pytest.raises(AttributeError, match="only use .str accessor"):
            s.str.repeat(2)

    def test_empty_method(self):
        s_empty = pd.Series()
        assert s_empty.empty

        for full_series in [pd.Series([1]), pd.Series(index=[1])]:
            assert not full_series.empty

    def test_tab_complete_warning(self, ip):
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip("IPython", minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; s = pd.Series()"
        ip.run_code(code)
        with tm.assert_produces_warning(None):
            with provisionalcompleter("ignore"):
                list(ip.Completer.completions("s.", 1))

    def test_integer_series_size(self):
        # GH 25580
        s = Series(range(9))
        assert s.size == 9
        s = Series(range(9), dtype="Int64")
        assert s.size == 9

    def test_get_values_deprecation(self):
        s = Series(range(9))
        with tm.assert_produces_warning(FutureWarning):
            res = s.get_values()
        tm.assert_numpy_array_equal(res, s.values)


class TestCategoricalSeries:
    @pytest.mark.parametrize(
        "method",
        [
            lambda x: x.cat.set_categories([1, 2, 3]),
            lambda x: x.cat.reorder_categories([2, 3, 1], ordered=True),
            lambda x: x.cat.rename_categories([1, 2, 3]),
            lambda x: x.cat.remove_unused_categories(),
            lambda x: x.cat.remove_categories([2]),
            lambda x: x.cat.add_categories([4]),
            lambda x: x.cat.as_ordered(),
            lambda x: x.cat.as_unordered(),
        ],
    )
    def test_getname_categorical_accessor(self, method):
        # GH 17509
        s = Series([1, 2, 3], name="A").astype("category")
        expected = "A"
        result = method(s).name
        assert result == expected

    def test_cat_accessor(self):
        s = Series(Categorical(["a", "b", np.nan, "a"]))
        tm.assert_index_equal(s.cat.categories, Index(["a", "b"]))
        assert not s.cat.ordered

        exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"])
        s.cat.set_categories(["b", "a"], inplace=True)
        tm.assert_categorical_equal(s.values, exp)

        res = s.cat.set_categories(["b", "a"])
        tm.assert_categorical_equal(res.values, exp)

        s[:] = "a"
        s = s.cat.remove_unused_categories()
        tm.assert_index_equal(s.cat.categories, Index(["a"]))

    def test_cat_accessor_api(self):
        # GH 9322
        from pandas.core.arrays.categorical import CategoricalAccessor

        assert Series.cat is CategoricalAccessor
        s = Series(list("aabbcde")).astype("category")
        assert isinstance(s.cat, CategoricalAccessor)

        invalid = Series([1])
        with pytest.raises(AttributeError, match="only use .cat accessor"):
            invalid.cat
        assert not hasattr(invalid, "cat")

    def test_cat_accessor_no_new_attributes(self):
        # https://github.com/pandas-dev/pandas/issues/10673
        c = Series(list("aabbcde")).astype("category")
        with pytest.raises(AttributeError, match="You cannot add any new attribute"):
            c.cat.xlabel = "a"

    def test_cat_accessor_updates_on_inplace(self):
        s = Series(list("abc")).astype("category")
        s.drop(0, inplace=True)
        s.cat.remove_unused_categories(inplace=True)
        assert len(s.cat.categories) == 2

    def test_categorical_delegations(self):

        # invalid accessor
        msg = r"Can only use \.cat accessor with a 'category' dtype"
        with pytest.raises(AttributeError, match=msg):
            Series([1, 2, 3]).cat
        with pytest.raises(AttributeError, match=msg):
            Series([1, 2, 3]).cat()
        with pytest.raises(AttributeError, match=msg):
            Series(["a", "b", "c"]).cat
        with pytest.raises(AttributeError, match=msg):
            Series(np.arange(5.0)).cat
        with pytest.raises(AttributeError, match=msg):
            Series([Timestamp("20130101")]).cat

        # Series should delegate calls to '.categories', '.codes', '.ordered'
        # and the methods '.set_categories()' 'drop_unused_categories()' to the
        # categorical
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["a", "b", "c"])
        tm.assert_index_equal(s.cat.categories, exp_categories)
        s.cat.categories = [1, 2, 3]
        exp_categories = Index([1, 2, 3])
        tm.assert_index_equal(s.cat.categories, exp_categories)

        exp_codes = Series([0, 1, 2, 0], dtype="int8")
        tm.assert_series_equal(s.cat.codes, exp_codes)

        assert s.cat.ordered
        s = s.cat.as_unordered()
        assert not s.cat.ordered
        s.cat.as_ordered(inplace=True)
        assert s.cat.ordered

        # reorder
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["c", "b", "a"])
        exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
        s = s.cat.set_categories(["c", "b", "a"])
        tm.assert_index_equal(s.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(s.__array__(), exp_values)

        # remove unused categories
        s = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"]))
        exp_categories = Index(["a", "b"])
        exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
        s = s.cat.remove_unused_categories()
        tm.assert_index_equal(s.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(s.__array__(), exp_values)

        # This method is likely to be confused, so test that it raises an error
        # on wrong inputs:
        msg = "'Series' object has no attribute 'set_categories'"
        with pytest.raises(AttributeError, match=msg):
            s.set_categories([4, 3, 2, 1])

        # right: s.cat.set_categories([4,3,2,1])

        # GH18862 (let Series.cat.rename_categories take callables)
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        result = s.cat.rename_categories(lambda x: x.upper())
        expected = Series(
            Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True)
        )
        tm.assert_series_equal(result, expected)

    def test_dt_accessor_api_for_categorical(self):
        # https://github.com/pandas-dev/pandas/issues/10661
        from pandas.core.indexes.accessors import Properties

        s_dr = Series(date_range("1/1/2015", periods=5, tz="MET"))
        c_dr = s_dr.astype("category")

        s_pr = Series(period_range("1/1/2015", freq="D", periods=5))
        c_pr = s_pr.astype("category")

        s_tdr = Series(timedelta_range("1 days", "10 days"))
        c_tdr = s_tdr.astype("category")

        # only testing field (like .day)
        # and bool (is_month_start)
        get_ops = lambda x: x._datetimelike_ops

        test_data = [
            ("Datetime", get_ops(DatetimeIndex), s_dr, c_dr),
            ("Period", get_ops(PeriodArray), s_pr, c_pr),
            ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr),
        ]

        assert isinstance(c_dr.dt, Properties)

        special_func_defs = [
            ("strftime", ("%Y-%m-%d",), {}),
            ("tz_convert", ("EST",), {}),
            ("round", ("D",), {}),
            ("floor", ("D",), {}),
            ("ceil", ("D",), {}),
            ("asfreq", ("D",), {}),
            # ('tz_localize', ("UTC",), {}),
        ]
        _special_func_names = [f[0] for f in special_func_defs]

        # the series is already localized
        _ignore_names = ["tz_localize", "components"]

        for name, attr_names, s, c in test_data:
            func_names = [
                f
                for f in dir(s.dt)
                if not (
                    f.startswith("_")
                    or f in attr_names
                    or f in _special_func_names
                    or f in _ignore_names
                )
            ]

            func_defs = [(f, (), {}) for f in func_names]
            for f_def in special_func_defs:
                if f_def[0] in dir(s.dt):
                    func_defs.append(f_def)

            for func, args, kwargs in func_defs:
                with warnings.catch_warnings():
                    if func == "to_period":
                        # dropping TZ
                        warnings.simplefilter("ignore", UserWarning)
                    res = getattr(c.dt, func)(*args, **kwargs)
                    exp = getattr(s.dt, func)(*args, **kwargs)

                if isinstance(res, DataFrame):
                    tm.assert_frame_equal(res, exp)
                elif isinstance(res, Series):
                    tm.assert_series_equal(res, exp)
                else:
                    tm.assert_almost_equal(res, exp)

            for attr in attr_names:
                try:
                    res = getattr(c.dt, attr)
                    exp = getattr(s.dt, attr)
                except Exception as e:
                    print(name, attr)
                    raise e

                if isinstance(res, DataFrame):
                    tm.assert_frame_equal(res, exp)
                elif isinstance(res, Series):
                    tm.assert_series_equal(res, exp)
                else:
                    tm.assert_almost_equal(res, exp)

        invalid = Series([1, 2, 3]).astype("category")
        msg = "Can only use .dt accessor with datetimelike"

        with pytest.raises(AttributeError, match=msg):
            invalid.dt
        assert not hasattr(invalid, "str")
@@ -0,0 +1,739 @@
from collections import Counter, OrderedDict, defaultdict
from itertools import chain

import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Index, Series, isna
from pandas.conftest import _get_cython_table_params
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal


class TestSeriesApply:
    def test_apply(self, datetime_series):
        with np.errstate(all="ignore"):
            tm.assert_series_equal(
                datetime_series.apply(np.sqrt), np.sqrt(datetime_series)
            )

            # element-wise apply
            import math

            tm.assert_series_equal(
                datetime_series.apply(math.exp), np.exp(datetime_series)
            )

        # empty series
        s = Series(dtype=object, name="foo", index=pd.Index([], name="bar"))
        rs = s.apply(lambda x: x)
        tm.assert_series_equal(s, rs)

        # check all metadata (GH 9322)
        assert s is not rs
        assert s.index is rs.index
        assert s.dtype == rs.dtype
        assert s.name == rs.name

        # index but no data
        s = Series(index=[1, 2, 3])
        rs = s.apply(lambda x: x)
        tm.assert_series_equal(s, rs)

    def test_apply_same_length_inference_bug(self):
        s = Series([1, 2])
        f = lambda x: (x, x + 1)

        result = s.apply(f)
        expected = s.map(f)
        assert_series_equal(result, expected)

        s = Series([1, 2, 3])
        result = s.apply(f)
        expected = s.map(f)
        assert_series_equal(result, expected)

    def test_apply_dont_convert_dtype(self):
        s = Series(np.random.randn(10))

        f = lambda x: x if x > 0 else np.nan
        result = s.apply(f, convert_dtype=False)
        assert result.dtype == object

    def test_with_string_args(self, datetime_series):

        for arg in ["sum", "mean", "min", "max", "std"]:
            result = datetime_series.apply(arg)
            expected = getattr(datetime_series, arg)()
            assert result == expected

    def test_apply_args(self):
        s = Series(["foo,bar"])

        result = s.apply(str.split, args=(",",))
        assert result[0] == ["foo", "bar"]
        assert isinstance(result[0], list)

    def test_series_map_box_timestamps(self):
        # GH#2689, GH#2627
        ser = Series(pd.date_range("1/1/2000", periods=10))

        def func(x):
            return (x.hour, x.day, x.month)

        # it works!
        ser.map(func)
        ser.apply(func)

    def test_apply_box(self):
        # ufunc will not be boxed. Same test cases as the test_map_box
        vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
        s = pd.Series(vals)
        assert s.dtype == "datetime64[ns]"
        # boxed value must be Timestamp instance
        res = s.apply(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz))
        exp = pd.Series(["Timestamp_1_None", "Timestamp_2_None"])
        tm.assert_series_equal(res, exp)

        vals = [
            pd.Timestamp("2011-01-01", tz="US/Eastern"),
            pd.Timestamp("2011-01-02", tz="US/Eastern"),
        ]
        s = pd.Series(vals)
        assert s.dtype == "datetime64[ns, US/Eastern]"
        res = s.apply(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz))
        exp = pd.Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
        tm.assert_series_equal(res, exp)

        # timedelta
        vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
        s = pd.Series(vals)
        assert s.dtype == "timedelta64[ns]"
        res = s.apply(lambda x: "{0}_{1}".format(x.__class__.__name__, x.days))
        exp = pd.Series(["Timedelta_1", "Timedelta_2"])
        tm.assert_series_equal(res, exp)

        # period
        vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
        s = pd.Series(vals)
        assert s.dtype == "Period[M]"
        res = s.apply(lambda x: "{0}_{1}".format(x.__class__.__name__, x.freqstr))
        exp = pd.Series(["Period_M", "Period_M"])
        tm.assert_series_equal(res, exp)

    def test_apply_datetimetz(self):
        values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize(
            "Asia/Tokyo"
        )
        s = pd.Series(values, name="XX")

        result = s.apply(lambda x: x + pd.offsets.Day())
        exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize(
            "Asia/Tokyo"
        )
        exp = pd.Series(exp_values, name="XX")
        tm.assert_series_equal(result, exp)

        # change dtype
        # GH 14506 : Returned dtype changed from int32 to int64
        result = s.apply(lambda x: x.hour)
        exp = pd.Series(list(range(24)) + [0], name="XX", dtype=np.int64)
        tm.assert_series_equal(result, exp)

        # not vectorized
        def f(x):
            if not isinstance(x, pd.Timestamp):
                raise ValueError
            return str(x.tz)

        result = s.map(f)
        exp = pd.Series(["Asia/Tokyo"] * 25, name="XX")
        tm.assert_series_equal(result, exp)

    def test_apply_dict_depr(self):

        tsdf = pd.DataFrame(
            np.random.randn(10, 3),
            columns=["A", "B", "C"],
            index=pd.date_range("1/1/2000", periods=10),
        )
        with tm.assert_produces_warning(FutureWarning):
            tsdf.A.agg({"foo": ["sum", "mean"]})

    @pytest.mark.parametrize("series", [["1-1", "1-1", np.NaN], ["1-1", "1-2", np.NaN]])
    def test_apply_categorical_with_nan_values(self, series):
        # GH 20714 bug fixed in: GH 24275
        s = pd.Series(series, dtype="category")
        result = s.apply(lambda x: x.split("-")[0])
        result = result.astype(object)
        expected = pd.Series(["1", "1", np.NaN], dtype="category")
        expected = expected.astype(object)
        tm.assert_series_equal(result, expected)


class TestSeriesAggregate:
    def test_transform(self, string_series):
        # transforming functions

        with np.errstate(all="ignore"):

            f_sqrt = np.sqrt(string_series)
            f_abs = np.abs(string_series)

            # ufunc
            result = string_series.transform(np.sqrt)
            expected = f_sqrt.copy()
            assert_series_equal(result, expected)

            result = string_series.apply(np.sqrt)
            assert_series_equal(result, expected)

            # list-like
            result = string_series.transform([np.sqrt])
            expected = f_sqrt.to_frame().copy()
            expected.columns = ["sqrt"]
            assert_frame_equal(result, expected)

            result = string_series.transform([np.sqrt])
            assert_frame_equal(result, expected)

            result = string_series.transform(["sqrt"])
            assert_frame_equal(result, expected)

            # multiple items in list
            # these are in the order as if we are applying both functions per
            # series and then concatting
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ["sqrt", "absolute"]
            result = string_series.apply([np.sqrt, np.abs])
            assert_frame_equal(result, expected)

            result = string_series.transform(["sqrt", "abs"])
            expected.columns = ["sqrt", "abs"]
            assert_frame_equal(result, expected)

            # dict, provide renaming
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ["foo", "bar"]
            expected = expected.unstack().rename("series")

            result = string_series.apply({"foo": np.sqrt, "bar": np.abs})
            assert_series_equal(result.reindex_like(expected), expected)

    def test_transform_and_agg_error(self, string_series):
        # we are trying to transform with an aggregator
        with pytest.raises(ValueError):
            string_series.transform(["min", "max"])

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                string_series.agg(["sqrt", "max"])

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                string_series.transform(["sqrt", "max"])

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                string_series.agg({"foo": np.sqrt, "bar": "sum"})

    def test_demo(self):
        # demonstration tests
        s = Series(range(6), dtype="int64", name="series")

        result = s.agg(["min", "max"])
        expected = Series([0, 5], index=["min", "max"], name="series")
        tm.assert_series_equal(result, expected)

        result = s.agg({"foo": "min"})
        expected = Series([0], index=["foo"], name="series")
        tm.assert_series_equal(result, expected)

        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            result = s.agg({"foo": ["min", "max"]})

        expected = (
            DataFrame({"foo": [0, 5]}, index=["min", "max"]).unstack().rename("series")
        )
        tm.assert_series_equal(result, expected)
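
    # "Nested renaming" - passing a dict whose values are themselves lists of
    # aggregations, as in s.agg({"foo": ["min", "max"]}) above - is deprecated,
    # which is why the call is wrapped in assert_produces_warning(FutureWarning).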

    def test_multiple_aggregators_with_dict_api(self):

        s = Series(range(6), dtype="int64", name="series")
        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            result = s.agg({"foo": ["min", "max"], "bar": ["sum", "mean"]})

        expected = (
            DataFrame(
                {"foo": [5.0, np.nan, 0.0, np.nan], "bar": [np.nan, 2.5, np.nan, 15.0]},
                columns=["foo", "bar"],
                index=["max", "mean", "min", "sum"],
            )
            .unstack()
            .rename("series")
        )
        tm.assert_series_equal(result.reindex_like(expected), expected)

    def test_agg_apply_evaluate_lambdas_the_same(self, string_series):
        # test that we are evaluating row-by-row first
        # before vectorized evaluation
        result = string_series.apply(lambda x: str(x))
        expected = string_series.agg(lambda x: str(x))
        tm.assert_series_equal(result, expected)

        result = string_series.apply(str)
        expected = string_series.agg(str)
        tm.assert_series_equal(result, expected)

    def test_with_nested_series(self, datetime_series):
        # GH 2316
        # .agg with a reducer and a transform, what to do
        result = datetime_series.apply(
            lambda x: Series([x, x ** 2], index=["x", "x^2"])
        )
        expected = DataFrame({"x": datetime_series, "x^2": datetime_series ** 2})
        tm.assert_frame_equal(result, expected)

        result = datetime_series.agg(lambda x: Series([x, x ** 2], index=["x", "x^2"]))
        tm.assert_frame_equal(result, expected)

    def test_replicate_describe(self, string_series):
        # this also tests a result set that is all scalars
        expected = string_series.describe()
        result = string_series.apply(
            OrderedDict(
                [
                    ("count", "count"),
                    ("mean", "mean"),
                    ("std", "std"),
                    ("min", "min"),
                    ("25%", lambda x: x.quantile(0.25)),
                    ("50%", "median"),
                    ("75%", lambda x: x.quantile(0.75)),
                    ("max", "max"),
                ]
            )
        )
        assert_series_equal(result, expected)

    def test_reduce(self, string_series):
        # reductions with named functions
        result = string_series.agg(["sum", "mean"])
        expected = Series(
            [string_series.sum(), string_series.mean()],
            ["sum", "mean"],
            name=string_series.name,
        )
        assert_series_equal(result, expected)

    def test_non_callable_aggregates(self):
        # test agg using non-callable series attributes
        s = Series([1, 2, None])

        # Calling agg w/ just a string arg same as calling s.arg
        result = s.agg("size")
        expected = s.size
        assert result == expected

        # test when mixed w/ callable reducers
        result = s.agg(["size", "count", "mean"])
        expected = Series(OrderedDict([("size", 3.0), ("count", 2.0), ("mean", 1.5)]))
        assert_series_equal(result[expected.index], expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series(),
                [
                    ("sum", 0),
                    ("max", np.nan),
                    ("min", np.nan),
                    ("all", True),
                    ("any", False),
                    ("mean", np.nan),
                    ("prod", 1),
                    ("std", np.nan),
                    ("var", np.nan),
                    ("median", np.nan),
                ],
            ),
            _get_cython_table_params(
                Series([np.nan, 1, 2, 3]),
                [
                    ("sum", 6),
                    ("max", 3),
                    ("min", 1),
                    ("all", True),
                    ("any", True),
                    ("mean", 2),
                    ("prod", 6),
                    ("std", 1),
                    ("var", 1),
                    ("median", 2),
                ],
            ),
            _get_cython_table_params(
                Series("a b c".split()),
                [
                    ("sum", "abc"),
                    ("max", "c"),
                    ("min", "a"),
                    ("all", "c"),  # see GH12863
                    ("any", "a"),
                ],
            ),
        ),
    )
    def test_agg_cython_table(self, series, func, expected):
        # GH21224
        # test reducing functions in
        # pandas.core.base.SelectionMixin._cython_table
        result = series.agg(func)
        if tm.is_number(expected):
            assert np.isclose(result, expected, equal_nan=True)
        else:
            assert result == expected

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series(),
                [("cumprod", Series([], Index([]))), ("cumsum", Series([], Index([])))],
            ),
            _get_cython_table_params(
                Series([np.nan, 1, 2, 3]),
                [
                    ("cumprod", Series([np.nan, 1, 2, 6])),
                    ("cumsum", Series([np.nan, 1, 3, 6])),
                ],
            ),
            _get_cython_table_params(
                Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))]
            ),
        ),
    )
    def test_agg_cython_table_transform(self, series, func, expected):
        # GH21224
        # test transforming functions in
        # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
        result = series.agg(func)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series("a b c".split()),
                [
                    ("mean", TypeError),  # mean raises TypeError
                    ("prod", TypeError),
                    ("std", TypeError),
                    ("var", TypeError),
                    ("median", TypeError),
                    ("cumprod", TypeError),
                ],
            )
        ),
    )
    def test_agg_cython_table_raises(self, series, func, expected):
        # GH21224
        with pytest.raises(expected):
            # e.g. Series('a b'.split()).cumprod() will raise
            series.agg(func)


class TestSeriesMap:
    def test_map(self, datetime_series):
        index, data = tm.getMixedTypeDict()

        source = Series(data["B"], index=data["C"])
        target = Series(data["C"][:4], index=data["D"][:4])

        merged = target.map(source)

        for k, v in merged.items():
            assert v == source[target[k]]

        # input could be a dict
        merged = target.map(source.to_dict())

        for k, v in merged.items():
            assert v == source[target[k]]

        # function
        result = datetime_series.map(lambda x: x * 2)
        tm.assert_series_equal(result, datetime_series * 2)

        # GH 10324
        a = Series([1, 2, 3, 4])
        b = Series(["even", "odd", "even", "odd"], dtype="category")
        c = Series(["even", "odd", "even", "odd"])

        exp = Series(["odd", "even", "odd", np.nan], dtype="category")
        tm.assert_series_equal(a.map(b), exp)
        exp = Series(["odd", "even", "odd", np.nan])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(["a", "b", "c", "d"])
        b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"]))
        c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))

        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(["a", "b", "c", "d"])
        b = Series(
            ["B", "C", "D", "E"],
            dtype="category",
            index=pd.CategoricalIndex(["b", "c", "d", "e"]),
        )
        c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"]))

        exp = Series(
            pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"])
        )
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, "B", "C", "D"])
        tm.assert_series_equal(a.map(c), exp)

    @pytest.mark.parametrize("index", tm.all_index_generator(10))
    def test_map_empty(self, index):
        s = Series(index)
        result = s.map({})

        expected = pd.Series(np.nan, index=s.index)
        tm.assert_series_equal(result, expected)

    def test_map_compat(self):
        # related GH 8024
        s = Series([True, True, False], index=[1, 2, 3])
        result = s.map({True: "foo", False: "bar"})
        expected = Series(["foo", "foo", "bar"], index=[1, 2, 3])
        assert_series_equal(result, expected)

    def test_map_int(self):
        left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4})
        right = Series({1: 11, 2: 22, 3: 33})

        assert left.dtype == np.float_
        assert issubclass(right.dtype.type, np.integer)

        merged = left.map(right)
        assert merged.dtype == np.float_
        assert isna(merged["d"])
        assert not isna(merged["c"])

    def test_map_type_inference(self):
        s = Series(range(3))
        s2 = s.map(lambda x: np.where(x == 0, 0, 1))
        assert issubclass(s2.dtype.type, np.integer)

    def test_map_decimal(self, string_series):
        from decimal import Decimal

        result = string_series.map(lambda x: Decimal(str(x)))
        assert result.dtype == np.object_
        assert isinstance(result[0], Decimal)

    def test_map_na_exclusion(self):
        s = Series([1.5, np.nan, 3, np.nan, 5])

        result = s.map(lambda x: x * 2, na_action="ignore")
        exp = s * 2
        assert_series_equal(result, exp)

    def test_map_dict_with_tuple_keys(self):
        """
        Due to new MultiIndex-ing behaviour in v0.14.0,
        dicts with tuple keys passed to map were being
        converted to a multi-index, preventing tuple values
        from being mapped properly.
        """
        # GH 18496
        df = pd.DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]})
        label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"}

        df["labels"] = df["a"].map(label_mappings)
        df["expected_labels"] = pd.Series(["A", "B", "A", "B"], index=df.index)
        # All labels should be filled now
        tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False)

    def test_map_counter(self):
        s = Series(["a", "b", "c"], index=[1, 2, 3])
        counter = Counter()
        counter["b"] = 5
        counter["c"] += 1
        result = s.map(counter)
        expected = Series([0, 5, 1], index=[1, 2, 3])
        assert_series_equal(result, expected)

    def test_map_defaultdict(self):
        s = Series([1, 2, 3], index=["a", "b", "c"])
        default_dict = defaultdict(lambda: "blank")
        default_dict[1] = "stuff"
        result = s.map(default_dict)
        expected = Series(["stuff", "blank", "blank"], index=["a", "b", "c"])
        assert_series_equal(result, expected)

    def test_map_dict_subclass_with_missing(self):
        """
        Test Series.map with a dictionary subclass that defines __missing__,
        i.e. sets a default value (GH #15999).
        """

        class DictWithMissing(dict):
            def __missing__(self, key):
                return "missing"

        s = Series([1, 2, 3])
        dictionary = DictWithMissing({3: "three"})
        result = s.map(dictionary)
        expected = Series(["missing", "missing", "three"])
        assert_series_equal(result, expected)

    def test_map_dict_subclass_without_missing(self):
        class DictWithoutMissing(dict):
            pass

        s = Series([1, 2, 3])
        dictionary = DictWithoutMissing({3: "three"})
        result = s.map(dictionary)
        expected = Series([np.nan, np.nan, "three"])
        assert_series_equal(result, expected)
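
    # Taken together, the two dict-subclass tests above pin down the lookup
    # rule for Series.map: a mapping that defines __missing__ supplies its
    # default for absent keys, while one without __missing__ yields NaN.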
def test_map_box(self):
|
||||
vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
|
||||
s = pd.Series(vals)
|
||||
assert s.dtype == "datetime64[ns]"
|
||||
# boxed value must be Timestamp instance
|
||||
res = s.map(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz))
|
||||
exp = pd.Series(["Timestamp_1_None", "Timestamp_2_None"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
vals = [
|
||||
pd.Timestamp("2011-01-01", tz="US/Eastern"),
|
||||
pd.Timestamp("2011-01-02", tz="US/Eastern"),
|
||||
]
|
||||
s = pd.Series(vals)
|
||||
assert s.dtype == "datetime64[ns, US/Eastern]"
|
||||
res = s.map(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz))
|
||||
exp = pd.Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
# timedelta
|
||||
vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
|
||||
s = pd.Series(vals)
|
||||
assert s.dtype == "timedelta64[ns]"
|
||||
res = s.map(lambda x: "{0}_{1}".format(x.__class__.__name__, x.days))
|
||||
exp = pd.Series(["Timedelta_1", "Timedelta_2"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
# period
|
||||
vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
|
||||
s = pd.Series(vals)
|
||||
assert s.dtype == "Period[M]"
|
||||
res = s.map(lambda x: "{0}_{1}".format(x.__class__.__name__, x.freqstr))
|
||||
exp = pd.Series(["Period_M", "Period_M"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
def test_map_categorical(self):
|
||||
values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
|
||||
s = pd.Series(values, name="XX", index=list("abcdefg"))

        result = s.map(lambda x: x.lower())
        exp_values = pd.Categorical(
            list("abbabcd"), categories=list("dcba"), ordered=True
        )
        exp = pd.Series(exp_values, name="XX", index=list("abcdefg"))
        tm.assert_series_equal(result, exp)
        tm.assert_categorical_equal(result.values, exp_values)

        result = s.map(lambda x: "A")
        exp = pd.Series(["A"] * 7, name="XX", index=list("abcdefg"))
        tm.assert_series_equal(result, exp)
        assert result.dtype == np.object

        with pytest.raises(NotImplementedError):
            s.map(lambda x: x, na_action="ignore")

    def test_map_datetimetz(self):
        values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize(
            "Asia/Tokyo"
        )
        s = pd.Series(values, name="XX")

        # keep tz
        result = s.map(lambda x: x + pd.offsets.Day())
        exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize(
            "Asia/Tokyo"
        )
        exp = pd.Series(exp_values, name="XX")
        tm.assert_series_equal(result, exp)

        # change dtype
        # GH 14506 : Returned dtype changed from int32 to int64
        result = s.map(lambda x: x.hour)
        exp = pd.Series(list(range(24)) + [0], name="XX", dtype=np.int64)
        tm.assert_series_equal(result, exp)

        with pytest.raises(NotImplementedError):
            s.map(lambda x: x, na_action="ignore")

        # not vectorized
        def f(x):
            if not isinstance(x, pd.Timestamp):
                raise ValueError
            return str(x.tz)

        result = s.map(f)
        exp = pd.Series(["Asia/Tokyo"] * 25, name="XX")
        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize(
        "vals,mapping,exp",
        [
            (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]),
            (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3),
            (list(range(3)), {0: 42}, [42] + [np.nan] * 3),
        ],
    )
    def test_map_missing_mixed(self, vals, mapping, exp):
        # GH20495
        s = pd.Series(vals + [np.nan])
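        # the appended NaN exercises missing-value lookup: NaN can itself be
        # a mapping key, otherwise unmatched entries become NaN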
        result = s.map(mapping)

        tm.assert_series_equal(result, pd.Series(exp))

    @pytest.mark.parametrize(
        "dti,exp",
        [
            (
                Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
                pd.DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
            ),
            (
                tm.makeTimeSeries(nper=30),
                pd.DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"),
            ),
        ],
    )
    def test_apply_series_on_date_time_index_aware_series(self, dti, exp):
        # GH 25959
        # Calling apply on a localized time series should not cause an error
        index = dti.tz_localize("UTC").index
        result = pd.Series(index).apply(lambda x: pd.Series([1, 2]))
        tm.assert_frame_equal(result, exp)

    def test_apply_scalar_on_date_time_index_aware_series(self):
        # GH 25959
        # Calling apply on a localized time series should not cause an error
        series = tm.makeTimeSeries(nper=30).tz_localize("UTC")
        result = pd.Series(series.index).apply(lambda x: 1)
        assert_series_equal(result, pd.Series(np.ones(30), dtype="int64"))
@@ -0,0 +1,165 @@
import operator

import numpy as np
import pytest

import pandas as pd
from pandas import Series
from pandas.core.indexes.period import IncompatibleFrequency
import pandas.util.testing as tm


def _permute(obj):
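    # shuffle the rows; labels travel with values, so index alignment can undo it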
    return obj.take(np.random.permutation(len(obj)))


class TestSeriesFlexArithmetic:
    @pytest.mark.parametrize(
        "ts",
        [
            (lambda x: x, lambda x: x * 2, False),
            (lambda x: x, lambda x: x[::2], False),
            (lambda x: x, lambda x: 5, True),
            (lambda x: tm.makeFloatSeries(), lambda x: tm.makeFloatSeries(), True),
        ],
    )
    @pytest.mark.parametrize(
        "opname", ["add", "sub", "mul", "floordiv", "truediv", "pow"]
    )
    def test_flex_method_equivalence(self, opname, ts):
        # check that Series.{opname} behaves like Series.__{opname}__
        tser = tm.makeTimeSeries().rename("ts")

        series = ts[0](tser)
        other = ts[1](tser)
        check_reverse = ts[2]
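        # reversed variants swap the operands: s.rsub(o) is o - s, etc.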

        op = getattr(Series, opname)
        alt = getattr(operator, opname)

        result = op(series, other)
        expected = alt(series, other)
        tm.assert_almost_equal(result, expected)
        if check_reverse:
            rop = getattr(Series, "r" + opname)
            result = rop(series, other)
            expected = alt(other, series)
            tm.assert_almost_equal(result, expected)


class TestSeriesArithmetic:
    # Some of these may end up in tests/arithmetic, but are not yet sorted

    def test_add_series_with_period_index(self):
        rng = pd.period_range("1/1/2000", "1/1/2010", freq="A")
        ts = Series(np.random.randn(len(rng)), index=rng)
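        # operands align on the PeriodIndex; labels present on only one side
        # become NaN in the result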

        result = ts + ts[::2]
        expected = ts + ts
        expected[1::2] = np.nan
        tm.assert_series_equal(result, expected)

        result = ts + _permute(ts[::2])
        tm.assert_series_equal(result, expected)

        msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)"
        with pytest.raises(IncompatibleFrequency, match=msg):
            ts + ts.asfreq("D", how="end")


# ------------------------------------------------------------------
# Comparisons


class TestSeriesFlexComparison:
    def test_comparison_flex_basic(self):
        left = pd.Series(np.random.randn(10))
        right = pd.Series(np.random.randn(10))

        tm.assert_series_equal(left.eq(right), left == right)
        tm.assert_series_equal(left.ne(right), left != right)
        tm.assert_series_equal(left.le(right), left <= right)
        tm.assert_series_equal(left.lt(right), left < right)
        tm.assert_series_equal(left.gt(right), left > right)
        tm.assert_series_equal(left.ge(right), left >= right)

        # axis
        for axis in [0, None, "index"]:
            tm.assert_series_equal(left.eq(right, axis=axis), left == right)
            tm.assert_series_equal(left.ne(right, axis=axis), left != right)
            tm.assert_series_equal(left.le(right, axis=axis), left <= right)
            tm.assert_series_equal(left.lt(right, axis=axis), left < right)
            tm.assert_series_equal(left.gt(right, axis=axis), left > right)
            tm.assert_series_equal(left.ge(right, axis=axis), left >= right)

        # only axis 0 / "index" is valid for a Series
        msg = "No axis named 1 for object type"
        for op in ["eq", "ne", "le", "lt", "gt", "ge"]:
            with pytest.raises(ValueError, match=msg):
                getattr(left, op)(right, axis=1)


class TestSeriesComparison:
    def test_comparison_different_length(self):
        a = Series(["a", "b", "c"])
        b = Series(["b", "a"])
        with pytest.raises(ValueError):
            a < b

        a = Series([1, 2])
        b = Series([2, 3, 4])
        with pytest.raises(ValueError):
            a == b

    @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"])
    def test_ser_flex_cmp_return_dtypes(self, opname):
        # GH#15115
        ser = Series([1, 3, 2], index=range(3))
        const = 2
        result = getattr(ser, opname)(const).dtypes
        expected = np.dtype("bool")
        assert result == expected

    @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"])
    def test_ser_flex_cmp_return_dtypes_empty(self, opname):
        # GH#15115 empty Series case
        ser = Series([1, 3, 2], index=range(3))
        empty = ser.iloc[:0]
        const = 2
        result = getattr(empty, opname)(const).dtypes
        expected = np.dtype("bool")
        assert result == expected

    @pytest.mark.parametrize(
        "op",
        [operator.eq, operator.ne, operator.le, operator.lt, operator.ge, operator.gt],
    )
    @pytest.mark.parametrize(
        "names", [(None, None, None), ("foo", "bar", None), ("baz", "baz", "baz")]
    )
    def test_ser_cmp_result_names(self, names, op):
        # datetime64 dtype
        dti = pd.date_range("1949-06-07 03:00:00", freq="H", periods=5, name=names[0])
        ser = Series(dti).rename(names[1])
        result = op(ser, dti)
        assert result.name == names[2]

        # datetime64tz dtype
        dti = dti.tz_localize("US/Central")
        ser = Series(dti).rename(names[1])
        result = op(ser, dti)
        assert result.name == names[2]

        # timedelta64 dtype
        tdi = dti - dti.shift(1)
        ser = Series(tdi).rename(names[1])
        result = op(ser, tdi)
        assert result.name == names[2]

        # categorical
        if op in [operator.eq, operator.ne]:
            # categorical dtype comparisons raise for inequalities
            cidx = tdi.astype("category")
            ser = Series(cidx).rename(names[1])
            result = op(ser, cidx)
            assert result.name == names[2]
@@ -0,0 +1,178 @@
import numpy as np
import pytest

from pandas import Series, Timestamp, date_range, isna, notna, offsets
import pandas.util.testing as tm


class TestSeriesAsof:
    def test_basic(self):

        # array or list of dates
        N = 50
        rng = date_range("1/1/1990", periods=N, freq="53s")
        ts = Series(np.random.randn(N), index=rng)
        ts[15:30] = np.nan
        dates = date_range("1/1/1990", periods=N * 3, freq="25s")

        result = ts.asof(dates)
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]

        result = ts.asof(list(dates))
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]

        mask = (result.index >= lb) & (result.index < ub)
        rs = result[mask]
        assert (rs == ts[lb]).all()

        val = result[result.index[result.index >= ub][0]]
        assert ts[ub] == val

    def test_scalar(self):

        N = 30
        rng = date_range("1/1/1990", periods=N, freq="53s")
        ts = Series(np.arange(N), index=rng)
        ts[5:10] = np.nan
        ts[15:20] = np.nan
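        # asof returns the last non-NaN value at or before the requested label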

        val1 = ts.asof(ts.index[7])
        val2 = ts.asof(ts.index[19])

        assert val1 == ts[4]
        assert val2 == ts[14]

        # accepts strings
        val1 = ts.asof(str(ts.index[7]))
        assert val1 == ts[4]

        # in there
        result = ts.asof(ts.index[3])
        assert result == ts[3]

        # no as of value
        d = ts.index[0] - offsets.BDay()
        assert np.isnan(ts.asof(d))

    def test_with_nan(self):
        # basic asof test
        rng = date_range("1/1/2000", "1/2/2000", freq="4h")
        s = Series(np.arange(len(rng)), index=rng)
        r = s.resample("2h").mean()
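        # upsampling 4h -> 2h leaves NaN at the new midpoints, which asof
        # forward-fills from the last observed value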

        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)

        r.iloc[3:5] = np.nan
        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)

        r.iloc[-3:] = np.nan
        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)

    def test_periodindex(self):
        from pandas import period_range, PeriodIndex

        # array or list of dates
        N = 50
        rng = period_range("1/1/1990", periods=N, freq="H")
        ts = Series(np.random.randn(N), index=rng)
        ts[15:30] = np.nan
        dates = date_range("1/1/1990", periods=N * 3, freq="37min")

        result = ts.asof(dates)
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]

        result = ts.asof(list(dates))
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]

        pix = PeriodIndex(result.index.values, freq="H")
        mask = (pix >= lb) & (pix < ub)
        rs = result[mask]
        assert (rs == ts[lb]).all()

        ts[5:10] = np.nan
        ts[15:20] = np.nan

        val1 = ts.asof(ts.index[7])
        val2 = ts.asof(ts.index[19])

        assert val1 == ts[4]
        assert val2 == ts[14]

        # accepts strings
        val1 = ts.asof(str(ts.index[7]))
        assert val1 == ts[4]

        # in there
        assert ts.asof(ts.index[3]) == ts[3]

        # no as of value
        d = ts.index[0].to_timestamp() - offsets.BDay()
        assert isna(ts.asof(d))

    def test_errors(self):

        s = Series(
            [1, 2, 3],
            index=[Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102")],
        )

        # non-monotonic
        assert not s.index.is_monotonic
        with pytest.raises(ValueError):
            s.asof(s.index[0])

        # subset with Series
        N = 10
        rng = date_range("1/1/1990", periods=N, freq="53s")
        s = Series(np.random.randn(N), index=rng)
        with pytest.raises(ValueError):
            s.asof(s.index[0], subset="foo")

    def test_all_nans(self):
        # GH 15713
        # series is all nans
        result = Series([np.nan]).asof([0])
        expected = Series([np.nan])
        tm.assert_series_equal(result, expected)

        # testing non-default indexes
        N = 50
        rng = date_range("1/1/1990", periods=N, freq="53s")

        dates = date_range("1/1/1990", periods=N * 3, freq="25s")
        result = Series(np.nan, index=rng).asof(dates)
        expected = Series(np.nan, index=dates)
        tm.assert_series_equal(result, expected)

        # testing scalar input
        date = date_range("1/1/1990", periods=N * 3, freq="25s")[0]
        result = Series(np.nan, index=rng).asof(date)
        assert isna(result)

        # test name is propagated
        result = Series(np.nan, index=[1, 2, 3, 4], name="test").asof([4, 5])
        expected = Series(np.nan, index=[4, 5], name="test")
        tm.assert_series_equal(result, expected)
@@ -0,0 +1,39 @@
import pandas as pd

# Segregated collection of methods that require the BlockManager internal data
# structure


class TestSeriesBlockInternals:
    def test_setitem_invalidates_datetime_index_freq(self):
        # GH#24096 altering a datetime64tz Series inplace invalidates the
        # `freq` attribute on the underlying DatetimeIndex

        dti = pd.date_range("20130101", periods=3, tz="US/Eastern")
        ts = dti[1]
        ser = pd.Series(dti)
        assert ser._values is not dti
        assert ser._values._data.base is not dti._data._data.base
        assert dti.freq == "D"
        ser.iloc[1] = pd.NaT
        assert ser._values.freq is None

        # check that the DatetimeIndex was not altered in place
        assert ser._values is not dti
        assert ser._values._data.base is not dti._data._data.base
        assert dti[1] == ts
        assert dti.freq == "D"

    def test_dt64tz_setitem_does_not_mutate_dti(self):
        # GH#21907, GH#24096
        dti = pd.date_range("2016-01-01", periods=10, tz="US/Pacific")
        ts = dti[0]
        ser = pd.Series(dti)
        assert ser._values is not dti
        assert ser._values._data.base is not dti._data._data.base
        assert ser._data.blocks[0].values is not dti
        assert ser._data.blocks[0].values._data.base is not dti._data._data.base

        ser[::3] = pd.NaT
        assert ser[0] is pd.NaT
        assert dti[0] == ts
@@ -0,0 +1,418 @@
from datetime import datetime

import numpy as np
from numpy import nan
import pytest

import pandas as pd
from pandas import DataFrame, DatetimeIndex, Series, date_range
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal


class TestSeriesCombine:
    def test_append(self, datetime_series, string_series, object_series):
        appendedSeries = string_series.append(object_series)
        for idx, value in appendedSeries.items():
            if idx in string_series.index:
                assert value == string_series[idx]
            elif idx in object_series.index:
                assert value == object_series[idx]
            else:
                raise AssertionError("orphaned index!")

        msg = "Indexes have overlapping values:"
        with pytest.raises(ValueError, match=msg):
            datetime_series.append(datetime_series, verify_integrity=True)

    def test_append_many(self, datetime_series):
        pieces = [datetime_series[:5], datetime_series[5:10], datetime_series[10:]]

        result = pieces[0].append(pieces[1:])
        assert_series_equal(result, datetime_series)

    def test_append_duplicates(self):
        # GH 13677
        s1 = pd.Series([1, 2, 3])
        s2 = pd.Series([4, 5, 6])
        exp = pd.Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2])
        tm.assert_series_equal(s1.append(s2), exp)
        tm.assert_series_equal(pd.concat([s1, s2]), exp)

        # the result must have RangeIndex
        exp = pd.Series([1, 2, 3, 4, 5, 6])
        tm.assert_series_equal(
            s1.append(s2, ignore_index=True), exp, check_index_type=True
        )
        tm.assert_series_equal(
            pd.concat([s1, s2], ignore_index=True), exp, check_index_type=True
        )

        msg = "Indexes have overlapping values:"
        with pytest.raises(ValueError, match=msg):
            s1.append(s2, verify_integrity=True)
        with pytest.raises(ValueError, match=msg):
            pd.concat([s1, s2], verify_integrity=True)

    def test_combine_scalar(self):
        # GH 21248
        # Note - combine() with another Series is tested elsewhere because
        # it is used when testing operators
        s = pd.Series([i * 10 for i in range(5)])
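        # with a scalar `other`, combine applies func(value, scalar) elementwise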
        result = s.combine(3, lambda x, y: x + y)
        expected = pd.Series([i * 10 + 3 for i in range(5)])
        tm.assert_series_equal(result, expected)

        result = s.combine(22, lambda x, y: min(x, y))
        expected = pd.Series([min(i * 10, 22) for i in range(5)])
        tm.assert_series_equal(result, expected)

    def test_combine_first(self):
        values = tm.makeIntIndex(20).values.astype(float)
        series = Series(values, index=tm.makeIntIndex(20))

        series_copy = series * 2
        series_copy[::2] = np.nan

        # nothing used from the input
        combined = series.combine_first(series_copy)

        tm.assert_series_equal(combined, series)

        # Holes filled from input
        combined = series_copy.combine_first(series)
        assert np.isfinite(combined).all()

        tm.assert_series_equal(combined[::2], series[::2])
        tm.assert_series_equal(combined[1::2], series_copy[1::2])

        # mixed types
        index = tm.makeStringIndex(20)
        floats = Series(tm.randn(20), index=index)
        strings = Series(tm.makeStringIndex(10), index=index[::2])

        combined = strings.combine_first(floats)

        tm.assert_series_equal(strings, combined.loc[index[::2]])
        tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]])

        # corner case
        s = Series([1.0, 2, 3], index=[0, 1, 2])
        result = s.combine_first(Series([], index=[]))
        s.index = s.index.astype("O")
        assert_series_equal(s, result)

    def test_update(self):
        s = Series([1.5, nan, 3.0, 4.0, nan])
        s2 = Series([nan, 3.5, nan, 5.0])
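        # update is in-place: aligned non-NaN values from s2 overwrite s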
        s.update(s2)

        expected = Series([1.5, 3.5, 3.0, 5.0, np.nan])
        assert_series_equal(s, expected)

        # GH 3217
        df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
        df["c"] = np.nan

        df["c"].update(Series(["foo"], index=[0]))
        expected = DataFrame(
            [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"]
        )
        assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "other, dtype, expected",
        [
            # other is int
            ([61, 63], "int32", pd.Series([10, 61, 12], dtype="int32")),
            ([61, 63], "int64", pd.Series([10, 61, 12])),
            ([61, 63], float, pd.Series([10.0, 61.0, 12.0])),
            ([61, 63], object, pd.Series([10, 61, 12], dtype=object)),
            # other is float, but can be cast to int
            ([61.0, 63.0], "int32", pd.Series([10, 61, 12], dtype="int32")),
            ([61.0, 63.0], "int64", pd.Series([10, 61, 12])),
            ([61.0, 63.0], float, pd.Series([10.0, 61.0, 12.0])),
            ([61.0, 63.0], object, pd.Series([10, 61.0, 12], dtype=object)),
            # other is float, cannot be cast to int
            ([61.1, 63.1], "int32", pd.Series([10.0, 61.1, 12.0])),
            ([61.1, 63.1], "int64", pd.Series([10.0, 61.1, 12.0])),
            ([61.1, 63.1], float, pd.Series([10.0, 61.1, 12.0])),
            ([61.1, 63.1], object, pd.Series([10, 61.1, 12], dtype=object)),
            # other is object, cannot be cast
            ([(61,), (63,)], "int32", pd.Series([10, (61,), 12])),
            ([(61,), (63,)], "int64", pd.Series([10, (61,), 12])),
            ([(61,), (63,)], float, pd.Series([10.0, (61,), 12.0])),
            ([(61,), (63,)], object, pd.Series([10, (61,), 12])),
        ],
    )
    def test_update_dtypes(self, other, dtype, expected):

        s = Series([10, 11, 12], dtype=dtype)
        other = Series(other, index=[1, 3])
        s.update(other)

        assert_series_equal(s, expected)

    def test_concat_empty_series_dtypes_roundtrips(self):

        # round-tripping with self & like self
        # list(...) so the iterator survives both loops below
        dtypes = list(
            map(np.dtype, ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"])
        )

        for dtype in dtypes:
            assert pd.concat([Series(dtype=dtype)]).dtype == dtype
            assert pd.concat([Series(dtype=dtype), Series(dtype=dtype)]).dtype == dtype

        def int_result_type(dtype, dtype2):
            typs = {dtype.kind, dtype2.kind}
            if not len(typs - {"i", "u", "b"}) and (
                dtype.kind == "i" or dtype2.kind == "i"
            ):
                return "i"
            elif not len(typs - {"u", "b"}) and (
                dtype.kind == "u" or dtype2.kind == "u"
            ):
                return "u"
            return None

        def float_result_type(dtype, dtype2):
            typs = {dtype.kind, dtype2.kind}
            if not len(typs - {"f", "i", "u"}) and (
                dtype.kind == "f" or dtype2.kind == "f"
            ):
                return "f"
            return None

        def get_result_type(dtype, dtype2):
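            # promotion order: float beats int/uint, int beats uint/bool,
            # anything else falls back to object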
            result = float_result_type(dtype, dtype2)
            if result is not None:
                return result
            result = int_result_type(dtype, dtype2)
            if result is not None:
                return result
            return "O"

        for dtype in dtypes:
            for dtype2 in dtypes:
                if dtype == dtype2:
                    continue

                expected = get_result_type(dtype, dtype2)
                result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype
                assert result.kind == expected

    def test_combine_first_dt_tz_values(self, tz_naive_fixture):
        ser1 = pd.Series(
            pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture),
            name="ser1",
        )
        ser2 = pd.Series(
            pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture),
            index=[2, 3, 4],
            name="ser2",
        )
        result = ser1.combine_first(ser2)
        exp_vals = pd.DatetimeIndex(
            ["20150101", "20150102", "20150103", "20160515", "20160516"],
            tz=tz_naive_fixture,
        )
        exp = pd.Series(exp_vals, name="ser1")
        assert_series_equal(exp, result)

    @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
    @pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
    def test_concat_empty_series_dtypes(self):

        # booleans
        assert (
            pd.concat([Series(dtype=np.bool_), Series(dtype=np.int32)]).dtype
            == np.int32
        )
        assert (
            pd.concat([Series(dtype=np.bool_), Series(dtype=np.float32)]).dtype
            == np.object_
        )

        # datetime-like
        assert (
            pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.bool)]).dtype
            == np.object_
        )
        assert (
            pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.int64)]).dtype
            == np.object_
        )
        assert (
            pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.bool)]).dtype
            == np.object_
        )
        assert (
            pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.int64)]).dtype
            == np.object_
        )
        assert (
            pd.concat(
                [Series(dtype="M8[ns]"), Series(dtype=np.bool_), Series(dtype=np.int64)]
            ).dtype
            == np.object_
        )

        # categorical
        assert (
            pd.concat([Series(dtype="category"), Series(dtype="category")]).dtype
            == "category"
        )
        # GH 18515
        assert (
            pd.concat(
                [Series(np.array([]), dtype="category"), Series(dtype="float64")]
            ).dtype
            == "float64"
        )
        assert (
            pd.concat([Series(dtype="category"), Series(dtype="object")]).dtype
            == "object"
        )

        # sparse
        # TODO: move?
        result = pd.concat(
            [Series(dtype="float64").to_sparse(), Series(dtype="float64").to_sparse()]
        )
        assert result.dtype == "Sparse[float64]"

        # GH 26705 - Assert .ftype is deprecated
        with tm.assert_produces_warning(FutureWarning):
            assert result.ftype == "float64:sparse"

        result = pd.concat(
            [Series(dtype="float64").to_sparse(), Series(dtype="float64")]
        )
        # TODO: release-note: concat sparse dtype
        expected = pd.core.sparse.api.SparseDtype(np.float64)
        assert result.dtype == expected

        # GH 26705 - Assert .ftype is deprecated
        with tm.assert_produces_warning(FutureWarning):
            assert result.ftype == "float64:sparse"

        result = pd.concat(
            [Series(dtype="float64").to_sparse(), Series(dtype="object")]
        )
        # TODO: release-note: concat sparse dtype
        expected = pd.core.sparse.api.SparseDtype("object")
        assert result.dtype == expected

        # GH 26705 - Assert .ftype is deprecated
        with tm.assert_produces_warning(FutureWarning):
            assert result.ftype == "object:sparse"

    def test_combine_first_dt64(self):
        from pandas.core.tools.datetimes import to_datetime

        s0 = to_datetime(Series(["2010", np.nan]))
        s1 = to_datetime(Series([np.nan, "2011"]))
        rs = s0.combine_first(s1)
        xp = to_datetime(Series(["2010", "2011"]))
        assert_series_equal(rs, xp)

        s0 = to_datetime(Series(["2010", np.nan]))
        s1 = Series([np.nan, "2011"])
        rs = s0.combine_first(s1)
        xp = Series([datetime(2010, 1, 1), "2011"])
        assert_series_equal(rs, xp)


class TestTimeseries:
    def test_append_concat(self):
        rng = date_range("5/8/2012 1:45", periods=10, freq="5T")
        ts = Series(np.random.randn(len(rng)), rng)
        df = DataFrame(np.random.randn(len(rng), 4), index=rng)

        result = ts.append(ts)
        result_df = df.append(df)
        ex_index = DatetimeIndex(np.tile(rng.values, 2))
        tm.assert_index_equal(result.index, ex_index)
        tm.assert_index_equal(result_df.index, ex_index)

        appended = rng.append(rng)
        tm.assert_index_equal(appended, ex_index)

        appended = rng.append([rng, rng])
        ex_index = DatetimeIndex(np.tile(rng.values, 3))
        tm.assert_index_equal(appended, ex_index)

        # different index names
        rng1 = rng.copy()
        rng2 = rng.copy()
        rng1.name = "foo"
        rng2.name = "bar"
        assert rng1.append(rng1).name == "foo"
        assert rng1.append(rng2).name is None

    def test_append_concat_tz(self):
        # see gh-2938
        rng = date_range("5/8/2012 1:45", periods=10, freq="5T", tz="US/Eastern")
        rng2 = date_range("5/8/2012 2:35", periods=10, freq="5T", tz="US/Eastern")
        rng3 = date_range("5/8/2012 1:45", periods=20, freq="5T", tz="US/Eastern")
        ts = Series(np.random.randn(len(rng)), rng)
        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
        ts2 = Series(np.random.randn(len(rng2)), rng2)
        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)

        result = ts.append(ts2)
        result_df = df.append(df2)
        tm.assert_index_equal(result.index, rng3)
        tm.assert_index_equal(result_df.index, rng3)

        appended = rng.append(rng2)
        tm.assert_index_equal(appended, rng3)

    def test_append_concat_tz_explicit_pytz(self):
        # see gh-2938
        from pytz import timezone

        rng = date_range(
            "5/8/2012 1:45", periods=10, freq="5T", tz=timezone("US/Eastern")
        )
        rng2 = date_range(
            "5/8/2012 2:35", periods=10, freq="5T", tz=timezone("US/Eastern")
        )
        rng3 = date_range(
            "5/8/2012 1:45", periods=20, freq="5T", tz=timezone("US/Eastern")
        )
        ts = Series(np.random.randn(len(rng)), rng)
        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
        ts2 = Series(np.random.randn(len(rng2)), rng2)
        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)

        result = ts.append(ts2)
        result_df = df.append(df2)
        tm.assert_index_equal(result.index, rng3)
        tm.assert_index_equal(result_df.index, rng3)

        appended = rng.append(rng2)
        tm.assert_index_equal(appended, rng3)

    def test_append_concat_tz_dateutil(self):
        # see gh-2938
        rng = date_range(
            "5/8/2012 1:45", periods=10, freq="5T", tz="dateutil/US/Eastern"
        )
        rng2 = date_range(
            "5/8/2012 2:35", periods=10, freq="5T", tz="dateutil/US/Eastern"
        )
        rng3 = date_range(
            "5/8/2012 1:45", periods=20, freq="5T", tz="dateutil/US/Eastern"
        )
        ts = Series(np.random.randn(len(rng)), rng)
        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
        ts2 = Series(np.random.randn(len(rng2)), rng2)
        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)

        result = ts.append(ts2)
        result_df = df.append(df2)
        tm.assert_index_equal(result.index, rng3)
        tm.assert_index_equal(result_df.index, rng3)

        appended = rng.append(rng2)
        tm.assert_index_equal(appended, rng3)
File diff suppressed because it is too large
@@ -0,0 +1,648 @@
import calendar
from datetime import date, datetime, time
import locale
import unicodedata

import numpy as np
import pytest
import pytz

from pandas._libs.tslibs.timezones import maybe_get_tz

from pandas.core.dtypes.common import is_integer_dtype, is_list_like

import pandas as pd
from pandas import (
    DataFrame,
    DatetimeIndex,
    Index,
    PeriodIndex,
    Series,
    TimedeltaIndex,
    bdate_range,
    date_range,
    period_range,
    timedelta_range,
)
from pandas.core.arrays import PeriodArray
import pandas.core.common as com
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal


class TestSeriesDatetimeValues:
    def test_dt_namespace_accessor(self):

        # GH 7207, 11128
        # test .dt namespace accessor

        ok_for_period = PeriodArray._datetimelike_ops
        ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"]
        ok_for_dt = DatetimeIndex._datetimelike_ops
        ok_for_dt_methods = [
            "to_period",
            "to_pydatetime",
            "tz_localize",
            "tz_convert",
            "normalize",
            "strftime",
            "round",
            "floor",
            "ceil",
            "day_name",
            "month_name",
        ]
        ok_for_td = TimedeltaIndex._datetimelike_ops
        ok_for_td_methods = [
            "components",
            "to_pytimedelta",
            "total_seconds",
            "round",
            "floor",
            "ceil",
        ]

        def get_expected(s, name):
            result = getattr(Index(s._values), prop)
            if isinstance(result, np.ndarray):
                if is_integer_dtype(result):
                    result = result.astype("int64")
            elif not is_list_like(result):
                return result
            return Series(result, index=s.index, name=s.name)

        def compare(s, name):
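            # note: both helpers read ``prop`` from the enclosing loop rather
            # than their ``name`` argument; compare checks the .dt result
            # against the same property computed on the bare Index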
            a = getattr(s.dt, prop)
            b = get_expected(s, prop)
            if not (is_list_like(a) and is_list_like(b)):
                assert a == b
            else:
                tm.assert_series_equal(a, b)

        # datetimeindex
        cases = [
            Series(date_range("20130101", periods=5), name="xxx"),
            Series(date_range("20130101", periods=5, freq="s"), name="xxx"),
            Series(date_range("20130101 00:00:00", periods=5, freq="ms"), name="xxx"),
        ]
        for s in cases:
            for prop in ok_for_dt:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_dt_methods:
                getattr(s.dt, prop)

            result = s.dt.to_pydatetime()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.tz_localize("US/Eastern")
            exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern")
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

            tz_result = result.dt.tz
            assert str(tz_result) == "US/Eastern"
            freq_result = s.dt.freq
            assert freq_result == DatetimeIndex(s.values, freq="infer").freq

            # let's localize, then convert
            result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
            exp_values = (
                DatetimeIndex(s.values).tz_localize("UTC").tz_convert("US/Eastern")
            )
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

        # datetimeindex with tz
        s = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx")
        for prop in ok_for_dt:

            # we test freq below
            if prop != "freq":
                compare(s, prop)

        for prop in ok_for_dt_methods:
            getattr(s.dt, prop)

        result = s.dt.to_pydatetime()
        assert isinstance(result, np.ndarray)
        assert result.dtype == object

        result = s.dt.tz_convert("CET")
        expected = Series(s._values.tz_convert("CET"), index=s.index, name="xxx")
        tm.assert_series_equal(result, expected)

        tz_result = result.dt.tz
        assert str(tz_result) == "CET"
        freq_result = s.dt.freq
        assert freq_result == DatetimeIndex(s.values, freq="infer").freq

        # timedelta index
        cases = [
            Series(
                timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"
            ),
            Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"),
            Series(
                timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"),
                name="xxx",
            ),
        ]
        for s in cases:
            for prop in ok_for_td:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_td_methods:
                getattr(s.dt, prop)

            result = s.dt.components
            assert isinstance(result, DataFrame)
            tm.assert_index_equal(result.index, s.index)

            result = s.dt.to_pytimedelta()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.total_seconds()
            assert isinstance(result, pd.Series)
            assert result.dtype == "float64"

            freq_result = s.dt.freq
            assert freq_result == TimedeltaIndex(s.values, freq="infer").freq

        # both
        index = date_range("20130101", periods=3, freq="D")
        s = Series(date_range("20140204", periods=3, freq="s"), index=index, name="xxx")
        exp = Series(
            np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx"
        )
        tm.assert_series_equal(s.dt.year, exp)

        exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx")
        tm.assert_series_equal(s.dt.month, exp)

        exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx")
        tm.assert_series_equal(s.dt.second, exp)

        exp = pd.Series([s[0]] * 3, index=index, name="xxx")
        tm.assert_series_equal(s.dt.normalize(), exp)

        # periodindex
        cases = [Series(period_range("20130101", periods=5, freq="D"), name="xxx")]
        for s in cases:
            for prop in ok_for_period:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_period_methods:
                getattr(s.dt, prop)

            freq_result = s.dt.freq
            assert freq_result == PeriodIndex(s.values).freq

        # test limited display api
        def get_dir(s):
            results = [r for r in s.dt.__dir__() if not r.startswith("_")]
            return list(sorted(set(results)))

        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))
        )

        s = Series(
            period_range("20130101", periods=5, freq="D", name="xxx").astype(object)
        )
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_period + ok_for_period_methods)))
        )

        # 11295
        # ambiguous time error on the conversions
        s = Series(pd.date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx")
        s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))
        )
        exp_values = pd.date_range(
            "2015-01-01", "2016-01-01", freq="T", tz="UTC"
        ).tz_convert("America/Chicago")
        expected = Series(exp_values, name="xxx")
        tm.assert_series_equal(s, expected)

        # no setting allowed
        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        with pytest.raises(ValueError, match="modifications"):
            s.dt.hour = 5

        # trying to set a copy
        with pd.option_context("chained_assignment", "raise"):
            with pytest.raises(com.SettingWithCopyError):
                s.dt.hour[0] = 5

    @pytest.mark.parametrize(
        "method, dates",
        [
            ["round", ["2012-01-02", "2012-01-02", "2012-01-01"]],
            ["floor", ["2012-01-01", "2012-01-01", "2012-01-01"]],
            ["ceil", ["2012-01-02", "2012-01-02", "2012-01-02"]],
        ],
    )
    def test_dt_round(self, method, dates):
        # round
        s = Series(
            pd.to_datetime(
                ["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
            ),
            name="xxx",
        )
        result = getattr(s.dt, method)("D")
        expected = Series(pd.to_datetime(dates), name="xxx")
        tm.assert_series_equal(result, expected)

    def test_dt_round_tz(self):
        s = Series(
            pd.to_datetime(
                ["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
            ),
            name="xxx",
        )
        result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern").dt.round("D")

        exp_values = pd.to_datetime(
            ["2012-01-01", "2012-01-01", "2012-01-01"]
        ).tz_localize("US/Eastern")
        expected = Series(exp_values, name="xxx")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("method", ["ceil", "round", "floor"])
    def test_dt_round_tz_ambiguous(self, method):
        # GH 18946 round near "fall back" DST
        df1 = pd.DataFrame(
            [
                pd.to_datetime("2017-10-29 02:00:00+02:00", utc=True),
                pd.to_datetime("2017-10-29 02:00:00+01:00", utc=True),
                pd.to_datetime("2017-10-29 03:00:00+01:00", utc=True),
            ],
            columns=["date"],
        )
        df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid")
        # infer
        result = getattr(df1.date.dt, method)("H", ambiguous="infer")
        expected = df1["date"]
        tm.assert_series_equal(result, expected)

        # bool-array
        result = getattr(df1.date.dt, method)("H", ambiguous=[True, False, False])
        tm.assert_series_equal(result, expected)

        # NaT
        result = getattr(df1.date.dt, method)("H", ambiguous="NaT")
        expected = df1["date"].copy()
        expected.iloc[0:2] = pd.NaT
        tm.assert_series_equal(result, expected)

        # raise
        with pytest.raises(pytz.AmbiguousTimeError):
            getattr(df1.date.dt, method)("H", ambiguous="raise")

    @pytest.mark.parametrize(
        "method, ts_str, freq",
        [
            ["ceil", "2018-03-11 01:59:00-0600", "5min"],
            ["round", "2018-03-11 01:59:00-0600", "5min"],
            ["floor", "2018-03-11 03:01:00-0500", "2H"],
        ],
    )
    def test_dt_round_tz_nonexistent(self, method, ts_str, freq):
        # GH 23324 round near "spring forward" DST
        s = Series([pd.Timestamp(ts_str, tz="America/Chicago")])
        result = getattr(s.dt, method)(freq, nonexistent="shift_forward")
        expected = Series([pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago")])
        tm.assert_series_equal(result, expected)

        result = getattr(s.dt, method)(freq, nonexistent="NaT")
        expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz)
        tm.assert_series_equal(result, expected)

        with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"):
            getattr(s.dt, method)(freq, nonexistent="raise")

    def test_dt_namespace_accessor_categorical(self):
        # GH 19468
        dti = DatetimeIndex(["20171111", "20181212"]).repeat(2)
        s = Series(pd.Categorical(dti), name="foo")
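        # .dt on a Categorical of datetimes delegates through the categories,
        # so the repeated values share the same year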
        result = s.dt.year
        expected = Series([2017, 2017, 2018, 2018], name="foo")
        tm.assert_series_equal(result, expected)

    def test_dt_accessor_no_new_attributes(self):
        # https://github.com/pandas-dev/pandas/issues/10673
        s = Series(date_range("20130101", periods=5, freq="D"))
        with pytest.raises(AttributeError, match="You cannot add any new attribute"):
            s.dt.xlabel = "a"

    @pytest.mark.parametrize(
        "time_locale", [None] if tm.get_locales() is None else [None] + tm.get_locales()
    )
    def test_dt_accessor_datetime_name_accessors(self, time_locale):
        # Test Monday -> Sunday and January -> December, in that sequence
        if time_locale is None:
            # If the time_locale is None, day_name and month_name should
            # return the English attributes
            expected_days = [
                "Monday",
                "Tuesday",
                "Wednesday",
                "Thursday",
                "Friday",
                "Saturday",
                "Sunday",
            ]
            expected_months = [
                "January",
                "February",
                "March",
                "April",
                "May",
                "June",
                "July",
                "August",
                "September",
                "October",
                "November",
                "December",
            ]
        else:
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_days = calendar.day_name[:]
                expected_months = calendar.month_name[1:]

        s = Series(date_range(freq="D", start=datetime(1998, 1, 1), periods=365))
        english_days = [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ]
        for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
            name = name.capitalize()
            assert s.dt.weekday_name[day] == eng_name
            assert s.dt.day_name(locale=time_locale)[day] == name
        s = s.append(Series([pd.NaT]))
        assert np.isnan(s.dt.day_name(locale=time_locale).iloc[-1])

        s = Series(date_range(freq="M", start="2012", end="2013"))
        result = s.dt.month_name(locale=time_locale)
        expected = Series([month.capitalize() for month in expected_months])

        # work around https://github.com/pandas-dev/pandas/issues/22342
        result = result.str.normalize("NFD")
        expected = expected.str.normalize("NFD")

        tm.assert_series_equal(result, expected)

        for s_date, expected in zip(s, expected_months):
            result = s_date.month_name(locale=time_locale)
            expected = expected.capitalize()

            result = unicodedata.normalize("NFD", result)
            expected = unicodedata.normalize("NFD", expected)

            assert result == expected

        s = s.append(Series([pd.NaT]))
        assert np.isnan(s.dt.month_name(locale=time_locale).iloc[-1])

    def test_strftime(self):
        # GH 10086
        s = Series(date_range("20130101", periods=5))
        result = s.dt.strftime("%Y/%m/%d")
        expected = Series(
            ["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
        )
        tm.assert_series_equal(result, expected)

        s = Series(date_range("2015-02-03 11:22:33.4567", periods=5))
        result = s.dt.strftime("%Y/%m/%d %H-%M-%S")
        expected = Series(
            [
                "2015/02/03 11-22-33",
                "2015/02/04 11-22-33",
                "2015/02/05 11-22-33",
                "2015/02/06 11-22-33",
                "2015/02/07 11-22-33",
            ]
        )
        tm.assert_series_equal(result, expected)

        s = Series(period_range("20130101", periods=5))
        result = s.dt.strftime("%Y/%m/%d")
        expected = Series(
            ["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
        )
        tm.assert_series_equal(result, expected)

        s = Series(period_range("2015-02-03 11:22:33.4567", periods=5, freq="s"))
        result = s.dt.strftime("%Y/%m/%d %H-%M-%S")
        expected = Series(
            [
                "2015/02/03 11-22-33",
                "2015/02/03 11-22-34",
                "2015/02/03 11-22-35",
                "2015/02/03 11-22-36",
                "2015/02/03 11-22-37",
            ]
        )
        tm.assert_series_equal(result, expected)

        s = Series(date_range("20130101", periods=5))
        s.iloc[0] = pd.NaT
        result = s.dt.strftime("%Y/%m/%d")
        expected = Series(
            ["NaT", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
        )
        tm.assert_series_equal(result, expected)

        datetime_index = date_range("20150301", periods=5)
        result = datetime_index.strftime("%Y/%m/%d")

        expected = Index(
            ["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
            dtype=np.object_,
        )
        # dtype may be S10 or U10 depending on python version
        tm.assert_index_equal(result, expected)

        period_index = period_range("20150301", periods=5)
        result = period_index.strftime("%Y/%m/%d")
        expected = Index(
            ["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
            dtype="=U10",
        )
        tm.assert_index_equal(result, expected)

        s = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)])
        result = s.dt.strftime("%Y-%m-%d %H:%M:%S")
        expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"])
        tm.assert_series_equal(result, expected)

        s = Series(period_range("20130101", periods=4, freq="H"))
        result = s.dt.strftime("%Y/%m/%d %H:%M:%S")
        expected = Series(
            [
                "2013/01/01 00:00:00",
                "2013/01/01 01:00:00",
                "2013/01/01 02:00:00",
                "2013/01/01 03:00:00",
            ]
        )
        tm.assert_series_equal(result, expected)

        s = Series(period_range("20130101", periods=4, freq="L"))
        result = s.dt.strftime("%Y/%m/%d %H:%M:%S.%l")
        expected = Series(
            [
                "2013/01/01 00:00:00.000",
                "2013/01/01 00:00:00.001",
                "2013/01/01 00:00:00.002",
                "2013/01/01 00:00:00.003",
            ]
        )
        tm.assert_series_equal(result, expected)

    def test_valid_dt_with_missing_values(self):

        from datetime import date, time

        # GH 8689
        s = Series(date_range("20130101", periods=5, freq="D"))
        s.iloc[2] = pd.NaT

        for attr in ["microsecond", "nanosecond", "second", "minute", "hour", "day"]:
            expected = getattr(s.dt, attr).copy()
            expected.iloc[2] = np.nan
            result = getattr(s.dt, attr)
            tm.assert_series_equal(result, expected)

        result = s.dt.date
        expected = Series(
            [
                date(2013, 1, 1),
                date(2013, 1, 2),
                np.nan,
                date(2013, 1, 4),
                date(2013, 1, 5),
            ],
            dtype="object",
        )
        tm.assert_series_equal(result, expected)

        result = s.dt.time
        expected = Series([time(0), time(0), np.nan, time(0), time(0)], dtype="object")
        tm.assert_series_equal(result, expected)

    def test_dt_accessor_api(self):
        # GH 9322
        from pandas.core.indexes.accessors import (
            CombinedDatetimelikeProperties,
            DatetimeProperties,
        )

        assert Series.dt is CombinedDatetimelikeProperties

        s = Series(date_range("2000-01-01", periods=3))
        assert isinstance(s.dt, DatetimeProperties)

    @pytest.mark.parametrize(
        "ser", [Series(np.arange(5)), Series(list("abcde")), Series(np.random.randn(5))]
    )
    def test_dt_accessor_invalid(self, ser):
        # GH#9322 check that series with incorrect dtypes don't have attr
        with pytest.raises(AttributeError, match="only use .dt accessor"):
            ser.dt
        assert not hasattr(ser, "dt")

    def test_dt_accessor_updates_on_inplace(self):
        s = Series(pd.date_range("2018-01-01", periods=10))
        s[2] = None
        s.fillna(pd.Timestamp("2018-01-01"), inplace=True)
        result = s.dt.date
        assert result[0] == result[2]

    def test_between(self):
        s = Series(bdate_range("1/1/2000", periods=20).astype(object))
        s[::2] = np.nan

        result = s[s.between(s[3], s[17])]
        expected = s[3:18].dropna()
        assert_series_equal(result, expected)

        result = s[s.between(s[3], s[17], inclusive=False)]
        expected = s[5:16].dropna()
        assert_series_equal(result, expected)

    def test_date_tz(self):
        # GH11757
        rng = pd.DatetimeIndex(
            ["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"],
            tz="US/Eastern",
        )
        s = Series(rng)
        expected = Series([date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)])
        assert_series_equal(s.dt.date, expected)
        assert_series_equal(s.apply(lambda x: x.date()), expected)

    def test_datetime_understood(self):
        # Ensures it doesn't fail to create the right series
        # reported in issue#16726
        series = pd.Series(pd.date_range("2012-01-01", periods=3))
        offset = pd.offsets.DateOffset(days=6)
        result = series - offset
        expected = pd.Series(pd.to_datetime(["2011-12-26", "2011-12-27", "2011-12-28"]))
        tm.assert_series_equal(result, expected)

    def test_dt_timetz_accessor(self, tz_naive_fixture):
        # GH21358
        tz = maybe_get_tz(tz_naive_fixture)

        dtindex = pd.DatetimeIndex(
            ["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], tz=tz
        )
        s = Series(dtindex)
        expected = Series(
            [time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)]
        )
        result = s.dt.timetz
        tm.assert_series_equal(result, expected)

    def test_setitem_with_string_index(self):
        # GH 23451
        x = pd.Series([1, 2, 3], index=["Date", "b", "other"])
        x["Date"] = date.today()
        assert x.Date == date.today()
        assert x["Date"] == date.today()

    def test_setitem_with_different_tz(self):
        # GH#24024
        ser = pd.Series(pd.date_range("2000", periods=2, tz="US/Central"))
        ser[0] = pd.Timestamp("2000", tz="US/Eastern")
        expected = pd.Series(
            [
                pd.Timestamp("2000-01-01 00:00:00-05:00", tz="US/Eastern"),
                pd.Timestamp("2000-01-02 00:00:00-06:00", tz="US/Central"),
            ],
            dtype=object,
        )
        tm.assert_series_equal(ser, expected)
@@ -0,0 +1,521 @@
|
||||
from datetime import datetime, timedelta
|
||||
from importlib import reload
|
||||
import string
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import iNaT
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype, ordered_sentinel
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSeriesDtypes:
|
||||
def test_dt64_series_astype_object(self):
|
||||
dt64ser = Series(date_range("20130101", periods=3))
|
||||
result = dt64ser.astype(object)
|
||||
assert isinstance(result.iloc[0], datetime)
|
||||
assert result.dtype == np.object_
|
||||
|
||||
def test_td64_series_astype_object(self):
|
||||
tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
|
||||
result = tdser.astype(object)
|
||||
assert isinstance(result.iloc[0], timedelta)
|
||||
assert result.dtype == np.object_
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
|
||||
def test_astype(self, dtype):
|
||||
s = Series(np.random.randn(5), name="foo")
|
||||
as_typed = s.astype(dtype)
|
||||
|
||||
assert as_typed.dtype == dtype
|
||||
assert as_typed.name == s.name
|
||||
|
||||
def test_asobject_deprecated(self):
|
||||
s = Series(np.random.randn(5), name="foo")
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
o = s.asobject
|
||||
assert isinstance(o, np.ndarray)
|
||||
|
||||
def test_dtype(self, datetime_series):
|
||||
|
||||
assert datetime_series.dtype == np.dtype("float64")
|
||||
assert datetime_series.dtypes == np.dtype("float64")
|
||||
|
||||
# GH 26705 - Assert .ftype is deprecated
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
assert datetime_series.ftype == "float64:dense"
|
||||
|
||||
# GH 26705 - Assert .ftypes is deprecated
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
assert datetime_series.ftypes == "float64:dense"
|
||||
# GH18243 - Assert .get_ftype_counts is deprecated
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
tm.assert_series_equal(
|
||||
datetime_series.get_ftype_counts(), Series(1, ["float64:dense"])
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize("value", [np.nan, np.inf])
|
||||
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
|
||||
def test_astype_cast_nan_inf_int(self, dtype, value):
|
||||
# gh-14265: check NaN and inf raise error when converting to int
|
||||
msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"
|
||||
s = Series([value])
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.astype(dtype)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
|
||||
def test_astype_cast_object_int_fail(self, dtype):
|
||||
arr = Series(["car", "house", "tree", "1"])
|
||||
msg = r"invalid literal for int\(\) with base 10: 'car'"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr.astype(dtype)
|
||||
|
||||
def test_astype_cast_object_int(self):
|
||||
arr = Series(["1", "2", "3", "4"], dtype=object)
|
||||
result = arr.astype(int)
|
||||
|
||||
tm.assert_series_equal(result, Series(np.arange(1, 5)))

    def test_astype_datetime(self):
        s = Series(iNaT, dtype="M8[ns]", index=range(5))

        s = s.astype("O")
        assert s.dtype == np.object_

        s = Series([datetime(2001, 1, 2, 0, 0)])

        s = s.astype("O")
        assert s.dtype == np.object_

        s = Series([datetime(2001, 1, 2, 0, 0) for _ in range(3)])

        s[1] = np.nan
        assert s.dtype == "M8[ns]"

        s = s.astype("O")
        assert s.dtype == np.object_

    def test_astype_datetime64tz(self):
        s = Series(date_range("20130101", periods=3, tz="US/Eastern"))

        # astype
        result = s.astype(object)
        expected = Series(s.astype(object), dtype=object)
        tm.assert_series_equal(result, expected)

        result = Series(s.values).dt.tz_localize("UTC").dt.tz_convert(s.dt.tz)
        tm.assert_series_equal(result, s)

        # astype - object, preserves on construction
        result = Series(s.astype(object))
        expected = s.astype(object)
        tm.assert_series_equal(result, expected)

        # astype - datetime64[ns, tz]
        result = Series(s.values).astype("datetime64[ns, US/Eastern]")
        tm.assert_series_equal(result, s)

        result = Series(s.values).astype(s.dtype)
        tm.assert_series_equal(result, s)

        result = s.astype("datetime64[ns, CET]")
        expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", [str, np.str_])
    @pytest.mark.parametrize(
        "series",
        [
            Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
            Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]),
        ],
    )
    def test_astype_str_map(self, dtype, series):
        # see gh-4405
        result = series.astype(dtype)
        expected = series.map(str)
        tm.assert_series_equal(result, expected)

    def test_astype_str_cast(self):
        # see gh-9757
        ts = Series([Timestamp("2010-01-04 00:00:00")])
        s = ts.astype(str)

        expected = Series([str("2010-01-04")])
        tm.assert_series_equal(s, expected)

        ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
        s = ts.astype(str)

        expected = Series([str("2010-01-04 00:00:00-05:00")])
        tm.assert_series_equal(s, expected)

        td = Series([Timedelta(1, unit="d")])
        s = td.astype(str)

        expected = Series([str("1 days 00:00:00.000000000")])
        tm.assert_series_equal(s, expected)

    def test_astype_unicode(self):
        # see gh-7758: a bit of magic is required to set
        # default encoding to utf-8
        digits = string.digits
        test_series = [
            Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
            Series(["データーサイエンス、お前はもう死んでいる"]),
        ]

        # the encoding juggling below is a Python 2 relic; on Python 3
        # former_encoding stays None and the restore branch never runs
        former_encoding = None

        if sys.getdefaultencoding() == "utf-8":
            test_series.append(Series(["野菜食べないとやばい".encode("utf-8")]))

        for s in test_series:
            res = s.astype("unicode")
            expec = s.map(str)
            tm.assert_series_equal(res, expec)

        # Restore the former encoding
        if former_encoding is not None and former_encoding != "utf-8":
            reload(sys)
            sys.setdefaultencoding(former_encoding)

    @pytest.mark.parametrize("dtype_class", [dict, Series])
    def test_astype_dict_like(self, dtype_class):
        # see gh-7271
        s = Series(range(0, 10, 2), name="abc")

        dt1 = dtype_class({"abc": str})
        result = s.astype(dt1)
        expected = Series(["0", "2", "4", "6", "8"], name="abc")
        tm.assert_series_equal(result, expected)

        dt2 = dtype_class({"abc": "float64"})
        result = s.astype(dt2)
        expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc")
        tm.assert_series_equal(result, expected)

        dt3 = dtype_class({"abc": str, "def": str})
        msg = (
            "Only the Series name can be used for the key in Series dtype"
            r" mappings\."
        )
        with pytest.raises(KeyError, match=msg):
            s.astype(dt3)

        dt4 = dtype_class({0: str})
        with pytest.raises(KeyError, match=msg):
            s.astype(dt4)

        # GH16717
        # an empty dict-like of dtypes should also raise
        dt5 = dtype_class({})
        with pytest.raises(KeyError, match=msg):
            s.astype(dt5)
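
        # Note: the dict-like form mirrors DataFrame.astype({col: dtype}),
        # but a Series has a single "column", so the only accepted key is the
        # Series name; any other key, or an empty mapping, raises KeyError as
        # asserted above.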

    def test_astype_categories_deprecation_raises(self):
        # deprecated, see GH 17636
        s = Series(["a", "b", "a"])
        with pytest.raises(ValueError, match="Got an unexpected"):
            s.astype("category", categories=["a", "b"], ordered=True)

    @pytest.mark.parametrize(
        "none, warning", [(None, None), (ordered_sentinel, FutureWarning)]
    )
    def test_astype_category_ordered_none_deprecated(self, none, warning):
        # GH 26336: only warn if None is not explicitly passed
        cdt1 = CategoricalDtype(categories=list("cdab"), ordered=True)
        cdt2 = CategoricalDtype(categories=list("cedafb"), ordered=none)
        s = Series(list("abcdaba"), dtype=cdt1)
        with tm.assert_produces_warning(warning, check_stacklevel=False):
            s.astype(cdt2)

    def test_astype_from_categorical(self):
        items = ["a", "b", "c", "a"]
        s = Series(items)
        exp = Series(Categorical(items))
        res = s.astype("category")
        tm.assert_series_equal(res, exp)

        items = [1, 2, 3, 1]
        s = Series(items)
        exp = Series(Categorical(items))
        res = s.astype("category")
        tm.assert_series_equal(res, exp)

        df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], "vals": [1, 2, 3, 4, 5, 6]})
        cats = Categorical([1, 2, 3, 4, 5, 6])
        exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
        df["cats"] = df["cats"].astype("category")
        tm.assert_frame_equal(exp_df, df)

        df = DataFrame(
            {"cats": ["a", "b", "b", "a", "a", "d"], "vals": [1, 2, 3, 4, 5, 6]}
        )
        cats = Categorical(["a", "b", "b", "a", "a", "d"])
        exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
        df["cats"] = df["cats"].astype("category")
        tm.assert_frame_equal(exp_df, df)

        # with keywords
        lst = ["a", "b", "c", "a"]
        s = Series(lst)
        exp = Series(Categorical(lst, ordered=True))
        res = s.astype(CategoricalDtype(None, ordered=True))
        tm.assert_series_equal(res, exp)

        exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True))
        res = s.astype(CategoricalDtype(list("abcdef"), ordered=True))
        tm.assert_series_equal(res, exp)

    def test_astype_categorical_to_other(self):

        value = np.random.RandomState(0).randint(0, 10000, 100)
        df = DataFrame({"value": value})
        labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
        cat_labels = Categorical(labels, labels)

        df = df.sort_values(by=["value"], ascending=True)
        df["value_group"] = pd.cut(
            df.value, range(0, 10500, 500), right=False, labels=cat_labels
        )

        s = df["value_group"]
        expected = s
        tm.assert_series_equal(s.astype("category"), expected)
        tm.assert_series_equal(s.astype(CategoricalDtype()), expected)
        msg = r"could not convert string to float|" r"invalid literal for float\(\)"
        with pytest.raises(ValueError, match=msg):
            s.astype("float64")

        cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
        exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"])
        tm.assert_series_equal(cat.astype("str"), exp)
        s2 = Series(Categorical(["1", "2", "3", "4"]))
        exp2 = Series([1, 2, 3, 4]).astype(int)
        tm.assert_series_equal(s2.astype("int"), exp2)

        # objects don't sort correctly, so just compare that we have the same
        # values
        def cmp(a, b):
            tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b)))

        expected = Series(np.array(s.values), name="value_group")
        cmp(s.astype("object"), expected)
        cmp(s.astype(np.object_), expected)

        # array conversion
        tm.assert_almost_equal(np.array(s), np.array(s.values))

        # valid conversion
        for valid in [
            lambda x: x.astype("category"),
            lambda x: x.astype(CategoricalDtype()),
            lambda x: x.astype("object").astype("category"),
            lambda x: x.astype("object").astype(CategoricalDtype()),
        ]:
            result = valid(s)
            # compare series values
            # internal .categories can't be compared because it is sorted
            tm.assert_series_equal(result, s, check_categorical=False)

        # invalid conversion (these are NOT a dtype)
        msg = (
            r"invalid type <class 'pandas\.core\.arrays\.categorical\."
            "Categorical'> for astype"
        )
        for invalid in [
            lambda x: x.astype(Categorical),
            lambda x: x.astype("object").astype(Categorical),
        ]:
            with pytest.raises(TypeError, match=msg):
                invalid(s)

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("series_ordered", [True, False])
    def test_astype_categorical_to_categorical(
        self, name, dtype_ordered, series_ordered
    ):
        # GH 10696/18593
        s_data = list("abcaacbab")
        s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
        s = Series(s_data, dtype=s_dtype, name=name)

        # unspecified categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = s.astype(dtype)
        exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
        expected = Series(s_data, name=name, dtype=exp_dtype)
        tm.assert_series_equal(result, expected)

        # different categories
        dtype = CategoricalDtype(list("adc"), dtype_ordered)
        result = s.astype(dtype)
        expected = Series(s_data, name=name, dtype=dtype)
        tm.assert_series_equal(result, expected)

        if dtype_ordered is False:
            # not specifying ordered, so only test once
            expected = s
            result = s.astype("category")
            tm.assert_series_equal(result, expected)

    def test_astype_categoricaldtype(self):
        s = Series(["a", "b", "a"])
        result = s.astype(CategoricalDtype(["a", "b"], ordered=True))
        expected = Series(Categorical(["a", "b", "a"], ordered=True))
        tm.assert_series_equal(result, expected)

        result = s.astype(CategoricalDtype(["a", "b"], ordered=False))
        expected = Series(Categorical(["a", "b", "a"], ordered=False))
        tm.assert_series_equal(result, expected)

        result = s.astype(CategoricalDtype(["a", "b", "c"], ordered=False))
        expected = Series(
            Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False)
        )
        tm.assert_series_equal(result, expected)
        tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"]))

    @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
    def test_astype_generic_timestamp_no_frequency(self, dtype):
        # see gh-15524, gh-15987
        data = [1]
        s = Series(data)

        msg = (
            r"The '{dtype}' dtype has no unit\. "
            r"Please pass in '{dtype}\[ns\]' instead."
        ).format(dtype=dtype.__name__)
        with pytest.raises(ValueError, match=msg):
            s.astype(dtype)
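
        # Note: only the unit-less generic dtypes are rejected here; an
        # explicit unit such as "datetime64[ns]" or "timedelta64[ns]" is the
        # supported spelling.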

    @pytest.mark.parametrize("dtype", np.typecodes["All"])
    def test_astype_empty_constructor_equality(self, dtype):
        # see gh-15524

        if dtype not in (
            "S",
            "V",  # poor support (if any) currently
            "M",
            "m",  # Generic timestamps raise a ValueError. Already tested.
        ):
            init_empty = Series([], dtype=dtype)
            as_type_empty = Series([]).astype(dtype)
            tm.assert_series_equal(init_empty, as_type_empty)

    @pytest.mark.filterwarnings("ignore::FutureWarning")
    def test_complex(self):
        # see gh-4819: complex access for ndarray compat
        a = np.arange(5, dtype=np.float64)
        b = Series(a + 4j * a)

        tm.assert_numpy_array_equal(a, np.real(b))
        tm.assert_numpy_array_equal(4 * a, np.imag(b))

        b.real = np.arange(5) + 5
        tm.assert_numpy_array_equal(a + 5, np.real(b))
        tm.assert_numpy_array_equal(4 * a, np.imag(b))

    def test_real_imag_deprecated(self):
        # GH 18262
        s = pd.Series([1])
        with tm.assert_produces_warning(FutureWarning):
            s.imag
            s.real

    def test_arg_for_errors_in_astype(self):
        # see gh-14878
        s = Series([1, 2, 3])

        msg = (
            r"Expected value of kwarg 'errors' to be one of \['raise',"
            r" 'ignore'\]\. Supplied value is 'False'"
        )
        with pytest.raises(ValueError, match=msg):
            s.astype(np.float64, errors=False)

        s.astype(np.int8, errors="raise")

    def test_intercept_astype_object(self):
        series = Series(date_range("1/1/2000", periods=10))

        # This test no longer makes sense, as
        # Series is by default already M8[ns].
        expected = series.astype("object")

        df = DataFrame({"a": series, "b": np.random.randn(len(series))})
        exp_dtypes = Series(
            [np.dtype("datetime64[ns]"), np.dtype("float64")], index=["a", "b"]
        )
        tm.assert_series_equal(df.dtypes, exp_dtypes)

        result = df.values.squeeze()
        assert (result[:, 0] == expected.values).all()

        df = DataFrame({"a": series, "b": ["foo"] * len(series)})

        result = df.values.squeeze()
        assert (result[:, 0] == expected.values).all()

    def test_series_to_categorical(self):
        # see gh-16524: test conversion of Series to Categorical
        series = Series(["a", "b", "c"])

        result = Series(series, dtype="category")
        expected = Series(["a", "b", "c"], dtype="category")

        tm.assert_series_equal(result, expected)

    def test_infer_objects_series(self):
        # GH 11221
        actual = Series(np.array([1, 2, 3], dtype="O")).infer_objects()
        expected = Series([1, 2, 3])
        tm.assert_series_equal(actual, expected)

        actual = Series(np.array([1, 2, 3, None], dtype="O")).infer_objects()
        expected = Series([1.0, 2.0, 3.0, np.nan])
        tm.assert_series_equal(actual, expected)

        # only soft conversions; unconvertible values pass through unchanged
        actual = Series(np.array([1, 2, 3, None, "a"], dtype="O")).infer_objects()
        expected = Series([1, 2, 3, None, "a"])

        assert actual.dtype == "object"
        tm.assert_series_equal(actual, expected)

    def test_is_homogeneous_type(self):
        assert Series()._is_homogeneous_type
        assert Series([1, 2])._is_homogeneous_type
        assert Series(pd.Categorical([1, 2]))._is_homogeneous_type

    @pytest.mark.parametrize(
        "data",
        [
            pd.period_range("2000", periods=4),
            pd.IntervalIndex.from_breaks([1, 2, 3, 4]),
        ],
    )
    def test_values_compatibility(self, data):
        # https://github.com/pandas-dev/pandas/issues/23995
        result = pd.Series(data).values
        expected = np.array(data.astype(object))
        tm.assert_numpy_array_equal(result, expected)
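
    # Note: for these extension-backed Series, .values densifies to an object
    # ndarray; Series.array returns the underlying ExtensionArray without
    # that conversion.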
@@ -0,0 +1,160 @@
import numpy as np
import pytest

from pandas import Categorical, Series
import pandas.util.testing as tm


def test_value_counts_nunique():
    # basics.rst doc example
    series = Series(np.random.randn(500))
    series[20:500] = np.nan
    series[10:20] = 5000
    result = series.nunique()
    assert result == 11

    # GH 18051
    s = Series(Categorical([]))
    assert s.nunique() == 0
    s = Series(Categorical([np.nan]))
    assert s.nunique() == 0
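

# Note: nunique drops missing values by default, which is why an all-NaN
# categorical reports zero unique values above.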


def test_unique():
    # GH714 also, dtype=float
    s = Series([1.2345] * 100)
    s[::2] = np.nan
    result = s.unique()
    assert len(result) == 2

    s = Series([1.2345] * 100, dtype="f4")
    s[::2] = np.nan
    result = s.unique()
    assert len(result) == 2

    # NAs in object arrays #714
    s = Series(["foo"] * 100, dtype="O")
    s[::2] = np.nan
    result = s.unique()
    assert len(result) == 2

    # decision about None
    s = Series([1, 2, 3, None, None, None], dtype=object)
    result = s.unique()
    expected = np.array([1, 2, 3, None], dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    # GH 18051
    s = Series(Categorical([]))
    tm.assert_categorical_equal(s.unique(), Categorical([]), check_dtype=False)
    s = Series(Categorical([np.nan]))
    tm.assert_categorical_equal(s.unique(), Categorical([np.nan]), check_dtype=False)


def test_unique_data_ownership():
    # it works! #1807
    Series(Series(["a", "c", "b"]).unique()).sort_values()


@pytest.mark.parametrize(
    "data, expected",
    [
        (np.random.randint(0, 10, size=1000), False),
        (np.arange(1000), True),
        ([], True),
        ([np.nan], True),
        (["foo", "bar", np.nan], True),
        (["foo", "foo", np.nan], False),
        (["foo", "bar", np.nan, np.nan], False),
    ],
)
def test_is_unique(data, expected):
    # GH11946 / GH25180
    s = Series(data)
    assert s.is_unique is expected


def test_is_unique_class_ne(capsys):
    # GH 20661
    class Foo:
        def __init__(self, val):
            self._value = val

        def __ne__(self, other):
            raise Exception("NEQ not supported")

    with capsys.disabled():
        li = [Foo(i) for i in range(5)]
        s = Series(li, index=list(range(5)))
        s.is_unique
    captured = capsys.readouterr()
    assert len(captured.err) == 0


@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, False, False, True, True, False])),
        ("last", Series([False, True, True, False, False, False, False])),
        (False, Series([False, True, True, False, True, True, False])),
    ],
)
def test_drop_duplicates(any_numpy_dtype, keep, expected):
    tc = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype))

    if tc.dtype == "bool":
        pytest.skip("tested separately in test_drop_duplicates_bool")

    tm.assert_series_equal(tc.duplicated(keep=keep), expected)
    tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
    sc = tc.copy()
    sc.drop_duplicates(keep=keep, inplace=True)
    tm.assert_series_equal(sc, tc[~expected])


@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, True])),
        ("last", Series([True, True, False, False])),
        (False, Series([True, True, True, True])),
    ],
)
def test_drop_duplicates_bool(keep, expected):
    tc = Series([True, False, True, False])

    tm.assert_series_equal(tc.duplicated(keep=keep), expected)
    tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
    sc = tc.copy()
    sc.drop_duplicates(keep=keep, inplace=True)
    tm.assert_series_equal(sc, tc[~expected])


@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, False, True], name="name")),
        ("last", Series([True, True, False, False, False], name="name")),
        (False, Series([True, True, True, False, True], name="name")),
    ],
)
def test_duplicated_keep(keep, expected):
    s = Series(["a", "b", "b", "c", "a"], name="name")

    result = s.duplicated(keep=keep)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, False, True])),
        ("last", Series([True, True, False, False, False])),
        (False, Series([True, True, True, False, True])),
    ],
)
def test_duplicated_nan_none(keep, expected):
    s = Series([np.nan, 3, 3, None, np.nan], dtype=object)

    result = s.duplicated(keep=keep)
    tm.assert_series_equal(result, expected)
@@ -0,0 +1,113 @@
import numpy as np
import pytest

import pandas as pd
from pandas.util import testing as tm


def test_basic():
    s = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"), name="foo")
    result = s.explode()
    expected = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object, name="foo"
    )
    tm.assert_series_equal(result, expected)
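

# Note on the expected values above: explode repeats each index label once per
# element, turns empty list-likes into a single NaN, and returns object dtype
# for object-dtype input.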


def test_mixed_type():
    s = pd.Series(
        [[0, 1, 2], np.nan, None, np.array([]), pd.Series(["a", "b"])], name="foo"
    )
    result = s.explode()
    expected = pd.Series(
        [0, 1, 2, np.nan, None, np.nan, "a", "b"],
        index=[0, 0, 0, 1, 2, 3, 4, 4],
        dtype=object,
        name="foo",
    )
    tm.assert_series_equal(result, expected)


def test_empty():
    s = pd.Series()
    result = s.explode()
    expected = s.copy()
    tm.assert_series_equal(result, expected)


def test_nested_lists():
    s = pd.Series([[[1, 2, 3]], [1, 2], 1])
    result = s.explode()
    expected = pd.Series([[1, 2, 3], 1, 2, 1], index=[0, 1, 1, 2])
    tm.assert_series_equal(result, expected)


def test_multi_index():
    s = pd.Series(
        [[0, 1, 2], np.nan, [], (3, 4)],
        name="foo",
        index=pd.MultiIndex.from_product([list("ab"), range(2)], names=["foo", "bar"]),
    )
    result = s.explode()
    index = pd.MultiIndex.from_tuples(
        [("a", 0), ("a", 0), ("a", 0), ("a", 1), ("b", 0), ("b", 1), ("b", 1)],
        names=["foo", "bar"],
    )
    expected = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4], index=index, dtype=object, name="foo"
    )
    tm.assert_series_equal(result, expected)


def test_large():
    # exploding an already-exploded Series is a no-op
    s = pd.Series([range(256)]).explode()
    result = s.explode()
    tm.assert_series_equal(result, s)


def test_invert_array():
    df = pd.DataFrame({"a": pd.date_range("20190101", periods=3, tz="UTC")})

    listify = df.apply(lambda x: x.array, axis=1)
    result = listify.explode()
    tm.assert_series_equal(result, df["a"].rename())


@pytest.mark.parametrize(
    "s", [pd.Series([1, 2, 3]), pd.Series(pd.date_range("2019", periods=3, tz="UTC"))]
)
def test_non_object_dtype(s):
    # non-object dtypes have nothing to explode and round-trip unchanged
    result = s.explode()
    tm.assert_series_equal(result, s)


def test_typical_usecase():

    df = pd.DataFrame(
        [{"var1": "a,b,c", "var2": 1}, {"var1": "d,e,f", "var2": 2}],
        columns=["var1", "var2"],
    )
    exploded = df.var1.str.split(",").explode()
    result = df[["var2"]].join(exploded)
    expected = pd.DataFrame(
        {"var2": [1, 1, 1, 2, 2, 2], "var1": list("abcdef")},
        columns=["var2", "var1"],
        index=[0, 0, 0, 1, 1, 1],
    )
    tm.assert_frame_equal(result, expected)


def test_nested_EA():
    # a nested EA array
    s = pd.Series(
        [
            pd.date_range("20170101", periods=3, tz="UTC"),
            pd.date_range("20170104", periods=3, tz="UTC"),
        ]
    )
    result = s.explode()
    expected = pd.Series(
        pd.date_range("20170101", periods=6, tz="UTC"), index=[0, 0, 0, 1, 1, 1]
    )
    tm.assert_series_equal(result, expected)
@@ -0,0 +1,252 @@
from datetime import datetime

import numpy as np
import pytest

import pandas as pd
from pandas import NaT, Series, Timestamp
from pandas.core.internals.blocks import IntBlock
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal


class TestSeriesInternals:

    # GH 10265
    def test_convert(self):
        # Tests: All to nans, coerce, true
        # Test coercion returns correct type
        s = Series(["a", "b", "c"])
        results = s._convert(datetime=True, coerce=True)
        expected = Series([NaT] * 3)
        assert_series_equal(results, expected)

        results = s._convert(numeric=True, coerce=True)
        expected = Series([np.nan] * 3)
        assert_series_equal(results, expected)

        expected = Series([NaT] * 3, dtype=np.dtype("m8[ns]"))
        results = s._convert(timedelta=True, coerce=True)
        assert_series_equal(results, expected)

        dt = datetime(2001, 1, 1, 0, 0)
        td = dt - datetime(2000, 1, 1, 0, 0)

        # Test coercion with mixed types
        s = Series(["a", "3.1415", dt, td])
        results = s._convert(datetime=True, coerce=True)
        expected = Series([NaT, NaT, dt, NaT])
        assert_series_equal(results, expected)

        results = s._convert(numeric=True, coerce=True)
        expected = Series([np.nan, 3.1415, np.nan, np.nan])
        assert_series_equal(results, expected)

        results = s._convert(timedelta=True, coerce=True)
        expected = Series([NaT, NaT, NaT, td], dtype=np.dtype("m8[ns]"))
        assert_series_equal(results, expected)

        # Test standard conversion returns original
        results = s._convert(datetime=True)
        assert_series_equal(results, s)
        results = s._convert(numeric=True)
        expected = Series([np.nan, 3.1415, np.nan, np.nan])
        assert_series_equal(results, expected)
        results = s._convert(timedelta=True)
        assert_series_equal(results, s)

        # test pass-through and non-conversion when other types selected
        s = Series(["1.0", "2.0", "3.0"])
        results = s._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([1.0, 2.0, 3.0])
        assert_series_equal(results, expected)
        results = s._convert(True, False, True)
        assert_series_equal(results, s)

        s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype="O")
        results = s._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)])
        assert_series_equal(results, expected)
        results = s._convert(datetime=False, numeric=True, timedelta=True)
        assert_series_equal(results, s)

        td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0)
        s = Series([td, td], dtype="O")
        results = s._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([td, td])
        assert_series_equal(results, expected)
        results = s._convert(True, True, False)
        assert_series_equal(results, s)

        s = Series([1.0, 2, 3], index=["a", "b", "c"])
        result = s._convert(numeric=True)
        assert_series_equal(result, s)

        # force numeric conversion
        r = s.copy().astype("O")
        r["a"] = "1"
        result = r._convert(numeric=True)
        assert_series_equal(result, s)

        r = s.copy().astype("O")
        r["a"] = "1."
        result = r._convert(numeric=True)
        assert_series_equal(result, s)

        r = s.copy().astype("O")
        r["a"] = "garbled"
        result = r._convert(numeric=True)
        expected = s.copy()
        expected["a"] = np.nan
        assert_series_equal(result, expected)

        # GH 4119, not converting a mixed type (e.g. floats and object)
        s = Series([1, "na", 3, 4])
        result = s._convert(datetime=True, numeric=True)
        expected = Series([1, np.nan, 3, 4])
        assert_series_equal(result, expected)

        s = Series([1, "", 3, 4])
        result = s._convert(datetime=True, numeric=True)
        assert_series_equal(result, expected)

        # dates
        s = Series(
            [
                datetime(2001, 1, 1, 0, 0),
                datetime(2001, 1, 2, 0, 0),
                datetime(2001, 1, 3, 0, 0),
            ]
        )
        s2 = Series(
            [
                datetime(2001, 1, 1, 0, 0),
                datetime(2001, 1, 2, 0, 0),
                datetime(2001, 1, 3, 0, 0),
                "foo",
                1.0,
                1,
                Timestamp("20010104"),
                "20010105",
            ],
            dtype="O",
        )

        result = s._convert(datetime=True)
        expected = Series(
            [Timestamp("20010101"), Timestamp("20010102"), Timestamp("20010103")],
            dtype="M8[ns]",
        )
        assert_series_equal(result, expected)

        result = s._convert(datetime=True, coerce=True)
        assert_series_equal(result, expected)

        expected = Series(
            [
                Timestamp("20010101"),
                Timestamp("20010102"),
                Timestamp("20010103"),
                NaT,
                NaT,
                NaT,
                Timestamp("20010104"),
                Timestamp("20010105"),
            ],
            dtype="M8[ns]",
        )
        result = s2._convert(datetime=True, numeric=False, timedelta=False, coerce=True)
        assert_series_equal(result, expected)
        result = s2._convert(datetime=True, coerce=True)
        assert_series_equal(result, expected)

        s = Series(["foo", "bar", 1, 1.0], dtype="O")
        result = s._convert(datetime=True, coerce=True)
        expected = Series([NaT] * 2 + [Timestamp(1)] * 2)
        assert_series_equal(result, expected)

        # preserve non-object dtypes unchanged
        s = Series([1], dtype="float32")
        result = s._convert(datetime=True, coerce=True)
        assert_series_equal(result, s)

        # r = s.copy()
        # r[0] = np.nan
        # result = r._convert(convert_dates=True,convert_numeric=False)
        # assert result.dtype == 'M8[ns]'

        # dateutil parses some single letters into today's value as a date
        expected = Series([NaT])
        for x in "abcdefghijklmnopqrstuvwxyz":
            s = Series([x])
            result = s._convert(datetime=True, coerce=True)
            assert_series_equal(result, expected)
            s = Series([x.upper()])
            result = s._convert(datetime=True, coerce=True)
            assert_series_equal(result, expected)

    def test_convert_no_arg_error(self):
        s = Series(["1.0", "2"])
        msg = r"At least one of datetime, numeric or timedelta must be True\."
        with pytest.raises(ValueError, match=msg):
            s._convert()

    def test_convert_preserve_bool(self):
        s = Series([1, True, 3, 5], dtype=object)
        r = s._convert(datetime=True, numeric=True)
        e = Series([1, 1, 3, 5], dtype="i8")
        tm.assert_series_equal(r, e)

    def test_convert_preserve_all_bool(self):
        s = Series([False, True, False, False], dtype=object)
        r = s._convert(datetime=True, numeric=True)
        e = Series([False, True, False, False], dtype=bool)
        tm.assert_series_equal(r, e)

    def test_constructor_no_pandas_array(self):
        ser = pd.Series([1, 2, 3])
        result = pd.Series(ser.array)
        tm.assert_series_equal(ser, result)
        assert isinstance(result._data.blocks[0], IntBlock)

    def test_astype_no_pandas_dtype(self):
        # https://github.com/pandas-dev/pandas/pull/24866
        ser = pd.Series([1, 2], dtype="int64")
        # Don't have PandasDtype in the public API, so we use `.array.dtype`,
        # which is a PandasDtype.
        result = ser.astype(ser.array.dtype)
        tm.assert_series_equal(result, ser)

    def test_from_array(self):
        result = pd.Series(pd.array(["1H", "2H"], dtype="timedelta64[ns]"))
        assert result._data.blocks[0].is_extension is False

        result = pd.Series(pd.array(["2015"], dtype="datetime64[ns]"))
        assert result._data.blocks[0].is_extension is False

    def test_from_list_dtype(self):
        result = pd.Series(["1H", "2H"], dtype="timedelta64[ns]")
        assert result._data.blocks[0].is_extension is False

        result = pd.Series(["2015"], dtype="datetime64[ns]")
        assert result._data.blocks[0].is_extension is False


def test_hasnans_uncached_for_series():
    # GH#19700
    idx = pd.Index([0, 1])
    assert idx.hasnans is False
    assert "hasnans" in idx._cache
    ser = idx.to_series()
    assert ser.hasnans is False
    assert not hasattr(ser, "_cache")
    ser.iloc[-1] = np.nan
    assert ser.hasnans is True
    assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__


def test_put_deprecated():
    # GH 18262
    s = pd.Series([1])
    with tm.assert_produces_warning(FutureWarning):
        s.put(0, 0)
venv/lib/python3.6/site-packages/pandas/tests/series/test_io.py
@@ -0,0 +1,272 @@
import collections
from datetime import datetime
from io import StringIO

import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Series
import pandas.util.testing as tm
from pandas.util.testing import (
    assert_almost_equal,
    assert_frame_equal,
    assert_series_equal,
    ensure_clean,
)

from pandas.io.common import _get_handle


class TestSeriesToCSV:
    def read_csv(self, path, **kwargs):
        params = dict(squeeze=True, index_col=0, header=None, parse_dates=True)
        params.update(**kwargs)

        header = params.get("header")
        out = pd.read_csv(path, **params)

        if header is None:
            out.name = out.index.name = None

        return out

    @pytest.mark.parametrize("arg", ["path", "header", "both"])
    def test_to_csv_deprecation(self, arg, datetime_series):
        # see gh-19715
        with ensure_clean() as path:
            if arg == "path":
                kwargs = dict(path=path, header=False)
            elif arg == "header":
                kwargs = dict(path_or_buf=path)
            else:  # both discrepancies match
                kwargs = dict(path=path)

            with tm.assert_produces_warning(FutureWarning):
                datetime_series.to_csv(**kwargs)

            # Make sure the roundtrip still works.
            ts = self.read_csv(path)
            assert_series_equal(datetime_series, ts, check_names=False)

    def test_from_csv(self, datetime_series, string_series):
        with ensure_clean() as path:
            datetime_series.to_csv(path, header=False)
            ts = self.read_csv(path)
            assert_series_equal(datetime_series, ts, check_names=False)

            assert ts.name is None
            assert ts.index.name is None

            # see gh-10483
            datetime_series.to_csv(path, header=True)
            ts_h = self.read_csv(path, header=0)
            assert ts_h.name == "ts"

            string_series.to_csv(path, header=False)
            series = self.read_csv(path)
            assert_series_equal(string_series, series, check_names=False)

            assert series.name is None
            assert series.index.name is None

            string_series.to_csv(path, header=True)
            series_h = self.read_csv(path, header=0)
            assert series_h.name == "series"

            with open(path, "w") as outfile:
                outfile.write("1998-01-01|1.0\n1999-01-01|2.0")

            series = self.read_csv(path, sep="|")
            check_series = Series(
                {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}
            )
            assert_series_equal(check_series, series)

            series = self.read_csv(path, sep="|", parse_dates=False)
            check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0})
            assert_series_equal(check_series, series)

    def test_to_csv(self, datetime_series):
        import io

        with ensure_clean() as path:
            datetime_series.to_csv(path, header=False)

            with io.open(path, newline=None) as f:
                lines = f.readlines()
            assert lines[1] != "\n"

            datetime_series.to_csv(path, index=False, header=False)
            arr = np.loadtxt(path)
            assert_almost_equal(arr, datetime_series.values)

    def test_to_csv_unicode_index(self):
        buf = StringIO()
        s = Series(["\u05d0", "d2"], index=["\u05d0", "\u05d1"])

        s.to_csv(buf, encoding="UTF-8", header=False)
        buf.seek(0)

        s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
        assert_series_equal(s, s2)

    def test_to_csv_float_format(self):
        with ensure_clean() as filename:
            ser = Series([0.123456, 0.234567, 0.567567])
            ser.to_csv(filename, float_format="%.2f", header=False)

            rs = self.read_csv(filename)
            xp = Series([0.12, 0.23, 0.57])
            assert_series_equal(rs, xp)

    def test_to_csv_list_entries(self):
        s = Series(["jack and jill", "jesse and frank"])

        split = s.str.split(r"\s+and\s+")

        buf = StringIO()
        # just make sure writing list-valued entries does not raise
        split.to_csv(buf, header=False)

    def test_to_csv_path_is_none(self):
        # GH 8215
        # Series.to_csv() was returning None, inconsistent with
        # DataFrame.to_csv() which returned string
        s = Series([1, 2, 3])
        csv_str = s.to_csv(path_or_buf=None, header=False)
        assert isinstance(csv_str, str)
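
        # Note: this mirrors DataFrame.to_csv, which also returns the
        # rendered CSV as a string when path_or_buf is None.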

    @pytest.mark.parametrize(
        "s,encoding",
        [
            (
                Series([0.123456, 0.234567, 0.567567], index=["A", "B", "C"], name="X"),
                None,
            ),
            # GH 21241, 21118
            (Series(["abc", "def", "ghi"], name="X"), "ascii"),
            (Series(["123", "你好", "世界"], name="中文"), "gb2312"),
            (Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), "cp737"),
        ],
    )
    def test_to_csv_compression(self, s, encoding, compression):
        with ensure_clean() as filename:
            s.to_csv(filename, compression=compression, encoding=encoding, header=True)

            # test the round trip - to_csv -> read_csv
            result = pd.read_csv(
                filename,
                compression=compression,
                encoding=encoding,
                index_col=0,
                squeeze=True,
            )
            assert_series_equal(s, result)

            # test the round trip using file handle - to_csv -> read_csv
            f, _handles = _get_handle(
                filename, "w", compression=compression, encoding=encoding
            )
            with f:
                s.to_csv(f, encoding=encoding, header=True)
            result = pd.read_csv(
                filename,
                compression=compression,
                encoding=encoding,
                index_col=0,
                squeeze=True,
            )
            assert_series_equal(s, result)

            # explicitly ensure file was compressed
            with tm.decompress_file(filename, compression) as fh:
                text = fh.read().decode(encoding or "utf8")
                assert s.name in text

            with tm.decompress_file(filename, compression) as fh:
                assert_series_equal(
                    s, pd.read_csv(fh, index_col=0, squeeze=True, encoding=encoding)
                )


class TestSeriesIO:
    def test_to_frame(self, datetime_series):
        datetime_series.name = None
        rs = datetime_series.to_frame()
        xp = pd.DataFrame(datetime_series.values, index=datetime_series.index)
        assert_frame_equal(rs, xp)

        datetime_series.name = "testname"
        rs = datetime_series.to_frame()
        xp = pd.DataFrame(
            dict(testname=datetime_series.values), index=datetime_series.index
        )
        assert_frame_equal(rs, xp)

        rs = datetime_series.to_frame(name="testdifferent")
        xp = pd.DataFrame(
            dict(testdifferent=datetime_series.values), index=datetime_series.index
        )
        assert_frame_equal(rs, xp)

    def test_timeseries_periodindex(self):
        # GH2891
        from pandas import period_range

        prng = period_range("1/1/2011", "1/1/2012", freq="M")
        ts = Series(np.random.randn(len(prng)), prng)
        new_ts = tm.round_trip_pickle(ts)
        assert new_ts.index.freq == "M"

    def test_pickle_preserve_name(self):
        for n in [777, 777.0, "name", datetime(2001, 11, 11), (1, 2)]:
            unpickled = self._pickle_roundtrip_name(tm.makeTimeSeries(name=n))
            assert unpickled.name == n

    def test_pickle_categorical_ordered_from_sentinel(self):
        # GH 27295: can remove test when _ordered_from_sentinel is removed (GH 26403)
        s = Series(["a", "b", "c", "a"], dtype="category")
        result = tm.round_trip_pickle(s)
        result = result.astype("category")

        tm.assert_series_equal(result, s)
        assert result.dtype._ordered_from_sentinel is False

    def _pickle_roundtrip_name(self, obj):
        with ensure_clean() as path:
            obj.to_pickle(path)
            unpickled = pd.read_pickle(path)
            return unpickled

    def test_to_frame_expanddim(self):
        # GH 9762

        class SubclassedSeries(Series):
            @property
            def _constructor_expanddim(self):
                return SubclassedFrame

        class SubclassedFrame(DataFrame):
            pass

        s = SubclassedSeries([1, 2, 3], name="X")
        result = s.to_frame()
        assert isinstance(result, SubclassedFrame)
        expected = SubclassedFrame({"X": [1, 2, 3]})
        assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "mapping", (dict, collections.defaultdict(list), collections.OrderedDict)
    )
    def test_to_dict(self, mapping, datetime_series):
        # GH16122
        tm.assert_series_equal(
            Series(datetime_series.to_dict(mapping), name="ts"), datetime_series
        )
        from_method = Series(datetime_series.to_dict(collections.Counter))
        from_constructor = Series(collections.Counter(datetime_series.items()))
        tm.assert_series_equal(from_method, from_constructor)
venv/lib/python3.6/site-packages/pandas/tests/series/test_missing.py
File diff suppressed because it is too large
@@ -0,0 +1,794 @@
from datetime import datetime, timedelta
import operator

import numpy as np
import pytest

import pandas as pd
from pandas import Categorical, DataFrame, Index, Series, bdate_range, date_range, isna
from pandas.core import ops
from pandas.core.indexes.base import InvalidIndexError
import pandas.core.nanops as nanops
import pandas.util.testing as tm
from pandas.util.testing import (
    assert_almost_equal,
    assert_frame_equal,
    assert_index_equal,
    assert_series_equal,
)

from .common import TestData


class TestSeriesLogicalOps:
    @pytest.mark.parametrize("bool_op", [operator.and_, operator.or_, operator.xor])
    def test_bool_operators_with_nas(self, bool_op):
        # boolean &, |, ^ should work with object arrays and propagate NAs
        ser = Series(bdate_range("1/1/2000", periods=10), dtype=object)
        ser[::2] = np.nan

        mask = ser.isna()
        filled = ser.fillna(ser[0])

        result = bool_op(ser < ser[9], ser > ser[3])

        expected = bool_op(filled < filled[9], filled > filled[3])
        expected[mask] = False
        assert_series_equal(result, expected)

    def test_operators_bitwise(self):
        # GH#9016: support bitwise op for integer types
        index = list("bca")

        s_tft = Series([True, False, True], index=index)
        s_fff = Series([False, False, False], index=index)
        s_tff = Series([True, False, False], index=index)
        s_empty = Series([])

        # TODO: unused
        # s_0101 = Series([0, 1, 0, 1])

        s_0123 = Series(range(4), dtype="int64")
        s_3333 = Series([3] * 4)
        s_4444 = Series([4] * 4)

        res = s_tft & s_empty
        expected = s_fff
        assert_series_equal(res, expected)

        res = s_tft | s_empty
        expected = s_tft
        assert_series_equal(res, expected)

        res = s_0123 & s_3333
        expected = Series(range(4), dtype="int64")
        assert_series_equal(res, expected)

        res = s_0123 | s_4444
        expected = Series(range(4, 8), dtype="int64")
        assert_series_equal(res, expected)

        s_a0b1c0 = Series([1], list("b"))

        res = s_tft & s_a0b1c0
        expected = s_tff.reindex(list("abc"))
        assert_series_equal(res, expected)

        res = s_tft | s_a0b1c0
        expected = s_tft.reindex(list("abc"))
        assert_series_equal(res, expected)

        n0 = 0
        res = s_tft & n0
        expected = s_fff
        assert_series_equal(res, expected)

        res = s_0123 & n0
        expected = Series([0] * 4)
        assert_series_equal(res, expected)

        n1 = 1
        res = s_tft & n1
        expected = s_tft
        assert_series_equal(res, expected)

        res = s_0123 & n1
        expected = Series([0, 1, 0, 1])
        assert_series_equal(res, expected)

        s_1111 = Series([1] * 4, dtype="int8")
        res = s_0123 & s_1111
        expected = Series([0, 1, 0, 1], dtype="int64")
        assert_series_equal(res, expected)

        res = s_0123.astype(np.int16) | s_1111.astype(np.int32)
        expected = Series([1, 1, 3, 3], dtype="int32")
        assert_series_equal(res, expected)

        with pytest.raises(TypeError):
            s_1111 & "a"
        with pytest.raises(TypeError):
            s_1111 & ["a", "b", "c", "d"]
        with pytest.raises(TypeError):
            s_0123 & np.nan
        with pytest.raises(TypeError):
            s_0123 & 3.14
        with pytest.raises(TypeError):
            s_0123 & [0.1, 4, 3.14, 2]

        # s_0123 will be all false now because of reindexing like s_tft
        exp = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"])
        assert_series_equal(s_tft & s_0123, exp)

        # s_tft will be all false now because of reindexing like s_0123
        exp = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"])
        assert_series_equal(s_0123 & s_tft, exp)

        assert_series_equal(s_0123 & False, Series([False] * 4))
        assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))
        assert_series_equal(s_0123 & [False], Series([False] * 4))
        assert_series_equal(s_0123 & (False), Series([False] * 4))
        assert_series_equal(
            s_0123 & Series([False, np.nan, False, False]), Series([False] * 4)
        )

        s_ftft = Series([False, True, False, True])
        assert_series_equal(s_0123 & Series([0.1, 4, -3.14, 2]), s_ftft)

        s_abNd = Series(["a", "b", np.nan, "d"])
        res = s_0123 & s_abNd
        expected = s_ftft
        assert_series_equal(res, expected)
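
        # Note on the mixed-index cases above: logical ops align both
        # operands on the union of the indexes first, and labels missing
        # from one side are treated as False, which is why those results
        # come out all-False.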

    def test_scalar_na_logical_ops_corners(self):
        s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])

        with pytest.raises(TypeError):
            s & datetime(2005, 1, 1)

        s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
        s[::2] = np.nan

        expected = Series(True, index=s.index)
        expected[::2] = False
        result = s & list(s)
        assert_series_equal(result, expected)

        d = DataFrame({"A": s})
        # TODO: fix this exception (see GH5035); previously it was a
        # TypeError because Series returned NotImplemented.

        # this is an alignment issue; these are equivalent
        # https://github.com/pandas-dev/pandas/issues/5284

        with pytest.raises(TypeError):
            d.__and__(s, axis="columns")

        with pytest.raises(TypeError):
            s & d

        # this is wrong as it is not a boolean result
        # result = d.__and__(s, axis='index')

    @pytest.mark.parametrize("op", [operator.and_, operator.or_, operator.xor])
    def test_logical_ops_with_index(self, op):
        # GH#22092, GH#19792
        ser = Series([True, True, False, False])
        idx1 = Index([True, False, True, False])
        idx2 = Index([1, 0, 1, 0])

        expected = Series([op(ser[n], idx1[n]) for n in range(len(ser))])

        result = op(ser, idx1)
        assert_series_equal(result, expected)

        expected = Series([op(ser[n], idx2[n]) for n in range(len(ser))], dtype=bool)

        result = op(ser, idx2)
        assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "op",
        [
            pytest.param(
                ops.rand_,
                marks=pytest.mark.xfail(
                    reason="GH#22092 Index implementation returns Index",
                    raises=AssertionError,
                    strict=True,
                ),
            ),
            pytest.param(
                ops.ror_,
                marks=pytest.mark.xfail(
                    reason="Index.get_indexer with non unique index",
                    raises=InvalidIndexError,
                    strict=True,
                ),
            ),
            ops.rxor,
        ],
    )
    def test_reversed_logical_ops_with_index(self, op):
        # GH#22092, GH#19792
        ser = Series([True, True, False, False])
        idx1 = Index([True, False, True, False])
        idx2 = Index([1, 0, 1, 0])

        # symmetric_difference is only correct for rxor; the other two
        # should fail (see the xfail marks above)
        expected = idx1.symmetric_difference(ser)

        result = op(ser, idx1)
        assert_index_equal(result, expected)

        expected = idx2.symmetric_difference(ser)

        result = op(ser, idx2)
        assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "op, expected",
        [
            (ops.rand_, pd.Index([False, True])),
            (ops.ror_, pd.Index([False, True])),
            (ops.rxor, pd.Index([])),
        ],
    )
    def test_reverse_ops_with_index(self, op, expected):
        # https://github.com/pandas-dev/pandas/pull/23628
        # multi-set Index ops are buggy, so let's avoid duplicates...
        ser = Series([True, False])
        idx = Index([False, True])
        result = op(ser, idx)
        tm.assert_index_equal(result, expected)

    def test_logical_ops_label_based(self):
        # GH#4947
        # logical ops should be label based

        a = Series([True, False, True], list("bca"))
        b = Series([False, True, False], list("abc"))

        expected = Series([False, True, False], list("abc"))
        result = a & b
        assert_series_equal(result, expected)

        expected = Series([True, True, False], list("abc"))
        result = a | b
        assert_series_equal(result, expected)

        expected = Series([True, False, False], list("abc"))
        result = a ^ b
        assert_series_equal(result, expected)

        # rhs is bigger
        a = Series([True, False, True], list("bca"))
        b = Series([False, True, False, True], list("abcd"))

        expected = Series([False, True, False, False], list("abcd"))
        result = a & b
        assert_series_equal(result, expected)

        expected = Series([True, True, False, False], list("abcd"))
        result = a | b
        assert_series_equal(result, expected)

        # filling

        # vs empty
        result = a & Series([])
        expected = Series([False, False, False], list("bca"))
        assert_series_equal(result, expected)

        result = a | Series([])
        expected = Series([True, False, True], list("bca"))
        assert_series_equal(result, expected)

        # vs non-matching
        result = a & Series([1], ["z"])
        expected = Series([False, False, False, False], list("abcz"))
        assert_series_equal(result, expected)

        result = a | Series([1], ["z"])
        expected = Series([True, True, False, False], list("abcz"))
        assert_series_equal(result, expected)

        # identity
        # we would like s[s|e] == s to hold for any e, whether empty or not
        for e in [
            Series([]),
            Series([1], ["z"]),
            Series(np.nan, b.index),
            Series(np.nan, a.index),
        ]:
            result = a[a | e]
            assert_series_equal(result, a[a])

        for e in [Series(["z"])]:
            result = a[a | e]
            assert_series_equal(result, a[a])

        # vs scalars
        index = list("bca")
        t = Series([True, False, True])

        for v in [True, 1, 2]:
            result = Series([True, False, True], index=index) | v
            expected = Series([True, True, True], index=index)
            assert_series_equal(result, expected)

        for v in [np.nan, "foo"]:
            with pytest.raises(TypeError):
                t | v

        for v in [False, 0]:
            result = Series([True, False, True], index=index) | v
            expected = Series([True, False, True], index=index)
            assert_series_equal(result, expected)

        for v in [True, 1]:
            result = Series([True, False, True], index=index) & v
            expected = Series([True, False, True], index=index)
            assert_series_equal(result, expected)

        for v in [False, 0]:
            result = Series([True, False, True], index=index) & v
            expected = Series([False, False, False], index=index)
            assert_series_equal(result, expected)

        for v in [np.nan]:
            with pytest.raises(TypeError):
                t & v

    def test_logical_ops_df_compat(self):
        # GH#1134
        s1 = pd.Series([True, False, True], index=list("ABC"), name="x")
        s2 = pd.Series([True, True, False], index=list("ABD"), name="x")

        exp = pd.Series([True, False, False, False], index=list("ABCD"), name="x")
        assert_series_equal(s1 & s2, exp)
        assert_series_equal(s2 & s1, exp)

        # True | np.nan => True
        exp = pd.Series([True, True, True, False], index=list("ABCD"), name="x")
        assert_series_equal(s1 | s2, exp)
        # np.nan | True => np.nan, filled with False
        exp = pd.Series([True, True, False, False], index=list("ABCD"), name="x")
        assert_series_equal(s2 | s1, exp)

        # DataFrame doesn't fill nan with False
        exp = pd.DataFrame({"x": [True, False, np.nan, np.nan]}, index=list("ABCD"))
        assert_frame_equal(s1.to_frame() & s2.to_frame(), exp)
        assert_frame_equal(s2.to_frame() & s1.to_frame(), exp)

        exp = pd.DataFrame({"x": [True, True, np.nan, np.nan]}, index=list("ABCD"))
        assert_frame_equal(s1.to_frame() | s2.to_frame(), exp)
        assert_frame_equal(s2.to_frame() | s1.to_frame(), exp)

        # different length
        s3 = pd.Series([True, False, True], index=list("ABC"), name="x")
        s4 = pd.Series([True, True, True, True], index=list("ABCD"), name="x")

        exp = pd.Series([True, False, True, False], index=list("ABCD"), name="x")
        assert_series_equal(s3 & s4, exp)
        assert_series_equal(s4 & s3, exp)

        # np.nan | True => np.nan, filled with False
        exp = pd.Series([True, True, True, False], index=list("ABCD"), name="x")
        assert_series_equal(s3 | s4, exp)
        # True | np.nan => True
        exp = pd.Series([True, True, True, True], index=list("ABCD"), name="x")
        assert_series_equal(s4 | s3, exp)

        exp = pd.DataFrame({"x": [True, False, True, np.nan]}, index=list("ABCD"))
        assert_frame_equal(s3.to_frame() & s4.to_frame(), exp)
        assert_frame_equal(s4.to_frame() & s3.to_frame(), exp)

        exp = pd.DataFrame({"x": [True, True, True, np.nan]}, index=list("ABCD"))
        assert_frame_equal(s3.to_frame() | s4.to_frame(), exp)
        assert_frame_equal(s4.to_frame() | s3.to_frame(), exp)
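
        # Note the asymmetry asserted above: Series logical ops fill
        # unmatched labels and NaNs with False, while the DataFrame
        # equivalents keep NaN.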
|
||||
|
||||
|
||||
class TestSeriesComparisons:
    def test_comparisons(self):
        left = np.random.randn(10)
        right = np.random.randn(10)
        left[:3] = np.nan

        result = nanops.nangt(left, right)
        with np.errstate(invalid="ignore"):
            expected = (left > right).astype("O")
        expected[:3] = np.nan

        assert_almost_equal(result, expected)

        s = Series(["a", "b", "c"])
        s2 = Series([False, True, False])

        # it works!
        exp = Series([False, False, False])
        assert_series_equal(s == s2, exp)
        assert_series_equal(s2 == s, exp)

    def test_categorical_comparisons(self):
        # GH 8938
        # allow equality comparisons
        a = Series(list("abc"), dtype="category")
        b = Series(list("abc"), dtype="object")
        c = Series(["a", "b", "cc"], dtype="object")
        d = Series(list("acb"), dtype="object")
        e = Categorical(list("abc"))
        f = Categorical(list("acb"))

        # vs scalar
        assert not (a == "a").all()
        assert ((a != "a") == ~(a == "a")).all()

        assert not ("a" == a).all()
        assert (a == "a")[0]
        assert ("a" == a)[0]
        assert not ("a" != a)[0]

        # vs list-like
        assert (a == a).all()
        assert not (a != a).all()

        assert (a == list(a)).all()
        assert (a == b).all()
        assert (b == a).all()
        assert ((~(a == b)) == (a != b)).all()
        assert ((~(b == a)) == (b != a)).all()

        assert not (a == c).all()
        assert not (c == a).all()
        assert not (a == d).all()
        assert not (d == a).all()

        # vs a cat-like
        assert (a == e).all()
        assert (e == a).all()
        assert not (a == f).all()
        assert not (f == a).all()

        assert (~(a == e) == (a != e)).all()
        assert (~(e == a) == (e != a)).all()
        assert (~(a == f) == (a != f)).all()
        assert (~(f == a) == (f != a)).all()

        # non-equality is not comparable
        with pytest.raises(TypeError):
            a < b
        with pytest.raises(TypeError):
            b < a
        with pytest.raises(TypeError):
            a > b
        with pytest.raises(TypeError):
            b > a

    def test_comparison_tuples(self):
        # GH11339
        # comparisons vs tuple
        s = Series([(1, 1), (1, 2)])

        result = s == (1, 2)
        expected = Series([False, True])
        assert_series_equal(result, expected)

        result = s != (1, 2)
        expected = Series([True, False])
        assert_series_equal(result, expected)

        result = s == (0, 0)
        expected = Series([False, False])
        assert_series_equal(result, expected)

        result = s != (0, 0)
        expected = Series([True, True])
        assert_series_equal(result, expected)

        s = Series([(1, 1), (1, 1)])

        result = s == (1, 1)
        expected = Series([True, True])
        assert_series_equal(result, expected)

        result = s != (1, 1)
        expected = Series([False, False])
        assert_series_equal(result, expected)

        s = Series([frozenset([1]), frozenset([1, 2])])

        result = s == frozenset([1])
        expected = Series([True, False])
        assert_series_equal(result, expected)

    def test_comparison_operators_with_nas(self):
        ser = Series(bdate_range("1/1/2000", periods=10), dtype=object)
        ser[::2] = np.nan

        # test that comparisons work
        ops = ["lt", "le", "gt", "ge", "eq", "ne"]
        for op in ops:
            val = ser[5]

            f = getattr(operator, op)
            result = f(ser, val)

            expected = f(ser.dropna(), val).reindex(ser.index)

            if op == "ne":
                expected = expected.fillna(True).astype(bool)
            else:
                expected = expected.fillna(False).astype(bool)

            assert_series_equal(result, expected)

            # TODO: reversed comparisons (scalar on the left) do not align
            # the same way yet; these checks stay disabled for now.
            # result = f(val, ser)
            # expected = f(val, ser.dropna()).reindex(ser.index)
            # assert_series_equal(result, expected)

    def test_unequal_categorical_comparison_raises_type_error(self):
        # unequal comparison should raise for unordered cats
        cat = Series(Categorical(list("abc")))
        with pytest.raises(TypeError):
            cat > "b"

        cat = Series(Categorical(list("abc"), ordered=False))
        with pytest.raises(TypeError):
            cat > "b"

        # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
        # and following comparisons with scalars not in categories should raise
        # for unequal comps, but not for equal/not equal
        cat = Series(Categorical(list("abc"), ordered=True))

        with pytest.raises(TypeError):
            cat < "d"
        with pytest.raises(TypeError):
            cat > "d"
        with pytest.raises(TypeError):
            "d" < cat
        with pytest.raises(TypeError):
            "d" > cat

        tm.assert_series_equal(cat == "d", Series([False, False, False]))
        tm.assert_series_equal(cat != "d", Series([True, True, True]))

    def test_ne(self):
        ts = Series([3, 4, 5, 6, 7], [3, 4, 5, 6, 7], dtype=float)
        expected = [True, True, False, True, True]
        assert tm.equalContents(ts.index != 5, expected)
        assert tm.equalContents(~(ts.index == 5), expected)

    def test_comp_ops_df_compat(self):
        # GH 1134
        s1 = pd.Series([1, 2, 3], index=list("ABC"), name="x")
        s2 = pd.Series([2, 2, 2], index=list("ABD"), name="x")

        s3 = pd.Series([1, 2, 3], index=list("ABC"), name="x")
        s4 = pd.Series([2, 2, 2, 2], index=list("ABCD"), name="x")

        for left, right in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]:

            msg = "Can only compare identically-labeled Series objects"
            with pytest.raises(ValueError, match=msg):
                left == right

            with pytest.raises(ValueError, match=msg):
                left != right

            with pytest.raises(ValueError, match=msg):
                left < right

            msg = "Can only compare identically-labeled DataFrame objects"
            with pytest.raises(ValueError, match=msg):
                left.to_frame() == right.to_frame()

            with pytest.raises(ValueError, match=msg):
                left.to_frame() != right.to_frame()

            with pytest.raises(ValueError, match=msg):
                left.to_frame() < right.to_frame()

    def test_compare_series_interval_keyword(self):
        # GH 25338
        s = Series(["IntervalA", "IntervalB", "IntervalC"])
        result = s == "IntervalA"
        expected = Series([True, False, False])
        assert_series_equal(result, expected)


class TestSeriesFlexComparisonOps:
    def test_comparison_flex_alignment(self):
        left = Series([1, 3, 2], index=list("abc"))
        right = Series([2, 2, 2], index=list("bcd"))

        exp = pd.Series([False, False, True, False], index=list("abcd"))
        assert_series_equal(left.eq(right), exp)

        exp = pd.Series([True, True, False, True], index=list("abcd"))
        assert_series_equal(left.ne(right), exp)

        exp = pd.Series([False, False, True, False], index=list("abcd"))
        assert_series_equal(left.le(right), exp)

        exp = pd.Series([False, False, False, False], index=list("abcd"))
        assert_series_equal(left.lt(right), exp)

        exp = pd.Series([False, True, True, False], index=list("abcd"))
        assert_series_equal(left.ge(right), exp)

        exp = pd.Series([False, True, False, False], index=list("abcd"))
        assert_series_equal(left.gt(right), exp)

    def test_comparison_flex_alignment_fill(self):
        left = Series([1, 3, 2], index=list("abc"))
        right = Series([2, 2, 2], index=list("bcd"))

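        # ``fill_value`` stands in for whichever side is missing a label
        # before comparing: with fill_value=0, label "a" compares 1 vs 0 and
        # label "d" compares 0 vs 2.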
        exp = pd.Series([False, False, True, True], index=list("abcd"))
        assert_series_equal(left.eq(right, fill_value=2), exp)

        exp = pd.Series([True, True, False, False], index=list("abcd"))
        assert_series_equal(left.ne(right, fill_value=2), exp)

        exp = pd.Series([False, False, True, True], index=list("abcd"))
        assert_series_equal(left.le(right, fill_value=0), exp)

        exp = pd.Series([False, False, False, True], index=list("abcd"))
        assert_series_equal(left.lt(right, fill_value=0), exp)

        exp = pd.Series([True, True, True, False], index=list("abcd"))
        assert_series_equal(left.ge(right, fill_value=0), exp)

        exp = pd.Series([True, True, False, False], index=list("abcd"))
        assert_series_equal(left.gt(right, fill_value=0), exp)


class TestSeriesOperators(TestData):
    def test_operators_empty_int_corner(self):
        s1 = Series([], [], dtype=np.int32)
        s2 = Series({"x": 0.0})
        assert_series_equal(s1 * s2, Series([np.nan], index=["x"]))

    def test_ops_datetimelike_align(self):
        # GH 7500
        # datetimelike ops need to align
        dt = Series(date_range("2012-1-1", periods=3, freq="D"))
        dt.iloc[2] = np.nan
        dt2 = dt[::-1]

        expected = Series([timedelta(0), timedelta(0), pd.NaT])
        # name is reset
        result = dt2 - dt
        assert_series_equal(result, expected)

        expected = Series(expected, name=0)
        result = (dt2.to_frame() - dt.to_frame())[0]
        assert_series_equal(result, expected)

    def test_operators_corner(self):
        series = self.ts

        empty = Series([], index=Index([]))

        result = series + empty
        assert np.isnan(result).all()

        result = empty + Series([], index=Index([]))
        assert len(result) == 0

        # TODO: this returned NotImplemented earlier, what to do?
        # deltas = Series([timedelta(1)] * 5, index=np.arange(5))
        # sub_deltas = deltas[::2]
        # deltas5 = deltas * 5
        # deltas = deltas + sub_deltas

        # float + int
        int_ts = self.ts.astype(int)[:-5]
        added = self.ts + int_ts
        expected = Series(
            self.ts.values[:-5] + int_ts.values, index=self.ts.index[:-5], name="ts"
        )
        tm.assert_series_equal(added[:-5], expected)

    pairings = [(Series.div, operator.truediv, 1), (Series.rdiv, ops.rtruediv, 1)]
    for op in ["add", "sub", "mul", "pow", "truediv", "floordiv"]:
        fv = 0
        lop = getattr(Series, op)
        lequiv = getattr(operator, op)
        rop = getattr(Series, "r" + op)
        # bind op at definition time...
        requiv = lambda x, y, op=op: getattr(operator, op)(y, x)
        pairings.append((lop, lequiv, fv))
        pairings.append((rop, requiv, fv))
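    # The ``op=op`` default above freezes the loop variable at definition
    # time; a plain closure would late-bind and every lambda would end up
    # dispatching to "floordiv", the last value of the loop.
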
    @pytest.mark.parametrize("op, equiv_op, fv", pairings)
    def test_operators_combine(self, op, equiv_op, fv):
        def _check_fill(meth, op, a, b, fill_value=0):
            exp_index = a.index.union(b.index)
            a = a.reindex(exp_index)
            b = b.reindex(exp_index)

            amask = isna(a)
            bmask = isna(b)

            exp_values = []
            for i in range(len(exp_index)):
                with np.errstate(all="ignore"):
                    if amask[i]:
                        if bmask[i]:
                            exp_values.append(np.nan)
                            continue
                        exp_values.append(op(fill_value, b[i]))
                    elif bmask[i]:
                        if amask[i]:
                            exp_values.append(np.nan)
                            continue
                        exp_values.append(op(a[i], fill_value))
                    else:
                        exp_values.append(op(a[i], b[i]))

            result = meth(a, b, fill_value=fill_value)
            expected = Series(exp_values, exp_index)
            assert_series_equal(result, expected)

        a = Series([np.nan, 1.0, 2.0, 3.0, np.nan], index=np.arange(5))
        b = Series([np.nan, 1, np.nan, 3, np.nan, 4.0], index=np.arange(6))

        result = op(a, b)
        exp = equiv_op(a, b)
        assert_series_equal(result, exp)
        _check_fill(op, equiv_op, a, b, fill_value=fv)
        # should accept axis=0 or axis='rows'
        op(a, b, axis=0)

    def test_operators_na_handling(self):
        from decimal import Decimal
        from datetime import date

        s = Series(
            [Decimal("1.3"), Decimal("2.3")], index=[date(2012, 1, 1), date(2012, 1, 2)]
        )

        result = s + s.shift(1)
        result2 = s.shift(1) + s
        assert isna(result[0])
        assert isna(result2[0])

    def test_op_duplicate_index(self):
        # GH14227
        s1 = Series([1, 2], index=[1, 1])
        s2 = Series([10, 10], index=[1, 2])
        result = s1 + s2
        expected = pd.Series([11, 12, np.nan], index=[1, 1, 2])
        assert_series_equal(result, expected)

    def test_divmod(self):
        # GH25557
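        # divmod mirrors the builtin: it returns a (floordiv, mod) pair of
        # Series, aligned on the union of the two indexes.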
        a = Series([1, 1, 1, np.nan], index=["a", "b", "c", "d"])
        b = Series([2, np.nan, 1, np.nan], index=["a", "b", "d", "e"])

        result = a.divmod(b)
        expected = divmod(a, b)
        assert_series_equal(result[0], expected[0])
        assert_series_equal(result[1], expected[1])

        result = a.rdivmod(b)
        expected = divmod(b, a)
        assert_series_equal(result[0], expected[0])
        assert_series_equal(result[1], expected[1])


class TestSeriesUnaryOps:
    # __neg__, __pos__, __inv__

    def test_neg(self):
        ser = tm.makeStringSeries()
        ser.name = "series"
        assert_series_equal(-ser, -1 * ser)

    def test_invert(self):
        ser = tm.makeStringSeries()
        ser.name = "series"
        assert_series_equal(-(ser < 0), ~(ser < 0))
@@ -0,0 +1,171 @@
import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Period, Series, period_range
from pandas.core.arrays import PeriodArray
import pandas.util.testing as tm


class TestSeriesPeriod:
    def setup_method(self, method):
        self.series = Series(period_range("2000-01-01", periods=10, freq="D"))

    def test_auto_conversion(self):
        series = Series(list(period_range("2000-01-01", periods=10, freq="D")))
        assert series.dtype == "Period[D]"

        series = pd.Series(
            [pd.Period("2011-01-01", freq="D"), pd.Period("2011-02-01", freq="D")]
        )
        assert series.dtype == "Period[D]"

    def test_getitem(self):
        assert self.series[1] == pd.Period("2000-01-02", freq="D")

        result = self.series[[2, 4]]
        exp = pd.Series(
            [pd.Period("2000-01-03", freq="D"), pd.Period("2000-01-05", freq="D")],
            index=[2, 4],
            dtype="Period[D]",
        )
        tm.assert_series_equal(result, exp)
        assert result.dtype == "Period[D]"

    def test_isna(self):
        # GH 13737
        s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")])
        tm.assert_series_equal(s.isna(), Series([False, True]))
        tm.assert_series_equal(s.notna(), Series([True, False]))

    def test_fillna(self):
        # GH 13737
        s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")])

        res = s.fillna(pd.Period("2012-01", freq="M"))
        exp = Series([pd.Period("2011-01", freq="M"), pd.Period("2012-01", freq="M")])
        tm.assert_series_equal(res, exp)
        assert res.dtype == "Period[M]"

    def test_dropna(self):
        # GH 13737
        s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")])
        tm.assert_series_equal(s.dropna(), Series([pd.Period("2011-01", freq="M")]))

    def test_between(self):
        left, right = self.series[[2, 7]]
        result = self.series.between(left, right)
        expected = (self.series >= left) & (self.series <= right)
        tm.assert_series_equal(result, expected)

    # ---------------------------------------------------------------------
    # NaT support

    @pytest.mark.xfail(reason="PeriodDtype Series not supported yet")
    def test_NaT_scalar(self):
        series = Series([0, 1000, 2000, pd._libs.iNaT], dtype="period[D]")

        val = series[3]
        assert pd.isna(val)

        series[2] = val
        assert pd.isna(series[2])

    @pytest.mark.xfail(reason="PeriodDtype Series not supported yet")
    def test_NaT_cast(self):
        result = Series([np.nan]).astype("period[D]")
        expected = Series([pd.NaT])
        tm.assert_series_equal(result, expected)

    def test_set_none(self):
        self.series[3] = None
        assert self.series[3] is pd.NaT

        self.series[3:5] = None
        assert self.series[4] is pd.NaT

    def test_set_nan(self):
        # Do we want to allow this?
        self.series[5] = np.nan
        assert self.series[5] is pd.NaT

        self.series[5:7] = np.nan
        assert self.series[6] is pd.NaT
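
    # Assigning None or np.nan into a PeriodDtype Series coerces the value
    # to pd.NaT, the missing-value sentinel for period data.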

    def test_intercept_astype_object(self):
        expected = self.series.astype("object")

        df = DataFrame({"a": self.series, "b": np.random.randn(len(self.series))})

        result = df.values.squeeze()
        assert (result[:, 0] == expected.values).all()

        df = DataFrame({"a": self.series, "b": ["foo"] * len(self.series)})

        result = df.values.squeeze()
        assert (result[:, 0] == expected.values).all()

    def test_align_series(self, join_type):
        rng = period_range("1/1/2000", "1/1/2010", freq="A")
        ts = Series(np.random.randn(len(rng)), index=rng)

        ts.align(ts[::2], join=join_type)

    def test_truncate(self):
        # GH 17717
        idx1 = pd.PeriodIndex(
            [pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
        )
        series1 = pd.Series([1, 2, 3], index=idx1)
        result1 = series1.truncate(after="2017-09-02")

        expected_idx1 = pd.PeriodIndex(
            [pd.Period("2017-09-02"), pd.Period("2017-09-02")]
        )
        tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1))

        idx2 = pd.PeriodIndex(
            [pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
        )
        series2 = pd.Series([1, 2, 3], index=idx2)
        result2 = series2.sort_index().truncate(after="2017-09-02")

        expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")])
        tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2))

    @pytest.mark.parametrize(
        "input_vals",
        [
            [Period("2016-01", freq="M"), Period("2016-02", freq="M")],
            [Period("2016-01-01", freq="D"), Period("2016-01-02", freq="D")],
            [
                Period("2016-01-01 00:00:00", freq="H"),
                Period("2016-01-01 01:00:00", freq="H"),
            ],
            [
                Period("2016-01-01 00:00:00", freq="M"),
                Period("2016-01-01 00:01:00", freq="M"),
            ],
            [
                Period("2016-01-01 00:00:00", freq="S"),
                Period("2016-01-01 00:00:01", freq="S"),
            ],
        ],
    )
    def test_end_time_timevalues(self, input_vals):
        # GH 17157
        # Check that the time part of the Period is adjusted by end_time
        # when using the dt accessor on a Series
        input_vals = PeriodArray._from_sequence(np.asarray(input_vals))

        s = Series(input_vals)
        result = s.dt.end_time
        expected = s.apply(lambda x: x.end_time)
        tm.assert_series_equal(result, expected)
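
    # ``end_time`` maps each Period to the last timestamp the period covers,
    # so the freq="M" cases above resolve to the very end of the month.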

    @pytest.mark.parametrize("input_vals", ["2001", "NaT"])
    def test_to_period(self, input_vals):
        # GH 21205
        expected = Series([input_vals], dtype="Period[D]")
        result = Series([input_vals], dtype="datetime64[ns]").dt.to_period("D")
        tm.assert_series_equal(result, expected)
@@ -0,0 +1,212 @@
import numpy as np
import pytest

from pandas.core.dtypes.common import is_integer

import pandas as pd
from pandas import Index, Series
from pandas.core.indexes.datetimes import Timestamp
import pandas.util.testing as tm

from .common import TestData


class TestSeriesQuantile(TestData):
    def test_quantile(self):

        q = self.ts.quantile(0.1)
        assert q == np.percentile(self.ts.dropna(), 10)

        q = self.ts.quantile(0.9)
        assert q == np.percentile(self.ts.dropna(), 90)

        # object dtype
        q = Series(self.ts, dtype=object).quantile(0.9)
        assert q == np.percentile(self.ts.dropna(), 90)

        # datetime64[ns] dtype
        dts = self.ts.index.to_series()
        q = dts.quantile(0.2)
        assert q == Timestamp("2000-01-10 19:12:00")

        # timedelta64[ns] dtype
        tds = dts.diff()
        q = tds.quantile(0.25)
        assert q == pd.to_timedelta("24:00:00")

        # GH7661
        result = Series([np.timedelta64("NaT")]).sum()
        assert result == pd.Timedelta(0)

        msg = "percentiles should all be in the interval \\[0, 1\\]"
        for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
            with pytest.raises(ValueError, match=msg):
                self.ts.quantile(invalid)

    def test_quantile_multi(self):

        qs = [0.1, 0.9]
        result = self.ts.quantile(qs)
        expected = pd.Series(
            [np.percentile(self.ts.dropna(), 10), np.percentile(self.ts.dropna(), 90)],
            index=qs,
            name=self.ts.name,
        )
        tm.assert_series_equal(result, expected)

        dts = self.ts.index.to_series()
        dts.name = "xxx"
        result = dts.quantile((0.2, 0.2))
        expected = Series(
            [Timestamp("2000-01-10 19:12:00"), Timestamp("2000-01-10 19:12:00")],
            index=[0.2, 0.2],
            name="xxx",
        )
        tm.assert_series_equal(result, expected)

        result = self.ts.quantile([])
        expected = pd.Series([], name=self.ts.name, index=Index([], dtype=float))
        tm.assert_series_equal(result, expected)

    def test_quantile_interpolation(self):
        # see gh-10174

        # interpolation = linear (default case)
        q = self.ts.quantile(0.1, interpolation="linear")
        assert q == np.percentile(self.ts.dropna(), 10)
        q1 = self.ts.quantile(0.1)
        assert q1 == np.percentile(self.ts.dropna(), 10)

        # test with and without interpolation keyword
        assert q == q1

    def test_quantile_interpolation_dtype(self):
        # GH #10174

        # interpolation = linear (default case)
        q = pd.Series([1, 3, 4]).quantile(0.5, interpolation="lower")
        assert q == np.percentile(np.array([1, 3, 4]), 50)
        assert is_integer(q)

        q = pd.Series([1, 3, 4]).quantile(0.5, interpolation="higher")
        assert q == np.percentile(np.array([1, 3, 4]), 50)
        assert is_integer(q)
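
    # Unlike the default "linear" interpolation, "lower"/"higher" pick an
    # existing element instead of interpolating, so the integer dtype of the
    # input survives the quantile above.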

    def test_quantile_nan(self):

        # GH 13098
        s = pd.Series([1, 2, 3, 4, np.nan])
        result = s.quantile(0.5)
        expected = 2.5
        assert result == expected

        # all nan/empty
        cases = [Series([]), Series([np.nan, np.nan])]

        for s in cases:
            res = s.quantile(0.5)
            assert np.isnan(res)

            res = s.quantile([0.5])
            tm.assert_series_equal(res, pd.Series([np.nan], index=[0.5]))

            res = s.quantile([0.2, 0.3])
            tm.assert_series_equal(res, pd.Series([np.nan, np.nan], index=[0.2, 0.3]))

    @pytest.mark.parametrize(
        "case",
        [
            [
                pd.Timestamp("2011-01-01"),
                pd.Timestamp("2011-01-02"),
                pd.Timestamp("2011-01-03"),
            ],
            [
                pd.Timestamp("2011-01-01", tz="US/Eastern"),
                pd.Timestamp("2011-01-02", tz="US/Eastern"),
                pd.Timestamp("2011-01-03", tz="US/Eastern"),
            ],
            [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
            # NaT
            [
                pd.Timestamp("2011-01-01"),
                pd.Timestamp("2011-01-02"),
                pd.Timestamp("2011-01-03"),
                pd.NaT,
            ],
            [
                pd.Timestamp("2011-01-01", tz="US/Eastern"),
                pd.Timestamp("2011-01-02", tz="US/Eastern"),
                pd.Timestamp("2011-01-03", tz="US/Eastern"),
                pd.NaT,
            ],
            [
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
                pd.Timedelta("3 days"),
                pd.NaT,
            ],
        ],
    )
    def test_quantile_box(self, case):
        s = pd.Series(case, name="XXX")
        res = s.quantile(0.5)
        assert res == case[1]

        res = s.quantile([0.5])
        exp = pd.Series([case[1]], index=[0.5], name="XXX")
        tm.assert_series_equal(res, exp)

    def test_datetime_timedelta_quantiles(self):
        # covers #9694
        assert pd.isna(Series([], dtype="M8[ns]").quantile(0.5))
        assert pd.isna(Series([], dtype="m8[ns]").quantile(0.5))

    def test_quantile_nat(self):
        res = Series([pd.NaT, pd.NaT]).quantile(0.5)
        assert res is pd.NaT

        res = Series([pd.NaT, pd.NaT]).quantile([0.5])
        tm.assert_series_equal(res, pd.Series([pd.NaT], index=[0.5]))

    @pytest.mark.parametrize(
        "values, dtype",
        [([0, 0, 0, 1, 2, 3], "Sparse[int]"), ([0.0, None, 1.0, 2.0], "Sparse[float]")],
    )
    def test_quantile_sparse(self, values, dtype):
        ser = pd.Series(values, dtype=dtype)
        result = ser.quantile([0.5])
        expected = pd.Series(np.asarray(ser)).quantile([0.5])
        tm.assert_series_equal(result, expected)
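
    # The sparse quantile should match the result after densifying with
    # ``np.asarray``; only the storage format differs.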

    def test_quantile_empty(self):

        # floats
        s = Series([], dtype="float64")

        res = s.quantile(0.5)
        assert np.isnan(res)

        res = s.quantile([0.5])
        exp = Series([np.nan], index=[0.5])
        tm.assert_series_equal(res, exp)

        # int
        s = Series([], dtype="int64")

        res = s.quantile(0.5)
        assert np.isnan(res)

        res = s.quantile([0.5])
        exp = Series([np.nan], index=[0.5])
        tm.assert_series_equal(res, exp)

        # datetime
        s = Series([], dtype="datetime64[ns]")

        res = s.quantile(0.5)
        assert res is pd.NaT

        res = s.quantile([0.5])
        exp = Series([pd.NaT], index=[0.5])
        tm.assert_series_equal(res, exp)
@@ -0,0 +1,567 @@
from itertools import chain, product

import numpy as np
from numpy import nan
import pytest

from pandas._libs.algos import Infinity, NegInfinity
from pandas._libs.tslib import iNaT
import pandas.util._test_decorators as td

from pandas import NaT, Series, Timestamp, date_range
from pandas.api.types import CategoricalDtype
from pandas.tests.series.common import TestData
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal


class TestSeriesRank(TestData):
    s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3])

    results = {
        "average": np.array([1.5, 5.5, 7.0, 3.5, nan, 3.5, 1.5, 8.0, nan, 5.5]),
        "min": np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]),
        "max": np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]),
        "first": np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]),
        "dense": np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]),
    }
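
    # For example, the two 1s in ``s`` occupy rank positions 1 and 2:
    # "average" gives both 1.5, "min" gives 1, "max" gives 2, "first" breaks
    # the tie by position, and "dense" compresses ranks to consecutive ints.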

    def test_rank(self):
        # importorskip only accepts module names ("scipy.stats.rankdata" is a
        # function), so import scipy.stats and pull rankdata from it.
        sp_stats = pytest.importorskip("scipy.stats")
        rankdata = sp_stats.rankdata

        self.ts[::2] = np.nan
        self.ts[:10][::3] = 4.0

        ranks = self.ts.rank()
        oranks = self.ts.astype("O").rank()

        assert_series_equal(ranks, oranks)

        mask = np.isnan(self.ts)
        filled = self.ts.fillna(np.inf)

        # rankdata returns a ndarray
        exp = Series(rankdata(filled), index=filled.index, name="ts")
        exp[mask] = np.nan

        tm.assert_series_equal(ranks, exp)

        iseries = Series(np.arange(5).repeat(2))

        iranks = iseries.rank()
        exp = iseries.astype(float).rank()
        assert_series_equal(iranks, exp)
        iseries = Series(np.arange(5)) + 1.0
        exp = iseries / 5.0
        iranks = iseries.rank(pct=True)

        assert_series_equal(iranks, exp)

        iseries = Series(np.repeat(1, 100))
        exp = Series(np.repeat(0.505, 100))
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        iseries[1] = np.nan
        exp = Series(np.repeat(50.0 / 99.0, 100))
        exp[1] = np.nan
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        iseries = Series(np.arange(5)) + 1.0
        iseries[4] = np.nan
        exp = iseries / 4.0
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        iseries = Series(np.repeat(np.nan, 100))
        exp = iseries.copy()
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        iseries = Series(np.arange(5)) + 1
        iseries[4] = np.nan
        exp = iseries / 4.0
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        rng = date_range("1/1/1990", periods=5)
        iseries = Series(np.arange(5), rng) + 1
        iseries.iloc[4] = np.nan
        exp = iseries / 4.0
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1])
        exp = Series([2, 1, 3, 5, 4, 6.0])
        iranks = iseries.rank()
        assert_series_equal(iranks, exp)

        # GH 5968
        iseries = Series(["3 day", "1 day 10m", "-2 day", NaT], dtype="m8[ns]")
        exp = Series([3, 2, 1, np.nan])
        iranks = iseries.rank()
        assert_series_equal(iranks, exp)

        values = np.array(
            [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40],
            dtype="float64",
        )
        random_order = np.random.permutation(len(values))
        iseries = Series(values[random_order])
        exp = Series(random_order + 1.0, dtype="float64")
        iranks = iseries.rank()
        assert_series_equal(iranks, exp)

    def test_rank_categorical(self):
        # GH issue #15420 rank incorrectly orders ordered categories

        # Test ascending/descending ranking for ordered categoricals
        exp = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        exp_desc = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
        ordered = Series(
            ["first", "second", "third", "fourth", "fifth", "sixth"]
        ).astype(
            CategoricalDtype(
                categories=["first", "second", "third", "fourth", "fifth", "sixth"],
                ordered=True,
            )
        )
        assert_series_equal(ordered.rank(), exp)
        assert_series_equal(ordered.rank(ascending=False), exp_desc)

        # Unordered categoricals should be ranked as objects
        unordered = Series(
            ["first", "second", "third", "fourth", "fifth", "sixth"]
        ).astype(
            CategoricalDtype(
                categories=["first", "second", "third", "fourth", "fifth", "sixth"],
                ordered=False,
            )
        )
        exp_unordered = Series([2.0, 4.0, 6.0, 3.0, 1.0, 5.0])
        res = unordered.rank()
        assert_series_equal(res, exp_unordered)

        unordered1 = Series([1, 2, 3, 4, 5, 6]).astype(
            CategoricalDtype([1, 2, 3, 4, 5, 6], False)
        )
        exp_unordered1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        res1 = unordered1.rank()
        assert_series_equal(res1, exp_unordered1)

        # Test na_option for rank data
        na_ser = Series(
            ["first", "second", "third", "fourth", "fifth", "sixth", np.NaN]
        ).astype(
            CategoricalDtype(
                ["first", "second", "third", "fourth", "fifth", "sixth", "seventh"],
                True,
            )
        )

        exp_top = Series([2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 1.0])
        exp_bot = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
        exp_keep = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.NaN])

        assert_series_equal(na_ser.rank(na_option="top"), exp_top)
        assert_series_equal(na_ser.rank(na_option="bottom"), exp_bot)
        assert_series_equal(na_ser.rank(na_option="keep"), exp_keep)

        # Test na_option for rank data with ascending False
        exp_top = Series([7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
        exp_bot = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 7.0])
        exp_keep = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, np.NaN])

        assert_series_equal(na_ser.rank(na_option="top", ascending=False), exp_top)
        assert_series_equal(na_ser.rank(na_option="bottom", ascending=False), exp_bot)
        assert_series_equal(na_ser.rank(na_option="keep", ascending=False), exp_keep)

        # Test invalid values for na_option
        msg = "na_option must be one of 'keep', 'top', or 'bottom'"

        with pytest.raises(ValueError, match=msg):
            na_ser.rank(na_option="bad", ascending=False)

        # invalid type
        with pytest.raises(ValueError, match=msg):
            na_ser.rank(na_option=True, ascending=False)

        # Test with pct=True
        na_ser = Series(["first", "second", "third", "fourth", np.NaN]).astype(
            CategoricalDtype(["first", "second", "third", "fourth"], True)
        )
        exp_top = Series([0.4, 0.6, 0.8, 1.0, 0.2])
        exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.0])
        exp_keep = Series([0.25, 0.5, 0.75, 1.0, np.NaN])

        assert_series_equal(na_ser.rank(na_option="top", pct=True), exp_top)
        assert_series_equal(na_ser.rank(na_option="bottom", pct=True), exp_bot)
        assert_series_equal(na_ser.rank(na_option="keep", pct=True), exp_keep)

    def test_rank_signature(self):
        s = Series([0, 1])
        s.rank(method="average")
        msg = (
            "No axis named average for object type"
            " <class 'pandas.core.series.Series'>"
        )
        with pytest.raises(ValueError, match=msg):
            s.rank("average")

    @pytest.mark.parametrize(
        "contents,dtype",
        [
            (
                [
                    -np.inf,
                    -50,
                    -1,
                    -1e-20,
                    -1e-25,
                    -1e-50,
                    0,
                    1e-40,
                    1e-20,
                    1e-10,
                    2,
                    40,
                    np.inf,
                ],
                "float64",
            ),
            (
                [
                    -np.inf,
                    -50,
                    -1,
                    -1e-20,
                    -1e-25,
                    -1e-45,
                    0,
                    1e-40,
                    1e-20,
                    1e-10,
                    2,
                    40,
                    np.inf,
                ],
                "float32",
            ),
            ([np.iinfo(np.uint8).min, 1, 2, 100, np.iinfo(np.uint8).max], "uint8"),
            pytest.param(
                [
                    np.iinfo(np.int64).min,
                    -100,
                    0,
                    1,
                    9999,
                    100000,
                    1e10,
                    np.iinfo(np.int64).max,
                ],
                "int64",
                marks=pytest.mark.xfail(
                    reason="iNaT is equivalent to minimum value of dtype"
                    "int64 pending issue GH#16674"
                ),
            ),
            ([NegInfinity(), "1", "A", "BA", "Ba", "C", Infinity()], "object"),
        ],
    )
    def test_rank_inf(self, contents, dtype):
        dtype_na_map = {
            "float64": np.nan,
            "float32": np.nan,
            "int64": iNaT,
            "object": None,
        }
        # Insert nans at random positions if the underlying dtype has a
        # missing value. Then adjust the expected order by adding nans
        # accordingly. This is for testing whether the rank calculation is
        # affected when values are intertwined with nan values.
        values = np.array(contents, dtype=dtype)
        exp_order = np.array(range(len(values)), dtype="float64") + 1.0
        if dtype in dtype_na_map:
            na_value = dtype_na_map[dtype]
            nan_indices = np.random.choice(range(len(values)), 5)
            values = np.insert(values, nan_indices, na_value)
            exp_order = np.insert(exp_order, nan_indices, np.nan)
        # shuffle the testing array and expected results in the same way
        random_order = np.random.permutation(len(values))
        iseries = Series(values[random_order])
        exp = Series(exp_order[random_order], dtype="float64")
        iranks = iseries.rank()
        assert_series_equal(iranks, exp)

    def test_rank_tie_methods(self):
        s = self.s

        def _check(s, expected, method="average"):
            result = s.rank(method=method)
            tm.assert_series_equal(result, Series(expected))

        dtypes = [None, object]
        disabled = {(object, "first")}
        results = self.results

        for method, dtype in product(results, dtypes):
            if (dtype, method) in disabled:
                continue
            series = s if dtype is None else s.astype(dtype)
            _check(series, results[method], method=method)

    @td.skip_if_no_scipy
    @pytest.mark.parametrize("ascending", [True, False])
    @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
    @pytest.mark.parametrize("na_option", ["top", "bottom", "keep"])
    def test_rank_tie_methods_on_infs_nans(self, method, na_option, ascending):
        dtypes = [
            ("object", None, Infinity(), NegInfinity()),
            ("float64", np.nan, np.inf, -np.inf),
        ]
        chunk = 3
        disabled = {("object", "first")}

        def _check(s, method, na_option, ascending):
            exp_ranks = {
                "average": ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
                "min": ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
                "max": ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
                "first": ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
                "dense": ([1, 1, 1], [2, 2, 2], [3, 3, 3]),
            }
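            # Each triple covers one block of the input built below:
            # [neg_inf] * 3, [na_value] * 3 and [pos_inf] * 3, ranked within
            # the nine-element Series.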
            ranks = exp_ranks[method]
            if na_option == "top":
                order = [ranks[1], ranks[0], ranks[2]]
            elif na_option == "bottom":
                order = [ranks[0], ranks[2], ranks[1]]
            else:
                order = [ranks[0], [np.nan] * chunk, ranks[1]]
            expected = order if ascending else order[::-1]
            expected = list(chain.from_iterable(expected))
            result = s.rank(method=method, na_option=na_option, ascending=ascending)
            tm.assert_series_equal(result, Series(expected, dtype="float64"))

        for dtype, na_value, pos_inf, neg_inf in dtypes:
            in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
            iseries = Series(in_arr, dtype=dtype)
            if (dtype, method) in disabled:
                continue
            _check(iseries, method, na_option, ascending)

    def test_rank_desc_mix_nans_infs(self):
        # GH 19538
        # check descending ranking when mix nans and infs
        iseries = Series([1, np.nan, np.inf, -np.inf, 25])
        result = iseries.rank(ascending=False)
        exp = Series([3, np.nan, 1, 4, 2], dtype="float64")
        tm.assert_series_equal(result, exp)

    def test_rank_methods_series(self):
        # Same importorskip pattern as test_rank above: take rankdata off the
        # scipy.stats module rather than passing a function name.
        sp_stats = pytest.importorskip("scipy.stats")
        rankdata = sp_stats.rankdata

        xs = np.random.randn(9)
        xs = np.concatenate([xs[i:] for i in range(0, 9, 2)])  # add duplicates
        np.random.shuffle(xs)

        index = [chr(ord("a") + i) for i in range(len(xs))]

        for vals in [xs, xs + 1e6, xs * 1e-6]:
            ts = Series(vals, index=index)

            for m in ["average", "min", "max", "first", "dense"]:
                result = ts.rank(method=m)
                sprank = rankdata(vals, m if m != "first" else "ordinal")
                expected = Series(sprank, index=index).astype("float64")
                tm.assert_series_equal(result, expected)

    def test_rank_dense_method(self):
        dtypes = ["O", "f8", "i8"]
        in_out = [
            ([1], [1]),
            ([2], [1]),
            ([0], [1]),
            ([2, 2], [1, 1]),
            ([1, 2, 3], [1, 2, 3]),
            ([4, 2, 1], [3, 2, 1]),
            ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]),
            ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5]),
        ]

        for ser, exp in in_out:
            for dtype in dtypes:
                s = Series(ser).astype(dtype)
                result = s.rank(method="dense")
                expected = Series(exp).astype(result.dtype)
                assert_series_equal(result, expected)

    def test_rank_descending(self):
        dtypes = ["O", "f8", "i8"]

        for dtype, method in product(dtypes, self.results):
            if "i" in dtype:
                s = self.s.dropna()
            else:
                s = self.s.astype(dtype)

            res = s.rank(ascending=False)
            expected = (s.max() - s).rank()
            assert_series_equal(res, expected)

            if method == "first" and dtype == "O":
                continue

            expected = (s.max() - s).rank(method=method)
            res2 = s.rank(method=method, ascending=False)
            assert_series_equal(res2, expected)

    def test_rank_int(self):
        s = self.s.dropna().astype("i8")

        for method, res in self.results.items():
            result = s.rank(method=method)
            expected = Series(res).dropna()
            expected.index = result.index
            assert_series_equal(result, expected)

    def test_rank_object_bug(self):
        # GH 13445

        # smoke tests
        Series([np.nan] * 32).astype(object).rank(ascending=True)
        Series([np.nan] * 32).astype(object).rank(ascending=False)

    def test_rank_modify_inplace(self):
        # GH 18521
        # Check rank does not mutate series
        s = Series([Timestamp("2017-01-05 10:20:27.569000"), NaT])
        expected = s.copy()

        s.rank()
        result = s
        assert_series_equal(result, expected)


# GH15630, pct should be on 100% basis when method='dense'
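# For example, [1, 2, 2] has dense ranks [1, 2, 2] over 2 distinct values,
# so pct=True yields [0.5, 1.0, 1.0] rather than dividing by the length.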


@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.0, 1.0]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 2, 2.0 / 2, 2.0 / 2]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [1.0 / 3, 1.0 / 3, 3.0 / 3, 3.0 / 3, 2.0 / 3]),
        ([1, 1, 3, 3, 5, 5], [1.0 / 3, 1.0 / 3, 2.0 / 3, 2.0 / 3, 3.0 / 3, 3.0 / 3]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_dense_pct(dtype, ser, exp):
    s = Series(ser).astype(dtype)
    result = s.rank(method="dense", pct=True)
    expected = Series(exp).astype(result.dtype)
    assert_series_equal(result, expected)


@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.0 / 2, 1.0 / 2]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 3, 2.0 / 3, 2.0 / 3]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [1.0 / 5, 1.0 / 5, 4.0 / 5, 4.0 / 5, 3.0 / 5]),
        ([1, 1, 3, 3, 5, 5], [1.0 / 6, 1.0 / 6, 3.0 / 6, 3.0 / 6, 5.0 / 6, 5.0 / 6]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_min_pct(dtype, ser, exp):
    s = Series(ser).astype(dtype)
    result = s.rank(method="min", pct=True)
    expected = Series(exp).astype(result.dtype)
    assert_series_equal(result, expected)


@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.0, 1.0]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 3, 3.0 / 3, 3.0 / 3]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [2.0 / 5, 2.0 / 5, 5.0 / 5, 5.0 / 5, 3.0 / 5]),
        ([1, 1, 3, 3, 5, 5], [2.0 / 6, 2.0 / 6, 4.0 / 6, 4.0 / 6, 6.0 / 6, 6.0 / 6]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_max_pct(dtype, ser, exp):
    s = Series(ser).astype(dtype)
    result = s.rank(method="max", pct=True)
    expected = Series(exp).astype(result.dtype)
    assert_series_equal(result, expected)


@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.5 / 2, 1.5 / 2]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 3, 2.5 / 3, 2.5 / 3]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [1.5 / 5, 1.5 / 5, 4.5 / 5, 4.5 / 5, 3.0 / 5]),
        ([1, 1, 3, 3, 5, 5], [1.5 / 6, 1.5 / 6, 3.5 / 6, 3.5 / 6, 5.5 / 6, 5.5 / 6]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_average_pct(dtype, ser, exp):
    s = Series(ser).astype(dtype)
    result = s.rank(method="average", pct=True)
    expected = Series(exp).astype(result.dtype)
    assert_series_equal(result, expected)


@pytest.mark.parametrize("dtype", ["f8", "i8"])
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.0 / 2, 2.0 / 2.0]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [1.0 / 5, 2.0 / 5, 4.0 / 5, 5.0 / 5, 3.0 / 5]),
        ([1, 1, 3, 3, 5, 5], [1.0 / 6, 2.0 / 6, 3.0 / 6, 4.0 / 6, 5.0 / 6, 6.0 / 6]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_first_pct(dtype, ser, exp):
    s = Series(ser).astype(dtype)
    result = s.rank(method="first", pct=True)
    expected = Series(exp).astype(result.dtype)
    assert_series_equal(result, expected)


@pytest.mark.single
@pytest.mark.high_memory
def test_pct_max_many_rows():
    # GH 18271
    s = Series(np.arange(2 ** 24 + 1))
    result = s.rank(pct=True).max()
    assert result == 1
@@ -0,0 +1,309 @@
import numpy as np
import pytest

import pandas as pd
import pandas.util.testing as tm

from .common import TestData


class TestSeriesReplace(TestData):
    def test_replace(self):
        N = 100
        ser = pd.Series(np.random.randn(N))
        ser[0:4] = np.nan
        ser[6:10] = 0

        # replace list with a single value
        ser.replace([np.nan], -1, inplace=True)

        exp = ser.fillna(-1)
        tm.assert_series_equal(ser, exp)

        rs = ser.replace(0.0, np.nan)
        ser[ser == 0.0] = np.nan
        tm.assert_series_equal(rs, ser)

        ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object)
        ser[:5] = np.nan
        ser[6:10] = "foo"
        ser[20:30] = "bar"

        # replace list with a single value
        rs = ser.replace([np.nan, "foo", "bar"], -1)

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -1).all()
        assert (rs[20:30] == -1).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values
        rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -2).all()
        assert (rs[20:30] == -3).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values with 2 lists
        rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
        tm.assert_series_equal(rs, rs2)

        # replace inplace
        ser.replace([np.nan, "foo", "bar"], -1, inplace=True)

        assert (ser[:5] == -1).all()
        assert (ser[6:10] == -1).all()
        assert (ser[20:30] == -1).all()

        ser = pd.Series([np.nan, 0, np.inf])
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))

        ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT])
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
        filled = ser.copy()
        filled[4] = 0
        tm.assert_series_equal(ser.replace(np.inf, 0), filled)

        ser = pd.Series(self.ts.index)
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))

        # malformed
        msg = r"Replacement lists must match in length\. Expecting 3 got 2"
        with pytest.raises(ValueError, match=msg):
            ser.replace([1, 2, 3], [np.nan, 0])

        # make sure that we aren't just masking a TypeError because bools don't
        # implement indexing
        with pytest.raises(TypeError, match="Cannot compare types .+"):
            ser.replace([1, 2], [np.nan, 0])

        ser = pd.Series([0, 1, 2, 3, 4])
        result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
        tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0]))

    def test_replace_gh5319(self):
        # API change from 0.12?
        # GH 5319
        ser = pd.Series([0, np.nan, 2, 3, 4])
        expected = ser.ffill()
        result = ser.replace([np.nan])
        tm.assert_series_equal(result, expected)

        ser = pd.Series([0, np.nan, 2, 3, 4])
        expected = ser.ffill()
        result = ser.replace(np.nan)
        tm.assert_series_equal(result, expected)
        # GH 5797
        ser = pd.Series(pd.date_range("20130101", periods=5))
        expected = ser.copy()
        expected.loc[2] = pd.Timestamp("20120101")
        result = ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")})
        tm.assert_series_equal(result, expected)
        result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101"))
        tm.assert_series_equal(result, expected)

        # GH 11792: Test with replacing NaT in a list with tz data
        ts = pd.Timestamp("2015/01/01", tz="UTC")
        s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")])
        result = s.replace([np.nan, pd.NaT], pd.Timestamp.min)
        expected = pd.Series([pd.Timestamp.min, ts], dtype=object)
        tm.assert_series_equal(expected, result)

    def test_replace_with_single_list(self):
        ser = pd.Series([0, 1, 2, 3, 4])
        result = ser.replace([1, 2, 3])
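        # With no replacement value given, replace falls back to the "pad"
        # fill method, so each matched element takes the prior value.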
        tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))

        s = ser.copy()
        s.replace([1, 2, 3], inplace=True)
        tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))

        # make sure things don't get corrupted when fillna call fails
        s = ser.copy()
        msg = (
            r"Invalid fill method\. Expecting pad \(ffill\) or backfill"
            r" \(bfill\)\. Got crash_cymbal"
        )
        with pytest.raises(ValueError, match=msg):
            s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
        tm.assert_series_equal(s, ser)

    def test_replace_with_empty_list(self):
        # GH 21977
        s = pd.Series([[1], [2, 3], [], np.nan, [4]])
        expected = s
        result = s.replace([], np.nan)
        tm.assert_series_equal(result, expected)

        # GH 19266
        with pytest.raises(ValueError, match="cannot assign mismatch"):
            s.replace({np.nan: []})
        with pytest.raises(ValueError, match="cannot assign mismatch"):
            s.replace({np.nan: ["dummy", "alt"]})

    def test_replace_mixed_types(self):
        s = pd.Series(np.arange(5), dtype="int64")

        def check_replace(to_rep, val, expected):
            sc = s.copy()
            r = s.replace(to_rep, val)
            sc.replace(to_rep, val, inplace=True)
            tm.assert_series_equal(expected, r)
            tm.assert_series_equal(expected, sc)

        # MUST upcast to float
        e = pd.Series([0.0, 1.0, 2.0, 3.0, 4.0])
        tr, v = [3], [3.0]
        check_replace(tr, v, e)

        # MUST upcast to float
        e = pd.Series([0, 1, 2, 3.5, 4])
        tr, v = [3], [3.5]
        check_replace(tr, v, e)

        # casts to object
        e = pd.Series([0, 1, 2, 3.5, "a"])
        tr, v = [3, 4], [3.5, "a"]
        check_replace(tr, v, e)

        # again casts to object
        e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")])
        tr, v = [3, 4], [3.5, pd.Timestamp("20130101")]
        check_replace(tr, v, e)

        # casts to object
        e = pd.Series([0, 1, 2, 3.5, True], dtype="object")
        tr, v = [3, 4], [3.5, True]
        check_replace(tr, v, e)

        # test an object with dates + floats + integers + strings
        dr = (
            pd.date_range("1/1/2001", "1/10/2001", freq="D")
            .to_series()
            .reset_index(drop=True)
        )
        result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"])
        expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object)
        tm.assert_series_equal(result, expected)

    def test_replace_bool_with_string_no_op(self):
        s = pd.Series([True, False, True])
        result = s.replace("fun", "in-the-sun")
        tm.assert_series_equal(s, result)

    def test_replace_bool_with_string(self):
        # nonexistent elements
        s = pd.Series([True, False, True])
        result = s.replace(True, "2u")
        expected = pd.Series(["2u", False, "2u"])
        tm.assert_series_equal(expected, result)

    def test_replace_bool_with_bool(self):
        s = pd.Series([True, False, True])
        result = s.replace(True, False)
        expected = pd.Series([False] * len(s))
        tm.assert_series_equal(expected, result)

    def test_replace_with_dict_with_bool_keys(self):
        s = pd.Series([True, False, True])
        with pytest.raises(TypeError, match="Cannot compare types .+"):
            s.replace({"asdf": "asdb", True: "yes"})

    def test_replace2(self):
        N = 100
        ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object)
        ser[:5] = np.nan
        ser[6:10] = "foo"
        ser[20:30] = "bar"

        # replace list with a single value
        rs = ser.replace([np.nan, "foo", "bar"], -1)

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -1).all()
        assert (rs[20:30] == -1).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values
        rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -2).all()
        assert (rs[20:30] == -3).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values with 2 lists
        rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
        tm.assert_series_equal(rs, rs2)

        # replace inplace
        ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
        assert (ser[:5] == -1).all()
        assert (ser[6:10] == -1).all()
        assert (ser[20:30] == -1).all()

    def test_replace_with_empty_dictlike(self):
        # GH 15289
        s = pd.Series(list("abcd"))
        tm.assert_series_equal(s, s.replace(dict()))
        tm.assert_series_equal(s, s.replace(pd.Series([])))

    def test_replace_string_with_number(self):
        # GH 15743
        s = pd.Series([1, 2, 3])
        result = s.replace("2", np.nan)
        expected = pd.Series([1, 2, 3])
        tm.assert_series_equal(expected, result)

    def test_replace_replacer_equals_replacement(self):
        # GH 20656
        # make sure all replacers are matching against original values
        s = pd.Series(["a", "b"])
        expected = pd.Series(["b", "a"])
        result = s.replace({"a": "b", "b": "a"})
        tm.assert_series_equal(expected, result)

    def test_replace_unicode_with_number(self):
        # GH 15743
        s = pd.Series([1, 2, 3])
        result = s.replace("2", np.nan)
        expected = pd.Series([1, 2, 3])
        tm.assert_series_equal(expected, result)

    def test_replace_mixed_types_with_string(self):
        # Testing mixed
        s = pd.Series([1, 2, 3, "4", 4, 5])
        result = s.replace([2, "4"], np.nan)
        expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
        tm.assert_series_equal(expected, result)

    @pytest.mark.parametrize(
        "categorical, numeric",
        [
            (pd.Categorical("A", categories=["A", "B"]), [1]),
            (pd.Categorical(("A",), categories=["A", "B"]), [1]),
            (pd.Categorical(("A", "B"), categories=["A", "B"]), [1, 2]),
        ],
    )
    def test_replace_categorical(self, categorical, numeric):
        # GH 24971
        # Do not check if dtypes are equal due to a known issue that
        # Categorical.replace sometimes coerces to object (GH 23305)
        s = pd.Series(categorical)
        result = s.replace({"A": 1, "B": 2})
        expected = pd.Series(numeric)
        tm.assert_series_equal(expected, result, check_dtype=False)

    def test_replace_with_no_overflowerror(self):
        # GH 25616
        # casts to object without Exception from OverflowError
        s = pd.Series([0, 1, 2, 3, 4])
        result = s.replace([3], ["100000000000000000000"])
        expected = pd.Series([0, 1, 2, "100000000000000000000", 4])
        tm.assert_series_equal(result, expected)

        s = pd.Series([0, "100000000000000000000", "100000000000000000001"])
        result = s.replace(["100000000000000000000"], [1])
        expected = pd.Series([0, 1, "100000000000000000001"])
        tm.assert_series_equal(result, expected)
@@ -0,0 +1,492 @@
from datetime import datetime, timedelta

import numpy as np
import pytest

import pandas as pd
from pandas import (
    Categorical,
    DataFrame,
    Index,
    Series,
    date_range,
    option_context,
    period_range,
    timedelta_range,
)
from pandas.core.base import StringMixin
from pandas.core.index import MultiIndex
import pandas.util.testing as tm

from .common import TestData


class TestSeriesRepr(TestData):
    def test_multilevel_name_print(self):
        index = MultiIndex(
            levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
            codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
            names=["first", "second"],
        )
        s = Series(range(len(index)), index=index, name="sth")
        expected = [
            "first second",
            "foo one 0",
            " two 1",
            " three 2",
            "bar one 3",
            " two 4",
            "baz two 5",
            " three 6",
            "qux one 7",
            " two 8",
            " three 9",
            "Name: sth, dtype: int64",
        ]
        expected = "\n".join(expected)
        assert repr(s) == expected

    def test_name_printing(self):
        # Test small Series.
        s = Series([0, 1, 2])

        s.name = "test"
        assert "Name: test" in repr(s)

        s.name = None
        assert "Name:" not in repr(s)

        # Test big Series (diff code path).
        s = Series(range(1000))

        s.name = "test"
        assert "Name: test" in repr(s)

        s.name = None
        assert "Name:" not in repr(s)

        s = Series(index=date_range("20010101", "20020101"), name="test")
        assert "Name: test" in repr(s)

    def test_repr(self):
        str(self.ts)
        str(self.series)
        str(self.series.astype(int))
        str(self.objSeries)

        str(Series(tm.randn(1000), index=np.arange(1000)))
        str(Series(tm.randn(1000), index=np.arange(1000, 0, step=-1)))

        # empty
        str(self.empty)

        # with NaNs
        self.series[5:7] = np.NaN
        str(self.series)

        # with Nones
        ots = self.ts.astype("O")
        ots[::2] = None
        repr(ots)

        # various names
        for name in [
            "",
            1,
            1.2,
            "foo",
            "\u03B1\u03B2\u03B3",
            "loooooooooooooooooooooooooooooooooooooooooooooooooooong",
            ("foo", "bar", "baz"),
            (1, 2),
            ("foo", 1, 2.3),
            ("\u03B1", "\u03B2", "\u03B3"),
            ("\u03B1", "bar"),
        ]:
            self.series.name = name
            repr(self.series)

        biggie = Series(
            tm.randn(1000), index=np.arange(1000), name=("foo", "bar", "baz")
        )
        repr(biggie)

        # 0 as name
        ser = Series(np.random.randn(100), name=0)
        rep_str = repr(ser)
        assert "Name: 0" in rep_str

        # tidy repr
        ser = Series(np.random.randn(1001), name=0)
        rep_str = repr(ser)
        assert "Name: 0" in rep_str

        ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"])
        assert "\t" not in repr(ser)
        assert "\r" not in repr(ser)
        assert "a\n" not in repr(ser)

        # with empty series (#4651)
        s = Series([], dtype=np.int64, name="foo")
        assert repr(s) == "Series([], Name: foo, dtype: int64)"

        s = Series([], dtype=np.int64, name=None)
        assert repr(s) == "Series([], dtype: int64)"

    def test_tidy_repr(self):
        a = Series(["\u05d0"] * 1000)
        a.name = "title1"
        repr(a)  # should not raise exception

    def test_repr_bool_fails(self, capsys):
        s = Series([DataFrame(np.random.randn(2, 2)) for i in range(5)])

        # It works (with no Cython exception barf)!
        repr(s)

        captured = capsys.readouterr()
        assert captured.err == ""
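        # A Series of DataFrames is ambiguous in boolean context, so this
        # guards that formatting the elements never evaluates them as bools
        # and leaks an error message to stderr.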

    def test_repr_name_iterable_indexable(self):
        s = Series([1, 2, 3], name=np.int64(3))

        # it works!
        repr(s)

        s.name = ("\u05d0",) * 2
        repr(s)

    def test_repr_should_return_str(self):
        # https://docs.python.org/3/reference/datamodel.html#object.__repr__
        # ...The return value must be a string object.

        # (str on py2.x, str (unicode) on py3)

        data = [8, 5, 3, 5]
        index1 = ["\u03c3", "\u03c4", "\u03c5", "\u03c6"]
        df = Series(data, index=index1)
        assert type(df.__repr__()) == str  # both py2 / 3

    def test_repr_max_rows(self):
        # GH 6863
        with pd.option_context("max_rows", None):
            str(Series(range(1001)))  # should not raise exception

    def test_unicode_string_with_unicode(self):
        df = Series(["\u05d0"], name="\u05d1")
        str(df)

    def test_str_to_bytes_raises(self):
        # GH 26447
        df = Series(["abc"], name="abc")
        msg = "^'str' object cannot be interpreted as an integer$"
        with pytest.raises(TypeError, match=msg):
            bytes(df)

    def test_timeseries_repr_object_dtype(self):
        index = Index(
            [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], dtype=object
        )
        ts = Series(np.random.randn(len(index)), index)
        repr(ts)

        ts = tm.makeTimeSeries(1000)
        assert repr(ts).splitlines()[-1].startswith("Freq:")

        ts2 = ts.iloc[np.random.randint(0, len(ts) - 1, 400)]
        repr(ts2).splitlines()[-1]

    def test_latex_repr(self):
        result = r"""\begin{tabular}{ll}
\toprule
{} & 0 \\
\midrule
0 & $\alpha$ \\
1 & b \\
2 & c \\
\bottomrule
\end{tabular}
"""
        with option_context("display.latex.escape", False, "display.latex.repr", True):
            s = Series([r"$\alpha$", "b", "c"])
            assert result == s._repr_latex_()

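        # Outside the option_context block display.latex.repr is False again,
        # so _repr_latex_ falls back to returning None and no LaTeX output
        # is produced.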
        assert s._repr_latex_() is None

    def test_index_repr_in_frame_with_nan(self):
        # see gh-25061
        i = Index([1, np.nan])
        s = Series([1, 2], index=i)
        exp = """1.0 1\nNaN 2\ndtype: int64"""

        assert repr(s) == exp


class TestCategoricalRepr:
    def test_categorical_repr_unicode(self):
        # see gh-21002

        class County(StringMixin):
            name = "San Sebastián"
            state = "PR"

            def __str__(self):
                return self.name + ", " + self.state

        cat = pd.Categorical([County() for _ in range(61)])
        idx = pd.Index(cat)
        ser = idx.to_series()

        repr(ser)
        str(ser)

    def test_categorical_repr(self):
        a = Series(Categorical([1, 2, 3, 4]))
        exp = (
            "0 1\n1 2\n2 3\n3 4\n"
            + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]"
        )

        assert exp == a.__str__()

        a = Series(Categorical(["a", "b"] * 25))
        exp = (
            "0 a\n1 b\n"
            + " ..\n"
            + "48 a\n49 b\n"
            + "Length: 50, dtype: category\nCategories (2, object): [a, b]"
        )
        with option_context("display.max_rows", 5):
            assert exp == repr(a)
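        # With display.max_rows=5 the 50-row Series is truncated around the
        # ".." marker and the repr gains a "Length: 50" line, as spelled out
        # in `exp` above.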

        levs = list("abcdefghijklmnopqrstuvwxyz")
        a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
        exp = (
            "0 a\n1 b\n" + "dtype: category\n"
            "Categories (26, object): [a < b < c < d ... w < x < y < z]"
        )
        assert exp == a.__str__()

    def test_categorical_series_repr(self):
        s = Series(Categorical([1, 2, 3]))
        exp = """0 1
1 2
2 3
dtype: category
Categories (3, int64): [1, 2, 3]"""

        assert repr(s) == exp

        s = Series(Categorical(np.arange(10)))
        exp = """0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
dtype: category
Categories (10, int64): [0, 1, 2, 3, ..., 6, 7, 8, 9]"""

        assert repr(s) == exp

    def test_categorical_series_repr_ordered(self):
        s = Series(Categorical([1, 2, 3], ordered=True))
        exp = """0 1
1 2
2 3
dtype: category
Categories (3, int64): [1 < 2 < 3]"""

        assert repr(s) == exp

        s = Series(Categorical(np.arange(10), ordered=True))
        exp = """0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
dtype: category
Categories (10, int64): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]"""

        assert repr(s) == exp

    def test_categorical_series_repr_datetime(self):
        idx = date_range("2011-01-01 09:00", freq="H", periods=5)
        s = Series(Categorical(idx))
        exp = """0 2011-01-01 09:00:00
1 2011-01-01 10:00:00
2 2011-01-01 11:00:00
3 2011-01-01 12:00:00
4 2011-01-01 13:00:00
dtype: category
Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,
2011-01-01 12:00:00, 2011-01-01 13:00:00]"""  # noqa

        assert repr(s) == exp

        idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern")
        s = Series(Categorical(idx))
        exp = """0 2011-01-01 09:00:00-05:00
1 2011-01-01 10:00:00-05:00
2 2011-01-01 11:00:00-05:00
3 2011-01-01 12:00:00-05:00
4 2011-01-01 13:00:00-05:00
dtype: category
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,
2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,
2011-01-01 13:00:00-05:00]"""  # noqa

        assert repr(s) == exp

    def test_categorical_series_repr_datetime_ordered(self):
        idx = date_range("2011-01-01 09:00", freq="H", periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01-01 09:00:00
1 2011-01-01 10:00:00
2 2011-01-01 11:00:00
3 2011-01-01 12:00:00
4 2011-01-01 13:00:00
dtype: category
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
2011-01-01 12:00:00 < 2011-01-01 13:00:00]"""  # noqa

        assert repr(s) == exp

        idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern")
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01-01 09:00:00-05:00
1 2011-01-01 10:00:00-05:00
2 2011-01-01 11:00:00-05:00
3 2011-01-01 12:00:00-05:00
4 2011-01-01 13:00:00-05:00
dtype: category
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
2011-01-01 13:00:00-05:00]"""  # noqa

        assert repr(s) == exp

    def test_categorical_series_repr_period(self):
        idx = period_range("2011-01-01 09:00", freq="H", periods=5)
        s = Series(Categorical(idx))
        exp = """0 2011-01-01 09:00
1 2011-01-01 10:00
2 2011-01-01 11:00
3 2011-01-01 12:00
4 2011-01-01 13:00
dtype: category
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2011-01-01 13:00]"""  # noqa

        assert repr(s) == exp

        idx = period_range("2011-01", freq="M", periods=5)
        s = Series(Categorical(idx))
        exp = """0 2011-01
1 2011-02
2 2011-03
3 2011-04
4 2011-05
dtype: category
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""

        assert repr(s) == exp

    def test_categorical_series_repr_period_ordered(self):
        idx = period_range("2011-01-01 09:00", freq="H", periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01-01 09:00
1 2011-01-01 10:00
2 2011-01-01 11:00
3 2011-01-01 12:00
4 2011-01-01 13:00
dtype: category
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2011-01-01 13:00]"""  # noqa

        assert repr(s) == exp

        idx = period_range("2011-01", freq="M", periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01
1 2011-02
2 2011-03
3 2011-04
4 2011-05
dtype: category
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""

        assert repr(s) == exp

    def test_categorical_series_repr_timedelta(self):
        idx = timedelta_range("1 days", periods=5)
        s = Series(Categorical(idx))
        exp = """0 1 days
1 2 days
2 3 days
3 4 days
4 5 days
dtype: category
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""

        assert repr(s) == exp

        idx = timedelta_range("1 hours", periods=10)
        s = Series(Categorical(idx))
        exp = """0 0 days 01:00:00
1 1 days 01:00:00
2 2 days 01:00:00
3 3 days 01:00:00
4 4 days 01:00:00
5 5 days 01:00:00
6 6 days 01:00:00
7 7 days 01:00:00
8 8 days 01:00:00
9 9 days 01:00:00
dtype: category
Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00,
8 days 01:00:00, 9 days 01:00:00]"""  # noqa

        assert repr(s) == exp

    def test_categorical_series_repr_timedelta_ordered(self):
        idx = timedelta_range("1 days", periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 1 days
1 2 days
2 3 days
3 4 days
4 5 days
dtype: category
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""  # noqa

        assert repr(s) == exp

        idx = timedelta_range("1 hours", periods=10)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 0 days 01:00:00
1 1 days 01:00:00
2 2 days 01:00:00
3 3 days 01:00:00
4 4 days 01:00:00
5 5 days 01:00:00
6 6 days 01:00:00
7 7 days 01:00:00
8 8 days 01:00:00
9 9 days 01:00:00
dtype: category
Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 <
8 days 01:00:00 < 9 days 01:00:00]"""  # noqa

        assert repr(s) == exp
@@ -0,0 +1,267 @@
import random

import numpy as np
import pytest

from pandas import Categorical, DataFrame, IntervalIndex, MultiIndex, Series
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal, assert_series_equal

from .common import TestData


class TestSeriesSorting(TestData):
    def test_sort_values(self):

        # check indexes are reordered corresponding with the values
        ser = Series([3, 2, 4, 1], ["A", "B", "C", "D"])
        expected = Series([1, 2, 3, 4], ["D", "B", "A", "C"])
        result = ser.sort_values()
        tm.assert_series_equal(expected, result)

        ts = self.ts.copy()
        ts[:5] = np.NaN
        vals = ts.values

        result = ts.sort_values()
        assert np.isnan(result[-5:]).all()
        tm.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:]))

        # na_position
        result = ts.sort_values(na_position="first")
        assert np.isnan(result[:5]).all()
        tm.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:]))

        # something object-type
        ser = Series(["A", "B"], [1, 2])
        # no failure
        ser.sort_values()

        # ascending=False
        ordered = ts.sort_values(ascending=False)
        expected = np.sort(ts.dropna().values)[::-1]
        assert_almost_equal(expected, ordered.dropna().values)
        ordered = ts.sort_values(ascending=False, na_position="first")
        assert_almost_equal(expected, ordered.dropna().values)

        # ascending=[False] should behave the same as ascending=False
        ordered = ts.sort_values(ascending=[False])
        expected = ts.sort_values(ascending=False)
        assert_series_equal(expected, ordered)
        ordered = ts.sort_values(ascending=[False], na_position="first")
        expected = ts.sort_values(ascending=False, na_position="first")
        assert_series_equal(expected, ordered)

        msg = "ascending must be boolean"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=None)
        msg = r"Length of ascending \(0\) must be 1 for Series"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=[])
        msg = r"Length of ascending \(3\) must be 1 for Series"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=[1, 2, 3])
        msg = r"Length of ascending \(2\) must be 1 for Series"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=[False, False])
        msg = "ascending must be boolean"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending="foobar")
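        # ascending also accepts a one-element list; empty, longer, or
        # non-boolean values are rejected with the ValueErrors checked above.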

        # inplace=True
        ts = self.ts.copy()
        ts.sort_values(ascending=False, inplace=True)
        tm.assert_series_equal(ts, self.ts.sort_values(ascending=False))
        tm.assert_index_equal(ts.index, self.ts.sort_values(ascending=False).index)

        # GH 5856/5853
        # Series.sort_values operating on a view
        df = DataFrame(np.random.randn(10, 4))
        s = df.iloc[:, 0]

        msg = (
            "This Series is a view of some other array, to sort in-place"
            " you must create a copy"
        )
        with pytest.raises(ValueError, match=msg):
            s.sort_values(inplace=True)

    def test_sort_index(self):
        rindex = list(self.ts.index)
        random.shuffle(rindex)

        random_order = self.ts.reindex(rindex)
        sorted_series = random_order.sort_index()
        assert_series_equal(sorted_series, self.ts)

        # descending
        sorted_series = random_order.sort_index(ascending=False)
        assert_series_equal(sorted_series, self.ts.reindex(self.ts.index[::-1]))

        # compat on level
        sorted_series = random_order.sort_index(level=0)
        assert_series_equal(sorted_series, self.ts)

        # compat on axis
        sorted_series = random_order.sort_index(axis=0)
        assert_series_equal(sorted_series, self.ts)

        msg = "No axis named 1 for object type <class 'pandas.core.series.Series'>"
        with pytest.raises(ValueError, match=msg):
            random_order.sort_values(axis=1)

        sorted_series = random_order.sort_index(level=0, axis=0)
        assert_series_equal(sorted_series, self.ts)

        with pytest.raises(ValueError, match=msg):
            random_order.sort_index(level=0, axis=1)

    def test_sort_index_inplace(self):

        # For #11402
        rindex = list(self.ts.index)
        random.shuffle(rindex)

        # descending
        random_order = self.ts.reindex(rindex)
        result = random_order.sort_index(ascending=False, inplace=True)

        assert result is None
        tm.assert_series_equal(random_order, self.ts.reindex(self.ts.index[::-1]))

        # ascending
        random_order = self.ts.reindex(rindex)
        result = random_order.sort_index(ascending=True, inplace=True)

        assert result is None
        tm.assert_series_equal(random_order, self.ts)

    @pytest.mark.parametrize("level", ["A", 0])  # GH 21052
    def test_sort_index_multiindex(self, level):

        mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
        s = Series([1, 2], mi)
        backwards = s.iloc[[1, 0]]

        # implicit sort_remaining=True
        res = s.sort_index(level=level)
        assert_series_equal(backwards, res)

        # GH13496
        # sort has no effect without remaining lvls
        res = s.sort_index(level=level, sort_remaining=False)
        assert_series_equal(s, res)

    def test_sort_index_kind(self):
        # GH #14444 & #13589: Add support for sort algo choosing
        series = Series(index=[3, 2, 1, 4, 3])
        expected_series = Series(index=[1, 2, 3, 3, 4])

        index_sorted_series = series.sort_index(kind="mergesort")
        assert_series_equal(expected_series, index_sorted_series)

        index_sorted_series = series.sort_index(kind="quicksort")
        assert_series_equal(expected_series, index_sorted_series)

        index_sorted_series = series.sort_index(kind="heapsort")
        assert_series_equal(expected_series, index_sorted_series)
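        # mergesort is the only stable kind, but the tied index entries (3)
        # carry identical (all-NaN) values here, so every algorithm yields
        # the same sorted result.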

    def test_sort_index_na_position(self):
        series = Series(index=[3, 2, 1, 4, 3, np.nan])

        expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4])
        index_sorted_series = series.sort_index(na_position="first")
        assert_series_equal(expected_series_first, index_sorted_series)

        expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan])
        index_sorted_series = series.sort_index(na_position="last")
        assert_series_equal(expected_series_last, index_sorted_series)

    def test_sort_index_intervals(self):
        s = Series(
            [np.nan, 1, 2, 3], IntervalIndex.from_arrays([0, 1, 2, 3], [1, 2, 3, 4])
        )

        result = s.sort_index()
        expected = s
        assert_series_equal(result, expected)

        result = s.sort_index(ascending=False)
        expected = Series(
            [3, 2, 1, np.nan], IntervalIndex.from_arrays([3, 2, 1, 0], [4, 3, 2, 1])
        )
        assert_series_equal(result, expected)

    def test_sort_values_categorical(self):

        c = Categorical(["a", "b", "b", "a"], ordered=False)
        cat = Series(c.copy())

        # sort in the categories order
        expected = Series(
            Categorical(["a", "a", "b", "b"], ordered=False), index=[0, 3, 1, 2]
        )
        result = cat.sort_values()
        tm.assert_series_equal(result, expected)
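        # Even for an unordered Categorical, sort_values orders by the
        # categories ("a" before "b"); the new index [0, 3, 1, 2] records
        # the original positions of the sorted values.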

        cat = Series(Categorical(["a", "c", "b", "d"], ordered=True))
        res = cat.sort_values()
        exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
        tm.assert_numpy_array_equal(res.__array__(), exp)

        cat = Series(
            Categorical(
                ["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True
            )
        )
        res = cat.sort_values()
        exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
        tm.assert_numpy_array_equal(res.__array__(), exp)

        res = cat.sort_values(ascending=False)
        exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
        tm.assert_numpy_array_equal(res.__array__(), exp)

        raw_cat1 = Categorical(
            ["a", "b", "c", "d"], categories=["a", "b", "c", "d"], ordered=False
        )
        raw_cat2 = Categorical(
            ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True
        )
        s = ["a", "b", "c", "d"]
        df = DataFrame(
            {"unsort": raw_cat1, "sort": raw_cat2, "string": s, "values": [1, 2, 3, 4]}
        )

        # Cats must be sorted in a dataframe
        res = df.sort_values(by=["string"], ascending=False)
        exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
        tm.assert_numpy_array_equal(res["sort"].values.__array__(), exp)
        assert res["sort"].dtype == "category"

        res = df.sort_values(by=["sort"], ascending=False)
        exp = df.sort_values(by=["string"], ascending=True)
        tm.assert_series_equal(res["values"], exp["values"])
        assert res["sort"].dtype == "category"
        assert res["unsort"].dtype == "category"

        # unordered cat, but we allow this
        df.sort_values(by=["unsort"], ascending=False)

        # multi-columns sort
        # GH 7848
        df = DataFrame(
            {"id": [6, 5, 4, 3, 2, 1], "raw_grade": ["a", "b", "b", "a", "a", "e"]}
        )
        df["grade"] = Categorical(df["raw_grade"], ordered=True)
        df["grade"] = df["grade"].cat.set_categories(["b", "e", "a"])

        # sorts 'grade' according to the order of the categories
        result = df.sort_values(by=["grade"])
        expected = df.iloc[[1, 2, 5, 0, 3, 4]]
        tm.assert_frame_equal(result, expected)

        # multi
        result = df.sort_values(by=["grade", "id"])
        expected = df.iloc[[2, 1, 5, 4, 3, 0]]
        tm.assert_frame_equal(result, expected)
@@ -0,0 +1,104 @@
import numpy as np
import pytest

import pandas as pd
from pandas import SparseDtype
import pandas.util.testing as tm


class TestSeriesSubclassing:
    def test_indexing_sliced(self):
        s = tm.SubclassedSeries([1, 2, 3, 4], index=list("abcd"))
        res = s.loc[["a", "b"]]
        exp = tm.SubclassedSeries([1, 2], index=list("ab"))
        tm.assert_series_equal(res, exp)

        res = s.iloc[[2, 3]]
        exp = tm.SubclassedSeries([3, 4], index=list("cd"))
        tm.assert_series_equal(res, exp)

        res = s.loc[["a", "b"]]
        exp = tm.SubclassedSeries([1, 2], index=list("ab"))
        tm.assert_series_equal(res, exp)

    def test_to_frame(self):
        s = tm.SubclassedSeries([1, 2, 3, 4], index=list("abcd"), name="xxx")
        res = s.to_frame()
        exp = tm.SubclassedDataFrame({"xxx": [1, 2, 3, 4]}, index=list("abcd"))
        tm.assert_frame_equal(res, exp)

    def test_subclass_unstack(self):
        # GH 15564
        s = tm.SubclassedSeries([1, 2, 3, 4], index=[list("aabb"), list("xyxy")])

        res = s.unstack()
        exp = tm.SubclassedDataFrame({"x": [1, 3], "y": [2, 4]}, index=["a", "b"])

        tm.assert_frame_equal(res, exp)
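        # The subclass survives reshaping because SubclassedSeries defines
        # _constructor_expanddim, which points unstack at SubclassedDataFrame.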

    def test_subclass_empty_repr(self):
        assert "SubclassedSeries" in repr(tm.SubclassedSeries())


@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
class TestSparseSeriesSubclassing:
    def test_subclass_sparse_slice(self):
        # int64
        s = tm.SubclassedSparseSeries([1, 2, 3, 4, 5])
        exp = tm.SubclassedSparseSeries([2, 3, 4], index=[1, 2, 3])
        tm.assert_sp_series_equal(s.loc[1:3], exp)
        assert s.loc[1:3].dtype == SparseDtype(np.int64)

        exp = tm.SubclassedSparseSeries([2, 3], index=[1, 2])
        tm.assert_sp_series_equal(s.iloc[1:3], exp)
        assert s.iloc[1:3].dtype == SparseDtype(np.int64)

        exp = tm.SubclassedSparseSeries([2, 3], index=[1, 2])
        tm.assert_sp_series_equal(s[1:3], exp)
        assert s[1:3].dtype == SparseDtype(np.int64)

        # float64
        s = tm.SubclassedSparseSeries([1.0, 2.0, 3.0, 4.0, 5.0])
        exp = tm.SubclassedSparseSeries([2.0, 3.0, 4.0], index=[1, 2, 3])
        tm.assert_sp_series_equal(s.loc[1:3], exp)
        assert s.loc[1:3].dtype == SparseDtype(np.float64)

        exp = tm.SubclassedSparseSeries([2.0, 3.0], index=[1, 2])
        tm.assert_sp_series_equal(s.iloc[1:3], exp)
        assert s.iloc[1:3].dtype == SparseDtype(np.float64)

        exp = tm.SubclassedSparseSeries([2.0, 3.0], index=[1, 2])
        tm.assert_sp_series_equal(s[1:3], exp)
        assert s[1:3].dtype == SparseDtype(np.float64)

    def test_subclass_sparse_addition(self):
        s1 = tm.SubclassedSparseSeries([1, 3, 5])
        s2 = tm.SubclassedSparseSeries([-2, 5, 12])
        exp = tm.SubclassedSparseSeries([-1, 8, 17])
        tm.assert_sp_series_equal(s1 + s2, exp)

        s1 = tm.SubclassedSparseSeries([4.0, 5.0, 6.0])
        s2 = tm.SubclassedSparseSeries([1.0, 2.0, 3.0])
        exp = tm.SubclassedSparseSeries([5.0, 7.0, 9.0])
        tm.assert_sp_series_equal(s1 + s2, exp)

    def test_subclass_sparse_to_frame(self):
        s = tm.SubclassedSparseSeries([1, 2], index=list("ab"), name="xxx")
        res = s.to_frame()

        exp_arr = pd.SparseArray([1, 2], dtype=np.int64, kind="block", fill_value=0)
        exp = tm.SubclassedSparseDataFrame(
            {"xxx": exp_arr}, index=list("ab"), default_fill_value=0
        )
        tm.assert_sp_frame_equal(res, exp)

        # create from int dict
        res = tm.SubclassedSparseDataFrame(
            {"xxx": [1, 2]}, index=list("ab"), default_fill_value=0
        )
        tm.assert_sp_frame_equal(res, exp)

        s = tm.SubclassedSparseSeries([1.1, 2.1], index=list("ab"), name="xxx")
        res = s.to_frame()
        exp = tm.SubclassedSparseDataFrame({"xxx": [1.1, 2.1]}, index=list("ab"))
        tm.assert_sp_frame_equal(res, exp)
File diff suppressed because it is too large
@@ -0,0 +1,381 @@
"""
Tests for Series timezone-related methods
"""
from datetime import datetime

from dateutil.tz import tzoffset
import numpy as np
import pytest
import pytz

from pandas._libs.tslibs import conversion, timezones

from pandas import DatetimeIndex, Index, NaT, Series, Timestamp
from pandas.core.indexes.datetimes import date_range
import pandas.util.testing as tm


class TestSeriesTimezones:
    # -----------------------------------------------------------------
    # Series.tz_localize
    def test_series_tz_localize(self):

        rng = date_range("1/1/2011", periods=100, freq="H")
        ts = Series(1, index=rng)

        result = ts.tz_localize("utc")
        assert result.index.tz.zone == "UTC"

        # Can't localize if already tz-aware
        rng = date_range("1/1/2011", periods=100, freq="H", tz="utc")
        ts = Series(1, index=rng)

        with pytest.raises(TypeError, match="Already tz-aware"):
            ts.tz_localize("US/Eastern")

    @pytest.mark.filterwarnings("ignore::FutureWarning")
    def test_tz_localize_errors_deprecation(self):
        # GH 22644
        tz = "Europe/Warsaw"
        n = 60
        rng = date_range(start="2015-03-29 02:00:00", periods=n, freq="min")
        ts = Series(rng)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            with pytest.raises(ValueError):
                ts.dt.tz_localize(tz, errors="foo")
            # make sure errors='coerce' gets mapped correctly to nonexistent
            result = ts.dt.tz_localize(tz, errors="coerce")
            expected = ts.dt.tz_localize(tz, nonexistent="NaT")
            tm.assert_series_equal(result, expected)

    def test_series_tz_localize_ambiguous_bool(self):
        # make sure that we are correctly accepting bool values as ambiguous

        # GH#14402
        ts = Timestamp("2015-11-01 01:00:03")
        expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central")
        expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central")

        ser = Series([ts])
        expected0 = Series([expected0])
        expected1 = Series([expected1])

        with pytest.raises(pytz.AmbiguousTimeError):
            ser.dt.tz_localize("US/Central")

        result = ser.dt.tz_localize("US/Central", ambiguous=True)
        tm.assert_series_equal(result, expected0)

        result = ser.dt.tz_localize("US/Central", ambiguous=[True])
        tm.assert_series_equal(result, expected0)

        result = ser.dt.tz_localize("US/Central", ambiguous=False)
        tm.assert_series_equal(result, expected1)

        result = ser.dt.tz_localize("US/Central", ambiguous=[False])
        tm.assert_series_equal(result, expected1)
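        # 2015-11-01 01:00 occurs twice in US/Central (DST fall-back), so it
        # must be disambiguated: ambiguous=True selects the DST wall time
        # (-05:00), ambiguous=False the standard time (-06:00).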

    @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"])
    @pytest.mark.parametrize(
        "method, exp",
        [
            ["shift_forward", "2015-03-29 03:00:00"],
            ["NaT", NaT],
            ["raise", None],
            ["foo", "invalid"],
        ],
    )
    def test_series_tz_localize_nonexistent(self, tz, method, exp):
        # GH 8917
        n = 60
        dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min")
        s = Series(1, dti)
        if method == "raise":
            with pytest.raises(pytz.NonExistentTimeError):
                s.tz_localize(tz, nonexistent=method)
        elif exp == "invalid":
            with pytest.raises(ValueError):
                dti.tz_localize(tz, nonexistent=method)
        else:
            result = s.tz_localize(tz, nonexistent=method)
            expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz))
            tm.assert_series_equal(result, expected)
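        # The 02:00-02:59 wall times do not exist in these zones on
        # 2015-03-29 (spring-forward), so they are shifted forward, replaced
        # by NaT, or raise, depending on the `nonexistent` argument.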

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_series_tz_localize_empty(self, tzstr):
        # GH#2248
        ser = Series()

        ser2 = ser.tz_localize("utc")
        assert ser2.index.tz == pytz.utc

        ser2 = ser.tz_localize(tzstr)
        timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr))

    # -----------------------------------------------------------------
    # Series.tz_convert

    def test_series_tz_convert(self):
        rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern")
        ts = Series(1, index=rng)

        result = ts.tz_convert("Europe/Berlin")
        assert result.index.tz.zone == "Europe/Berlin"

        # can't convert tz-naive
        rng = date_range("1/1/2011", periods=200, freq="D")
        ts = Series(1, index=rng)

        with pytest.raises(TypeError, match="Cannot convert tz-naive"):
            ts.tz_convert("US/Eastern")

    def test_series_tz_convert_to_utc(self):
        base = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC")
        idx1 = base.tz_convert("Asia/Tokyo")[:2]
        idx2 = base.tz_convert("US/Eastern")[1:]

        res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2)
        tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base))

    # -----------------------------------------------------------------
    # Series.append

    def test_series_append_aware(self):
        rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern")
        rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern")
        ser1 = Series([1], index=rng1)
        ser2 = Series([2], index=rng2)
        ts_result = ser1.append(ser2)

        exp_index = DatetimeIndex(
            ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern"
        )
        exp = Series([1, 2], index=exp_index)
        tm.assert_series_equal(ts_result, exp)
        assert ts_result.index.tz == rng1.tz

        rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="UTC")
        rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC")
        ser1 = Series([1], index=rng1)
        ser2 = Series([2], index=rng2)
        ts_result = ser1.append(ser2)

        exp_index = DatetimeIndex(["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC")
        exp = Series([1, 2], index=exp_index)
        tm.assert_series_equal(ts_result, exp)
        utc = rng1.tz
        assert utc == ts_result.index.tz

        # GH#7795
        # different tz coerces to object dtype, not UTC
        rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern")
        rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central")
        ser1 = Series([1], index=rng1)
        ser2 = Series([2], index=rng2)
        ts_result = ser1.append(ser2)
        exp_index = Index(
            [
                Timestamp("1/1/2011 01:00", tz="US/Eastern"),
                Timestamp("1/1/2011 02:00", tz="US/Central"),
            ]
        )
        exp = Series([1, 2], index=exp_index)
        tm.assert_series_equal(ts_result, exp)

    def test_series_append_aware_naive(self):
        rng1 = date_range("1/1/2011 01:00", periods=1, freq="H")
        rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern")
        ser1 = Series(np.random.randn(len(rng1)), index=rng1)
        ser2 = Series(np.random.randn(len(rng2)), index=rng2)
        ts_result = ser1.append(ser2)

        expected = ser1.index.astype(object).append(ser2.index.astype(object))
        assert ts_result.index.equals(expected)

        # mixed
        rng1 = date_range("1/1/2011 01:00", periods=1, freq="H")
        rng2 = range(100)
        ser1 = Series(np.random.randn(len(rng1)), index=rng1)
        ser2 = Series(np.random.randn(len(rng2)), index=rng2)
        ts_result = ser1.append(ser2)

        expected = ser1.index.astype(object).append(ser2.index)
        assert ts_result.index.equals(expected)

    def test_series_append_dst(self):
        rng1 = date_range("1/1/2016 01:00", periods=3, freq="H", tz="US/Eastern")
        rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern")
        ser1 = Series([1, 2, 3], index=rng1)
        ser2 = Series([10, 11, 12], index=rng2)
        ts_result = ser1.append(ser2)

        exp_index = DatetimeIndex(
            [
                "2016-01-01 01:00",
                "2016-01-01 02:00",
                "2016-01-01 03:00",
                "2016-08-01 01:00",
                "2016-08-01 02:00",
                "2016-08-01 03:00",
            ],
            tz="US/Eastern",
        )
        exp = Series([1, 2, 3, 10, 11, 12], index=exp_index)
        tm.assert_series_equal(ts_result, exp)
        assert ts_result.index.tz == rng1.tz

    # -----------------------------------------------------------------

    def test_dateutil_tzoffset_support(self):
        values = [188.5, 328.25]
        tzinfo = tzoffset(None, 7200)
        index = [
            datetime(2012, 5, 11, 11, tzinfo=tzinfo),
            datetime(2012, 5, 11, 12, tzinfo=tzinfo),
        ]
        series = Series(data=values, index=index)

        assert series.index.tz == tzinfo

        # it works! #2443
        repr(series.index[0])

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_tz_aware_asfreq(self, tz):
        dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz)

        ser = Series(np.random.randn(len(dr)), index=dr)

        # it works!
        ser.asfreq("T")

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_string_index_alias_tz_aware(self, tz):
        rng = date_range("1/1/2000", periods=10, tz=tz)
        ser = Series(np.random.randn(len(rng)), index=rng)

        result = ser["1/3/2000"]
        tm.assert_almost_equal(result, ser[2])

    # TODO: De-duplicate with test below
    def test_series_add_tz_mismatch_converts_to_utc_duplicate(self):
        rng = date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern")
        ser = Series(np.random.randn(len(rng)), index=rng)

        ts_moscow = ser.tz_convert("Europe/Moscow")

        result = ser + ts_moscow
        assert result.index.tz is pytz.utc

        result = ts_moscow + ser
        assert result.index.tz is pytz.utc

    def test_series_add_tz_mismatch_converts_to_utc(self):
        rng = date_range("1/1/2011", periods=100, freq="H", tz="utc")

        perm = np.random.permutation(100)[:90]
        ser1 = Series(
            np.random.randn(90), index=rng.take(perm).tz_convert("US/Eastern")
        )

        perm = np.random.permutation(100)[:90]
        ser2 = Series(
            np.random.randn(90), index=rng.take(perm).tz_convert("Europe/Berlin")
        )

        result = ser1 + ser2

        uts1 = ser1.tz_convert("utc")
        uts2 = ser2.tz_convert("utc")
        expected = uts1 + uts2

        assert result.index.tz == pytz.UTC
        tm.assert_series_equal(result, expected)

    def test_series_add_aware_naive_raises(self):
        rng = date_range("1/1/2011", periods=10, freq="H")
        ser = Series(np.random.randn(len(rng)), index=rng)

        ser_utc = ser.tz_localize("utc")

        with pytest.raises(Exception):
            ser + ser_utc

        with pytest.raises(Exception):
            ser_utc + ser

    def test_series_align_aware(self):
        idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern")
        ser = Series(np.random.randn(len(idx1)), index=idx1)
        ser_central = ser.tz_convert("US/Central")
        # different timezones convert to UTC

        new1, new2 = ser.align(ser_central)
        assert new1.index.tz == pytz.UTC
        assert new2.index.tz == pytz.UTC

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_localized_at_time_between_time(self, tzstr):
        from datetime import time

        tz = timezones.maybe_get_tz(tzstr)

        rng = date_range("4/16/2012", "5/1/2012", freq="H")
        ts = Series(np.random.randn(len(rng)), index=rng)

        ts_local = ts.tz_localize(tzstr)

        result = ts_local.at_time(time(10, 0))
        expected = ts.at_time(time(10, 0)).tz_localize(tzstr)
        tm.assert_series_equal(result, expected)
        assert timezones.tz_compare(result.index.tz, tz)

        t1, t2 = time(10, 0), time(11, 0)
        result = ts_local.between_time(t1, t2)
        expected = ts.between_time(t1, t2).tz_localize(tzstr)
        tm.assert_series_equal(result, expected)
        assert timezones.tz_compare(result.index.tz, tz)

    @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"])
    def test_getitem_pydatetime_tz(self, tzstr):
        tz = timezones.maybe_get_tz(tzstr)

        index = date_range(
            start="2012-12-24 16:00", end="2012-12-24 18:00", freq="H", tz=tzstr
        )
        ts = Series(index=index, data=index.hour)
        time_pandas = Timestamp("2012-12-24 17:00", tz=tzstr)

        dt = datetime(2012, 12, 24, 17, 0)
        time_datetime = conversion.localize_pydatetime(dt, tz)
        assert ts[time_pandas] == ts[time_datetime]

    def test_series_truncate_datetimeindex_tz(self):
        # GH 9243
        idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific")
        s = Series(range(len(idx)), index=idx)
        result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4))
        expected = Series([1, 2, 3], index=idx[1:4])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("copy", [True, False])
    @pytest.mark.parametrize(
        "method, tz", [["tz_localize", None], ["tz_convert", "Europe/Berlin"]]
    )
    def test_tz_localize_convert_copy_inplace_mutate(self, copy, method, tz):
        # GH 6326
        result = Series(
            np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz=tz)
        )
        getattr(result, method)("UTC", copy=copy)
        expected = Series(
            np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz=tz)
        )
        tm.assert_series_equal(result, expected)

    def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture):
        # GH 25843
        tz = tz_aware_fixture
        result = Series([Timestamp("2019", tz=tz)], dtype="datetime64[ns]")
        expected = Series([Timestamp("2019")])
        tm.assert_series_equal(result, expected)
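        # Requesting a tz-naive dtype for tz-aware data drops the timezone
        # but keeps the wall time, so the result matches the naive "2019".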
@@ -0,0 +1,308 @@
from collections import deque
import string

import numpy as np
import pytest

import pandas as pd
import pandas.util.testing as tm

UNARY_UFUNCS = [np.positive, np.floor, np.exp]
BINARY_UFUNCS = [np.add, np.logaddexp]  # dunder op
SPARSE = [True, False]
SPARSE_IDS = ["sparse", "dense"]
SHUFFLE = [True, False]


@pytest.fixture
def arrays_for_binary_ufunc():
    """
    A pair of random, length-100 integer-dtype arrays, that are mostly 0.
    """
    a1 = np.random.randint(0, 10, 100, dtype="int64")
    a2 = np.random.randint(0, 10, 100, dtype="int64")
    a1[::3] = 0
    a2[::4] = 0
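    # The zeroed slots line up with the SparseDtype("int64", 0) fill value
    # used by the sparse test variants, keeping those arrays mostly "empty".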
    return a1, a2


@pytest.mark.parametrize("ufunc", UNARY_UFUNCS)
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
def test_unary_ufunc(ufunc, sparse):
    # Test that ufunc(Series) == Series(ufunc)
    array = np.random.randint(0, 10, 10, dtype="int64")
    array[::2] = 0
    if sparse:
        array = pd.SparseArray(array, dtype=pd.SparseDtype("int64", 0))

    index = list(string.ascii_letters[:10])
    name = "name"
    series = pd.Series(array, index=index, name=name)

    result = ufunc(series)
    expected = pd.Series(ufunc(array), index=index, name=name)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc):
    # Test that ufunc(Series(a), array) == Series(ufunc(a, b))
    a1, a2 = arrays_for_binary_ufunc
    if sparse:
        a1 = pd.SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
        a2 = pd.SparseArray(a2, dtype=pd.SparseDtype("int64", 0))

    name = "name"  # op(Series, array) preserves the name.
    series = pd.Series(a1, name=name)
    other = a2

    array_args = (a1, a2)
    series_args = (series, other)  # ufunc(series, array)

    if flip:
        array_args = reversed(array_args)
        series_args = reversed(series_args)  # ufunc(array, series)

    expected = pd.Series(ufunc(*array_args), name=name)
    result = ufunc(*series_args)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc):
    # Test that
    # * func(Series(a), Series(b)) == Series(ufunc(a, b))
    # * ufunc(Index, Series) dispatches to Series (returns a Series)
    a1, a2 = arrays_for_binary_ufunc
    if sparse:
        a1 = pd.SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
        a2 = pd.SparseArray(a2, dtype=pd.SparseDtype("int64", 0))

    name = "name"  # op(Series, array) preserves the name.
    series = pd.Series(a1, name=name)
    other = pd.Index(a2, name=name).astype("int64")

    array_args = (a1, a2)
    series_args = (series, other)  # ufunc(series, array)

    if flip:
        array_args = reversed(array_args)
        series_args = reversed(series_args)  # ufunc(array, series)

    expected = pd.Series(ufunc(*array_args), name=name)
    result = ufunc(*series_args)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("shuffle", [True, False], ids=["unaligned", "aligned"])
@pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
def test_binary_ufunc_with_series(
    flip, shuffle, sparse, ufunc, arrays_for_binary_ufunc
):
    # Test that
    # * func(Series(a), Series(b)) == Series(ufunc(a, b))
    # with alignment between the indices
    a1, a2 = arrays_for_binary_ufunc
    if sparse:
        a1 = pd.SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
        a2 = pd.SparseArray(a2, dtype=pd.SparseDtype("int64", 0))

    name = "name"  # op(Series, array) preserves the name.
    series = pd.Series(a1, name=name)
    other = pd.Series(a2, name=name)

    idx = np.random.permutation(len(a1))

    if shuffle:
        other = other.take(idx)
        if flip:
            index = other.align(series)[0].index
        else:
            index = series.align(other)[0].index
    else:
        index = series.index
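    # After alignment the shared RangeIndex labels restore the original row
    # pairing, so the raw (unshuffled) arrays can still be combined
    # positionally to build the expected result below.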

    array_args = (a1, a2)
    series_args = (series, other)  # ufunc(series, array)

    if flip:
        array_args = tuple(reversed(array_args))
        series_args = tuple(reversed(series_args))  # ufunc(array, series)

    expected = pd.Series(ufunc(*array_args), index=index, name=name)
    result = ufunc(*series_args)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("flip", [True, False])
def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):
    # Test that
    # * ufunc(Series, scalar) == Series(ufunc(array, scalar))
    # * ufunc(Series, scalar) == ufunc(scalar, Series)
    array, _ = arrays_for_binary_ufunc
    if sparse:
        array = pd.SparseArray(array)
    other = 2
    series = pd.Series(array, name="name")

    series_args = (series, other)
    array_args = (array, other)

    if flip:
        series_args = tuple(reversed(series_args))
        array_args = tuple(reversed(array_args))

    expected = pd.Series(ufunc(*array_args), name="name")
    result = ufunc(*series_args)

    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("ufunc", [np.divmod])  # any others?
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("shuffle", SHUFFLE)
@pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning")
def test_multiple_output_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ufunc):
    # Test that
    # the same conditions from binary_ufunc_scalar apply to
    # ufuncs with multiple outputs.
    if sparse and ufunc is np.divmod:
        pytest.skip("sparse divmod not implemented.")

    a1, a2 = arrays_for_binary_ufunc
    # work around https://github.com/pandas-dev/pandas/issues/26987
    a1[a1 == 0] = 1
    a2[a2 == 0] = 1

    if sparse:
        a1 = pd.SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
        a2 = pd.SparseArray(a2, dtype=pd.SparseDtype("int64", 0))

    s1 = pd.Series(a1)
    s2 = pd.Series(a2)

    if shuffle:
        # ensure we align before applying the ufunc
        s2 = s2.sample(frac=1)

    expected = ufunc(a1, a2)
    assert isinstance(expected, tuple)

    result = ufunc(s1, s2)
    assert isinstance(result, tuple)
    tm.assert_series_equal(result[0], pd.Series(expected[0]))
    tm.assert_series_equal(result[1], pd.Series(expected[1]))


@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
def test_multiple_output_ufunc(sparse, arrays_for_binary_ufunc):
    # Test that the same conditions from unary input apply to multi-output
    # ufuncs
    array, _ = arrays_for_binary_ufunc

    if sparse:
        array = pd.SparseArray(array)

    series = pd.Series(array, name="name")
    result = np.modf(series)
    expected = np.modf(array)

    assert isinstance(result, tuple)
    assert isinstance(expected, tuple)

    tm.assert_series_equal(result[0], pd.Series(expected[0], name="name"))
    tm.assert_series_equal(result[1], pd.Series(expected[1], name="name"))


@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
def test_binary_ufunc_drops_series_name(ufunc, sparse, arrays_for_binary_ufunc):
    # Drop the names when they differ.
    a1, a2 = arrays_for_binary_ufunc
    s1 = pd.Series(a1, name="a")
    s2 = pd.Series(a2, name="b")

    result = ufunc(s1, s2)
    assert result.name is None


def test_object_series_ok():
    class Dummy:
        def __init__(self, value):
            self.value = value

        def __add__(self, other):
            return self.value + other.value

    arr = np.array([Dummy(0), Dummy(1)])
    ser = pd.Series(arr)
    tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr)))
    tm.assert_series_equal(np.add(ser, Dummy(1)), pd.Series(np.add(ser, Dummy(1))))


@pytest.mark.parametrize(
    "values",
    [
        pd.array([1, 3, 2]),
        pytest.param(
            pd.array([1, 10, 0], dtype="Sparse[int]"),
            marks=pytest.mark.xfail(reason="GH-27080. Bug in SparseArray"),
        ),
        pd.to_datetime(["2000", "2010", "2001"]),
        pd.to_datetime(["2000", "2010", "2001"]).tz_localize("CET"),
        pd.to_datetime(["2000", "2010", "2001"]).to_period(freq="D"),
    ],
)
def test_reduce(values):
    a = pd.Series(values)
    assert np.maximum.reduce(a) == values[1]


@pytest.mark.parametrize("type_", [list, deque, tuple])
def test_binary_ufunc_other_types(type_):
    a = pd.Series([1, 2, 3], name="name")
    b = type_([3, 4, 5])

    result = np.add(a, b)
    expected = pd.Series(np.add(a.to_numpy(), b), name="name")
    tm.assert_series_equal(result, expected)


def test_object_dtype_ok():
    class Thing:
        def __init__(self, value):
            self.value = value

        def __add__(self, other):
            other = getattr(other, "value", other)
            return type(self)(self.value + other)

        def __eq__(self, other):
            return type(other) is Thing and self.value == other.value

        def __repr__(self):
            return "Thing({})".format(self.value)

    s = pd.Series([Thing(1), Thing(2)])
    result = np.add(s, Thing(1))
    expected = pd.Series([Thing(2), Thing(3)])
    tm.assert_series_equal(result, expected)


def test_outer():
    # https://github.com/pandas-dev/pandas/issues/27186
    s = pd.Series([1, 2, 3])
    o = np.array([1, 2, 3])

    with tm.assert_produces_warning(FutureWarning):
        result = np.subtract.outer(s, o)
    expected = np.array([[0, -1, -2], [1, 0, -1], [2, 1, 0]], dtype=np.dtype("int64"))
    tm.assert_numpy_array_equal(result, expected)
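    # Applying .outer to a Series is deprecated (hence the FutureWarning);
    # the Series is treated as a plain ndarray, so the result is the 3x3
    # matrix of pairwise differences rather than a pandas object.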
@@ -0,0 +1,20 @@
import pytest


class TestSeriesValidate:
    """Tests for error handling related to data types of method arguments."""

    @pytest.mark.parametrize(
        "func",
        ["reset_index", "_set_name", "sort_values", "sort_index", "rename", "dropna"],
    )
    @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0])
    def test_validate_bool_args(self, string_series, func, inplace):
        msg = 'For argument "inplace" expected type bool'
        kwargs = dict(inplace=inplace)

        if func == "_set_name":
            kwargs["name"] = "hello"

        with pytest.raises(ValueError, match=msg):
            getattr(string_series, func)(**kwargs)
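        # Only genuine booleans pass validation: inplace=True would be
        # accepted, while the merely truthy 1 or "True" above raise.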