8th day of Python challenges 111–117

This commit is contained in:
abd.shallal
2019-08-04 15:26:35 +03:00
parent b04c1b055f
commit 627802c383
3215 changed files with 760227 additions and 491 deletions

View File

@@ -0,0 +1,30 @@
from pandas.util._decorators import cache_readonly
import pandas as pd
import pandas.util.testing as tm
# Shared baseline time series; TestData.ts hands out named copies of it.
_ts = tm.makeTimeSeries()
class TestData:
    """Container of lazily-built, cached sample Series shared by the tests."""

    @cache_readonly
    def ts(self):
        """A named copy of the shared module-level time series."""
        copied = _ts.copy()
        copied.name = "ts"
        return copied

    @cache_readonly
    def series(self):
        """A float Series with a string index, named "series"."""
        result = tm.makeStringSeries()
        result.name = "series"
        return result

    @cache_readonly
    def objSeries(self):
        """An object-dtype Series, named "objects"."""
        result = tm.makeObjectSeries()
        result.name = "objects"
        return result

    @cache_readonly
    def empty(self):
        """An empty Series with an empty index."""
        return pd.Series([], index=[])

View File

@@ -0,0 +1,33 @@
import pytest
import pandas.util.testing as tm
@pytest.fixture
def datetime_series():
    """Fixture: float Series with a DatetimeIndex, named "ts"."""
    series = tm.makeTimeSeries()
    series.name = "ts"
    return series
@pytest.fixture
def string_series():
    """Fixture: float Series with an Index of unique strings, named "series"."""
    series = tm.makeStringSeries()
    series.name = "series"
    return series
@pytest.fixture
def object_series():
    """Fixture: object-dtype Series with an Index of unique strings, named "objects"."""
    series = tm.makeObjectSeries()
    series.name = "objects"
    return series

View File

@@ -0,0 +1,8 @@
import pytest
from pandas.tests.series.common import TestData
@pytest.fixture(scope="module")
def test_data():
    """Module-scoped fixture providing the shared TestData sample container."""
    return TestData()

View File

@@ -0,0 +1,558 @@
from datetime import datetime
import numpy as np
from numpy import nan
import pytest
import pandas as pd
from pandas import Categorical, Series, date_range, isna
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal
@pytest.mark.parametrize(
    "first_slice,second_slice",
    [
        [[2, None], [None, -5]],
        [[None, 0], [None, -5]],
        [[None, -5], [None, 0]],
        [[None, 0], [None, 0]],
    ],
)
@pytest.mark.parametrize("fill", [None, -1])
def test_align(test_data, first_slice, second_slice, join_type, fill):
    """Series.align over overlapping slices matches a manual reindex,
    for every join type, with and without a fill value.

    ``join_type`` comes from a shared pytest fixture.
    """
    a = test_data.ts[slice(*first_slice)]
    b = test_data.ts[slice(*second_slice)]
    aa, ab = a.align(b, join=join_type, fill_value=fill)
    join_index = a.index.join(b.index, how=join_type)
    if fill is not None:
        # labels outside the joined index must have been filled with `fill`
        diff_a = aa.index.difference(join_index)
        diff_b = ab.index.difference(join_index)
        if len(diff_a) > 0:
            assert (aa.reindex(diff_a) == fill).all()
        if len(diff_b) > 0:
            assert (ab.reindex(diff_b) == fill).all()
    # expected result: a plain reindex onto the joined index (+ optional fillna)
    ea = a.reindex(join_index)
    eb = b.reindex(join_index)
    if fill is not None:
        ea = ea.fillna(fill)
        eb = eb.fillna(fill)
    assert_series_equal(aa, ea)
    assert_series_equal(ab, eb)
    # alignment must preserve the Series name
    assert aa.name == "ts"
    assert ea.name == "ts"
    assert ab.name == "ts"
    assert eb.name == "ts"
@pytest.mark.parametrize(
    "first_slice,second_slice",
    [
        [[2, None], [None, -5]],
        [[None, 0], [None, -5]],
        [[None, -5], [None, 0]],
        [[None, 0], [None, 0]],
    ],
)
@pytest.mark.parametrize("method", ["pad", "bfill"])
@pytest.mark.parametrize("limit", [None, 1])
def test_align_fill_method(
    test_data, first_slice, second_slice, join_type, method, limit
):
    """align(method=..., limit=...) equals reindex followed by fillna."""
    a = test_data.ts[slice(*first_slice)]
    b = test_data.ts[slice(*second_slice)]
    aa, ab = a.align(b, join=join_type, method=method, limit=limit)
    join_index = a.index.join(b.index, how=join_type)
    ea = a.reindex(join_index)
    eb = b.reindex(join_index)
    ea = ea.fillna(method=method, limit=limit)
    eb = eb.fillna(method=method, limit=limit)
    assert_series_equal(aa, ea)
    assert_series_equal(ab, eb)
def test_align_nocopy(test_data):
    """copy=False lets aligned results share data with the originals;
    the default copies, so mutating the result leaves the source intact."""
    b = test_data.ts[:5].copy()
    # do copy
    a = test_data.ts.copy()
    ra, _ = a.align(b, join="left")
    ra[:5] = 5
    assert not (a[:5] == 5).any()
    # do not copy
    a = test_data.ts.copy()
    ra, _ = a.align(b, join="left", copy=False)
    ra[:5] = 5
    assert (a[:5] == 5).all()
    # do copy
    a = test_data.ts.copy()
    b = test_data.ts[:5].copy()
    _, rb = a.align(b, join="right")
    rb[:3] = 5
    assert not (b[:3] == 5).any()
    # do not copy
    a = test_data.ts.copy()
    b = test_data.ts[:5].copy()
    _, rb = a.align(b, join="right", copy=False)
    rb[:2] = 5
    assert (b[:2] == 5).all()
def test_align_same_index(test_data):
    """Aligning a Series with itself reuses the index object unless copy=True."""
    a, b = test_data.ts.align(test_data.ts, copy=False)
    assert a.index is test_data.ts.index
    assert b.index is test_data.ts.index
    a, b = test_data.ts.align(test_data.ts, copy=True)
    assert a.index is not test_data.ts.index
    assert b.index is not test_data.ts.index
def test_align_multiindex():
    """Aligning a MultiIndexed Series with a single-level Index (GH 10665).

    The flat index joins against the "b" level; left/right joins must be
    mirror images of each other.
    """
    # GH 10665
    midx = pd.MultiIndex.from_product(
        [range(2), range(3), range(2)], names=("a", "b", "c")
    )
    idx = pd.Index(range(2), name="b")
    s1 = pd.Series(np.arange(12, dtype="int64"), index=midx)
    s2 = pd.Series(np.arange(2, dtype="int64"), index=idx)
    # these must be the same results (but flipped)
    res1l, res1r = s1.align(s2, join="left")
    res2l, res2r = s2.align(s1, join="right")
    expl = s1
    tm.assert_series_equal(expl, res1l)
    tm.assert_series_equal(expl, res2r)
    # b==2 has no match in idx, hence NaN there
    expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
    tm.assert_series_equal(expr, res1r)
    tm.assert_series_equal(expr, res2l)
    res1l, res1r = s1.align(s2, join="right")
    res2l, res2r = s2.align(s1, join="left")
    # joining on level "b" drops the b==2 entries entirely
    exp_idx = pd.MultiIndex.from_product(
        [range(2), range(2), range(2)], names=("a", "b", "c")
    )
    expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
    tm.assert_series_equal(expl, res1l)
    tm.assert_series_equal(expl, res2r)
    expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx)
    tm.assert_series_equal(expr, res1r)
    tm.assert_series_equal(expr, res2l)
def test_reindex(test_data):
    """reindex basics: identity, sub-index, missing labels, non-contiguous
    selections, and the no-argument copy behavior."""
    identity = test_data.series.reindex(test_data.series.index)
    # __array_interface__ is not defined for older numpies
    # and on some pythons
    try:
        assert np.may_share_memory(test_data.series.index, identity.index)
    except AttributeError:
        pass
    assert identity.index.is_(test_data.series.index)
    assert identity.index.identical(test_data.series.index)
    subIndex = test_data.series.index[10:20]
    subSeries = test_data.series.reindex(subIndex)
    for idx, val in subSeries.items():
        assert val == test_data.series[idx]
    subIndex2 = test_data.ts.index[10:20]
    subTS = test_data.ts.reindex(subIndex2)
    for idx, val in subTS.items():
        assert val == test_data.ts[idx]
    # subIndex labels come from the string series and are absent from ts,
    # so every reindexed value is NaN
    stuffSeries = test_data.ts.reindex(subIndex)
    assert np.isnan(stuffSeries).all()
    # This is extremely important for the Cython code to not screw up
    nonContigIndex = test_data.ts.index[::2]
    subNonContig = test_data.ts.reindex(nonContigIndex)
    for idx, val in subNonContig.items():
        assert val == test_data.ts[idx]
    # return a copy the same index here
    result = test_data.ts.reindex()
    assert not (result is test_data.ts)
def test_reindex_nan():
    """Reindexing with NaN labels selects the NaN-indexed entries."""
    series = Series([2, 3, 5, 7], index=[1, 4, nan, 8])
    labels = [nan, 1, nan, 8, 4, nan]
    positions = [2, 0, 2, 3, 1, 2]
    assert_series_equal(series.reindex(labels), series.iloc[positions])
    # reindex coerces index.dtype to float, loc/iloc doesn't
    series.index = series.index.astype("object")
    assert_series_equal(
        series.reindex(labels), series.iloc[positions], check_index_type=False
    )
def test_reindex_series_add_nat():
    """Extending a datetime64 Series via reindex inserts NaT and keeps dtype."""
    rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s")
    extended = Series(rng).reindex(range(15))
    assert np.issubdtype(extended.dtype, np.dtype("M8[ns]"))
    na_mask = extended.isna()
    # only the five appended positions are missing
    assert na_mask[-5:].all()
    assert not na_mask[:-5].any()
def test_reindex_with_datetimes():
rng = date_range("1/1/2000", periods=20)
ts = Series(np.random.randn(20), index=rng)
result = ts.reindex(list(ts.index[5:10]))
expected = ts[5:10]
tm.assert_series_equal(result, expected)
result = ts[list(ts.index[5:10])]
tm.assert_series_equal(result, expected)
def test_reindex_corner(test_data):
    """Corner cases: padding an empty Series, list indexers, bad method.

    The original duplicated the empty-pad call and bound its result to an
    unused local; the redundant call and dead binding are removed.
    """
    # corner case: pad an empty series onto a real index (must not raise)
    test_data.empty.reindex(test_data.ts.index, method="pad")
    # pass non-Index
    reindexed = test_data.ts.reindex(list(test_data.ts.index))
    assert_series_equal(test_data.ts, reindexed)
    # bad fill method
    ts = test_data.ts[::2]
    msg = (
        r"Invalid fill method\. Expecting pad \(ffill\), backfill"
        r" \(bfill\) or nearest\. Got foo"
    )
    with pytest.raises(ValueError, match=msg):
        ts.reindex(test_data.ts.index, method="foo")
def test_reindex_pad():
    """reindex with pad/ffill, including dtype inference after filling."""
    s = Series(np.arange(10), dtype="int64")
    s2 = s[::2]
    # "pad" and "ffill" are aliases
    reindexed = s2.reindex(s.index, method="pad")
    reindexed2 = s2.reindex(s.index, method="ffill")
    assert_series_equal(reindexed, reindexed2)
    expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
    assert_series_equal(reindexed, expected)
    # GH4604
    s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
    new_index = ["a", "g", "c", "f"]
    expected = Series([1, 1, 3, 3], index=new_index)
    # this changes dtype because the ffill happens after
    result = s.reindex(new_index).ffill()
    assert_series_equal(result, expected.astype("float64"))
    result = s.reindex(new_index).ffill(downcast="infer")
    assert_series_equal(result, expected)
    # filling during reindex keeps the original int dtype
    expected = Series([1, 5, 3, 5], index=new_index)
    result = s.reindex(new_index, method="ffill")
    assert_series_equal(result, expected)
    # inference of new dtype
    s = Series([True, False, False, True], index=list("abcd"))
    new_index = "agc"
    result = s.reindex(list(new_index)).ffill()
    expected = Series([True, True, False], index=list(new_index))
    assert_series_equal(result, expected)
    # GH4618 shifted series downcasting
    s = Series(False, index=range(0, 5))
    result = s.shift(1).fillna(method="bfill")
    expected = Series(False, index=range(0, 5))
    assert_series_equal(result, expected)
def test_reindex_nearest():
    """reindex/reindex_like with method="nearest" and tolerance handling."""
    s = Series(np.arange(10, dtype="int64"))
    target = [0.1, 0.9, 1.5, 2.0]
    actual = s.reindex(target, method="nearest")
    expected = Series(np.around(target).astype("int64"), target)
    assert_series_equal(expected, actual)
    actual = s.reindex_like(actual, method="nearest")
    assert_series_equal(expected, actual)
    actual = s.reindex_like(actual, method="nearest", tolerance=1)
    assert_series_equal(expected, actual)
    # tolerance may also be list-like, one entry per target label
    actual = s.reindex_like(actual, method="nearest", tolerance=[1, 2, 3, 4])
    assert_series_equal(expected, actual)
    # targets farther than the tolerance become NaN
    actual = s.reindex(target, method="nearest", tolerance=0.2)
    expected = Series([0, 1, np.nan, 2], target)
    assert_series_equal(expected, actual)
    actual = s.reindex(target, method="nearest", tolerance=[0.3, 0.01, 0.4, 3])
    expected = Series([0, np.nan, np.nan, 2], target)
    assert_series_equal(expected, actual)
def test_reindex_backfill():
    """Placeholder for backfill-specific reindex tests (intentionally empty)."""
    pass
def test_reindex_int(test_data):
    """Reindexing an int Series upcasts to float only when NaNs appear.

    Uses ``np.float64`` instead of the ``np.float_`` alias, which was
    removed in NumPy 2.0 (they are the same dtype).
    """
    ts = test_data.ts[::2]
    int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)
    # this should work fine
    reindexed_int = int_ts.reindex(test_data.ts.index)
    # if NaNs introduced
    assert reindexed_int.dtype == np.float64
    # NO NaNs introduced
    reindexed_int = int_ts.reindex(int_ts.index[::2])
    assert reindexed_int.dtype == np.int_
def test_reindex_bool(test_data):
    """Reindexing a bool Series upcasts to object only when NaNs appear."""
    # A series other than float, int, string, or object
    ts = test_data.ts[::2]
    bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
    # this should work fine
    reindexed_bool = bool_ts.reindex(test_data.ts.index)
    # if NaNs introduced
    assert reindexed_bool.dtype == np.object_
    # NO NaNs introduced
    reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
    assert reindexed_bool.dtype == np.bool_
def test_reindex_bool_pad(test_data):
    """Padding has nothing to copy for labels before the first entry."""
    # fail
    ts = test_data.ts[5:]
    bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
    filled_bool = bool_ts.reindex(test_data.ts.index, method="pad")
    # the first five labels precede bool_ts.index, so they stay missing
    assert isna(filled_bool[:5]).all()
def test_reindex_categorical():
    """Reindexing a categorical Series keeps the original categories."""
    index = date_range("20000101", periods=3)
    # reindexing to an invalid Categorical: no label matches, all NaN
    s = Series(["a", "b", "c"], dtype="category")
    result = s.reindex(index)
    expected = Series(
        Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"])
    )
    expected.index = index
    tm.assert_series_equal(result, expected)
    # partial reindexing
    expected = Series(Categorical(values=["b", "c"], categories=["a", "b", "c"]))
    expected.index = [1, 2]
    result = s.reindex([1, 2])
    tm.assert_series_equal(result, expected)
    expected = Series(Categorical(values=["c", np.nan], categories=["a", "b", "c"]))
    expected.index = [2, 3]
    result = s.reindex([2, 3])
    tm.assert_series_equal(result, expected)
def test_reindex_like(test_data):
    """reindex_like equals reindex on the other object's index (GH 7179)."""
    other = test_data.ts[::2]
    assert_series_equal(
        test_data.ts.reindex(other.index), test_data.ts.reindex_like(other)
    )
    # GH 7179
    day1 = datetime(2013, 3, 5)
    day2 = datetime(2013, 5, 5)
    day3 = datetime(2014, 3, 5)
    series1 = Series([5, None, None], [day1, day2, day3])
    series2 = Series([None, None], [day1, day3])
    # pad fills day3 from day2's (missing) value, leaving NaN
    result = series1.reindex_like(series2, method="pad")
    expected = Series([5, np.nan], index=[day1, day3])
    assert_series_equal(result, expected)
def test_reindex_fill_value():
    """fill_value controls both the fill and the resulting dtype per kind."""
    # -----------------------------------------------------------
    # floats
    floats = Series([1.0, 2.0, 3.0])
    result = floats.reindex([1, 2, 3])
    expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3])
    assert_series_equal(result, expected)
    result = floats.reindex([1, 2, 3], fill_value=0)
    expected = Series([2.0, 3.0, 0], index=[1, 2, 3])
    assert_series_equal(result, expected)
    # -----------------------------------------------------------
    # ints
    ints = Series([1, 2, 3])
    result = ints.reindex([1, 2, 3])
    expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3])
    assert_series_equal(result, expected)
    # don't upcast
    result = ints.reindex([1, 2, 3], fill_value=0)
    expected = Series([2, 3, 0], index=[1, 2, 3])
    assert issubclass(result.dtype.type, np.integer)
    assert_series_equal(result, expected)
    # -----------------------------------------------------------
    # objects
    objects = Series([1, 2, 3], dtype=object)
    result = objects.reindex([1, 2, 3])
    expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
    assert_series_equal(result, expected)
    result = objects.reindex([1, 2, 3], fill_value="foo")
    expected = Series([2, 3, "foo"], index=[1, 2, 3], dtype=object)
    assert_series_equal(result, expected)
    # ------------------------------------------------------------
    # bools: NaN forces object dtype, a bool fill keeps bool dtype
    bools = Series([True, False, True])
    result = bools.reindex([1, 2, 3])
    expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
    assert_series_equal(result, expected)
    result = bools.reindex([1, 2, 3], fill_value=False)
    expected = Series([False, True, False], index=[1, 2, 3])
    assert_series_equal(result, expected)
def test_reindex_datetimeindexes_tz_naive_and_aware():
    """Filling across tz-aware and tz-naive indexes must raise (GH 8306)."""
    # GH 8306
    idx = date_range("20131101", tz="America/Chicago", periods=7)
    newidx = date_range("20131103", periods=10, freq="H")
    s = Series(range(7), index=idx)
    with pytest.raises(TypeError):
        s.reindex(newidx, method="ffill")
def test_reindex_empty_series_tz_dtype():
    """Reindexing an empty tz-aware Series keeps its dtype (GH 20869)."""
    # GH 20869
    result = Series(dtype="datetime64[ns, UTC]").reindex([0, 1])
    expected = Series([pd.NaT] * 2, dtype="datetime64[ns, UTC]")
    tm.assert_equal(result, expected)
def test_rename():
    """rename with a builtin callable matches rename with a lambda (GH 17407)."""
    # GH 17407
    s = Series(range(1, 6), index=pd.Index(range(2, 7), name="IntIndex"))
    via_builtin = s.rename(str)
    via_lambda = s.rename(lambda i: str(i))
    assert_series_equal(via_builtin, via_lambda)
    assert via_builtin.name == via_lambda.name
@pytest.mark.parametrize(
    "data, index, drop_labels, axis, expected_data, expected_index",
    [
        # Unique Index
        ([1, 2], ["one", "two"], ["two"], 0, [1], ["one"]),
        ([1, 2], ["one", "two"], ["two"], "rows", [1], ["one"]),
        ([1, 1, 2], ["one", "two", "one"], ["two"], 0, [1, 2], ["one", "one"]),
        # GH 5248 Non-Unique Index
        ([1, 1, 2], ["one", "two", "one"], "two", 0, [1, 2], ["one", "one"]),
        ([1, 1, 2], ["one", "two", "one"], ["one"], 0, [1], ["two"]),
        ([1, 1, 2], ["one", "two", "one"], "one", 0, [1], ["two"]),
    ],
)
def test_drop_unique_and_non_unique_index(
    data, index, axis, drop_labels, expected_data, expected_index
):
    """Series.drop removes every occurrence of a label, scalar or list."""
    s = Series(data=data, index=index)
    result = s.drop(drop_labels, axis=axis)
    expected = Series(data=expected_data, index=expected_index)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "data, index, drop_labels, axis, error_type, error_desc",
    [
        # single string/tuple-like
        (range(3), list("abc"), "bc", 0, KeyError, "not found in axis"),
        # bad axis
        (range(3), list("abc"), ("a",), 0, KeyError, "not found in axis"),
        (range(3), list("abc"), "one", "columns", ValueError, "No axis named columns"),
    ],
)
def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc):
    """Missing labels and bad axes raise KeyError/ValueError respectively."""
    with pytest.raises(error_type, match=error_desc):
        Series(data, index=index).drop(drop_labels, axis=axis)
def test_drop_with_ignore_errors():
    """errors="ignore" skips missing labels; bool labels drop by value."""
    # errors='ignore'
    s = Series(range(3), index=list("abc"))
    result = s.drop("bc", errors="ignore")
    tm.assert_series_equal(result, s)
    result = s.drop(["a", "d"], errors="ignore")
    expected = s.iloc[1:]
    tm.assert_series_equal(result, expected)
    # GH 8522: a bool index is object dtype, so True drops the True label
    s = Series([2, 3], index=[True, False])
    assert s.index.is_object()
    result = s.drop(True)
    expected = Series([3], index=[False])
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 3]])
@pytest.mark.parametrize("drop_labels", [[], [1], [3]])
def test_drop_empty_list(index, drop_labels):
    """drop with (possibly empty) label lists on unique/dup indexes (GH 21494)."""
    # GH 21494
    expected_index = [i for i in index if i not in drop_labels]
    series = pd.Series(index=index).drop(drop_labels)
    tm.assert_series_equal(series, pd.Series(index=expected_index))
@pytest.mark.parametrize(
    "data, index, drop_labels",
    [
        (None, [1, 2, 3], [1, 4]),
        (None, [1, 2, 2], [1, 4]),
        ([2, 3], [0, 1], [False, True]),
    ],
)
def test_drop_non_empty_list(data, index, drop_labels):
    """Dropping labels absent from the axis raises KeyError (GH 21494, 16877)."""
    # GH 21494 and GH 16877
    with pytest.raises(KeyError, match="not found in axis"):
        pd.Series(data=data, index=index).drop(drop_labels)

View File

@@ -0,0 +1,628 @@
import numpy as np
import pytest
from pandas.core.dtypes.common import is_integer
import pandas as pd
from pandas import Index, Series, Timestamp, date_range, isna
from pandas.core.indexing import IndexingError
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal
from pandas.tseries.offsets import BDay
def test_getitem_boolean(test_data):
    """Boolean getitem with a plain list equals getitem with a Series mask."""
    s = test_data.series
    mask = s > s.median()
    # passing list is OK
    result = s[list(mask)]
    expected = s[mask]
    assert_series_equal(result, expected)
    tm.assert_index_equal(result.index, s.index[mask])
def test_getitem_boolean_empty():
    """Boolean indexing with empty or unaligned Series indexers (GH 5877)."""
    s = Series([], dtype=np.int64)
    s.index.name = "index_name"
    s = s[s.isna()]
    # filtering an empty series keeps index name and dtype
    assert s.index.name == "index_name"
    assert s.dtype == np.int64
    # GH5877
    # indexing with empty series
    s = Series(["A", "B"])
    expected = Series(np.nan, index=["C"], dtype=object)
    result = s[Series(["C"], dtype=object)]
    assert_series_equal(result, expected)
    s = Series(["A", "B"])
    expected = Series(dtype=object, index=Index([], dtype="int64"))
    result = s[Series([], dtype=object)]
    assert_series_equal(result, expected)
    # invalid because of the boolean indexer
    # that's empty or not-aligned
    msg = (
        r"Unalignable boolean Series provided as indexer \(index of"
        r" the boolean Series and of the indexed object do not match"
    )
    with pytest.raises(IndexingError, match=msg):
        s[Series([], dtype=bool)]
    with pytest.raises(IndexingError, match=msg):
        s[Series([True], dtype=bool)]
def test_getitem_boolean_object(test_data):
    """Object-dtype boolean masks behave like bool masks; NaN entries raise."""
    # using column from DataFrame
    s = test_data.series
    mask = s > s.median()
    omask = mask.astype(object)
    # getitem
    result = s[omask]
    expected = s[mask]
    assert_series_equal(result, expected)
    # setitem
    s2 = s.copy()
    cop = s.copy()
    cop[omask] = 5
    s2[mask] = 5
    assert_series_equal(cop, s2)
    # nans raise exception
    omask[5:10] = np.nan
    msg = "cannot index with vector containing NA / NaN values"
    with pytest.raises(ValueError, match=msg):
        s[omask]
    with pytest.raises(ValueError, match=msg):
        s[omask] = 5
def test_getitem_setitem_boolean_corner(test_data):
    """A mask on a shifted index is unalignable and raises IndexingError."""
    ts = test_data.ts
    mask_shifted = ts.shift(1, freq=BDay()) > ts.median()
    # these used to raise...??
    msg = (
        r"Unalignable boolean Series provided as indexer \(index of"
        r" the boolean Series and of the indexed object do not match"
    )
    with pytest.raises(IndexingError, match=msg):
        ts[mask_shifted]
    with pytest.raises(IndexingError, match=msg):
        ts[mask_shifted] = 1
    with pytest.raises(IndexingError, match=msg):
        ts.loc[mask_shifted]
    with pytest.raises(IndexingError, match=msg):
        ts.loc[mask_shifted] = 1
def test_setitem_boolean(test_data):
    """Boolean setitem aligns the assigned values on the index.

    Uses ``~mask`` rather than the original ``-mask``: unary minus on a
    boolean array raises TypeError on modern NumPy, while ``~`` is the
    supported negation and is equivalent here.
    """
    mask = test_data.series > test_data.series.median()
    # similar indexed series
    result = test_data.series.copy()
    result[mask] = test_data.series * 2
    expected = test_data.series * 2
    assert_series_equal(result[mask], expected[mask])
    # needs alignment
    result = test_data.series.copy()
    result[mask] = (test_data.series * 2)[0:5]
    expected = (test_data.series * 2)[0:5].reindex_like(test_data.series)
    expected[~mask] = test_data.series[mask]
    assert_series_equal(result[mask], expected[mask])
def test_get_set_boolean_different_order(test_data):
    """Boolean masks align on labels, so a re-ordered mask works the same."""
    ordered = test_data.series.sort_values()
    # setting
    copy = test_data.series.copy()
    copy[ordered > 0] = 0
    expected = test_data.series.copy()
    expected[expected > 0] = 0
    assert_series_equal(copy, expected)
    # getting
    sel = test_data.series[ordered > 0]
    exp = test_data.series[test_data.series > 0]
    assert_series_equal(sel, exp)
def test_where_unsafe_int(sint_dtype):
    """Masked assignment of in-range ints keeps the signed-int dtype."""
    s = Series(np.arange(10), dtype=sint_dtype)
    mask = s < 5
    s[mask] = range(2, 7)
    expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype=sint_dtype)
    assert_series_equal(s, expected)
def test_where_unsafe_float(float_dtype):
    """Masked assignment of ints into a float Series keeps the float dtype."""
    s = Series(np.arange(10), dtype=float_dtype)
    mask = s < 5
    s[mask] = range(2, 7)
    data = list(range(2, 7)) + list(range(5, 10))
    expected = Series(data, dtype=float_dtype)
    assert_series_equal(s, expected)
@pytest.mark.parametrize(
    "dtype,expected_dtype",
    [
        (np.int8, np.float64),
        (np.int16, np.float64),
        (np.int32, np.float64),
        (np.int64, np.float64),
        (np.float32, np.float32),
        (np.float64, np.float64),
    ],
)
def test_where_unsafe_upcast(dtype, expected_dtype):
    """Assigning floats into an int Series upcasts to float64 (gh-9743)."""
    # see gh-9743
    s = Series(np.arange(10), dtype=dtype)
    values = [2.5, 3.5, 4.5, 5.5, 6.5]
    mask = s < 5
    expected = Series(values + list(range(5, 10)), dtype=expected_dtype)
    s[mask] = values
    assert_series_equal(s, expected)
def test_where_unsafe():
    """Masked setitem dtype handling: upcasting, length checks, None → NaN."""
    # see gh-9731: float values into int series upcast to float
    s = Series(np.arange(10), dtype="int64")
    values = [2.5, 3.5, 4.5, 5.5]
    mask = s > 5
    expected = Series(list(range(6)) + values, dtype="float64")
    s[mask] = values
    assert_series_equal(s, expected)
    # see gh-3235: int values keep the int dtype
    s = Series(np.arange(10), dtype="int64")
    mask = s < 5
    s[mask] = range(2, 7)
    expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype="int64")
    assert_series_equal(s, expected)
    assert s.dtype == expected.dtype
    s = Series(np.arange(10), dtype="int64")
    mask = s > 5
    s[mask] = [0] * 4
    expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64")
    assert_series_equal(s, expected)
    # value length must match the number of True entries in the mask
    s = Series(np.arange(10))
    mask = s > 5
    msg = "cannot assign mismatch length to masked array"
    with pytest.raises(ValueError, match=msg):
        s[mask] = [5, 4, 3, 2, 1]
    with pytest.raises(ValueError, match=msg):
        s[mask] = [0] * 5
    # dtype changes
    s = Series([1, 2, 3, 4])
    result = s.where(s > 2, np.nan)
    expected = Series([np.nan, np.nan, 3, 4])
    assert_series_equal(result, expected)
    # GH 4667
    # setting with None changes dtype
    s = Series(range(10)).astype(float)
    s[8] = None
    result = s[8]
    assert isna(result)
    s = Series(range(10)).astype(float)
    s[s > 8] = None
    result = s[isna(s)]
    expected = Series(np.nan, index=[9])
    assert_series_equal(result, expected)
def test_where():
    """Basic where: filtering, replacement and alignment of the condition."""
    s = Series(np.random.randn(5))
    cond = s > 0
    rs = s.where(cond).dropna()
    rs2 = s[cond]
    assert_series_equal(rs, rs2)
    # replacing negatives with their negation yields the absolute values
    rs = s.where(cond, -s)
    assert_series_equal(rs, s.abs())
    # where never mutates and preserves shape
    rs = s.where(cond)
    assert s.shape == rs.shape
    assert rs is not s
    # test alignment: a shorter condition is aligned, missing labels → NaN
    cond = Series([True, False, False, True, False], index=s.index)
    s2 = -(s.abs())
    expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index)
    rs = s2.where(cond[:3])
    assert_series_equal(rs, expected)
    expected = s2.abs()
    expected.iloc[0] = s2[0]
    rs = s2.where(cond[:3], -s2)
    assert_series_equal(rs, expected)
def test_where_error():
    """Invalid conditions and mismatched assignment lengths raise ValueError."""
    s = Series(np.random.randn(5))
    cond = s > 0
    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        s.where(1)
    with pytest.raises(ValueError, match=msg):
        s.where(cond[:3].values, -s)
    # GH 2745
    s = Series([1, 2])
    s[[True, False]] = [0, 1]
    expected = Series([0, 2])
    assert_series_equal(s, expected)
    # failures
    msg = "cannot assign mismatch length to masked array"
    with pytest.raises(ValueError, match=msg):
        s[[True, False]] = [0, 2, 3]
    msg = (
        "NumPy boolean array indexing assignment cannot assign 0 input"
        " values to the 1 output values where the mask is true"
    )
    with pytest.raises(ValueError, match=msg):
        s[[True, False]] = []
@pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
def test_where_array_like(klass):
    """where accepts list/tuple/ndarray/Series boolean conditions (gh-15414)."""
    # see gh-15414
    s = Series([1, 2, 3])
    cond = [False, True, True]
    expected = Series([np.nan, 2, 3])
    result = s.where(klass(cond))
    assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "cond",
    [
        [1, 0, 1],
        Series([2, 5, 7]),
        ["True", "False", "True"],
        [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")],
    ],
)
def test_where_invalid_input(cond):
    """Non-boolean conditions are rejected (gh-15414)."""
    # see gh-15414: only boolean arrays accepted
    s = Series([1, 2, 3])
    msg = "Boolean array expected for the condition"
    with pytest.raises(ValueError, match=msg):
        s.where(cond)
    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        s.where([True])
def test_where_ndframe_align():
    """Raw array conditions must match in shape; Series conditions align."""
    msg = "Array conditional must be same shape as self"
    s = Series([1, 2, 3])
    cond = [True]
    with pytest.raises(ValueError, match=msg):
        s.where(cond)
    # wrapped in a Series, the short condition aligns (missing → NaN)
    expected = Series([1, np.nan, np.nan])
    out = s.where(Series(cond))
    tm.assert_series_equal(out, expected)
    cond = np.array([False, True, False, True])
    with pytest.raises(ValueError, match=msg):
        s.where(cond)
    # wrapped in a Series, the extra label is dropped by alignment
    expected = Series([np.nan, 2, np.nan])
    out = s.where(Series(cond))
    tm.assert_series_equal(out, expected)
def test_where_setitem_invalid():
    """Invalid-length list assignment raises for slice/list indexers (GH 2702)."""
    # GH 2702
    # make sure correct exceptions are raised on invalid list assignment
    msg = "cannot set using a {} indexer with a different length than the value"
    # slice
    s = Series(list("abc"))
    with pytest.raises(ValueError, match=msg.format("slice")):
        s[0:3] = list(range(27))
    s[0:3] = list(range(3))
    expected = Series([0, 1, 2])
    assert_series_equal(s.astype(np.int64), expected)
    # slice with step
    s = Series(list("abcdef"))
    with pytest.raises(ValueError, match=msg.format("slice")):
        s[0:4:2] = list(range(27))
    s = Series(list("abcdef"))
    s[0:4:2] = list(range(2))
    expected = Series([0, "b", 1, "d", "e", "f"])
    assert_series_equal(s, expected)
    # neg slices
    s = Series(list("abcdef"))
    with pytest.raises(ValueError, match=msg.format("slice")):
        s[:-1] = list(range(27))
    s[-3:-1] = list(range(2))
    expected = Series(["a", "b", "c", 0, 1, "f"])
    assert_series_equal(s, expected)
    # list indexers require an exact length match in both directions
    s = Series(list("abc"))
    with pytest.raises(ValueError, match=msg.format("list-like")):
        s[[0, 1, 2]] = list(range(27))
    s = Series(list("abc"))
    with pytest.raises(ValueError, match=msg.format("list-like")):
        s[[0, 1, 2]] = list(range(2))
    # scalar indexer: the whole list is stored as a single element
    s = Series(list("abc"))
    s[0] = list(range(10))
    expected = Series([list(range(10)), "b", "c"])
    assert_series_equal(s, expected)
@pytest.mark.parametrize("size", range(2, 6))
@pytest.mark.parametrize(
    "mask", [[True, False, False, False, False], [True, False], [False]]
)
@pytest.mark.parametrize(
    "item", [2.0, np.nan, np.finfo(np.float64).max, np.finfo(np.float64).min]
)
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize(
    "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)]
)
def test_broadcast(size, mask, item, box):
    """Single-element boxed values broadcast in setitem, where and mask.

    ``np.float64`` replaces the ``np.float`` alias (removed in NumPy 1.24);
    both name the same 64-bit float type, so the parametrization is unchanged.
    """
    selection = np.resize(mask, size)
    data = np.arange(size, dtype=float)
    # Construct the expected series by taking the source
    # data or item based on the selection
    expected = Series(
        [item if use_item else data[i] for i, use_item in enumerate(selection)]
    )
    s = Series(data)
    s[selection] = box(item)
    assert_series_equal(s, expected)
    s = Series(data)
    result = s.where(~selection, box(item))
    assert_series_equal(result, expected)
    s = Series(data)
    result = s.mask(selection, box(item))
    assert_series_equal(result, expected)
def test_where_inplace():
    """where(inplace=True) matches the out-of-place result."""
    base = Series(np.random.randn(5))
    cond = base > 0
    mutated = base.copy()
    mutated.where(cond, inplace=True)
    assert_series_equal(mutated.dropna(), base[cond])
    assert_series_equal(mutated, base.where(cond))
    mutated = base.copy()
    mutated.where(cond, -base, inplace=True)
    assert_series_equal(mutated, base.where(cond, -base))
def test_where_dups():
    """where and masked assignment work with duplicate index labels."""
    # GH 4550: where used to crash with dups in the index
    first = Series(list(range(3)))
    second = Series(list(range(3)))
    combined = pd.concat([first, second])
    dup_index = [0, 1, 2, 0, 1, 2]
    outcome = combined.where(combined < 2)
    assert_series_equal(
        outcome, Series([0, 1, np.nan, 0, 1, np.nan], index=dup_index)
    )
    # GH 4548: in-place updating used to fail with dups
    combined[combined < 1] = 5
    assert_series_equal(combined, Series([5, 1, 2, 5, 1, 2], index=dup_index))
    combined[combined < 2] += 10
    assert_series_equal(combined, Series([5, 11, 2, 5, 11, 2], index=dup_index))
def test_where_numeric_with_string():
    """Replacing with strings upcasts to object but keeps kept ints as ints."""
    # GH 9280
    s = pd.Series([1, 2, 3])
    # scalar replacement
    w = s.where(s > 1, "X")
    assert not is_integer(w[0])
    assert is_integer(w[1])
    assert is_integer(w[2])
    assert isinstance(w[0], str)
    assert w.dtype == "object"
    # list replacement
    w = s.where(s > 1, ["X", "Y", "Z"])
    assert not is_integer(w[0])
    assert is_integer(w[1])
    assert is_integer(w[2])
    assert isinstance(w[0], str)
    assert w.dtype == "object"
    # ndarray replacement
    w = s.where(s > 1, np.array(["X", "Y", "Z"]))
    assert not is_integer(w[0])
    assert is_integer(w[1])
    assert is_integer(w[2])
    assert isinstance(w[0], str)
    assert w.dtype == "object"
def test_where_timedelta_coerce():
    """Replacing all timedelta values with numbers coerces the dtype."""
    s = Series([1, 2], dtype="timedelta64[ns]")
    expected = Series([10, 10])
    mask = np.array([False, False])
    rs = s.where(mask, [10, 10])
    assert_series_equal(rs, expected)
    rs = s.where(mask, 10)
    assert_series_equal(rs, expected)
    rs = s.where(mask, 10.0)
    assert_series_equal(rs, expected)
    rs = s.where(mask, [10.0, 10.0])
    assert_series_equal(rs, expected)
    # a NaN in the replacement forces object dtype with None
    rs = s.where(mask, [10.0, np.nan])
    expected = Series([10, None], dtype="object")
    assert_series_equal(rs, expected)
def test_where_datetime_conversion():
    """Replacing all datetime values with numbers coerces the dtype."""
    s = Series(date_range("20130102", periods=2))
    expected = Series([10, 10])
    mask = np.array([False, False])
    rs = s.where(mask, [10, 10])
    assert_series_equal(rs, expected)
    rs = s.where(mask, 10)
    assert_series_equal(rs, expected)
    rs = s.where(mask, 10.0)
    assert_series_equal(rs, expected)
    rs = s.where(mask, [10.0, 10.0])
    assert_series_equal(rs, expected)
    # a NaN in the replacement forces object dtype with None
    rs = s.where(mask, [10.0, np.nan])
    expected = Series([10, None], dtype="object")
    assert_series_equal(rs, expected)
    # GH 15701: sub-second timestamps survive where with NaT fill
    timestamps = ["2016-12-31 12:00:04+00:00", "2016-12-31 12:00:04.010000+00:00"]
    s = Series([pd.Timestamp(t) for t in timestamps])
    rs = s.where(Series([False, True]))
    expected = Series([pd.NaT, s[1]])
    assert_series_equal(rs, expected)
def test_where_dt_tz_values(tz_naive_fixture):
    """where between two tz-aware Series keeps the timezone.

    ``tz_naive_fixture`` is a shared pytest fixture supplying timezones.
    """
    ser1 = pd.Series(
        pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture)
    )
    ser2 = pd.Series(
        pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture)
    )
    mask = pd.Series([True, True, False])
    result = ser1.where(mask, ser2)
    exp = pd.Series(
        pd.DatetimeIndex(["20150101", "20150102", "20160516"], tz=tz_naive_fixture)
    )
    assert_series_equal(exp, result)
def test_mask():
    """mask(cond) is equivalent to where(~cond) in all forms."""
    # compare with tested results in test_where
    s = Series(np.random.randn(5))
    cond = s > 0
    rs = s.where(~cond, np.nan)
    assert_series_equal(rs, s.mask(cond))
    rs = s.where(~cond)
    rs2 = s.mask(cond)
    assert_series_equal(rs, rs2)
    rs = s.where(~cond, -s)
    rs2 = s.mask(cond, -s)
    assert_series_equal(rs, rs2)
    # shorter conditions align the same way in both methods
    cond = Series([True, False, False, True, False], index=s.index)
    s2 = -(s.abs())
    rs = s2.where(~cond[:3])
    rs2 = s2.mask(cond[:3])
    assert_series_equal(rs, rs2)
    rs = s2.where(~cond[:3], -s2)
    rs2 = s2.mask(cond[:3], -s2)
    assert_series_equal(rs, rs2)
    # invalid conditions raise like where
    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        s.mask(1)
    with pytest.raises(ValueError, match=msg):
        s.mask(cond[:3].values, -s)
    # dtype changes
    s = Series([1, 2, 3, 4])
    result = s.mask(s > 2, np.nan)
    expected = Series([1, 2, np.nan, np.nan])
    assert_series_equal(result, expected)
    # see gh-21891: plain-list conditions are accepted
    s = Series([1, 2])
    res = s.mask([True, False])
    exp = Series([np.nan, 2])
    tm.assert_series_equal(res, exp)
def test_mask_inplace():
    """mask(inplace=True) matches the out-of-place result."""
    base = Series(np.random.randn(5))
    cond = base > 0
    mutated = base.copy()
    mutated.mask(cond, inplace=True)
    assert_series_equal(mutated.dropna(), base[~cond])
    assert_series_equal(mutated, base.mask(cond))
    mutated = base.copy()
    mutated.mask(cond, -base, inplace=True)
    assert_series_equal(mutated, base.mask(cond, -base))

View File

@@ -0,0 +1,33 @@
import pandas as pd
import pandas.util.testing as tm
def test_getitem_callable():
    """A callable key is evaluated against the Series and its result is used
    as the indexer: scalar label, list of labels, or boolean mask (GH 12533)."""
    ser = pd.Series(4, index=list("ABCD"))

    # callable returning a scalar label -> scalar lookup
    assert ser[lambda x: "A"] == ser.loc["A"]

    # callable returning a list of labels -> label selection
    tm.assert_series_equal(ser[lambda x: ["A", "B"]], ser.loc[["A", "B"]])

    # callable returning a boolean mask -> filtered rows
    tm.assert_series_equal(
        ser[lambda x: [True, False, True, True]], ser.iloc[[0, 2, 3]]
    )
def test_setitem_callable():
    """Assigning through a callable key resolves the callable to the target
    label before setting (GH 12533)."""
    ser = pd.Series([1, 2, 3, 4], index=list("ABCD"))
    ser[lambda x: "A"] = -1
    expected = pd.Series([-1, 2, 3, 4], index=list("ABCD"))
    tm.assert_series_equal(ser, expected)
def test_setitem_other_callable():
    """A callable used as the assigned *value* (not the key) is stored as-is;
    the Series upcasts to object dtype to hold it (GH 13299)."""
    inc = lambda x: x + 1
    s = pd.Series([1, 2, -1, 4])
    s[s < 0] = inc
    expected = pd.Series([1, 2, inc, 4])
    tm.assert_series_equal(s, expected)

View File

@@ -0,0 +1,778 @@
from datetime import datetime, timedelta
import numpy as np
import pytest
from pandas._libs import iNaT
import pandas._libs.index as _index
import pandas as pd
from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range
import pandas.util.testing as tm
from pandas.util.testing import (
assert_almost_equal,
assert_frame_equal,
assert_series_equal,
)
"""
Also test support for datetime64[ns] in Series / DataFrame
"""
def test_fancy_getitem():
    """Position, date-string, datetime and Timestamp keys all address the same
    element of a WOM-1FRI indexed Series; a date not in the index raises
    KeyError, and string/datetime slice bounds are interchangeable."""
    dti = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )
    s = Series(np.arange(len(dti)), index=dti)
    assert s[48] == 48
    assert s["1/2/2009"] == 48
    assert s["2009-1-2"] == 48
    assert s[datetime(2009, 1, 2)] == 48
    assert s[Timestamp(datetime(2009, 1, 2))] == 48
    with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
        s["2009-1-3"]
    assert_series_equal(
        s["3/6/2009":"2009-06-05"], s[datetime(2009, 3, 6) : datetime(2009, 6, 5)]
    )
def test_fancy_setitem():
    """Setting elements by position, by date string, and by string-slice on a
    datetime-indexed Series."""
    dti = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )
    s = Series(np.arange(len(dti)), index=dti)
    s[48] = -1
    assert s[48] == -1
    s["1/2/2009"] = -2
    assert s[48] == -2
    # string slice assignment covers positions 48..53 (inclusive right edge)
    s["1/2/2009":"2009-06-05"] = -3
    assert (s[48:54] == -3).all()
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
@pytest.mark.parametrize("tz", [None, "Asia/Shanghai", "Europe/Berlin"])
@pytest.mark.parametrize("name", [None, "my_dti"])
def test_dti_snap(name, tz):
    """DatetimeIndex.snap rounds each timestamp to the nearest tick of the
    requested frequency, preserving the index name and timezone."""
    dti = DatetimeIndex(
        [
            "1/1/2002",
            "1/2/2002",
            "1/3/2002",
            "1/4/2002",
            "1/5/2002",
            "1/6/2002",
            "1/7/2002",
        ],
        name=name,
        tz=tz,
        freq="D",
    )

    # snap to weekly (Monday-anchored) ticks
    result = dti.snap(freq="W-MON")
    expected = date_range("12/31/2001", "1/7/2002", name=name, tz=tz, freq="w-mon")
    expected = expected.repeat([3, 4])
    tm.assert_index_equal(result, expected)
    assert result.tz == expected.tz

    # snap to business days
    result = dti.snap(freq="B")
    expected = date_range("1/1/2002", "1/7/2002", name=name, tz=tz, freq="b")
    expected = expected.repeat([1, 1, 1, 2, 2])
    tm.assert_index_equal(result, expected)
    assert result.tz == expected.tz
def test_dti_reset_index_round_trip():
    """reset_index then set_index round-trips a DatetimeIndex without losing
    the datetime64[ns] dtype."""
    dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")
    d1 = DataFrame({"v": np.random.rand(len(dti))}, index=dti)
    d2 = d1.reset_index()
    assert d2.dtypes[0] == np.dtype("M8[ns]")
    d3 = d2.set_index("index")
    assert_frame_equal(d1, d3, check_names=False)

    # #2329
    stamp = datetime(2012, 11, 22)
    df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"])
    df = df.set_index("Date")
    assert df.index[0] == stamp
    assert df.reset_index()["Date"][0] == stamp
def test_series_set_value():
    """The deprecated Series.set_value grows the series one label at a time
    and raises a FutureWarning (#1561)."""
    dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)]
    index = DatetimeIndex(dates)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        s = Series().set_value(dates[0], 1.0)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        s2 = s.set_value(dates[1], np.nan)

    exp = Series([1.0, np.nan], index=index)
    assert_series_equal(s2, exp)

    # s = Series(index[:1], index[:1])
    # s2 = s.set_value(dates[1], index[1])
    # assert s2.values.dtype == 'M8[ns]'
@pytest.mark.slow
def test_slice_locs_indexerror():
    """Slicing with bounds far outside the index range must not raise
    IndexError; this only checks that the lookup completes."""
    times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) for i in range(100000)]
    s = Series(range(100000), times)
    s.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)]
def test_slicing_datetimes():
    """loc-slicing a DataFrame with datetime bounds (and partial date strings)
    works on both unique and duplicated DatetimeIndexes (GH 7523)."""
    # unique
    df = DataFrame(
        np.arange(4.0, dtype="float64"),
        index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 3, 4]],
    )
    result = df.loc[datetime(2001, 1, 1, 10) :]
    assert_frame_equal(result, df)
    result = df.loc[: datetime(2001, 1, 4, 10)]
    assert_frame_equal(result, df)
    result = df.loc[datetime(2001, 1, 1, 10) : datetime(2001, 1, 4, 10)]
    assert_frame_equal(result, df)

    # a bound that falls between index values excludes the earlier rows
    result = df.loc[datetime(2001, 1, 1, 11) :]
    expected = df.iloc[1:]
    assert_frame_equal(result, expected)
    result = df.loc["20010101 11":]
    assert_frame_equal(result, expected)

    # duplicates
    df = pd.DataFrame(
        np.arange(5.0, dtype="float64"),
        index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 2, 3, 4]],
    )
    result = df.loc[datetime(2001, 1, 1, 10) :]
    assert_frame_equal(result, df)
    result = df.loc[: datetime(2001, 1, 4, 10)]
    assert_frame_equal(result, df)
    result = df.loc[datetime(2001, 1, 1, 10) : datetime(2001, 1, 4, 10)]
    assert_frame_equal(result, df)

    result = df.loc[datetime(2001, 1, 1, 11) :]
    expected = df.iloc[1:]
    assert_frame_equal(result, expected)
    result = df.loc["20010101 11":]
    assert_frame_equal(result, expected)
def test_frame_datetime64_duplicated():
    """DataFrame.duplicated handles datetime64 columns: a strictly increasing
    daily range must produce no duplicate rows."""
    dates = date_range("2010-07-01", end="2010-08-05")

    # duplicated over a subset that includes the datetime column
    tst = DataFrame({"symbol": "AAA", "date": dates})
    result = tst.duplicated(["date", "symbol"])
    # was `(-result).all()`: unary minus on a boolean Series is
    # deprecated/removed in modern pandas & numpy; negate logically instead
    assert not result.any()

    # datetime column on its own
    tst = DataFrame({"date": dates})
    result = tst.duplicated()
    assert not result.any()
def test_getitem_setitem_datetime_tz_pytz():
    """Setting items on a tz-aware Series via UTC/offset strings and pytz
    datetimes addresses the correct slot regardless of the key's timezone
    (GH 2785, GH 2789)."""
    from pytz import timezone as tz
    from pandas import date_range

    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern")
    ts = Series(np.random.randn(N), index=rng)

    # also test Timestamp tz handling, GH #2789
    result = ts.copy()
    result["1990-01-01 09:00:00+00:00"] = 0
    result["1990-01-01 09:00:00+00:00"] = ts[4]
    assert_series_equal(result, ts)

    result = ts.copy()
    result["1990-01-01 03:00:00-06:00"] = 0
    result["1990-01-01 03:00:00-06:00"] = ts[4]
    assert_series_equal(result, ts)

    # repeat with datetimes
    result = ts.copy()
    result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = 0
    result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = ts[4]
    assert_series_equal(result, ts)

    result = ts.copy()
    # comparison dates with datetime MUST be localized!
    date = tz("US/Central").localize(datetime(1990, 1, 1, 3))
    result[date] = 0
    result[date] = ts[4]
    assert_series_equal(result, ts)
def test_getitem_setitem_datetime_tz_dateutil():
    """Same tz get/set round-trips as the pytz variant above, but with
    dateutil timezone objects as the keys' tzinfo."""
    from dateutil.tz import tzutc
    from pandas._libs.tslibs.timezones import dateutil_gettz as gettz

    tz = (
        lambda x: tzutc() if x == "UTC" else gettz(x)
    )  # handle special case for utc in dateutil

    from pandas import date_range

    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="H", tz="America/New_York")
    ts = Series(np.random.randn(N), index=rng)

    # also test Timestamp tz handling, GH #2789
    result = ts.copy()
    result["1990-01-01 09:00:00+00:00"] = 0
    result["1990-01-01 09:00:00+00:00"] = ts[4]
    assert_series_equal(result, ts)

    result = ts.copy()
    result["1990-01-01 03:00:00-06:00"] = 0
    result["1990-01-01 03:00:00-06:00"] = ts[4]
    assert_series_equal(result, ts)

    # repeat with datetimes
    result = ts.copy()
    result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = 0
    result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = ts[4]
    assert_series_equal(result, ts)

    result = ts.copy()
    result[datetime(1990, 1, 1, 3, tzinfo=tz("America/Chicago"))] = 0
    result[datetime(1990, 1, 1, 3, tzinfo=tz("America/Chicago"))] = ts[4]
    assert_series_equal(result, ts)
def test_getitem_setitem_datetimeindex():
    """Get/set on a tz-aware hourly Series via strings, naive datetimes,
    Timestamps, index labels, slices and partial date strings; also checks
    tz-naive-vs-aware comparison rules (GH 18376, GH 18162, GH 18435)."""
    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern")
    ts = Series(np.random.randn(N), index=rng)

    result = ts["1990-01-01 04:00:00"]
    expected = ts[4]
    assert result == expected

    result = ts.copy()
    result["1990-01-01 04:00:00"] = 0
    result["1990-01-01 04:00:00"] = ts[4]
    assert_series_equal(result, ts)

    result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
    assert_series_equal(result, ts)

    lb = "1990-01-01 04:00:00"
    rb = "1990-01-01 07:00:00"
    # GH#18435 strings get a pass from tzawareness compat
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    lb = "1990-01-01 04:00:00-0500"
    rb = "1990-01-01 07:00:00-0500"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    # repeat all the above with naive datetimes
    result = ts[datetime(1990, 1, 1, 4)]
    expected = ts[4]
    assert result == expected

    result = ts.copy()
    result[datetime(1990, 1, 1, 4)] = 0
    result[datetime(1990, 1, 1, 4)] = ts[4]
    assert_series_equal(result, ts)

    result = ts[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts.copy()
    result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = 0
    result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = ts[4:8]
    assert_series_equal(result, ts)

    lb = datetime(1990, 1, 1, 4)
    rb = datetime(1990, 1, 1, 7)
    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    with pytest.raises(TypeError, match=msg):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        ts[(ts.index >= lb) & (ts.index <= rb)]

    lb = pd.Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo)
    rb = pd.Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts[ts.index[4]]
    expected = ts[4]
    assert result == expected

    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts.copy()
    result[ts.index[4:8]] = 0
    result[4:8] = ts[4:8]
    assert_series_equal(result, ts)

    # also test partial date slicing
    result = ts["1990-01-02"]
    expected = ts[24:48]
    assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-02"] = 0
    result["1990-01-02"] = ts[24:48]
    assert_series_equal(result, ts)
def test_getitem_setitem_periodindex():
    """Get/set on an hourly PeriodIndex Series via period strings, string
    slices, boolean comparisons, and index labels (GH 2782)."""
    from pandas import period_range

    N = 50
    rng = period_range("1/1/1990", periods=N, freq="H")
    ts = Series(np.random.randn(N), index=rng)

    result = ts["1990-01-01 04"]
    expected = ts[4]
    assert result == expected

    result = ts.copy()
    result["1990-01-01 04"] = 0
    result["1990-01-01 04"] = ts[4]
    assert_series_equal(result, ts)

    result = ts["1990-01-01 04":"1990-01-01 07"]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-01 04":"1990-01-01 07"] = 0
    result["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
    assert_series_equal(result, ts)

    lb = "1990-01-01 04"
    rb = "1990-01-01 07"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    # GH 2782
    result = ts[ts.index[4]]
    expected = ts[4]
    assert result == expected

    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts.copy()
    result[ts.index[4:8]] = 0
    result[4:8] = ts[4:8]
    assert_series_equal(result, ts)
# FutureWarning from NumPy.
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_getitem_median_slice_bug():
    """Indexing with a one-element list containing a slice must equal
    indexing with the bare slice."""
    index = date_range("20090415", "20090519", freq="2B")
    s = Series(np.random.randn(13), index=index)
    indexer = [slice(6, 7, None)]
    result = s[indexer]
    expected = s[indexer[0]]
    assert_series_equal(result, expected)
def test_datetime_indexing():
    """Looking up a Timestamp absent from a (possibly non-monotonic)
    DatetimeIndex raises KeyError keyed by its nanosecond value; assigning
    to it appends a new entry."""
    from pandas import date_range

    index = date_range("1/1/2000", "1/7/2000")
    index = index.repeat(3)

    s = Series(len(index), index=index)
    stamp = Timestamp("1/8/2000")

    # the KeyError message is the stamp's epoch-nanoseconds representation
    with pytest.raises(KeyError, match=r"^947289600000000000$"):
        s[stamp]
    s[stamp] = 0
    assert s[stamp] == 0

    # not monotonic
    s = Series(len(index), index=index)
    s = s[::-1]

    with pytest.raises(KeyError, match=r"^947289600000000000$"):
        s[stamp]
    s[stamp] = 0
    assert s[stamp] == 0
"""
test duplicates in time series
"""
@pytest.fixture
def dups():
    """Series of random floats over dates with heavy duplication (three copies
    of Jan 2-4 2000, one of Jan 5)."""
    dates = [
        datetime(2000, 1, 2),
        datetime(2000, 1, 2),
        datetime(2000, 1, 2),
        datetime(2000, 1, 3),
        datetime(2000, 1, 3),
        datetime(2000, 1, 3),
        datetime(2000, 1, 4),
        datetime(2000, 1, 4),
        datetime(2000, 1, 4),
        datetime(2000, 1, 5),
    ]
    return Series(np.random.randn(len(dates)), index=dates)
def test_constructor(dups):
    """The dups fixture is a Series whose index was inferred as DatetimeIndex."""
    assert isinstance(dups, Series)
    assert isinstance(dups.index, DatetimeIndex)
def test_is_unique_monotonic(dups):
    """Duplicated dates make the index non-unique."""
    assert not dups.index.is_unique
def test_index_unique(dups):
    """DatetimeIndex.unique/nunique on duplicated dates: dtype, tz and name
    are preserved, and NaT is dropped unless dropna=False (#2563)."""
    uniques = dups.index.unique()
    expected = DatetimeIndex(
        [
            datetime(2000, 1, 2),
            datetime(2000, 1, 3),
            datetime(2000, 1, 4),
            datetime(2000, 1, 5),
        ]
    )
    assert uniques.dtype == "M8[ns]"  # sanity
    tm.assert_index_equal(uniques, expected)
    assert dups.index.nunique() == 4

    # #2563
    assert isinstance(uniques, DatetimeIndex)

    dups_local = dups.index.tz_localize("US/Eastern")
    dups_local.name = "foo"
    result = dups_local.unique()
    expected = DatetimeIndex(expected, name="foo")
    expected = expected.tz_localize("US/Eastern")
    assert result.tz is not None
    assert result.name == "foo"
    tm.assert_index_equal(result, expected)

    # NaT, note this is excluded
    arr = [1370745748 + t for t in range(20)] + [iNaT]
    idx = DatetimeIndex(arr * 3)
    tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
    assert idx.nunique() == 20
    assert idx.nunique(dropna=False) == 21

    arr = [
        Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)
    ] + [NaT]
    idx = DatetimeIndex(arr * 3)
    tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
    assert idx.nunique() == 20
    assert idx.nunique(dropna=False) == 21
def test_index_dupes_contains():
    """Membership (`in`) works on a DatetimeIndex with duplicate entries."""
    stamp = datetime(2011, 12, 5, 20, 30)
    idx = DatetimeIndex([stamp, stamp])
    assert stamp in idx
def test_duplicate_dates_indexing(dups):
    """Getting a duplicated date returns all matching rows (a Series) while a
    unique date returns a scalar; setting a duplicated date writes every
    matching row, and setting a missing date appends it."""
    ts = dups

    uniques = ts.index.unique()
    for date in uniques:
        result = ts[date]

        mask = ts.index == date
        total = (ts.index == date).sum()
        expected = ts[mask]
        if total > 1:
            assert_series_equal(result, expected)
        else:
            assert_almost_equal(result, expected[0])

        cp = ts.copy()
        cp[date] = 0
        expected = Series(np.where(mask, 0, ts), index=ts.index)
        assert_series_equal(cp, expected)

    with pytest.raises(KeyError, match=r"^947116800000000000$"):
        ts[datetime(2000, 1, 6)]

    # new index
    ts[datetime(2000, 1, 6)] = 0
    assert ts[datetime(2000, 1, 6)] == 0
def test_range_slice():
    """Date-string slicing on a DatetimeIndex that contains duplicates."""
    idx = DatetimeIndex(["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"])
    ser = Series(np.random.randn(len(idx)), index=idx)

    # open-ended slice starting at a duplicated label
    assert_series_equal(ser["1/2/2000":], ser[1:])

    # bounded slice: the right label edge is inclusive
    assert_series_equal(ser["1/2/2000":"1/3/2000"], ser[1:4])
def test_groupby_average_dup_values(dups):
    """Grouping by level 0 equals grouping by the index values themselves."""
    result = dups.groupby(level=0).mean()
    expected = dups.groupby(dups.index).mean()
    assert_series_equal(result, expected)
def test_indexing_over_size_cutoff():
    """Force the index engine over its hashtable size threshold and check
    lookups still work on an index with injected duplicates (#1821)."""
    import datetime

    # #1821

    old_cutoff = _index._SIZE_CUTOFF
    try:
        # lower the cutoff so the 4400-row index trips the large-index path
        _index._SIZE_CUTOFF = 1000

        # create large list of non periodic datetime
        dates = []
        sec = datetime.timedelta(seconds=1)
        half_sec = datetime.timedelta(microseconds=500000)
        d = datetime.datetime(2011, 12, 5, 20, 30)
        n = 1100
        for i in range(n):
            dates.append(d)
            dates.append(d + sec)
            dates.append(d + sec + half_sec)
            dates.append(d + sec + sec + half_sec)
            d += 3 * sec

        # duplicate some values in the list
        duplicate_positions = np.random.randint(0, len(dates) - 1, 20)
        for p in duplicate_positions:
            dates[p + 1] = dates[p]

        df = DataFrame(
            np.random.randn(len(dates), 4), index=dates, columns=list("ABCD")
        )

        pos = n * 3
        timestamp = df.index[pos]
        assert timestamp in df.index

        # it works!
        df.loc[timestamp]
        assert len(df.loc[[timestamp]]) > 0
    finally:
        # always restore the module-level cutoff for other tests
        _index._SIZE_CUTOFF = old_cutoff
def test_indexing_over_size_cutoff_period_index(monkeypatch):
    """Same over-threshold engine path, but for a PeriodIndex (GH 27136);
    monkeypatch restores the cutoff automatically."""
    # GH 27136

    monkeypatch.setattr(_index, "_SIZE_CUTOFF", 1000)

    n = 1100
    idx = pd.period_range("1/1/2000", freq="T", periods=n)
    assert idx._engine.over_size_threshold

    s = pd.Series(np.random.randn(len(idx)), index=idx)

    pos = n - 1
    timestamp = idx[pos]
    assert timestamp in s.index

    # it works!
    s[timestamp]
    assert len(s.loc[[timestamp]]) > 0
def test_indexing_unordered():
    """Lookups and partial string slices return the same rows regardless of
    index order (GH 2437, GH 3448)."""
    # GH 2437
    rng = date_range(start="2011-01-01", end="2011-01-15")
    ts = Series(np.random.rand(len(rng)), index=rng)
    ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])

    for t in ts.index:
        # TODO: unused?
        s = str(t)  # noqa

        expected = ts[t]
        result = ts2[t]
        assert expected == result

    # GH 3448 (ranges)
    def compare(slobj):
        result = ts2[slobj].copy()
        result = result.sort_index()
        expected = ts[slobj]
        assert_series_equal(result, expected)

    compare(slice("2011-01-01", "2011-01-15"))
    compare(slice("2010-12-30", "2011-01-15"))
    compare(slice("2011-01-01", "2011-01-16"))

    # partial ranges
    compare(slice("2011-01-01", "2011-01-6"))
    compare(slice("2011-01-06", "2011-01-8"))
    compare(slice("2011-01-06", "2011-01-12"))

    # single values
    result = ts2["2011"].sort_index()
    expected = ts["2011"]
    assert_series_equal(result, expected)

    # diff freq
    rng = date_range(datetime(2005, 1, 1), periods=20, freq="M")
    ts = Series(np.arange(len(rng)), index=rng)
    ts = ts.take(np.random.permutation(20))

    result = ts["2005"]
    for t in result.index:
        assert t.year == 2005
def test_indexing():
    """Partial-string indexing semantics on Series and DataFrame: year/month
    strings select ranges (GH 3070, GH 3546) while second-resolution strings on
    a second-frequency index are treated as exact-match keys (GH 14826)."""
    idx = date_range("2001-1-1", periods=20, freq="M")
    ts = Series(np.random.rand(len(idx)), index=idx)

    # getting
    # GH 3070, make sure semantics work on Series/Frame
    expected = ts["2001"]
    expected.name = "A"

    df = DataFrame(dict(A=ts))
    result = df["2001"]["A"]
    assert_series_equal(expected, result)

    # setting
    ts["2001"] = 1
    expected = ts["2001"]
    expected.name = "A"

    df.loc["2001", "A"] = 1

    result = df["2001"]["A"]
    assert_series_equal(expected, result)

    # GH3546 (not including times on the last day)
    idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:00", freq="H")
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013-05"]
    assert_series_equal(expected, ts)

    idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:59", freq="S")
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013-05"]
    assert_series_equal(expected, ts)

    idx = [
        Timestamp("2013-05-31 00:00"),
        Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)),
    ]
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013"]
    assert_series_equal(expected, ts)

    # GH14826, indexing with a seconds resolution string / datetime object
    df = DataFrame(
        np.random.rand(5, 5),
        columns=["open", "high", "low", "close", "volume"],
        index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"),
    )
    expected = df.loc[[df.index[2]]]

    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        df["2012-01-02 18:01:02"]
    msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)"
    with pytest.raises(KeyError, match=msg):
        df[df.index[2]]
"""
test NaT support
"""
def test_set_none_nan():
    """Assigning None or NaN (scalar or slice) into a datetime64 Series
    coerces the written positions to NaT."""
    ser = Series(date_range("1/1/2000", periods=10))

    ser[3] = None
    assert ser[3] is NaT

    ser[3:5] = None
    assert ser[4] is NaT

    ser[5] = np.nan
    assert ser[5] is NaT

    ser[5:7] = np.nan
    assert ser[6] is NaT
def test_nat_operations():
    """median/min/max on a timedelta64 Series skip NaT by default (GH 8617)."""
    ser = Series([0, pd.NaT], dtype="m8[ns]")
    expected = ser[0]
    for reduce in (ser.median, ser.min, ser.max):
        assert reduce() == expected
@pytest.mark.parametrize("method", ["round", "floor", "ceil"])
@pytest.mark.parametrize("freq", ["s", "5s", "min", "5min", "h", "5h"])
def test_round_nat(method, freq):
    """dt.round/floor/ceil of NaT stays NaT at every frequency (GH 14940)."""
    # GH14940
    s = Series([pd.NaT])
    expected = Series(pd.NaT)
    round_method = getattr(s.dt, method)
    assert_series_equal(round_method(freq), expected)
def test_setitem_tuple_with_datetimetz():
    """Assigning by a tuple key on a tuple-valued index behaves like the
    corresponding positional assignment for tz-aware data (GH 20441)."""
    # GH 20441
    arr = date_range("2017", periods=4, tz="US/Eastern")
    index = [(0, 1), (0, 2), (0, 3), (0, 4)]
    result = Series(arr, index=index)
    expected = result.copy()
    result[(0, 1)] = np.nan
    expected.iloc[0] = np.nan
    assert_series_equal(result, expected)

View File

@@ -0,0 +1,32 @@
import numpy as np
from pandas import Series
from pandas.util.testing import assert_almost_equal, assert_series_equal
def test_iloc():
    """iloc with scalars, slices and integer lists on a non-default integer
    index; also checks that an iloc slice is a view into the parent."""
    s = Series(np.random.randn(10), index=list(range(0, 20, 2)))

    for i in range(len(s)):
        result = s.iloc[i]
        exp = s[s.index[i]]
        assert_almost_equal(result, exp)

    # pass a slice
    result = s.iloc[slice(1, 3)]
    expected = s.loc[2:4]
    assert_series_equal(result, expected)

    # test slice is a view
    result[:] = 0
    assert (s[1:3] == 0).all()

    # list of integers
    result = s.iloc[[0, 2, 3, 4, 5]]
    expected = s.reindex(s.index[[0, 2, 3, 4, 5]])
    assert_series_equal(result, expected)
def test_iloc_nonunique():
    """iloc is purely positional even when the index has duplicate labels."""
    ser = Series([0, 1, 2], index=[0, 1, 0])
    assert ser.iloc[2] == 2

View File

@@ -0,0 +1,879 @@
""" test get/set & misc """
from datetime import timedelta
import numpy as np
import pytest
from pandas.core.dtypes.common import is_scalar
import pandas as pd
from pandas import Categorical, DataFrame, MultiIndex, Series, Timedelta, Timestamp
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal
from pandas.tseries.offsets import BDay
def test_basic_indexing():
    """Out-of-range positional access and missing labels raise IndexError /
    KeyError with specific messages, before and after sorting the index."""
    s = Series(np.random.randn(5), index=["a", "b", "a", "a", "b"])

    msg = "index out of bounds"
    with pytest.raises(IndexError, match=msg):
        s[5]
    msg = "index 5 is out of bounds for axis 0 with size 5"
    with pytest.raises(IndexError, match=msg):
        s[5] = 0

    with pytest.raises(KeyError, match=r"^'c'$"):
        s["c"]

    s = s.sort_index()

    msg = r"index out of bounds|^5$"
    with pytest.raises(IndexError, match=msg):
        s[5]
    msg = r"index 5 is out of bounds for axis (0|1) with size 5|^5$"
    with pytest.raises(IndexError, match=msg):
        s[5] = 0
def test_basic_getitem_with_labels(test_data):
    """Label-array and label-slice getitem equal reindex/loc; integer lists on
    an integer index are label-based (FutureWarning for missing handling), and
    tz-aware scalars round-trip through loc/iloc/[] (GH 12089)."""
    indices = test_data.ts.index[[5, 10, 15]]

    result = test_data.ts[indices]
    expected = test_data.ts.reindex(indices)
    assert_series_equal(result, expected)

    result = test_data.ts[indices[0] : indices[2]]
    expected = test_data.ts.loc[indices[0] : indices[2]]
    assert_series_equal(result, expected)

    # integer indexes, be careful
    s = Series(np.random.randn(10), index=list(range(0, 20, 2)))
    inds = [0, 2, 5, 7, 8]
    arr_inds = np.array([0, 2, 5, 7, 8])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = s[inds]
    expected = s.reindex(inds)
    assert_series_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = s[arr_inds]
    expected = s.reindex(arr_inds)
    assert_series_equal(result, expected)

    # GH12089
    # with tz for values
    s = Series(
        pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"]
    )
    expected = Timestamp("2011-01-01", tz="US/Eastern")
    result = s.loc["a"]
    assert result == expected
    result = s.iloc[0]
    assert result == expected
    result = s["a"]
    assert result == expected
def test_getitem_setitem_ellipsis():
    """s[...] selects the whole Series; after s[...] = 5 the earlier selection
    reflects the write (the test asserts the shared-data behavior)."""
    s = Series(np.random.randn(10))

    np.fix(s)

    result = s[...]
    assert_series_equal(result, s)

    s[...] = 5
    assert (result == 5).all()
def test_getitem_get(test_data):
    """Series.get agrees with [] for present labels, raises nothing for a
    missing key via get, raises KeyError via [], and get(None) is None
    (GH 5652)."""
    test_series = test_data.series
    test_obj_series = test_data.objSeries

    idx1 = test_series.index[5]
    idx2 = test_obj_series.index[5]
    assert test_series[idx1] == test_series.get(idx1)
    assert test_obj_series[idx2] == test_obj_series.get(idx2)

    assert test_series[idx1] == test_series[5]
    assert test_obj_series[idx2] == test_obj_series[5]

    assert test_series.get(-1) == test_series.get(test_series.index[-1])
    assert test_series[5] == test_series.get(test_series.index[5])

    # missing
    d = test_data.ts.index[0] - BDay()
    msg = r"Timestamp\('1999-12-31 00:00:00', freq='B'\)"
    with pytest.raises(KeyError, match=msg):
        test_data.ts[d]

    # None
    # GH 5652
    for s in [Series(), Series(index=list("abc"))]:
        result = s.get(None)
        assert result is None
def test_getitem_fancy(test_data):
    """Integer-list getitem keeps the corresponding labels and values."""
    slice1 = test_data.series[[1, 2, 3]]
    slice2 = test_data.objSeries[[1, 2, 3]]
    assert test_data.series.index[2] == slice1.index[1]
    assert test_data.objSeries.index[2] == slice2.index[1]
    assert test_data.series[2] == slice1[1]
    assert test_data.objSeries[2] == slice2[1]
def test_getitem_generator(test_data):
    """Boolean generators/iterators are valid getitem masks, equivalent to a
    boolean Series mask."""
    gen = (x > 0 for x in test_data.series)
    result = test_data.series[gen]
    result2 = test_data.series[iter(test_data.series > 0)]
    expected = test_data.series[test_data.series > 0]
    assert_series_equal(result, expected)
    assert_series_equal(result2, expected)
def test_type_promotion():
    """Appending mixed scalar types to an empty Series promotes to object
    dtype while keeping each stored value intact (GH 12599)."""
    # GH12599
    s = pd.Series()
    s["a"] = pd.Timestamp("2016-01-01")
    s["b"] = 3.0
    s["c"] = "foo"
    expected = Series([pd.Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"])
    assert_series_equal(s, expected)
@pytest.mark.parametrize(
    "result_1, duplicate_item, expected_1",
    [
        [
            pd.Series({1: 12, 2: [1, 2, 2, 3]}),
            pd.Series({1: 313}),
            pd.Series({1: 12}, dtype=object),
        ],
        [
            pd.Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
            pd.Series({1: [1, 2, 3]}),
            pd.Series({1: [1, 2, 3]}),
        ],
    ],
)
def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1):
    """After appending a Series that duplicates label 1, s[1] returns all rows
    for that label and unique labels still return scalars (GH 17610)."""
    # GH 17610
    result = result_1.append(duplicate_item)
    expected = expected_1.append(duplicate_item)
    assert_series_equal(result[1], expected)
    assert result[2] == result_1[2]
def test_getitem_out_of_bounds(test_data):
    """Out-of-range positional getitem raises IndexError instead of
    segfaulting (GH 495), including on an empty Series (GH 917)."""
    # don't segfault, GH #495
    msg = "index out of bounds"
    with pytest.raises(IndexError, match=msg):
        test_data.ts[len(test_data.ts)]

    # GH #917
    s = Series([])
    with pytest.raises(IndexError, match=msg):
        s[-1]
def test_getitem_setitem_integers():
    """iloc positional get/set on a string-labelled Series."""
    # caused bug without test
    s = Series([1, 2, 3], ["a", "b", "c"])

    assert s.iloc[0] == s["a"]
    s.iloc[0] = 5
    tm.assert_almost_equal(s["a"], 5)
def test_getitem_box_float64(test_data):
    """Scalar getitem on a float Series returns a numpy float64 scalar."""
    value = test_data.ts[5]
    assert isinstance(value, np.float64)
@pytest.mark.parametrize(
    "arr",
    [np.random.randn(10), tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern")],
)
def test_get(arr):
    """Series.get with labels, label lists, slices and missing keys: missing
    keys yield None; on a string index, integer keys fall back to positions
    (GH 21260, GH 21257)."""
    # GH 21260
    s = Series(arr, index=[2 * i for i in range(len(arr))])
    assert s.get(4) == s.iloc[2]

    result = s.get([4, 6])
    expected = s.iloc[[2, 3]]
    tm.assert_series_equal(result, expected)

    result = s.get(slice(2))
    expected = s.iloc[[0, 1]]
    tm.assert_series_equal(result, expected)

    assert s.get(-1) is None
    assert s.get(s.index.max() + 1) is None

    s = Series(arr[:6], index=list("abcdef"))
    assert s.get("c") == s.iloc[2]

    result = s.get(slice("b", "d"))
    expected = s.iloc[[1, 2, 3]]
    tm.assert_series_equal(result, expected)

    result = s.get("Z")
    assert result is None

    # integer keys on a string index are positional here
    assert s.get(4) == s.iloc[4]
    assert s.get(-1) == s.iloc[-1]
    assert s.get(len(s)) is None

    # GH 21257
    s = pd.Series(arr)
    s2 = s[::2]
    assert s2.get(1) is None
def test_series_box_timestamp():
    """Scalar access on a datetime64 Series boxes the value as a Timestamp,
    for both positional and label-based access and for iat."""
    rng = pd.date_range("20090415", "20090519", freq="B")
    ser = Series(rng)

    assert isinstance(ser[5], pd.Timestamp)

    rng = pd.date_range("20090415", "20090519", freq="B")
    ser = Series(rng, index=rng)
    assert isinstance(ser[5], pd.Timestamp)

    assert isinstance(ser.iat[5], pd.Timestamp)
def test_getitem_ambiguous_keyerror():
    """On an even-integer index, key 1 is a missing *label* (not a position)
    and raises KeyError for both [] and loc."""
    s = Series(range(10), index=list(range(0, 20, 2)))
    with pytest.raises(KeyError, match=r"^1$"):
        s[1]
    with pytest.raises(KeyError, match=r"^1$"):
        s.loc[1]
def test_getitem_unordered_dup():
    """A unique label in an unordered, partially-duplicated index returns a
    scalar rather than a Series."""
    ser = Series(range(5), index=["c", "a", "a", "b", "b"])
    value = ser["c"]
    assert is_scalar(value)
    assert value == 0
def test_getitem_dups_with_missing():
    """Label-list getitem with duplicated and missing labels matches loc
    (both paths emit the deprecation FutureWarning) (GH 4246)."""
    # breaks reindex, so need to use .loc internally
    # GH 4246
    s = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        expected = s.loc[["foo", "bar", "bah", "bam"]]

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = s[["foo", "bar", "bah", "bam"]]
    assert_series_equal(result, expected)
def test_getitem_dups():
    """Looking up a duplicated label returns a Series of all matching rows."""
    ser = Series(range(5), index=["A", "A", "B", "C", "C"], dtype=np.int64)
    result = ser["C"]
    expected = Series([3, 4], index=["C", "C"], dtype=np.int64)
    assert_series_equal(result, expected)
def test_setitem_ambiguous_keyerror():
    """Setting a missing integer label on an even-integer index appends a new
    entry (label semantics, not positional), for both [] and loc."""
    s = Series(range(10), index=list(range(0, 20, 2)))

    # equivalent of an append
    s2 = s.copy()
    s2[1] = 5
    expected = s.append(Series([5], index=[1]))
    assert_series_equal(s2, expected)

    s2 = s.copy()
    s2.loc[1] = 5
    expected = s.append(Series([5], index=[1]))
    assert_series_equal(s2, expected)
def test_getitem_dataframe():
    """Indexing a Series with a DataFrame mask raises TypeError with a
    descriptive message."""
    rng = list(range(10))
    s = pd.Series(10, index=rng)
    df = pd.DataFrame(rng, index=rng)
    msg = (
        "Indexing a Series with DataFrame is not supported,"
        " use the appropriate DataFrame column"
    )
    with pytest.raises(TypeError, match=msg):
        s[df > 5]
def test_setitem(test_data):
    """Assorted setitem paths: scalar/list/position NaN writes, boolean-mask
    writes, slice-step writes, appending a missing label, and growing an
    empty (or empty datetime-indexed) Series with a Timestamp key (#10193)."""
    test_data.ts[test_data.ts.index[5]] = np.NaN
    test_data.ts[[1, 2, 17]] = np.NaN
    test_data.ts[6] = np.NaN
    assert np.isnan(test_data.ts[6])
    assert np.isnan(test_data.ts[2])
    test_data.ts[np.isnan(test_data.ts)] = 5
    assert not np.isnan(test_data.ts[2])

    # caught this bug when writing tests
    series = Series(tm.makeIntIndex(20).astype(float), index=tm.makeIntIndex(20))
    series[::2] = 0
    assert (series[::2] == 0).all()

    # set item that's not contained
    s = test_data.series.copy()
    s["foobar"] = 1

    app = Series([1], index=["foobar"], name="series")
    expected = test_data.series.append(app)
    assert_series_equal(s, expected)

    # Test for issue #10193
    key = pd.Timestamp("2012-01-01")
    series = pd.Series()
    series[key] = 47
    expected = pd.Series(47, [key])
    assert_series_equal(series, expected)

    series = pd.Series([], pd.DatetimeIndex([], freq="D"))
    series[key] = 47
    expected = pd.Series(47, pd.DatetimeIndex([key], freq="D"))
    assert_series_equal(series, expected)
def test_setitem_dtypes():
    """Writing NaN into an int or bool Series upcasts the dtype to float
    (GH 4463)."""
    # change dtypes
    # GH 4463
    expected = Series([np.nan, 2, 3])

    s = Series([1, 2, 3])
    s.iloc[0] = np.nan
    assert_series_equal(s, expected)

    s = Series([1, 2, 3])
    s.loc[0] = np.nan
    assert_series_equal(s, expected)

    s = Series([1, 2, 3])
    s[0] = np.nan
    assert_series_equal(s, expected)

    s = Series([False])
    s.loc[0] = np.nan
    assert_series_equal(s, Series([np.nan]))

    s = Series([False, True])
    s.loc[0] = np.nan
    assert_series_equal(s, Series([np.nan, 1.0]))
def test_set_value(test_data):
    """Deprecated Series.set_value mutates in place, returns the same object
    (warning expected), and matches plain loc assignment for a new label."""
    idx = test_data.ts.index[10]
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        res = test_data.ts.set_value(idx, 0)
    assert res is test_data.ts
    assert test_data.ts[idx] == 0

    # equiv
    s = test_data.series.copy()
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        res = s.set_value("foobar", 0)
    assert res is s
    assert res.index[-1] == "foobar"
    assert res["foobar"] == 0

    s = test_data.series.copy()
    s.loc["foobar"] = 0
    assert s.index[-1] == "foobar"
    assert s["foobar"] == 0
def test_setslice(test_data):
    """A positional slice keeps its length and a unique index."""
    sl = test_data.ts[5:20]
    assert len(sl) == len(sl.index)
    assert sl.index.is_unique is True
# FutureWarning from NumPy about [slice(None, 5).
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_basic_getitem_setitem_corner(test_data):
    """Corner-case keys: tuples raise (no MultiIndex), a one-slice list works,
    but a mixed list of scalar + slice is unhashable and raises TypeError."""
    # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
    msg = "Can only tuple-index with a MultiIndex"
    with pytest.raises(ValueError, match=msg):
        test_data.ts[:, 2]
    with pytest.raises(ValueError, match=msg):
        test_data.ts[:, 2] = 2

    # weird lists. [slice(0, 5)] will work but not two slices
    result = test_data.ts[[slice(None, 5)]]
    expected = test_data.ts[:5]
    assert_series_equal(result, expected)

    # OK
    msg = r"unhashable type(: 'slice')?"
    with pytest.raises(TypeError, match=msg):
        test_data.ts[[5, slice(None, None)]]
    with pytest.raises(TypeError, match=msg):
        test_data.ts[[5, slice(None, None)]] = 2
@pytest.mark.parametrize("tz", ["US/Eastern", "UTC", "Asia/Tokyo"])
def test_setitem_with_tz(tz):
    """Scalar and vector setitem with tz-aware Timestamps preserves the
    tz-aware dtype across [], loc and iloc."""
    orig = pd.Series(pd.date_range("2016-01-01", freq="H", periods=3, tz=tz))
    assert orig.dtype == "datetime64[ns, {0}]".format(tz)

    # scalar
    s = orig.copy()
    s[1] = pd.Timestamp("2011-01-01", tz=tz)
    exp = pd.Series(
        [
            pd.Timestamp("2016-01-01 00:00", tz=tz),
            pd.Timestamp("2011-01-01 00:00", tz=tz),
            pd.Timestamp("2016-01-01 02:00", tz=tz),
        ]
    )
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[1] = pd.Timestamp("2011-01-01", tz=tz)
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[1] = pd.Timestamp("2011-01-01", tz=tz)
    tm.assert_series_equal(s, exp)

    # vector
    vals = pd.Series(
        [pd.Timestamp("2011-01-01", tz=tz), pd.Timestamp("2012-01-01", tz=tz)],
        index=[1, 2],
    )
    assert vals.dtype == "datetime64[ns, {0}]".format(tz)

    s[[1, 2]] = vals
    exp = pd.Series(
        [
            pd.Timestamp("2016-01-01 00:00", tz=tz),
            pd.Timestamp("2011-01-01 00:00", tz=tz),
            pd.Timestamp("2012-01-01 00:00", tz=tz),
        ]
    )
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)
def test_setitem_with_tz_dst():
    """Like test_setitem_with_tz, but the data spans the US/Eastern DST
    transition (2016-11-06), so offsets change mid-series."""
    tz = "US/Eastern"
    orig = pd.Series(pd.date_range("2016-11-06", freq="H", periods=3, tz=tz))
    assert orig.dtype == "datetime64[ns, {0}]".format(tz)
    # scalar
    s = orig.copy()
    s[1] = pd.Timestamp("2011-01-01", tz=tz)
    exp = pd.Series(
        [
            pd.Timestamp("2016-11-06 00:00-04:00", tz=tz),
            pd.Timestamp("2011-01-01 00:00-05:00", tz=tz),
            pd.Timestamp("2016-11-06 01:00-05:00", tz=tz),
        ]
    )
    tm.assert_series_equal(s, exp)
    s = orig.copy()
    s.loc[1] = pd.Timestamp("2011-01-01", tz=tz)
    tm.assert_series_equal(s, exp)
    s = orig.copy()
    s.iloc[1] = pd.Timestamp("2011-01-01", tz=tz)
    tm.assert_series_equal(s, exp)
    # vector
    vals = pd.Series(
        [pd.Timestamp("2011-01-01", tz=tz), pd.Timestamp("2012-01-01", tz=tz)],
        index=[1, 2],
    )
    assert vals.dtype == "datetime64[ns, {0}]".format(tz)
    # note: ``s`` here is still the last copy from the scalar .iloc case
    s[[1, 2]] = vals
    exp = pd.Series(
        [
            pd.Timestamp("2016-11-06 00:00", tz=tz),
            pd.Timestamp("2011-01-01 00:00", tz=tz),
            pd.Timestamp("2012-01-01 00:00", tz=tz),
        ]
    )
    tm.assert_series_equal(s, exp)
    s = orig.copy()
    s.loc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)
    s = orig.copy()
    s.iloc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)
def test_categorical_assigning_ops():
    """Assigning into a categorical Series via full slice, scalar label,
    boolean masks and label keys keeps the categories; np.nan is allowed."""
    orig = Series(Categorical(["b", "b"], categories=["a", "b"]))
    s = orig.copy()
    s[:] = "a"
    exp = Series(Categorical(["a", "a"], categories=["a", "b"]))
    tm.assert_series_equal(s, exp)
    s = orig.copy()
    s[1] = "a"
    exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
    tm.assert_series_equal(s, exp)
    s = orig.copy()
    s[s.index > 0] = "a"
    exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
    tm.assert_series_equal(s, exp)
    s = orig.copy()
    s[[False, True]] = "a"
    exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
    tm.assert_series_equal(s, exp)
    s = orig.copy()
    s.index = ["x", "y"]
    s["y"] = "a"
    exp = Series(Categorical(["b", "a"], categories=["a", "b"]), index=["x", "y"])
    tm.assert_series_equal(s, exp)
    # ensure that one can set something to np.nan
    s = Series(Categorical([1, 2, 3]))
    exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3]))
    s[1] = np.nan
    tm.assert_series_equal(s, exp)
def test_slice(test_data):
    """Positional slicing: boundary exclusion, label preservation, and that
    a slice is a *view* (writes propagate back to the parent)."""
    numSlice = test_data.series[10:20]
    numSliceEnd = test_data.series[-10:]
    objSlice = test_data.objSeries[10:20]
    assert test_data.series.index[9] not in numSlice.index
    assert test_data.objSeries.index[9] not in objSlice.index
    assert len(numSlice) == len(numSlice.index)
    assert test_data.series[numSlice.index[0]] == numSlice[numSlice.index[0]]
    assert numSlice.index[1] == test_data.series.index[11]
    assert tm.equalContents(numSliceEnd, np.array(test_data.series)[-10:])
    # Test return view.
    sl = test_data.series[10:20]
    sl[:] = 0
    assert (test_data.series[10:20] == 0).all()
def test_slice_can_reorder_not_uniquely_indexed():
    """Reversed slicing must work even when the index holds duplicate labels."""
    ser = Series(1, index=["a", "a", "b", "b", "c"])
    ser[::-1]  # should not raise
def test_loc_setitem(test_data):
    """.loc setitem with label lists matches positional assignment, and
    label slices are end-inclusive."""
    inds = test_data.series.index[[3, 4, 7]]
    result = test_data.series.copy()
    result.loc[inds] = 5
    expected = test_data.series.copy()
    expected[[3, 4, 7]] = 5
    assert_series_equal(result, expected)
    result.iloc[5:10] = 10
    expected[5:10] = 10
    assert_series_equal(result, expected)
    # set slice with indices
    d1, d2 = test_data.series.index[[5, 15]]
    result.loc[d1:d2] = 6
    expected[5:16] = 6  # because it's inclusive
    assert_series_equal(result, expected)
    # set index value
    test_data.series.loc[d1] = 4
    test_data.series.loc[d2] = 6
    assert test_data.series[d1] == 4
    assert test_data.series[d2] == 6
def test_setitem_na():
    """Assigning NaN into int/bool Series upcasts the dtype to float."""
    # these induce dtype changes
    expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan])
    s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])
    s[::2] = np.nan
    assert_series_equal(s, expected)
    # boolean series is coerced to float once NaN is inserted
    expected = Series([np.nan, 1, np.nan, 0])
    s = Series([True, True, False, False])
    s[::2] = np.nan
    assert_series_equal(s, expected)
    expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9])
    s = Series(np.arange(10))
    s[:5] = np.nan
    assert_series_equal(s, expected)
def test_timedelta_assignment():
    """Label-based assignment and fillna with timedelta values must coerce
    to timedelta64[ns] (GH 8209, GH 14155)."""
    # NOTE: this module imports neither ``datetime.timedelta`` nor
    # ``Timedelta`` at the top, so the original body raised NameError at
    # runtime; import one locally and qualify the other through ``pd``.
    from datetime import timedelta
    # GH 8209
    s = Series([])
    s.loc["B"] = timedelta(1)
    tm.assert_series_equal(s, Series(pd.Timedelta("1 days"), index=["B"]))
    s = s.reindex(s.index.insert(0, "A"))
    tm.assert_series_equal(
        s, Series([np.nan, pd.Timedelta("1 days")], index=["A", "B"])
    )
    result = s.fillna(timedelta(1))
    expected = Series(pd.Timedelta("1 days"), index=["A", "B"])
    tm.assert_series_equal(result, expected)
    s.loc["A"] = timedelta(1)
    tm.assert_series_equal(s, expected)
    # GH 14155
    s = Series(10 * [np.timedelta64(10, "m")])
    s.loc[[1, 2, 3]] = np.timedelta64(20, "m")
    expected = pd.Series(10 * [np.timedelta64(10, "m")])
    expected.loc[[1, 2, 3]] = pd.Timedelta(np.timedelta64(20, "m"))
    tm.assert_series_equal(s, expected)
@pytest.mark.parametrize(
    "nat_val,should_cast",
    [
        (pd.NaT, True),
        (np.timedelta64("NaT", "ns"), True),
        (np.datetime64("NaT", "ns"), False),
    ],
)
def test_td64_series_assign_nat(nat_val, should_cast):
    """pd.NaT and timedelta64-NaT are stored as m8 NaT; a datetime64-NaT
    forces the series to object dtype instead."""
    # some nat-like values should be cast to timedelta64 when inserting
    # into a timedelta64 series. Others should coerce to object
    # and retain their dtypes.
    base = pd.Series([0, 1, 2], dtype="m8[ns]")
    expected = pd.Series([pd.NaT, 1, 2], dtype="m8[ns]")
    if not should_cast:
        expected = expected.astype(object)
    ser = base.copy(deep=True)
    ser[0] = nat_val
    tm.assert_series_equal(ser, expected)
    ser = base.copy(deep=True)
    ser.loc[0] = nat_val
    tm.assert_series_equal(ser, expected)
    ser = base.copy(deep=True)
    ser.iloc[0] = nat_val
    tm.assert_series_equal(ser, expected)
@pytest.mark.parametrize(
    "td",
    [
        pd.Timedelta("9 days"),
        pd.Timedelta("9 days").to_timedelta64(),
        pd.Timedelta("9 days").to_pytimedelta(),
    ],
)
def test_append_timedelta_does_not_cast(td):
    """Enlarging an object Series with a timedelta (any flavor) must store
    a Timedelta, not cast it to int64 (GH#22717)."""
    # GH#22717 inserting a Timedelta should _not_ cast to int64
    expected = pd.Series(["x", td], index=[0, "td"], dtype=object)
    ser = pd.Series(["x"])
    ser["td"] = td
    tm.assert_series_equal(ser, expected)
    assert isinstance(ser["td"], pd.Timedelta)
    ser = pd.Series(["x"])
    ser.loc["td"] = pd.Timedelta("9 days")
    tm.assert_series_equal(ser, expected)
    assert isinstance(ser["td"], pd.Timedelta)
def test_underlying_data_conversion():
    """Column ``update`` and chained assignment must write into the
    underlying data (GH 4080, GH 3970, GH 3217)."""
    # NOTE: ``DataFrame`` is not imported at the top of this module (only
    # Categorical/Series/date_range/isna are), so the original body raised
    # NameError at runtime; qualify it through ``pd`` instead.
    # GH 4080
    df = pd.DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]})
    df.set_index(["a", "b", "c"], inplace=True)
    s = Series([1], index=[(2, 2, 2)])
    df["val"] = 0
    df["val"].update(s)
    expected = pd.DataFrame(
        dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0])
    )
    expected.set_index(["a", "b", "c"], inplace=True)
    tm.assert_frame_equal(df, expected)
    # GH 3970
    # these are chained assignments as well; silence the check while we
    # deliberately perform them
    pd.set_option("chained_assignment", None)
    df = pd.DataFrame({"aa": range(5), "bb": [2.2] * 5})
    df["cc"] = 0.0
    ck = [True] * len(df)
    df["bb"].iloc[0] = 0.13
    # TODO: unused
    df_tmp = df.iloc[ck]  # noqa
    df["bb"].iloc[0] = 0.15
    assert df["bb"].iloc[0] == 0.15
    pd.set_option("chained_assignment", "raise")
    # GH 3217
    df = pd.DataFrame(dict(a=[1, 3], b=[np.nan, 2]))
    df["c"] = np.nan
    df["c"].update(pd.Series(["foo"], index=[0]))
    expected = pd.DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=["foo", np.nan]))
    tm.assert_frame_equal(df, expected)
def test_preserve_refs(test_data):
seq = test_data.ts[[5, 10, 15]]
seq[1] = np.NaN
assert not np.isnan(test_data.ts[10])
def test_cast_on_putmask():
    """Masked assignment from another Series aligns on labels (GH 2746)."""
    # GH 2746
    # need to upcast
    s = Series([1, 2], index=[1, 2], dtype="int64")
    s[[True, False]] = Series([0], index=[1], dtype="int64")
    expected = Series([0, 2], index=[1, 2], dtype="int64")
    assert_series_equal(s, expected)
def test_type_promote_putmask():
    """Masked assignment that changes dtype (numeric -> str) must stay
    aligned (GH 8387)."""
    # GH8387: test that changing types does not break alignment
    ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5)
    left, mask = ts.copy(), ts > 0
    right = ts[mask].copy().map(str)
    left[mask] = right
    assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t))
    s = Series([0, 1, 2, 0])
    mask = s > 0
    s2 = s[mask].map(str)
    s[mask] = s2
    assert_series_equal(s, Series([0, "1", "2", 0]))
    s = Series([0, "foo", "bar", 0])
    mask = Series([False, True, True, False])
    s2 = s[mask]
    s[mask] = s2
    assert_series_equal(s, Series([0, "foo", "bar", 0]))
def test_multilevel_preserve_name():
    """Selecting one outer level of a MultiIndexed Series keeps ``name``."""
    # NOTE: ``MultiIndex`` is not imported at the top of this module, so the
    # original body raised NameError at runtime; qualify it through ``pd``.
    index = pd.MultiIndex(
        levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    s = Series(np.random.randn(len(index)), index=index, name="sth")
    result = s["foo"]
    result2 = s.loc["foo"]
    assert result.name == s.name
    assert result2.name == s.name
def test_setitem_scalar_into_readonly_backing_data():
    """Scalar setitem on a Series backed by a read-only ndarray must raise
    and leave the buffer untouched (GH14359)."""
    # GH14359: test that you cannot mutate a read only buffer
    array = np.zeros(5)
    array.flags.writeable = False  # make the array immutable
    series = Series(array)
    for n in range(len(series)):
        msg = "assignment destination is read-only"
        with pytest.raises(ValueError, match=msg):
            series[n] = 1
        assert array[n] == 0
def test_setitem_slice_into_readonly_backing_data():
    """Slice setitem on a Series backed by a read-only ndarray must raise
    and leave the buffer untouched (GH14359)."""
    # GH14359: test that you cannot mutate a read only buffer
    array = np.zeros(5)
    array.flags.writeable = False  # make the array immutable
    series = Series(array)
    msg = "assignment destination is read-only"
    with pytest.raises(ValueError, match=msg):
        series[1:3] = 1
    assert not array.any()
"""
miscellaneous methods
"""
def test_pop():
    """Series.pop removes the label in place and returns its value (GH 6600)."""
    # NOTE: ``DataFrame`` is not imported at the top of this module, so the
    # original body raised NameError at runtime; qualify it through ``pd``.
    df = pd.DataFrame({"A": 0, "B": np.arange(5, dtype="int64"), "C": 0})
    k = df.iloc[4]
    result = k.pop("B")
    assert result == 4
    expected = Series([0, 0], index=["A", "C"], name=4)
    assert_series_equal(k, expected)
def test_take():
    """Series.take is positional, allows negative positions, and raises
    IndexError for out-of-bounds positions."""
    s = Series([-1, 5, 6, 2, 4])
    actual = s.take([1, 3, 4])
    expected = Series([5, 2, 4], index=[1, 3, 4])
    tm.assert_series_equal(actual, expected)
    actual = s.take([-1, 3, 4])
    expected = Series([4, 2, 4], index=[4, 3, 4])
    tm.assert_series_equal(actual, expected)
    msg = "index {} is out of bounds for size 5"
    with pytest.raises(IndexError, match=msg.format(10)):
        s.take([1, 10])
    with pytest.raises(IndexError, match=msg.format(5)):
        s.take([2, 5])
def test_take_categorical():
    """take on a categorical Series keeps the full category set (GH 20664)."""
    # https://github.com/pandas-dev/pandas/issues/20664
    s = Series(pd.Categorical(["a", "b", "c"]))
    result = s.take([-2, -2, 0])
    expected = Series(
        pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"]), index=[1, 1, 0]
    )
    assert_series_equal(result, expected)
def test_head_tail(test_data):
    """head/tail match the equivalent positional slices, including n=0."""
    assert_series_equal(test_data.series.head(), test_data.series[:5])
    assert_series_equal(test_data.series.head(0), test_data.series[0:0])
    assert_series_equal(test_data.series.tail(), test_data.series[-5:])
    assert_series_equal(test_data.series.tail(0), test_data.series[0:0])

View File

@@ -0,0 +1,159 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Series, Timestamp
from pandas.util.testing import assert_series_equal
@pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)])
def test_loc_uint64(val, expected):
    """Label lookup via .loc must work for labels beyond the int64 range
    (gh-19399)."""
    mapping = {2 ** 63 - 1: 3, 2 ** 63: 4}
    ser = Series(mapping)
    assert ser.loc[val] == expected
def test_loc_getitem(test_data):
    """.loc getitem: label lists, end-inclusive label slices (== truncate),
    boolean masks and scalar labels."""
    inds = test_data.series.index[[3, 4, 7]]
    assert_series_equal(test_data.series.loc[inds], test_data.series.reindex(inds))
    assert_series_equal(test_data.series.iloc[5::2], test_data.series[5::2])
    # slice with indices
    d1, d2 = test_data.ts.index[[5, 15]]
    result = test_data.ts.loc[d1:d2]
    expected = test_data.ts.truncate(d1, d2)
    assert_series_equal(result, expected)
    # boolean
    mask = test_data.series > test_data.series.median()
    assert_series_equal(test_data.series.loc[mask], test_data.series[mask])
    # ask for index value
    assert test_data.ts.loc[d1] == test_data.ts[d1]
    assert test_data.ts.loc[d2] == test_data.ts[d2]
def test_loc_getitem_not_monotonic(test_data):
    """Label slicing a non-monotonic index raises KeyError, for both
    getitem and setitem."""
    d1, d2 = test_data.ts.index[[5, 15]]
    ts2 = test_data.ts[::2][[1, 2, 0]]
    msg = r"Timestamp\('2000-01-10 00:00:00'\)"
    with pytest.raises(KeyError, match=msg):
        ts2.loc[d1:d2]
    with pytest.raises(KeyError, match=msg):
        ts2.loc[d1:d2] = 0
def test_loc_getitem_setitem_integer_slice_keyerrors():
    """On an integer-labeled index, .iloc slices are positional and .loc
    slices are label-based; non-monotonic label slices raise KeyError."""
    s = Series(np.random.randn(10), index=list(range(0, 20, 2)))
    # this is OK
    cp = s.copy()
    cp.iloc[4:10] = 0
    assert (cp.iloc[4:10] == 0).all()
    # so is this
    cp = s.copy()
    cp.iloc[3:11] = 0
    assert (cp.iloc[3:11] == 0).values.all()
    result = s.iloc[2:6]
    result2 = s.loc[3:11]
    expected = s.reindex([4, 6, 8, 10])
    assert_series_equal(result, expected)
    assert_series_equal(result2, expected)
    # non-monotonic, raise KeyError
    s2 = s.iloc[list(range(5)) + list(range(9, 4, -1))]
    with pytest.raises(KeyError, match=r"^3$"):
        s2.loc[3:11]
    with pytest.raises(KeyError, match=r"^3$"):
        s2.loc[3:11] = 0
def test_loc_getitem_iterator(test_data):
    """.loc accepts an iterator of labels, not just a materialized list."""
    idx = iter(test_data.series.index[:10])
    result = test_data.series.loc[idx]
    assert_series_equal(result, test_data.series[:10])
def test_loc_setitem_boolean(test_data):
    """.loc setitem with a boolean mask matches plain masked setitem."""
    mask = test_data.series > test_data.series.median()
    result = test_data.series.copy()
    result.loc[mask] = 0
    expected = test_data.series
    expected[mask] = 0
    assert_series_equal(result, expected)
def test_loc_setitem_corner(test_data):
    """.loc setitem with a label list containing a missing label raises
    KeyError naming the missing label."""
    inds = list(test_data.series.index[[5, 8, 12]])
    test_data.series.loc[inds] = 5
    msg = r"\['foo'\] not in index"
    with pytest.raises(KeyError, match=msg):
        test_data.series.loc[inds + ["foo"]] = 5
def test_basic_setitem_with_labels(test_data):
    """``[]`` setitem with label lists/slices must match the equivalent
    ``.loc`` assignment; missing labels must raise; tz-aware scalar
    assignment round-trips (GH12089)."""
    indices = test_data.ts.index[[5, 10, 15]]
    cp = test_data.ts.copy()
    exp = test_data.ts.copy()
    cp[indices] = 0
    exp.loc[indices] = 0
    assert_series_equal(cp, exp)
    cp = test_data.ts.copy()
    exp = test_data.ts.copy()
    cp[indices[0] : indices[2]] = 0
    exp.loc[indices[0] : indices[2]] = 0
    assert_series_equal(cp, exp)
    # integer indexes, be careful
    s = Series(np.random.randn(10), index=list(range(0, 20, 2)))
    inds = [0, 4, 6]
    arr_inds = np.array([0, 4, 6])
    # BUG FIX: the original assigned into ``s`` twice and then compared the
    # two *unmodified* copies, making both assertions vacuous.  Mirror the
    # datetime-index cases above instead: mutate ``cp`` via __setitem__ and
    # ``exp`` via .loc, then compare.
    cp = s.copy()
    exp = s.copy()
    cp[inds] = 0
    exp.loc[inds] = 0
    assert_series_equal(cp, exp)
    cp = s.copy()
    exp = s.copy()
    cp[arr_inds] = 0
    exp.loc[arr_inds] = 0
    assert_series_equal(cp, exp)
    inds_notfound = [0, 4, 5, 6]
    arr_inds_notfound = np.array([0, 4, 5, 6])
    msg = r"\[5\] not contained in the index"
    with pytest.raises(ValueError, match=msg):
        s[inds_notfound] = 0
    with pytest.raises(Exception, match=msg):
        s[arr_inds_notfound] = 0
    # GH12089
    # with tz for values
    s = Series(
        pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"]
    )
    s2 = s.copy()
    expected = Timestamp("2011-01-03", tz="US/Eastern")
    s2.loc["a"] = expected
    result = s2.loc["a"]
    assert result == expected
    s2 = s.copy()
    s2.iloc[0] = expected
    result = s2.iloc[0]
    assert result == expected
    s2 = s.copy()
    s2["a"] = expected
    result = s2["a"]
    assert result == expected

View File

@@ -0,0 +1,317 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Index, Series
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal
def test_get():
    """Series.get returns the default for missing keys — including on an
    object-dtype float index and on value_counts results (GH 6383, GH 7407)."""
    # GH 6383
    s = Series(
        np.array(
            [
                43,
                48,
                60,
                48,
                50,
                51,
                50,
                45,
                57,
                48,
                56,
                45,
                51,
                39,
                55,
                43,
                54,
                52,
                51,
                54,
            ]
        )
    )
    # 25 is not a label of the default RangeIndex -> default returned
    result = s.get(25, 0)
    expected = 0
    assert result == expected
    s = Series(
        np.array(
            [
                43,
                48,
                60,
                48,
                50,
                51,
                50,
                45,
                57,
                48,
                56,
                45,
                51,
                39,
                55,
                43,
                54,
                52,
                51,
                54,
            ]
        ),
        index=pd.Float64Index(
            [
                25.0,
                36.0,
                49.0,
                64.0,
                81.0,
                100.0,
                121.0,
                144.0,
                169.0,
                196.0,
                1225.0,
                1296.0,
                1369.0,
                1444.0,
                1521.0,
                1600.0,
                1681.0,
                1764.0,
                1849.0,
                1936.0,
            ],
            dtype="object",
        ),
    )
    # here 25 matches the label 25.0, so the stored value comes back
    result = s.get(25, 0)
    expected = 43
    assert result == expected
    # GH 7407
    # with a boolean accessor
    df = pd.DataFrame({"i": [0] * 3, "b": [False] * 3})
    vc = df.i.value_counts()
    result = vc.get(99, default="Missing")
    assert result == "Missing"
    vc = df.b.value_counts()
    result = vc.get(False, default="Missing")
    assert result == 3
    result = vc.get(True, default="Missing")
    assert result == "Missing"
def test_get_nan():
    """get(np.nan) on a float index returns None / the default (GH 8569)."""
    # GH 8569
    s = pd.Float64Index(range(10)).to_series()
    assert s.get(np.nan) is None
    assert s.get(np.nan, default="Missing") == "Missing"
def test_get_nan_multiple():
    """List-of-keys get (deprecated) still works with partial matches, but
    returns None when every key is missing (GH 8569, GH 17295)."""
    # GH 8569
    # ensure that fixing "test_get_nan" above hasn't broken get
    # with multiple elements
    s = pd.Float64Index(range(10)).to_series()
    idx = [2, 30]
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        assert_series_equal(s.get(idx), Series([2, np.nan], index=idx))
    idx = [2, np.nan]
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        assert_series_equal(s.get(idx), Series([2, np.nan], index=idx))
    # GH 17295 - all missing keys
    idx = [20, 30]
    assert s.get(idx) is None
    idx = [np.nan, np.nan]
    assert s.get(idx) is None
def test_delitem():
    """``del s[label]`` removes the entry in place, raises KeyError on an
    empty Series, and works down to (and back up from) empty (GH 5542)."""
    # GH 5542
    # should delete the item inplace
    s = Series(range(5))
    del s[0]
    expected = Series(range(1, 5), index=range(1, 5))
    assert_series_equal(s, expected)
    del s[1]
    expected = Series(range(2, 5), index=range(2, 5))
    assert_series_equal(s, expected)
    # empty
    s = Series()
    with pytest.raises(KeyError, match=r"^0$"):
        del s[0]
    # only 1 left, del, add, del
    s = Series(1)
    del s[0]
    assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))
    s[0] = 1
    assert_series_equal(s, Series(1))
    del s[0]
    assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))
    # Index(dtype=object)
    s = Series(1, index=["a"])
    del s["a"]
    assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="object")))
    s["a"] = 1
    assert_series_equal(s, Series(1, index=["a"]))
    del s["a"]
    assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="object")))
def test_slice_float64():
    """Slicing with float labels is label-based and end-inclusive, matching
    the positional equivalent, for both Series and DataFrame."""
    labels = np.arange(10.0, 50.0, 2)
    index = Index(labels)
    start, end = labels[[5, 15]]
    ser = Series(np.random.randn(20), index=index)
    expected = ser.iloc[5:16]
    assert_series_equal(ser[start:end], expected)
    assert_series_equal(ser.loc[start:end], expected)
    frame = DataFrame(np.random.randn(20, 3), index=index)
    expected_frame = frame.iloc[5:16]
    tm.assert_frame_equal(frame[start:end], expected_frame)
    tm.assert_frame_equal(frame.loc[start:end], expected_frame)
def test_getitem_negative_out_of_bounds():
    """A negative position past the start raises IndexError for both reads
    and writes (the two paths report different messages)."""
    s = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))
    msg = "index out of bounds"
    with pytest.raises(IndexError, match=msg):
        s[-11]
    msg = "index -11 is out of bounds for axis 0 with size 10"
    with pytest.raises(IndexError, match=msg):
        s[-11] = "foo"
def test_getitem_regression():
    """Indexing with the full label list reproduces the Series unchanged."""
    s = Series(range(5), index=list(range(5)))
    result = s[list(range(5))]
    assert_series_equal(result, s)
def test_getitem_setitem_slice_bug():
    """Out-of-range positional slices clip to the data instead of raising,
    for both reads and writes."""
    ser = Series(range(10), index=list(range(10)))
    assert_series_equal(ser[-12:], ser)
    assert_series_equal(ser[-7:], ser[3:])
    assert_series_equal(ser[:-12], ser[:0])
    ser = Series(range(10), index=list(range(10)))
    ser[-12:] = 0
    assert (ser == 0).all()
    # an empty clipped slice assigns nothing
    ser[:-12] = 5
    assert (ser == 0).all()
def test_getitem_setitem_slice_integers():
    """On a non-contiguous integer index, plain ``[:4]`` slicing is
    positional, for both reads and writes."""
    s = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])
    result = s[:4]
    expected = s.reindex([2, 4, 6, 8])
    assert_series_equal(result, expected)
    s[:4] = 0
    assert (s[:4] == 0).all()
    assert not (s[4:] == 0).any()
def test_setitem_float_labels():
    """With float labels, ``.loc[1]`` targets the label 1.0 — the same
    element as positional assignment at its location."""
    ser = Series(["a", "b", "c"], index=[0, 0.5, 1])
    by_position = ser.copy()
    ser.loc[1] = "zoo"
    by_position.iloc[2] = "zoo"
    assert_series_equal(ser, by_position)
def test_slice_float_get_set(test_data):
    """Float bounds in a slice on a DatetimeIndex raise TypeError for both
    reads and writes."""
    msg = (
        r"cannot do slice indexing on <class 'pandas\.core\.indexes"
        r"\.datetimes\.DatetimeIndex'> with these indexers \[{key}\]"
        r" of <class 'float'>"
    )
    with pytest.raises(TypeError, match=msg.format(key=r"4\.0")):
        test_data.ts[4.0:10.0]
    with pytest.raises(TypeError, match=msg.format(key=r"4\.0")):
        test_data.ts[4.0:10.0] = 0
    with pytest.raises(TypeError, match=msg.format(key=r"4\.5")):
        test_data.ts[4.5:10.0]
    with pytest.raises(TypeError, match=msg.format(key=r"4\.5")):
        test_data.ts[4.5:10.0] = 0
def test_slice_floats2():
    """Label-based .loc slicing on a float index: an exact-match lower
    bound is included; a between-labels bound starts at the next label."""
    ser = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float))
    assert len(ser.loc[12.0:]) == 8
    assert len(ser.loc[12.5:]) == 7
    # nudge one label off the exact value and re-check both bounds
    new_labels = np.arange(10, 20, dtype=float)
    new_labels[2] = 12.2
    ser.index = new_labels
    assert len(ser.loc[12.0:]) == 8
    assert len(ser.loc[12.5:]) == 7
def test_int_indexing():
    """Missing labels raise KeyError on an integer index, whether the
    (duplicate-holding) index is monotonic or not."""
    for labels in ([0, 0, 1, 1, 2, 2], [2, 2, 0, 0, 1, 1]):
        ser = Series(np.random.randn(6), index=labels)
        with pytest.raises(KeyError, match=r"^5$"):
            ser[5]
        with pytest.raises(KeyError, match=r"^'c'$"):
            ser["c"]
def test_getitem_int64(test_data):
    """A np.int64 key behaves exactly like the equivalent Python int key."""
    idx = np.int64(5)
    assert test_data.ts[idx] == test_data.ts[5]

View File

@@ -0,0 +1,346 @@
from datetime import datetime
import numpy as np
import pytest
from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series
import pandas.util.testing as tm
class TestSeriesAlterAxes:
    """Tests for Series axis mutation: assigning/renaming/resetting the
    index, axis-name handling, and level reordering/dropping."""

    def test_setindex(self, string_series):
        """Assigning ``.index`` validates type and length before taking effect."""
        # wrong type
        msg = (
            r"Index\(\.\.\.\) must be called with a collection of some"
            r" kind, None was passed"
        )
        with pytest.raises(TypeError, match=msg):
            string_series.index = None
        # wrong length
        msg = (
            "Length mismatch: Expected axis has 30 elements, new"
            " values have 29 elements"
        )
        with pytest.raises(ValueError, match=msg):
            string_series.index = np.arange(len(string_series) - 1)
        # works
        string_series.index = np.arange(len(string_series))
        assert isinstance(string_series.index, Index)
    # Renaming
    def test_rename(self, datetime_series):
        """rename accepts a callable, a full dict, a partial dict, and an
        empty mapping (which keeps the index name)."""
        ts = datetime_series
        renamer = lambda x: x.strftime("%Y%m%d")
        renamed = ts.rename(renamer)
        assert renamed.index[0] == renamer(ts.index[0])
        # dict
        rename_dict = dict(zip(ts.index, renamed.index))
        renamed2 = ts.rename(rename_dict)
        tm.assert_series_equal(renamed, renamed2)
        # partial dict
        s = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64")
        renamed = s.rename({"b": "foo", "d": "bar"})
        tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"]))
        # index with name
        renamer = Series(
            np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64"
        )
        renamed = renamer.rename({})
        assert renamed.index.name == renamer.index.name
    def test_rename_by_series(self):
        """rename accepts a Series as a (partial) label mapping."""
        s = Series(range(5), name="foo")
        renamer = Series({1: 10, 2: 20})
        result = s.rename(renamer)
        expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo")
        tm.assert_series_equal(result, expected)
    def test_rename_set_name(self):
        """rename(scalar) sets ``name`` on a copy; the original is untouched."""
        s = Series(range(4), index=list("abcd"))
        for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
            result = s.rename(name)
            assert result.name == name
            tm.assert_numpy_array_equal(result.index.values, s.index.values)
        assert s.name is None
    def test_rename_set_name_inplace(self):
        """rename(scalar, inplace=True) sets ``name`` without copying."""
        s = Series(range(3), index=list("abc"))
        for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
            s.rename(name, inplace=True)
            assert s.name == name
            exp = np.array(["a", "b", "c"], dtype=np.object_)
            tm.assert_numpy_array_equal(s.index.values, exp)
    def test_rename_axis_supported(self):
        """rename accepts axis=0/'index' for DataFrame compatibility (GH-18589)."""
        # Supporting axis for compatibility, detailed in GH-18589
        s = Series(range(5))
        s.rename({}, axis=0)
        s.rename({}, axis="index")
        with pytest.raises(ValueError, match="No axis named 5"):
            s.rename({}, axis=5)
    def test_set_name_attribute(self):
        """Any hashable (int, float, str, datetime, tuple, unicode) may be
        assigned to ``.name``."""
        s = Series([1, 2, 3])
        s2 = Series([1, 2, 3], name="bar")
        for name in [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]:
            s.name = name
            assert s.name == name
            s2.name = name
            assert s2.name == name
    def test_set_name(self):
        """_set_name returns a renamed copy, leaving the original alone."""
        s = Series([1, 2, 3])
        s2 = s._set_name("foo")
        assert s2.name == "foo"
        assert s.name is None
        assert s is not s2
    def test_rename_inplace(self, datetime_series):
        """rename(callable, inplace=True) rewrites the labels in place."""
        renamer = lambda x: x.strftime("%Y%m%d")
        expected = renamer(datetime_series.index[0])
        datetime_series.rename(renamer, inplace=True)
        assert datetime_series.index[0] == expected
    def test_set_index_makes_timeseries(self):
        """Assigning a DatetimeIndex turns the Series into a time series."""
        idx = tm.makeDateIndex(10)
        s = Series(range(10))
        s.index = idx
        assert s.index.is_all_dates
    def test_reset_index(self):
        """reset_index: column naming, drop/inplace, and per-level resets."""
        df = tm.makeDataFrame()[:5]
        ser = df.stack()
        ser.index.names = ["hash", "category"]
        ser.name = "value"
        df = ser.reset_index()
        assert "value" in df
        df = ser.reset_index(name="value2")
        assert "value2" in df
        # check inplace
        s = ser.reset_index(drop=True)
        s2 = ser
        s2.reset_index(drop=True, inplace=True)
        tm.assert_series_equal(s, s2)
        # level
        index = MultiIndex(
            levels=[["bar"], ["one", "two", "three"], [0, 1]],
            codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        )
        s = Series(np.random.randn(6), index=index)
        rs = s.reset_index(level=1)
        assert len(rs.columns) == 2
        rs = s.reset_index(level=[0, 2], drop=True)
        tm.assert_index_equal(rs.index, Index(index.get_level_values(1)))
        assert isinstance(rs, Series)
    def test_reset_index_name(self):
        """reset_index must not propagate the old index name to the new index."""
        s = Series([1, 2, 3], index=Index(range(3), name="x"))
        assert s.reset_index().index.name is None
        assert s.reset_index(drop=True).index.name is None
    def test_reset_index_level(self):
        """reset_index(level=...) works by name or position, with and
        without drop, on MultiIndex and single-level Index."""
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])
        for levels in ["A", "B"], [0, 1]:
            # With MultiIndex
            s = df.set_index(["A", "B"])["C"]
            result = s.reset_index(level=levels[0])
            tm.assert_frame_equal(result, df.set_index("B"))
            result = s.reset_index(level=levels[:1])
            tm.assert_frame_equal(result, df.set_index("B"))
            result = s.reset_index(level=levels)
            tm.assert_frame_equal(result, df)
            result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True)
            tm.assert_frame_equal(result, df[["C"]])
            with pytest.raises(KeyError, match="Level E "):
                s.reset_index(level=["A", "E"])
            # With single-level Index
            s = df.set_index("A")["B"]
            result = s.reset_index(level=levels[0])
            tm.assert_frame_equal(result, df[["A", "B"]])
            result = s.reset_index(level=levels[:1])
            tm.assert_frame_equal(result, df[["A", "B"]])
            result = s.reset_index(level=levels[0], drop=True)
            tm.assert_series_equal(result, df["B"])
            with pytest.raises(IndexError, match="Too many levels"):
                s.reset_index(level=[0, 1, 2])
        # Check that .reset_index([],drop=True) doesn't fail
        result = Series(range(4)).reset_index([], drop=True)
        expected = Series(range(4))
        tm.assert_series_equal(result, expected)
    def test_reset_index_range(self):
        """reset_index yields a RangeIndex, not a materialized Int64Index (GH 12071)."""
        # GH 12071
        s = Series(range(2), name="A", dtype="int64")
        series_result = s.reset_index()
        assert isinstance(series_result.index, RangeIndex)
        series_expected = DataFrame(
            [[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2)
        )
        tm.assert_frame_equal(series_result, series_expected)
    def test_reorder_levels(self):
        """reorder_levels accepts positions or names; identity order is a no-op."""
        index = MultiIndex(
            levels=[["bar"], ["one", "two", "three"], [0, 1]],
            codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
            names=["L0", "L1", "L2"],
        )
        s = Series(np.arange(6), index=index)
        # no change, position
        result = s.reorder_levels([0, 1, 2])
        tm.assert_series_equal(s, result)
        # no change, labels
        result = s.reorder_levels(["L0", "L1", "L2"])
        tm.assert_series_equal(s, result)
        # rotate, position
        result = s.reorder_levels([1, 2, 0])
        e_idx = MultiIndex(
            levels=[["one", "two", "three"], [0, 1], ["bar"]],
            codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0]],
            names=["L1", "L2", "L0"],
        )
        expected = Series(np.arange(6), index=e_idx)
        tm.assert_series_equal(result, expected)
    def test_rename_axis_mapper(self):
        """rename_axis(index=...) accepts a dict, a callable or a list of
        names; ``columns=`` is rejected for a Series (GH 19978)."""
        # GH 19978
        mi = MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["ll", "nn"])
        s = Series([i for i in range(len(mi))], index=mi)
        result = s.rename_axis(index={"ll": "foo"})
        assert result.index.names == ["foo", "nn"]
        result = s.rename_axis(index=str.upper, axis=0)
        assert result.index.names == ["LL", "NN"]
        result = s.rename_axis(index=["foo", "goo"])
        assert result.index.names == ["foo", "goo"]
        with pytest.raises(TypeError, match="unexpected"):
            s.rename_axis(columns="wrong")
    def test_rename_axis_inplace(self, datetime_series):
        """rename_axis(..., inplace=True) mutates in place and returns None
        (GH 15704)."""
        # GH 15704
        expected = datetime_series.rename_axis("foo")
        result = datetime_series
        no_return = result.rename_axis("foo", inplace=True)
        assert no_return is None
        tm.assert_series_equal(result, expected)
    @pytest.mark.parametrize("kwargs", [{"mapper": None}, {"index": None}, {}])
    def test_rename_axis_none(self, kwargs):
        """Explicit None clears the axis name; no arguments keep it (GH 25034)."""
        # GH 25034
        index = Index(list("abc"), name="foo")
        df = Series([1, 2, 3], index=index)
        result = df.rename_axis(**kwargs)
        expected_index = index.rename(None) if kwargs else index
        expected = Series([1, 2, 3], index=expected_index)
        tm.assert_series_equal(result, expected)
    def test_set_axis_inplace_axes(self, axis_series):
        """set_axis with inplace unspecified warns; inplace=True mutates (GH14636)."""
        # GH14636
        ser = Series(np.arange(4), index=[1, 3, 5, 7], dtype="int64")
        expected = ser.copy()
        expected.index = list("abcd")
        # inplace=True
        # The FutureWarning comes from the fact that we would like to have
        # inplace default to False some day
        for inplace, warn in [(None, FutureWarning), (True, None)]:
            result = ser.copy()
            kwargs = {"inplace": inplace}
            with tm.assert_produces_warning(warn):
                result.set_axis(list("abcd"), axis=axis_series, **kwargs)
            tm.assert_series_equal(result, expected)
    def test_set_axis_inplace(self):
        """set_axis(..., inplace=False) returns a new object; bad axis raises (GH14636)."""
        # GH14636
        s = Series(np.arange(4), index=[1, 3, 5, 7], dtype="int64")
        expected = s.copy()
        expected.index = list("abcd")
        # inplace=False
        result = s.set_axis(list("abcd"), axis=0, inplace=False)
        tm.assert_series_equal(expected, result)
        # omitting the "axis" parameter
        with tm.assert_produces_warning(None):
            result = s.set_axis(list("abcd"), inplace=False)
        tm.assert_series_equal(result, expected)
        # wrong values for the "axis" parameter
        for axis in [2, "foo"]:
            with pytest.raises(ValueError, match="No axis named"):
                s.set_axis(list("abcd"), axis=axis, inplace=False)
    def test_set_axis_prior_to_deprecation_signature(self):
        """The old positional ``set_axis(axis, labels)`` signature warns."""
        s = Series(np.arange(4), index=[1, 3, 5, 7], dtype="int64")
        expected = s.copy()
        expected.index = list("abcd")
        # NOTE(review): the loop variable ``axis`` is never used -- the call
        # below always passes 0; it presumably should pass ``axis``. Confirm
        # before relying on the "index" iteration adding coverage.
        for axis in [0, "index"]:
            with tm.assert_produces_warning(FutureWarning):
                result = s.set_axis(0, list("abcd"), inplace=False)
            tm.assert_series_equal(result, expected)
    def test_reset_index_drop_errors(self):
        """reset_index with an unknown level name raises KeyError (GH 20925)."""
        # GH 20925
        # KeyError raised for series index when passed level name is missing
        s = Series(range(4))
        with pytest.raises(KeyError, match="must be same as name"):
            s.reset_index("wrong", drop=True)
        with pytest.raises(KeyError, match="must be same as name"):
            s.reset_index("wrong")
        # KeyError raised for series when level to be dropped is missing
        s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2))
        with pytest.raises(KeyError, match="not found"):
            s.reset_index("wrong", drop=True)
    def test_droplevel(self):
        """droplevel removes one index level; axis='columns' is invalid for
        a Series (GH20342)."""
        # GH20342
        ser = Series([1, 2, 3, 4])
        ser.index = MultiIndex.from_arrays(
            [(1, 2, 3, 4), (5, 6, 7, 8)], names=["a", "b"]
        )
        expected = ser.reset_index("b", drop=True)
        result = ser.droplevel("b", axis="index")
        tm.assert_series_equal(result, expected)
        # test that droplevel raises ValueError on axis != 0
        with pytest.raises(ValueError):
            ser.droplevel(1, axis="columns")

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,759 @@
from collections import OrderedDict
import pydoc
import warnings
import numpy as np
import pytest
import pandas as pd
from pandas import (
Categorical,
DataFrame,
DatetimeIndex,
Index,
Series,
TimedeltaIndex,
date_range,
period_range,
timedelta_range,
)
from pandas.core.arrays import PeriodArray
from pandas.core.indexes.datetimes import Timestamp
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal, ensure_clean
import pandas.io.formats.printing as printing
from .common import TestData
class SharedWithSparse:
"""
A collection of tests Series and SparseSeries can share.
In generic tests on this class, use ``self._assert_series_equal()``
which is implemented in sub-classes.
"""
    def _assert_series_equal(self, left, right):
        """Dispatch to series class dependent assertion.

        Abstract hook: concrete subclasses must override this with the
        assertion matching their series class.
        """
        raise NotImplementedError
def test_scalarop_preserve_name(self):
result = self.ts * 2
assert result.name == self.ts.name
def test_copy_name(self):
result = self.ts.copy()
assert result.name == self.ts.name
    def test_copy_index_name_checking(self):
        """Renaming the copy's index must not leak back to the source."""
        # don't want to be able to modify the index stored elsewhere after
        # making a copy
        self.ts.index.name = None
        assert self.ts.index.name is None
        assert self.ts is self.ts
        cp = self.ts.copy()
        cp.index.name = "foo"
        printing.pprint_thing(self.ts.index.name)
        assert self.ts.index.name is None
    def test_append_preserve_name(self):
        """Appending two pieces of the same named Series keeps the name."""
        result = self.ts[:5].append(self.ts[5:])
        assert result.name == self.ts.name
    def test_binop_maybe_preserve_name(self):
        """Binary ops keep ``name`` only when both operands share it;
        mismatched names yield a nameless result — for operators, named
        methods, and every (reflected) arithmetic flavor."""
        # names match, preserve
        result = self.ts * self.ts
        assert result.name == self.ts.name
        result = self.ts.mul(self.ts)
        assert result.name == self.ts.name
        result = self.ts * self.ts[:-2]
        assert result.name == self.ts.name
        # names don't match, don't preserve
        cp = self.ts.copy()
        cp.name = "something else"
        result = self.ts + cp
        assert result.name is None
        result = self.ts.add(cp)
        assert result.name is None
        ops = ["add", "sub", "mul", "div", "truediv", "floordiv", "mod", "pow"]
        ops = ops + ["r" + op for op in ops]
        for op in ops:
            # names match, preserve
            s = self.ts.copy()
            result = getattr(s, op)(s)
            assert result.name == self.ts.name
            # names don't match, don't preserve
            cp = self.ts.copy()
            cp.name = "changed"
            result = getattr(s, op)(cp)
            assert result.name is None
def test_combine_first_name(self):
result = self.ts.combine_first(self.ts[:5])
assert result.name == self.ts.name
def test_getitem_preserve_name(self):
result = self.ts[self.ts > 0]
assert result.name == self.ts.name
result = self.ts[[0, 2, 4]]
assert result.name == self.ts.name
result = self.ts[5:10]
assert result.name == self.ts.name
def test_pickle(self):
unp_series = self._pickle_roundtrip(self.series)
unp_ts = self._pickle_roundtrip(self.ts)
assert_series_equal(unp_series, self.series)
assert_series_equal(unp_ts, self.ts)
def _pickle_roundtrip(self, obj):
with ensure_clean() as path:
obj.to_pickle(path)
unpickled = pd.read_pickle(path)
return unpickled
def test_argsort_preserve_name(self):
result = self.ts.argsort()
assert result.name == self.ts.name
def test_sort_index_name(self):
result = self.ts.sort_index(ascending=False)
assert result.name == self.ts.name
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
def test_to_sparse_pass_name(self):
result = self.ts.to_sparse()
assert result.name == self.ts.name
def test_constructor_dict(self):
d = {"a": 0.0, "b": 1.0, "c": 2.0}
result = self.series_klass(d)
expected = self.series_klass(d, index=sorted(d.keys()))
self._assert_series_equal(result, expected)
result = self.series_klass(d, index=["b", "c", "d", "a"])
expected = self.series_klass([1, 2, np.nan, 0], index=["b", "c", "d", "a"])
self._assert_series_equal(result, expected)
def test_constructor_subclass_dict(self):
data = tm.TestSubDict((x, 10.0 * x) for x in range(10))
series = self.series_klass(data)
expected = self.series_klass(dict(data.items()))
self._assert_series_equal(series, expected)
def test_constructor_ordereddict(self):
# GH3283
data = OrderedDict(
("col{i}".format(i=i), np.random.random()) for i in range(12)
)
series = self.series_klass(data)
expected = self.series_klass(list(data.values()), list(data.keys()))
self._assert_series_equal(series, expected)
# Test with subclass
class A(OrderedDict):
pass
series = self.series_klass(A(data))
self._assert_series_equal(series, expected)
def test_constructor_dict_multiindex(self):
d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}
_d = sorted(d.items())
result = self.series_klass(d)
expected = self.series_klass(
[x[1] for x in _d], index=pd.MultiIndex.from_tuples([x[0] for x in _d])
)
self._assert_series_equal(result, expected)
d["z"] = 111.0
_d.insert(0, ("z", d["z"]))
result = self.series_klass(d)
expected = self.series_klass(
[x[1] for x in _d], index=pd.Index([x[0] for x in _d], tupleize_cols=False)
)
result = result.reindex(index=expected.index)
self._assert_series_equal(result, expected)
def test_constructor_dict_timedelta_index(self):
# GH #12169 : Resample category data with timedelta index
# construct Series from dict as data and TimedeltaIndex as index
# will result NaN in result Series data
expected = self.series_klass(
data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], unit="s")
)
result = self.series_klass(
data={
pd.to_timedelta(0, unit="s"): "A",
pd.to_timedelta(10, unit="s"): "B",
pd.to_timedelta(20, unit="s"): "C",
},
index=pd.to_timedelta([0, 10, 20], unit="s"),
)
self._assert_series_equal(result, expected)
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_from_array_deprecated(self):
# multiple FutureWarnings, so can't assert stacklevel
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.series_klass.from_array([1, 2, 3])
def test_sparse_accessor_updates_on_inplace(self):
s = pd.Series([1, 1, 2, 3], dtype="Sparse[int]")
s.drop([0, 1], inplace=True)
assert s.sparse.density == 1.0
class TestSeriesMisc(TestData, SharedWithSparse):
    """Miscellaneous Series API tests, run against the dense ``Series`` class."""

    # concrete class used by the SharedWithSparse constructor tests
    series_klass = Series
    # SharedWithSparse tests use generic, series_klass-agnostic assertion
    _assert_series_equal = staticmethod(tm.assert_series_equal)

    def test_tab_completion(self):
        # GH 9910
        s = Series(list("abcd"))
        # Series of str values should have .str but not .dt/.cat in __dir__
        assert "str" in dir(s)
        assert "dt" not in dir(s)
        assert "cat" not in dir(s)
        # similarly for .dt
        s = Series(date_range("1/1/2015", periods=5))
        assert "dt" in dir(s)
        assert "str" not in dir(s)
        assert "cat" not in dir(s)

        # Similarly for .cat, but with the twist that str and dt should be
        # there if the categories are of that type first cat and str.
        s = Series(list("abbcd"), dtype="category")
        assert "cat" in dir(s)
        assert "str" in dir(s)  # as it is a string categorical
        assert "dt" not in dir(s)

        # similar to cat and str
        s = Series(date_range("1/1/2015", periods=5)).astype("category")
        assert "cat" in dir(s)
        assert "str" not in dir(s)
        assert "dt" in dir(s)  # as it is a datetime categorical

    def test_tab_completion_with_categorical(self):
        # test the tab completion display
        ok_for_cat = [
            "name",
            "index",
            "categorical",
            "categories",
            "codes",
            "ordered",
            "set_categories",
            "add_categories",
            "remove_categories",
            "rename_categories",
            "reorder_categories",
            "remove_unused_categories",
            "as_ordered",
            "as_unordered",
        ]

        def get_dir(s):
            # public attributes exposed by the .cat accessor
            results = [r for r in s.cat.__dir__() if not r.startswith("_")]
            return list(sorted(set(results)))

        s = Series(list("aabbcde")).astype("category")
        results = get_dir(s)
        tm.assert_almost_equal(results, list(sorted(set(ok_for_cat))))

    @pytest.mark.parametrize(
        "index",
        [
            tm.makeUnicodeIndex(10),
            tm.makeStringIndex(10),
            tm.makeCategoricalIndex(10),
            Index(["foo", "bar", "baz"] * 2),
            tm.makeDateIndex(10),
            tm.makePeriodIndex(10),
            tm.makeTimedeltaIndex(10),
            tm.makeIntIndex(10),
            tm.makeUIntIndex(10),
            tm.makeIntIndex(10),
            tm.makeFloatIndex(10),
            Index([True, False]),
            Index(["a{}".format(i) for i in range(101)]),
            pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
            pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
        ],
    )
    def test_index_tab_completion(self, index):
        # dir contains string-like values of the Index.
        s = pd.Series(index=index)
        dir_s = dir(s)
        # only the first 100 identifier-like labels are exposed via dir()
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                assert not isinstance(x, str) or not x.isidentifier() or x in dir_s
            else:
                assert x not in dir_s

    def test_not_hashable(self):
        # Series (empty or not) must raise when hashed
        s_empty = Series()
        s = Series([1])
        msg = "'Series' objects are mutable, thus they cannot be hashed"
        with pytest.raises(TypeError, match=msg):
            hash(s_empty)
        with pytest.raises(TypeError, match=msg):
            hash(s)

    def test_contains(self):
        # ``in`` tests membership against the index, not the values
        tm.assert_contains_all(self.ts.index, self.ts)

    def test_iter(self):
        # iterating a Series yields its values in positional order
        for i, val in enumerate(self.series):
            assert val == self.series[i]

        for i, val in enumerate(self.ts):
            assert val == self.ts[i]

    def test_keys(self):
        # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
        # to .keys() in a list()
        getkeys = self.ts.keys
        assert getkeys() is self.ts.index

    def test_values(self):
        tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False)

    def test_iteritems(self):
        for idx, val in self.series.iteritems():
            assert val == self.series[idx]

        for idx, val in self.ts.iteritems():
            assert val == self.ts[idx]

        # assert is lazy (genrators don't define reverse, lists do)
        assert not hasattr(self.series.iteritems(), "reverse")

    def test_items(self):
        for idx, val in self.series.items():
            assert val == self.series[idx]

        for idx, val in self.ts.items():
            assert val == self.ts[idx]

        # assert is lazy (genrators don't define reverse, lists do)
        assert not hasattr(self.series.items(), "reverse")

    def test_raise_on_info(self):
        # Series deliberately has no DataFrame-style .info()
        s = Series(np.random.randn(10))
        msg = "'Series' object has no attribute 'info'"
        with pytest.raises(AttributeError, match=msg):
            s.info()

    def test_copy(self):
        # deep copies isolate the original; shallow copies share data
        for deep in [None, False, True]:

            s = Series(np.arange(10), dtype="float64")

            # default deep is True
            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[::2] = np.NaN

            if deep is None or deep is True:
                # Did not modify original Series
                assert np.isnan(s2[0])
                assert not np.isnan(s[0])
            else:
                # we DID modify the original Series
                assert np.isnan(s2[0])
                assert np.isnan(s[0])

    def test_copy_tzaware(self):
        # GH#11794
        # copy of tz-aware
        expected = Series([Timestamp("2012/01/01", tz="UTC")])
        expected2 = Series([Timestamp("1999/01/01", tz="UTC")])

        for deep in [None, False, True]:

            s = Series([Timestamp("2012/01/01", tz="UTC")])

            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[0] = pd.Timestamp("1999/01/01", tz="UTC")

            # default deep is True
            if deep is None or deep is True:
                # Did not modify original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected)
            else:
                # we DID modify the original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected2)

    def test_axis_alias(self):
        # "rows" is accepted as an alias for axis 0 / "index"
        s = Series([1, 2, np.nan])
        assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index"))
        assert s.dropna().sum("rows") == 3
        assert s._get_axis_number("rows") == 0
        assert s._get_axis_name("rows") == "index"

    def test_class_axis(self):
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        assert pydoc.getdoc(Series.index)

    def test_numpy_unique(self):
        # it works!
        np.unique(self.ts)

    def test_ndarray_compat(self):
        # test numpy compat with Series as sub-class of NDFrame
        tsdf = DataFrame(
            np.random.randn(1000, 3),
            columns=["A", "B", "C"],
            index=date_range("1/1/2000", periods=1000),
        )

        def f(x):
            return x[x.idxmax()]

        result = tsdf.apply(f)
        expected = tsdf.max()
        tm.assert_series_equal(result, expected)

        # .item()
        with tm.assert_produces_warning(FutureWarning):
            s = Series([1])
            result = s.item()
            assert result == 1
            assert s.item() == s.iloc[0]

        # using an ndarray like function
        s = Series(np.random.randn(10))
        result = Series(np.ones_like(s))
        expected = Series(1, index=range(10), dtype="float64")
        tm.assert_series_equal(result, expected)

        # ravel
        s = Series(np.random.randn(10))
        tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F"))

        # compress
        # GH 6658
        s = Series([0, 1.0, -1], index=list("abc"))
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.0], index=["b"]))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Index(dtype=object) as the same as original
        exp = Series([], dtype="float64", index=Index([], dtype="object"))
        tm.assert_series_equal(result, exp)

        s = Series([0, 1.0, -1], index=[0.1, 0.2, 0.3])
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.0], index=[0.2]))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Float64Index as the same as original
        exp = Series([], dtype="float64", index=Index([], dtype="float64"))
        tm.assert_series_equal(result, exp)

    def test_str_accessor_updates_on_inplace(self):
        # the .str accessor must see the post-drop data
        s = pd.Series(list("abc"))
        s.drop([0], inplace=True)
        assert len(s.str.lower()) == 2

    def test_str_attribute(self):
        # GH9068
        methods = ["strip", "rstrip", "lstrip"]
        s = Series([" jack", "jill ", " jesse ", "frank"])
        for method in methods:
            expected = Series([getattr(str, method)(x) for x in s.values])
            assert_series_equal(getattr(Series.str, method)(s.str), expected)

        # str accessor only valid with string values
        s = Series(range(5))
        with pytest.raises(AttributeError, match="only use .str accessor"):
            s.str.repeat(2)

    def test_empty_method(self):
        # .empty is True only for a zero-length Series
        s_empty = pd.Series()
        assert s_empty.empty

        for full_series in [pd.Series([1]), pd.Series(index=[1])]:
            assert not full_series.empty

    def test_tab_complete_warning(self, ip):
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip("IPython", minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; s = pd.Series()"
        ip.run_code(code)
        # completing "s." must not emit any warning
        with tm.assert_produces_warning(None):
            with provisionalcompleter("ignore"):
                list(ip.Completer.completions("s.", 1))

    def test_integer_series_size(self):
        # GH 25580
        s = Series(range(9))
        assert s.size == 9
        s = Series(range(9), dtype="Int64")
        assert s.size == 9

    def test_get_values_deprecation(self):
        s = Series(range(9))
        with tm.assert_produces_warning(FutureWarning):
            res = s.get_values()
        tm.assert_numpy_array_equal(res, s.values)
class TestCategoricalSeries:
    """Tests for the ``Series.cat`` accessor and categorical delegation."""

    @pytest.mark.parametrize(
        "method",
        [
            lambda x: x.cat.set_categories([1, 2, 3]),
            lambda x: x.cat.reorder_categories([2, 3, 1], ordered=True),
            lambda x: x.cat.rename_categories([1, 2, 3]),
            lambda x: x.cat.remove_unused_categories(),
            lambda x: x.cat.remove_categories([2]),
            lambda x: x.cat.add_categories([4]),
            lambda x: x.cat.as_ordered(),
            lambda x: x.cat.as_unordered(),
        ],
    )
    def test_getname_categorical_accessor(self, method):
        # GH 17509
        # every .cat method must propagate the Series name
        s = Series([1, 2, 3], name="A").astype("category")
        expected = "A"
        result = method(s).name
        assert result == expected

    def test_cat_accessor(self):
        s = Series(Categorical(["a", "b", np.nan, "a"]))
        tm.assert_index_equal(s.cat.categories, Index(["a", "b"]))
        # NOTE(review): the ", False" below is the assert *message*, not a
        # second condition — the assert only checks ``not s.cat.ordered``
        assert not s.cat.ordered, False

        exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"])
        s.cat.set_categories(["b", "a"], inplace=True)
        tm.assert_categorical_equal(s.values, exp)

        res = s.cat.set_categories(["b", "a"])
        tm.assert_categorical_equal(res.values, exp)

        s[:] = "a"
        s = s.cat.remove_unused_categories()
        tm.assert_index_equal(s.cat.categories, Index(["a"]))

    def test_cat_accessor_api(self):
        # GH 9322
        from pandas.core.arrays.categorical import CategoricalAccessor

        assert Series.cat is CategoricalAccessor
        s = Series(list("aabbcde")).astype("category")
        assert isinstance(s.cat, CategoricalAccessor)

        # .cat is only valid for categorical dtype
        invalid = Series([1])
        with pytest.raises(AttributeError, match="only use .cat accessor"):
            invalid.cat
        assert not hasattr(invalid, "cat")

    def test_cat_accessor_no_new_attributes(self):
        # https://github.com/pandas-dev/pandas/issues/10673
        c = Series(list("aabbcde")).astype("category")
        with pytest.raises(AttributeError, match="You cannot add any new attribute"):
            c.cat.xlabel = "a"

    def test_cat_accessor_updates_on_inplace(self):
        # the accessor must see data mutated after it was created
        s = Series(list("abc")).astype("category")
        s.drop(0, inplace=True)
        s.cat.remove_unused_categories(inplace=True)
        assert len(s.cat.categories) == 2

    def test_categorical_delegations(self):

        # invalid accessor
        msg = r"Can only use \.cat accessor with a 'category' dtype"
        with pytest.raises(AttributeError, match=msg):
            Series([1, 2, 3]).cat
        with pytest.raises(AttributeError, match=msg):
            Series([1, 2, 3]).cat()
        with pytest.raises(AttributeError, match=msg):
            Series(["a", "b", "c"]).cat
        with pytest.raises(AttributeError, match=msg):
            Series(np.arange(5.0)).cat
        with pytest.raises(AttributeError, match=msg):
            Series([Timestamp("20130101")]).cat

        # Series should delegate calls to '.categories', '.codes', '.ordered'
        # and the methods '.set_categories()' 'drop_unused_categories()' to the
        # categorical
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["a", "b", "c"])
        tm.assert_index_equal(s.cat.categories, exp_categories)
        s.cat.categories = [1, 2, 3]
        exp_categories = Index([1, 2, 3])
        tm.assert_index_equal(s.cat.categories, exp_categories)

        exp_codes = Series([0, 1, 2, 0], dtype="int8")
        tm.assert_series_equal(s.cat.codes, exp_codes)

        assert s.cat.ordered
        s = s.cat.as_unordered()
        assert not s.cat.ordered
        s.cat.as_ordered(inplace=True)
        assert s.cat.ordered

        # reorder
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["c", "b", "a"])
        exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
        s = s.cat.set_categories(["c", "b", "a"])
        tm.assert_index_equal(s.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(s.__array__(), exp_values)

        # remove unused categories
        s = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"]))
        exp_categories = Index(["a", "b"])
        exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
        s = s.cat.remove_unused_categories()
        tm.assert_index_equal(s.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(s.__array__(), exp_values)

        # This method is likely to be confused, so test that it raises an error
        # on wrong inputs:
        msg = "'Series' object has no attribute 'set_categories'"
        with pytest.raises(AttributeError, match=msg):
            s.set_categories([4, 3, 2, 1])

        # right: s.cat.set_categories([4,3,2,1])

        # GH18862 (let Series.cat.rename_categories take callables)
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        result = s.cat.rename_categories(lambda x: x.upper())
        expected = Series(
            Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True)
        )
        tm.assert_series_equal(result, expected)

    def test_dt_accessor_api_for_categorical(self):
        # https://github.com/pandas-dev/pandas/issues/10661
        from pandas.core.indexes.accessors import Properties

        s_dr = Series(date_range("1/1/2015", periods=5, tz="MET"))
        c_dr = s_dr.astype("category")

        s_pr = Series(period_range("1/1/2015", freq="D", periods=5))
        c_pr = s_pr.astype("category")

        s_tdr = Series(timedelta_range("1 days", "10 days"))
        c_tdr = s_tdr.astype("category")

        # only testing field (like .day)
        # and bool (is_month_start)
        get_ops = lambda x: x._datetimelike_ops

        test_data = [
            ("Datetime", get_ops(DatetimeIndex), s_dr, c_dr),
            ("Period", get_ops(PeriodArray), s_pr, c_pr),
            ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr),
        ]

        assert isinstance(c_dr.dt, Properties)

        # methods that need positional arguments to be callable
        special_func_defs = [
            ("strftime", ("%Y-%m-%d",), {}),
            ("tz_convert", ("EST",), {}),
            ("round", ("D",), {}),
            ("floor", ("D",), {}),
            ("ceil", ("D",), {}),
            ("asfreq", ("D",), {}),
            # ('tz_localize', ("UTC",), {}),
        ]
        _special_func_names = [f[0] for f in special_func_defs]

        # the series is already localized
        _ignore_names = ["tz_localize", "components"]

        for name, attr_names, s, c in test_data:
            func_names = [
                f
                for f in dir(s.dt)
                if not (
                    f.startswith("_")
                    or f in attr_names
                    or f in _special_func_names
                    or f in _ignore_names
                )
            ]

            func_defs = [(f, (), {}) for f in func_names]
            for f_def in special_func_defs:
                if f_def[0] in dir(s.dt):
                    func_defs.append(f_def)

            # every .dt method on the plain series must agree with the
            # categorical-backed series
            for func, args, kwargs in func_defs:
                with warnings.catch_warnings():
                    if func == "to_period":
                        # dropping TZ
                        warnings.simplefilter("ignore", UserWarning)
                    res = getattr(c.dt, func)(*args, **kwargs)
                    exp = getattr(s.dt, func)(*args, **kwargs)

                if isinstance(res, DataFrame):
                    tm.assert_frame_equal(res, exp)
                elif isinstance(res, Series):
                    tm.assert_series_equal(res, exp)
                else:
                    tm.assert_almost_equal(res, exp)

            # same agreement for plain attributes (fields)
            for attr in attr_names:
                try:
                    res = getattr(c.dt, attr)
                    exp = getattr(s.dt, attr)
                except Exception as e:
                    print(name, attr)
                    raise e

            if isinstance(res, DataFrame):
                tm.assert_frame_equal(res, exp)
            elif isinstance(res, Series):
                tm.assert_series_equal(res, exp)
            else:
                tm.assert_almost_equal(res, exp)

        invalid = Series([1, 2, 3]).astype("category")
        msg = "Can only use .dt accessor with datetimelike"

        with pytest.raises(AttributeError, match=msg):
            invalid.dt
        assert not hasattr(invalid, "str")

View File

@@ -0,0 +1,739 @@
from collections import Counter, OrderedDict, defaultdict
from itertools import chain
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Index, Series, isna
from pandas.conftest import _get_cython_table_params
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
class TestSeriesApply:
    """Tests for ``Series.apply`` over ufuncs, element-wise functions and boxing."""

    def test_apply(self, datetime_series):
        with np.errstate(all="ignore"):
            # ufunc applied element-wise equals the vectorized call
            tm.assert_series_equal(
                datetime_series.apply(np.sqrt), np.sqrt(datetime_series)
            )

            # element-wise apply
            import math

            tm.assert_series_equal(
                datetime_series.apply(math.exp), np.exp(datetime_series)
            )

        # empty series
        s = Series(dtype=object, name="foo", index=pd.Index([], name="bar"))
        rs = s.apply(lambda x: x)
        tm.assert_series_equal(s, rs)

        # check all metadata (GH 9322)
        assert s is not rs
        assert s.index is rs.index
        assert s.dtype == rs.dtype
        assert s.name == rs.name

        # index but no data
        s = Series(index=[1, 2, 3])
        rs = s.apply(lambda x: x)
        tm.assert_series_equal(s, rs)

    def test_apply_same_length_inference_bug(self):
        s = Series([1, 2])
        # returning a tuple per element must not be expanded into columns
        f = lambda x: (x, x + 1)

        result = s.apply(f)
        expected = s.map(f)
        assert_series_equal(result, expected)

        s = Series([1, 2, 3])
        result = s.apply(f)
        expected = s.map(f)
        assert_series_equal(result, expected)

    def test_apply_dont_convert_dtype(self):
        s = Series(np.random.randn(10))

        f = lambda x: x if x > 0 else np.nan
        # convert_dtype=False keeps the result as object dtype
        result = s.apply(f, convert_dtype=False)
        assert result.dtype == object

    def test_with_string_args(self, datetime_series):
        # a string arg dispatches to the named reduction method
        for arg in ["sum", "mean", "min", "max", "std"]:
            result = datetime_series.apply(arg)
            expected = getattr(datetime_series, arg)()
            assert result == expected

    def test_apply_args(self):
        s = Series(["foo,bar"])

        # extra positional args are forwarded to the applied function
        result = s.apply(str.split, args=(",",))
        assert result[0] == ["foo", "bar"]
        assert isinstance(result[0], list)

    def test_series_map_box_timestamps(self):
        # GH#2689, GH#2627
        ser = Series(pd.date_range("1/1/2000", periods=10))

        def func(x):
            return (x.hour, x.day, x.month)

        # it works!
        ser.map(func)
        ser.apply(func)

    def test_apply_box(self):
        # ufunc will not be boxed. Same test cases as the test_map_box
        vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
        s = pd.Series(vals)
        assert s.dtype == "datetime64[ns]"
        # boxed value must be Timestamp instance
        res = s.apply(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz))
        exp = pd.Series(["Timestamp_1_None", "Timestamp_2_None"])
        tm.assert_series_equal(res, exp)

        vals = [
            pd.Timestamp("2011-01-01", tz="US/Eastern"),
            pd.Timestamp("2011-01-02", tz="US/Eastern"),
        ]
        s = pd.Series(vals)
        assert s.dtype == "datetime64[ns, US/Eastern]"
        res = s.apply(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz))
        exp = pd.Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
        tm.assert_series_equal(res, exp)

        # timedelta
        vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
        s = pd.Series(vals)
        assert s.dtype == "timedelta64[ns]"
        res = s.apply(lambda x: "{0}_{1}".format(x.__class__.__name__, x.days))
        exp = pd.Series(["Timedelta_1", "Timedelta_2"])
        tm.assert_series_equal(res, exp)

        # period
        vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
        s = pd.Series(vals)
        assert s.dtype == "Period[M]"
        res = s.apply(lambda x: "{0}_{1}".format(x.__class__.__name__, x.freqstr))
        exp = pd.Series(["Period_M", "Period_M"])
        tm.assert_series_equal(res, exp)

    def test_apply_datetimetz(self):
        values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize(
            "Asia/Tokyo"
        )
        s = pd.Series(values, name="XX")

        # applying a tz-aware offset keeps the tz
        result = s.apply(lambda x: x + pd.offsets.Day())
        exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize(
            "Asia/Tokyo"
        )
        exp = pd.Series(exp_values, name="XX")
        tm.assert_series_equal(result, exp)

        # change dtype
        # GH 14506 : Returned dtype changed from int32 to int64
        result = s.apply(lambda x: x.hour)
        exp = pd.Series(list(range(24)) + [0], name="XX", dtype=np.int64)
        tm.assert_series_equal(result, exp)

        # not vectorized
        def f(x):
            if not isinstance(x, pd.Timestamp):
                raise ValueError
            return str(x.tz)

        result = s.map(f)
        exp = pd.Series(["Asia/Tokyo"] * 25, name="XX")
        tm.assert_series_equal(result, exp)

    def test_apply_dict_depr(self):

        tsdf = pd.DataFrame(
            np.random.randn(10, 3),
            columns=["A", "B", "C"],
            index=pd.date_range("1/1/2000", periods=10),
        )
        # nested renaming via dict is deprecated
        with tm.assert_produces_warning(FutureWarning):
            tsdf.A.agg({"foo": ["sum", "mean"]})

    @pytest.mark.parametrize("series", [["1-1", "1-1", np.NaN], ["1-1", "1-2", np.NaN]])
    def test_apply_categorical_with_nan_values(self, series):
        # GH 20714 bug fixed in: GH 24275
        s = pd.Series(series, dtype="category")
        result = s.apply(lambda x: x.split("-")[0])
        result = result.astype(object)
        expected = pd.Series(["1", "1", np.NaN], dtype="category")
        expected = expected.astype(object)
        tm.assert_series_equal(result, expected)
class TestSeriesAggregate:
    """Tests for ``Series.agg``/``Series.transform`` reduction and transform paths."""

    def test_transform(self, string_series):
        # transforming functions

        with np.errstate(all="ignore"):

            f_sqrt = np.sqrt(string_series)
            f_abs = np.abs(string_series)

            # ufunc
            result = string_series.transform(np.sqrt)
            expected = f_sqrt.copy()
            assert_series_equal(result, expected)

            result = string_series.apply(np.sqrt)
            assert_series_equal(result, expected)

            # list-like
            result = string_series.transform([np.sqrt])
            expected = f_sqrt.to_frame().copy()
            expected.columns = ["sqrt"]
            assert_frame_equal(result, expected)

            result = string_series.transform([np.sqrt])
            assert_frame_equal(result, expected)

            result = string_series.transform(["sqrt"])
            assert_frame_equal(result, expected)

            # multiple items in list
            # these are in the order as if we are applying both functions per
            # series and then concatting
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ["sqrt", "absolute"]
            result = string_series.apply([np.sqrt, np.abs])
            assert_frame_equal(result, expected)

            result = string_series.transform(["sqrt", "abs"])
            expected.columns = ["sqrt", "abs"]
            assert_frame_equal(result, expected)

            # dict, provide renaming
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ["foo", "bar"]
            expected = expected.unstack().rename("series")

            result = string_series.apply({"foo": np.sqrt, "bar": np.abs})
            assert_series_equal(result.reindex_like(expected), expected)

    def test_transform_and_agg_error(self, string_series):
        # we are trying to transform with an aggregator
        with pytest.raises(ValueError):
            string_series.transform(["min", "max"])

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                string_series.agg(["sqrt", "max"])

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                string_series.transform(["sqrt", "max"])

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                string_series.agg({"foo": np.sqrt, "bar": "sum"})

    def test_demo(self):
        # demonstration tests
        s = Series(range(6), dtype="int64", name="series")

        result = s.agg(["min", "max"])
        expected = Series([0, 5], index=["min", "max"], name="series")
        tm.assert_series_equal(result, expected)

        result = s.agg({"foo": "min"})
        expected = Series([0], index=["foo"], name="series")
        tm.assert_series_equal(result, expected)

        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            result = s.agg({"foo": ["min", "max"]})

        expected = (
            DataFrame({"foo": [0, 5]}, index=["min", "max"]).unstack().rename("series")
        )
        tm.assert_series_equal(result, expected)

    def test_multiple_aggregators_with_dict_api(self):

        s = Series(range(6), dtype="int64", name="series")
        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            result = s.agg({"foo": ["min", "max"], "bar": ["sum", "mean"]})

        expected = (
            DataFrame(
                {"foo": [5.0, np.nan, 0.0, np.nan], "bar": [np.nan, 2.5, np.nan, 15.0]},
                columns=["foo", "bar"],
                index=["max", "mean", "min", "sum"],
            )
            .unstack()
            .rename("series")
        )
        tm.assert_series_equal(result.reindex_like(expected), expected)

    def test_agg_apply_evaluate_lambdas_the_same(self, string_series):
        # test that we are evaluating row-by-row first
        # before vectorized evaluation
        result = string_series.apply(lambda x: str(x))
        expected = string_series.agg(lambda x: str(x))
        tm.assert_series_equal(result, expected)

        result = string_series.apply(str)
        expected = string_series.agg(str)
        tm.assert_series_equal(result, expected)

    def test_with_nested_series(self, datetime_series):
        # GH 2316
        # .agg with a reducer and a transform, what to do
        result = datetime_series.apply(
            lambda x: Series([x, x ** 2], index=["x", "x^2"])
        )
        expected = DataFrame({"x": datetime_series, "x^2": datetime_series ** 2})
        tm.assert_frame_equal(result, expected)

        result = datetime_series.agg(lambda x: Series([x, x ** 2], index=["x", "x^2"]))
        tm.assert_frame_equal(result, expected)

    def test_replicate_describe(self, string_series):
        # this also tests a result set that is all scalars
        expected = string_series.describe()
        result = string_series.apply(
            OrderedDict(
                [
                    ("count", "count"),
                    ("mean", "mean"),
                    ("std", "std"),
                    ("min", "min"),
                    ("25%", lambda x: x.quantile(0.25)),
                    ("50%", "median"),
                    ("75%", lambda x: x.quantile(0.75)),
                    ("max", "max"),
                ]
            )
        )
        assert_series_equal(result, expected)

    def test_reduce(self, string_series):
        # reductions with named functions
        result = string_series.agg(["sum", "mean"])
        expected = Series(
            [string_series.sum(), string_series.mean()],
            ["sum", "mean"],
            name=string_series.name,
        )
        assert_series_equal(result, expected)

    def test_non_callable_aggregates(self):
        # test agg using non-callable series attributes
        s = Series([1, 2, None])

        # Calling agg w/ just a string arg same as calling s.arg
        result = s.agg("size")
        expected = s.size
        assert result == expected

        # test when mixed w/ callable reducers
        result = s.agg(["size", "count", "mean"])
        expected = Series(OrderedDict([("size", 3.0), ("count", 2.0), ("mean", 1.5)]))
        assert_series_equal(result[expected.index], expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series(),
                [
                    ("sum", 0),
                    ("max", np.nan),
                    ("min", np.nan),
                    ("all", True),
                    ("any", False),
                    ("mean", np.nan),
                    ("prod", 1),
                    ("std", np.nan),
                    ("var", np.nan),
                    ("median", np.nan),
                ],
            ),
            _get_cython_table_params(
                Series([np.nan, 1, 2, 3]),
                [
                    ("sum", 6),
                    ("max", 3),
                    ("min", 1),
                    ("all", True),
                    ("any", True),
                    ("mean", 2),
                    ("prod", 6),
                    ("std", 1),
                    ("var", 1),
                    ("median", 2),
                ],
            ),
            _get_cython_table_params(
                Series("a b c".split()),
                [
                    ("sum", "abc"),
                    ("max", "c"),
                    ("min", "a"),
                    ("all", "c"),  # see GH12863
                    ("any", "a"),
                ],
            ),
        ),
    )
    def test_agg_cython_table(self, series, func, expected):
        # GH21224
        # test reducing functions in
        # pandas.core.base.SelectionMixin._cython_table
        result = series.agg(func)
        if tm.is_number(expected):
            # NaN == NaN must count as equal here
            assert np.isclose(result, expected, equal_nan=True)
        else:
            assert result == expected

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series(),
                [("cumprod", Series([], Index([]))), ("cumsum", Series([], Index([])))],
            ),
            _get_cython_table_params(
                Series([np.nan, 1, 2, 3]),
                [
                    ("cumprod", Series([np.nan, 1, 2, 6])),
                    ("cumsum", Series([np.nan, 1, 3, 6])),
                ],
            ),
            _get_cython_table_params(
                Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))]
            ),
        ),
    )
    def test_agg_cython_table_transform(self, series, func, expected):
        # GH21224
        # test transforming functions in
        # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
        result = series.agg(func)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series("a b c".split()),
                [
                    ("mean", TypeError),  # mean raises TypeError
                    ("prod", TypeError),
                    ("std", TypeError),
                    ("var", TypeError),
                    ("median", TypeError),
                    ("cumprod", TypeError),
                ],
            )
        ),
    )
    def test_agg_cython_table_raises(self, series, func, expected):
        # GH21224
        with pytest.raises(expected):
            # e.g. Series('a b'.split()).cumprod() will raise
            series.agg(func)
class TestSeriesMap:
    def test_map(self, datetime_series):
        # map with another Series uses it as an index-aligned lookup table
        index, data = tm.getMixedTypeDict()

        source = Series(data["B"], index=data["C"])
        target = Series(data["C"][:4], index=data["D"][:4])

        merged = target.map(source)

        for k, v in merged.items():
            assert v == source[target[k]]

        # input could be a dict
        merged = target.map(source.to_dict())

        for k, v in merged.items():
            assert v == source[target[k]]

        # function
        result = datetime_series.map(lambda x: x * 2)
        tm.assert_series_equal(result, datetime_series * 2)

        # GH 10324
        a = Series([1, 2, 3, 4])
        b = Series(["even", "odd", "even", "odd"], dtype="category")
        c = Series(["even", "odd", "even", "odd"])

        # mapping through a categorical lookup keeps category dtype
        exp = Series(["odd", "even", "odd", np.nan], dtype="category")
        tm.assert_series_equal(a.map(b), exp)
        exp = Series(["odd", "even", "odd", np.nan])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(["a", "b", "c", "d"])
        b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"]))
        c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))

        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(["a", "b", "c", "d"])
        b = Series(
            ["B", "C", "D", "E"],
            dtype="category",
            index=pd.CategoricalIndex(["b", "c", "d", "e"]),
        )
        c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"]))

        exp = Series(
            pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"])
        )
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, "B", "C", "D"])
        tm.assert_series_equal(a.map(c), exp)
@pytest.mark.parametrize("index", tm.all_index_generator(10))
def test_map_empty(self, index):
s = Series(index)
result = s.map({})
expected = pd.Series(np.nan, index=s.index)
tm.assert_series_equal(result, expected)
def test_map_compat(self):
# related GH 8024
s = Series([True, True, False], index=[1, 2, 3])
result = s.map({True: "foo", False: "bar"})
expected = Series(["foo", "foo", "bar"], index=[1, 2, 3])
assert_series_equal(result, expected)
def test_map_int(self):
left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4})
right = Series({1: 11, 2: 22, 3: 33})
assert left.dtype == np.float_
assert issubclass(right.dtype.type, np.integer)
merged = left.map(right)
assert merged.dtype == np.float_
assert isna(merged["d"])
assert not isna(merged["c"])
def test_map_type_inference(self):
s = Series(range(3))
s2 = s.map(lambda x: np.where(x == 0, 0, 1))
assert issubclass(s2.dtype.type, np.integer)
def test_map_decimal(self, string_series):
from decimal import Decimal
result = string_series.map(lambda x: Decimal(str(x)))
assert result.dtype == np.object_
assert isinstance(result[0], Decimal)
def test_map_na_exclusion(self):
s = Series([1.5, np.nan, 3, np.nan, 5])
result = s.map(lambda x: x * 2, na_action="ignore")
exp = s * 2
assert_series_equal(result, exp)
def test_map_dict_with_tuple_keys(self):
"""
Due to new MultiIndex-ing behaviour in v0.14.0,
dicts with tuple keys passed to map were being
converted to a multi-index, preventing tuple values
from being mapped properly.
"""
# GH 18496
df = pd.DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]})
label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"}
df["labels"] = df["a"].map(label_mappings)
df["expected_labels"] = pd.Series(["A", "B", "A", "B"], index=df.index)
# All labels should be filled now
tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False)
def test_map_counter(self):
s = Series(["a", "b", "c"], index=[1, 2, 3])
counter = Counter()
counter["b"] = 5
counter["c"] += 1
result = s.map(counter)
expected = Series([0, 5, 1], index=[1, 2, 3])
assert_series_equal(result, expected)
def test_map_defaultdict(self):
s = Series([1, 2, 3], index=["a", "b", "c"])
default_dict = defaultdict(lambda: "blank")
default_dict[1] = "stuff"
result = s.map(default_dict)
expected = Series(["stuff", "blank", "blank"], index=["a", "b", "c"])
assert_series_equal(result, expected)
def test_map_dict_subclass_with_missing(self):
"""
Test Series.map with a dictionary subclass that defines __missing__,
i.e. sets a default value (GH #15999).
"""
class DictWithMissing(dict):
def __missing__(self, key):
return "missing"
s = Series([1, 2, 3])
dictionary = DictWithMissing({3: "three"})
result = s.map(dictionary)
expected = Series(["missing", "missing", "three"])
assert_series_equal(result, expected)
def test_map_dict_subclass_without_missing(self):
class DictWithoutMissing(dict):
pass
s = Series([1, 2, 3])
dictionary = DictWithoutMissing({3: "three"})
result = s.map(dictionary)
expected = Series([np.nan, np.nan, "three"])
assert_series_equal(result, expected)
def test_map_box(self):
vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
s = pd.Series(vals)
assert s.dtype == "datetime64[ns]"
# boxed value must be Timestamp instance
res = s.map(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz))
exp = pd.Series(["Timestamp_1_None", "Timestamp_2_None"])
tm.assert_series_equal(res, exp)
vals = [
pd.Timestamp("2011-01-01", tz="US/Eastern"),
pd.Timestamp("2011-01-02", tz="US/Eastern"),
]
s = pd.Series(vals)
assert s.dtype == "datetime64[ns, US/Eastern]"
res = s.map(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz))
exp = pd.Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
tm.assert_series_equal(res, exp)
# timedelta
vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
s = pd.Series(vals)
assert s.dtype == "timedelta64[ns]"
res = s.map(lambda x: "{0}_{1}".format(x.__class__.__name__, x.days))
exp = pd.Series(["Timedelta_1", "Timedelta_2"])
tm.assert_series_equal(res, exp)
# period
vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
s = pd.Series(vals)
assert s.dtype == "Period[M]"
res = s.map(lambda x: "{0}_{1}".format(x.__class__.__name__, x.freqstr))
exp = pd.Series(["Period_M", "Period_M"])
tm.assert_series_equal(res, exp)
def test_map_categorical(self):
values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
s = pd.Series(values, name="XX", index=list("abcdefg"))
result = s.map(lambda x: x.lower())
exp_values = pd.Categorical(
list("abbabcd"), categories=list("dcba"), ordered=True
)
exp = pd.Series(exp_values, name="XX", index=list("abcdefg"))
tm.assert_series_equal(result, exp)
tm.assert_categorical_equal(result.values, exp_values)
result = s.map(lambda x: "A")
exp = pd.Series(["A"] * 7, name="XX", index=list("abcdefg"))
tm.assert_series_equal(result, exp)
assert result.dtype == np.object
with pytest.raises(NotImplementedError):
s.map(lambda x: x, na_action="ignore")
def test_map_datetimetz(self):
values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize(
"Asia/Tokyo"
)
s = pd.Series(values, name="XX")
# keep tz
result = s.map(lambda x: x + pd.offsets.Day())
exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize(
"Asia/Tokyo"
)
exp = pd.Series(exp_values, name="XX")
tm.assert_series_equal(result, exp)
# change dtype
# GH 14506 : Returned dtype changed from int32 to int64
result = s.map(lambda x: x.hour)
exp = pd.Series(list(range(24)) + [0], name="XX", dtype=np.int64)
tm.assert_series_equal(result, exp)
with pytest.raises(NotImplementedError):
s.map(lambda x: x, na_action="ignore")
# not vectorized
def f(x):
if not isinstance(x, pd.Timestamp):
raise ValueError
return str(x.tz)
result = s.map(f)
exp = pd.Series(["Asia/Tokyo"] * 25, name="XX")
tm.assert_series_equal(result, exp)
@pytest.mark.parametrize(
"vals,mapping,exp",
[
(list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]),
(list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3),
(list(range(3)), {0: 42}, [42] + [np.nan] * 3),
],
)
def test_map_missing_mixed(self, vals, mapping, exp):
# GH20495
s = pd.Series(vals + [np.nan])
result = s.map(mapping)
tm.assert_series_equal(result, pd.Series(exp))
@pytest.mark.parametrize(
"dti,exp",
[
(
Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
),
(
tm.makeTimeSeries(nper=30),
DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"),
),
],
)
def test_apply_series_on_date_time_index_aware_series(self, dti, exp):
# GH 25959
# Calling apply on a localized time series should not cause an error
index = dti.tz_localize("UTC").index
result = pd.Series(index).apply(lambda x: pd.Series([1, 2]))
assert_frame_equal(result, exp)
def test_apply_scaler_on_date_time_index_aware_series(self):
# GH 25959
# Calling apply on a localized time series should not cause an error
series = tm.makeTimeSeries(nper=30).tz_localize("UTC")
result = pd.Series(series.index).apply(lambda x: 1)
assert_series_equal(result, pd.Series(np.ones(30), dtype="int64"))

View File

@@ -0,0 +1,165 @@
import operator
import numpy as np
import pytest
import pandas as pd
from pandas import Series
from pandas.core.indexes.period import IncompatibleFrequency
import pandas.util.testing as tm
def _permute(obj):
return obj.take(np.random.permutation(len(obj)))
class TestSeriesFlexArithmetic:
    """Series.add/sub/... must behave exactly like the corresponding dunders."""

    @pytest.mark.parametrize(
        "ts",
        [
            (lambda x: x, lambda x: x * 2, False),
            (lambda x: x, lambda x: x[::2], False),
            (lambda x: x, lambda x: 5, True),
            (lambda x: tm.makeFloatSeries(), lambda x: tm.makeFloatSeries(), True),
        ],
    )
    @pytest.mark.parametrize(
        "opname", ["add", "sub", "mul", "floordiv", "truediv", "pow"]
    )
    def test_flex_method_equivalence(self, opname, ts):
        # check that Series.{opname} behaves like Series.__{opname}__,
        tser = tm.makeTimeSeries().rename("ts")

        series = ts[0](tser)  # builder for the left operand
        other = ts[1](tser)  # builder for the right operand
        check_reverse = ts[2]  # whether to also exercise the r-variant

        op = getattr(Series, opname)
        alt = getattr(operator, opname)

        result = op(series, other)
        expected = alt(series, other)
        tm.assert_almost_equal(result, expected)
        if check_reverse:
            rop = getattr(Series, "r" + opname)
            result = rop(series, other)
            expected = alt(other, series)
            tm.assert_almost_equal(result, expected)
class TestSeriesArithmetic:
    # Some of these may end up in tests/arithmetic, but are not yet sorted

    def test_add_series_with_period_index(self):
        """Addition aligns on PeriodIndex; mismatched freq must raise."""
        rng = pd.period_range("1/1/2000", "1/1/2010", freq="A")
        ts = Series(np.random.randn(len(rng)), index=rng)

        result = ts + ts[::2]
        expected = ts + ts
        expected[1::2] = np.nan  # labels missing from the sliced operand
        tm.assert_series_equal(result, expected)

        # alignment is by label, so operand order within the index is irrelevant
        result = ts + _permute(ts[::2])
        tm.assert_series_equal(result, expected)

        msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)"
        with pytest.raises(IncompatibleFrequency, match=msg):
            ts + ts.asfreq("D", how="end")
# ------------------------------------------------------------------
# Comparisons
class TestSeriesFlexComparison:
    """Flex comparison methods (eq/ne/le/lt/gt/ge) vs. their operators."""

    def test_comparison_flex_basic(self):
        """Each flex comparator must match its operator counterpart.

        Fixes two defects in the original test:
        - ``le`` was checked against ``<`` and ``lt`` against ``<=``
          (swapped); random floats never tie, so the swap passed by
          accident but asserted the wrong contract.
        - the invalid-axis loop listed ``"le"`` twice and omitted
          ``"lt"``, leaving ``lt`` uncovered.
        """
        left = pd.Series(np.random.randn(10))
        right = pd.Series(np.random.randn(10))

        tm.assert_series_equal(left.eq(right), left == right)
        tm.assert_series_equal(left.ne(right), left != right)
        tm.assert_series_equal(left.le(right), left <= right)
        tm.assert_series_equal(left.lt(right), left < right)
        tm.assert_series_equal(left.gt(right), left > right)
        tm.assert_series_equal(left.ge(right), left >= right)

        # axis=0, None and "index" are all aliases of the only valid axis
        for axis in [0, None, "index"]:
            tm.assert_series_equal(left.eq(right, axis=axis), left == right)
            tm.assert_series_equal(left.ne(right, axis=axis), left != right)
            tm.assert_series_equal(left.le(right, axis=axis), left <= right)
            tm.assert_series_equal(left.lt(right, axis=axis), left < right)
            tm.assert_series_equal(left.gt(right, axis=axis), left > right)
            tm.assert_series_equal(left.ge(right, axis=axis), left >= right)

        # any other axis is rejected by every flex comparator
        msg = "No axis named 1 for object type"
        for op in ["eq", "ne", "le", "lt", "gt", "ge"]:
            with pytest.raises(ValueError, match=msg):
                getattr(left, op)(right, axis=1)
class TestSeriesComparison:
    def test_comparison_different_length(self):
        """Comparing Series of different lengths must raise ValueError."""
        a = Series(["a", "b", "c"])
        b = Series(["b", "a"])
        with pytest.raises(ValueError):
            a < b

        a = Series([1, 2])
        b = Series([2, 3, 4])
        with pytest.raises(ValueError):
            a == b

    @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"])
    def test_ser_flex_cmp_return_dtypes(self, opname):
        """Flex comparison against a scalar always returns bool dtype."""
        # GH#15115
        ser = Series([1, 3, 2], index=range(3))
        const = 2

        result = getattr(ser, opname)(const).dtypes
        expected = np.dtype("bool")
        assert result == expected

    @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"])
    def test_ser_flex_cmp_return_dtypes_empty(self, opname):
        """Bool dtype is kept even for a zero-length Series."""
        # GH#15115 empty Series case
        ser = Series([1, 3, 2], index=range(3))
        empty = ser.iloc[:0]
        const = 2

        result = getattr(empty, opname)(const).dtypes
        expected = np.dtype("bool")
        assert result == expected

    @pytest.mark.parametrize(
        "op",
        [operator.eq, operator.ne, operator.le, operator.lt, operator.ge, operator.gt],
    )
    @pytest.mark.parametrize(
        "names", [(None, None, None), ("foo", "bar", None), ("baz", "baz", "baz")]
    )
    def test_ser_cmp_result_names(self, names, op):
        """Result name is the shared name, or None when operand names differ."""
        # datetime64 dtype
        dti = pd.date_range("1949-06-07 03:00:00", freq="H", periods=5, name=names[0])
        ser = Series(dti).rename(names[1])
        result = op(ser, dti)
        assert result.name == names[2]

        # datetime64tz dtype
        dti = dti.tz_localize("US/Central")
        ser = Series(dti).rename(names[1])
        result = op(ser, dti)
        assert result.name == names[2]

        # timedelta64 dtype
        tdi = dti - dti.shift(1)
        ser = Series(tdi).rename(names[1])
        result = op(ser, tdi)
        assert result.name == names[2]

        # categorical
        if op in [operator.eq, operator.ne]:
            # categorical dtype comparisons raise for inequalities
            cidx = tdi.astype("category")
            ser = Series(cidx).rename(names[1])
            result = op(ser, cidx)
            assert result.name == names[2]

View File

@@ -0,0 +1,178 @@
import numpy as np
import pytest
from pandas import Series, Timestamp, date_range, isna, notna, offsets
import pandas.util.testing as tm
class TestSeriesAsof:
    """Tests for Series.asof over datetime, period, and all-NaN inputs."""

    def test_basic(self):
        """asof forward-fills over a NaN run when queried at finer resolution."""
        # array or list or dates
        N = 50
        rng = date_range("1/1/1990", periods=N, freq="53s")
        ts = Series(np.random.randn(N), index=rng)
        ts[15:30] = np.nan  # NaN run that asof must look back across
        dates = date_range("1/1/1990", periods=N * 3, freq="25s")

        result = ts.asof(dates)
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]

        result = ts.asof(list(dates))
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]

        # inside the NaN run, every value comes from the last valid entry
        mask = (result.index >= lb) & (result.index < ub)
        rs = result[mask]
        assert (rs == ts[lb]).all()

        val = result[result.index[result.index >= ub][0]]
        assert ts[ub] == val

    def test_scalar(self):
        """Scalar lookups, string timestamps, and out-of-range queries."""
        N = 30
        rng = date_range("1/1/1990", periods=N, freq="53s")
        ts = Series(np.arange(N), index=rng)
        ts[5:10] = np.NaN
        ts[15:20] = np.NaN

        val1 = ts.asof(ts.index[7])
        val2 = ts.asof(ts.index[19])

        # both queries fall inside NaN runs; last valid values are used
        assert val1 == ts[4]
        assert val2 == ts[14]

        # accepts strings
        val1 = ts.asof(str(ts.index[7]))
        assert val1 == ts[4]

        # in there
        result = ts.asof(ts.index[3])
        assert result == ts[3]

        # no as of value
        d = ts.index[0] - offsets.BDay()
        assert np.isnan(ts.asof(d))

    def test_with_nan(self):
        # basic asof test
        rng = date_range("1/1/2000", "1/2/2000", freq="4h")
        s = Series(np.arange(len(rng)), index=rng)
        r = s.resample("2h").mean()

        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)

        r.iloc[3:5] = np.nan
        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)

        r.iloc[-3:] = np.nan
        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)

    def test_periodindex(self):
        """Same asof semantics when the Series carries a PeriodIndex."""
        from pandas import period_range, PeriodIndex

        # array or list or dates
        N = 50
        rng = period_range("1/1/1990", periods=N, freq="H")
        ts = Series(np.random.randn(N), index=rng)
        ts[15:30] = np.nan
        dates = date_range("1/1/1990", periods=N * 3, freq="37min")

        result = ts.asof(dates)
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]

        result = ts.asof(list(dates))
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]

        pix = PeriodIndex(result.index.values, freq="H")
        mask = (pix >= lb) & (pix < ub)
        rs = result[mask]
        assert (rs == ts[lb]).all()

        ts[5:10] = np.nan
        ts[15:20] = np.nan

        val1 = ts.asof(ts.index[7])
        val2 = ts.asof(ts.index[19])

        assert val1 == ts[4]
        assert val2 == ts[14]

        # accepts strings
        val1 = ts.asof(str(ts.index[7]))
        assert val1 == ts[4]

        # in there
        assert ts.asof(ts.index[3]) == ts[3]

        # no as of value
        d = ts.index[0].to_timestamp() - offsets.BDay()
        assert isna(ts.asof(d))

    def test_errors(self):
        """asof rejects non-monotonic indexes and the subset kwarg on Series."""
        s = Series(
            [1, 2, 3],
            index=[Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102")],
        )

        # non-monotonic
        assert not s.index.is_monotonic
        with pytest.raises(ValueError):
            s.asof(s.index[0])

        # subset with Series
        N = 10
        rng = date_range("1/1/1990", periods=N, freq="53s")
        s = Series(np.random.randn(N), index=rng)
        with pytest.raises(ValueError):
            s.asof(s.index[0], subset="foo")

    def test_all_nans(self):
        # GH 15713
        # series is all nans

        result = Series([np.nan]).asof([0])
        expected = Series([np.nan])
        tm.assert_series_equal(result, expected)

        # testing non-default indexes
        N = 50
        rng = date_range("1/1/1990", periods=N, freq="53s")
        dates = date_range("1/1/1990", periods=N * 3, freq="25s")
        result = Series(np.nan, index=rng).asof(dates)
        expected = Series(np.nan, index=dates)
        tm.assert_series_equal(result, expected)

        # testing scalar input
        date = date_range("1/1/1990", periods=N * 3, freq="25s")[0]
        result = Series(np.nan, index=rng).asof(date)
        assert isna(result)

        # test name is propagated
        result = Series(np.nan, index=[1, 2, 3, 4], name="test").asof([4, 5])
        expected = Series(np.nan, index=[4, 5], name="test")
        tm.assert_series_equal(result, expected)

View File

@@ -0,0 +1,39 @@
import pandas as pd
# Segregated collection of methods that require the BlockManager internal data
# structure
class TestSeriesBlockInternals:
    """Checks that Series construction copies DatetimeIndex data.

    These assertions reach into private attributes (``_values``, ``_data``),
    so they are tied to the BlockManager internals of this pandas version.
    """

    def test_setitem_invalidates_datetime_index_freq(self):
        # GH#24096 altering a datetime64tz Series inplace invalidates the
        # `freq` attribute on the underlying DatetimeIndex

        dti = pd.date_range("20130101", periods=3, tz="US/Eastern")
        ts = dti[1]
        ser = pd.Series(dti)
        # the Series must hold its own copy, not the index's buffer
        assert ser._values is not dti
        assert ser._values._data.base is not dti._data._data.base
        assert dti.freq == "D"
        ser.iloc[1] = pd.NaT
        assert ser._values.freq is None

        # check that the DatetimeIndex was not altered in place
        assert ser._values is not dti
        assert ser._values._data.base is not dti._data._data.base
        assert dti[1] == ts
        assert dti.freq == "D"

    def test_dt64tz_setitem_does_not_mutate_dti(self):
        # GH#21907, GH#24096
        dti = pd.date_range("2016-01-01", periods=10, tz="US/Pacific")
        ts = dti[0]
        ser = pd.Series(dti)
        assert ser._values is not dti
        assert ser._values._data.base is not dti._data._data.base
        assert ser._data.blocks[0].values is not dti
        assert ser._data.blocks[0].values._data.base is not dti._data._data.base

        # writing NaT into the Series must leave the source index intact
        ser[::3] = pd.NaT
        assert ser[0] is pd.NaT
        assert dti[0] == ts

View File

@@ -0,0 +1,418 @@
from datetime import datetime
import numpy as np
from numpy import nan
import pytest
import pandas as pd
from pandas import DataFrame, DatetimeIndex, Series, date_range
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
class TestSeriesCombine:
def test_append(self, datetime_series, string_series, object_series):
appendedSeries = string_series.append(object_series)
for idx, value in appendedSeries.items():
if idx in string_series.index:
assert value == string_series[idx]
elif idx in object_series.index:
assert value == object_series[idx]
else:
raise AssertionError("orphaned index!")
msg = "Indexes have overlapping values:"
with pytest.raises(ValueError, match=msg):
datetime_series.append(datetime_series, verify_integrity=True)
def test_append_many(self, datetime_series):
pieces = [datetime_series[:5], datetime_series[5:10], datetime_series[10:]]
result = pieces[0].append(pieces[1:])
assert_series_equal(result, datetime_series)
def test_append_duplicates(self):
# GH 13677
s1 = pd.Series([1, 2, 3])
s2 = pd.Series([4, 5, 6])
exp = pd.Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2])
tm.assert_series_equal(s1.append(s2), exp)
tm.assert_series_equal(pd.concat([s1, s2]), exp)
# the result must have RangeIndex
exp = pd.Series([1, 2, 3, 4, 5, 6])
tm.assert_series_equal(
s1.append(s2, ignore_index=True), exp, check_index_type=True
)
tm.assert_series_equal(
pd.concat([s1, s2], ignore_index=True), exp, check_index_type=True
)
msg = "Indexes have overlapping values:"
with pytest.raises(ValueError, match=msg):
s1.append(s2, verify_integrity=True)
with pytest.raises(ValueError, match=msg):
pd.concat([s1, s2], verify_integrity=True)
def test_combine_scalar(self):
# GH 21248
# Note - combine() with another Series is tested elsewhere because
# it is used when testing operators
s = pd.Series([i * 10 for i in range(5)])
result = s.combine(3, lambda x, y: x + y)
expected = pd.Series([i * 10 + 3 for i in range(5)])
tm.assert_series_equal(result, expected)
result = s.combine(22, lambda x, y: min(x, y))
expected = pd.Series([min(i * 10, 22) for i in range(5)])
tm.assert_series_equal(result, expected)
def test_combine_first(self):
values = tm.makeIntIndex(20).values.astype(float)
series = Series(values, index=tm.makeIntIndex(20))
series_copy = series * 2
series_copy[::2] = np.NaN
# nothing used from the input
combined = series.combine_first(series_copy)
tm.assert_series_equal(combined, series)
# Holes filled from input
combined = series_copy.combine_first(series)
assert np.isfinite(combined).all()
tm.assert_series_equal(combined[::2], series[::2])
tm.assert_series_equal(combined[1::2], series_copy[1::2])
# mixed types
index = tm.makeStringIndex(20)
floats = Series(tm.randn(20), index=index)
strings = Series(tm.makeStringIndex(10), index=index[::2])
combined = strings.combine_first(floats)
tm.assert_series_equal(strings, combined.loc[index[::2]])
tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]])
# corner case
s = Series([1.0, 2, 3], index=[0, 1, 2])
result = s.combine_first(Series([], index=[]))
s.index = s.index.astype("O")
assert_series_equal(s, result)
def test_update(self):
s = Series([1.5, nan, 3.0, 4.0, nan])
s2 = Series([nan, 3.5, nan, 5.0])
s.update(s2)
expected = Series([1.5, 3.5, 3.0, 5.0, np.nan])
assert_series_equal(s, expected)
# GH 3217
df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
df["c"] = np.nan
df["c"].update(Series(["foo"], index=[0]))
expected = DataFrame(
[[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"]
)
assert_frame_equal(df, expected)
@pytest.mark.parametrize(
"other, dtype, expected",
[
# other is int
([61, 63], "int32", pd.Series([10, 61, 12], dtype="int32")),
([61, 63], "int64", pd.Series([10, 61, 12])),
([61, 63], float, pd.Series([10.0, 61.0, 12.0])),
([61, 63], object, pd.Series([10, 61, 12], dtype=object)),
# other is float, but can be cast to int
([61.0, 63.0], "int32", pd.Series([10, 61, 12], dtype="int32")),
([61.0, 63.0], "int64", pd.Series([10, 61, 12])),
([61.0, 63.0], float, pd.Series([10.0, 61.0, 12.0])),
([61.0, 63.0], object, pd.Series([10, 61.0, 12], dtype=object)),
# others is float, cannot be cast to int
([61.1, 63.1], "int32", pd.Series([10.0, 61.1, 12.0])),
([61.1, 63.1], "int64", pd.Series([10.0, 61.1, 12.0])),
([61.1, 63.1], float, pd.Series([10.0, 61.1, 12.0])),
([61.1, 63.1], object, pd.Series([10, 61.1, 12], dtype=object)),
# other is object, cannot be cast
([(61,), (63,)], "int32", pd.Series([10, (61,), 12])),
([(61,), (63,)], "int64", pd.Series([10, (61,), 12])),
([(61,), (63,)], float, pd.Series([10.0, (61,), 12.0])),
([(61,), (63,)], object, pd.Series([10, (61,), 12])),
],
)
def test_update_dtypes(self, other, dtype, expected):
s = Series([10, 11, 12], dtype=dtype)
other = Series(other, index=[1, 3])
s.update(other)
assert_series_equal(s, expected)
def test_concat_empty_series_dtypes_roundtrips(self):
# round-tripping with self & like self
dtypes = map(np.dtype, ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"])
for dtype in dtypes:
assert pd.concat([Series(dtype=dtype)]).dtype == dtype
assert pd.concat([Series(dtype=dtype), Series(dtype=dtype)]).dtype == dtype
def int_result_type(dtype, dtype2):
typs = {dtype.kind, dtype2.kind}
if not len(typs - {"i", "u", "b"}) and (
dtype.kind == "i" or dtype2.kind == "i"
):
return "i"
elif not len(typs - {"u", "b"}) and (
dtype.kind == "u" or dtype2.kind == "u"
):
return "u"
return None
def float_result_type(dtype, dtype2):
typs = {dtype.kind, dtype2.kind}
if not len(typs - {"f", "i", "u"}) and (
dtype.kind == "f" or dtype2.kind == "f"
):
return "f"
return None
def get_result_type(dtype, dtype2):
result = float_result_type(dtype, dtype2)
if result is not None:
return result
result = int_result_type(dtype, dtype2)
if result is not None:
return result
return "O"
for dtype in dtypes:
for dtype2 in dtypes:
if dtype == dtype2:
continue
expected = get_result_type(dtype, dtype2)
result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype
assert result.kind == expected
def test_combine_first_dt_tz_values(self, tz_naive_fixture):
ser1 = pd.Series(
pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture),
name="ser1",
)
ser2 = pd.Series(
pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture),
index=[2, 3, 4],
name="ser2",
)
result = ser1.combine_first(ser2)
exp_vals = pd.DatetimeIndex(
["20150101", "20150102", "20150103", "20160515", "20160516"],
tz=tz_naive_fixture,
)
exp = pd.Series(exp_vals, name="ser1")
assert_series_equal(exp, result)
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
def test_concat_empty_series_dtypes(self):
# booleans
assert (
pd.concat([Series(dtype=np.bool_), Series(dtype=np.int32)]).dtype
== np.int32
)
assert (
pd.concat([Series(dtype=np.bool_), Series(dtype=np.float32)]).dtype
== np.object_
)
# datetime-like
assert (
pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.bool)]).dtype
== np.object_
)
assert (
pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.int64)]).dtype
== np.object_
)
assert (
pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.bool)]).dtype
== np.object_
)
assert (
pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.int64)]).dtype
== np.object_
)
assert (
pd.concat(
[Series(dtype="M8[ns]"), Series(dtype=np.bool_), Series(dtype=np.int64)]
).dtype
== np.object_
)
# categorical
assert (
pd.concat([Series(dtype="category"), Series(dtype="category")]).dtype
== "category"
)
# GH 18515
assert (
pd.concat(
[Series(np.array([]), dtype="category"), Series(dtype="float64")]
).dtype
== "float64"
)
assert (
pd.concat([Series(dtype="category"), Series(dtype="object")]).dtype
== "object"
)
# sparse
# TODO: move?
result = pd.concat(
[Series(dtype="float64").to_sparse(), Series(dtype="float64").to_sparse()]
)
assert result.dtype == "Sparse[float64]"
# GH 26705 - Assert .ftype is deprecated
with tm.assert_produces_warning(FutureWarning):
assert result.ftype == "float64:sparse"
result = pd.concat(
[Series(dtype="float64").to_sparse(), Series(dtype="float64")]
)
# TODO: release-note: concat sparse dtype
expected = pd.core.sparse.api.SparseDtype(np.float64)
assert result.dtype == expected
# GH 26705 - Assert .ftype is deprecated
with tm.assert_produces_warning(FutureWarning):
assert result.ftype == "float64:sparse"
result = pd.concat(
[Series(dtype="float64").to_sparse(), Series(dtype="object")]
)
# TODO: release-note: concat sparse dtype
expected = pd.core.sparse.api.SparseDtype("object")
assert result.dtype == expected
# GH 26705 - Assert .ftype is deprecated
with tm.assert_produces_warning(FutureWarning):
assert result.ftype == "object:sparse"
def test_combine_first_dt64(self):
from pandas.core.tools.datetimes import to_datetime
s0 = to_datetime(Series(["2010", np.NaN]))
s1 = to_datetime(Series([np.NaN, "2011"]))
rs = s0.combine_first(s1)
xp = to_datetime(Series(["2010", "2011"]))
assert_series_equal(rs, xp)
s0 = to_datetime(Series(["2010", np.NaN]))
s1 = Series([np.NaN, "2011"])
rs = s0.combine_first(s1)
xp = Series([datetime(2010, 1, 1), "2011"])
assert_series_equal(rs, xp)
class TestTimeseries:
    """append/concat behaviour for datetime-indexed Series and DataFrames."""

    def test_append_concat(self):
        """Appending duplicates the index; names survive only when they match."""
        rng = date_range("5/8/2012 1:45", periods=10, freq="5T")
        ts = Series(np.random.randn(len(rng)), rng)
        df = DataFrame(np.random.randn(len(rng), 4), index=rng)

        result = ts.append(ts)
        result_df = df.append(df)
        ex_index = DatetimeIndex(np.tile(rng.values, 2))
        tm.assert_index_equal(result.index, ex_index)
        tm.assert_index_equal(result_df.index, ex_index)

        appended = rng.append(rng)
        tm.assert_index_equal(appended, ex_index)

        appended = rng.append([rng, rng])
        ex_index = DatetimeIndex(np.tile(rng.values, 3))
        tm.assert_index_equal(appended, ex_index)

        # different index names
        rng1 = rng.copy()
        rng2 = rng.copy()
        rng1.name = "foo"
        rng2.name = "bar"
        assert rng1.append(rng1).name == "foo"
        assert rng1.append(rng2).name is None

    def test_append_concat_tz(self):
        """tz-aware append keeps the timezone on the combined index."""
        # see gh-2938
        rng = date_range("5/8/2012 1:45", periods=10, freq="5T", tz="US/Eastern")
        rng2 = date_range("5/8/2012 2:35", periods=10, freq="5T", tz="US/Eastern")
        rng3 = date_range("5/8/2012 1:45", periods=20, freq="5T", tz="US/Eastern")
        ts = Series(np.random.randn(len(rng)), rng)
        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
        ts2 = Series(np.random.randn(len(rng2)), rng2)
        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)

        result = ts.append(ts2)
        result_df = df.append(df2)
        tm.assert_index_equal(result.index, rng3)
        tm.assert_index_equal(result_df.index, rng3)

        appended = rng.append(rng2)
        tm.assert_index_equal(appended, rng3)

    def test_append_concat_tz_explicit_pytz(self):
        """Same as test_append_concat_tz but with explicit pytz tzinfo objects."""
        # see gh-2938
        from pytz import timezone as timezone

        rng = date_range(
            "5/8/2012 1:45", periods=10, freq="5T", tz=timezone("US/Eastern")
        )
        rng2 = date_range(
            "5/8/2012 2:35", periods=10, freq="5T", tz=timezone("US/Eastern")
        )
        rng3 = date_range(
            "5/8/2012 1:45", periods=20, freq="5T", tz=timezone("US/Eastern")
        )
        ts = Series(np.random.randn(len(rng)), rng)
        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
        ts2 = Series(np.random.randn(len(rng2)), rng2)
        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)

        result = ts.append(ts2)
        result_df = df.append(df2)
        tm.assert_index_equal(result.index, rng3)
        tm.assert_index_equal(result_df.index, rng3)

        appended = rng.append(rng2)
        tm.assert_index_equal(appended, rng3)

    def test_append_concat_tz_dateutil(self):
        """Same as test_append_concat_tz but with dateutil timezone strings."""
        # see gh-2938
        rng = date_range(
            "5/8/2012 1:45", periods=10, freq="5T", tz="dateutil/US/Eastern"
        )
        rng2 = date_range(
            "5/8/2012 2:35", periods=10, freq="5T", tz="dateutil/US/Eastern"
        )
        rng3 = date_range(
            "5/8/2012 1:45", periods=20, freq="5T", tz="dateutil/US/Eastern"
        )
        ts = Series(np.random.randn(len(rng)), rng)
        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
        ts2 = Series(np.random.randn(len(rng2)), rng2)
        df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)

        result = ts.append(ts2)
        result_df = df.append(df2)
        tm.assert_index_equal(result.index, rng3)
        tm.assert_index_equal(result_df.index, rng3)

        appended = rng.append(rng2)
        tm.assert_index_equal(appended, rng3)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,648 @@
import calendar
from datetime import date, datetime, time
import locale
import unicodedata
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs.timezones import maybe_get_tz
from pandas.core.dtypes.common import is_integer_dtype, is_list_like
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
PeriodIndex,
Series,
TimedeltaIndex,
bdate_range,
date_range,
period_range,
timedelta_range,
)
from pandas.core.arrays import PeriodArray
import pandas.core.common as com
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal
class TestSeriesDatetimeValues:
    """Tests for the Series ``.dt`` datetimelike accessor namespace.

    Covers property/method dispatch for datetime, timedelta and period
    dtypes, rounding near DST transitions, locale-aware name accessors,
    ``strftime`` formatting, and accessor immutability guarantees.
    """

    def test_dt_namespace_accessor(self):
        """Exercise every ``.dt`` property and method across dtypes."""
        # GH 7207, 11128
        # test .dt namespace accessor

        ok_for_period = PeriodArray._datetimelike_ops
        ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"]
        ok_for_dt = DatetimeIndex._datetimelike_ops
        ok_for_dt_methods = [
            "to_period",
            "to_pydatetime",
            "tz_localize",
            "tz_convert",
            "normalize",
            "strftime",
            "round",
            "floor",
            "ceil",
            "day_name",
            "month_name",
        ]
        ok_for_td = TimedeltaIndex._datetimelike_ops
        ok_for_td_methods = [
            "components",
            "to_pytimedelta",
            "total_seconds",
            "round",
            "floor",
            "ceil",
        ]

        def get_expected(s, name):
            # Expected value: apply the same accessor to a plain Index built
            # from the underlying values, then wrap back into a Series.
            # NOTE: reads ``prop`` from the enclosing loop, not ``name``.
            result = getattr(Index(s._values), prop)
            if isinstance(result, np.ndarray):
                if is_integer_dtype(result):
                    result = result.astype("int64")
            elif not is_list_like(result):
                return result
            return Series(result, index=s.index, name=s.name)

        def compare(s, name):
            # Compare .dt accessor output to the Index-based expectation.
            # NOTE: also reads ``prop`` from the enclosing loop, not ``name``.
            a = getattr(s.dt, prop)
            b = get_expected(s, prop)
            if not (is_list_like(a) and is_list_like(b)):
                assert a == b
            else:
                tm.assert_series_equal(a, b)

        # datetimeindex
        cases = [
            Series(date_range("20130101", periods=5), name="xxx"),
            Series(date_range("20130101", periods=5, freq="s"), name="xxx"),
            Series(date_range("20130101 00:00:00", periods=5, freq="ms"), name="xxx"),
        ]
        for s in cases:
            for prop in ok_for_dt:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_dt_methods:
                getattr(s.dt, prop)

            result = s.dt.to_pydatetime()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.tz_localize("US/Eastern")
            exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern")
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

            tz_result = result.dt.tz
            assert str(tz_result) == "US/Eastern"
            freq_result = s.dt.freq
            assert freq_result == DatetimeIndex(s.values, freq="infer").freq

            # let's localize, then convert
            result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
            exp_values = (
                DatetimeIndex(s.values).tz_localize("UTC").tz_convert("US/Eastern")
            )
            expected = Series(exp_values, index=s.index, name="xxx")
            tm.assert_series_equal(result, expected)

        # datetimeindex with tz
        s = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx")
        for prop in ok_for_dt:
            # we test freq below
            if prop != "freq":
                compare(s, prop)

        for prop in ok_for_dt_methods:
            getattr(s.dt, prop)

        result = s.dt.to_pydatetime()
        assert isinstance(result, np.ndarray)
        assert result.dtype == object

        result = s.dt.tz_convert("CET")
        expected = Series(s._values.tz_convert("CET"), index=s.index, name="xxx")
        tm.assert_series_equal(result, expected)

        tz_result = result.dt.tz
        assert str(tz_result) == "CET"
        freq_result = s.dt.freq
        assert freq_result == DatetimeIndex(s.values, freq="infer").freq

        # timedelta index
        cases = [
            Series(
                timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"
            ),
            Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"),
            Series(
                timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"),
                name="xxx",
            ),
        ]
        for s in cases:
            for prop in ok_for_td:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_td_methods:
                getattr(s.dt, prop)

            result = s.dt.components
            assert isinstance(result, DataFrame)
            tm.assert_index_equal(result.index, s.index)

            result = s.dt.to_pytimedelta()
            assert isinstance(result, np.ndarray)
            assert result.dtype == object

            result = s.dt.total_seconds()
            assert isinstance(result, pd.Series)
            assert result.dtype == "float64"

            freq_result = s.dt.freq
            assert freq_result == TimedeltaIndex(s.values, freq="infer").freq

        # both
        index = date_range("20130101", periods=3, freq="D")
        s = Series(date_range("20140204", periods=3, freq="s"), index=index, name="xxx")
        exp = Series(
            np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx"
        )
        tm.assert_series_equal(s.dt.year, exp)

        exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx")
        tm.assert_series_equal(s.dt.month, exp)

        exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx")
        tm.assert_series_equal(s.dt.second, exp)

        exp = pd.Series([s[0]] * 3, index=index, name="xxx")
        tm.assert_series_equal(s.dt.normalize(), exp)

        # periodindex
        cases = [Series(period_range("20130101", periods=5, freq="D"), name="xxx")]
        for s in cases:
            for prop in ok_for_period:
                # we test freq below
                if prop != "freq":
                    compare(s, prop)

            for prop in ok_for_period_methods:
                getattr(s.dt, prop)

            freq_result = s.dt.freq
            assert freq_result == PeriodIndex(s.values).freq

        # test limited display api
        def get_dir(s):
            # Public names exposed by the .dt accessor (no underscore-private).
            results = [r for r in s.dt.__dir__() if not r.startswith("_")]
            return list(sorted(set(results)))

        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))
        )

        s = Series(
            period_range("20130101", periods=5, freq="D", name="xxx").astype(object)
        )
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_period + ok_for_period_methods)))
        )

        # 11295
        # ambiguous time error on the conversions
        s = Series(pd.date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx")
        s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
        results = get_dir(s)
        tm.assert_almost_equal(
            results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))
        )
        exp_values = pd.date_range(
            "2015-01-01", "2016-01-01", freq="T", tz="UTC"
        ).tz_convert("America/Chicago")
        expected = Series(exp_values, name="xxx")
        tm.assert_series_equal(s, expected)

        # no setting allowed
        s = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
        with pytest.raises(ValueError, match="modifications"):
            s.dt.hour = 5

        # trying to set a copy
        with pd.option_context("chained_assignment", "raise"):
            with pytest.raises(com.SettingWithCopyError):
                s.dt.hour[0] = 5

    @pytest.mark.parametrize(
        "method, dates",
        [
            ["round", ["2012-01-02", "2012-01-02", "2012-01-01"]],
            ["floor", ["2012-01-01", "2012-01-01", "2012-01-01"]],
            ["ceil", ["2012-01-02", "2012-01-02", "2012-01-02"]],
        ],
    )
    def test_dt_round(self, method, dates):
        """round/floor/ceil to daily frequency yield the expected dates."""
        # round
        s = Series(
            pd.to_datetime(
                ["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
            ),
            name="xxx",
        )
        result = getattr(s.dt, method)("D")
        expected = Series(pd.to_datetime(dates), name="xxx")
        tm.assert_series_equal(result, expected)

    def test_dt_round_tz(self):
        """Daily rounding is applied in local (tz-converted) wall time."""
        s = Series(
            pd.to_datetime(
                ["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
            ),
            name="xxx",
        )
        result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern").dt.round("D")

        exp_values = pd.to_datetime(
            ["2012-01-01", "2012-01-01", "2012-01-01"]
        ).tz_localize("US/Eastern")
        expected = Series(exp_values, name="xxx")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("method", ["ceil", "round", "floor"])
    def test_dt_round_tz_ambiguous(self, method):
        """Rounding near a DST 'fall back' honors the ``ambiguous`` argument."""
        # GH 18946 round near "fall back" DST
        df1 = pd.DataFrame(
            [
                pd.to_datetime("2017-10-29 02:00:00+02:00", utc=True),
                pd.to_datetime("2017-10-29 02:00:00+01:00", utc=True),
                pd.to_datetime("2017-10-29 03:00:00+01:00", utc=True),
            ],
            columns=["date"],
        )
        df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid")
        # infer
        result = getattr(df1.date.dt, method)("H", ambiguous="infer")
        expected = df1["date"]
        tm.assert_series_equal(result, expected)

        # bool-array
        result = getattr(df1.date.dt, method)("H", ambiguous=[True, False, False])
        tm.assert_series_equal(result, expected)

        # NaT
        result = getattr(df1.date.dt, method)("H", ambiguous="NaT")
        expected = df1["date"].copy()
        expected.iloc[0:2] = pd.NaT
        tm.assert_series_equal(result, expected)

        # raise
        with pytest.raises(pytz.AmbiguousTimeError):
            getattr(df1.date.dt, method)("H", ambiguous="raise")

    @pytest.mark.parametrize(
        "method, ts_str, freq",
        [
            ["ceil", "2018-03-11 01:59:00-0600", "5min"],
            ["round", "2018-03-11 01:59:00-0600", "5min"],
            ["floor", "2018-03-11 03:01:00-0500", "2H"],
        ],
    )
    def test_dt_round_tz_nonexistent(self, method, ts_str, freq):
        """Rounding into a DST 'spring forward' gap honors ``nonexistent``."""
        # GH 23324 round near "spring forward" DST
        s = Series([pd.Timestamp(ts_str, tz="America/Chicago")])
        result = getattr(s.dt, method)(freq, nonexistent="shift_forward")
        expected = Series([pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago")])
        tm.assert_series_equal(result, expected)

        result = getattr(s.dt, method)(freq, nonexistent="NaT")
        expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz)
        tm.assert_series_equal(result, expected)

        with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"):
            getattr(s.dt, method)(freq, nonexistent="raise")

    def test_dt_namespace_accessor_categorical(self):
        """``.dt`` works on a Categorical of datetimes."""
        # GH 19468
        dti = DatetimeIndex(["20171111", "20181212"]).repeat(2)
        s = Series(pd.Categorical(dti), name="foo")
        result = s.dt.year
        expected = Series([2017, 2017, 2018, 2018], name="foo")
        tm.assert_series_equal(result, expected)

    def test_dt_accessor_no_new_attributes(self):
        """Setting new attributes on the ``.dt`` accessor is rejected."""
        # https://github.com/pandas-dev/pandas/issues/10673
        s = Series(date_range("20130101", periods=5, freq="D"))
        with pytest.raises(AttributeError, match="You cannot add any new attribute"):
            s.dt.xlabel = "a"

    @pytest.mark.parametrize(
        "time_locale", [None] if tm.get_locales() is None else [None] + tm.get_locales()
    )
    def test_dt_accessor_datetime_name_accessors(self, time_locale):
        """``day_name``/``month_name`` respect the requested locale."""
        # Test Monday -> Sunday and January -> December, in that sequence
        if time_locale is None:
            # If the time_locale is None, day-name and month_name should
            # return the english attributes
            expected_days = [
                "Monday",
                "Tuesday",
                "Wednesday",
                "Thursday",
                "Friday",
                "Saturday",
                "Sunday",
            ]
            expected_months = [
                "January",
                "February",
                "March",
                "April",
                "May",
                "June",
                "July",
                "August",
                "September",
                "October",
                "November",
                "December",
            ]
        else:
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_days = calendar.day_name[:]
                expected_months = calendar.month_name[1:]

        s = Series(date_range(freq="D", start=datetime(1998, 1, 1), periods=365))
        english_days = [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ]
        for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
            name = name.capitalize()
            assert s.dt.weekday_name[day] == eng_name
            assert s.dt.day_name(locale=time_locale)[day] == name
        s = s.append(Series([pd.NaT]))
        assert np.isnan(s.dt.day_name(locale=time_locale).iloc[-1])

        s = Series(date_range(freq="M", start="2012", end="2013"))
        result = s.dt.month_name(locale=time_locale)
        expected = Series([month.capitalize() for month in expected_months])

        # work around https://github.com/pandas-dev/pandas/issues/22342
        result = result.str.normalize("NFD")
        expected = expected.str.normalize("NFD")

        tm.assert_series_equal(result, expected)

        for s_date, expected in zip(s, expected_months):
            result = s_date.month_name(locale=time_locale)
            expected = expected.capitalize()

            result = unicodedata.normalize("NFD", result)
            expected = unicodedata.normalize("NFD", expected)

            assert result == expected
        s = s.append(Series([pd.NaT]))
        assert np.isnan(s.dt.month_name(locale=time_locale).iloc[-1])

    def test_strftime(self):
        """``.dt.strftime`` formats datetime and period data, NaT included."""
        # GH 10086
        s = Series(date_range("20130101", periods=5))
        result = s.dt.strftime("%Y/%m/%d")
        expected = Series(
            ["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
        )
        tm.assert_series_equal(result, expected)

        s = Series(date_range("2015-02-03 11:22:33.4567", periods=5))
        result = s.dt.strftime("%Y/%m/%d %H-%M-%S")
        expected = Series(
            [
                "2015/02/03 11-22-33",
                "2015/02/04 11-22-33",
                "2015/02/05 11-22-33",
                "2015/02/06 11-22-33",
                "2015/02/07 11-22-33",
            ]
        )
        tm.assert_series_equal(result, expected)

        s = Series(period_range("20130101", periods=5))
        result = s.dt.strftime("%Y/%m/%d")
        expected = Series(
            ["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
        )
        tm.assert_series_equal(result, expected)

        s = Series(period_range("2015-02-03 11:22:33.4567", periods=5, freq="s"))
        result = s.dt.strftime("%Y/%m/%d %H-%M-%S")
        expected = Series(
            [
                "2015/02/03 11-22-33",
                "2015/02/03 11-22-34",
                "2015/02/03 11-22-35",
                "2015/02/03 11-22-36",
                "2015/02/03 11-22-37",
            ]
        )
        tm.assert_series_equal(result, expected)

        s = Series(date_range("20130101", periods=5))
        s.iloc[0] = pd.NaT
        result = s.dt.strftime("%Y/%m/%d")
        expected = Series(
            ["NaT", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
        )
        tm.assert_series_equal(result, expected)

        datetime_index = date_range("20150301", periods=5)
        result = datetime_index.strftime("%Y/%m/%d")

        expected = Index(
            ["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
            dtype=np.object_,
        )
        # dtype may be S10 or U10 depending on python version
        tm.assert_index_equal(result, expected)

        period_index = period_range("20150301", periods=5)
        result = period_index.strftime("%Y/%m/%d")
        expected = Index(
            ["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
            dtype="=U10",
        )
        tm.assert_index_equal(result, expected)

        s = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)])
        result = s.dt.strftime("%Y-%m-%d %H:%M:%S")
        expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"])
        tm.assert_series_equal(result, expected)

        s = Series(period_range("20130101", periods=4, freq="H"))
        result = s.dt.strftime("%Y/%m/%d %H:%M:%S")
        expected = Series(
            [
                "2013/01/01 00:00:00",
                "2013/01/01 01:00:00",
                "2013/01/01 02:00:00",
                "2013/01/01 03:00:00",
            ]
        )
        # BUG FIX: this assertion was missing, so the hourly-frequency
        # expectation above was built but never actually checked.
        tm.assert_series_equal(result, expected)

        s = Series(period_range("20130101", periods=4, freq="L"))
        result = s.dt.strftime("%Y/%m/%d %H:%M:%S.%l")
        expected = Series(
            [
                "2013/01/01 00:00:00.000",
                "2013/01/01 00:00:00.001",
                "2013/01/01 00:00:00.002",
                "2013/01/01 00:00:00.003",
            ]
        )
        tm.assert_series_equal(result, expected)

    def test_valid_dt_with_missing_values(self):
        """Datetimelike ``.dt`` attributes propagate NaT as NaN/None."""
        from datetime import date, time

        # GH 8689
        s = Series(date_range("20130101", periods=5, freq="D"))
        s.iloc[2] = pd.NaT

        for attr in ["microsecond", "nanosecond", "second", "minute", "hour", "day"]:
            expected = getattr(s.dt, attr).copy()
            expected.iloc[2] = np.nan
            result = getattr(s.dt, attr)
            tm.assert_series_equal(result, expected)

        result = s.dt.date
        expected = Series(
            [
                date(2013, 1, 1),
                date(2013, 1, 2),
                np.nan,
                date(2013, 1, 4),
                date(2013, 1, 5),
            ],
            dtype="object",
        )
        tm.assert_series_equal(result, expected)

        result = s.dt.time
        expected = Series([time(0), time(0), np.nan, time(0), time(0)], dtype="object")
        tm.assert_series_equal(result, expected)

    def test_dt_accessor_api(self):
        """The ``.dt`` class attribute is the combined accessor type."""
        # GH 9322
        from pandas.core.indexes.accessors import (
            CombinedDatetimelikeProperties,
            DatetimeProperties,
        )

        assert Series.dt is CombinedDatetimelikeProperties

        s = Series(date_range("2000-01-01", periods=3))
        assert isinstance(s.dt, DatetimeProperties)

    @pytest.mark.parametrize(
        "ser", [Series(np.arange(5)), Series(list("abcde")), Series(np.random.randn(5))]
    )
    def test_dt_accessor_invalid(self, ser):
        """Non-datetimelike Series must not expose ``.dt``."""
        # GH#9322 check that series with incorrect dtypes don't have attr
        with pytest.raises(AttributeError, match="only use .dt accessor"):
            ser.dt
        assert not hasattr(ser, "dt")

    def test_dt_accessor_updates_on_inplace(self):
        """``.dt`` reflects in-place modifications such as fillna."""
        s = Series(pd.date_range("2018-01-01", periods=10))
        s[2] = None
        s.fillna(pd.Timestamp("2018-01-01"), inplace=True)
        result = s.dt.date
        assert result[0] == result[2]

    def test_between(self):
        """``Series.between`` on datetimes supports inclusive/exclusive bounds."""
        s = Series(bdate_range("1/1/2000", periods=20).astype(object))
        s[::2] = np.nan

        result = s[s.between(s[3], s[17])]
        expected = s[3:18].dropna()
        assert_series_equal(result, expected)

        result = s[s.between(s[3], s[17], inclusive=False)]
        expected = s[5:16].dropna()
        assert_series_equal(result, expected)

    def test_date_tz(self):
        """``.dt.date`` returns local dates for tz-aware data."""
        # GH11757
        rng = pd.DatetimeIndex(
            ["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"],
            tz="US/Eastern",
        )
        s = Series(rng)
        expected = Series([date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)])
        assert_series_equal(s.dt.date, expected)
        assert_series_equal(s.apply(lambda x: x.date()), expected)

    def test_datetime_understood(self):
        """Datetime Series minus a DateOffset produces correct datetimes."""
        # Ensures it doesn't fail to create the right series
        # reported in issue#16726
        series = pd.Series(pd.date_range("2012-01-01", periods=3))
        offset = pd.offsets.DateOffset(days=6)
        result = series - offset
        expected = pd.Series(pd.to_datetime(["2011-12-26", "2011-12-27", "2011-12-28"]))
        tm.assert_series_equal(result, expected)

    def test_dt_timetz_accessor(self, tz_naive_fixture):
        """``.dt.timetz`` keeps the tzinfo on the extracted times."""
        # GH21358
        tz = maybe_get_tz(tz_naive_fixture)

        dtindex = pd.DatetimeIndex(
            ["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], tz=tz
        )
        s = Series(dtindex)
        expected = Series(
            [time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)]
        )
        result = s.dt.timetz
        tm.assert_series_equal(result, expected)

    def test_setitem_with_string_index(self):
        """Assigning a date by string label works for both access styles."""
        # GH 23451
        x = pd.Series([1, 2, 3], index=["Date", "b", "other"])
        x["Date"] = date.today()
        assert x.Date == date.today()
        assert x["Date"] == date.today()

    def test_setitem_with_different_tz(self):
        """Setting a value with a different tz coerces the Series to object."""
        # GH#24024
        ser = pd.Series(pd.date_range("2000", periods=2, tz="US/Central"))
        ser[0] = pd.Timestamp("2000", tz="US/Eastern")
        expected = pd.Series(
            [
                pd.Timestamp("2000-01-01 00:00:00-05:00", tz="US/Eastern"),
                pd.Timestamp("2000-01-02 00:00:00-06:00", tz="US/Central"),
            ],
            dtype=object,
        )
        tm.assert_series_equal(ser, expected)

View File

@@ -0,0 +1,521 @@
from datetime import datetime, timedelta
from importlib import reload
import string
import sys
import numpy as np
import pytest
from pandas._libs.tslibs import iNaT
from pandas.core.dtypes.dtypes import CategoricalDtype, ordered_sentinel
import pandas as pd
from pandas import (
Categorical,
DataFrame,
Index,
Series,
Timedelta,
Timestamp,
date_range,
)
import pandas.util.testing as tm
class TestSeriesDtypes:
    """Tests for Series dtype inspection and ``astype`` conversions.

    Covers conversions between numeric, string, datetime/timedelta and
    categorical dtypes, error handling for invalid targets, and several
    deprecation warnings current at the time of writing.
    """

    def test_dt64_series_astype_object(self):
        """datetime64 -> object yields python ``datetime`` elements."""
        dt64ser = Series(date_range("20130101", periods=3))
        result = dt64ser.astype(object)
        assert isinstance(result.iloc[0], datetime)
        assert result.dtype == np.object_

    def test_td64_series_astype_object(self):
        """timedelta64 -> object yields python ``timedelta`` elements."""
        tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
        result = tdser.astype(object)
        assert isinstance(result.iloc[0], timedelta)
        assert result.dtype == np.object_

    @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
    def test_astype(self, dtype):
        """astype to a numeric dtype preserves the Series name."""
        s = Series(np.random.randn(5), name="foo")
        as_typed = s.astype(dtype)

        assert as_typed.dtype == dtype
        assert as_typed.name == s.name

    def test_asobject_deprecated(self):
        """``.asobject`` warns FutureWarning but still returns an ndarray."""
        s = Series(np.random.randn(5), name="foo")
        with tm.assert_produces_warning(FutureWarning):
            o = s.asobject
        assert isinstance(o, np.ndarray)

    def test_dtype(self, datetime_series):
        """dtype/dtypes agree; ftype/ftypes/get_ftype_counts are deprecated."""
        assert datetime_series.dtype == np.dtype("float64")
        assert datetime_series.dtypes == np.dtype("float64")

        # GH 26705 - Assert .ftype is deprecated
        with tm.assert_produces_warning(FutureWarning):
            assert datetime_series.ftype == "float64:dense"

        # GH 26705 - Assert .ftypes is deprecated
        with tm.assert_produces_warning(FutureWarning):
            assert datetime_series.ftypes == "float64:dense"
        # GH18243 - Assert .get_ftype_counts is deprecated
        with tm.assert_produces_warning(FutureWarning):
            tm.assert_series_equal(
                datetime_series.get_ftype_counts(), Series(1, ["float64:dense"])
            )

    @pytest.mark.parametrize("value", [np.nan, np.inf])
    @pytest.mark.parametrize("dtype", [np.int32, np.int64])
    def test_astype_cast_nan_inf_int(self, dtype, value):
        """NaN/inf -> int must raise a clear ValueError."""
        # gh-14265: check NaN and inf raise error when converting to int
        msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"
        s = Series([value])

        with pytest.raises(ValueError, match=msg):
            s.astype(dtype)

    @pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
    def test_astype_cast_object_int_fail(self, dtype):
        """Non-numeric strings -> int raises ValueError."""
        arr = Series(["car", "house", "tree", "1"])
        msg = r"invalid literal for int\(\) with base 10: 'car'"
        with pytest.raises(ValueError, match=msg):
            arr.astype(dtype)

    def test_astype_cast_object_int(self):
        """Numeric strings -> int converts successfully."""
        arr = Series(["1", "2", "3", "4"], dtype=object)
        result = arr.astype(int)

        tm.assert_series_equal(result, Series(np.arange(1, 5)))

    def test_astype_datetime(self):
        """datetime64 (with/without NaN) -> object produces object dtype."""
        s = Series(iNaT, dtype="M8[ns]", index=range(5))

        s = s.astype("O")
        assert s.dtype == np.object_

        s = Series([datetime(2001, 1, 2, 0, 0)])

        s = s.astype("O")
        assert s.dtype == np.object_

        s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])

        s[1] = np.nan
        assert s.dtype == "M8[ns]"

        s = s.astype("O")
        assert s.dtype == np.object_

    def test_astype_datetime64tz(self):
        """Round-trips between tz-aware datetime64 and object dtypes."""
        s = Series(date_range("20130101", periods=3, tz="US/Eastern"))

        # astype
        result = s.astype(object)
        expected = Series(s.astype(object), dtype=object)
        tm.assert_series_equal(result, expected)

        result = Series(s.values).dt.tz_localize("UTC").dt.tz_convert(s.dt.tz)
        tm.assert_series_equal(result, s)

        # astype - object, preserves on construction
        result = Series(s.astype(object))
        expected = s.astype(object)
        tm.assert_series_equal(result, expected)

        # astype - datetime64[ns, tz]
        result = Series(s.values).astype("datetime64[ns, US/Eastern]")
        tm.assert_series_equal(result, s)

        result = Series(s.values).astype(s.dtype)
        tm.assert_series_equal(result, s)

        result = s.astype("datetime64[ns, CET]")
        expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", [str, np.str_])
    @pytest.mark.parametrize(
        "series",
        [
            Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
            Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]),
        ],
    )
    def test_astype_str_map(self, dtype, series):
        """astype(str) is equivalent to mapping ``str`` over the values."""
        # see gh-4405
        result = series.astype(dtype)
        expected = series.map(str)
        tm.assert_series_equal(result, expected)

    def test_astype_str_cast(self):
        """astype(str) renders Timestamp (naive/tz) and Timedelta values."""
        # see gh-9757
        ts = Series([Timestamp("2010-01-04 00:00:00")])
        s = ts.astype(str)

        expected = Series([str("2010-01-04")])
        tm.assert_series_equal(s, expected)

        ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
        s = ts.astype(str)

        expected = Series([str("2010-01-04 00:00:00-05:00")])
        tm.assert_series_equal(s, expected)

        td = Series([Timedelta(1, unit="d")])
        s = td.astype(str)

        expected = Series([str("1 days 00:00:00.000000000")])
        tm.assert_series_equal(s, expected)

    def test_astype_unicode(self):
        """astype('unicode') matches mapping ``str``, incl. non-ASCII data."""
        # see gh-7758: A bit of magic is required to set
        # default encoding to utf-8
        digits = string.digits
        test_series = [
            Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
            Series(["データーサイエンス、お前はもう死んでいる"]),
        ]

        former_encoding = None

        if sys.getdefaultencoding() == "utf-8":
            test_series.append(Series(["野菜食べないとやばい".encode("utf-8")]))

        for s in test_series:
            res = s.astype("unicode")
            expec = s.map(str)
            tm.assert_series_equal(res, expec)

        # Restore the former encoding
        # NOTE(review): former_encoding is never reassigned above, so this
        # branch appears unreachable; kept for historical py2 parity.
        if former_encoding is not None and former_encoding != "utf-8":
            reload(sys)
            sys.setdefaultencoding(former_encoding)

    @pytest.mark.parametrize("dtype_class", [dict, Series])
    def test_astype_dict_like(self, dtype_class):
        """astype accepts a dict/Series keyed only by the Series name."""
        # see gh-7271
        s = Series(range(0, 10, 2), name="abc")

        dt1 = dtype_class({"abc": str})
        result = s.astype(dt1)
        expected = Series(["0", "2", "4", "6", "8"], name="abc")
        tm.assert_series_equal(result, expected)

        dt2 = dtype_class({"abc": "float64"})
        result = s.astype(dt2)
        expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc")
        tm.assert_series_equal(result, expected)

        dt3 = dtype_class({"abc": str, "def": str})
        msg = (
            "Only the Series name can be used for the key in Series dtype"
            r" mappings\."
        )
        with pytest.raises(KeyError, match=msg):
            s.astype(dt3)

        dt4 = dtype_class({0: str})
        with pytest.raises(KeyError, match=msg):
            s.astype(dt4)

        # GH16717
        # if dtypes provided is empty, it should error
        dt5 = dtype_class({})
        with pytest.raises(KeyError, match=msg):
            s.astype(dt5)

    def test_astype_categories_deprecation_raises(self):
        """Passing categories/ordered kwargs to astype now raises."""
        # deprecated 17636
        s = Series(["a", "b", "a"])
        with pytest.raises(ValueError, match="Got an unexpected"):
            s.astype("category", categories=["a", "b"], ordered=True)

    @pytest.mark.parametrize(
        "none, warning", [(None, None), (ordered_sentinel, FutureWarning)]
    )
    def test_astype_category_ordered_none_deprecated(self, none, warning):
        """ordered=sentinel warns; an explicit ordered=None does not."""
        # GH 26336: only warn if None is not explicitly passed
        cdt1 = CategoricalDtype(categories=list("cdab"), ordered=True)
        cdt2 = CategoricalDtype(categories=list("cedafb"), ordered=none)
        s = Series(list("abcdaba"), dtype=cdt1)
        with tm.assert_produces_warning(warning, check_stacklevel=False):
            s.astype(cdt2)

    def test_astype_from_categorical(self):
        """astype('category') matches constructing a Categorical directly."""
        items = ["a", "b", "c", "a"]
        s = Series(items)
        exp = Series(Categorical(items))
        res = s.astype("category")
        tm.assert_series_equal(res, exp)

        items = [1, 2, 3, 1]
        s = Series(items)
        exp = Series(Categorical(items))
        res = s.astype("category")
        tm.assert_series_equal(res, exp)

        df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], "vals": [1, 2, 3, 4, 5, 6]})
        cats = Categorical([1, 2, 3, 4, 5, 6])
        exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
        df["cats"] = df["cats"].astype("category")
        tm.assert_frame_equal(exp_df, df)

        df = DataFrame(
            {"cats": ["a", "b", "b", "a", "a", "d"], "vals": [1, 2, 3, 4, 5, 6]}
        )
        cats = Categorical(["a", "b", "b", "a", "a", "d"])
        exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
        df["cats"] = df["cats"].astype("category")
        tm.assert_frame_equal(exp_df, df)

        # with keywords
        lst = ["a", "b", "c", "a"]
        s = Series(lst)
        exp = Series(Categorical(lst, ordered=True))
        res = s.astype(CategoricalDtype(None, ordered=True))
        tm.assert_series_equal(res, exp)

        exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True))
        res = s.astype(CategoricalDtype(list("abcdef"), ordered=True))
        tm.assert_series_equal(res, exp)

    def test_astype_categorical_to_other(self):
        """Conversions out of categorical: valid targets work, invalid raise."""
        value = np.random.RandomState(0).randint(0, 10000, 100)
        df = DataFrame({"value": value})
        labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
        cat_labels = Categorical(labels, labels)

        df = df.sort_values(by=["value"], ascending=True)
        df["value_group"] = pd.cut(
            df.value, range(0, 10500, 500), right=False, labels=cat_labels
        )

        s = df["value_group"]
        expected = s
        tm.assert_series_equal(s.astype("category"), expected)
        tm.assert_series_equal(s.astype(CategoricalDtype()), expected)
        msg = r"could not convert string to float|" r"invalid literal for float\(\)"
        with pytest.raises(ValueError, match=msg):
            s.astype("float64")

        cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
        exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"])
        tm.assert_series_equal(cat.astype("str"), exp)
        s2 = Series(Categorical(["1", "2", "3", "4"]))
        exp2 = Series([1, 2, 3, 4]).astype(int)
        tm.assert_series_equal(s2.astype("int"), exp2)

        # object don't sort correctly, so just compare that we have the same
        # values
        def cmp(a, b):
            tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b)))

        expected = Series(np.array(s.values), name="value_group")
        cmp(s.astype("object"), expected)
        cmp(s.astype(np.object_), expected)

        # array conversion
        tm.assert_almost_equal(np.array(s), np.array(s.values))

        # valid conversion
        for valid in [
            lambda x: x.astype("category"),
            lambda x: x.astype(CategoricalDtype()),
            lambda x: x.astype("object").astype("category"),
            lambda x: x.astype("object").astype(CategoricalDtype()),
        ]:
            result = valid(s)
            # compare series values
            # internal .categories can't be compared because it is sorted
            tm.assert_series_equal(result, s, check_categorical=False)

        # invalid conversion (these are NOT a dtype)
        msg = (
            r"invalid type <class 'pandas\.core\.arrays\.categorical\."
            "Categorical'> for astype"
        )
        for invalid in [
            lambda x: x.astype(Categorical),
            lambda x: x.astype("object").astype(Categorical),
        ]:
            with pytest.raises(TypeError, match=msg):
                invalid(s)

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("series_ordered", [True, False])
    def test_astype_categorical_to_categorical(
        self, name, dtype_ordered, series_ordered
    ):
        """categorical -> categorical honors new categories/orderedness."""
        # GH 10696/18593
        s_data = list("abcaacbab")
        s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
        s = Series(s_data, dtype=s_dtype, name=name)

        # unspecified categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = s.astype(dtype)
        exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
        expected = Series(s_data, name=name, dtype=exp_dtype)
        tm.assert_series_equal(result, expected)

        # different categories
        dtype = CategoricalDtype(list("adc"), dtype_ordered)
        result = s.astype(dtype)
        expected = Series(s_data, name=name, dtype=dtype)
        tm.assert_series_equal(result, expected)

        if dtype_ordered is False:
            # not specifying ordered, so only test once
            expected = s
            result = s.astype("category")
            tm.assert_series_equal(result, expected)

    def test_astype_categoricaldtype(self):
        """astype(CategoricalDtype) applies categories and orderedness."""
        s = Series(["a", "b", "a"])
        result = s.astype(CategoricalDtype(["a", "b"], ordered=True))
        expected = Series(Categorical(["a", "b", "a"], ordered=True))
        tm.assert_series_equal(result, expected)

        result = s.astype(CategoricalDtype(["a", "b"], ordered=False))
        expected = Series(Categorical(["a", "b", "a"], ordered=False))
        tm.assert_series_equal(result, expected)

        result = s.astype(CategoricalDtype(["a", "b", "c"], ordered=False))
        expected = Series(
            Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False)
        )
        tm.assert_series_equal(result, expected)
        tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"]))

    @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
    def test_astype_generic_timestamp_no_frequency(self, dtype):
        """Unit-less datetime64/timedelta64 targets raise a helpful error."""
        # see gh-15524, gh-15987
        data = [1]
        s = Series(data)

        msg = (
            r"The '{dtype}' dtype has no unit\. "
            r"Please pass in '{dtype}\[ns\]' instead."
        ).format(dtype=dtype.__name__)
        with pytest.raises(ValueError, match=msg):
            s.astype(dtype)

    @pytest.mark.parametrize("dtype", np.typecodes["All"])
    def test_astype_empty_constructor_equality(self, dtype):
        """Series([], dtype=d) equals Series([]).astype(d) for numpy dtypes."""
        # see gh-15524

        if dtype not in (
            "S",
            "V",  # poor support (if any) currently
            "M",
            "m",  # Generic timestamps raise a ValueError. Already tested.
        ):
            init_empty = Series([], dtype=dtype)
            as_type_empty = Series([]).astype(dtype)
            tm.assert_series_equal(init_empty, as_type_empty)

    @pytest.mark.filterwarnings("ignore::FutureWarning")
    def test_complex(self):
        """np.real/np.imag interoperate with a complex-valued Series."""
        # see gh-4819: complex access for ndarray compat
        a = np.arange(5, dtype=np.float64)
        b = Series(a + 4j * a)

        tm.assert_numpy_array_equal(a, np.real(b))
        tm.assert_numpy_array_equal(4 * a, np.imag(b))

        b.real = np.arange(5) + 5
        tm.assert_numpy_array_equal(a + 5, np.real(b))
        tm.assert_numpy_array_equal(4 * a, np.imag(b))

    def test_real_imag_deprecated(self):
        """Series.real / Series.imag emit FutureWarning."""
        # GH 18262
        s = pd.Series([1])
        with tm.assert_produces_warning(FutureWarning):
            s.imag
            s.real

    def test_arg_for_errors_in_astype(self):
        """astype validates the ``errors`` keyword ('raise'/'ignore' only)."""
        # see gh-14878
        s = Series([1, 2, 3])

        msg = (
            r"Expected value of kwarg 'errors' to be one of \['raise',"
            r" 'ignore'\]\. Supplied value is 'False'"
        )
        with pytest.raises(ValueError, match=msg):
            s.astype(np.float64, errors=False)

        s.astype(np.int8, errors="raise")

    def test_intercept_astype_object(self):
        """DataFrame built from a datetime Series keeps per-column dtypes."""
        series = Series(date_range("1/1/2000", periods=10))

        # This test no longer makes sense, as
        # Series is by default already M8[ns].
        expected = series.astype("object")

        df = DataFrame({"a": series, "b": np.random.randn(len(series))})
        exp_dtypes = Series(
            [np.dtype("datetime64[ns]"), np.dtype("float64")], index=["a", "b"]
        )
        tm.assert_series_equal(df.dtypes, exp_dtypes)

        result = df.values.squeeze()
        assert (result[:, 0] == expected.values).all()

        df = DataFrame({"a": series, "b": ["foo"] * len(series)})

        result = df.values.squeeze()
        assert (result[:, 0] == expected.values).all()

    def test_series_to_categorical(self):
        """Series(..., dtype='category') converts like astype('category')."""
        # see gh-16524: test conversion of Series to Categorical
        series = Series(["a", "b", "c"])

        result = Series(series, dtype="category")
        expected = Series(["a", "b", "c"], dtype="category")

        tm.assert_series_equal(result, expected)

    def test_infer_objects_series(self):
        """infer_objects soft-converts object data; mixed data stays object."""
        # GH 11221
        actual = Series(np.array([1, 2, 3], dtype="O")).infer_objects()
        expected = Series([1, 2, 3])
        tm.assert_series_equal(actual, expected)

        actual = Series(np.array([1, 2, 3, None], dtype="O")).infer_objects()
        expected = Series([1.0, 2.0, 3.0, np.nan])
        tm.assert_series_equal(actual, expected)

        # only soft conversions, unconvertable pass thru unchanged
        actual = Series(np.array([1, 2, 3, None, "a"], dtype="O")).infer_objects()
        expected = Series([1, 2, 3, None, "a"])

        assert actual.dtype == "object"
        tm.assert_series_equal(actual, expected)

    def test_is_homogeneous_type(self):
        """A Series is always homogeneously typed."""
        assert Series()._is_homogeneous_type
        assert Series([1, 2])._is_homogeneous_type
        assert Series(pd.Categorical([1, 2]))._is_homogeneous_type

    @pytest.mark.parametrize(
        "data",
        [
            pd.period_range("2000", periods=4),
            pd.IntervalIndex.from_breaks([1, 2, 3, 4]),
        ],
    )
    def test_values_compatibility(self, data):
        """.values on extension-typed Series matches the object ndarray."""
        # https://github.com/pandas-dev/pandas/issues/23995
        result = pd.Series(data).values
        expected = np.array(data.astype(object))
        tm.assert_numpy_array_equal(result, expected)

View File

@@ -0,0 +1,160 @@
import numpy as np
import pytest
from pandas import Categorical, Series
import pandas.util.testing as tm
def test_value_counts_nunique():
    """``nunique`` counts distinct non-NA values; empty or all-NaN
    categoricals report zero uniques."""
    # basics.rst doc example
    ser = Series(np.random.randn(500))
    ser[20:500] = np.nan
    ser[10:20] = 5000
    # 10 surviving random values plus the constant 5000
    assert ser.nunique() == 11

    # GH 18051
    for values in ([], [np.nan]):
        assert Series(Categorical(values)).nunique() == 0
def test_unique():
    # GH714: repeated NaN collapses to a single unique value for float
    # (default and float32) and object arrays alike
    for ser in (
        Series([1.2345] * 100),
        Series([1.2345] * 100, dtype="f4"),
        Series(["foo"] * 100, dtype="O"),  # NAs in object arrays, #714
    ):
        ser[::2] = np.nan
        assert len(ser.unique()) == 2

    # decision about None: kept as-is (not normalized to NaN) in objects
    ser = Series([1, 2, 3, None, None, None], dtype=object)
    tm.assert_numpy_array_equal(
        ser.unique(), np.array([1, 2, 3, None], dtype=object)
    )

    # GH 18051: unique() of a categorical Series returns a Categorical
    tm.assert_categorical_equal(
        Series(Categorical([])).unique(), Categorical([]), check_dtype=False
    )
    tm.assert_categorical_equal(
        Series(Categorical([np.nan])).unique(),
        Categorical([np.nan]),
        check_dtype=False,
    )
def test_unique_data_ownership():
    # GH 1807: sorting the array returned by unique() must not blow up,
    # even though it may share memory with the original values
    deduped = Series(["a", "c", "b"]).unique()
    Series(deduped).sort_values()
@pytest.mark.parametrize(
    "data, expected",
    [
        (np.random.randint(0, 10, size=1000), False),
        (np.arange(1000), True),
        ([], True),
        ([np.nan], True),
        (["foo", "bar", np.nan], True),
        (["foo", "foo", np.nan], False),
        (["foo", "bar", np.nan, np.nan], False),
    ],
)
def test_is_unique(data, expected):
    # GH11946 / GH25180: is_unique must hold for uniques (a lone NaN
    # counts as unique) and fail as soon as any value repeats
    assert Series(data).is_unique is expected
def test_is_unique_class_ne(capsys):
    # GH 20661: is_unique must neither crash on objects whose __ne__
    # raises nor write anything to stderr while evaluating them
    class RaisingNe:
        def __init__(self, val):
            self._value = val

        def __ne__(self, other):
            raise Exception("NEQ not supported")

    with capsys.disabled():
        items = [RaisingNe(i) for i in range(5)]
        Series(items, index=list(range(5))).is_unique

    captured = capsys.readouterr()
    assert len(captured.err) == 0
@pytest.mark.parametrize(
"keep, expected",
[
("first", Series([False, False, False, False, True, True, False])),
("last", Series([False, True, True, False, False, False, False])),
(False, Series([False, True, True, False, True, True, False])),
],
)
def test_drop_duplicates(any_numpy_dtype, keep, expected):
tc = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype))
if tc.dtype == "bool":
pytest.skip("tested separately in test_drop_duplicates_bool")
tm.assert_series_equal(tc.duplicated(keep=keep), expected)
tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
sc = tc.copy()
sc.drop_duplicates(keep=keep, inplace=True)
tm.assert_series_equal(sc, tc[~expected])
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, True])),
        ("last", Series([True, True, False, False])),
        (False, Series([True, True, True, True])),
    ],
)
def test_drop_duplicates_bool(keep, expected):
    # bool dtype counterpart of test_drop_duplicates
    tc = Series([True, False, True, False])

    tm.assert_series_equal(tc.duplicated(keep=keep), expected)
    tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])

    # the inplace variant must produce the same result
    mutated = tc.copy()
    mutated.drop_duplicates(keep=keep, inplace=True)
    tm.assert_series_equal(mutated, tc[~expected])
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, False, True], name="name")),
        ("last", Series([True, True, False, False, False], name="name")),
        (False, Series([True, True, True, False, True], name="name")),
    ],
)
def test_duplicated_keep(keep, expected):
    # duplicated() honors keep= and preserves the Series name
    ser = Series(["a", "b", "b", "c", "a"], name="name")
    tm.assert_series_equal(ser.duplicated(keep=keep), expected)
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, False, True])),
        ("last", Series([True, True, False, False, False])),
        (False, Series([True, True, True, False, True])),
    ],
)
def test_duplicated_nan_none(keep, expected):
    # np.nan matches np.nan for duplicate detection, while a lone None
    # in an object Series is treated as its own value
    ser = Series([np.nan, 3, 3, None, np.nan], dtype=object)
    tm.assert_series_equal(ser.duplicated(keep=keep), expected)

View File

@@ -0,0 +1,113 @@
import numpy as np
import pytest
import pandas as pd
from pandas.util import testing as tm
def test_basic():
    # lists explode element-wise with their label repeated; NaN and the
    # empty list each contribute a single NaN row
    ser = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"), name="foo")
    expected = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object, name="foo"
    )
    tm.assert_series_equal(ser.explode(), expected)
def test_mixed_type():
    # a mix of list, NaN, None, empty ndarray and a nested Series:
    # scalars/None pass through, empties become NaN, list-likes expand
    ser = pd.Series(
        [[0, 1, 2], np.nan, None, np.array([]), pd.Series(["a", "b"])], name="foo"
    )
    expected = pd.Series(
        [0, 1, 2, np.nan, None, np.nan, "a", "b"],
        index=[0, 0, 0, 1, 2, 3, 4, 4],
        dtype=object,
        name="foo",
    )
    tm.assert_series_equal(ser.explode(), expected)
def test_empty():
    # exploding an empty Series is a no-op
    ser = pd.Series()
    tm.assert_series_equal(ser.explode(), ser.copy())
def test_nested_lists():
    # only the outermost level is exploded; inner lists stay intact
    ser = pd.Series([[[1, 2, 3]], [1, 2], 1])
    expected = pd.Series([[1, 2, 3], 1, 2, 1], index=[0, 1, 1, 2])
    tm.assert_series_equal(ser.explode(), expected)
def test_multi_index():
    # MultiIndex labels are repeated per exploded element, names kept
    mi = pd.MultiIndex.from_product([list("ab"), range(2)], names=["foo", "bar"])
    ser = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], name="foo", index=mi)

    expected_index = pd.MultiIndex.from_tuples(
        [("a", 0), ("a", 0), ("a", 0), ("a", 1), ("b", 0), ("b", 1), ("b", 1)],
        names=["foo", "bar"],
    )
    expected = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4],
        index=expected_index,
        dtype=object,
        name="foo",
    )
    tm.assert_series_equal(ser.explode(), expected)
def test_large():
    # a second explode over already-flat scalars is a no-op
    flat = pd.Series([range(256)]).explode()
    tm.assert_series_equal(flat.explode(), flat)
def test_invert_array():
    # Row-wise apply boxes each row's DatetimeArray; exploding that
    # object Series should reproduce the original tz-aware column.
    # ``rename()`` with no argument clears the name, matching the
    # unnamed explode result.
    # NOTE(review): the re-inferred dtype of the exploded result is
    # version-sensitive — pinned to the pandas version this suite targets.
    df = pd.DataFrame({"a": pd.date_range("20190101", periods=3, tz="UTC")})

    listify = df.apply(lambda x: x.array, axis=1)
    result = listify.explode()
    tm.assert_series_equal(result, df["a"].rename())
@pytest.mark.parametrize(
    "s", [pd.Series([1, 2, 3]), pd.Series(pd.date_range("2019", periods=3, tz="UTC"))]
)
def test_non_object_dtype(s):
    # Explode is a no-op on non-object dtypes: there is nothing to expand.
    # BUGFIX: was named ``non_object_dtype`` (missing the ``test_``
    # prefix), so pytest never collected or ran it.
    result = s.explode()
    tm.assert_series_equal(result, s)
def test_typical_usecase():
    # The documented use case for Series.explode: split a delimited
    # string column, explode it, and join it back against the remaining
    # columns on the (now repeated) index.
    # CLEANUP: removed a stray no-op expression statement (`exploded`)
    # left over from the doc/notebook example.
    df = pd.DataFrame(
        [{"var1": "a,b,c", "var2": 1}, {"var1": "d,e,f", "var2": 2}],
        columns=["var1", "var2"],
    )
    exploded = df.var1.str.split(",").explode()
    result = df[["var2"]].join(exploded)
    expected = pd.DataFrame(
        {"var2": [1, 1, 1, 2, 2, 2], "var1": list("abcdef")},
        columns=["var2", "var1"],
        index=[0, 0, 0, 1, 1, 1],
    )
    tm.assert_frame_equal(result, expected)
def test_nested_EA():
    # a nested EA array: a Series whose elements are tz-aware
    # DatetimeIndexes should explode back into a single tz-aware Series
    # with the outer labels repeated per element.
    # NOTE(review): whether explode re-infers the tz-aware dtype (vs
    # leaving object dtype) is version-sensitive — pinned to this era.
    s = pd.Series(
        [
            pd.date_range("20170101", periods=3, tz="UTC"),
            pd.date_range("20170104", periods=3, tz="UTC"),
        ]
    )
    result = s.explode()
    expected = pd.Series(
        pd.date_range("20170101", periods=6, tz="UTC"), index=[0, 0, 0, 1, 1, 1]
    )
    tm.assert_series_equal(result, expected)

View File

@@ -0,0 +1,252 @@
from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import NaT, Series, Timestamp
from pandas.core.internals.blocks import IntBlock
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal
class TestSeriesInternals:
    """Tests for private Series internals.

    NOTE(review): built on private APIs (``Series._convert``,
    ``Series._data.blocks``, ``IntBlock``) that exist only in the pandas
    version this suite targets; do not expect these on newer pandas.
    """

    # GH 10265
    def test_convert(self):
        """Exercise ``_convert`` across datetime/numeric/timedelta targets,
        with and without ``coerce``, on homogeneous and mixed inputs."""
        # Tests: All to nans, coerce, true
        # Test coercion returns correct type
        s = Series(["a", "b", "c"])
        results = s._convert(datetime=True, coerce=True)
        expected = Series([NaT] * 3)
        assert_series_equal(results, expected)

        results = s._convert(numeric=True, coerce=True)
        expected = Series([np.nan] * 3)
        assert_series_equal(results, expected)

        expected = Series([NaT] * 3, dtype=np.dtype("m8[ns]"))
        results = s._convert(timedelta=True, coerce=True)
        assert_series_equal(results, expected)

        dt = datetime(2001, 1, 1, 0, 0)
        td = dt - datetime(2000, 1, 1, 0, 0)

        # Test coercion with mixed types: only values convertible to the
        # requested target survive, everything else becomes NA
        s = Series(["a", "3.1415", dt, td])
        results = s._convert(datetime=True, coerce=True)
        expected = Series([NaT, NaT, dt, NaT])
        assert_series_equal(results, expected)

        results = s._convert(numeric=True, coerce=True)
        expected = Series([np.nan, 3.1415, np.nan, np.nan])
        assert_series_equal(results, expected)

        results = s._convert(timedelta=True, coerce=True)
        expected = Series([NaT, NaT, NaT, td], dtype=np.dtype("m8[ns]"))
        assert_series_equal(results, expected)

        # Test standard conversion returns original (no coercion)
        results = s._convert(datetime=True)
        assert_series_equal(results, s)
        results = s._convert(numeric=True)
        expected = Series([np.nan, 3.1415, np.nan, np.nan])
        assert_series_equal(results, expected)
        results = s._convert(timedelta=True)
        assert_series_equal(results, s)

        # test pass-through and non-conversion when other types selected
        s = Series(["1.0", "2.0", "3.0"])
        results = s._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([1.0, 2.0, 3.0])
        assert_series_equal(results, expected)
        # positional form: (datetime, numeric, timedelta)
        results = s._convert(True, False, True)
        assert_series_equal(results, s)

        s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype="O")
        results = s._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)])
        assert_series_equal(results, expected)
        results = s._convert(datetime=False, numeric=True, timedelta=True)
        assert_series_equal(results, s)

        td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0)
        s = Series([td, td], dtype="O")
        results = s._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([td, td])
        assert_series_equal(results, expected)
        results = s._convert(True, True, False)
        assert_series_equal(results, s)

        s = Series([1.0, 2, 3], index=["a", "b", "c"])
        result = s._convert(numeric=True)
        assert_series_equal(result, s)

        # force numeric conversion of numeric-looking strings
        r = s.copy().astype("O")
        r["a"] = "1"
        result = r._convert(numeric=True)
        assert_series_equal(result, s)

        r = s.copy().astype("O")
        r["a"] = "1."
        result = r._convert(numeric=True)
        assert_series_equal(result, s)

        # a non-numeric string stays NaN after numeric conversion
        r = s.copy().astype("O")
        r["a"] = "garbled"
        result = r._convert(numeric=True)
        expected = s.copy()
        expected["a"] = np.nan
        assert_series_equal(result, expected)

        # GH 4119, not converting a mixed type (e.g. floats and object)
        s = Series([1, "na", 3, 4])
        result = s._convert(datetime=True, numeric=True)
        expected = Series([1, np.nan, 3, 4])
        assert_series_equal(result, expected)

        s = Series([1, "", 3, 4])
        result = s._convert(datetime=True, numeric=True)
        assert_series_equal(result, expected)

        # dates
        s = Series(
            [
                datetime(2001, 1, 1, 0, 0),
                datetime(2001, 1, 2, 0, 0),
                datetime(2001, 1, 3, 0, 0),
            ]
        )
        s2 = Series(
            [
                datetime(2001, 1, 1, 0, 0),
                datetime(2001, 1, 2, 0, 0),
                datetime(2001, 1, 3, 0, 0),
                "foo",
                1.0,
                1,
                Timestamp("20010104"),
                "20010105",
            ],
            dtype="O",
        )

        result = s._convert(datetime=True)
        expected = Series(
            [Timestamp("20010101"), Timestamp("20010102"), Timestamp("20010103")],
            dtype="M8[ns]",
        )
        assert_series_equal(result, expected)

        result = s._convert(datetime=True, coerce=True)
        assert_series_equal(result, expected)

        # with coercion, everything parseable becomes a Timestamp and the
        # rest becomes NaT
        expected = Series(
            [
                Timestamp("20010101"),
                Timestamp("20010102"),
                Timestamp("20010103"),
                NaT,
                NaT,
                NaT,
                Timestamp("20010104"),
                Timestamp("20010105"),
            ],
            dtype="M8[ns]",
        )
        result = s2._convert(datetime=True, numeric=False, timedelta=False, coerce=True)
        assert_series_equal(result, expected)
        result = s2._convert(datetime=True, coerce=True)
        assert_series_equal(result, expected)

        # numbers coerce to Timestamps (epoch-based), strings fail -> NaT
        s = Series(["foo", "bar", 1, 1.0], dtype="O")
        result = s._convert(datetime=True, coerce=True)
        expected = Series([NaT] * 2 + [Timestamp(1)] * 2)
        assert_series_equal(result, expected)

        # preserve if non-object: coercion never touches non-object dtypes
        s = Series([1], dtype="float32")
        result = s._convert(datetime=True, coerce=True)
        assert_series_equal(result, s)

        # r = s.copy()
        # r[0] = np.nan
        # result = r._convert(convert_dates=True,convert_numeric=False)
        # assert result.dtype == 'M8[ns]'

        # dateutil parses some single letters into today's value as a date
        # -> coerced datetime conversion must still yield NaT for them
        expected = Series([NaT])
        for x in "abcdefghijklmnopqrstuvwxyz":
            s = Series([x])
            result = s._convert(datetime=True, coerce=True)
            assert_series_equal(result, expected)
            s = Series([x.upper()])
            result = s._convert(datetime=True, coerce=True)
            assert_series_equal(result, expected)

    def test_convert_no_arg_error(self):
        # calling _convert with every target disabled is a usage error
        s = Series(["1.0", "2"])
        msg = r"At least one of datetime, numeric or timedelta must be True\."
        with pytest.raises(ValueError, match=msg):
            s._convert()

    def test_convert_preserve_bool(self):
        # a lone bool among ints is converted along with them to int64
        s = Series([1, True, 3, 5], dtype=object)
        r = s._convert(datetime=True, numeric=True)
        e = Series([1, 1, 3, 5], dtype="i8")
        tm.assert_series_equal(r, e)

    def test_convert_preserve_all_bool(self):
        # an all-bool object column keeps the bool dtype
        s = Series([False, True, False, False], dtype=object)
        r = s._convert(datetime=True, numeric=True)
        e = Series([False, True, False, False], dtype=bool)
        tm.assert_series_equal(r, e)

    def test_constructor_no_pandas_array(self):
        # constructing from ``.array`` should unbox back to a plain IntBlock
        ser = pd.Series([1, 2, 3])
        result = pd.Series(ser.array)
        tm.assert_series_equal(ser, result)
        assert isinstance(result._data.blocks[0], IntBlock)

    def test_astype_no_pandas_dtype(self):
        # https://github.com/pandas-dev/pandas/pull/24866
        ser = pd.Series([1, 2], dtype="int64")
        # Don't have PandasDtype in the public API, so we use `.array.dtype`,
        # which is a PandasDtype.
        result = ser.astype(ser.array.dtype)
        tm.assert_series_equal(result, ser)

    def test_from_array(self):
        # datetime64/timedelta64 arrays should land in non-extension blocks
        result = pd.Series(pd.array(["1H", "2H"], dtype="timedelta64[ns]"))
        assert result._data.blocks[0].is_extension is False

        result = pd.Series(pd.array(["2015"], dtype="datetime64[ns]"))
        assert result._data.blocks[0].is_extension is False

    def test_from_list_dtype(self):
        # same as test_from_array, but starting from plain python lists
        result = pd.Series(["1H", "2H"], dtype="timedelta64[ns]")
        assert result._data.blocks[0].is_extension is False

        result = pd.Series(["2015"], dtype="datetime64[ns]")
        assert result._data.blocks[0].is_extension is False
def test_hasnans_uncached_for_series():
    # GH#19700: ``hasnans`` cached on an Index must not leak into a
    # Series built from it — the Series recomputes the value after
    # mutation, and its docstring is shared with Index.hasnans.
    # FIX: renamed from ``test_hasnans_unchached_for_series`` (typo).
    idx = pd.Index([0, 1])
    assert idx.hasnans is False
    assert "hasnans" in idx._cache  # Index caches the cache_readonly
    ser = idx.to_series()
    assert ser.hasnans is False
    assert not hasattr(ser, "_cache")  # Series did not inherit the cache
    ser.iloc[-1] = np.nan
    assert ser.hasnans is True
    assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__
def test_put_deprecated():
    # GH 18262: Series.put is deprecated and must emit a FutureWarning.
    # NOTE(review): Series.put was later removed entirely; this test is
    # pinned to the deprecation-era pandas.
    s = pd.Series([1])
    with tm.assert_produces_warning(FutureWarning):
        s.put(0, 0)

View File

@@ -0,0 +1,272 @@
import collections
from datetime import datetime
from io import StringIO
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Series
import pandas.util.testing as tm
from pandas.util.testing import (
assert_almost_equal,
assert_frame_equal,
assert_series_equal,
ensure_clean,
)
from pandas.io.common import _get_handle
class TestSeriesToCSV:
    """Round-trip tests for ``Series.to_csv`` / ``pd.read_csv``.

    NOTE(review): exercises era-specific APIs (``squeeze=True``, the
    deprecated ``path`` keyword, private ``_get_handle``); several of
    these were removed in later pandas versions.
    """

    def read_csv(self, path, **kwargs):
        # helper: read a one-column csv back as a Series, emulating the
        # removed ``Series.from_csv`` defaults; kwargs override them
        params = dict(squeeze=True, index_col=0, header=None, parse_dates=True)
        params.update(**kwargs)

        header = params.get("header")
        out = pd.read_csv(path, **params)

        if header is None:
            # from_csv produced anonymous series/index; mirror that here
            out.name = out.index.name = None

        return out

    @pytest.mark.parametrize("arg", ["path", "header", "both"])
    def test_to_csv_deprecation(self, arg, datetime_series):
        # see gh-19715: deprecated keyword/default combinations must warn
        with ensure_clean() as path:
            if arg == "path":
                kwargs = dict(path=path, header=False)
            elif arg == "header":
                kwargs = dict(path_or_buf=path)
            else:  # Both discrepancies match.
                kwargs = dict(path=path)

            with tm.assert_produces_warning(FutureWarning):
                datetime_series.to_csv(**kwargs)

                # Make sure roundtrip still works.
                ts = self.read_csv(path)
                assert_series_equal(datetime_series, ts, check_names=False)

    def test_from_csv(self, datetime_series, string_series):
        """Round-trip with and without headers, plus custom separators."""
        with ensure_clean() as path:
            datetime_series.to_csv(path, header=False)
            ts = self.read_csv(path)
            assert_series_equal(datetime_series, ts, check_names=False)

            assert ts.name is None
            assert ts.index.name is None

            # see gh-10483: a written header round-trips as the name
            datetime_series.to_csv(path, header=True)
            ts_h = self.read_csv(path, header=0)
            assert ts_h.name == "ts"

            string_series.to_csv(path, header=False)
            series = self.read_csv(path)
            assert_series_equal(string_series, series, check_names=False)

            assert series.name is None
            assert series.index.name is None

            string_series.to_csv(path, header=True)
            series_h = self.read_csv(path, header=0)
            assert series_h.name == "series"

            # hand-written file with a '|' separator
            with open(path, "w") as outfile:
                outfile.write("1998-01-01|1.0\n1999-01-01|2.0")

            series = self.read_csv(path, sep="|")
            check_series = Series(
                {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}
            )
            assert_series_equal(check_series, series)

            # without parse_dates the index stays as strings
            series = self.read_csv(path, sep="|", parse_dates=False)
            check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0})
            assert_series_equal(check_series, series)

    def test_to_csv(self, datetime_series):
        import io

        with ensure_clean() as path:
            datetime_series.to_csv(path, header=False)

            # no blank lines should be written between rows
            with io.open(path, newline=None) as f:
                lines = f.readlines()
            assert lines[1] != "\n"

            datetime_series.to_csv(path, index=False, header=False)
            arr = np.loadtxt(path)
            assert_almost_equal(arr, datetime_series.values)

    def test_to_csv_unicode_index(self):
        # non-ASCII values and index labels must survive the round trip
        buf = StringIO()
        s = Series(["\u05d0", "d2"], index=["\u05d0", "\u05d1"])

        s.to_csv(buf, encoding="UTF-8", header=False)
        buf.seek(0)

        s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
        assert_series_equal(s, s2)

    def test_to_csv_float_format(self):
        # float_format controls the written precision
        with ensure_clean() as filename:
            ser = Series([0.123456, 0.234567, 0.567567])
            ser.to_csv(filename, float_format="%.2f", header=False)

            rs = self.read_csv(filename)
            xp = Series([0.12, 0.23, 0.57])
            assert_series_equal(rs, xp)

    def test_to_csv_list_entries(self):
        # list-valued cells (from str.split) must serialize without error
        s = Series(["jack and jill", "jesse and frank"])

        split = s.str.split(r"\s+and\s+")

        buf = StringIO()
        split.to_csv(buf, header=False)

    def test_to_csv_path_is_none(self):
        # GH 8215
        # Series.to_csv() was returning None, inconsistent with
        # DataFrame.to_csv() which returned string
        s = Series([1, 2, 3])
        csv_str = s.to_csv(path_or_buf=None, header=False)
        assert isinstance(csv_str, str)

    @pytest.mark.parametrize(
        "s,encoding",
        [
            (
                Series([0.123456, 0.234567, 0.567567], index=["A", "B", "C"], name="X"),
                None,
            ),
            # GH 21241, 21118
            (Series(["abc", "def", "ghi"], name="X"), "ascii"),
            (Series(["123", "你好", "世界"], name="中文"), "gb2312"),
            (Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), "cp737"),
        ],
    )
    def test_to_csv_compression(self, s, encoding, compression):
        """Compressed round trips via path and via open file handle."""
        with ensure_clean() as filename:

            s.to_csv(filename, compression=compression, encoding=encoding, header=True)
            # test the round trip - to_csv -> read_csv
            result = pd.read_csv(
                filename,
                compression=compression,
                encoding=encoding,
                index_col=0,
                squeeze=True,
            )
            assert_series_equal(s, result)

            # test the round trip using file handle - to_csv -> read_csv
            f, _handles = _get_handle(
                filename, "w", compression=compression, encoding=encoding
            )
            with f:
                s.to_csv(f, encoding=encoding, header=True)
            result = pd.read_csv(
                filename,
                compression=compression,
                encoding=encoding,
                index_col=0,
                squeeze=True,
            )
            assert_series_equal(s, result)

            # explicitly ensure file was compressed
            with tm.decompress_file(filename, compression) as fh:
                text = fh.read().decode(encoding or "utf8")
                assert s.name in text

            with tm.decompress_file(filename, compression) as fh:
                assert_series_equal(
                    s, pd.read_csv(fh, index_col=0, squeeze=True, encoding=encoding)
                )
class TestSeriesIO:
    """Conversions to frames/dicts and pickle round trips."""

    def test_to_frame(self, datetime_series):
        # name=None -> unnamed column; a set name becomes the column
        # label; an explicit name= argument overrides the series name
        datetime_series.name = None
        rs = datetime_series.to_frame()
        xp = pd.DataFrame(datetime_series.values, index=datetime_series.index)
        assert_frame_equal(rs, xp)

        datetime_series.name = "testname"
        rs = datetime_series.to_frame()
        xp = pd.DataFrame(
            dict(testname=datetime_series.values), index=datetime_series.index
        )
        assert_frame_equal(rs, xp)

        rs = datetime_series.to_frame(name="testdifferent")
        xp = pd.DataFrame(
            dict(testdifferent=datetime_series.values), index=datetime_series.index
        )
        assert_frame_equal(rs, xp)

    def test_timeseries_periodindex(self):
        # GH2891: a PeriodIndex freq must survive pickling
        from pandas import period_range

        prng = period_range("1/1/2011", "1/1/2012", freq="M")
        ts = Series(np.random.randn(len(prng)), prng)
        new_ts = tm.round_trip_pickle(ts)
        assert new_ts.index.freq == "M"

    def test_pickle_preserve_name(self):
        # names of several types must survive a pickle round trip
        for n in [777, 777.0, "name", datetime(2001, 11, 11), (1, 2)]:
            unpickled = self._pickle_roundtrip_name(tm.makeTimeSeries(name=n))
            assert unpickled.name == n

    def test_pickle_categorical_ordered_from_sentinel(self):
        # GH 27295: can remove test when _ordered_from_sentinel is removed (GH 26403)
        s = Series(["a", "b", "c", "a"], dtype="category")
        result = tm.round_trip_pickle(s)
        result = result.astype("category")
        tm.assert_series_equal(result, s)
        assert result.dtype._ordered_from_sentinel is False

    def _pickle_roundtrip_name(self, obj):
        # helper: pickle *obj* to a temp file and read it back
        with ensure_clean() as path:
            obj.to_pickle(path)
            unpickled = pd.read_pickle(path)
            return unpickled

    def test_to_frame_expanddim(self):
        # GH 9762: to_frame must honor a subclass's _constructor_expanddim
        class SubclassedSeries(Series):
            @property
            def _constructor_expanddim(self):
                return SubclassedFrame

        class SubclassedFrame(DataFrame):
            pass

        s = SubclassedSeries([1, 2, 3], name="X")
        result = s.to_frame()
        assert isinstance(result, SubclassedFrame)

        expected = SubclassedFrame({"X": [1, 2, 3]})
        assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "mapping", (dict, collections.defaultdict(list), collections.OrderedDict)
    )
    def test_to_dict(self, mapping, datetime_series):
        # GH16122: to_dict accepts a mapping class or instance
        tm.assert_series_equal(
            Series(datetime_series.to_dict(mapping), name="ts"), datetime_series
        )
        from_method = Series(datetime_series.to_dict(collections.Counter))
        from_constructor = Series(collections.Counter(datetime_series.items()))
        tm.assert_series_equal(from_method, from_constructor)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,794 @@
from datetime import datetime, timedelta
import operator
import numpy as np
import pytest
import pandas as pd
from pandas import Categorical, DataFrame, Index, Series, bdate_range, date_range, isna
from pandas.core import ops
from pandas.core.indexes.base import InvalidIndexError
import pandas.core.nanops as nanops
import pandas.util.testing as tm
from pandas.util.testing import (
assert_almost_equal,
assert_frame_equal,
assert_index_equal,
assert_series_equal,
)
from .common import TestData
class TestSeriesLogicalOps:
    """Logical (&, |, ^) op semantics: NA propagation, alignment, and
    mixed-dtype behavior.

    NOTE(review): the exact alignment/NA-fill results asserted here are
    version-pinned; later pandas changed several of these behaviors.
    """

    @pytest.mark.parametrize("bool_op", [operator.and_, operator.or_, operator.xor])
    def test_bool_operators_with_nas(self, bool_op):
        # boolean &, |, ^ should work with object arrays and propagate NAs
        ser = Series(bdate_range("1/1/2000", periods=10), dtype=object)
        ser[::2] = np.nan

        mask = ser.isna()
        filled = ser.fillna(ser[0])

        result = bool_op(ser < ser[9], ser > ser[3])

        expected = bool_op(filled < filled[9], filled > filled[3])
        expected[mask] = False  # NA positions come out False
        assert_series_equal(result, expected)

    def test_operators_bitwise(self):
        # GH#9016: support bitwise op for integer types
        index = list("bca")

        s_tft = Series([True, False, True], index=index)
        s_fff = Series([False, False, False], index=index)
        s_tff = Series([True, False, False], index=index)
        s_empty = Series([])

        # TODO: unused
        # s_0101 = Series([0, 1, 0, 1])

        s_0123 = Series(range(4), dtype="int64")
        s_3333 = Series([3] * 4)
        s_4444 = Series([4] * 4)

        # bool vs empty: & yields all-False, | keeps the original
        res = s_tft & s_empty
        expected = s_fff
        assert_series_equal(res, expected)

        res = s_tft | s_empty
        expected = s_tft
        assert_series_equal(res, expected)

        # int vs int: genuine bitwise arithmetic
        res = s_0123 & s_3333
        expected = Series(range(4), dtype="int64")
        assert_series_equal(res, expected)

        res = s_0123 | s_4444
        expected = Series(range(4, 8), dtype="int64")
        assert_series_equal(res, expected)

        # partial index overlap aligns first, missing labels -> False
        s_a0b1c0 = Series([1], list("b"))

        res = s_tft & s_a0b1c0
        expected = s_tff.reindex(list("abc"))
        assert_series_equal(res, expected)

        res = s_tft | s_a0b1c0
        expected = s_tft.reindex(list("abc"))
        assert_series_equal(res, expected)

        # int scalars against bool/int series
        n0 = 0
        res = s_tft & n0
        expected = s_fff
        assert_series_equal(res, expected)

        res = s_0123 & n0
        expected = Series([0] * 4)
        assert_series_equal(res, expected)

        n1 = 1
        res = s_tft & n1
        expected = s_tft
        assert_series_equal(res, expected)

        res = s_0123 & n1
        expected = Series([0, 1, 0, 1])
        assert_series_equal(res, expected)

        # mixed int widths upcast to the larger dtype
        s_1111 = Series([1] * 4, dtype="int8")
        res = s_0123 & s_1111
        expected = Series([0, 1, 0, 1], dtype="int64")
        assert_series_equal(res, expected)

        res = s_0123.astype(np.int16) | s_1111.astype(np.int32)
        expected = Series([1, 1, 3, 3], dtype="int32")
        assert_series_equal(res, expected)

        # non-boolable operands must raise
        with pytest.raises(TypeError):
            s_1111 & "a"
        with pytest.raises(TypeError):
            s_1111 & ["a", "b", "c", "d"]
        with pytest.raises(TypeError):
            s_0123 & np.NaN
        with pytest.raises(TypeError):
            s_0123 & 3.14
        with pytest.raises(TypeError):
            s_0123 & [0.1, 4, 3.14, 2]

        # s_0123 will be all false now because of reindexing like s_tft
        exp = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"])
        assert_series_equal(s_tft & s_0123, exp)

        # s_tft will be all false now because of reindexing like s_0123
        exp = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"])
        assert_series_equal(s_0123 & s_tft, exp)

        assert_series_equal(s_0123 & False, Series([False] * 4))
        assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))
        assert_series_equal(s_0123 & [False], Series([False] * 4))
        assert_series_equal(s_0123 & (False), Series([False] * 4))
        assert_series_equal(
            s_0123 & Series([False, np.NaN, False, False]), Series([False] * 4)
        )

        # float/object operands are truth-tested elementwise
        s_ftft = Series([False, True, False, True])
        assert_series_equal(s_0123 & Series([0.1, 4, -3.14, 2]), s_ftft)

        s_abNd = Series(["a", "b", np.NaN, "d"])
        res = s_0123 & s_abNd
        expected = s_ftft
        assert_series_equal(res, expected)

    def test_scalar_na_logical_ops_corners(self):
        s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])

        # a datetime scalar cannot participate in int &
        with pytest.raises(TypeError):
            s & datetime(2005, 1, 1)

        s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
        s[::2] = np.nan

        expected = Series(True, index=s.index)
        expected[::2] = False
        result = s & list(s)
        assert_series_equal(result, expected)

        d = DataFrame({"A": s})
        # TODO: Fix this exception - needs to be fixed! (see GH5035)
        # (previously this was a TypeError because series returned
        # NotImplemented

        # this is an alignment issue; these are equivalent
        # https://github.com/pandas-dev/pandas/issues/5284

        with pytest.raises(TypeError):
            d.__and__(s, axis="columns")

        with pytest.raises(TypeError):
            s & d

        # this is wrong as its not a boolean result
        # result = d.__and__(s,axis='index')

    @pytest.mark.parametrize("op", [operator.and_, operator.or_, operator.xor])
    def test_logical_ops_with_index(self, op):
        # GH#22092, GH#19792: Series op Index works elementwise
        ser = Series([True, True, False, False])
        idx1 = Index([True, False, True, False])
        idx2 = Index([1, 0, 1, 0])

        expected = Series([op(ser[n], idx1[n]) for n in range(len(ser))])
        result = op(ser, idx1)
        assert_series_equal(result, expected)

        expected = Series([op(ser[n], idx2[n]) for n in range(len(ser))], dtype=bool)
        result = op(ser, idx2)
        assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "op",
        [
            pytest.param(
                ops.rand_,
                marks=pytest.mark.xfail(
                    reason="GH#22092 Index implementation returns Index",
                    raises=AssertionError,
                    strict=True,
                ),
            ),
            pytest.param(
                ops.ror_,
                marks=pytest.mark.xfail(
                    reason="Index.get_indexer with non unique index",
                    raises=InvalidIndexError,
                    strict=True,
                ),
            ),
            ops.rxor,
        ],
    )
    def test_reversed_logical_ops_with_index(self, op):
        # GH#22092, GH#19792
        ser = Series([True, True, False, False])
        idx1 = Index([True, False, True, False])
        idx2 = Index([1, 0, 1, 0])

        # symmetric_difference is only for rxor, but other 2 should fail
        expected = idx1.symmetric_difference(ser)
        result = op(ser, idx1)
        assert_index_equal(result, expected)

        expected = idx2.symmetric_difference(ser)
        result = op(ser, idx2)
        assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "op, expected",
        [
            (ops.rand_, pd.Index([False, True])),
            (ops.ror_, pd.Index([False, True])),
            (ops.rxor, pd.Index([])),
        ],
    )
    def test_reverse_ops_with_index(self, op, expected):
        # https://github.com/pandas-dev/pandas/pull/23628
        # multi-set Index ops are buggy, so let's avoid duplicates...
        ser = Series([True, False])
        idx = Index([False, True])
        result = op(ser, idx)
        tm.assert_index_equal(result, expected)

    def test_logical_ops_label_based(self):
        # GH#4947
        # logical ops should be label based
        a = Series([True, False, True], list("bca"))
        b = Series([False, True, False], list("abc"))

        expected = Series([False, True, False], list("abc"))
        result = a & b
        assert_series_equal(result, expected)

        expected = Series([True, True, False], list("abc"))
        result = a | b
        assert_series_equal(result, expected)

        expected = Series([True, False, False], list("abc"))
        result = a ^ b
        assert_series_equal(result, expected)

        # rhs is bigger
        a = Series([True, False, True], list("bca"))
        b = Series([False, True, False, True], list("abcd"))

        expected = Series([False, True, False, False], list("abcd"))
        result = a & b
        assert_series_equal(result, expected)

        expected = Series([True, True, False, False], list("abcd"))
        result = a | b
        assert_series_equal(result, expected)

        # filling

        # vs empty
        result = a & Series([])
        expected = Series([False, False, False], list("bca"))
        assert_series_equal(result, expected)

        result = a | Series([])
        expected = Series([True, False, True], list("bca"))
        assert_series_equal(result, expected)

        # vs non-matching
        result = a & Series([1], ["z"])
        expected = Series([False, False, False, False], list("abcz"))
        assert_series_equal(result, expected)

        result = a | Series([1], ["z"])
        expected = Series([True, True, False, False], list("abcz"))
        assert_series_equal(result, expected)

        # identity
        # we would like s[s|e] == s to hold for any e, whether empty or not
        for e in [
            Series([]),
            Series([1], ["z"]),
            Series(np.nan, b.index),
            Series(np.nan, a.index),
        ]:
            result = a[a | e]
            assert_series_equal(result, a[a])

        for e in [Series(["z"])]:
            result = a[a | e]
            assert_series_equal(result, a[a])

        # vs scalars
        index = list("bca")
        t = Series([True, False, True])

        for v in [True, 1, 2]:
            result = Series([True, False, True], index=index) | v
            expected = Series([True, True, True], index=index)
            assert_series_equal(result, expected)

        for v in [np.nan, "foo"]:
            with pytest.raises(TypeError):
                t | v

        for v in [False, 0]:
            result = Series([True, False, True], index=index) | v
            expected = Series([True, False, True], index=index)
            assert_series_equal(result, expected)

        for v in [True, 1]:
            result = Series([True, False, True], index=index) & v
            expected = Series([True, False, True], index=index)
            assert_series_equal(result, expected)

        for v in [False, 0]:
            result = Series([True, False, True], index=index) & v
            expected = Series([False, False, False], index=index)
            assert_series_equal(result, expected)

        for v in [np.nan]:
            with pytest.raises(TypeError):
                t & v

    def test_logical_ops_df_compat(self):
        # GH#1134: Series fills unmatched labels with False, DataFrame
        # keeps NaN — both behaviors asserted side by side
        s1 = pd.Series([True, False, True], index=list("ABC"), name="x")
        s2 = pd.Series([True, True, False], index=list("ABD"), name="x")

        exp = pd.Series([True, False, False, False], index=list("ABCD"), name="x")
        assert_series_equal(s1 & s2, exp)
        assert_series_equal(s2 & s1, exp)

        # True | np.nan => True
        exp = pd.Series([True, True, True, False], index=list("ABCD"), name="x")
        assert_series_equal(s1 | s2, exp)
        # np.nan | True => np.nan, filled with False
        exp = pd.Series([True, True, False, False], index=list("ABCD"), name="x")
        assert_series_equal(s2 | s1, exp)

        # DataFrame doesn't fill nan with False
        exp = pd.DataFrame({"x": [True, False, np.nan, np.nan]}, index=list("ABCD"))
        assert_frame_equal(s1.to_frame() & s2.to_frame(), exp)
        assert_frame_equal(s2.to_frame() & s1.to_frame(), exp)

        exp = pd.DataFrame({"x": [True, True, np.nan, np.nan]}, index=list("ABCD"))
        assert_frame_equal(s1.to_frame() | s2.to_frame(), exp)
        assert_frame_equal(s2.to_frame() | s1.to_frame(), exp)

        # different length
        s3 = pd.Series([True, False, True], index=list("ABC"), name="x")
        s4 = pd.Series([True, True, True, True], index=list("ABCD"), name="x")

        exp = pd.Series([True, False, True, False], index=list("ABCD"), name="x")
        assert_series_equal(s3 & s4, exp)
        assert_series_equal(s4 & s3, exp)

        # np.nan | True => np.nan, filled with False
        exp = pd.Series([True, True, True, False], index=list("ABCD"), name="x")
        assert_series_equal(s3 | s4, exp)
        # True | np.nan => True
        exp = pd.Series([True, True, True, True], index=list("ABCD"), name="x")
        assert_series_equal(s4 | s3, exp)

        exp = pd.DataFrame({"x": [True, False, True, np.nan]}, index=list("ABCD"))
        assert_frame_equal(s3.to_frame() & s4.to_frame(), exp)
        assert_frame_equal(s4.to_frame() & s3.to_frame(), exp)

        exp = pd.DataFrame({"x": [True, True, True, np.nan]}, index=list("ABCD"))
        assert_frame_equal(s3.to_frame() | s4.to_frame(), exp)
        assert_frame_equal(s4.to_frame() | s3.to_frame(), exp)
class TestSeriesComparisons:
def test_comparisons(self):
    # nanops.nangt must propagate NaN positions like the raw `>` would.
    # NOTE(review): uses private pandas.core.nanops — behavior pinned to
    # the pandas version this suite targets.
    left = np.random.randn(10)
    right = np.random.randn(10)
    left[:3] = np.nan

    result = nanops.nangt(left, right)
    with np.errstate(invalid="ignore"):
        expected = (left > right).astype("O")
    expected[:3] = np.nan

    assert_almost_equal(result, expected)

    s = Series(["a", "b", "c"])
    s2 = Series([False, True, False])

    # it works!  string vs bool compares elementwise unequal, no raise
    exp = Series([False, False, False])
    assert_series_equal(s == s2, exp)
    assert_series_equal(s2 == s, exp)
def test_categorical_comparisons(self):
# GH 8938
# allow equality comparisons
a = Series(list("abc"), dtype="category")
b = Series(list("abc"), dtype="object")
c = Series(["a", "b", "cc"], dtype="object")
d = Series(list("acb"), dtype="object")
e = Categorical(list("abc"))
f = Categorical(list("acb"))
# vs scalar
assert not (a == "a").all()
assert ((a != "a") == ~(a == "a")).all()
assert not ("a" == a).all()
assert (a == "a")[0]
assert ("a" == a)[0]
assert not ("a" != a)[0]
# vs list-like
assert (a == a).all()
assert not (a != a).all()
assert (a == list(a)).all()
assert (a == b).all()
assert (b == a).all()
assert ((~(a == b)) == (a != b)).all()
assert ((~(b == a)) == (b != a)).all()
assert not (a == c).all()
assert not (c == a).all()
assert not (a == d).all()
assert not (d == a).all()
# vs a cat-like
assert (a == e).all()
assert (e == a).all()
assert not (a == f).all()
assert not (f == a).all()
assert (~(a == e) == (a != e)).all()
assert (~(e == a) == (e != a)).all()
assert (~(a == f) == (a != f)).all()
assert (~(f == a) == (f != a)).all()
# non-equality is not comparable
with pytest.raises(TypeError):
a < b
with pytest.raises(TypeError):
b < a
with pytest.raises(TypeError):
a > b
with pytest.raises(TypeError):
b > a
def test_comparison_tuples(self):
# GH11339
# comparisons vs tuple
s = Series([(1, 1), (1, 2)])
result = s == (1, 2)
expected = Series([False, True])
assert_series_equal(result, expected)
result = s != (1, 2)
expected = Series([True, False])
assert_series_equal(result, expected)
result = s == (0, 0)
expected = Series([False, False])
assert_series_equal(result, expected)
result = s != (0, 0)
expected = Series([True, True])
assert_series_equal(result, expected)
s = Series([(1, 1), (1, 1)])
result = s == (1, 1)
expected = Series([True, True])
assert_series_equal(result, expected)
result = s != (1, 1)
expected = Series([False, False])
assert_series_equal(result, expected)
s = Series([frozenset([1]), frozenset([1, 2])])
result = s == frozenset([1])
expected = Series([True, False])
assert_series_equal(result, expected)
def test_comparison_operators_with_nas(self):
ser = Series(bdate_range("1/1/2000", periods=10), dtype=object)
ser[::2] = np.nan
# test that comparisons work
ops = ["lt", "le", "gt", "ge", "eq", "ne"]
for op in ops:
val = ser[5]
f = getattr(operator, op)
result = f(ser, val)
expected = f(ser.dropna(), val).reindex(ser.index)
if op == "ne":
expected = expected.fillna(True).astype(bool)
else:
expected = expected.fillna(False).astype(bool)
assert_series_equal(result, expected)
# fffffffuuuuuuuuuuuu
# result = f(val, s)
# expected = f(val, s.dropna()).reindex(s.index)
# assert_series_equal(result, expected)
def test_unequal_categorical_comparison_raises_type_error(self):
# unequal comparison should raise for unordered cats
cat = Series(Categorical(list("abc")))
with pytest.raises(TypeError):
cat > "b"
cat = Series(Categorical(list("abc"), ordered=False))
with pytest.raises(TypeError):
cat > "b"
# https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
# and following comparisons with scalars not in categories should raise
# for unequal comps, but not for equal/not equal
cat = Series(Categorical(list("abc"), ordered=True))
with pytest.raises(TypeError):
cat < "d"
with pytest.raises(TypeError):
cat > "d"
with pytest.raises(TypeError):
"d" < cat
with pytest.raises(TypeError):
"d" > cat
tm.assert_series_equal(cat == "d", Series([False, False, False]))
tm.assert_series_equal(cat != "d", Series([True, True, True]))
def test_ne(self):
ts = Series([3, 4, 5, 6, 7], [3, 4, 5, 6, 7], dtype=float)
expected = [True, True, False, True, True]
assert tm.equalContents(ts.index != 5, expected)
assert tm.equalContents(~(ts.index == 5), expected)
def test_comp_ops_df_compat(self):
# GH 1134
s1 = pd.Series([1, 2, 3], index=list("ABC"), name="x")
s2 = pd.Series([2, 2, 2], index=list("ABD"), name="x")
s3 = pd.Series([1, 2, 3], index=list("ABC"), name="x")
s4 = pd.Series([2, 2, 2, 2], index=list("ABCD"), name="x")
for left, right in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]:
msg = "Can only compare identically-labeled Series objects"
with pytest.raises(ValueError, match=msg):
left == right
with pytest.raises(ValueError, match=msg):
left != right
with pytest.raises(ValueError, match=msg):
left < right
msg = "Can only compare identically-labeled DataFrame objects"
with pytest.raises(ValueError, match=msg):
left.to_frame() == right.to_frame()
with pytest.raises(ValueError, match=msg):
left.to_frame() != right.to_frame()
with pytest.raises(ValueError, match=msg):
left.to_frame() < right.to_frame()
def test_compare_series_interval_keyword(self):
# GH 25338
s = Series(["IntervalA", "IntervalB", "IntervalC"])
result = s == "IntervalA"
expected = Series([True, False, False])
assert_series_equal(result, expected)
class TestSeriesFlexComparisonOps:
    """Flexible comparison methods (eq/ne/le/lt/ge/gt) on Series."""

    def test_comparison_flex_alignment(self):
        # Flex comparisons align on the union of the two indexes; labels
        # present on only one side compare as False, except under ``ne``
        # where they compare as True.
        lhs = Series([1, 3, 2], index=list("abc"))
        rhs = Series([2, 2, 2], index=list("bcd"))

        expectations = {
            "eq": [False, False, True, False],
            "ne": [True, True, False, True],
            "le": [False, False, True, False],
            "lt": [False, False, False, False],
            "ge": [False, True, True, False],
            "gt": [False, True, False, False],
        }
        for opname, values in expectations.items():
            expected = pd.Series(values, index=list("abcd"))
            result = getattr(lhs, opname)(rhs)
            assert_series_equal(result, expected)

    def test_comparison_flex_alignment_fill(self):
        # ``fill_value`` is substituted for the missing side before the
        # comparison is evaluated, so one-sided labels participate.
        lhs = Series([1, 3, 2], index=list("abc"))
        rhs = Series([2, 2, 2], index=list("bcd"))

        cases = [
            ("eq", 2, [False, False, True, True]),
            ("ne", 2, [True, True, False, False]),
            ("le", 0, [False, False, True, True]),
            ("lt", 0, [False, False, False, True]),
            ("ge", 0, [True, True, True, False]),
            ("gt", 0, [True, True, False, False]),
        ]
        for opname, fill, values in cases:
            expected = pd.Series(values, index=list("abcd"))
            result = getattr(lhs, opname)(rhs, fill_value=fill)
            assert_series_equal(result, expected)
class TestSeriesOperators(TestData):
    """Arithmetic operator behavior for Series: alignment, fills, corner cases."""

    def test_operators_empty_int_corner(self):
        # empty int Series * dict-constructed float Series aligns to the
        # union index and produces all-NaN.
        s1 = Series([], [], dtype=np.int32)
        s2 = Series({"x": 0.0})
        assert_series_equal(s1 * s2, Series([np.nan], index=["x"]))

    def test_ops_datetimelike_align(self):
        # GH 7500
        # datetimelike ops need to align
        dt = Series(date_range("2012-1-1", periods=3, freq="D"))
        dt.iloc[2] = np.nan
        dt2 = dt[::-1]

        expected = Series([timedelta(0), timedelta(0), pd.NaT])
        # name is reset
        result = dt2 - dt
        assert_series_equal(result, expected)

        expected = Series(expected, name=0)
        result = (dt2.to_frame() - dt.to_frame())[0]
        assert_series_equal(result, expected)

    def test_operators_corner(self):
        series = self.ts

        empty = Series([], index=Index([]))

        # adding an empty Series aligns to all-NaN
        result = series + empty
        assert np.isnan(result).all()

        result = empty + Series([], index=Index([]))
        assert len(result) == 0

        # TODO: this returned NotImplemented earlier, what to do?
        # deltas = Series([timedelta(1)] * 5, index=np.arange(5))
        # sub_deltas = deltas[::2]
        # deltas5 = deltas * 5
        # deltas = deltas + sub_deltas

        # float + int
        int_ts = self.ts.astype(int)[:-5]
        added = self.ts + int_ts
        expected = Series(
            self.ts.values[:-5] + int_ts.values, index=self.ts.index[:-5], name="ts"
        )
        tm.assert_series_equal(added[:-5], expected)

    # Class-level table of (flex_method, equivalent_operator, fill_value)
    # triples; consumed by the parametrize on test_operators_combine below.
    pairings = [(Series.div, operator.truediv, 1), (Series.rdiv, ops.rtruediv, 1)]
    for op in ["add", "sub", "mul", "pow", "truediv", "floordiv"]:
        fv = 0
        lop = getattr(Series, op)
        lequiv = getattr(operator, op)
        rop = getattr(Series, "r" + op)
        # bind op at definition time...
        requiv = lambda x, y, op=op: getattr(operator, op)(y, x)
        pairings.append((lop, lequiv, fv))
        pairings.append((rop, requiv, fv))

    @pytest.mark.parametrize("op, equiv_op, fv", pairings)
    def test_operators_combine(self, op, equiv_op, fv):
        def _check_fill(meth, op, a, b, fill_value=0):
            # Build the expected result by aligning manually and applying
            # fill_value wherever exactly one side is missing.
            exp_index = a.index.union(b.index)
            a = a.reindex(exp_index)
            b = b.reindex(exp_index)

            amask = isna(a)
            bmask = isna(b)

            exp_values = []
            for i in range(len(exp_index)):
                with np.errstate(all="ignore"):
                    if amask[i]:
                        if bmask[i]:
                            exp_values.append(np.nan)
                            continue
                        exp_values.append(op(fill_value, b[i]))
                    elif bmask[i]:
                        if amask[i]:
                            exp_values.append(np.nan)
                            continue
                        exp_values.append(op(a[i], fill_value))
                    else:
                        exp_values.append(op(a[i], b[i]))

            result = meth(a, b, fill_value=fill_value)
            expected = Series(exp_values, exp_index)
            assert_series_equal(result, expected)

        a = Series([np.nan, 1.0, 2.0, 3.0, np.nan], index=np.arange(5))
        b = Series([np.nan, 1, np.nan, 3, np.nan, 4.0], index=np.arange(6))

        result = op(a, b)
        exp = equiv_op(a, b)
        assert_series_equal(result, exp)
        _check_fill(op, equiv_op, a, b, fill_value=fv)
        # should accept axis=0 or axis='rows'
        op(a, b, axis=0)

    def test_operators_na_handling(self):
        from decimal import Decimal
        from datetime import date

        # Decimal values with a date index: shifted addition introduces NaN
        # at the unaligned first position.
        s = Series(
            [Decimal("1.3"), Decimal("2.3")], index=[date(2012, 1, 1), date(2012, 1, 2)]
        )

        result = s + s.shift(1)
        result2 = s.shift(1) + s
        assert isna(result[0])
        assert isna(result2[0])

    def test_op_duplicate_index(self):
        # GH14227
        s1 = Series([1, 2], index=[1, 1])
        s2 = Series([10, 10], index=[1, 2])
        result = s1 + s2
        expected = pd.Series([11, 12, np.nan], index=[1, 1, 2])
        assert_series_equal(result, expected)

    def test_divmod(self):
        # GH25557
        a = Series([1, 1, 1, np.nan], index=["a", "b", "c", "d"])
        b = Series([2, np.nan, 1, np.nan], index=["a", "b", "d", "e"])

        result = a.divmod(b)
        expected = divmod(a, b)
        assert_series_equal(result[0], expected[0])
        assert_series_equal(result[1], expected[1])

        result = a.rdivmod(b)
        expected = divmod(b, a)
        assert_series_equal(result[0], expected[0])
        assert_series_equal(result[1], expected[1])
class TestSeriesUnaryOps:
    # __neg__, __pos__, __inv__
    """Unary operators on Series."""

    def test_neg(self):
        # Unary negation must agree with multiplying by -1.
        ser = tm.makeStringSeries()
        ser.name = "series"
        assert_series_equal(-1 * ser, -ser)

    def test_invert(self):
        # ``~mask`` and ``-(mask)`` agree for a boolean mask Series.
        ser = tm.makeStringSeries()
        ser.name = "series"
        mask = ser < 0
        assert_series_equal(~mask, -mask)

View File

@@ -0,0 +1,171 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Period, Series, period_range
from pandas.core.arrays import PeriodArray
import pandas.util.testing as tm
class TestSeriesPeriod:
    """Series backed by PeriodDtype: construction, NaT handling, indexing."""

    def setup_method(self, method):
        # Fresh 10-day daily PeriodIndex-valued Series for every test;
        # several tests below mutate it in place.
        self.series = Series(period_range("2000-01-01", periods=10, freq="D"))

    def test_auto_conversion(self):
        # A list of Period objects infers Period dtype automatically.
        series = Series(list(period_range("2000-01-01", periods=10, freq="D")))
        assert series.dtype == "Period[D]"

        series = pd.Series(
            [pd.Period("2011-01-01", freq="D"), pd.Period("2011-02-01", freq="D")]
        )
        assert series.dtype == "Period[D]"

    def test_getitem(self):
        assert self.series[1] == pd.Period("2000-01-02", freq="D")

        result = self.series[[2, 4]]
        exp = pd.Series(
            [pd.Period("2000-01-03", freq="D"), pd.Period("2000-01-05", freq="D")],
            index=[2, 4],
            dtype="Period[D]",
        )
        tm.assert_series_equal(result, exp)
        assert result.dtype == "Period[D]"

    def test_isna(self):
        # GH 13737
        s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")])
        tm.assert_series_equal(s.isna(), Series([False, True]))
        tm.assert_series_equal(s.notna(), Series([True, False]))

    def test_fillna(self):
        # GH 13737
        s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")])

        res = s.fillna(pd.Period("2012-01", freq="M"))
        exp = Series([pd.Period("2011-01", freq="M"), pd.Period("2012-01", freq="M")])
        tm.assert_series_equal(res, exp)
        assert res.dtype == "Period[M]"

    def test_dropna(self):
        # GH 13737
        s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")])
        tm.assert_series_equal(s.dropna(), Series([pd.Period("2011-01", freq="M")]))

    def test_between(self):
        left, right = self.series[[2, 7]]
        result = self.series.between(left, right)
        expected = (self.series >= left) & (self.series <= right)
        tm.assert_series_equal(result, expected)

    # ---------------------------------------------------------------------
    # NaT support

    @pytest.mark.xfail(reason="PeriodDtype Series not supported yet")
    def test_NaT_scalar(self):
        series = Series([0, 1000, 2000, pd._libs.iNaT], dtype="period[D]")

        val = series[3]
        assert pd.isna(val)

        series[2] = val
        assert pd.isna(series[2])

    @pytest.mark.xfail(reason="PeriodDtype Series not supported yet")
    def test_NaT_cast(self):
        result = Series([np.nan]).astype("period[D]")
        expected = Series([pd.NaT])
        tm.assert_series_equal(result, expected)

    def test_set_none(self):
        # Assigning None into a Period Series coerces to NaT.
        self.series[3] = None
        assert self.series[3] is pd.NaT

        self.series[3:5] = None
        assert self.series[4] is pd.NaT

    def test_set_nan(self):
        # Do we want to allow this?
        self.series[5] = np.nan
        assert self.series[5] is pd.NaT

        self.series[5:7] = np.nan
        assert self.series[6] is pd.NaT

    def test_intercept_astype_object(self):
        expected = self.series.astype("object")

        df = DataFrame({"a": self.series, "b": np.random.randn(len(self.series))})

        result = df.values.squeeze()
        assert (result[:, 0] == expected.values).all()

        df = DataFrame({"a": self.series, "b": ["foo"] * len(self.series)})

        result = df.values.squeeze()
        assert (result[:, 0] == expected.values).all()

    def test_align_series(self, join_type):
        # smoke test: align on a PeriodIndex under every join type fixture
        rng = period_range("1/1/2000", "1/1/2010", freq="A")
        ts = Series(np.random.randn(len(rng)), index=rng)

        ts.align(ts[::2], join=join_type)

    def test_truncate(self):
        # GH 17717
        idx1 = pd.PeriodIndex(
            [pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
        )
        series1 = pd.Series([1, 2, 3], index=idx1)
        result1 = series1.truncate(after="2017-09-02")

        expected_idx1 = pd.PeriodIndex(
            [pd.Period("2017-09-02"), pd.Period("2017-09-02")]
        )
        tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1))

        idx2 = pd.PeriodIndex(
            [pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
        )
        series2 = pd.Series([1, 2, 3], index=idx2)
        result2 = series2.sort_index().truncate(after="2017-09-02")

        expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")])
        tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2))

    @pytest.mark.parametrize(
        "input_vals",
        [
            [Period("2016-01", freq="M"), Period("2016-02", freq="M")],
            [Period("2016-01-01", freq="D"), Period("2016-01-02", freq="D")],
            [
                Period("2016-01-01 00:00:00", freq="H"),
                Period("2016-01-01 01:00:00", freq="H"),
            ],
            [
                Period("2016-01-01 00:00:00", freq="M"),
                Period("2016-01-01 00:01:00", freq="M"),
            ],
            [
                Period("2016-01-01 00:00:00", freq="S"),
                Period("2016-01-01 00:00:01", freq="S"),
            ],
        ],
    )
    def test_end_time_timevalues(self, input_vals):
        # GH 17157
        # Check that the time part of the Period is adjusted by end_time
        # when using the dt accessor on a Series
        input_vals = PeriodArray._from_sequence(np.asarray(input_vals))

        s = Series(input_vals)
        result = s.dt.end_time
        expected = s.apply(lambda x: x.end_time)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("input_vals", [("2001"), ("NaT")])
    def test_to_period(self, input_vals):
        # GH 21205
        expected = Series([input_vals], dtype="Period[D]")
        result = Series([input_vals], dtype="datetime64[ns]").dt.to_period("D")
        tm.assert_series_equal(result, expected)

View File

@@ -0,0 +1,212 @@
import numpy as np
import pytest
from pandas.core.dtypes.common import is_integer
import pandas as pd
from pandas import Index, Series
from pandas.core.indexes.datetimes import Timestamp
import pandas.util.testing as tm
from .common import TestData
class TestSeriesQuantile(TestData):
    """Series.quantile across dtypes, interpolation modes, and edge cases."""

    def test_quantile(self):
        q = self.ts.quantile(0.1)
        assert q == np.percentile(self.ts.dropna(), 10)

        q = self.ts.quantile(0.9)
        assert q == np.percentile(self.ts.dropna(), 90)

        # object dtype
        q = Series(self.ts, dtype=object).quantile(0.9)
        assert q == np.percentile(self.ts.dropna(), 90)

        # datetime64[ns] dtype
        dts = self.ts.index.to_series()
        q = dts.quantile(0.2)
        assert q == Timestamp("2000-01-10 19:12:00")

        # timedelta64[ns] dtype
        tds = dts.diff()
        q = tds.quantile(0.25)
        assert q == pd.to_timedelta("24:00:00")

        # GH7661
        result = Series([np.timedelta64("NaT")]).sum()
        assert result == pd.Timedelta(0)

        # quantiles outside [0, 1] must raise
        msg = "percentiles should all be in the interval \\[0, 1\\]"
        for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
            with pytest.raises(ValueError, match=msg):
                self.ts.quantile(invalid)

    def test_quantile_multi(self):
        qs = [0.1, 0.9]
        result = self.ts.quantile(qs)
        expected = pd.Series(
            [np.percentile(self.ts.dropna(), 10), np.percentile(self.ts.dropna(), 90)],
            index=qs,
            name=self.ts.name,
        )
        tm.assert_series_equal(result, expected)

        # duplicated quantiles are both returned
        dts = self.ts.index.to_series()
        dts.name = "xxx"
        result = dts.quantile((0.2, 0.2))
        expected = Series(
            [Timestamp("2000-01-10 19:12:00"), Timestamp("2000-01-10 19:12:00")],
            index=[0.2, 0.2],
            name="xxx",
        )
        tm.assert_series_equal(result, expected)

        # an empty quantile list yields an empty, float-indexed Series
        result = self.ts.quantile([])
        expected = pd.Series([], name=self.ts.name, index=Index([], dtype=float))
        tm.assert_series_equal(result, expected)

    def test_quantile_interpolation(self):
        # see gh-10174

        # interpolation = linear (default case)
        q = self.ts.quantile(0.1, interpolation="linear")
        assert q == np.percentile(self.ts.dropna(), 10)
        q1 = self.ts.quantile(0.1)
        assert q1 == np.percentile(self.ts.dropna(), 10)

        # test with and without interpolation keyword
        assert q == q1

    def test_quantile_interpolation_dtype(self):
        # GH #10174

        # "lower"/"higher" interpolation on int data preserves integer dtype
        q = pd.Series([1, 3, 4]).quantile(0.5, interpolation="lower")
        assert q == np.percentile(np.array([1, 3, 4]), 50)
        assert is_integer(q)

        q = pd.Series([1, 3, 4]).quantile(0.5, interpolation="higher")
        assert q == np.percentile(np.array([1, 3, 4]), 50)
        assert is_integer(q)

    def test_quantile_nan(self):
        # GH 13098
        s = pd.Series([1, 2, 3, 4, np.nan])
        result = s.quantile(0.5)
        expected = 2.5
        assert result == expected

        # all nan/empty
        cases = [Series([]), Series([np.nan, np.nan])]

        for s in cases:
            res = s.quantile(0.5)
            assert np.isnan(res)

            res = s.quantile([0.5])
            tm.assert_series_equal(res, pd.Series([np.nan], index=[0.5]))

            res = s.quantile([0.2, 0.3])
            tm.assert_series_equal(res, pd.Series([np.nan, np.nan], index=[0.2, 0.3]))

    @pytest.mark.parametrize(
        "case",
        [
            [
                pd.Timestamp("2011-01-01"),
                pd.Timestamp("2011-01-02"),
                pd.Timestamp("2011-01-03"),
            ],
            [
                pd.Timestamp("2011-01-01", tz="US/Eastern"),
                pd.Timestamp("2011-01-02", tz="US/Eastern"),
                pd.Timestamp("2011-01-03", tz="US/Eastern"),
            ],
            [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
            # NaT
            [
                pd.Timestamp("2011-01-01"),
                pd.Timestamp("2011-01-02"),
                pd.Timestamp("2011-01-03"),
                pd.NaT,
            ],
            [
                pd.Timestamp("2011-01-01", tz="US/Eastern"),
                pd.Timestamp("2011-01-02", tz="US/Eastern"),
                pd.Timestamp("2011-01-03", tz="US/Eastern"),
                pd.NaT,
            ],
            [
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
                pd.Timedelta("3 days"),
                pd.NaT,
            ],
        ],
    )
    def test_quantile_box(self, case):
        # scalar and list quantiles of datetimelike boxed values
        s = pd.Series(case, name="XXX")
        res = s.quantile(0.5)
        assert res == case[1]

        res = s.quantile([0.5])
        exp = pd.Series([case[1]], index=[0.5], name="XXX")
        tm.assert_series_equal(res, exp)

    def test_datetime_timedelta_quantiles(self):
        # covers #9694
        assert pd.isna(Series([], dtype="M8[ns]").quantile(0.5))
        assert pd.isna(Series([], dtype="m8[ns]").quantile(0.5))

    def test_quantile_nat(self):
        res = Series([pd.NaT, pd.NaT]).quantile(0.5)
        assert res is pd.NaT

        res = Series([pd.NaT, pd.NaT]).quantile([0.5])
        tm.assert_series_equal(res, pd.Series([pd.NaT], index=[0.5]))

    @pytest.mark.parametrize(
        "values, dtype",
        [([0, 0, 0, 1, 2, 3], "Sparse[int]"), ([0.0, None, 1.0, 2.0], "Sparse[float]")],
    )
    def test_quantile_sparse(self, values, dtype):
        # sparse quantile should match the equivalent dense computation
        ser = pd.Series(values, dtype=dtype)
        result = ser.quantile([0.5])
        expected = pd.Series(np.asarray(ser)).quantile([0.5])
        tm.assert_series_equal(result, expected)

    def test_quantile_empty(self):
        # floats
        s = Series([], dtype="float64")

        res = s.quantile(0.5)
        assert np.isnan(res)

        res = s.quantile([0.5])
        exp = Series([np.nan], index=[0.5])
        tm.assert_series_equal(res, exp)

        # int
        s = Series([], dtype="int64")

        res = s.quantile(0.5)
        assert np.isnan(res)

        res = s.quantile([0.5])
        exp = Series([np.nan], index=[0.5])
        tm.assert_series_equal(res, exp)

        # datetime
        s = Series([], dtype="datetime64[ns]")

        res = s.quantile(0.5)
        assert res is pd.NaT

        res = s.quantile([0.5])
        exp = Series([pd.NaT], index=[0.5])
        tm.assert_series_equal(res, exp)

View File

@@ -0,0 +1,567 @@
from itertools import chain, product
import numpy as np
from numpy import nan
import pytest
from pandas._libs.algos import Infinity, NegInfinity
from pandas._libs.tslib import iNaT
import pandas.util._test_decorators as td
from pandas import NaT, Series, Timestamp, date_range
from pandas.api.types import CategoricalDtype
from pandas.tests.series.common import TestData
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal
class TestSeriesRank(TestData):
    """Series.rank: tie methods, na_option, pct, inf/categorical handling."""

    # Shared input and the expected ranks under each tie-breaking method.
    s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3])

    results = {
        "average": np.array([1.5, 5.5, 7.0, 3.5, nan, 3.5, 1.5, 8.0, nan, 5.5]),
        "min": np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]),
        "max": np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]),
        "first": np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]),
        "dense": np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]),
    }

    def test_rank(self):
        pytest.importorskip("scipy.stats.special")
        rankdata = pytest.importorskip("scipy.stats.rankdata")

        self.ts[::2] = np.nan
        self.ts[:10][::3] = 4.0

        # object-dtype ranking must agree with numeric ranking
        ranks = self.ts.rank()
        oranks = self.ts.astype("O").rank()

        assert_series_equal(ranks, oranks)

        mask = np.isnan(self.ts)
        filled = self.ts.fillna(np.inf)

        # rankdata returns a ndarray
        exp = Series(rankdata(filled), index=filled.index, name="ts")
        exp[mask] = np.nan

        tm.assert_series_equal(ranks, exp)

        iseries = Series(np.arange(5).repeat(2))

        iranks = iseries.rank()
        exp = iseries.astype(float).rank()
        assert_series_equal(iranks, exp)

        # pct=True divides by the number of non-NaN values
        iseries = Series(np.arange(5)) + 1.0
        exp = iseries / 5.0
        iranks = iseries.rank(pct=True)

        assert_series_equal(iranks, exp)

        iseries = Series(np.repeat(1, 100))
        exp = Series(np.repeat(0.505, 100))
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        iseries[1] = np.nan
        exp = Series(np.repeat(50.0 / 99.0, 100))
        exp[1] = np.nan
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        iseries = Series(np.arange(5)) + 1.0
        iseries[4] = np.nan
        exp = iseries / 4.0
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        iseries = Series(np.repeat(np.nan, 100))
        exp = iseries.copy()
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        iseries = Series(np.arange(5)) + 1
        iseries[4] = np.nan
        exp = iseries / 4.0
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        rng = date_range("1/1/1990", periods=5)
        iseries = Series(np.arange(5), rng) + 1
        iseries.iloc[4] = np.nan
        exp = iseries / 4.0
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        # tiny magnitudes must still be ordered correctly
        iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1])
        exp = Series([2, 1, 3, 5, 4, 6.0])
        iranks = iseries.rank()
        assert_series_equal(iranks, exp)

        # GH 5968
        iseries = Series(["3 day", "1 day 10m", "-2 day", NaT], dtype="m8[ns]")
        exp = Series([3, 2, 1, np.nan])
        iranks = iseries.rank()
        assert_series_equal(iranks, exp)

        values = np.array(
            [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40],
            dtype="float64",
        )
        random_order = np.random.permutation(len(values))
        iseries = Series(values[random_order])
        exp = Series(random_order + 1.0, dtype="float64")
        iranks = iseries.rank()
        assert_series_equal(iranks, exp)

    def test_rank_categorical(self):
        # GH issue #15420 rank incorrectly orders ordered categories

        # Test ascending/descending ranking for ordered categoricals
        exp = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        exp_desc = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
        ordered = Series(
            ["first", "second", "third", "fourth", "fifth", "sixth"]
        ).astype(
            CategoricalDtype(
                categories=["first", "second", "third", "fourth", "fifth", "sixth"],
                ordered=True,
            )
        )
        assert_series_equal(ordered.rank(), exp)
        assert_series_equal(ordered.rank(ascending=False), exp_desc)

        # Unordered categoricals should be ranked as objects
        unordered = Series(
            ["first", "second", "third", "fourth", "fifth", "sixth"]
        ).astype(
            CategoricalDtype(
                categories=["first", "second", "third", "fourth", "fifth", "sixth"],
                ordered=False,
            )
        )
        exp_unordered = Series([2.0, 4.0, 6.0, 3.0, 1.0, 5.0])
        res = unordered.rank()
        assert_series_equal(res, exp_unordered)

        unordered1 = Series([1, 2, 3, 4, 5, 6]).astype(
            CategoricalDtype([1, 2, 3, 4, 5, 6], False)
        )
        exp_unordered1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        res1 = unordered1.rank()
        assert_series_equal(res1, exp_unordered1)

        # Test na_option for rank data
        na_ser = Series(
            ["first", "second", "third", "fourth", "fifth", "sixth", np.NaN]
        ).astype(
            CategoricalDtype(
                ["first", "second", "third", "fourth", "fifth", "sixth", "seventh"],
                True,
            )
        )
        exp_top = Series([2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 1.0])
        exp_bot = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
        exp_keep = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.NaN])

        assert_series_equal(na_ser.rank(na_option="top"), exp_top)
        assert_series_equal(na_ser.rank(na_option="bottom"), exp_bot)
        assert_series_equal(na_ser.rank(na_option="keep"), exp_keep)

        # Test na_option for rank data with ascending False
        exp_top = Series([7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
        exp_bot = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 7.0])
        exp_keep = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, np.NaN])

        assert_series_equal(na_ser.rank(na_option="top", ascending=False), exp_top)
        assert_series_equal(na_ser.rank(na_option="bottom", ascending=False), exp_bot)
        assert_series_equal(na_ser.rank(na_option="keep", ascending=False), exp_keep)

        # Test invalid values for na_option
        msg = "na_option must be one of 'keep', 'top', or 'bottom'"

        with pytest.raises(ValueError, match=msg):
            na_ser.rank(na_option="bad", ascending=False)

        # invalid type
        with pytest.raises(ValueError, match=msg):
            na_ser.rank(na_option=True, ascending=False)

        # Test with pct=True
        na_ser = Series(["first", "second", "third", "fourth", np.NaN]).astype(
            CategoricalDtype(["first", "second", "third", "fourth"], True)
        )
        exp_top = Series([0.4, 0.6, 0.8, 1.0, 0.2])
        exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.0])
        exp_keep = Series([0.25, 0.5, 0.75, 1.0, np.NaN])

        assert_series_equal(na_ser.rank(na_option="top", pct=True), exp_top)
        assert_series_equal(na_ser.rank(na_option="bottom", pct=True), exp_bot)
        assert_series_equal(na_ser.rank(na_option="keep", pct=True), exp_keep)

    def test_rank_signature(self):
        s = Series([0, 1])
        s.rank(method="average")
        # a positional string must be rejected as an axis, not a method
        msg = (
            "No axis named average for object type"
            " <class 'pandas.core.series.Series'>"
        )
        with pytest.raises(ValueError, match=msg):
            s.rank("average")

    @pytest.mark.parametrize(
        "contents,dtype",
        [
            (
                [
                    -np.inf,
                    -50,
                    -1,
                    -1e-20,
                    -1e-25,
                    -1e-50,
                    0,
                    1e-40,
                    1e-20,
                    1e-10,
                    2,
                    40,
                    np.inf,
                ],
                "float64",
            ),
            (
                [
                    -np.inf,
                    -50,
                    -1,
                    -1e-20,
                    -1e-25,
                    -1e-45,
                    0,
                    1e-40,
                    1e-20,
                    1e-10,
                    2,
                    40,
                    np.inf,
                ],
                "float32",
            ),
            ([np.iinfo(np.uint8).min, 1, 2, 100, np.iinfo(np.uint8).max], "uint8"),
            pytest.param(
                [
                    np.iinfo(np.int64).min,
                    -100,
                    0,
                    1,
                    9999,
                    100000,
                    1e10,
                    np.iinfo(np.int64).max,
                ],
                "int64",
                marks=pytest.mark.xfail(
                    reason="iNaT is equivalent to minimum value of dtype"
                    "int64 pending issue GH#16674"
                ),
            ),
            ([NegInfinity(), "1", "A", "BA", "Ba", "C", Infinity()], "object"),
        ],
    )
    def test_rank_inf(self, contents, dtype):
        dtype_na_map = {
            "float64": np.nan,
            "float32": np.nan,
            "int64": iNaT,
            "object": None,
        }
        # Insert nans at random positions if underlying dtype has missing
        # value. Then adjust the expected order by adding nans accordingly
        # This is for testing whether rank calculation is affected
        # when values are interwined with nan values.
        values = np.array(contents, dtype=dtype)
        exp_order = np.array(range(len(values)), dtype="float64") + 1.0
        if dtype in dtype_na_map:
            na_value = dtype_na_map[dtype]
            nan_indices = np.random.choice(range(len(values)), 5)
            values = np.insert(values, nan_indices, na_value)
            exp_order = np.insert(exp_order, nan_indices, np.nan)
        # shuffle the testing array and expected results in the same way
        random_order = np.random.permutation(len(values))
        iseries = Series(values[random_order])
        exp = Series(exp_order[random_order], dtype="float64")
        iranks = iseries.rank()
        assert_series_equal(iranks, exp)

    def test_rank_tie_methods(self):
        s = self.s

        def _check(s, expected, method="average"):
            result = s.rank(method=method)
            tm.assert_series_equal(result, Series(expected))

        # method="first" is not defined for object dtype
        dtypes = [None, object]
        disabled = {(object, "first")}
        results = self.results

        for method, dtype in product(results, dtypes):
            if (dtype, method) in disabled:
                continue
            series = s if dtype is None else s.astype(dtype)
            _check(series, results[method], method=method)

    @td.skip_if_no_scipy
    @pytest.mark.parametrize("ascending", [True, False])
    @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
    @pytest.mark.parametrize("na_option", ["top", "bottom", "keep"])
    def test_rank_tie_methods_on_infs_nans(self, method, na_option, ascending):
        # three tied chunks: -inf block, NA block, +inf block
        dtypes = [
            ("object", None, Infinity(), NegInfinity()),
            ("float64", np.nan, np.inf, -np.inf),
        ]
        chunk = 3
        disabled = {("object", "first")}

        def _check(s, method, na_option, ascending):
            exp_ranks = {
                "average": ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
                "min": ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
                "max": ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
                "first": ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
                "dense": ([1, 1, 1], [2, 2, 2], [3, 3, 3]),
            }
            ranks = exp_ranks[method]
            if na_option == "top":
                order = [ranks[1], ranks[0], ranks[2]]
            elif na_option == "bottom":
                order = [ranks[0], ranks[2], ranks[1]]
            else:
                order = [ranks[0], [np.nan] * chunk, ranks[1]]
            expected = order if ascending else order[::-1]
            expected = list(chain.from_iterable(expected))
            result = s.rank(method=method, na_option=na_option, ascending=ascending)
            tm.assert_series_equal(result, Series(expected, dtype="float64"))

        for dtype, na_value, pos_inf, neg_inf in dtypes:
            in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
            iseries = Series(in_arr, dtype=dtype)
            if (dtype, method) in disabled:
                continue
            _check(iseries, method, na_option, ascending)

    def test_rank_desc_mix_nans_infs(self):
        # GH 19538
        # check descending ranking when mix nans and infs
        iseries = Series([1, np.nan, np.inf, -np.inf, 25])
        result = iseries.rank(ascending=False)
        exp = Series([3, np.nan, 1, 4, 2], dtype="float64")
        tm.assert_series_equal(result, exp)

    def test_rank_methods_series(self):
        pytest.importorskip("scipy.stats.special")
        rankdata = pytest.importorskip("scipy.stats.rankdata")

        xs = np.random.randn(9)
        xs = np.concatenate([xs[i:] for i in range(0, 9, 2)])  # add duplicates
        np.random.shuffle(xs)

        index = [chr(ord("a") + i) for i in range(len(xs))]

        # compare every method against scipy's rankdata at several scales
        for vals in [xs, xs + 1e6, xs * 1e-6]:
            ts = Series(vals, index=index)

            for m in ["average", "min", "max", "first", "dense"]:
                result = ts.rank(method=m)
                sprank = rankdata(vals, m if m != "first" else "ordinal")
                expected = Series(sprank, index=index).astype("float64")
                tm.assert_series_equal(result, expected)

    def test_rank_dense_method(self):
        dtypes = ["O", "f8", "i8"]
        in_out = [
            ([1], [1]),
            ([2], [1]),
            ([0], [1]),
            ([2, 2], [1, 1]),
            ([1, 2, 3], [1, 2, 3]),
            ([4, 2, 1], [3, 2, 1]),
            ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]),
            ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5]),
        ]

        for ser, exp in in_out:
            for dtype in dtypes:
                s = Series(ser).astype(dtype)
                result = s.rank(method="dense")
                expected = Series(exp).astype(result.dtype)
                assert_series_equal(result, expected)

    def test_rank_descending(self):
        dtypes = ["O", "f8", "i8"]

        for dtype, method in product(dtypes, self.results):
            if "i" in dtype:
                s = self.s.dropna()
            else:
                s = self.s.astype(dtype)

            # descending rank must equal ascending rank of (max - x)
            res = s.rank(ascending=False)
            expected = (s.max() - s).rank()
            assert_series_equal(res, expected)

            if method == "first" and dtype == "O":
                continue

            expected = (s.max() - s).rank(method=method)
            res2 = s.rank(method=method, ascending=False)
            assert_series_equal(res2, expected)

    def test_rank_int(self):
        s = self.s.dropna().astype("i8")

        for method, res in self.results.items():
            result = s.rank(method=method)
            expected = Series(res).dropna()
            expected.index = result.index
            assert_series_equal(result, expected)

    def test_rank_object_bug(self):
        # GH 13445

        # smoke tests
        Series([np.nan] * 32).astype(object).rank(ascending=True)
        Series([np.nan] * 32).astype(object).rank(ascending=False)

    def test_rank_modify_inplace(self):
        # GH 18521
        # Check rank does not mutate series
        s = Series([Timestamp("2017-01-05 10:20:27.569000"), NaT])
        expected = s.copy()

        s.rank()
        result = s
        assert_series_equal(result, expected)
# GH15630, pct should be on 100% basis when method='dense'
@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.0, 1.0]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 2, 2.0 / 2, 2.0 / 2]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [1.0 / 3, 1.0 / 3, 3.0 / 3, 3.0 / 3, 2.0 / 3]),
([1, 1, 3, 3, 5, 5], [1.0 / 3, 1.0 / 3, 2.0 / 3, 2.0 / 3, 3.0 / 3, 3.0 / 3]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_dense_pct(dtype, ser, exp):
s = Series(ser).astype(dtype)
result = s.rank(method="dense", pct=True)
expected = Series(exp).astype(result.dtype)
assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.0 / 2, 1.0 / 2]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 3, 2.0 / 3, 2.0 / 3]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [1.0 / 5, 1.0 / 5, 4.0 / 5, 4.0 / 5, 3.0 / 5]),
([1, 1, 3, 3, 5, 5], [1.0 / 6, 1.0 / 6, 3.0 / 6, 3.0 / 6, 5.0 / 6, 5.0 / 6]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_min_pct(dtype, ser, exp):
s = Series(ser).astype(dtype)
result = s.rank(method="min", pct=True)
expected = Series(exp).astype(result.dtype)
assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.0, 1.0]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 3, 3.0 / 3, 3.0 / 3]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [2.0 / 5, 2.0 / 5, 5.0 / 5, 5.0 / 5, 3.0 / 5]),
([1, 1, 3, 3, 5, 5], [2.0 / 6, 2.0 / 6, 4.0 / 6, 4.0 / 6, 6.0 / 6, 6.0 / 6]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_max_pct(dtype, ser, exp):
s = Series(ser).astype(dtype)
result = s.rank(method="max", pct=True)
expected = Series(exp).astype(result.dtype)
assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.5 / 2, 1.5 / 2]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 3, 2.5 / 3, 2.5 / 3]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [1.5 / 5, 1.5 / 5, 4.5 / 5, 4.5 / 5, 3.0 / 5]),
([1, 1, 3, 3, 5, 5], [1.5 / 6, 1.5 / 6, 3.5 / 6, 3.5 / 6, 5.5 / 6, 5.5 / 6]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_average_pct(dtype, ser, exp):
s = Series(ser).astype(dtype)
result = s.rank(method="average", pct=True)
expected = Series(exp).astype(result.dtype)
assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["f8", "i8"])
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.0 / 2, 2.0 / 2.0]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [1.0 / 5, 2.0 / 5, 4.0 / 5, 5.0 / 5, 3.0 / 5]),
([1, 1, 3, 3, 5, 5], [1.0 / 6, 2.0 / 6, 3.0 / 6, 4.0 / 6, 5.0 / 6, 6.0 / 6]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_first_pct(dtype, ser, exp):
s = Series(ser).astype(dtype)
result = s.rank(method="first", pct=True)
expected = Series(exp).astype(result.dtype)
assert_series_equal(result, expected)
@pytest.mark.single
@pytest.mark.high_memory
def test_pct_max_many_rows():
    # GH 18271
    # The top pct rank must be exactly 1.0 even for more than 2**24 rows
    # (beyond 2**24, float32 can no longer represent every integer exactly).
    s = Series(np.arange(2 ** 24 + 1))
    result = s.rank(pct=True).max()
    assert result == 1

View File

@@ -0,0 +1,309 @@
import numpy as np
import pytest
import pandas as pd
import pandas.util.testing as tm
from .common import TestData
class TestSeriesReplace(TestData):
    """Tests for Series.replace: scalar, list and dict replacers, inplace
    operation, dtype upcasting on replacement, and error handling for
    malformed arguments."""

    def test_replace(self):
        N = 100
        ser = pd.Series(np.random.randn(N))
        ser[0:4] = np.nan
        ser[6:10] = 0
        # replace list with a single value
        ser.replace([np.nan], -1, inplace=True)
        exp = ser.fillna(-1)
        tm.assert_series_equal(ser, exp)
        rs = ser.replace(0.0, np.nan)
        ser[ser == 0.0] = np.nan
        tm.assert_series_equal(rs, ser)
        ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object)
        ser[:5] = np.nan
        ser[6:10] = "foo"
        ser[20:30] = "bar"
        # replace list with a single value
        rs = ser.replace([np.nan, "foo", "bar"], -1)
        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -1).all()
        assert (rs[20:30] == -1).all()
        # non-inplace replace must leave the original untouched
        assert (pd.isna(ser[:5])).all()
        # replace with different values
        rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})
        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -2).all()
        assert (rs[20:30] == -3).all()
        assert (pd.isna(ser[:5])).all()
        # replace with different values with 2 lists
        rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
        tm.assert_series_equal(rs, rs2)
        # replace inplace
        ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
        assert (ser[:5] == -1).all()
        assert (ser[6:10] == -1).all()
        assert (ser[20:30] == -1).all()
        ser = pd.Series([np.nan, 0, np.inf])
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
        ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT])
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
        filled = ser.copy()
        filled[4] = 0
        tm.assert_series_equal(ser.replace(np.inf, 0), filled)
        ser = pd.Series(self.ts.index)
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
        # malformed
        msg = r"Replacement lists must match in length\. Expecting 3 got 2"
        with pytest.raises(ValueError, match=msg):
            ser.replace([1, 2, 3], [np.nan, 0])
        # make sure that we aren't just masking a TypeError because bools don't
        # implement indexing
        with pytest.raises(TypeError, match="Cannot compare types .+"):
            ser.replace([1, 2], [np.nan, 0])
        ser = pd.Series([0, 1, 2, 3, 4])
        result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
        tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0]))

    def test_replace_gh5319(self):
        # API change from 0.12?
        # GH 5319: replace with no replacement value falls back to pad-fill
        ser = pd.Series([0, np.nan, 2, 3, 4])
        expected = ser.ffill()
        result = ser.replace([np.nan])
        tm.assert_series_equal(result, expected)
        ser = pd.Series([0, np.nan, 2, 3, 4])
        expected = ser.ffill()
        result = ser.replace(np.nan)
        tm.assert_series_equal(result, expected)
        # GH 5797
        ser = pd.Series(pd.date_range("20130101", periods=5))
        expected = ser.copy()
        expected.loc[2] = pd.Timestamp("20120101")
        result = ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")})
        tm.assert_series_equal(result, expected)
        result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101"))
        tm.assert_series_equal(result, expected)
        # GH 11792: Test with replacing NaT in a list with tz data
        ts = pd.Timestamp("2015/01/01", tz="UTC")
        s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")])
        result = s.replace([np.nan, pd.NaT], pd.Timestamp.min)
        expected = pd.Series([pd.Timestamp.min, ts], dtype=object)
        tm.assert_series_equal(expected, result)

    def test_replace_with_single_list(self):
        ser = pd.Series([0, 1, 2, 3, 4])
        result = ser.replace([1, 2, 3])
        tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))
        s = ser.copy()
        s.replace([1, 2, 3], inplace=True)
        tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))
        # make sure things don't get corrupted when fillna call fails
        s = ser.copy()
        msg = (
            r"Invalid fill method\. Expecting pad \(ffill\) or backfill"
            r" \(bfill\)\. Got crash_cymbal"
        )
        with pytest.raises(ValueError, match=msg):
            s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
        tm.assert_series_equal(s, ser)

    def test_replace_with_empty_list(self):
        # GH 21977: replacing with an empty list is a no-op
        s = pd.Series([[1], [2, 3], [], np.nan, [4]])
        expected = s
        result = s.replace([], np.nan)
        tm.assert_series_equal(result, expected)
        # GH 19266: list-valued replacements are rejected
        with pytest.raises(ValueError, match="cannot assign mismatch"):
            s.replace({np.nan: []})
        with pytest.raises(ValueError, match="cannot assign mismatch"):
            s.replace({np.nan: ["dummy", "alt"]})

    def test_replace_mixed_types(self):
        s = pd.Series(np.arange(5), dtype="int64")

        # helper: inplace and non-inplace replace must agree
        def check_replace(to_rep, val, expected):
            sc = s.copy()
            r = s.replace(to_rep, val)
            sc.replace(to_rep, val, inplace=True)
            tm.assert_series_equal(expected, r)
            tm.assert_series_equal(expected, sc)

        # MUST upcast to float
        e = pd.Series([0.0, 1.0, 2.0, 3.0, 4.0])
        tr, v = [3], [3.0]
        check_replace(tr, v, e)
        # MUST upcast to float
        e = pd.Series([0, 1, 2, 3.5, 4])
        tr, v = [3], [3.5]
        check_replace(tr, v, e)
        # casts to object
        e = pd.Series([0, 1, 2, 3.5, "a"])
        tr, v = [3, 4], [3.5, "a"]
        check_replace(tr, v, e)
        # again casts to object
        e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")])
        tr, v = [3, 4], [3.5, pd.Timestamp("20130101")]
        check_replace(tr, v, e)
        # casts to object
        e = pd.Series([0, 1, 2, 3.5, True], dtype="object")
        tr, v = [3, 4], [3.5, True]
        check_replace(tr, v, e)
        # test an object with dates + floats + integers + strings
        dr = (
            pd.date_range("1/1/2001", "1/10/2001", freq="D")
            .to_series()
            .reset_index(drop=True)
        )
        result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"])
        expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object)
        tm.assert_series_equal(result, expected)

    def test_replace_bool_with_string_no_op(self):
        s = pd.Series([True, False, True])
        result = s.replace("fun", "in-the-sun")
        tm.assert_series_equal(s, result)

    def test_replace_bool_with_string(self):
        # nonexistent elements
        s = pd.Series([True, False, True])
        result = s.replace(True, "2u")
        expected = pd.Series(["2u", False, "2u"])
        tm.assert_series_equal(expected, result)

    def test_replace_bool_with_bool(self):
        s = pd.Series([True, False, True])
        result = s.replace(True, False)
        expected = pd.Series([False] * len(s))
        tm.assert_series_equal(expected, result)

    def test_replace_with_dict_with_bool_keys(self):
        s = pd.Series([True, False, True])
        with pytest.raises(TypeError, match="Cannot compare types .+"):
            s.replace({"asdf": "asdb", True: "yes"})

    def test_replace2(self):
        N = 100
        ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object)
        ser[:5] = np.nan
        ser[6:10] = "foo"
        ser[20:30] = "bar"
        # replace list with a single value
        rs = ser.replace([np.nan, "foo", "bar"], -1)
        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -1).all()
        assert (rs[20:30] == -1).all()
        assert (pd.isna(ser[:5])).all()
        # replace with different values
        rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})
        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -2).all()
        assert (rs[20:30] == -3).all()
        assert (pd.isna(ser[:5])).all()
        # replace with different values with 2 lists
        rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
        tm.assert_series_equal(rs, rs2)
        # replace inplace
        ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
        assert (ser[:5] == -1).all()
        assert (ser[6:10] == -1).all()
        assert (ser[20:30] == -1).all()

    def test_replace_with_empty_dictlike(self):
        # GH 15289: an empty dict/Series replacer is a no-op
        s = pd.Series(list("abcd"))
        tm.assert_series_equal(s, s.replace(dict()))
        tm.assert_series_equal(s, s.replace(pd.Series([])))

    def test_replace_string_with_number(self):
        # GH 15743: the string "2" must not match the number 2
        s = pd.Series([1, 2, 3])
        result = s.replace("2", np.nan)
        expected = pd.Series([1, 2, 3])
        tm.assert_series_equal(expected, result)

    def test_replace_replacer_equals_replacement(self):
        # GH 20656
        # make sure all replacers are matching against original values
        s = pd.Series(["a", "b"])
        expected = pd.Series(["b", "a"])
        result = s.replace({"a": "b", "b": "a"})
        tm.assert_series_equal(expected, result)

    def test_replace_unicode_with_number(self):
        # GH 15743
        s = pd.Series([1, 2, 3])
        result = s.replace("2", np.nan)
        expected = pd.Series([1, 2, 3])
        tm.assert_series_equal(expected, result)

    def test_replace_mixed_types_with_string(self):
        # Testing mixed
        s = pd.Series([1, 2, 3, "4", 4, 5])
        result = s.replace([2, "4"], np.nan)
        expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
        tm.assert_series_equal(expected, result)

    @pytest.mark.parametrize(
        "categorical, numeric",
        [
            (pd.Categorical("A", categories=["A", "B"]), [1]),
            (pd.Categorical(("A",), categories=["A", "B"]), [1]),
            (pd.Categorical(("A", "B"), categories=["A", "B"]), [1, 2]),
        ],
    )
    def test_replace_categorical(self, categorical, numeric):
        # GH 24971
        # Do not check if dtypes are equal due to a known issue that
        # Categorical.replace sometimes coerces to object (GH 23305)
        s = pd.Series(categorical)
        result = s.replace({"A": 1, "B": 2})
        expected = pd.Series(numeric)
        tm.assert_series_equal(expected, result, check_dtype=False)

    def test_replace_with_no_overflowerror(self):
        # GH 25616
        # casts to object without Exception from OverflowError
        s = pd.Series([0, 1, 2, 3, 4])
        result = s.replace([3], ["100000000000000000000"])
        expected = pd.Series([0, 1, 2, "100000000000000000000", 4])
        tm.assert_series_equal(result, expected)
        s = pd.Series([0, "100000000000000000000", "100000000000000000001"])
        result = s.replace(["100000000000000000000"], [1])
        expected = pd.Series([0, 1, "100000000000000000001"])
        tm.assert_series_equal(result, expected)

View File

@@ -0,0 +1,492 @@
from datetime import datetime, timedelta
import numpy as np
import pytest
import pandas as pd
from pandas import (
Categorical,
DataFrame,
Index,
Series,
date_range,
option_context,
period_range,
timedelta_range,
)
from pandas.core.base import StringMixin
from pandas.core.index import MultiIndex
import pandas.util.testing as tm
from .common import TestData
class TestSeriesRepr(TestData):
    """Smoke and exact-output tests for Series.__repr__/__str__: names,
    MultiIndex alignment, unicode, empty series, and latex repr."""

    def test_multilevel_name_print(self):
        index = MultiIndex(
            levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
            codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
            names=["first", "second"],
        )
        s = Series(range(len(index)), index=index, name="sth")
        expected = [
            "first second",
            "foo one 0",
            " two 1",
            " three 2",
            "bar one 3",
            " two 4",
            "baz two 5",
            " three 6",
            "qux one 7",
            " two 8",
            " three 9",
            "Name: sth, dtype: int64",
        ]
        expected = "\n".join(expected)
        assert repr(s) == expected

    def test_name_printing(self):
        # Test small Series.
        s = Series([0, 1, 2])
        s.name = "test"
        assert "Name: test" in repr(s)
        s.name = None
        assert "Name:" not in repr(s)
        # Test big Series (diff code path).
        s = Series(range(1000))
        s.name = "test"
        assert "Name: test" in repr(s)
        s.name = None
        assert "Name:" not in repr(s)
        s = Series(index=date_range("20010101", "20020101"), name="test")
        assert "Name: test" in repr(s)

    def test_repr(self):
        # smoke tests: repr/str must not raise for a variety of series
        str(self.ts)
        str(self.series)
        str(self.series.astype(int))
        str(self.objSeries)
        str(Series(tm.randn(1000), index=np.arange(1000)))
        str(Series(tm.randn(1000), index=np.arange(1000, 0, step=-1)))
        # empty
        str(self.empty)
        # with NaNs
        self.series[5:7] = np.NaN
        str(self.series)
        # with Nones
        ots = self.ts.astype("O")
        ots[::2] = None
        repr(ots)
        # various names
        for name in [
            "",
            1,
            1.2,
            "foo",
            "\u03B1\u03B2\u03B3",
            "loooooooooooooooooooooooooooooooooooooooooooooooooooong",
            ("foo", "bar", "baz"),
            (1, 2),
            ("foo", 1, 2.3),
            ("\u03B1", "\u03B2", "\u03B3"),
            ("\u03B1", "bar"),
        ]:
            self.series.name = name
            repr(self.series)
        biggie = Series(
            tm.randn(1000), index=np.arange(1000), name=("foo", "bar", "baz")
        )
        repr(biggie)
        # 0 as name
        ser = Series(np.random.randn(100), name=0)
        rep_str = repr(ser)
        assert "Name: 0" in rep_str
        # tidy repr
        ser = Series(np.random.randn(1001), name=0)
        rep_str = repr(ser)
        assert "Name: 0" in rep_str
        ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"])
        assert "\t" not in repr(ser)
        assert "\r" not in repr(ser)
        assert "a\n" not in repr(ser)
        # with empty series (#4651)
        s = Series([], dtype=np.int64, name="foo")
        assert repr(s) == "Series([], Name: foo, dtype: int64)"
        s = Series([], dtype=np.int64, name=None)
        assert repr(s) == "Series([], dtype: int64)"

    def test_tidy_repr(self):
        a = Series(["\u05d0"] * 1000)
        a.name = "title1"
        repr(a)  # should not raise exception

    def test_repr_bool_fails(self, capsys):
        s = Series([DataFrame(np.random.randn(2, 2)) for i in range(5)])
        # It works (with no Cython exception barf)!
        repr(s)
        captured = capsys.readouterr()
        assert captured.err == ""

    def test_repr_name_iterable_indexable(self):
        s = Series([1, 2, 3], name=np.int64(3))
        # it works!
        repr(s)
        s.name = ("\u05d0",) * 2
        repr(s)

    def test_repr_should_return_str(self):
        # https://docs.python.org/3/reference/datamodel.html#object.__repr__
        # ...The return value must be a string object.
        # (str on py2.x, str (unicode) on py3)
        data = [8, 5, 3, 5]
        index1 = ["\u03c3", "\u03c4", "\u03c5", "\u03c6"]
        df = Series(data, index=index1)
        # BUG FIX: was `assert type(df.__repr__() == str)`, which asserts
        # the *type of a bool* and is therefore always true.
        assert type(df.__repr__()) == str  # both py2 / 3

    def test_repr_max_rows(self):
        # GH 6863
        with pd.option_context("max_rows", None):
            str(Series(range(1001)))  # should not raise exception

    def test_unicode_string_with_unicode(self):
        df = Series(["\u05d0"], name="\u05d1")
        str(df)

    def test_str_to_bytes_raises(self):
        # GH 26447
        df = Series(["abc"], name="abc")
        msg = "^'str' object cannot be interpreted as an integer$"
        with pytest.raises(TypeError, match=msg):
            bytes(df)

    def test_timeseries_repr_object_dtype(self):
        index = Index(
            [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], dtype=object
        )
        ts = Series(np.random.randn(len(index)), index)
        repr(ts)
        ts = tm.makeTimeSeries(1000)
        assert repr(ts).splitlines()[-1].startswith("Freq:")
        ts2 = ts.iloc[np.random.randint(0, len(ts) - 1, 400)]
        repr(ts2).splitlines()[-1]

    def test_latex_repr(self):
        result = r"""\begin{tabular}{ll}
\toprule
{} & 0 \\
\midrule
0 & $\alpha$ \\
1 & b \\
2 & c \\
\bottomrule
\end{tabular}
"""
        with option_context("display.latex.escape", False, "display.latex.repr", True):
            s = Series([r"$\alpha$", "b", "c"])
            assert result == s._repr_latex_()
        # outside the option context latex repr is disabled again
        assert s._repr_latex_() is None

    def test_index_repr_in_frame_with_nan(self):
        # see gh-25061
        i = Index([1, np.nan])
        s = Series([1, 2], index=i)
        exp = """1.0 1\nNaN 2\ndtype: int64"""
        assert repr(s) == exp
class TestCategoricalRepr:
    """Exact-output repr tests for categorical Series: int, datetime
    (naive and tz-aware), period and timedelta categories, both ordered
    (`<` separators) and unordered (`,` separators)."""

    def test_categorical_repr_unicode(self):
        # see gh-21002: repr must cope with non-ascii category objects
        class County(StringMixin):
            name = "San Sebastián"
            state = "PR"

            def __str__(self):
                return self.name + ", " + self.state

        cat = pd.Categorical([County() for _ in range(61)])
        idx = pd.Index(cat)
        ser = idx.to_series()
        repr(ser)
        str(ser)

    def test_categorical_repr(self):
        a = Series(Categorical([1, 2, 3, 4]))
        exp = (
            "0 1\n1 2\n2 3\n3 4\n"
            + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]"
        )
        assert exp == a.__str__()
        a = Series(Categorical(["a", "b"] * 25))
        exp = (
            "0 a\n1 b\n"
            + " ..\n"
            + "48 a\n49 b\n"
            + "Length: 50, dtype: category\nCategories (2, object): [a, b]"
        )
        # truncated repr under a low max_rows setting
        with option_context("display.max_rows", 5):
            assert exp == repr(a)
        levs = list("abcdefghijklmnopqrstuvwxyz")
        a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
        exp = (
            "0 a\n1 b\n" + "dtype: category\n"
            "Categories (26, object): [a < b < c < d ... w < x < y < z]"
        )
        assert exp == a.__str__()

    def test_categorical_series_repr(self):
        s = Series(Categorical([1, 2, 3]))
        exp = """0 1
1 2
2 3
dtype: category
Categories (3, int64): [1, 2, 3]"""
        assert repr(s) == exp
        s = Series(Categorical(np.arange(10)))
        exp = """0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
dtype: category
Categories (10, int64): [0, 1, 2, 3, ..., 6, 7, 8, 9]"""
        assert repr(s) == exp

    def test_categorical_series_repr_ordered(self):
        s = Series(Categorical([1, 2, 3], ordered=True))
        exp = """0 1
1 2
2 3
dtype: category
Categories (3, int64): [1 < 2 < 3]"""
        assert repr(s) == exp
        s = Series(Categorical(np.arange(10), ordered=True))
        exp = """0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
dtype: category
Categories (10, int64): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]"""
        assert repr(s) == exp

    def test_categorical_series_repr_datetime(self):
        idx = date_range("2011-01-01 09:00", freq="H", periods=5)
        s = Series(Categorical(idx))
        exp = """0 2011-01-01 09:00:00
1 2011-01-01 10:00:00
2 2011-01-01 11:00:00
3 2011-01-01 12:00:00
4 2011-01-01 13:00:00
dtype: category
Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,
                                 2011-01-01 12:00:00, 2011-01-01 13:00:00]"""  # noqa
        assert repr(s) == exp
        idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern")
        s = Series(Categorical(idx))
        exp = """0 2011-01-01 09:00:00-05:00
1 2011-01-01 10:00:00-05:00
2 2011-01-01 11:00:00-05:00
3 2011-01-01 12:00:00-05:00
4 2011-01-01 13:00:00-05:00
dtype: category
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,
                                             2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,
                                             2011-01-01 13:00:00-05:00]"""  # noqa
        assert repr(s) == exp

    def test_categorical_series_repr_datetime_ordered(self):
        idx = date_range("2011-01-01 09:00", freq="H", periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01-01 09:00:00
1 2011-01-01 10:00:00
2 2011-01-01 11:00:00
3 2011-01-01 12:00:00
4 2011-01-01 13:00:00
dtype: category
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
                                 2011-01-01 12:00:00 < 2011-01-01 13:00:00]"""  # noqa
        assert repr(s) == exp
        idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern")
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01-01 09:00:00-05:00
1 2011-01-01 10:00:00-05:00
2 2011-01-01 11:00:00-05:00
3 2011-01-01 12:00:00-05:00
4 2011-01-01 13:00:00-05:00
dtype: category
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
                                             2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
                                             2011-01-01 13:00:00-05:00]"""  # noqa
        assert repr(s) == exp

    def test_categorical_series_repr_period(self):
        idx = period_range("2011-01-01 09:00", freq="H", periods=5)
        s = Series(Categorical(idx))
        exp = """0 2011-01-01 09:00
1 2011-01-01 10:00
2 2011-01-01 11:00
3 2011-01-01 12:00
4 2011-01-01 13:00
dtype: category
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
                            2011-01-01 13:00]"""  # noqa
        assert repr(s) == exp
        idx = period_range("2011-01", freq="M", periods=5)
        s = Series(Categorical(idx))
        exp = """0 2011-01
1 2011-02
2 2011-03
3 2011-04
4 2011-05
dtype: category
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
        assert repr(s) == exp

    def test_categorical_series_repr_period_ordered(self):
        idx = period_range("2011-01-01 09:00", freq="H", periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01-01 09:00
1 2011-01-01 10:00
2 2011-01-01 11:00
3 2011-01-01 12:00
4 2011-01-01 13:00
dtype: category
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
                            2011-01-01 13:00]"""  # noqa
        assert repr(s) == exp
        idx = period_range("2011-01", freq="M", periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01
1 2011-02
2 2011-03
3 2011-04
4 2011-05
dtype: category
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
        assert repr(s) == exp

    def test_categorical_series_repr_timedelta(self):
        idx = timedelta_range("1 days", periods=5)
        s = Series(Categorical(idx))
        exp = """0 1 days
1 2 days
2 3 days
3 4 days
4 5 days
dtype: category
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""
        assert repr(s) == exp
        idx = timedelta_range("1 hours", periods=10)
        s = Series(Categorical(idx))
        exp = """0 0 days 01:00:00
1 1 days 01:00:00
2 2 days 01:00:00
3 3 days 01:00:00
4 4 days 01:00:00
5 5 days 01:00:00
6 6 days 01:00:00
7 7 days 01:00:00
8 8 days 01:00:00
9 9 days 01:00:00
dtype: category
Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
                                   3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00,
                                   8 days 01:00:00, 9 days 01:00:00]"""  # noqa
        assert repr(s) == exp

    def test_categorical_series_repr_timedelta_ordered(self):
        idx = timedelta_range("1 days", periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 1 days
1 2 days
2 3 days
3 4 days
4 5 days
dtype: category
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""  # noqa
        assert repr(s) == exp
        idx = timedelta_range("1 hours", periods=10)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 0 days 01:00:00
1 1 days 01:00:00
2 2 days 01:00:00
3 3 days 01:00:00
4 4 days 01:00:00
5 5 days 01:00:00
6 6 days 01:00:00
7 7 days 01:00:00
8 8 days 01:00:00
9 9 days 01:00:00
dtype: category
Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
                                   3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 <
                                   8 days 01:00:00 < 9 days 01:00:00]"""  # noqa
        assert repr(s) == exp

View File

@@ -0,0 +1,267 @@
import random
import numpy as np
import pytest
from pandas import Categorical, DataFrame, IntervalIndex, MultiIndex, Series
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal, assert_series_equal
from .common import TestData
class TestSeriesSorting(TestData):
    """Tests for Series.sort_values / sort_index: NaN placement,
    ascending flags, inplace operation, MultiIndex levels, sort kinds,
    intervals, and categorical ordering."""

    def test_sort_values(self):
        # check indexes are reordered corresponding with the values
        ser = Series([3, 2, 4, 1], ["A", "B", "C", "D"])
        expected = Series([1, 2, 3, 4], ["D", "B", "A", "C"])
        result = ser.sort_values()
        tm.assert_series_equal(expected, result)
        ts = self.ts.copy()
        ts[:5] = np.NaN
        vals = ts.values
        result = ts.sort_values()
        # NaNs go last by default
        assert np.isnan(result[-5:]).all()
        tm.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:]))
        # na_position
        result = ts.sort_values(na_position="first")
        assert np.isnan(result[:5]).all()
        tm.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:]))
        # something object-type
        ser = Series(["A", "B"], [1, 2])
        # no failure
        ser.sort_values()
        # ascending=False
        ordered = ts.sort_values(ascending=False)
        expected = np.sort(ts.dropna().values)[::-1]
        assert_almost_equal(expected, ordered.dropna().values)
        ordered = ts.sort_values(ascending=False, na_position="first")
        assert_almost_equal(expected, ordered.dropna().values)
        # ascending=[False] should behave the same as ascending=False
        ordered = ts.sort_values(ascending=[False])
        expected = ts.sort_values(ascending=False)
        assert_series_equal(expected, ordered)
        ordered = ts.sort_values(ascending=[False], na_position="first")
        expected = ts.sort_values(ascending=False, na_position="first")
        assert_series_equal(expected, ordered)
        # invalid ascending arguments raise with precise messages
        msg = "ascending must be boolean"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=None)
        msg = r"Length of ascending \(0\) must be 1 for Series"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=[])
        msg = r"Length of ascending \(3\) must be 1 for Series"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=[1, 2, 3])
        msg = r"Length of ascending \(2\) must be 1 for Series"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=[False, False])
        msg = "ascending must be boolean"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending="foobar")
        # inplace=True
        ts = self.ts.copy()
        ts.sort_values(ascending=False, inplace=True)
        tm.assert_series_equal(ts, self.ts.sort_values(ascending=False))
        tm.assert_index_equal(ts.index, self.ts.sort_values(ascending=False).index)
        # GH 5856/5853
        # Series.sort_values operating on a view
        df = DataFrame(np.random.randn(10, 4))
        s = df.iloc[:, 0]
        msg = (
            "This Series is a view of some other array, to sort in-place"
            " you must create a copy"
        )
        with pytest.raises(ValueError, match=msg):
            s.sort_values(inplace=True)

    def test_sort_index(self):
        rindex = list(self.ts.index)
        random.shuffle(rindex)
        random_order = self.ts.reindex(rindex)
        sorted_series = random_order.sort_index()
        assert_series_equal(sorted_series, self.ts)
        # descending
        sorted_series = random_order.sort_index(ascending=False)
        assert_series_equal(sorted_series, self.ts.reindex(self.ts.index[::-1]))
        # compat on level
        sorted_series = random_order.sort_index(level=0)
        assert_series_equal(sorted_series, self.ts)
        # compat on axis
        sorted_series = random_order.sort_index(axis=0)
        assert_series_equal(sorted_series, self.ts)
        msg = "No axis named 1 for object type <class 'pandas.core.series.Series'>"
        with pytest.raises(ValueError, match=msg):
            random_order.sort_values(axis=1)
        sorted_series = random_order.sort_index(level=0, axis=0)
        assert_series_equal(sorted_series, self.ts)
        with pytest.raises(ValueError, match=msg):
            random_order.sort_index(level=0, axis=1)

    def test_sort_index_inplace(self):
        # For #11402: inplace sort returns None and mutates in place
        rindex = list(self.ts.index)
        random.shuffle(rindex)
        # descending
        random_order = self.ts.reindex(rindex)
        result = random_order.sort_index(ascending=False, inplace=True)
        assert result is None
        tm.assert_series_equal(random_order, self.ts.reindex(self.ts.index[::-1]))
        # ascending
        random_order = self.ts.reindex(rindex)
        result = random_order.sort_index(ascending=True, inplace=True)
        assert result is None
        tm.assert_series_equal(random_order, self.ts)

    @pytest.mark.parametrize("level", ["A", 0])  # GH 21052
    def test_sort_index_multiindex(self, level):
        mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
        s = Series([1, 2], mi)
        backwards = s.iloc[[1, 0]]
        # implicit sort_remaining=True
        res = s.sort_index(level=level)
        assert_series_equal(backwards, res)
        # GH13496
        # sort has no effect without remaining lvls
        res = s.sort_index(level=level, sort_remaining=False)
        assert_series_equal(s, res)

    def test_sort_index_kind(self):
        # GH #14444 & #13589: Add support for sort algo choosing
        series = Series(index=[3, 2, 1, 4, 3])
        expected_series = Series(index=[1, 2, 3, 3, 4])
        index_sorted_series = series.sort_index(kind="mergesort")
        assert_series_equal(expected_series, index_sorted_series)
        index_sorted_series = series.sort_index(kind="quicksort")
        assert_series_equal(expected_series, index_sorted_series)
        index_sorted_series = series.sort_index(kind="heapsort")
        assert_series_equal(expected_series, index_sorted_series)

    def test_sort_index_na_position(self):
        series = Series(index=[3, 2, 1, 4, 3, np.nan])
        expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4])
        index_sorted_series = series.sort_index(na_position="first")
        assert_series_equal(expected_series_first, index_sorted_series)
        expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan])
        index_sorted_series = series.sort_index(na_position="last")
        assert_series_equal(expected_series_last, index_sorted_series)

    def test_sort_index_intervals(self):
        s = Series(
            [np.nan, 1, 2, 3], IntervalIndex.from_arrays([0, 1, 2, 3], [1, 2, 3, 4])
        )
        result = s.sort_index()
        expected = s
        assert_series_equal(result, expected)
        result = s.sort_index(ascending=False)
        expected = Series(
            [3, 2, 1, np.nan], IntervalIndex.from_arrays([3, 2, 1, 0], [4, 3, 2, 1])
        )
        assert_series_equal(result, expected)

    def test_sort_values_categorical(self):
        c = Categorical(["a", "b", "b", "a"], ordered=False)
        cat = Series(c.copy())
        # sort in the categories order
        expected = Series(
            Categorical(["a", "a", "b", "b"], ordered=False), index=[0, 3, 1, 2]
        )
        result = cat.sort_values()
        tm.assert_series_equal(result, expected)
        cat = Series(Categorical(["a", "c", "b", "d"], ordered=True))
        res = cat.sort_values()
        exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
        tm.assert_numpy_array_equal(res.__array__(), exp)
        cat = Series(
            Categorical(
                ["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True
            )
        )
        res = cat.sort_values()
        exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
        tm.assert_numpy_array_equal(res.__array__(), exp)
        res = cat.sort_values(ascending=False)
        exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
        tm.assert_numpy_array_equal(res.__array__(), exp)
        raw_cat1 = Categorical(
            ["a", "b", "c", "d"], categories=["a", "b", "c", "d"], ordered=False
        )
        raw_cat2 = Categorical(
            ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True
        )
        s = ["a", "b", "c", "d"]
        df = DataFrame(
            {"unsort": raw_cat1, "sort": raw_cat2, "string": s, "values": [1, 2, 3, 4]}
        )
        # Cats must be sorted in a dataframe
        res = df.sort_values(by=["string"], ascending=False)
        exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
        tm.assert_numpy_array_equal(res["sort"].values.__array__(), exp)
        assert res["sort"].dtype == "category"
        res = df.sort_values(by=["sort"], ascending=False)
        exp = df.sort_values(by=["string"], ascending=True)
        tm.assert_series_equal(res["values"], exp["values"])
        assert res["sort"].dtype == "category"
        assert res["unsort"].dtype == "category"
        # unordered cat, but we allow this
        df.sort_values(by=["unsort"], ascending=False)
        # multi-columns sort
        # GH 7848
        df = DataFrame(
            {"id": [6, 5, 4, 3, 2, 1], "raw_grade": ["a", "b", "b", "a", "a", "e"]}
        )
        df["grade"] = Categorical(df["raw_grade"], ordered=True)
        df["grade"] = df["grade"].cat.set_categories(["b", "e", "a"])
        # sorts 'grade' according to the order of the categories
        result = df.sort_values(by=["grade"])
        expected = df.iloc[[1, 2, 5, 0, 3, 4]]
        tm.assert_frame_equal(result, expected)
        # multi
        result = df.sort_values(by=["grade", "id"])
        expected = df.iloc[[2, 1, 5, 4, 3, 0]]
        tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,104 @@
import numpy as np
import pytest
import pandas as pd
from pandas import SparseDtype
import pandas.util.testing as tm
class TestSeriesSubclassing:
    """Check that Series subclasses are preserved through indexing/reshaping."""

    def test_indexing_sliced(self):
        # .loc / .iloc on a subclass should return the same subclass.
        s = tm.SubclassedSeries([1, 2, 3, 4], index=list("abcd"))
        res = s.loc[["a", "b"]]
        exp = tm.SubclassedSeries([1, 2], index=list("ab"))
        tm.assert_series_equal(res, exp)

        res = s.iloc[[2, 3]]
        exp = tm.SubclassedSeries([3, 4], index=list("cd"))
        tm.assert_series_equal(res, exp)

        # NOTE(review): this repeats the first .loc assertion verbatim —
        # possibly a leftover from a removed `.ix` check; confirm upstream.
        res = s.loc[["a", "b"]]
        exp = tm.SubclassedSeries([1, 2], index=list("ab"))
        tm.assert_series_equal(res, exp)

    def test_to_frame(self):
        # to_frame() should produce the paired DataFrame subclass.
        s = tm.SubclassedSeries([1, 2, 3, 4], index=list("abcd"), name="xxx")
        res = s.to_frame()
        exp = tm.SubclassedDataFrame({"xxx": [1, 2, 3, 4]}, index=list("abcd"))
        tm.assert_frame_equal(res, exp)

    def test_subclass_unstack(self):
        # GH 15564: unstacking a subclassed Series keeps the subclass.
        s = tm.SubclassedSeries([1, 2, 3, 4], index=[list("aabb"), list("xyxy")])
        res = s.unstack()
        exp = tm.SubclassedDataFrame({"x": [1, 3], "y": [2, 4]}, index=["a", "b"])
        tm.assert_frame_equal(res, exp)

    def test_subclass_empty_repr(self):
        # repr of an empty subclass instance should mention the subclass name.
        assert "SubclassedSeries" in repr(tm.SubclassedSeries())
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
class TestSparseSeriesSubclassing:
    """Subclass preservation for the (deprecated) SparseSeries machinery."""

    def test_subclass_sparse_slice(self):
        # int64: label, positional and plain slicing all keep dtype/subclass.
        s = tm.SubclassedSparseSeries([1, 2, 3, 4, 5])
        exp = tm.SubclassedSparseSeries([2, 3, 4], index=[1, 2, 3])
        tm.assert_sp_series_equal(s.loc[1:3], exp)
        assert s.loc[1:3].dtype == SparseDtype(np.int64)

        exp = tm.SubclassedSparseSeries([2, 3], index=[1, 2])
        tm.assert_sp_series_equal(s.iloc[1:3], exp)
        assert s.iloc[1:3].dtype == SparseDtype(np.int64)

        exp = tm.SubclassedSparseSeries([2, 3], index=[1, 2])
        tm.assert_sp_series_equal(s[1:3], exp)
        assert s[1:3].dtype == SparseDtype(np.int64)

        # float64: same three slicing flavours.
        s = tm.SubclassedSparseSeries([1.0, 2.0, 3.0, 4.0, 5.0])
        exp = tm.SubclassedSparseSeries([2.0, 3.0, 4.0], index=[1, 2, 3])
        tm.assert_sp_series_equal(s.loc[1:3], exp)
        assert s.loc[1:3].dtype == SparseDtype(np.float64)

        exp = tm.SubclassedSparseSeries([2.0, 3.0], index=[1, 2])
        tm.assert_sp_series_equal(s.iloc[1:3], exp)
        assert s.iloc[1:3].dtype == SparseDtype(np.float64)

        exp = tm.SubclassedSparseSeries([2.0, 3.0], index=[1, 2])
        tm.assert_sp_series_equal(s[1:3], exp)
        assert s[1:3].dtype == SparseDtype(np.float64)

    def test_subclass_sparse_addition(self):
        # Arithmetic between two subclassed sparse series keeps the subclass.
        s1 = tm.SubclassedSparseSeries([1, 3, 5])
        s2 = tm.SubclassedSparseSeries([-2, 5, 12])
        exp = tm.SubclassedSparseSeries([-1, 8, 17])
        tm.assert_sp_series_equal(s1 + s2, exp)

        s1 = tm.SubclassedSparseSeries([4.0, 5.0, 6.0])
        s2 = tm.SubclassedSparseSeries([1.0, 2.0, 3.0])
        exp = tm.SubclassedSparseSeries([5.0, 7.0, 9.0])
        tm.assert_sp_series_equal(s1 + s2, exp)

    def test_subclass_sparse_to_frame(self):
        s = tm.SubclassedSparseSeries([1, 2], index=list("ab"), name="xxx")
        res = s.to_frame()

        exp_arr = pd.SparseArray([1, 2], dtype=np.int64, kind="block", fill_value=0)
        exp = tm.SubclassedSparseDataFrame(
            {"xxx": exp_arr}, index=list("ab"), default_fill_value=0
        )
        tm.assert_sp_frame_equal(res, exp)

        # create from int dict
        res = tm.SubclassedSparseDataFrame(
            {"xxx": [1, 2]}, index=list("ab"), default_fill_value=0
        )
        tm.assert_sp_frame_equal(res, exp)

        s = tm.SubclassedSparseSeries([1.1, 2.1], index=list("ab"), name="xxx")
        res = s.to_frame()
        exp = tm.SubclassedSparseDataFrame({"xxx": [1.1, 2.1]}, index=list("ab"))
        tm.assert_sp_frame_equal(res, exp)

File diff suppressed because it is too large (use "Load Diff" to expand)

View File

@@ -0,0 +1,381 @@
"""
Tests for Series timezone-related methods
"""
from datetime import datetime
from dateutil.tz import tzoffset
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs import conversion, timezones
from pandas import DatetimeIndex, Index, NaT, Series, Timestamp
from pandas.core.indexes.datetimes import date_range
import pandas.util.testing as tm
class TestSeriesTimezones:
    """Tests for Series timezone behavior: tz_localize, tz_convert, append,
    alignment and indexing with tz-aware DatetimeIndexes."""

    # -----------------------------------------------------------------
    # Series.tz_localize

    def test_series_tz_localize(self):
        rng = date_range("1/1/2011", periods=100, freq="H")
        ts = Series(1, index=rng)

        result = ts.tz_localize("utc")
        assert result.index.tz.zone == "UTC"

        # Can't localize if already tz-aware
        rng = date_range("1/1/2011", periods=100, freq="H", tz="utc")
        ts = Series(1, index=rng)

        with pytest.raises(TypeError, match="Already tz-aware"):
            ts.tz_localize("US/Eastern")

    @pytest.mark.filterwarnings("ignore::FutureWarning")
    def test_tz_localize_errors_deprecation(self):
        # GH 22644: the `errors` kwarg is deprecated in favor of `nonexistent`.
        tz = "Europe/Warsaw"
        n = 60
        rng = date_range(start="2015-03-29 02:00:00", periods=n, freq="min")
        ts = Series(rng)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            with pytest.raises(ValueError):
                ts.dt.tz_localize(tz, errors="foo")
            # make sure errors='coerce' gets mapped correctly to nonexistent
            result = ts.dt.tz_localize(tz, errors="coerce")
            expected = ts.dt.tz_localize(tz, nonexistent="NaT")
            tm.assert_series_equal(result, expected)

    def test_series_tz_localize_ambiguous_bool(self):
        # make sure that we are correctly accepting bool values as ambiguous
        # GH#14402
        ts = Timestamp("2015-11-01 01:00:03")
        expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central")
        expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central")

        ser = Series([ts])
        expected0 = Series([expected0])
        expected1 = Series([expected1])

        # Without a hint, the ambiguous wall time must raise.
        with pytest.raises(pytz.AmbiguousTimeError):
            ser.dt.tz_localize("US/Central")

        result = ser.dt.tz_localize("US/Central", ambiguous=True)
        tm.assert_series_equal(result, expected0)

        result = ser.dt.tz_localize("US/Central", ambiguous=[True])
        tm.assert_series_equal(result, expected0)

        result = ser.dt.tz_localize("US/Central", ambiguous=False)
        tm.assert_series_equal(result, expected1)

        result = ser.dt.tz_localize("US/Central", ambiguous=[False])
        tm.assert_series_equal(result, expected1)

    @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"])
    @pytest.mark.parametrize(
        "method, exp",
        [
            ["shift_forward", "2015-03-29 03:00:00"],
            ["NaT", NaT],
            ["raise", None],
            ["foo", "invalid"],
        ],
    )
    def test_series_tz_localize_nonexistent(self, tz, method, exp):
        # GH 8917: behavior of each `nonexistent` option across a DST gap.
        n = 60
        dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min")
        s = Series(1, dti)
        if method == "raise":
            with pytest.raises(pytz.NonExistentTimeError):
                s.tz_localize(tz, nonexistent=method)
        elif exp == "invalid":
            with pytest.raises(ValueError):
                dti.tz_localize(tz, nonexistent=method)
        else:
            result = s.tz_localize(tz, nonexistent=method)
            expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz))
            tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_series_tz_localize_empty(self, tzstr):
        # GH#2248: localizing an empty Series must not raise.
        ser = Series()

        ser2 = ser.tz_localize("utc")
        assert ser2.index.tz == pytz.utc

        ser2 = ser.tz_localize(tzstr)
        timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr))

    # -----------------------------------------------------------------
    # Series.tz_convert

    def test_series_tz_convert(self):
        rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern")
        ts = Series(1, index=rng)

        result = ts.tz_convert("Europe/Berlin")
        assert result.index.tz.zone == "Europe/Berlin"

        # can't convert tz-naive
        rng = date_range("1/1/2011", periods=200, freq="D")
        ts = Series(1, index=rng)

        with pytest.raises(TypeError, match="Cannot convert tz-naive"):
            ts.tz_convert("US/Eastern")

    def test_series_tz_convert_to_utc(self):
        # Arithmetic between differently-converted views aligns on UTC.
        base = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC")
        idx1 = base.tz_convert("Asia/Tokyo")[:2]
        idx2 = base.tz_convert("US/Eastern")[1:]

        res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2)
        tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base))

    # -----------------------------------------------------------------
    # Series.append

    def test_series_append_aware(self):
        rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern")
        rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern")
        ser1 = Series([1], index=rng1)
        ser2 = Series([2], index=rng2)
        ts_result = ser1.append(ser2)

        exp_index = DatetimeIndex(
            ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern"
        )
        exp = Series([1, 2], index=exp_index)
        tm.assert_series_equal(ts_result, exp)
        assert ts_result.index.tz == rng1.tz

        rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="UTC")
        rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC")
        ser1 = Series([1], index=rng1)
        ser2 = Series([2], index=rng2)
        ts_result = ser1.append(ser2)

        exp_index = DatetimeIndex(["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC")
        exp = Series([1, 2], index=exp_index)
        tm.assert_series_equal(ts_result, exp)
        utc = rng1.tz
        assert utc == ts_result.index.tz

        # GH#7795
        # different tz coerces to object dtype, not UTC
        rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern")
        rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central")
        ser1 = Series([1], index=rng1)
        ser2 = Series([2], index=rng2)
        ts_result = ser1.append(ser2)
        exp_index = Index(
            [
                Timestamp("1/1/2011 01:00", tz="US/Eastern"),
                Timestamp("1/1/2011 02:00", tz="US/Central"),
            ]
        )
        exp = Series([1, 2], index=exp_index)
        tm.assert_series_equal(ts_result, exp)

    def test_series_append_aware_naive(self):
        # Appending aware to naive coerces the index to object dtype.
        rng1 = date_range("1/1/2011 01:00", periods=1, freq="H")
        rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern")
        ser1 = Series(np.random.randn(len(rng1)), index=rng1)
        ser2 = Series(np.random.randn(len(rng2)), index=rng2)
        ts_result = ser1.append(ser2)

        expected = ser1.index.astype(object).append(ser2.index.astype(object))
        assert ts_result.index.equals(expected)

        # mixed
        rng1 = date_range("1/1/2011 01:00", periods=1, freq="H")
        rng2 = range(100)
        ser1 = Series(np.random.randn(len(rng1)), index=rng1)
        ser2 = Series(np.random.randn(len(rng2)), index=rng2)
        ts_result = ser1.append(ser2)

        expected = ser1.index.astype(object).append(ser2.index)
        assert ts_result.index.equals(expected)

    def test_series_append_dst(self):
        # Appending across a DST transition keeps a single tz-aware index.
        rng1 = date_range("1/1/2016 01:00", periods=3, freq="H", tz="US/Eastern")
        rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern")
        ser1 = Series([1, 2, 3], index=rng1)
        ser2 = Series([10, 11, 12], index=rng2)
        ts_result = ser1.append(ser2)

        exp_index = DatetimeIndex(
            [
                "2016-01-01 01:00",
                "2016-01-01 02:00",
                "2016-01-01 03:00",
                "2016-08-01 01:00",
                "2016-08-01 02:00",
                "2016-08-01 03:00",
            ],
            tz="US/Eastern",
        )
        exp = Series([1, 2, 3, 10, 11, 12], index=exp_index)
        tm.assert_series_equal(ts_result, exp)
        assert ts_result.index.tz == rng1.tz

    # -----------------------------------------------------------------

    def test_dateutil_tzoffset_support(self):
        # Fixed-offset dateutil tzinfo on the index round-trips.
        values = [188.5, 328.25]
        tzinfo = tzoffset(None, 7200)
        index = [
            datetime(2012, 5, 11, 11, tzinfo=tzinfo),
            datetime(2012, 5, 11, 12, tzinfo=tzinfo),
        ]
        series = Series(data=values, index=index)

        assert series.index.tz == tzinfo

        # it works! #2443
        repr(series.index[0])

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_tz_aware_asfreq(self, tz):
        dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz)

        ser = Series(np.random.randn(len(dr)), index=dr)

        # it works!
        ser.asfreq("T")

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_string_index_alias_tz_aware(self, tz):
        # Partial-string indexing works on a tz-aware index.
        rng = date_range("1/1/2000", periods=10, tz=tz)
        ser = Series(np.random.randn(len(rng)), index=rng)

        result = ser["1/3/2000"]
        tm.assert_almost_equal(result, ser[2])

    # TODO: De-duplicate with test below
    def test_series_add_tz_mismatch_converts_to_utc_duplicate(self):
        rng = date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern")
        ser = Series(np.random.randn(len(rng)), index=rng)

        ts_moscow = ser.tz_convert("Europe/Moscow")

        result = ser + ts_moscow
        assert result.index.tz is pytz.utc

        result = ts_moscow + ser
        assert result.index.tz is pytz.utc

    def test_series_add_tz_mismatch_converts_to_utc(self):
        rng = date_range("1/1/2011", periods=100, freq="H", tz="utc")

        perm = np.random.permutation(100)[:90]
        ser1 = Series(
            np.random.randn(90), index=rng.take(perm).tz_convert("US/Eastern")
        )

        perm = np.random.permutation(100)[:90]
        ser2 = Series(
            np.random.randn(90), index=rng.take(perm).tz_convert("Europe/Berlin")
        )

        result = ser1 + ser2

        uts1 = ser1.tz_convert("utc")
        uts2 = ser2.tz_convert("utc")
        expected = uts1 + uts2

        assert result.index.tz == pytz.UTC
        tm.assert_series_equal(result, expected)

    def test_series_add_aware_naive_raises(self):
        # Mixing aware and naive indexes in arithmetic must raise.
        rng = date_range("1/1/2011", periods=10, freq="H")
        ser = Series(np.random.randn(len(rng)), index=rng)

        ser_utc = ser.tz_localize("utc")

        with pytest.raises(Exception):
            ser + ser_utc

        with pytest.raises(Exception):
            ser_utc + ser

    def test_series_align_aware(self):
        idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern")
        ser = Series(np.random.randn(len(idx1)), index=idx1)
        ser_central = ser.tz_convert("US/Central")
        # # different timezones convert to UTC
        new1, new2 = ser.align(ser_central)
        assert new1.index.tz == pytz.UTC
        assert new2.index.tz == pytz.UTC

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_localized_at_time_between_time(self, tzstr):
        from datetime import time

        tz = timezones.maybe_get_tz(tzstr)

        rng = date_range("4/16/2012", "5/1/2012", freq="H")
        ts = Series(np.random.randn(len(rng)), index=rng)

        ts_local = ts.tz_localize(tzstr)

        result = ts_local.at_time(time(10, 0))
        expected = ts.at_time(time(10, 0)).tz_localize(tzstr)
        tm.assert_series_equal(result, expected)
        assert timezones.tz_compare(result.index.tz, tz)

        t1, t2 = time(10, 0), time(11, 0)
        result = ts_local.between_time(t1, t2)
        expected = ts.between_time(t1, t2).tz_localize(tzstr)
        tm.assert_series_equal(result, expected)
        assert timezones.tz_compare(result.index.tz, tz)

    @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"])
    def test_getitem_pydatetime_tz(self, tzstr):
        # Lookup with a pandas Timestamp and a localized datetime agree.
        tz = timezones.maybe_get_tz(tzstr)

        index = date_range(
            start="2012-12-24 16:00", end="2012-12-24 18:00", freq="H", tz=tzstr
        )
        ts = Series(index=index, data=index.hour)
        time_pandas = Timestamp("2012-12-24 17:00", tz=tzstr)

        dt = datetime(2012, 12, 24, 17, 0)
        time_datetime = conversion.localize_pydatetime(dt, tz)
        assert ts[time_pandas] == ts[time_datetime]

    def test_series_truncate_datetimeindex_tz(self):
        # GH 9243
        idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific")
        s = Series(range(len(idx)), index=idx)
        result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4))
        expected = Series([1, 2, 3], index=idx[1:4])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("copy", [True, False])
    @pytest.mark.parametrize(
        "method, tz", [["tz_localize", None], ["tz_convert", "Europe/Berlin"]]
    )
    def test_tz_localize_convert_copy_inplace_mutate(self, copy, method, tz):
        # GH 6326: neither copy=True nor copy=False may mutate the original.
        result = Series(
            np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz=tz)
        )
        getattr(result, method)("UTC", copy=copy)
        expected = Series(
            np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz=tz)
        )
        tm.assert_series_equal(result, expected)

    def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture):
        # GH 25843: aware data + naive dtype drops the timezone.
        tz = tz_aware_fixture
        result = Series([Timestamp("2019", tz=tz)], dtype="datetime64[ns]")
        expected = Series([Timestamp("2019")])
        tm.assert_series_equal(result, expected)

View File

@@ -0,0 +1,308 @@
from collections import deque
import string
import numpy as np
import pytest
import pandas as pd
import pandas.util.testing as tm
# Ufunc inventories shared by the parametrized tests below.
UNARY_UFUNCS = [np.positive, np.floor, np.exp]
BINARY_UFUNCS = [np.add, np.logaddexp]  # dunder op
# Run each test against both sparse and dense inputs.
SPARSE = [True, False]
SPARSE_IDS = ["sparse", "dense"]
SHUFFLE = [True, False]
@pytest.fixture
def arrays_for_binary_ufunc():
    """
    Build a pair of random, length-100 int64 arrays whose entries are mostly 0.
    """
    # Draw both arrays first so the RNG consumption order is fixed.
    first = np.random.randint(0, 10, 100, dtype="int64")
    second = np.random.randint(0, 10, 100, dtype="int64")
    # Zero out every 3rd / 4th element to exercise sparse fill values.
    first[::3] = 0
    second[::4] = 0
    return first, second
@pytest.mark.parametrize("ufunc", UNARY_UFUNCS)
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
def test_unary_ufunc(ufunc, sparse):
    # Test that ufunc(Series) == Series(ufunc)
    array = np.random.randint(0, 10, 10, dtype="int64")
    array[::2] = 0
    if sparse:
        array = pd.SparseArray(array, dtype=pd.SparseDtype("int64", 0))

    # index and name should pass through the ufunc unchanged
    index = list(string.ascii_letters[:10])
    name = "name"
    series = pd.Series(array, index=index, name=name)

    result = ufunc(series)
    expected = pd.Series(ufunc(array), index=index, name=name)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc):
    # Test that ufunc(Series(a), array) == Series(ufunc(a, b))
    a1, a2 = arrays_for_binary_ufunc
    if sparse:
        a1 = pd.SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
        a2 = pd.SparseArray(a2, dtype=pd.SparseDtype("int64", 0))

    name = "name"  # op(Series, array) preserves the name.
    series = pd.Series(a1, name=name)
    other = a2

    array_args = (a1, a2)
    series_args = (series, other)  # ufunc(series, array)

    if flip:
        # swap the operand order to test the reflected path as well
        array_args = reversed(array_args)
        series_args = reversed(series_args)  # ufunc(array, series)

    expected = pd.Series(ufunc(*array_args), name=name)
    result = ufunc(*series_args)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc):
    # Test that
    # * func(Series(a), Series(b)) == Series(ufunc(a, b))
    # * ufunc(Index, Series) dispatches to Series (returns a Series)
    a1, a2 = arrays_for_binary_ufunc
    if sparse:
        a1 = pd.SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
        a2 = pd.SparseArray(a2, dtype=pd.SparseDtype("int64", 0))

    name = "name"  # op(Series, array) preserves the name.
    series = pd.Series(a1, name=name)
    # second operand is an Index rather than a raw array
    other = pd.Index(a2, name=name).astype("int64")

    array_args = (a1, a2)
    series_args = (series, other)  # ufunc(series, array)

    if flip:
        array_args = reversed(array_args)
        series_args = reversed(series_args)  # ufunc(array, series)

    expected = pd.Series(ufunc(*array_args), name=name)
    result = ufunc(*series_args)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("shuffle", [True, False], ids=["unaligned", "aligned"])
@pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
def test_binary_ufunc_with_series(
    flip, shuffle, sparse, ufunc, arrays_for_binary_ufunc
):
    # Test that
    # * func(Series(a), Series(b)) == Series(ufunc(a, b))
    # with alignment between the indices
    a1, a2 = arrays_for_binary_ufunc
    if sparse:
        a1 = pd.SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
        a2 = pd.SparseArray(a2, dtype=pd.SparseDtype("int64", 0))

    name = "name"  # op(Series, array) preserves the name.
    series = pd.Series(a1, name=name)
    other = pd.Series(a2, name=name)

    idx = np.random.permutation(len(a1))

    if shuffle:
        # misalign the second operand; the expected index is whatever
        # alignment produces for the left-hand operand
        other = other.take(idx)
        if flip:
            index = other.align(series)[0].index
        else:
            index = series.align(other)[0].index
    else:
        index = series.index

    array_args = (a1, a2)
    series_args = (series, other)  # ufunc(series, array)

    if flip:
        array_args = tuple(reversed(array_args))
        series_args = tuple(reversed(series_args))  # ufunc(array, series)

    expected = pd.Series(ufunc(*array_args), index=index, name=name)
    result = ufunc(*series_args)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("flip", [True, False])
def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):
    # Test that
    # * ufunc(Series, scalar) == Series(ufunc(array, scalar))
    # * ufunc(Series, scalar) == ufunc(scalar, Series)
    array, _ = arrays_for_binary_ufunc
    if sparse:
        array = pd.SparseArray(array)
    other = 2
    series = pd.Series(array, name="name")

    series_args = (series, other)
    array_args = (array, other)

    if flip:
        series_args = tuple(reversed(series_args))
        array_args = tuple(reversed(array_args))

    expected = pd.Series(ufunc(*array_args), name="name")
    result = ufunc(*series_args)

    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("ufunc", [np.divmod])  # any others?
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("shuffle", SHUFFLE)
@pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning")
def test_multiple_ouput_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ufunc):
    # NOTE: "ouput" typo in the name is kept — renaming would change test IDs.
    # Test that
    # the same conditions from binary_ufunc_scalar apply to
    # ufuncs with multiple outputs.
    if sparse and ufunc is np.divmod:
        pytest.skip("sparse divmod not implemented.")

    a1, a2 = arrays_for_binary_ufunc
    # work around https://github.com/pandas-dev/pandas/issues/26987
    a1[a1 == 0] = 1
    a2[a2 == 0] = 1

    if sparse:
        a1 = pd.SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
        a2 = pd.SparseArray(a2, dtype=pd.SparseDtype("int64", 0))

    s1 = pd.Series(a1)
    s2 = pd.Series(a2)

    if shuffle:
        # ensure we align before applying the ufunc
        s2 = s2.sample(frac=1)

    expected = ufunc(a1, a2)
    assert isinstance(expected, tuple)

    result = ufunc(s1, s2)
    assert isinstance(result, tuple)
    tm.assert_series_equal(result[0], pd.Series(expected[0]))
    tm.assert_series_equal(result[1], pd.Series(expected[1]))
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
def test_multiple_ouput_ufunc(sparse, arrays_for_binary_ufunc):
    # Test that the same conditions from unary input apply to multi-output
    # ufuncs
    array, _ = arrays_for_binary_ufunc

    if sparse:
        array = pd.SparseArray(array)

    series = pd.Series(array, name="name")
    result = np.modf(series)
    expected = np.modf(array)

    # np.modf returns a (fractional, integral) pair; both halves must be
    # Series carrying the original name.
    assert isinstance(result, tuple)
    assert isinstance(expected, tuple)

    tm.assert_series_equal(result[0], pd.Series(expected[0], name="name"))
    tm.assert_series_equal(result[1], pd.Series(expected[1], name="name"))
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
def test_binary_ufunc_drops_series_name(ufunc, sparse, arrays_for_binary_ufunc):
    """A binary ufunc over two Series with different names yields no name."""
    left_values, right_values = arrays_for_binary_ufunc
    left = pd.Series(left_values, name="a")
    right = pd.Series(right_values, name="b")
    assert ufunc(left, right).name is None
def test_object_series_ok():
    # Object-dtype Series should apply the ufunc elementwise via __add__.
    class Dummy:
        def __init__(self, value):
            self.value = value

        def __add__(self, other):
            return self.value + other.value

    arr = np.array([Dummy(0), Dummy(1)])
    ser = pd.Series(arr)
    tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr)))
    tm.assert_series_equal(np.add(ser, Dummy(1)), pd.Series(np.add(ser, Dummy(1))))
@pytest.mark.parametrize(
    "values",
    [
        pd.array([1, 3, 2]),
        pytest.param(
            pd.array([1, 10, 0], dtype="Sparse[int]"),
            # BUG FIX: was `resason=` — pytest.mark.xfail rejects unknown
            # keyword arguments, so the typo broke collection of this param.
            marks=pytest.mark.xfail(reason="GH-27080. Bug in SparseArray"),
        ),
        pd.to_datetime(["2000", "2010", "2001"]),
        pd.to_datetime(["2000", "2010", "2001"]).tz_localize("CET"),
        pd.to_datetime(["2000", "2010", "2001"]).to_period(freq="D"),
    ],
)
def test_reduce(values):
    # np.maximum.reduce should dispatch through the Series and return the
    # maximum element (index 1 in every parametrized input above).
    a = pd.Series(values)
    assert np.maximum.reduce(a) == values[1]
@pytest.mark.parametrize("type_", [list, deque, tuple])
def test_binary_ufunc_other_types(type_):
    # ufunc(Series, sequence) coerces the sequence and keeps the Series name.
    a = pd.Series([1, 2, 3], name="name")
    b = type_([3, 4, 5])

    result = np.add(a, b)
    expected = pd.Series(np.add(a.to_numpy(), b), name="name")
    tm.assert_series_equal(result, expected)
def test_object_dtype_ok():
    # ufunc(Series, scalar) on object dtype defers to the object's __add__.
    class Thing:
        def __init__(self, value):
            self.value = value

        def __add__(self, other):
            # accept either another Thing or a raw number
            other = getattr(other, "value", other)
            return type(self)(self.value + other)

        def __eq__(self, other):
            return type(other) is Thing and self.value == other.value

        def __repr__(self):
            return "Thing({})".format(self.value)

    s = pd.Series([Thing(1), Thing(2)])
    result = np.add(s, Thing(1))
    expected = pd.Series([Thing(2), Thing(3)])
    tm.assert_series_equal(result, expected)
def test_outer():
    # https://github.com/pandas-dev/pandas/issues/27186
    # ufunc.outer on a Series is deprecated; it warns and returns an ndarray.
    s = pd.Series([1, 2, 3])
    o = np.array([1, 2, 3])

    with tm.assert_produces_warning(FutureWarning):
        result = np.subtract.outer(s, o)
    expected = np.array([[0, -1, -2], [1, 0, -1], [2, 1, 0]], dtype=np.dtype("int64"))
    tm.assert_numpy_array_equal(result, expected)

View File

@@ -0,0 +1,20 @@
import pytest
class TestSeriesValidate:
    """Tests for error handling related to data types of method arguments."""

    @pytest.mark.parametrize(
        "func",
        ["reset_index", "_set_name", "sort_values", "sort_index", "rename", "dropna"],
    )
    @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0])
    def test_validate_bool_args(self, string_series, func, inplace):
        """Non-bool values for ``inplace`` must raise a ValueError."""
        expected_msg = 'For argument "inplace" expected type bool'

        call_kwargs = {"inplace": inplace}
        if func == "_set_name":
            # _set_name additionally requires the new name argument.
            call_kwargs["name"] = "hello"

        method = getattr(string_series, func)
        with pytest.raises(ValueError, match=expected_msg):
            method(**call_kwargs)