8th day of python challenges 111-117
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
import pytest
|
||||
|
||||
from pandas.tests.series.common import TestData
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def test_data():
    """Build one ``TestData`` instance per test module and hand it to the tests."""
    shared = TestData()
    return shared
|
@@ -0,0 +1,558 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
from numpy import nan
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical, Series, date_range, isna
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "first_slice,second_slice",
    [
        [[2, None], [None, -5]],
        [[None, 0], [None, -5]],
        [[None, -5], [None, 0]],
        [[None, 0], [None, 0]],
    ],
)
@pytest.mark.parametrize("fill", [None, -1])
def test_align(test_data, first_slice, second_slice, join_type, fill):
    """Compare ``align`` with reindexing both sides on the joined index.

    When ``fill`` is given, the expected series are additionally filled with
    it, and any label of an aligned result outside the joined index must hold
    the fill value.
    """
    left = test_data.ts[slice(*first_slice)]
    right = test_data.ts[slice(*second_slice)]

    aligned_left, aligned_right = left.align(right, join=join_type, fill_value=fill)
    joined = left.index.join(right.index, how=join_type)

    if fill is not None:
        extra_left = aligned_left.index.difference(joined)
        extra_right = aligned_right.index.difference(joined)
        if len(extra_left) > 0:
            assert (aligned_left.reindex(extra_left) == fill).all()
        if len(extra_right) > 0:
            assert (aligned_right.reindex(extra_right) == fill).all()

    expected_left = left.reindex(joined)
    expected_right = right.reindex(joined)
    if fill is not None:
        expected_left = expected_left.fillna(fill)
        expected_right = expected_right.fillna(fill)

    assert_series_equal(aligned_left, expected_left)
    assert_series_equal(aligned_right, expected_right)

    # the series name must survive both align and reindex
    for series in (aligned_left, expected_left, aligned_right, expected_right):
        assert series.name == "ts"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "first_slice,second_slice",
    [
        [[2, None], [None, -5]],
        [[None, 0], [None, -5]],
        [[None, -5], [None, 0]],
        [[None, 0], [None, 0]],
    ],
)
@pytest.mark.parametrize("method", ["pad", "bfill"])
@pytest.mark.parametrize("limit", [None, 1])
def test_align_fill_method(
    test_data, first_slice, second_slice, join_type, method, limit
):
    """Compare ``align(method=, limit=)`` with reindex-then-``fillna``."""
    left = test_data.ts[slice(*first_slice)]
    right = test_data.ts[slice(*second_slice)]

    aligned_left, aligned_right = left.align(
        right, join=join_type, method=method, limit=limit
    )

    joined = left.index.join(right.index, how=join_type)
    expected_left = left.reindex(joined).fillna(method=method, limit=limit)
    expected_right = right.reindex(joined).fillna(method=method, limit=limit)

    assert_series_equal(aligned_left, expected_left)
    assert_series_equal(aligned_right, expected_right)
|
||||
|
||||
|
||||
def test_align_nocopy(test_data):
    """Check whether writes to an aligned result are visible in its source.

    With the default arguments the source must stay untouched; with
    ``copy=False`` the write must show up in the source.
    """
    other = test_data.ts[:5].copy()

    # left join, default copy: caller is not modified
    caller = test_data.ts.copy()
    left_result, _ = caller.align(other, join="left")
    left_result[:5] = 5
    assert not (caller[:5] == 5).any()

    # left join, copy=False: caller sees the write
    caller = test_data.ts.copy()
    left_result, _ = caller.align(other, join="left", copy=False)
    left_result[:5] = 5
    assert (caller[:5] == 5).all()

    # right join, default copy: the other series is not modified
    caller = test_data.ts.copy()
    other = test_data.ts[:5].copy()
    _, right_result = caller.align(other, join="right")
    right_result[:3] = 5
    assert not (other[:3] == 5).any()

    # right join, copy=False: the other series sees the write
    caller = test_data.ts.copy()
    other = test_data.ts[:5].copy()
    _, right_result = caller.align(other, join="right", copy=False)
    right_result[:2] = 5
    assert (other[:2] == 5).all()
|
||||
|
||||
|
||||
def test_align_same_index(test_data):
    """Aligning a series with itself: index identity depends on ``copy``."""
    # copy=False: both results carry the very same index object
    for aligned in test_data.ts.align(test_data.ts, copy=False):
        assert aligned.index is test_data.ts.index

    # copy=True: both results carry a different index object
    for aligned in test_data.ts.align(test_data.ts, copy=True):
        assert aligned.index is not test_data.ts.index
|
||||
|
||||
|
||||
def test_align_multiindex():
    """Align a three-level MultiIndex series with a series indexed by level ``b``."""
    # GH 10665
    level_names = ("a", "b", "c")
    full_index = pd.MultiIndex.from_product(
        [range(2), range(3), range(2)], names=level_names
    )
    level_index = pd.Index(range(2), name="b")
    multi = pd.Series(np.arange(12, dtype="int64"), index=full_index)
    single = pd.Series(np.arange(2, dtype="int64"), index=level_index)

    # these must be the same results (but flipped)
    multi_first, single_second = multi.align(single, join="left")
    single_first, multi_second = single.align(multi, join="right")

    tm.assert_series_equal(multi, multi_first)
    tm.assert_series_equal(multi, multi_second)
    expected_single = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=full_index)
    tm.assert_series_equal(expected_single, single_second)
    tm.assert_series_equal(expected_single, single_first)

    # joining on the single-level side instead
    multi_first, single_second = multi.align(single, join="right")
    single_first, multi_second = single.align(multi, join="left")

    trimmed_index = pd.MultiIndex.from_product(
        [range(2), range(2), range(2)], names=level_names
    )
    expected_multi = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=trimmed_index)
    tm.assert_series_equal(expected_multi, multi_first)
    tm.assert_series_equal(expected_multi, multi_second)
    expected_single = pd.Series([0, 0, 1, 1] * 2, index=trimmed_index)
    tm.assert_series_equal(expected_single, single_second)
    tm.assert_series_equal(expected_single, single_first)
|
||||
|
||||
|
||||
def test_reindex(test_data):
    """Exercise ``Series.reindex`` on the fixture's ``series`` and ``ts``."""
    same = test_data.series.reindex(test_data.series.index)

    # __array_interface__ is not defined for older numpies
    # and on some pythons
    try:
        assert np.may_share_memory(test_data.series.index, same.index)
    except AttributeError:
        pass

    assert same.index.is_(test_data.series.index)
    assert same.index.identical(test_data.series.index)

    # a sub-range of the own index keeps the original values
    series_labels = test_data.series.index[10:20]
    for label, value in test_data.series.reindex(series_labels).items():
        assert value == test_data.series[label]

    ts_labels = test_data.ts.index[10:20]
    for label, value in test_data.ts.reindex(ts_labels).items():
        assert value == test_data.ts[label]

    # labels of ``series`` applied to ``ts``: every value is expected to be NaN
    stuff = test_data.ts.reindex(series_labels)
    assert np.isnan(stuff).all()

    # This is extremely important for the Cython code to not screw up
    every_other = test_data.ts.index[::2]
    for label, value in test_data.ts.reindex(every_other).items():
        assert value == test_data.ts[label]

    # reindex() without arguments must return a new object
    untouched = test_data.ts.reindex()
    assert untouched is not test_data.ts
|
||||
|
||||
|
||||
def test_reindex_nan():
    """Reindex onto labels that repeat NaN, with a float and an object index."""
    ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8])

    labels = [nan, 1, nan, 8, 4, nan]
    positions = [2, 0, 2, 3, 1, 2]
    assert_series_equal(ts.reindex(labels), ts.iloc[positions])

    ts.index = ts.index.astype("object")

    # reindex coerces index.dtype to float, loc/iloc doesn't
    assert_series_equal(
        ts.reindex(labels), ts.iloc[positions], check_index_type=False
    )
|
||||
|
||||
|
||||
def test_reindex_series_add_nat():
    """Extending a 10-element datetime series to 15 labels adds five missing values."""
    stamps = Series(date_range("1/1/2000 00:00:00", periods=10, freq="10s"))

    extended = stamps.reindex(range(15))
    assert np.issubdtype(extended.dtype, np.dtype("M8[ns]"))

    missing = extended.isna()
    assert not missing[:-5].any()
    assert missing[-5:].all()
|
||||
|
||||
|
||||
def test_reindex_with_datetimes():
    """Selecting rows by a list of timestamps matches slicing the same rows.

    Both ``reindex`` and ``[]`` are checked against the positional slice.
    """
    rng = date_range("1/1/2000", periods=20)
    ts = Series(np.random.randn(20), index=rng)
    wanted = list(ts.index[5:10])

    expected = ts[5:10]
    # The slice keeps the frequency of ``rng``; an index built from a plain
    # list of timestamps has none. Rebuild the expected index from the same
    # list so the comparison holds whether or not the helper compares ``freq``.
    expected.index = pd.DatetimeIndex(wanted)

    result = ts.reindex(wanted)
    tm.assert_series_equal(result, expected)

    result = ts[wanted]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_corner(test_data):
    """Corner cases of ``reindex``: ``empty`` source, list target, bad method."""
    # corner case: pad empty series -- only has to run without raising
    test_data.empty.reindex(test_data.ts.index, method="pad")  # it works

    # pass non-Index
    reindexed = test_data.ts.reindex(list(test_data.ts.index))
    assert_series_equal(test_data.ts, reindexed)

    # bad fill method
    ts = test_data.ts[::2]
    msg = (
        r"Invalid fill method\. Expecting pad \(ffill\), backfill"
        r" \(bfill\) or nearest\. Got foo"
    )
    with pytest.raises(ValueError, match=msg):
        ts.reindex(test_data.ts.index, method="foo")
|
||||
|
||||
|
||||
def test_reindex_pad():
    """Forward-filling while reindexing, and the dtypes that result."""
    base = Series(np.arange(10), dtype="int64")
    evens = base[::2]

    padded = evens.reindex(base.index, method="pad")
    ffilled = evens.reindex(base.index, method="ffill")
    assert_series_equal(padded, ffilled)

    expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
    assert_series_equal(padded, expected)

    # GH4604
    lettered = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
    new_index = ["a", "g", "c", "f"]
    expected = Series([1, 1, 3, 3], index=new_index)

    # this changes dtype because the ffill happens after
    result = lettered.reindex(new_index).ffill()
    assert_series_equal(result, expected.astype("float64"))

    result = lettered.reindex(new_index).ffill(downcast="infer")
    assert_series_equal(result, expected)

    expected = Series([1, 5, 3, 5], index=new_index)
    result = lettered.reindex(new_index, method="ffill")
    assert_series_equal(result, expected)

    # inference of new dtype
    flags = Series([True, False, False, True], index=list("abcd"))
    new_index = "agc"
    result = flags.reindex(list(new_index)).ffill()
    expected = Series([True, True, False], index=list(new_index))
    assert_series_equal(result, expected)

    # GH4618 shifted series downcasting
    all_false = Series(False, index=range(0, 5))
    result = all_false.shift(1).fillna(method="bfill")
    expected = Series(False, index=range(0, 5))
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_nearest():
    """``method="nearest"`` for ``reindex``/``reindex_like``, with tolerances."""
    source = Series(np.arange(10, dtype="int64"))
    target = [0.1, 0.9, 1.5, 2.0]
    rounded = Series(np.around(target).astype("int64"), target)

    actual = source.reindex(target, method="nearest")
    assert_series_equal(rounded, actual)

    # reindex_like: no tolerance, scalar tolerance, per-label tolerance
    for extra in ({}, {"tolerance": 1}, {"tolerance": [1, 2, 3, 4]}):
        actual = source.reindex_like(actual, method="nearest", **extra)
        assert_series_equal(rounded, actual)

    # labels further away than the tolerance become NaN
    actual = source.reindex(target, method="nearest", tolerance=0.2)
    assert_series_equal(Series([0, 1, np.nan, 2], target), actual)

    actual = source.reindex(
        target, method="nearest", tolerance=[0.3, 0.01, 0.4, 3]
    )
    assert_series_equal(Series([0, np.nan, np.nan, 2], target), actual)
|
||||
|
||||
|
||||
def test_reindex_backfill():
    """Backward-filling while reindexing (counterpart of the pad test).

    Every second label of a 0..9 series is kept and the full index is
    restored with ``method="backfill"``/``"bfill"``: each gap takes the next
    kept value, and the last label has nothing after it, so it stays NaN.
    """
    base = Series(np.arange(10), dtype="int64")
    evens = base[::2]

    backfilled = evens.reindex(base.index, method="backfill")
    bfilled = evens.reindex(base.index, method="bfill")
    assert_series_equal(backfilled, bfilled)

    expected = Series(
        [0, 2, 2, 4, 4, 6, 6, 8, 8, np.nan], index=np.arange(10)
    )
    assert_series_equal(backfilled, expected)
|
||||
|
||||
|
||||
def test_reindex_int(test_data):
    """Integer data is upcast to float only when reindexing introduces NaN."""
    ts = test_data.ts[::2]
    int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)

    # this should work fine
    reindexed_int = int_ts.reindex(test_data.ts.index)

    # if NaNs introduced
    # (np.float64 is the type the removed ``np.float_`` alias pointed to)
    assert reindexed_int.dtype == np.float64

    # NO NaNs introduced
    reindexed_int = int_ts.reindex(int_ts.index[::2])
    assert reindexed_int.dtype == np.int_
|
||||
|
||||
|
||||
def test_reindex_bool(test_data):
    """Boolean data becomes object dtype only when reindexing introduces NaN."""
    # A series other than float, int, string, or object
    every_other = test_data.ts[::2]
    bool_ts = Series(np.zeros(len(every_other), dtype=bool), index=every_other.index)

    # this should work fine
    onto_full_index = bool_ts.reindex(test_data.ts.index)

    # if NaNs introduced
    assert onto_full_index.dtype == np.object_

    # NO NaNs introduced
    onto_subset = bool_ts.reindex(bool_ts.index[::2])
    assert onto_subset.dtype == np.bool_
|
||||
|
||||
|
||||
def test_reindex_bool_pad(test_data):
    """Padding onto the full index leaves the five dropped leading labels missing."""
    tail = test_data.ts[5:]
    bool_ts = Series(np.zeros(len(tail), dtype=bool), index=tail.index)

    padded = bool_ts.reindex(test_data.ts.index, method="pad")
    leading = padded[:5]
    assert isna(leading).all()
|
||||
|
||||
|
||||
def test_reindex_categorical():
    """Reindexing a categorical series keeps the categories and fills with NaN."""
    dates = date_range("20000101", periods=3)
    s = Series(["a", "b", "c"], dtype="category")

    # reindexing to an invalid Categorical
    result = s.reindex(dates)
    expected = Series(
        Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"]),
        index=dates,
    )
    tm.assert_series_equal(result, expected)

    # partial reindexing
    expected = Series(
        Categorical(values=["b", "c"], categories=["a", "b", "c"]), index=[1, 2]
    )
    result = s.reindex([1, 2])
    tm.assert_series_equal(result, expected)

    expected = Series(
        Categorical(values=["c", np.nan], categories=["a", "b", "c"]), index=[2, 3]
    )
    result = s.reindex([2, 3])
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_like(test_data):
    """``reindex_like(other)`` matches ``reindex(other.index)``; also GH 7179."""
    other = test_data.ts[::2]
    via_reindex = test_data.ts.reindex(other.index)
    via_reindex_like = test_data.ts.reindex_like(other)
    assert_series_equal(via_reindex, via_reindex_like)

    # GH 7179
    day1 = datetime(2013, 3, 5)
    day2 = datetime(2013, 5, 5)
    day3 = datetime(2014, 3, 5)

    series1 = Series([5, None, None], [day1, day2, day3])
    series2 = Series([None, None], [day1, day3])

    expected = Series([5, np.nan], index=[day1, day3])
    result = series1.reindex_like(series2, method="pad")
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_fill_value():
    """``reindex`` with and without ``fill_value`` for float, int, object and bool data."""
    # -----------------------------------------------------------
    # floats
    floats = Series([1.0, 2.0, 3.0])

    result = floats.reindex([1, 2, 3])
    assert_series_equal(result, Series([2.0, 3.0, np.nan], index=[1, 2, 3]))

    result = floats.reindex([1, 2, 3], fill_value=0)
    assert_series_equal(result, Series([2.0, 3.0, 0], index=[1, 2, 3]))

    # -----------------------------------------------------------
    # ints
    ints = Series([1, 2, 3])

    result = ints.reindex([1, 2, 3])
    assert_series_equal(result, Series([2.0, 3.0, np.nan], index=[1, 2, 3]))

    # don't upcast
    result = ints.reindex([1, 2, 3], fill_value=0)
    assert issubclass(result.dtype.type, np.integer)
    assert_series_equal(result, Series([2, 3, 0], index=[1, 2, 3]))

    # -----------------------------------------------------------
    # objects
    objects = Series([1, 2, 3], dtype=object)

    result = objects.reindex([1, 2, 3])
    assert_series_equal(
        result, Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
    )

    result = objects.reindex([1, 2, 3], fill_value="foo")
    assert_series_equal(
        result, Series([2, 3, "foo"], index=[1, 2, 3], dtype=object)
    )

    # ------------------------------------------------------------
    # bools
    bools = Series([True, False, True])

    result = bools.reindex([1, 2, 3])
    assert_series_equal(
        result, Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
    )

    result = bools.reindex([1, 2, 3], fill_value=False)
    assert_series_equal(result, Series([False, True, False], index=[1, 2, 3]))
|
||||
|
||||
|
||||
def test_reindex_datetimeindexes_tz_naive_and_aware():
    """Forward-fill reindexing a tz-aware series onto a tz-naive index raises."""
    # GH 8306
    aware = date_range("20131101", tz="America/Chicago", periods=7)
    # lowercase hourly alias, like the lowercase "10s" used elsewhere in this
    # file; the uppercase "H" spelling is deprecated in newer pandas
    naive = date_range("20131103", periods=10, freq="h")
    s = Series(range(7), index=aware)
    with pytest.raises(TypeError):
        s.reindex(naive, method="ffill")
|
||||
|
||||
|
||||
def test_reindex_empty_series_tz_dtype():
    """Reindexing an empty tz-aware series yields NaT values of the same dtype."""
    # GH 20869
    tz_dtype = "datetime64[ns, UTC]"
    result = Series(dtype=tz_dtype).reindex([0, 1])
    expected = Series([pd.NaT] * 2, dtype=tz_dtype)
    # both operands are Series, so compare with the Series helper directly
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_rename():
    """Renaming labels with ``str`` equals renaming with an equivalent lambda."""
    # GH 17407
    s = Series(range(1, 6), index=pd.Index(range(2, 7), name="IntIndex"))

    via_builtin = s.rename(str)
    via_lambda = s.rename(lambda label: str(label))

    assert_series_equal(via_builtin, via_lambda)
    assert via_builtin.name == via_lambda.name
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, index, drop_labels, axis, expected_data, expected_index",
    [
        # Unique Index
        ([1, 2], ["one", "two"], ["two"], 0, [1], ["one"]),
        ([1, 2], ["one", "two"], ["two"], "rows", [1], ["one"]),
        # GH 5248 Non-Unique Index
        ([1, 1, 2], ["one", "two", "one"], ["two"], 0, [1, 2], ["one", "one"]),
        ([1, 1, 2], ["one", "two", "one"], "two", 0, [1, 2], ["one", "one"]),
        ([1, 1, 2], ["one", "two", "one"], ["one"], 0, [1], ["two"]),
        ([1, 1, 2], ["one", "two", "one"], "one", 0, [1], ["two"]),
    ],
)
def test_drop_unique_and_non_unique_index(
    data, index, axis, drop_labels, expected_data, expected_index
):
    """Drop labels (scalar or list) from unique and non-unique indexes."""
    remaining = Series(data=data, index=index).drop(drop_labels, axis=axis)
    wanted = Series(data=expected_data, index=expected_index)
    tm.assert_series_equal(remaining, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, index, drop_labels, axis, error_type, error_desc",
    [
        # single string/tuple-like
        (range(3), list("abc"), "bc", 0, KeyError, "not found in axis"),
        (range(3), list("abc"), ("a",), 0, KeyError, "not found in axis"),
        # bad axis
        (range(3), list("abc"), "one", "columns", ValueError, "No axis named columns"),
    ],
)
def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc):
    """Dropping an absent label or using an unknown axis raises the listed error."""
    series = Series(data, index=index)
    with pytest.raises(error_type, match=error_desc):
        series.drop(drop_labels, axis=axis)
|
||||
|
||||
|
||||
def test_drop_with_ignore_errors():
    """``errors="ignore"`` skips absent labels; a boolean label can be dropped."""
    # errors='ignore'
    s = Series(range(3), index=list("abc"))
    result = s.drop("bc", errors="ignore")
    tm.assert_series_equal(result, s)
    result = s.drop(["a", "d"], errors="ignore")
    expected = s.iloc[1:]
    tm.assert_series_equal(result, expected)

    # GH 8522
    s = Series([2, 3], index=[True, False])
    # Precondition: the boolean labels were not coerced to numbers. Checked
    # through the dtype instead of the deprecated ``Index.is_object()`` so that
    # both an object-dtype and a bool-dtype index are accepted.
    assert s.index.dtype in (object, bool)
    result = s.drop(True)
    expected = Series([3], index=[False])
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 3]])
@pytest.mark.parametrize("drop_labels", [[], [1], [3]])
def test_drop_empty_list(index, drop_labels):
    """Dropping a (possibly empty) list of present labels from a data-less series."""
    # GH 21494
    kept = [label for label in index if label not in drop_labels]
    result = pd.Series(index=index).drop(drop_labels)
    expected = pd.Series(index=kept)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, index, drop_labels",
    [
        (None, [1, 2, 3], [1, 4]),
        (None, [1, 2, 2], [1, 4]),
        ([2, 3], [0, 1], [False, True]),
    ],
)
def test_drop_non_empty_list(data, index, drop_labels):
    """Dropping a list that contains an absent label raises ``KeyError``."""
    # GH 21494 and GH 16877
    series = pd.Series(data=data, index=index)
    with pytest.raises(KeyError, match="not found in axis"):
        series.drop(drop_labels)
|
@@ -0,0 +1,628 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_integer
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, Series, Timestamp, date_range, isna
|
||||
from pandas.core.indexing import IndexingError
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
from pandas.tseries.offsets import BDay
|
||||
|
||||
|
||||
def test_getitem_boolean(test_data):
    """Indexing with a plain list of booleans equals indexing with the boolean Series."""
    s = test_data.series
    above_median = s > s.median()

    # passing list is OK
    result = s[list(above_median)]
    expected = s[above_median]

    assert_series_equal(result, expected)
    tm.assert_index_equal(result.index, s.index[above_median])
|
||||
|
||||
|
||||
def test_getitem_boolean_empty():
    """Boolean and label indexing with empty or unaligned indexers."""
    empty = Series([], dtype=np.int64)
    empty.index.name = "index_name"
    empty = empty[empty.isna()]
    assert empty.index.name == "index_name"
    assert empty.dtype == np.int64

    # GH5877
    # indexing with empty series
    letters = Series(["A", "B"])
    expected = Series(np.nan, index=["C"], dtype=object)
    result = letters[Series(["C"], dtype=object)]
    assert_series_equal(result, expected)

    letters = Series(["A", "B"])
    expected = Series(dtype=object, index=Index([], dtype="int64"))
    result = letters[Series([], dtype=object)]
    assert_series_equal(result, expected)

    # invalid because of the boolean indexer
    # that's empty or not-aligned
    msg = (
        r"Unalignable boolean Series provided as indexer \(index of"
        r" the boolean Series and of the indexed object do not match"
    )
    for bad_mask in (Series([], dtype=bool), Series([True], dtype=bool)):
        with pytest.raises(IndexingError, match=msg):
            letters[bad_mask]
|
||||
|
||||
|
||||
def test_getitem_boolean_object(test_data):
    """An object-dtype boolean mask behaves like the bool mask; NaN in it raises."""
    # using column from DataFrame

    s = test_data.series
    mask = s > s.median()
    object_mask = mask.astype(object)

    # getitem
    assert_series_equal(s[object_mask], s[mask])

    # setitem
    via_bool = s.copy()
    via_object = s.copy()
    via_object[object_mask] = 5
    via_bool[mask] = 5
    assert_series_equal(via_object, via_bool)

    # nans raise exception
    object_mask[5:10] = np.nan
    msg = "cannot index with vector containing NA / NaN values"
    with pytest.raises(ValueError, match=msg):
        s[object_mask]
    with pytest.raises(ValueError, match=msg):
        s[object_mask] = 5
|
||||
|
||||
|
||||
def test_getitem_setitem_boolean_corner(test_data):
    """A boolean mask built on a shifted index is rejected by ``[]`` and ``.loc``."""
    ts = test_data.ts
    shifted_mask = ts.shift(1, freq=BDay()) > ts.median()

    # these used to raise...??

    msg = (
        r"Unalignable boolean Series provided as indexer \(index of"
        r" the boolean Series and of the indexed object do not match"
    )
    # same get/set pair through plain indexing first, then through .loc
    for indexable in (ts, ts.loc):
        with pytest.raises(IndexingError, match=msg):
            indexable[shifted_mask]
        with pytest.raises(IndexingError, match=msg):
            indexable[shifted_mask] = 1
|
||||
|
||||
|
||||
def test_setitem_boolean(test_data):
    """Assign a Series through a boolean mask, with and without alignment."""
    mask = test_data.series > test_data.series.median()

    # similar indexed series
    result = test_data.series.copy()
    result[mask] = test_data.series * 2
    expected = test_data.series * 2
    assert_series_equal(result[mask], expected[mask])

    # needs alignment
    result = test_data.series.copy()
    result[mask] = (test_data.series * 2)[0:5]
    expected = (test_data.series * 2)[0:5].reindex_like(test_data.series)
    # complement of the mask, spelled with the boolean-not operator ``~``
    expected[~mask] = test_data.series[mask]
    assert_series_equal(result[mask], expected[mask])
|
||||
|
||||
|
||||
def test_get_set_boolean_different_order(test_data):
    """A boolean mask taken from a re-sorted copy works like an in-order mask."""
    positive_sorted = test_data.series.sort_values() > 0

    # setting
    via_sorted_mask = test_data.series.copy()
    via_sorted_mask[positive_sorted] = 0

    expected = test_data.series.copy()
    expected[expected > 0] = 0

    assert_series_equal(via_sorted_mask, expected)

    # getting
    selected = test_data.series[positive_sorted]
    expected = test_data.series[test_data.series > 0]
    assert_series_equal(selected, expected)
|
||||
|
||||
|
||||
def test_where_unsafe_int(sint_dtype):
    """Masked assignment of a range keeps the signed-integer dtype."""
    s = Series(np.arange(10), dtype=sint_dtype)
    below_five = s < 5

    s[below_five] = range(2, 7)

    expected_values = list(range(2, 7)) + list(range(5, 10))
    assert_series_equal(s, Series(expected_values, dtype=sint_dtype))
|
||||
|
||||
|
||||
def test_where_unsafe_float(float_dtype):
    """Masked assignment of a range keeps the float dtype."""
    s = Series(np.arange(10), dtype=float_dtype)
    below_five = s < 5

    s[below_five] = range(2, 7)

    expected_values = list(range(2, 7)) + list(range(5, 10))
    assert_series_equal(s, Series(expected_values, dtype=float_dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype,expected_dtype",
    [
        (np.int8, np.float64),
        (np.int16, np.float64),
        (np.int32, np.float64),
        (np.int64, np.float64),
        (np.float32, np.float32),
        (np.float64, np.float64),
    ],
)
def test_where_unsafe_upcast(dtype, expected_dtype):
    """Masked assignment of fractional values yields ``expected_dtype``."""
    # see gh-9743
    fractional = [2.5, 3.5, 4.5, 5.5, 6.5]
    s = Series(np.arange(10), dtype=dtype)
    below_five = s < 5
    expected = Series(fractional + list(range(5, 10)), dtype=expected_dtype)

    s[below_five] = fractional
    assert_series_equal(s, expected)
|
||||
|
||||
|
||||
def test_where_unsafe():
    """Masked assignment: dtype changes, length mismatches and ``None`` values."""
    # see gh-9731
    s = Series(np.arange(10), dtype="int64")
    fractional = [2.5, 3.5, 4.5, 5.5]

    above_five = s > 5
    expected = Series(list(range(6)) + fractional, dtype="float64")

    s[above_five] = fractional
    assert_series_equal(s, expected)

    # see gh-3235
    s = Series(np.arange(10), dtype="int64")
    below_five = s < 5
    s[below_five] = range(2, 7)
    expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype="int64")
    assert_series_equal(s, expected)
    assert s.dtype == expected.dtype

    s = Series(np.arange(10), dtype="int64")
    above_five = s > 5
    s[above_five] = [0] * 4
    expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64")
    assert_series_equal(s, expected)

    # four positions selected, five values offered
    s = Series(np.arange(10))
    above_five = s > 5

    msg = "cannot assign mismatch length to masked array"
    with pytest.raises(ValueError, match=msg):
        s[above_five] = [5, 4, 3, 2, 1]

    with pytest.raises(ValueError, match=msg):
        s[above_five] = [0] * 5

    # dtype changes
    s = Series([1, 2, 3, 4])
    result = s.where(s > 2, np.nan)
    expected = Series([np.nan, np.nan, 3, 4])
    assert_series_equal(result, expected)

    # GH 4667
    # setting with None changes dtype
    s = Series(range(10)).astype(float)
    s[8] = None
    assert isna(s[8])

    s = Series(range(10)).astype(float)
    s[s > 8] = None
    result = s[isna(s)]
    expected = Series(np.nan, index=[9])
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_where():
    """Basic ``Series.where`` behaviour, including a condition shorter than the data."""
    s = Series(np.random.randn(5))
    positive = s > 0

    kept = s.where(positive).dropna()
    assert_series_equal(kept, s[positive])

    flipped = s.where(positive, -s)
    assert_series_equal(flipped, s.abs())

    masked = s.where(positive)
    assert s.shape == masked.shape
    assert masked is not s

    # test alignment
    cond = Series([True, False, False, True, False], index=s.index)
    negative = -(s.abs())

    expected = negative[cond].reindex(negative.index[:3]).reindex(negative.index)
    result = negative.where(cond[:3])
    assert_series_equal(result, expected)

    expected = negative.abs()
    expected.iloc[0] = negative[0]
    result = negative.where(cond[:3], -negative)
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_error():
    """Invalid conditions for ``where`` and invalid masked list assignments."""
    s = Series(np.random.randn(5))
    positive = s > 0

    shape_msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=shape_msg):
        s.where(1)
    with pytest.raises(ValueError, match=shape_msg):
        s.where(positive[:3].values, -s)

    # GH 2745
    s = Series([1, 2])
    s[[True, False]] = [0, 1]
    assert_series_equal(s, Series([0, 2]))

    # failures
    length_msg = "cannot assign mismatch length to masked array"
    with pytest.raises(ValueError, match=length_msg):
        s[[True, False]] = [0, 2, 3]

    numpy_msg = (
        "NumPy boolean array indexing assignment cannot assign 0 input"
        " values to the 1 output values where the mask is true"
    )
    with pytest.raises(ValueError, match=numpy_msg):
        s[[True, False]] = []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
def test_where_array_like(klass):
    """``where`` accepts the condition wrapped in any of the listed containers."""
    # see gh-15414
    cond = klass([False, True, True])

    result = Series([1, 2, 3]).where(cond)
    assert_series_equal(result, Series([np.nan, 2, 3]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "cond",
    [
        [1, 0, 1],
        Series([2, 5, 7]),
        ["True", "False", "True"],
        [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")],
    ],
)
def test_where_invalid_input(cond):
    """Non-boolean conditions and wrongly sized conditions are rejected."""
    # see gh-15414: only boolean arrays accepted
    s = Series([1, 2, 3])

    with pytest.raises(ValueError, match="Boolean array expected for the condition"):
        s.where(cond)

    with pytest.raises(
        ValueError, match="Array conditional must be same shape as self"
    ):
        s.where([True])
|
||||
|
||||
|
||||
def test_where_ndframe_align():
    """A wrongly sized raw condition raises; wrapped in a Series it is aligned."""
    msg = "Array conditional must be same shape as self"
    s = Series([1, 2, 3])

    cases = [
        ([True], Series([1, np.nan, np.nan])),
        (np.array([False, True, False, True]), Series([np.nan, 2, np.nan])),
    ]
    for cond, expected in cases:
        with pytest.raises(ValueError, match=msg):
            s.where(cond)

        out = s.where(Series(cond))
        tm.assert_series_equal(out, expected)
|
||||
|
||||
|
||||
def test_where_setitem_invalid():
    """List assignment through slices, lists and scalars, valid and invalid lengths."""
    # GH 2702
    # make sure correct exceptions are raised on invalid list assignment

    template = "cannot set using a {} indexer with a different length than the value"
    slice_msg = template.format("slice")
    list_msg = template.format("list-like")

    # slice
    s = Series(list("abc"))
    with pytest.raises(ValueError, match=slice_msg):
        s[0:3] = list(range(27))

    s[0:3] = list(range(3))
    assert_series_equal(s.astype(np.int64), Series([0, 1, 2]))

    # slice with step
    s = Series(list("abcdef"))
    with pytest.raises(ValueError, match=slice_msg):
        s[0:4:2] = list(range(27))

    s = Series(list("abcdef"))
    s[0:4:2] = list(range(2))
    assert_series_equal(s, Series([0, "b", 1, "d", "e", "f"]))

    # neg slices
    s = Series(list("abcdef"))
    with pytest.raises(ValueError, match=slice_msg):
        s[:-1] = list(range(27))

    s[-3:-1] = list(range(2))
    assert_series_equal(s, Series(["a", "b", "c", 0, 1, "f"]))

    # list
    s = Series(list("abc"))
    with pytest.raises(ValueError, match=list_msg):
        s[[0, 1, 2]] = list(range(27))

    s = Series(list("abc"))
    with pytest.raises(ValueError, match=list_msg):
        s[[0, 1, 2]] = list(range(2))

    # scalar
    s = Series(list("abc"))
    s[0] = list(range(10))
    assert_series_equal(s, Series([list(range(10)), "b", "c"]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("size", range(2, 6))
@pytest.mark.parametrize(
    "mask", [[True, False, False, False, False], [True, False], [False]]
)
@pytest.mark.parametrize(
    # builtin ``float`` replaces the deprecated ``np.float`` alias
    "item",
    [2.0, np.nan, np.finfo(float).max, np.finfo(float).min],
)
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize(
    "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)]
)
def test_broadcast(size, mask, item, box):
    """Check that a boxed single value is broadcast over a boolean selection.

    The same expected Series is compared against three operations: boolean
    ``__setitem__``, ``Series.where`` with the inverted selection, and
    ``Series.mask`` with the selection.

    Parameters
    ----------
    size : int
        Length of the Series under test.
    mask : list of bool
        Pattern resized to ``size`` entries to form the selection.
    item : float
        Scalar that replaces the selected positions.
    box : callable
        Wraps ``item`` in a one-element array, list or tuple.
    """
    selection = np.resize(mask, size)

    data = np.arange(size, dtype=float)

    # Construct the expected series by taking the source
    # data or item based on the selection
    expected = Series(
        [item if use_item else data[i] for i, use_item in enumerate(selection)]
    )

    s = Series(data)
    s[selection] = box(item)
    assert_series_equal(s, expected)

    s = Series(data)
    result = s.where(~selection, box(item))
    assert_series_equal(result, expected)

    s = Series(data)
    result = s.mask(selection, box(item))
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_inplace():
    """In-place ``where`` must give the same result as the copying ``where``."""
    base = Series(np.random.randn(5))
    positive = base > 0

    # no replacement value given
    work = base.copy()
    work.where(positive, inplace=True)
    assert_series_equal(work.dropna(), base[positive])
    assert_series_equal(work, base.where(positive))

    # explicit replacement values
    work = base.copy()
    work.where(positive, -base, inplace=True)
    assert_series_equal(work, base.where(positive, -base))
|
||||
|
||||
|
||||
def test_where_dups():
    """``where`` and masked assignment on a Series with a duplicated index."""
    dup_index = [0, 1, 2, 0, 1, 2]

    # GH 4550
    # where crashes with dups in index
    halves = [Series(list(range(3))), Series(list(range(3)))]
    comb = pd.concat(halves)
    filtered = comb.where(comb < 2)
    assert_series_equal(
        filtered, Series([0, 1, np.nan, 0, 1, np.nan], index=dup_index)
    )

    # GH 4548
    # inplace updating not working with dups
    comb[comb < 1] = 5
    assert_series_equal(comb, Series([5, 1, 2, 5, 1, 2], index=dup_index))

    comb[comb < 2] += 10
    assert_series_equal(comb, Series([5, 11, 2, 5, 11, 2], index=dup_index))
|
||||
|
||||
|
||||
def test_where_numeric_with_string():
    """``where`` on an integer Series with string replacements gives object dtype."""
    # GH 9280
    ints = pd.Series([1, 2, 3])

    # the replacement is given as a scalar, a list and an ndarray in turn
    replacements = ("X", ["X", "Y", "Z"], np.array(["X", "Y", "Z"]))
    for other in replacements:
        w = ints.where(ints > 1, other)

        assert not is_integer(w[0])
        assert is_integer(w[1])
        assert is_integer(w[2])
        assert isinstance(w[0], str)
        assert w.dtype == "object"
|
||||
|
||||
|
||||
def test_where_timedelta_coerce():
    """``where`` on a timedelta64 Series with numeric replacement values."""
    tds = Series([1, 2], dtype="timedelta64[ns]")
    all_false = np.array([False, False])

    # every position is replaced, so the result equals the replacement values
    tens = Series([10, 10])
    for other in ([10, 10], 10, 10.0, [10.0, 10.0]):
        assert_series_equal(tds.where(all_false, other), tens)

    # a NaN among the replacement values
    got = tds.where(all_false, [10.0, np.nan])
    assert_series_equal(got, Series([10, None], dtype="object"))
|
||||
|
||||
|
||||
def test_where_datetime_conversion():
    """``where`` on a datetime64 Series with numeric replacement values."""
    stamps = Series(date_range("20130102", periods=2))
    all_false = np.array([False, False])

    # every position is replaced, so the result equals the replacement values
    tens = Series([10, 10])
    for other in ([10, 10], 10, 10.0, [10.0, 10.0]):
        assert_series_equal(stamps.where(all_false, other), tens)

    # a NaN among the replacement values
    got = stamps.where(all_false, [10.0, np.nan])
    assert_series_equal(got, Series([10, None], dtype="object"))

    # GH 15701
    timestamps = ["2016-12-31 12:00:04+00:00", "2016-12-31 12:00:04.010000+00:00"]
    aware = Series([pd.Timestamp(t) for t in timestamps])
    got = aware.where(Series([False, True]))
    assert_series_equal(got, Series([pd.NaT, aware[1]]))
|
||||
|
||||
|
||||
def test_where_dt_tz_values(tz_naive_fixture):
    """``where`` between two Series built with the same ``tz`` argument."""

    def make(days):
        # Series over a DatetimeIndex created with the fixture's tz
        return pd.Series(pd.DatetimeIndex(days, tz=tz_naive_fixture))

    ser1 = make(["20150101", "20150102", "20150103"])
    ser2 = make(["20160514", "20160515", "20160516"])
    keep = pd.Series([True, True, False])

    result = ser1.where(keep, ser2)

    # the last position comes from ser2
    exp = make(["20150101", "20150102", "20160516"])
    assert_series_equal(exp, result)
|
||||
|
||||
|
||||
def test_mask():
    """``Series.mask(cond)`` is compared against ``Series.where(~cond)``."""
    # compare with tested results in test_where
    ser = Series(np.random.randn(5))
    positive = ser > 0

    assert_series_equal(ser.where(~positive, np.nan), ser.mask(positive))
    assert_series_equal(ser.where(~positive), ser.mask(positive))
    assert_series_equal(ser.where(~positive, -ser), ser.mask(positive, -ser))

    # condition Series shorter than the data
    flags = Series([True, False, False, True, False], index=ser.index)
    negated = -(ser.abs())
    assert_series_equal(negated.where(~flags[:3]), negated.mask(flags[:3]))
    assert_series_equal(
        negated.where(~flags[:3], -negated), negated.mask(flags[:3], -negated)
    )

    # scalar and wrongly sized array conditions are rejected
    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        ser.mask(1)
    with pytest.raises(ValueError, match=msg):
        ser.mask(flags[:3].values, -ser)

    # dtype changes
    ints = Series([1, 2, 3, 4])
    assert_series_equal(
        ints.mask(ints > 2, np.nan), Series([1, 2, np.nan, np.nan])
    )

    # see gh-21891
    pair = Series([1, 2])
    res = pair.mask([True, False])
    tm.assert_series_equal(res, Series([np.nan, 2]))
|
||||
|
||||
|
||||
def test_mask_inplace():
    """In-place ``mask`` must give the same result as the copying ``mask``."""
    base = Series(np.random.randn(5))
    positive = base > 0

    # no replacement value given
    work = base.copy()
    work.mask(positive, inplace=True)
    assert_series_equal(work.dropna(), base[~positive])
    assert_series_equal(work, base.mask(positive))

    # explicit replacement values
    work = base.copy()
    work.mask(positive, -base, inplace=True)
    assert_series_equal(work, base.mask(positive, -base))
|
@@ -0,0 +1,33 @@
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_getitem_callable():
    """``Series.__getitem__`` accepts a callable that returns the actual key."""
    # GH 12533
    ser = pd.Series(4, index=list("ABCD"))

    # callable returning a single label
    picked = ser[lambda x: "A"]
    assert picked == ser.loc["A"]

    # callable returning a list of labels
    picked = ser[lambda x: ["A", "B"]]
    tm.assert_series_equal(picked, ser.loc[["A", "B"]])

    # callable returning a boolean mask
    picked = ser[lambda x: [True, False, True, True]]
    tm.assert_series_equal(picked, ser.iloc[[0, 2, 3]])
|
||||
|
||||
|
||||
def test_setitem_callable():
    """``Series.__setitem__`` accepts a callable that returns the actual key."""
    # GH 12533
    labels = list("ABCD")
    ser = pd.Series([1, 2, 3, 4], index=labels)
    ser[lambda x: "A"] = -1

    want = pd.Series([-1, 2, 3, 4], index=list("ABCD"))
    tm.assert_series_equal(ser, want)
|
||||
|
||||
|
||||
def test_setitem_other_callable():
    """A callable used as the assigned value is stored, not called."""

    # GH 13299
    def inc(x):
        return x + 1

    ser = pd.Series([1, 2, -1, 4])
    ser[ser < 0] = inc

    # the function object itself sits in the negative slot
    want = pd.Series([1, 2, inc, 4])
    tm.assert_series_equal(ser, want)
|
@@ -0,0 +1,778 @@
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import iNaT
|
||||
import pandas._libs.index as _index
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import (
|
||||
assert_almost_equal,
|
||||
assert_frame_equal,
|
||||
assert_series_equal,
|
||||
)
|
||||
|
||||
|
||||
"""
|
||||
Also test support for datetime64[ns] in Series / DataFrame
|
||||
"""
|
||||
|
||||
|
||||
def test_fancy_getitem():
    """Look up one entry of a datetime-indexed Series through several key types."""
    dti = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )
    ser = Series(np.arange(len(dti)), index=dti)

    # integer, two string spellings, datetime and Timestamp keys all hit 48
    same_entry_keys = (
        48,
        "1/2/2009",
        "2009-1-2",
        datetime(2009, 1, 2),
        Timestamp(datetime(2009, 1, 2)),
    )
    for key in same_entry_keys:
        assert ser[key] == 48

    with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
        ser["2009-1-3"]

    # string slice bounds and datetime slice bounds select the same rows
    by_string = ser["3/6/2009":"2009-06-05"]
    by_datetime = ser[datetime(2009, 3, 6) : datetime(2009, 6, 5)]
    assert_series_equal(by_string, by_datetime)
|
||||
|
||||
|
||||
def test_fancy_setitem():
    """Assign into a datetime-indexed Series by integer, string and string slice."""
    first_fridays = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )
    ser = Series(np.arange(len(first_fridays)), index=first_fridays)

    # integer key
    ser[48] = -1
    assert ser[48] == -1

    # date string key; read back through the integer key
    ser["1/2/2009"] = -2
    assert ser[48] == -2

    # date string slice
    ser["1/2/2009":"2009-06-05"] = -3
    assert (ser[48:54] == -3).all()
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
@pytest.mark.parametrize("tz", [None, "Asia/Shanghai", "Europe/Berlin"])
@pytest.mark.parametrize("name", [None, "my_dti"])
def test_dti_snap(name, tz):
    """``DatetimeIndex.snap`` to weekly-Monday and business-day frequencies."""
    days = [
        "1/1/2002",
        "1/2/2002",
        "1/3/2002",
        "1/4/2002",
        "1/5/2002",
        "1/6/2002",
        "1/7/2002",
    ]
    dti = DatetimeIndex(days, name=name, tz=tz, freq="D")

    # (snap frequency, expected range start, expected range freq, repeat counts)
    cases = [
        ("W-MON", "12/31/2001", "w-mon", [3, 4]),
        ("B", "1/1/2002", "b", [1, 1, 1, 2, 2]),
    ]
    for snap_freq, start, range_freq, repeats in cases:
        result = dti.snap(freq=snap_freq)

        expected = date_range(start, "1/7/2002", name=name, tz=tz, freq=range_freq)
        expected = expected.repeat(repeats)
        tm.assert_index_equal(result, expected)
        assert result.tz == expected.tz
|
||||
|
||||
|
||||
def test_dti_reset_index_round_trip():
|
||||
dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")
|
||||
d1 = DataFrame({"v": np.random.rand(len(dti))}, index=dti)
|
||||
d2 = d1.reset_index()
|
||||
assert d2.dtypes[0] == np.dtype("M8[ns]")
|
||||
d3 = d2.set_index("index")
|
||||
assert_frame_equal(d1, d3, check_names=False)
|
||||
|
||||
# #2329
|
||||
stamp = datetime(2012, 11, 22)
|
||||
df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"])
|
||||
df = df.set_index("Date")
|
||||
|
||||
assert df.index[0] == stamp
|
||||
assert df.reset_index()["Date"][0] == stamp
|
||||
|
||||
|
||||
def test_series_set_value():
    """``Series.set_value`` with datetime keys; each call must emit a FutureWarning."""
    # #1561
    first, second = datetime(2001, 1, 1), datetime(2001, 1, 2)
    dates = [first, second]
    index = DatetimeIndex(dates)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        one_value = Series().set_value(dates[0], 1.0)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        two_values = one_value.set_value(dates[1], np.nan)

    assert_series_equal(two_values, Series([1.0, np.nan], index=index))

    # Disabled follow-up check kept from the original:
    # s = Series(index[:1], index[:1])
    # s2 = s.set_value(dates[1], index[1])
    # assert s2.values.dtype == 'M8[ns]'
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_slice_locs_indexerror():
    """Slicing with bounds far outside a large datetime index must not raise."""
    count = 100000
    origin = datetime(2000, 1, 1)
    # one entry every ten minutes
    times = [origin + timedelta(minutes=i * 10) for i in range(count)]
    ser = Series(range(count), times)

    # only checks that the lookup completes
    ser.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)]
|
||||
|
||||
|
||||
def test_slicing_datetimes():
    """Label slicing of a frame with datetime keys, unique and duplicated index."""
    # GH 7523
    first = datetime(2001, 1, 1, 10)
    last = datetime(2001, 1, 4, 10)

    # the same checks run on a unique index, then on one with a duplicate
    for days in ([1, 2, 3, 4], [1, 2, 2, 3, 4]):
        df = DataFrame(
            np.arange(float(len(days)), dtype="float64"),
            index=[datetime(2001, 1, i, 10, 0) for i in days],
        )

        # bounds covering the whole index return the whole frame
        for bounds in (slice(first, None), slice(None, last), slice(first, last)):
            assert_frame_equal(df.loc[bounds], df)

        # a lower bound one hour after the first row drops that row
        tail = df.iloc[1:]
        assert_frame_equal(df.loc[datetime(2001, 1, 1, 11) :], tail)
        assert_frame_equal(df.loc["20010101 11":], tail)
|
||||
|
||||
|
||||
def test_frame_datetime64_duplicated():
    """``DataFrame.duplicated`` flags no row when every date is distinct."""
    dates = date_range("2010-07-01", end="2010-08-05")

    # subset of columns: constant symbol plus the date column
    tst = DataFrame({"symbol": "AAA", "date": dates})
    result = tst.duplicated(["date", "symbol"])
    # ``~`` is the boolean inversion operator; unary ``-`` is arithmetic negation
    assert (~result).all()

    # date column only
    tst = DataFrame({"date": dates})
    result = tst.duplicated()
    assert (~result).all()
|
||||
|
||||
|
||||
def test_getitem_setitem_datetime_tz_pytz():
    """Set values in a tz-aware Series through keys given in other timezones."""
    from pytz import timezone as tz
    from pandas import date_range

    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern")
    ts = Series(np.random.randn(N), index=rng)

    keys = [
        # also test Timestamp tz handling, GH #2789
        "1990-01-01 09:00:00+00:00",
        "1990-01-01 03:00:00-06:00",
        # repeat with datetimes
        datetime(1990, 1, 1, 9, tzinfo=tz("UTC")),
        # comparison dates with datetime MUST be localized!
        tz("US/Central").localize(datetime(1990, 1, 1, 3)),
    ]
    for key in keys:
        # overwrite the entry, restore it from ts[4], expect the original back
        result = ts.copy()
        result[key] = 0
        result[key] = ts[4]
        assert_series_equal(result, ts)
|
||||
|
||||
|
||||
def test_getitem_setitem_datetime_tz_dateutil():
    """Same round trip as the pytz variant, with dateutil tzinfo objects."""
    from dateutil.tz import tzutc
    from pandas._libs.tslibs.timezones import dateutil_gettz as gettz

    def tz(x):
        # handle special case for utc in dateutil
        return tzutc() if x == "UTC" else gettz(x)

    from pandas import date_range

    N = 50

    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="H", tz="America/New_York")
    ts = Series(np.random.randn(N), index=rng)

    keys = [
        # also test Timestamp tz handling, GH #2789
        "1990-01-01 09:00:00+00:00",
        "1990-01-01 03:00:00-06:00",
        # repeat with datetimes
        datetime(1990, 1, 1, 9, tzinfo=tz("UTC")),
        datetime(1990, 1, 1, 3, tzinfo=tz("America/Chicago")),
    ]
    for key in keys:
        # overwrite the entry, restore it from ts[4], expect the original back
        result = ts.copy()
        result[key] = 0
        result[key] = ts[4]
        assert_series_equal(result, ts)
|
||||
|
||||
|
||||
def test_getitem_setitem_datetimeindex():
    """Get/set on a tz-aware hourly Series with string, datetime and index keys."""
    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern")
    ts = Series(np.random.randn(N), index=rng)

    lo_str = "1990-01-01 04:00:00"
    hi_str = "1990-01-01 07:00:00"

    # scalar string key
    assert ts[lo_str] == ts[4]

    work = ts.copy()
    work[lo_str] = 0
    work[lo_str] = ts[4]
    assert_series_equal(work, ts)

    # string slice
    assert_series_equal(ts[lo_str:hi_str], ts[4:8])

    work = ts.copy()
    work[lo_str:hi_str] = 0
    work[lo_str:hi_str] = ts[4:8]
    assert_series_equal(work, ts)

    # GH#18435 strings get a pass from tzawareness compat
    selected = ts[(ts.index >= lo_str) & (ts.index <= hi_str)]
    assert_series_equal(selected, ts[4:8])

    lb = "1990-01-01 04:00:00-0500"
    rb = "1990-01-01 07:00:00-0500"
    selected = ts[(ts.index >= lb) & (ts.index <= rb)]
    assert_series_equal(selected, ts[4:8])

    # repeat all the above with naive datetimes
    assert ts[datetime(1990, 1, 1, 4)] == ts[4]

    work = ts.copy()
    work[datetime(1990, 1, 1, 4)] = 0
    work[datetime(1990, 1, 1, 4)] = ts[4]
    assert_series_equal(work, ts)

    selected = ts[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)]
    assert_series_equal(selected, ts[4:8])

    work = ts.copy()
    work[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = 0
    work[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = ts[4:8]
    assert_series_equal(work, ts)

    lb = datetime(1990, 1, 1, 4)
    rb = datetime(1990, 1, 1, 7)
    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    with pytest.raises(TypeError, match=msg):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        ts[(ts.index >= lb) & (ts.index <= rb)]

    # localized bounds compare fine
    lb = pd.Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo)
    rb = pd.Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
    selected = ts[(ts.index >= lb) & (ts.index <= rb)]
    assert_series_equal(selected, ts[4:8])

    # keys taken from the index itself
    assert ts[ts.index[4]] == ts[4]
    assert_series_equal(ts[ts.index[4:8]], ts[4:8])

    work = ts.copy()
    work[ts.index[4:8]] = 0
    work[4:8] = ts[4:8]
    assert_series_equal(work, ts)

    # also test partial date slicing
    assert_series_equal(ts["1990-01-02"], ts[24:48])

    work = ts.copy()
    work["1990-01-02"] = 0
    work["1990-01-02"] = ts[24:48]
    assert_series_equal(work, ts)
|
||||
|
||||
|
||||
def test_getitem_setitem_periodindex():
    """Get/set on an hourly PeriodIndex Series with string and index keys."""
    from pandas import period_range

    N = 50
    rng = period_range("1/1/1990", periods=N, freq="H")
    ts = Series(np.random.randn(N), index=rng)

    lo = "1990-01-01 04"
    hi = "1990-01-01 07"

    # scalar string key
    assert ts[lo] == ts[4]

    work = ts.copy()
    work[lo] = 0
    work[lo] = ts[4]
    assert_series_equal(work, ts)

    # string slice
    assert_series_equal(ts[lo:hi], ts[4:8])

    work = ts.copy()
    work[lo:hi] = 0
    work[lo:hi] = ts[4:8]
    assert_series_equal(work, ts)

    # boolean mask built from string comparisons against the index
    selected = ts[(ts.index >= lo) & (ts.index <= hi)]
    assert_series_equal(selected, ts[4:8])

    # GH 2782
    assert ts[ts.index[4]] == ts[4]
    assert_series_equal(ts[ts.index[4:8]], ts[4:8])

    work = ts.copy()
    work[ts.index[4:8]] = 0
    work[4:8] = ts[4:8]
    assert_series_equal(work, ts)
|
||||
|
||||
|
||||
# FutureWarning from NumPy.
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_getitem_median_slice_bug():
    """A one-element list holding a slice selects the same rows as the slice."""
    every_other_bday = date_range("20090415", "20090519", freq="2B")
    ser = Series(np.random.randn(13), index=every_other_bday)

    window = slice(6, 7, None)
    via_list = ser[[window]]
    via_slice = ser[window]
    assert_series_equal(via_list, via_slice)
|
||||
|
||||
|
||||
def test_datetime_indexing():
    """A missing Timestamp key raises KeyError on read and is added on write."""
    from pandas import date_range

    index = date_range("1/1/2000", "1/7/2000").repeat(3)
    stamp = Timestamp("1/8/2000")

    # first pass uses the index as built, second pass reverses it
    # ("not monotonic" in the original)
    for reverse in (False, True):
        ser = Series(len(index), index=index)
        if reverse:
            ser = ser[::-1]

        with pytest.raises(KeyError, match=r"^947289600000000000$"):
            ser[stamp]
        ser[stamp] = 0
        assert ser[stamp] == 0
|
||||
|
||||
|
||||
"""
|
||||
test duplicates in time series
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture
def dups():
    """Series of random values whose datetime index contains repeated dates."""
    # Jan 2, 3 and 4 of 2000 appear three times each, Jan 5 once
    days = [2, 2, 2, 3, 3, 3, 4, 4, 4, 5]
    dates = [datetime(2000, 1, day) for day in days]

    return Series(np.random.randn(len(dates)), index=dates)
|
||||
|
||||
|
||||
def test_constructor(dups):
    """The ``dups`` fixture is a Series indexed by a DatetimeIndex."""
    series_ok = isinstance(dups, Series)
    assert series_ok
    index_ok = isinstance(dups.index, DatetimeIndex)
    assert index_ok
|
||||
|
||||
|
||||
def test_is_unique_monotonic(dups):
    """The ``dups`` index has repeated entries yet is still non-decreasing."""
    assert not dups.index.is_unique
    # the fixture lists its dates in ascending order (repeats allowed), so the
    # monotonicity promised by the test name can be asserted as well
    assert dups.index.is_monotonic_increasing
|
||||
|
||||
|
||||
def test_index_unique(dups):
    """``unique`` / ``nunique`` on naive, tz-aware and NaT-containing indexes."""
    uniques = dups.index.unique()
    expected = DatetimeIndex([datetime(2000, 1, day) for day in (2, 3, 4, 5)])
    assert uniques.dtype == "M8[ns]"  # sanity
    tm.assert_index_equal(uniques, expected)
    assert dups.index.nunique() == 4

    # #2563
    assert isinstance(uniques, DatetimeIndex)

    # tz and name survive unique()
    localized = dups.index.tz_localize("US/Eastern")
    localized.name = "foo"
    result = localized.unique()
    expected = DatetimeIndex(expected, name="foo").tz_localize("US/Eastern")
    assert result.tz is not None
    assert result.name == "foo"
    tm.assert_index_equal(result, expected)

    # NaT, note this is excluded
    int_values = [1370745748 + t for t in range(20)] + [iNaT]
    stamp_values = [
        Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)
    ] + [NaT]
    for arr in (int_values, stamp_values):
        idx = DatetimeIndex(arr * 3)
        tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
        assert idx.nunique() == 20
        assert idx.nunique(dropna=False) == 21
|
||||
|
||||
|
||||
def test_index_dupes_contains():
    """``in`` finds a datetime that occurs twice in a DatetimeIndex."""
    moment = datetime(2011, 12, 5, 20, 30)
    doubled = DatetimeIndex([moment] * 2)
    assert moment in doubled
|
||||
|
||||
|
||||
def test_duplicate_dates_indexing(dups):
    """Get and set by date on a Series whose index repeats dates."""
    ts = dups

    for date in ts.index.unique():
        got = ts[date]

        hits = ts.index == date
        matching = ts[hits]
        if hits.sum() > 1:
            # repeated date: lookup returns every matching row
            assert_series_equal(got, matching)
        else:
            # single occurrence: lookup returns the scalar
            assert_almost_equal(got, matching[0])

        # assignment by date writes every matching row
        cp = ts.copy()
        cp[date] = 0
        assert_series_equal(cp, Series(np.where(hits, 0, ts), index=ts.index))

    absent = datetime(2000, 1, 6)
    with pytest.raises(KeyError, match=r"^947116800000000000$"):
        ts[absent]

    # new index
    ts[absent] = 0
    assert ts[absent] == 0
|
||||
|
||||
|
||||
def test_range_slice():
    """String slices on an index with a repeated date include both repeats."""
    idx = DatetimeIndex(["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"])
    ts = Series(np.random.randn(len(idx)), index=idx)

    # open-ended slice starting at the repeated date
    assert_series_equal(ts["1/2/2000":], ts[1:])

    # closed slice, compared with the positional rows 1..3
    assert_series_equal(ts["1/2/2000":"1/3/2000"], ts[1:4])
|
||||
|
||||
|
||||
def test_groupby_average_dup_values(dups):
    """Grouping by ``level=0`` and by the index itself give the same means."""
    by_level = dups.groupby(level=0).mean()
    by_index = dups.groupby(dups.index).mean()
    assert_series_equal(by_level, by_index)
|
||||
|
||||
|
||||
def test_indexing_over_size_cutoff():
    """Look up a timestamp in a frame longer than a lowered ``_SIZE_CUTOFF``."""
    import datetime

    # #1821

    saved_cutoff = _index._SIZE_CUTOFF
    try:
        _index._SIZE_CUTOFF = 1000

        # create large list of non periodic datetime
        one_sec = datetime.timedelta(seconds=1)
        half_sec = datetime.timedelta(microseconds=500000)
        cursor = datetime.datetime(2011, 12, 5, 20, 30)
        n = 1100
        dates = []
        for _ in range(n):
            dates.extend(
                [
                    cursor,
                    cursor + one_sec,
                    cursor + one_sec + half_sec,
                    cursor + one_sec + one_sec + half_sec,
                ]
            )
            cursor += 3 * one_sec

        # duplicate some values in the list
        for p in np.random.randint(0, len(dates) - 1, 20):
            dates[p + 1] = dates[p]

        df = DataFrame(
            np.random.randn(len(dates), 4), index=dates, columns=list("ABCD")
        )

        timestamp = df.index[n * 3]
        assert timestamp in df.index

        # it works!
        df.loc[timestamp]
        assert len(df.loc[[timestamp]]) > 0
    finally:
        # restore the module-level cutoff whatever happened above
        _index._SIZE_CUTOFF = saved_cutoff
|
||||
|
||||
|
||||
def test_indexing_over_size_cutoff_period_index(monkeypatch):
    """Look up a period in a Series longer than a lowered ``_SIZE_CUTOFF``."""
    # GH 27136

    monkeypatch.setattr(_index, "_SIZE_CUTOFF", 1000)

    n = 1100
    minutes = pd.period_range("1/1/2000", freq="T", periods=n)
    assert minutes._engine.over_size_threshold

    ser = pd.Series(np.random.randn(len(minutes)), index=minutes)

    # last period of the range
    last = minutes[n - 1]
    assert last in ser.index

    # it works!
    ser[last]
    assert len(ser.loc[[last]]) > 0
|
||||
|
||||
|
||||
def test_indexing_unordered():
|
||||
# GH 2437
|
||||
rng = date_range(start="2011-01-01", end="2011-01-15")
|
||||
ts = Series(np.random.rand(len(rng)), index=rng)
|
||||
ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])
|
||||
|
||||
for t in ts.index:
|
||||
# TODO: unused?
|
||||
s = str(t) # noqa
|
||||
|
||||
expected = ts[t]
|
||||
result = ts2[t]
|
||||
assert expected == result
|
||||
|
||||
# GH 3448 (ranges)
|
||||
def compare(slobj):
|
||||
result = ts2[slobj].copy()
|
||||
result = result.sort_index()
|
||||
expected = ts[slobj]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
compare(slice("2011-01-01", "2011-01-15"))
|
||||
compare(slice("2010-12-30", "2011-01-15"))
|
||||
compare(slice("2011-01-01", "2011-01-16"))
|
||||
|
||||
# partial ranges
|
||||
compare(slice("2011-01-01", "2011-01-6"))
|
||||
compare(slice("2011-01-06", "2011-01-8"))
|
||||
compare(slice("2011-01-06", "2011-01-12"))
|
||||
|
||||
# single values
|
||||
result = ts2["2011"].sort_index()
|
||||
expected = ts["2011"]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# diff freq
|
||||
rng = date_range(datetime(2005, 1, 1), periods=20, freq="M")
|
||||
ts = Series(np.arange(len(rng)), index=rng)
|
||||
ts = ts.take(np.random.permutation(20))
|
||||
|
||||
result = ts["2005"]
|
||||
for t in result.index:
|
||||
assert t.year == 2005
|
||||
|
||||
|
||||
def test_indexing():
    """Partial-string indexing on Series and DataFrame with datetime indexes."""
    month_ends = date_range("2001-1-1", periods=20, freq="M")
    ts = Series(np.random.rand(len(month_ends)), index=month_ends)

    # getting

    # GH 3070, make sure semantics work on Series/Frame
    expected = ts["2001"]
    expected.name = "A"

    df = DataFrame({"A": ts})
    assert_series_equal(expected, df["2001"]["A"])

    # setting
    ts["2001"] = 1
    expected = ts["2001"]
    expected.name = "A"

    df.loc["2001", "A"] = 1

    assert_series_equal(expected, df["2001"]["A"])

    # GH3546 (not including times on the last day)
    # each (index, partial key) pair must select the whole Series
    whole_series_cases = [
        (
            date_range(start="2013-05-31 00:00", end="2013-05-31 23:00", freq="H"),
            "2013-05",
        ),
        (
            date_range(start="2013-05-31 00:00", end="2013-05-31 23:59", freq="S"),
            "2013-05",
        ),
        (
            [
                Timestamp("2013-05-31 00:00"),
                Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)),
            ],
            "2013",
        ),
    ]
    for idx, key in whole_series_cases:
        ts = Series(range(len(idx)), index=idx)
        expected = ts[key]
        assert_series_equal(expected, ts)

    # GH14826, indexing with a seconds resolution string / datetime object
    df = DataFrame(
        np.random.rand(5, 5),
        columns=["open", "high", "low", "close", "volume"],
        index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"),
    )
    # row lookup through .loc with a list; the value is not used further
    expected = df.loc[[df.index[2]]]

    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        df["2012-01-02 18:01:02"]
    msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)"
    with pytest.raises(KeyError, match=msg):
        df[df.index[2]]
|
||||
|
||||
|
||||
"""
|
||||
test NaT support
|
||||
"""
|
||||
|
||||
|
||||
def test_set_none_nan():
    """Assigning ``None`` or ``np.nan`` into a datetime Series stores ``NaT``."""
    series = Series(date_range("1/1/2000", periods=10))

    # (missing value, scalar position, slice, position checked after the slice)
    cases = [
        (None, 3, slice(3, 5), 4),
        (np.nan, 5, slice(5, 7), 6),
    ]
    for missing, pos, window, probe in cases:
        series[pos] = missing
        assert series[pos] is NaT

        series[window] = missing
        assert series[probe] is NaT
|
||||
|
||||
|
||||
def test_nat_operations():
    """median/min/max of a timedelta Series with one NaT equal the other value."""
    # GH 8617
    tds = Series([0, pd.NaT], dtype="m8[ns]")
    only_value = tds[0]

    assert tds.median() == only_value
    assert tds.min() == only_value
    assert tds.max() == only_value
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["round", "floor", "ceil"])
@pytest.mark.parametrize("freq", ["s", "5s", "min", "5min", "h", "5h"])
def test_round_nat(method, freq):
    """Rounding, flooring or ceiling a NaT-only Series leaves it NaT."""
    # GH14940
    nat_only = Series([pd.NaT])
    rounded = getattr(nat_only.dt, method)(freq)
    assert_series_equal(rounded, Series(pd.NaT))
|
||||
|
||||
|
||||
def test_setitem_tuple_with_datetimetz():
    """Setting NaN through a tuple label matches a positional write (tz values)."""
    # GH 20441
    tuple_labels = [(0, 1), (0, 2), (0, 3), (0, 4)]
    values = date_range("2017", periods=4, tz="US/Eastern")
    result = Series(values, index=tuple_labels)
    expected = result.copy()

    # label-based write on one side, positional write on the other
    result[(0, 1)] = np.nan
    expected.iloc[0] = np.nan
    assert_series_equal(result, expected)
|
@@ -0,0 +1,32 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import Series
|
||||
from pandas.util.testing import assert_almost_equal, assert_series_equal
|
||||
|
||||
|
||||
def test_iloc():
    """``Series.iloc`` with scalar positions, a slice and a list of integers."""
    ser = Series(np.random.randn(10), index=list(range(0, 20, 2)))

    # every scalar position agrees with the lookup by its label
    for pos, label in enumerate(ser.index):
        assert_almost_equal(ser.iloc[pos], ser[label])

    # pass a slice
    window = ser.iloc[slice(1, 3)]
    assert_series_equal(window, ser.loc[2:4])

    # test slice is a view
    window[:] = 0
    assert (ser[1:3] == 0).all()

    # list of integers
    positions = [0, 2, 3, 4, 5]
    assert_series_equal(ser.iloc[positions], ser.reindex(ser.index[positions]))
|
||||
|
||||
|
||||
def test_iloc_nonunique():
    """``iloc`` returns the element at the position despite duplicate labels."""
    dup_indexed = Series([0, 1, 2], index=[0, 1, 0])
    assert dup_indexed.iloc[2] == 2
|
@@ -0,0 +1,879 @@
|
||||
""" test get/set & misc """
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical, DataFrame, MultiIndex, Series, Timedelta, Timestamp
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
from pandas.tseries.offsets import BDay
|
||||
|
||||
|
||||
def test_basic_indexing():
    """Out-of-range positions and unknown labels raise on a duplicated index."""
    ser = Series(np.random.randn(5), index=["a", "b", "a", "a", "b"])

    with pytest.raises(IndexError, match="index out of bounds"):
        ser[5]
    with pytest.raises(
        IndexError, match="index 5 is out of bounds for axis 0 with size 5"
    ):
        ser[5] = 0

    with pytest.raises(KeyError, match=r"^'c'$"):
        ser["c"]

    # repeat the positional checks on the sorted Series; the patterns below
    # accept more than one message form
    ser = ser.sort_index()

    with pytest.raises(IndexError, match=r"index out of bounds|^5$"):
        ser[5]
    with pytest.raises(
        IndexError,
        match=r"index 5 is out of bounds for axis (0|1) with size 5|^5$",
    ):
        ser[5] = 0
|
||||
|
||||
|
||||
def test_basic_getitem_with_labels(test_data):
    """``Series[...]`` with label arrays, label slices, int lists and tz values."""
    ts = test_data.ts
    labels = ts.index[[5, 10, 15]]

    assert_series_equal(ts[labels], ts.reindex(labels))

    start, stop = labels[0], labels[2]
    assert_series_equal(ts[start:stop], ts.loc[start:stop])

    # integer indexes, be careful
    s = Series(np.random.randn(10), index=list(range(0, 20, 2)))
    inds = [0, 2, 5, 7, 8]
    # the same keys once as a plain list and once as an ndarray
    for key in (inds, np.array(inds)):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = s[key]
        assert_series_equal(result, s.reindex(key))

    # GH12089
    # with tz for values
    s = Series(
        pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"]
    )
    expected = Timestamp("2011-01-01", tz="US/Eastern")
    for result in (s.loc["a"], s.iloc[0], s["a"]):
        assert result == expected
|
||||
|
||||
|
||||
def test_getitem_setitem_ellipsis():
    """``Series[...]`` returns the whole Series and ``Series[...] = x`` sets it."""
    ser = Series(np.random.randn(10))

    # result is discarded on purpose
    # NOTE(review): presumably a smoke check that np.fix accepts a Series - confirm
    np.fix(ser)

    everything = ser[...]
    assert_series_equal(everything, ser)

    ser[...] = 5
    # the object obtained earlier through ``[...]`` reflects the write
    assert (everything == 5).all()
|
||||
|
||||
|
||||
def test_getitem_get(test_data):
    """``Series[...]`` and ``Series.get`` agree; missing and ``None`` keys."""
    num_ser = test_data.series
    obj_ser = test_data.objSeries

    pairs = [(num_ser, num_ser.index[5]), (obj_ser, obj_ser.index[5])]

    for ser, label in pairs:
        assert ser[label] == ser.get(label)

    for ser, label in pairs:
        assert ser[label] == ser[5]

    assert num_ser.get(-1) == num_ser.get(num_ser.index[-1])
    assert num_ser[5] == num_ser.get(num_ser.index[5])

    # missing
    missing_key = test_data.ts.index[0] - BDay()
    with pytest.raises(
        KeyError, match=r"Timestamp\('1999-12-31 00:00:00', freq='B'\)"
    ):
        test_data.ts[missing_key]

    # None
    # GH 5652
    for s in [Series(), Series(index=list("abc"))]:
        assert s.get(None) is None
|
||||
|
||||
|
||||
def test_getitem_fancy(test_data):
    """Selecting with ``[1, 2, 3]`` keeps labels and values lined up."""
    sources = (test_data.series, test_data.objSeries)
    picks = [src[[1, 2, 3]] for src in sources]

    # element 2 of the source is element 1 of the selection
    for src, pick in zip(sources, picks):
        assert src.index[2] == pick.index[1]
    for src, pick in zip(sources, picks):
        assert src[2] == pick[1]
|
||||
|
||||
|
||||
def test_getitem_generator(test_data):
    """A generator or iterator of booleans selects like the boolean mask."""
    ser = test_data.series
    positive = ser > 0

    from_genexp = ser[(x > 0 for x in ser)]
    from_iter = ser[iter(positive)]
    expected = ser[positive]

    assert_series_equal(from_genexp, expected)
    assert_series_equal(from_iter, expected)
|
||||
|
||||
|
||||
def test_type_promotion():
    """Adding a Timestamp, a float and a str to an empty Series keeps all three."""
    # GH12599
    items = [("a", pd.Timestamp("2016-01-01")), ("b", 3.0), ("c", "foo")]

    ser = pd.Series()
    for label, value in items:
        ser[label] = value

    expected = Series(
        [value for _, value in items], index=[label for label, _ in items]
    )
    assert_series_equal(ser, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "result_1, duplicate_item, expected_1",
    [
        [
            pd.Series({1: 12, 2: [1, 2, 2, 3]}),
            pd.Series({1: 313}),
            pd.Series({1: 12}, dtype=object),
        ],
        [
            pd.Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
            pd.Series({1: [1, 2, 3]}),
            pd.Series({1: [1, 2, 3]}),
        ],
    ],
)
def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1):
    """Lookup by a duplicated label returns a Series; a unique label a scalar."""
    # GH 17610
    combined = result_1.append(duplicate_item)
    expected = expected_1.append(duplicate_item)

    # label 1 now occurs twice, label 2 still once
    assert_series_equal(combined[1], expected)
    assert combined[2] == result_1[2]
|
||||
|
||||
|
||||
def test_getitem_out_of_bounds(test_data):
    """Out-of-range positions raise ``IndexError``, also on an empty Series."""
    oob = "index out of bounds"
    ts = test_data.ts

    # don't segfault, GH #495
    with pytest.raises(IndexError, match=oob):
        ts[len(ts)]

    # GH #917
    empty = Series([])
    with pytest.raises(IndexError, match=oob):
        empty[-1]
|
||||
|
||||
|
||||
def test_getitem_setitem_integers():
    """``iloc[0]`` reads and writes the element labelled ``"a"``."""
    # caused bug without test
    lettered = Series([1, 2, 3], ["a", "b", "c"])

    assert lettered.iloc[0] == lettered["a"]

    lettered.iloc[0] = 5
    tm.assert_almost_equal(lettered["a"], 5)
|
||||
|
||||
|
||||
def test_getitem_box_float64(test_data):
    """A scalar taken from ``test_data.ts`` is an ``np.float64``."""
    assert isinstance(test_data.ts[5], np.float64)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arr",
    [np.random.randn(10), tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern")],
)
def test_get(arr):
    """``Series.get`` with scalars, lists and slices; ``None`` for missing keys."""
    # GH 21260
    even_labels = Series(arr, index=[2 * i for i in range(len(arr))])
    assert even_labels.get(4) == even_labels.iloc[2]

    tm.assert_series_equal(even_labels.get([4, 6]), even_labels.iloc[[2, 3]])
    tm.assert_series_equal(even_labels.get(slice(2)), even_labels.iloc[[0, 1]])

    assert even_labels.get(-1) is None
    assert even_labels.get(even_labels.index.max() + 1) is None

    lettered = Series(arr[:6], index=list("abcdef"))
    assert lettered.get("c") == lettered.iloc[2]

    tm.assert_series_equal(lettered.get(slice("b", "d")), lettered.iloc[[1, 2, 3]])

    assert lettered.get("Z") is None

    # integer keys against the string index
    assert lettered.get(4) == lettered.iloc[4]
    assert lettered.get(-1) == lettered.iloc[-1]
    assert lettered.get(len(lettered)) is None

    # GH 21257
    every_other = pd.Series(arr)[::2]
    assert every_other.get(1) is None
|
||||
|
||||
|
||||
def test_series_box_timestamp():
    """Scalars read from a datetime Series come back as ``pd.Timestamp``."""
    bdays = pd.date_range("20090415", "20090519", freq="B")

    default_indexed = Series(bdays)
    assert isinstance(default_indexed[5], pd.Timestamp)

    date_indexed = Series(bdays, index=bdays)
    assert isinstance(date_indexed[5], pd.Timestamp)
    assert isinstance(date_indexed.iat[5], pd.Timestamp)
|
||||
|
||||
|
||||
def test_getitem_ambiguous_keyerror():
    """An integer that is not a label raises ``KeyError`` for ``[]`` and ``.loc``."""
    ser = Series(range(10), index=list(range(0, 20, 2)))

    for lookup in (ser.__getitem__, ser.loc.__getitem__):
        with pytest.raises(KeyError, match=r"^1$"):
            lookup(1)
|
||||
|
||||
|
||||
def test_getitem_unordered_dup():
    """A label that occurs once yields a scalar even if other labels repeat."""
    ser = Series(range(5), index=["c", "a", "a", "b", "b"])
    hit = ser["c"]
    assert is_scalar(hit)
    assert hit == 0
|
||||
|
||||
|
||||
def test_getitem_dups_with_missing():
    """``[]`` and ``.loc`` agree for a label list with a duplicate and a miss."""
    # breaks reindex, so need to use .loc internally
    # GH 4246
    ser = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"])
    wanted = ["foo", "bar", "bah", "bam"]

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        expected = ser.loc[wanted]

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ser[wanted]

    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_dups():
    """Looking up a duplicated label returns every matching row."""
    ser = Series(range(5), index=["A", "A", "B", "C", "C"], dtype=np.int64)
    both_c = Series([3, 4], index=["C", "C"], dtype=np.int64)
    assert_series_equal(ser["C"], both_c)
|
||||
|
||||
|
||||
def test_setitem_ambiguous_keyerror():
    """Assigning to an absent integer label appends it, via ``[]`` and ``.loc``."""
    base = Series(range(10), index=list(range(0, 20, 2)))

    # equivalent of an append
    expected = pd.concat([base, Series([5], index=[1])])

    via_getitem = base.copy()
    via_getitem[1] = 5
    assert_series_equal(via_getitem, expected)

    via_loc = base.copy()
    via_loc.loc[1] = 5
    assert_series_equal(via_loc, expected)
|
||||
|
||||
|
||||
def test_getitem_dataframe():
    """Indexing a Series with a boolean DataFrame raises ``TypeError``."""
    labels = list(range(10))
    ser = pd.Series(10, index=labels)
    frame = pd.DataFrame(labels, index=labels)

    expected_msg = (
        "Indexing a Series with DataFrame is not supported,"
        " use the appropriate DataFrame column"
    )
    with pytest.raises(TypeError, match=expected_msg):
        ser[frame > 5]
|
||||
|
||||
|
||||
def test_setitem(test_data):
    """``Series.__setitem__`` with labels, positions, masks, slices and new keys."""
    ts = test_data.ts

    ts[ts.index[5]] = np.nan
    ts[[1, 2, 17]] = np.nan
    ts[6] = np.nan
    assert np.isnan(ts[6])
    assert np.isnan(ts[2])

    # overwrite every NaN through a boolean mask
    ts[np.isnan(ts)] = 5
    assert not np.isnan(ts[2])

    # caught this bug when writing tests
    series = Series(tm.makeIntIndex(20).astype(float), index=tm.makeIntIndex(20))

    series[::2] = 0
    assert (series[::2] == 0).all()

    # set item that's not contained
    grown = test_data.series.copy()
    grown["foobar"] = 1

    extra = Series([1], index=["foobar"], name="series")
    assert_series_equal(grown, test_data.series.append(extra))

    # Test for issue #10193
    key = pd.Timestamp("2012-01-01")

    series = pd.Series()
    series[key] = 47
    assert_series_equal(series, pd.Series(47, [key]))

    series = pd.Series([], pd.DatetimeIndex([], freq="D"))
    series[key] = 47
    assert_series_equal(series, pd.Series(47, pd.DatetimeIndex([key], freq="D")))
|
||||
|
||||
|
||||
def test_setitem_dtypes():
    """Writing NaN into integer or boolean Series changes the dtype as expected."""
    # change dtypes
    # GH 4463
    expected = Series([np.nan, 2, 3])

    # the same write through .iloc, .loc and plain []
    for accessor in ("iloc", "loc", None):
        ser = Series([1, 2, 3])
        target = ser if accessor is None else getattr(ser, accessor)
        target[0] = np.nan
        assert_series_equal(ser, expected)

    bool_cases = [([False], [np.nan]), ([False, True], [np.nan, 1.0])]
    for values, after in bool_cases:
        ser = Series(values)
        ser.loc[0] = np.nan
        assert_series_equal(ser, Series(after))
|
||||
|
||||
|
||||
def test_set_value(test_data):
    """``Series.set_value`` warns, returns the Series itself and can add a key."""
    ts = test_data.ts
    label = ts.index[10]
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        returned = ts.set_value(label, 0)
    assert returned is test_data.ts
    assert test_data.ts[label] == 0

    # equiv
    grown = test_data.series.copy()
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        returned = grown.set_value("foobar", 0)
    assert returned is grown
    assert returned.index[-1] == "foobar"
    assert returned["foobar"] == 0

    # the same enlargement through .loc
    grown = test_data.series.copy()
    grown.loc["foobar"] = 0
    assert grown.index[-1] == "foobar"
    assert grown["foobar"] == 0
|
||||
|
||||
|
||||
def test_setslice(test_data):
    """A positional slice of ``ts`` has a unique index as long as its values."""
    chunk = test_data.ts[5:20]
    assert len(chunk) == len(chunk.index)
    assert chunk.index.is_unique is True
|
||||
|
||||
|
||||
# FutureWarning from NumPy about [slice(None, 5)].
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_basic_getitem_setitem_corner(test_data):
    """Tuple keys and lists that mix ints with slices are rejected."""
    ts = test_data.ts

    # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
    tuple_msg = "Can only tuple-index with a MultiIndex"
    with pytest.raises(ValueError, match=tuple_msg):
        ts[:, 2]
    with pytest.raises(ValueError, match=tuple_msg):
        ts[:, 2] = 2

    # weird lists. [slice(0, 5)] will work but not two slices
    assert_series_equal(ts[[slice(None, 5)]], ts[:5])

    # a list holding an int and a slice raises for both get and set
    unhashable_msg = r"unhashable type(: 'slice')?"
    with pytest.raises(TypeError, match=unhashable_msg):
        ts[[5, slice(None, None)]]
    with pytest.raises(TypeError, match=unhashable_msg):
        ts[[5, slice(None, None)]] = 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize("tz", ["US/Eastern", "UTC", "Asia/Tokyo"])
def test_setitem_with_tz(tz):
    """Scalar and vector assignment into a tz-aware Series keeps the tz dtype."""

    def stamps(*texts):
        # tz-aware Timestamps in the parametrized zone
        return [pd.Timestamp(text, tz=tz) for text in texts]

    tz_dtype = "datetime64[ns, {0}]".format(tz)
    orig = pd.Series(pd.date_range("2016-01-01", freq="H", periods=3, tz=tz))
    assert orig.dtype == tz_dtype

    # scalar
    new_value = pd.Timestamp("2011-01-01", tz=tz)
    exp = pd.Series(
        stamps("2016-01-01 00:00", "2011-01-01 00:00", "2016-01-01 02:00")
    )

    s = orig.copy()
    s[1] = new_value
    tm.assert_series_equal(s, exp)

    for accessor in ("loc", "iloc"):
        s = orig.copy()
        getattr(s, accessor)[1] = new_value
        tm.assert_series_equal(s, exp)

    # vector
    vals = pd.Series(stamps("2011-01-01", "2012-01-01"), index=[1, 2])
    assert vals.dtype == tz_dtype

    # this write goes into the Series left over from the scalar .iloc case
    s[[1, 2]] = vals
    exp = pd.Series(
        stamps("2016-01-01 00:00", "2011-01-01 00:00", "2012-01-01 00:00")
    )
    tm.assert_series_equal(s, exp)

    for accessor in ("loc", "iloc"):
        s = orig.copy()
        getattr(s, accessor)[[1, 2]] = vals
        tm.assert_series_equal(s, exp)
|
||||
|
||||
|
||||
def test_setitem_with_tz_dst():
    """Scalar and vector assignment into a US/Eastern Series from 2016-11-06."""
    # GH XXX
    tz = "US/Eastern"

    def stamps(*texts):
        # tz-aware Timestamps in US/Eastern
        return [pd.Timestamp(text, tz=tz) for text in texts]

    tz_dtype = "datetime64[ns, {0}]".format(tz)
    orig = pd.Series(pd.date_range("2016-11-06", freq="H", periods=3, tz=tz))
    assert orig.dtype == tz_dtype

    # scalar
    new_value = pd.Timestamp("2011-01-01", tz=tz)
    exp = pd.Series(
        stamps(
            "2016-11-06 00:00-04:00",
            "2011-01-01 00:00-05:00",
            "2016-11-06 01:00-05:00",
        )
    )

    s = orig.copy()
    s[1] = new_value
    tm.assert_series_equal(s, exp)

    for accessor in ("loc", "iloc"):
        s = orig.copy()
        getattr(s, accessor)[1] = new_value
        tm.assert_series_equal(s, exp)

    # vector
    vals = pd.Series(stamps("2011-01-01", "2012-01-01"), index=[1, 2])
    assert vals.dtype == tz_dtype

    # this write goes into the Series left over from the scalar .iloc case
    s[[1, 2]] = vals
    exp = pd.Series(
        stamps("2016-11-06 00:00", "2011-01-01 00:00", "2012-01-01 00:00")
    )
    tm.assert_series_equal(s, exp)

    for accessor in ("loc", "iloc"):
        s = orig.copy()
        getattr(s, accessor)[[1, 2]] = vals
        tm.assert_series_equal(s, exp)
|
||||
|
||||
|
||||
def test_categorical_assigning_ops():
    """Assigning existing categories (or NaN) into a categorical Series."""
    cats = ["a", "b"]
    orig = Series(Categorical(["b", "b"], categories=cats))

    s = orig.copy()
    s[:] = "a"
    tm.assert_series_equal(s, Series(Categorical(["a", "a"], categories=cats)))

    # three different keys that all address only the second element
    key_makers = (
        lambda ser: 1,
        lambda ser: ser.index > 0,
        lambda ser: [False, True],
    )
    for make_key in key_makers:
        s = orig.copy()
        s[make_key(s)] = "a"
        exp = Series(Categorical(["b", "a"], categories=cats))
        tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.index = ["x", "y"]
    s["y"] = "a"
    exp = Series(Categorical(["b", "a"], categories=cats), index=["x", "y"])
    tm.assert_series_equal(s, exp)

    # ensure that one can set something to np.nan
    s = Series(Categorical([1, 2, 3]))
    exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3]))
    s[1] = np.nan
    tm.assert_series_equal(s, exp)
|
||||
|
||||
|
||||
def test_slice(test_data):
    """Positional slices keep labels aligned and write through to the parent."""
    series = test_data.series
    obj_series = test_data.objSeries

    num_slice = series[10:20]
    num_tail = series[-10:]
    obj_slice = obj_series[10:20]

    # position 9 lies just before the slice
    assert series.index[9] not in num_slice.index
    assert obj_series.index[9] not in obj_slice.index

    assert len(num_slice) == len(num_slice.index)
    first_label = num_slice.index[0]
    assert series[first_label] == num_slice[first_label]

    assert num_slice.index[1] == series.index[11]
    assert tm.equalContents(num_tail, np.array(series)[-10:])

    # Test return view.
    view = series[10:20]
    view[:] = 0

    assert (series[10:20] == 0).all()
|
||||
|
||||
|
||||
def test_slice_can_reorder_not_uniquely_indexed():
    """Reversing a Series with duplicated labels does not raise."""
    dup_indexed = Series(1, index=["a", "a", "b", "b", "c"])
    dup_indexed[::-1]  # it works!
|
||||
|
||||
|
||||
def test_loc_setitem(test_data):
    """Writes through ``.loc``/``.iloc`` match the equivalent ``[]`` writes."""
    series = test_data.series
    positions = [3, 4, 7]
    inds = series.index[positions]

    result = series.copy()
    result.loc[inds] = 5

    expected = series.copy()
    expected[positions] = 5
    assert_series_equal(result, expected)

    result.iloc[5:10] = 10
    expected[5:10] = 10
    assert_series_equal(result, expected)

    # set slice with indices
    d1, d2 = series.index[[5, 15]]
    result.loc[d1:d2] = 6
    expected[5:16] = 6  # because it's inclusive
    assert_series_equal(result, expected)

    # set index value
    for label, value in ((d1, 4), (d2, 6)):
        series.loc[label] = value
    assert series[d1] == 4
    assert series[d2] == 6
|
||||
|
||||
|
||||
def test_setitem_na():
    """Slice-assigning NaN into int/bool Series yields the expected Series."""
    nan = np.nan
    every_other = slice(None, None, 2)

    # these induce dtype changes
    cases = [
        (
            [2, 3, 4, 5, 6, 7, 8, 9, 10],
            every_other,
            [nan, 3, nan, 5, nan, 7, nan, 9, nan],
        ),
        # gets coerced to float, right?
        ([True, True, False, False], every_other, [nan, 1, nan, 0]),
        (np.arange(10), slice(None, 5), [nan, nan, nan, nan, nan, 5, 6, 7, 8, 9]),
    ]
    for data, key, after in cases:
        expected = Series(after)
        s = Series(data)
        s[key] = nan
        assert_series_equal(s, expected)
|
||||
|
||||
|
||||
def test_timedelta_assignment():
    """Assigning timedeltas via ``.loc`` gives Series equal to Timedelta ones."""
    # GH 8209
    one_day = Timedelta("1 days")

    s = Series([])
    s.loc["B"] = timedelta(1)
    tm.assert_series_equal(s, Series(one_day, index=["B"]))

    s = s.reindex(s.index.insert(0, "A"))
    tm.assert_series_equal(s, Series([np.nan, one_day], index=["A", "B"]))

    both_filled = Series(one_day, index=["A", "B"])
    tm.assert_series_equal(s.fillna(timedelta(1)), both_filled)

    s.loc["A"] = timedelta(1)
    tm.assert_series_equal(s, both_filled)

    # GH 14155
    ten_min = np.timedelta64(10, "m")
    twenty_min = np.timedelta64(20, "m")

    s = Series(10 * [ten_min])
    s.loc[[1, 2, 3]] = twenty_min

    expected = pd.Series(10 * [ten_min])
    expected.loc[[1, 2, 3]] = pd.Timedelta(twenty_min)
    tm.assert_series_equal(s, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "nat_val,should_cast",
    [
        (pd.NaT, True),
        (np.timedelta64("NaT", "ns"), True),
        (np.datetime64("NaT", "ns"), False),
    ],
)
def test_td64_series_assign_nat(nat_val, should_cast):
    """Assign a NaT-like value into a timedelta64 Series via ``[]``/loc/iloc."""
    # some nat-like values should be cast to timedelta64 when inserting
    # into a timedelta64 series.  Others should coerce to object
    # and retain their dtypes.
    base = pd.Series([0, 1, 2], dtype="m8[ns]")
    expected = pd.Series([pd.NaT, 1, 2], dtype="m8[ns]")
    if not should_cast:
        expected = expected.astype(object)

    # None stands for plain ``ser[0] = ...``
    for accessor in (None, "loc", "iloc"):
        ser = base.copy(deep=True)
        target = ser if accessor is None else getattr(ser, accessor)
        target[0] = nat_val
        tm.assert_series_equal(ser, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "td",
    [
        pd.Timedelta("9 days"),
        pd.Timedelta("9 days").to_timedelta64(),
        pd.Timedelta("9 days").to_pytimedelta(),
    ],
)
def test_append_timedelta_does_not_cast(td):
    """Adding a timedelta under a new label keeps it readable as a Timedelta."""
    # GH#22717 inserting a Timedelta should _not_ cast to int64
    expected = pd.Series(["x", td], index=[0, "td"], dtype=object)

    via_getitem = pd.Series(["x"])
    via_getitem["td"] = td
    tm.assert_series_equal(via_getitem, expected)
    assert isinstance(via_getitem["td"], pd.Timedelta)

    via_loc = pd.Series(["x"])
    # NOTE(review): this branch assigns a fixed Timedelta rather than the
    # parametrized ``td`` - confirm whether that is intentional
    via_loc.loc["td"] = pd.Timedelta("9 days")
    tm.assert_series_equal(via_loc, expected)
    assert isinstance(via_loc["td"], pd.Timedelta)
|
||||
|
||||
|
||||
def test_underlying_data_conversion():
    """Check that writes made through a column Series show up in the DataFrame.

    Three scenarios are covered: ``Series.update`` on a column of a
    MultiIndexed frame (GH 4080), chained ``df[col].iloc[...] = value``
    assignment (GH 3970) and ``Series.update`` with string data on an
    all-NaN column (GH 3217).
    """
    # GH 4080
    df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]})
    df.set_index(["a", "b", "c"], inplace=True)
    s = Series([1], index=[(2, 2, 2)])
    df["val"] = 0
    df["val"].update(s)

    expected = DataFrame(dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0]))
    expected.set_index(["a", "b", "c"], inplace=True)
    tm.assert_frame_equal(df, expected)

    # GH 3970
    # these are chained assignments as well
    # option_context restores the previous setting even when an assertion
    # fails, instead of unconditionally forcing the option back to "raise"
    with pd.option_context("chained_assignment", None):
        df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
        df["cc"] = 0.0

        ck = [True] * len(df)

        df["bb"].iloc[0] = 0.13

        # TODO: unused
        df_tmp = df.iloc[ck]  # noqa

        df["bb"].iloc[0] = 0.15
        assert df["bb"].iloc[0] == 0.15

    # GH 3217
    df = DataFrame(dict(a=[1, 3], b=[np.nan, 2]))
    df["c"] = np.nan
    df["c"].update(pd.Series(["foo"], index=[0]))

    expected = DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=["foo", np.nan]))
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_preserve_refs(test_data):
    """Writing into a list-selected subset leaves the source ``ts`` untouched."""
    subset = test_data.ts[[5, 10, 15]]
    # position 1 of the subset corresponds to position 10 of the source
    subset[1] = np.nan
    assert not np.isnan(test_data.ts[10])
|
||||
|
||||
|
||||
def test_cast_on_putmask():
    """Boolean-list assignment of an int64 Series keeps the int64 dtype."""
    # GH 2746

    # need to upcast
    ser = Series([1, 2], index=[1, 2], dtype="int64")
    replacement = Series([0], index=[1], dtype="int64")
    ser[[True, False]] = replacement

    assert_series_equal(ser, Series([0, 2], index=[1, 2], dtype="int64"))
|
||||
|
||||
|
||||
def test_type_promote_putmask():
    """Masked assignment of values of another type stays aligned on the index."""
    # GH8387: test that changing types does not break alignment
    ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5)
    positive = ts > 0
    left = ts.copy()
    left[positive] = ts[positive].copy().map(str)
    assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t))

    ints = Series([0, 1, 2, 0])
    nonzero = ints > 0
    ints[nonzero] = ints[nonzero].map(str)
    assert_series_equal(ints, Series([0, "1", "2", 0]))

    # writing a selection back onto itself changes nothing
    mixed = Series([0, "foo", "bar", 0])
    keep = Series([False, True, True, False])
    mixed[keep] = mixed[keep]
    assert_series_equal(mixed, Series([0, "foo", "bar", 0]))
|
||||
|
||||
|
||||
def test_multilevel_preserve_name():
    """Selecting a first-level key from a MultiIndexed Series keeps its name."""
    index = MultiIndex(
        levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    named = Series(np.random.randn(len(index)), index=index, name="sth")

    for selection in (named["foo"], named.loc["foo"]):
        assert selection.name == named.name
|
||||
|
||||
|
||||
def test_setitem_scalar_into_readonly_backing_data():
    """Scalar writes into a Series built on a read-only array raise."""
    # GH14359: test that you cannot mutate a read only buffer

    array = np.zeros(5)
    array.flags.writeable = False  # make the array immutable
    series = Series(array)

    msg = "assignment destination is read-only"
    for pos in range(len(series)):
        with pytest.raises(ValueError, match=msg):
            series[pos] = 1

        # the backing array still holds its original value
        assert array[pos] == 0
|
||||
|
||||
|
||||
def test_setitem_slice_into_readonly_backing_data():
    """A slice write into a Series built on a read-only array raises."""
    # GH14359: test that you cannot mutate a read only buffer

    array = np.zeros(5)
    array.flags.writeable = False  # make the array immutable
    series = Series(array)

    with pytest.raises(ValueError, match="assignment destination is read-only"):
        series[1:3] = 1

    # nothing in the backing array was changed
    assert not array.any()
|
||||
|
||||
|
||||
"""
|
||||
miscellaneous methods
|
||||
"""
|
||||
|
||||
|
||||
def test_pop():
    """``Series.pop`` on a DataFrame row returns the value and drops the key."""
    # GH 6600
    df = DataFrame({"A": 0, "B": np.arange(5, dtype="int64"), "C": 0})
    last_row = df.iloc[4]

    assert last_row.pop("B") == 4

    assert_series_equal(last_row, Series([0, 0], index=["A", "C"], name=4))
|
||||
|
||||
|
||||
def test_take():
    """``Series.take`` with valid, negative and out-of-range positions."""
    s = Series([-1, 5, 6, 2, 4])

    valid_cases = [
        ([1, 3, 4], Series([5, 2, 4], index=[1, 3, 4])),
        ([-1, 3, 4], Series([4, 2, 4], index=[4, 3, 4])),
    ]
    for positions, expected in valid_cases:
        tm.assert_series_equal(s.take(positions), expected)

    msg = "index {} is out of bounds for size 5"
    # (offending position, positions passed to take)
    for bad, positions in ((10, [1, 10]), (5, [2, 5])):
        with pytest.raises(IndexError, match=msg.format(bad)):
            s.take(positions)
|
||||
|
||||
|
||||
def test_take_categorical():
    """``take`` with negative positions on a categorical Series."""
    # https://github.com/pandas-dev/pandas/issues/20664
    letters = Series(pd.Categorical(["a", "b", "c"]))
    expected = Series(
        pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"]), index=[1, 1, 0]
    )
    assert_series_equal(letters.take([-2, -2, 0]), expected)
|
||||
|
||||
|
||||
def test_head_tail(test_data):
    """``head``/``tail`` equal the corresponding positional slices."""
    series = test_data.series
    checks = [
        (series.head(), series[:5]),
        (series.head(0), series[0:0]),
        (series.tail(), series[-5:]),
        (series.tail(0), series[0:0]),
    ]
    for actual, wanted in checks:
        assert_series_equal(actual, wanted)
|
@@ -0,0 +1,159 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series, Timestamp
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)])
def test_loc_uint64(val, expected):
    """``.loc`` finds integer labels at and just above ``2 ** 63 - 1``."""
    # see gh-19399
    big_keys = Series({2 ** 63 - 1: 3, 2 ** 63: 4})
    assert big_keys.loc[val] == expected
|
||||
|
||||
|
||||
def test_loc_getitem(test_data):
    """``.loc``/``.iloc`` reads agree with reindex, truncate and ``[]``."""
    series = test_data.series
    ts = test_data.ts

    inds = series.index[[3, 4, 7]]
    assert_series_equal(series.loc[inds], series.reindex(inds))
    assert_series_equal(series.iloc[5::2], series[5::2])

    # slice with indices
    d1, d2 = ts.index[[5, 15]]
    assert_series_equal(ts.loc[d1:d2], ts.truncate(d1, d2))

    # boolean
    mask = series > series.median()
    assert_series_equal(series.loc[mask], series[mask])

    # ask for index value
    for label in (d1, d2):
        assert ts.loc[label] == ts[label]
|
||||
|
||||
|
||||
def test_loc_getitem_not_monotonic(test_data):
    """Label slicing a reordered Series raises ``KeyError`` for get and set."""
    d1, d2 = test_data.ts.index[[5, 15]]

    # every second row, with the first three taken in the order 1, 2, 0
    shuffled = test_data.ts[::2][[1, 2, 0]]

    expected_msg = r"Timestamp\('2000-01-10 00:00:00'\)"
    with pytest.raises(KeyError, match=expected_msg):
        shuffled.loc[d1:d2]
    with pytest.raises(KeyError, match=expected_msg):
        shuffled.loc[d1:d2] = 0
|
||||
|
||||
|
||||
def test_loc_getitem_setitem_integer_slice_keyerrors():
    """Integer slices via ``.iloc``/``.loc``; ``KeyError`` once labels are reordered."""
    s = Series(np.random.randn(10), index=list(range(0, 20, 2)))

    # this is OK
    zeroed = s.copy()
    zeroed.iloc[4:10] = 0
    assert (zeroed.iloc[4:10] == 0).all()

    # so is this
    zeroed = s.copy()
    zeroed.iloc[3:11] = 0
    assert (zeroed.iloc[3:11] == 0).values.all()

    by_position = s.iloc[2:6]
    by_label = s.loc[3:11]
    expected = s.reindex([4, 6, 8, 10])

    assert_series_equal(by_position, expected)
    assert_series_equal(by_label, expected)

    # non-monotonic, raise KeyError
    reordered = s.iloc[list(range(5)) + list(range(9, 4, -1))]
    with pytest.raises(KeyError, match=r"^3$"):
        reordered.loc[3:11]
    with pytest.raises(KeyError, match=r"^3$"):
        reordered.loc[3:11] = 0
|
||||
|
||||
|
||||
def test_loc_getitem_iterator(test_data):
    """``.loc`` accepts an iterator over labels."""
    series = test_data.series
    label_iter = iter(series.index[:10])
    assert_series_equal(series.loc[label_iter], series[:10])
|
||||
|
||||
|
||||
def test_loc_setitem_boolean(test_data):
|
||||
mask = test_data.series > test_data.series.median()
|
||||
|
||||
result = test_data.series.copy()
|
||||
result.loc[mask] = 0
|
||||
expected = test_data.series
|
||||
expected[mask] = 0
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_setitem_corner(test_data):
    """``.loc`` assignment with a label list works, and raises on unknown labels.

    The assignment is done on a copy so the shared ``test_data`` fixture is
    not modified by this test.
    """
    series = test_data.series.copy()
    inds = list(series.index[[5, 8, 12]])

    series.loc[inds] = 5
    # Verify the assignment actually landed on the selected labels.
    assert (series.loc[inds] == 5).all()

    msg = r"\['foo'\] not in index"
    with pytest.raises(KeyError, match=msg):
        series.loc[inds + ["foo"]] = 5
|
||||
|
||||
|
||||
def test_basic_setitem_with_labels(test_data):
    """``s[labels] = v`` and ``s.loc[labels] = v`` must produce the same result.

    Covers a list-like of labels, a label slice, integer labels given as a
    list and as an ndarray, labels missing from the index, and scalar
    assignment into a tz-aware series (GH12089).
    """
    indices = test_data.ts.index[[5, 10, 15]]

    # list-like of labels
    cp = test_data.ts.copy()
    exp = test_data.ts.copy()
    cp[indices] = 0
    exp.loc[indices] = 0
    assert_series_equal(cp, exp)

    # label slice
    cp = test_data.ts.copy()
    exp = test_data.ts.copy()
    cp[indices[0] : indices[2]] = 0
    exp.loc[indices[0] : indices[2]] = 0
    assert_series_equal(cp, exp)

    # integer indexes, be careful
    s = Series(np.random.randn(10), index=list(range(0, 20, 2)))
    inds = [0, 4, 6]
    arr_inds = np.array([0, 4, 6])

    # Assign into the two copies rather than into ``s`` itself; otherwise the
    # comparison below only checks two untouched copies against each other.
    cp = s.copy()
    exp = s.copy()
    cp[inds] = 0
    exp.loc[inds] = 0
    assert_series_equal(cp, exp)

    cp = s.copy()
    exp = s.copy()
    cp[arr_inds] = 0
    exp.loc[arr_inds] = 0
    assert_series_equal(cp, exp)

    # label 5 is not in the index
    inds_notfound = [0, 4, 5, 6]
    arr_inds_notfound = np.array([0, 4, 5, 6])
    msg = r"\[5\] not contained in the index"
    with pytest.raises(ValueError, match=msg):
        s[inds_notfound] = 0

    # NOTE(review): the broad ``Exception`` is kept from the original;
    # presumably this is also a ValueError -- confirm before narrowing.
    with pytest.raises(Exception, match=msg):
        s[arr_inds_notfound] = 0

    # GH12089
    # with tz for values
    s = Series(
        pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"]
    )
    expected = pd.Timestamp("2011-01-03", tz="US/Eastern")

    s2 = s.copy()
    s2.loc["a"] = expected
    result = s2.loc["a"]
    assert result == expected

    s2 = s.copy()
    s2.iloc[0] = expected
    result = s2.iloc[0]
    assert result == expected

    s2 = s.copy()
    s2["a"] = expected
    result = s2["a"]
    assert result == expected
|
@@ -0,0 +1,317 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, Series
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
def test_get():
    """``Series.get`` returns the value for a present key, else the default."""
    # GH 6383
    raw = [
        43, 48, 60, 48, 50, 51, 50, 45, 57, 48,
        56, 45, 51, 39, 55, 43, 54, 52, 51, 54,
    ]

    # Default integer index: 25 is not a label, so the default comes back.
    plain = Series(np.array(raw))
    assert plain.get(25, 0) == 0

    # Float labels: 25 matches the first label, whose value is 43.
    float_labels = pd.Float64Index(
        [
            25.0, 36.0, 49.0, 64.0, 81.0, 100.0, 121.0, 144.0, 169.0, 196.0,
            1225.0, 1296.0, 1369.0, 1444.0, 1521.0, 1600.0, 1681.0, 1764.0,
            1849.0, 1936.0,
        ],
        dtype="object",
    )
    labelled = Series(np.array(raw), index=float_labels)
    assert labelled.get(25, 0) == 43

    # GH 7407
    # with a boolean accessor
    df = pd.DataFrame({"i": [0] * 3, "b": [False] * 3})

    int_counts = df.i.value_counts()
    assert int_counts.get(99, default="Missing") == "Missing"

    bool_counts = df.b.value_counts()
    assert bool_counts.get(False, default="Missing") == 3
    assert bool_counts.get(True, default="Missing") == "Missing"
|
||||
|
||||
|
||||
def test_get_nan():
    """``Series.get(nan)`` falls back to the default on this float index."""
    # GH 8569
    series = pd.Float64Index(range(10)).to_series()

    without_default = series.get(np.nan)
    assert without_default is None

    with_default = series.get(np.nan, default="Missing")
    assert with_default == "Missing"
|
||||
|
||||
|
||||
def test_get_nan_multiple():
    """``Series.get`` with list keys: partial matches warn, total misses give None."""
    # GH 8569
    # ensure that fixing "test_get_nan" above hasn't broken get
    # with multiple elements
    s = pd.Float64Index(range(10)).to_series()

    # One key present, one missing: a FutureWarning is expected and the
    # missing key shows up as NaN.
    for idx in ([2, 30], [2, np.nan]):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            assert_series_equal(s.get(idx), Series([2, np.nan], index=idx))

    # GH 17295 - all missing keys
    for idx in ([20, 30], [np.nan, np.nan]):
        assert s.get(idx) is None
|
||||
|
||||
|
||||
def test_delitem():
    """``del s[key]`` removes the entry in place and keeps dtypes when emptied."""
    # GH 5542
    # should delete the item inplace
    s = Series(range(5))

    del s[0]
    assert_series_equal(s, Series(range(1, 5), index=range(1, 5)))

    del s[1]
    assert_series_equal(s, Series(range(2, 5), index=range(2, 5)))

    # empty
    nothing = Series()
    with pytest.raises(KeyError, match=r"^0$"):
        del nothing[0]

    # only 1 left, del, add, del
    emptied_int = Series(dtype="int64", index=Index([], dtype="int64"))
    s = Series(1)
    del s[0]
    assert_series_equal(s, emptied_int)
    s[0] = 1
    assert_series_equal(s, Series(1))
    del s[0]
    assert_series_equal(s, emptied_int)

    # Index(dtype=object)
    emptied_obj = Series(dtype="int64", index=Index([], dtype="object"))
    s = Series(1, index=["a"])
    del s["a"]
    assert_series_equal(s, emptied_obj)
    s["a"] = 1
    assert_series_equal(s, Series(1, index=["a"]))
    del s["a"]
    assert_series_equal(s, emptied_obj)
|
||||
|
||||
|
||||
def test_slice_float64():
    """Float label slices via ``[]`` and ``.loc`` include both endpoints."""
    labels = np.arange(10.0, 50.0, 2)
    index = Index(labels)
    lo, hi = labels[[5, 15]]

    # Series: labels at positions 5 and 15 select positions 5..15 inclusive.
    s = Series(np.random.randn(20), index=index)
    expected = s.iloc[5:16]
    assert_series_equal(s[lo:hi], expected)
    assert_series_equal(s.loc[lo:hi], expected)

    # DataFrame: same slicing applied to the rows.
    df = DataFrame(np.random.randn(20, 3), index=index)
    expected = df.iloc[5:16]
    tm.assert_frame_equal(df[lo:hi], expected)
    tm.assert_frame_equal(df.loc[lo:hi], expected)
|
||||
|
||||
|
||||
def test_getitem_negative_out_of_bounds():
    """Position -11 on a 10-element series raises IndexError for get and set."""
    s = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))

    getitem_msg = "index out of bounds"
    with pytest.raises(IndexError, match=getitem_msg):
        s[-11]

    setitem_msg = "index -11 is out of bounds for axis 0 with size 10"
    with pytest.raises(IndexError, match=setitem_msg):
        s[-11] = "foo"
|
||||
|
||||
|
||||
def test_getitem_regression():
    """Selecting every label with a list returns an equal series."""
    labels = list(range(5))
    s = Series(range(5), index=labels)
    selected = s[list(range(5))]
    assert_series_equal(selected, s)
|
||||
|
||||
|
||||
def test_getitem_setitem_slice_bug():
    """Slices with out-of-range negative bounds work for both get and set."""
    s = Series(range(10), index=list(range(10)))

    # getitem: a start before the beginning gives everything, a stop before
    # the beginning gives nothing.
    assert_series_equal(s[-12:], s)
    assert_series_equal(s[-7:], s[3:])
    assert_series_equal(s[:-12], s[:0])

    # setitem on a fresh series
    s = Series(range(10), index=list(range(10)))
    s[-12:] = 0
    assert (s == 0).all()

    # The empty slice must not touch any element.
    s[:-12] = 5
    assert (s == 0).all()
|
||||
|
||||
|
||||
def test_getitem_setitem_slice_integers():
    """Integer slices through ``[]`` select and assign the first four rows."""
    labels = [2, 4, 6, 8, 10, 12, 14, 16]
    s = Series(np.random.randn(8), index=labels)

    head = s[:4]
    assert_series_equal(head, s.reindex([2, 4, 6, 8]))

    s[:4] = 0
    zeroed = s[:4] == 0
    untouched = s[4:] == 0
    assert zeroed.all()
    assert not untouched.any()
|
||||
|
||||
|
||||
def test_setitem_float_labels():
    """Assigning at label 1 via ``.loc`` matches assigning at position 2."""
    # note labels are floats
    by_label = Series(["a", "b", "c"], index=[0, 0.5, 1])
    by_position = by_label.copy()

    by_label.loc[1] = "zoo"
    by_position.iloc[2] = "zoo"

    assert_series_equal(by_label, by_position)
|
||||
|
||||
|
||||
def test_slice_float_get_set(test_data):
    """Float slice bounds on ``test_data.ts`` raise TypeError for get and set."""
    msg = (
        r"cannot do slice indexing on <class 'pandas\.core\.indexes"
        r"\.datetimes\.DatetimeIndex'> with these indexers \[{key}\]"
        r" of <class 'float'>"
    )

    # Each case pairs the escaped key expected in the message with the
    # float used as the slice start.
    for escaped_key, start in ((r"4\.0", 4.0), (r"4\.5", 4.5)):
        with pytest.raises(TypeError, match=msg.format(key=escaped_key)):
            test_data.ts[start:10.0]

        with pytest.raises(TypeError, match=msg.format(key=escaped_key)):
            test_data.ts[start:10.0] = 0
|
||||
|
||||
|
||||
def test_slice_floats2():
    """Open-ended float ``.loc`` slices start at the first label >= the bound."""

    def check_tail_lengths(series):
        assert len(series.loc[12.0:]) == 8
        assert len(series.loc[12.5:]) == 7

    s = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float))
    check_tail_lengths(s)

    # Replace label 12.0 with 12.2: the expected lengths stay the same.
    shifted = np.arange(10, 20, dtype=float)
    shifted[2] = 12.2
    s.index = shifted
    check_tail_lengths(s)
|
||||
|
||||
|
||||
def test_int_indexing():
    """Missing keys raise KeyError on duplicated integer indexes."""
    sorted_labels = [0, 0, 1, 1, 2, 2]
    # not monotonic
    unsorted_labels = [2, 2, 0, 0, 1, 1]

    for labels in (sorted_labels, unsorted_labels):
        s = Series(np.random.randn(6), index=labels)

        with pytest.raises(KeyError, match=r"^5$"):
            s[5]

        with pytest.raises(KeyError, match=r"^'c'$"):
            s["c"]
|
||||
|
||||
|
||||
def test_getitem_int64(test_data):
|
||||
idx = np.int64(5)
|
||||
assert test_data.ts[idx] == test_data.ts[5]
|
Reference in New Issue
Block a user