8th day of python challenges 111-117

This commit is contained in:
abd.shallal
2019-08-04 15:26:35 +03:00
parent b04c1b055f
commit 627802c383
3215 changed files with 760227 additions and 491 deletions

View File

@@ -0,0 +1,292 @@
""" common utilities """
import itertools
from warnings import catch_warnings, filterwarnings
import numpy as np
from pandas.core.dtypes.common import is_scalar
from pandas import DataFrame, Float64Index, MultiIndex, Series, UInt64Index, date_range
from pandas.util import testing as tm
from pandas.io.formats.printing import pprint_thing
_verbose = False
def _mklbl(prefix, n):
return ["{prefix}{i}".format(prefix=prefix, i=i) for i in range(n)]
def _axify(obj, key, axis):
# create a tuple accessor
axes = [slice(None)] * obj.ndim
axes[axis] = key
return tuple(axes)
class Base:
""" indexing comprehensive base class """
_objs = {"series", "frame"}
_typs = {
"ints",
"uints",
"labels",
"mixed",
"ts",
"floats",
"empty",
"ts_rev",
"multi",
}
def setup_method(self, method):
self.series_ints = Series(np.random.rand(4), index=np.arange(0, 8, 2))
self.frame_ints = DataFrame(
np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3)
)
self.series_uints = Series(
np.random.rand(4), index=UInt64Index(np.arange(0, 8, 2))
)
self.frame_uints = DataFrame(
np.random.randn(4, 4),
index=UInt64Index(range(0, 8, 2)),
columns=UInt64Index(range(0, 12, 3)),
)
self.series_floats = Series(
np.random.rand(4), index=Float64Index(range(0, 8, 2))
)
self.frame_floats = DataFrame(
np.random.randn(4, 4),
index=Float64Index(range(0, 8, 2)),
columns=Float64Index(range(0, 12, 3)),
)
m_idces = [
MultiIndex.from_product([[1, 2], [3, 4]]),
MultiIndex.from_product([[5, 6], [7, 8]]),
MultiIndex.from_product([[9, 10], [11, 12]]),
]
self.series_multi = Series(np.random.rand(4), index=m_idces[0])
self.frame_multi = DataFrame(
np.random.randn(4, 4), index=m_idces[0], columns=m_idces[1]
)
self.series_labels = Series(np.random.randn(4), index=list("abcd"))
self.frame_labels = DataFrame(
np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD")
)
self.series_mixed = Series(np.random.randn(4), index=[2, 4, "null", 8])
self.frame_mixed = DataFrame(np.random.randn(4, 4), index=[2, 4, "null", 8])
self.series_ts = Series(
np.random.randn(4), index=date_range("20130101", periods=4)
)
self.frame_ts = DataFrame(
np.random.randn(4, 4), index=date_range("20130101", periods=4)
)
dates_rev = date_range("20130101", periods=4).sort_values(ascending=False)
self.series_ts_rev = Series(np.random.randn(4), index=dates_rev)
self.frame_ts_rev = DataFrame(np.random.randn(4, 4), index=dates_rev)
self.frame_empty = DataFrame()
self.series_empty = Series()
# form agglomerates
for o in self._objs:
d = dict()
for t in self._typs:
d[t] = getattr(self, "{o}_{t}".format(o=o, t=t), None)
setattr(self, o, d)
def generate_indices(self, f, values=False):
""" generate the indices
if values is True , use the axis values
is False, use the range
"""
axes = f.axes
if values:
axes = (list(range(len(a))) for a in axes)
return itertools.product(*axes)
def get_result(self, obj, method, key, axis):
""" return the result for this obj with this key and this axis """
if isinstance(key, dict):
key = key[axis]
# use an artificial conversion to map the key as integers to the labels
# so ix can work for comparisons
if method == "indexer":
method = "ix"
key = obj._get_axis(axis)[key]
# in case we actually want 0 index slicing
with catch_warnings(record=True):
try:
xp = getattr(obj, method).__getitem__(_axify(obj, key, axis))
except AttributeError:
xp = getattr(obj, method).__getitem__(key)
return xp
def get_value(self, f, i, values=False):
""" return the value for the location i """
# check against values
if values:
return f.values[i]
# this is equiv of f[col][row].....
# v = f
# for a in reversed(i):
# v = v.__getitem__(a)
# return v
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", FutureWarning)
return f.ix[i]
def check_values(self, f, func, values=False):
if f is None:
return
axes = f.axes
indicies = itertools.product(*axes)
for i in indicies:
result = getattr(f, func)[i]
# check against values
if values:
expected = f.values[i]
else:
expected = f
for a in reversed(i):
expected = expected.__getitem__(a)
tm.assert_almost_equal(result, expected)
def check_result(
self,
name,
method1,
key1,
method2,
key2,
typs=None,
objs=None,
axes=None,
fails=None,
):
def _eq(t, o, a, obj, k1, k2):
""" compare equal for these 2 keys """
if a is not None and a > obj.ndim - 1:
return
def _print(result, error=None):
if error is not None:
error = str(error)
v = (
"%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s,"
"key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s"
% (name, result, t, o, method1, method2, a, error or "")
)
if _verbose:
pprint_thing(v)
try:
rs = getattr(obj, method1).__getitem__(_axify(obj, k1, a))
try:
xp = self.get_result(obj, method2, k2, a)
except Exception:
result = "no comp"
_print(result)
return
detail = None
try:
if is_scalar(rs) and is_scalar(xp):
assert rs == xp
elif xp.ndim == 1:
tm.assert_series_equal(rs, xp)
elif xp.ndim == 2:
tm.assert_frame_equal(rs, xp)
result = "ok"
except AssertionError as e:
detail = str(e)
result = "fail"
# reverse the checks
if fails is True:
if result == "fail":
result = "ok (fail)"
_print(result)
if not result.startswith("ok"):
raise AssertionError(detail)
except AssertionError:
raise
except Exception as detail:
# if we are in fails, the ok, otherwise raise it
if fails is not None:
if isinstance(detail, fails):
result = "ok ({0.__name__})".format(type(detail))
_print(result)
return
result = type(detail).__name__
raise AssertionError(_print(result, error=detail))
if typs is None:
typs = self._typs
if objs is None:
objs = self._objs
if axes is not None:
if not isinstance(axes, (tuple, list)):
axes = [axes]
else:
axes = list(axes)
else:
axes = [0, 1]
# check
for o in objs:
if o not in self._objs:
continue
d = getattr(self, o)
for a in axes:
for t in typs:
if t not in self._typs:
continue
obj = d[t]
if obj is None:
continue
def _call(obj=obj):
obj = obj.copy()
k2 = key2
_eq(t, o, a, obj, key1, k2)
_call()

View File

@@ -0,0 +1,23 @@
import numpy as np
import pytest
from pandas._libs import index as libindex
@pytest.fixture(
params=[
(libindex.Int64Engine, np.int64),
(libindex.Int32Engine, np.int32),
(libindex.Int16Engine, np.int16),
(libindex.Int8Engine, np.int8),
(libindex.UInt64Engine, np.uint64),
(libindex.UInt32Engine, np.uint32),
(libindex.UInt16Engine, np.uint16),
(libindex.UInt8Engine, np.uint8),
(libindex.Float64Engine, np.float64),
(libindex.Float32Engine, np.float32),
],
ids=lambda x: x[0].__name__,
)
def numeric_indexing_engine_type_and_dtype(request):
return request.param

View File

@@ -0,0 +1,114 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, IntervalIndex, Series
import pandas.util.testing as tm
class TestIntervalIndex:
def setup_method(self, method):
self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))
def test_getitem_with_scalar(self):
s = self.s
expected = s.iloc[:3]
tm.assert_series_equal(expected, s[:3])
tm.assert_series_equal(expected, s[:2.5])
tm.assert_series_equal(expected, s[0.1:2.5])
expected = s.iloc[1:4]
tm.assert_series_equal(expected, s[[1.5, 2.5, 3.5]])
tm.assert_series_equal(expected, s[[2, 3, 4]])
tm.assert_series_equal(expected, s[[1.5, 3, 4]])
expected = s.iloc[2:5]
tm.assert_series_equal(expected, s[s >= 2])
@pytest.mark.parametrize("direction", ["increasing", "decreasing"])
def test_nonoverlapping_monotonic(self, direction, closed):
tpls = [(0, 1), (2, 3), (4, 5)]
if direction == "decreasing":
tpls = tpls[::-1]
idx = IntervalIndex.from_tuples(tpls, closed=closed)
s = Series(list("abc"), idx)
for key, expected in zip(idx.left, s):
if idx.closed_left:
assert s[key] == expected
assert s.loc[key] == expected
else:
with pytest.raises(KeyError, match=str(key)):
s[key]
with pytest.raises(KeyError, match=str(key)):
s.loc[key]
for key, expected in zip(idx.right, s):
if idx.closed_right:
assert s[key] == expected
assert s.loc[key] == expected
else:
with pytest.raises(KeyError, match=str(key)):
s[key]
with pytest.raises(KeyError, match=str(key)):
s.loc[key]
for key, expected in zip(idx.mid, s):
assert s[key] == expected
assert s.loc[key] == expected
def test_non_matching(self):
s = self.s
# this is a departure from our current
# indexin scheme, but simpler
with pytest.raises(KeyError, match="^$"):
s.loc[[-1, 3, 4, 5]]
with pytest.raises(KeyError, match="^$"):
s.loc[[-1, 3]]
def test_large_series(self):
s = Series(
np.arange(1000000), index=IntervalIndex.from_breaks(np.arange(1000001))
)
result1 = s.loc[:80000]
result2 = s.loc[0:80000]
result3 = s.loc[0:80000:1]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
def test_loc_getitem_frame(self):
df = DataFrame({"A": range(10)})
s = pd.cut(df.A, 5)
df["B"] = s
df = df.set_index("B")
result = df.loc[4]
expected = df.iloc[4:6]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="10"):
df.loc[10]
# single list-like
result = df.loc[[4]]
expected = df.iloc[4:6]
tm.assert_frame_equal(result, expected)
# non-unique
result = df.loc[[4, 5]]
expected = df.take([4, 5, 4, 5])
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match="^$"):
df.loc[[10]]
# partial missing
with pytest.raises(KeyError, match="^$"):
df.loc[[10, 4]]

View File

@@ -0,0 +1,248 @@
import re
import numpy as np
import pytest
from pandas import Interval, IntervalIndex, Series
import pandas.util.testing as tm
class TestIntervalIndex:
def setup_method(self, method):
self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))
def test_loc_with_interval(self):
# loc with single label / list of labels:
# - Intervals: only exact matches
# - scalars: those that contain it
s = self.s
expected = 0
result = s.loc[Interval(0, 1)]
assert result == expected
result = s[Interval(0, 1)]
assert result == expected
expected = s.iloc[3:5]
result = s.loc[[Interval(3, 4), Interval(4, 5)]]
tm.assert_series_equal(expected, result)
result = s[[Interval(3, 4), Interval(4, 5)]]
tm.assert_series_equal(expected, result)
# missing or not exact
with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")):
s.loc[Interval(3, 5, closed="left")]
with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")):
s[Interval(3, 5, closed="left")]
with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
s[Interval(3, 5)]
with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
s.loc[Interval(3, 5)]
with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
s[Interval(3, 5)]
with pytest.raises(
KeyError, match=re.escape("Interval(-2, 0, closed='right')")
):
s.loc[Interval(-2, 0)]
with pytest.raises(
KeyError, match=re.escape("Interval(-2, 0, closed='right')")
):
s[Interval(-2, 0)]
with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")):
s.loc[Interval(5, 6)]
with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")):
s[Interval(5, 6)]
def test_loc_with_scalar(self):
# loc with single label / list of labels:
# - Intervals: only exact matches
# - scalars: those that contain it
s = self.s
assert s.loc[1] == 0
assert s.loc[1.5] == 1
assert s.loc[2] == 1
assert s[1] == 0
assert s[1.5] == 1
assert s[2] == 1
expected = s.iloc[1:4]
tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]])
tm.assert_series_equal(expected, s.loc[[2, 3, 4]])
tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]])
expected = s.iloc[[1, 1, 2, 1]]
tm.assert_series_equal(expected, s.loc[[1.5, 2, 2.5, 1.5]])
expected = s.iloc[2:5]
tm.assert_series_equal(expected, s.loc[s >= 2])
def test_loc_with_slices(self):
# loc with slices:
# - Interval objects: only works with exact matches
# - scalars: only works for non-overlapping, monotonic intervals,
# and start/stop select location based on the interval that
# contains them:
# (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop))
s = self.s
# slice of interval
expected = s.iloc[:3]
result = s.loc[Interval(0, 1) : Interval(2, 3)]
tm.assert_series_equal(expected, result)
result = s[Interval(0, 1) : Interval(2, 3)]
tm.assert_series_equal(expected, result)
expected = s.iloc[3:]
result = s.loc[Interval(3, 4) :]
tm.assert_series_equal(expected, result)
result = s[Interval(3, 4) :]
tm.assert_series_equal(expected, result)
msg = "Interval objects are not currently supported"
with pytest.raises(NotImplementedError, match=msg):
s.loc[Interval(3, 6) :]
with pytest.raises(NotImplementedError, match=msg):
s[Interval(3, 6) :]
with pytest.raises(NotImplementedError, match=msg):
s.loc[Interval(3, 4, closed="left") :]
with pytest.raises(NotImplementedError, match=msg):
s[Interval(3, 4, closed="left") :]
# TODO with non-existing intervals ?
# s.loc[Interval(-1, 0):Interval(2, 3)]
# slice of scalar
expected = s.iloc[:3]
tm.assert_series_equal(expected, s.loc[:3])
tm.assert_series_equal(expected, s.loc[:2.5])
tm.assert_series_equal(expected, s.loc[0.1:2.5])
tm.assert_series_equal(expected, s.loc[-1:3])
tm.assert_series_equal(expected, s[:3])
tm.assert_series_equal(expected, s[:2.5])
tm.assert_series_equal(expected, s[0.1:2.5])
# slice of scalar with step != 1
with pytest.raises(ValueError):
s[0:4:2]
def test_loc_with_overlap(self):
idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
s = Series(range(len(idx)), index=idx)
# scalar
expected = s
result = s.loc[4]
tm.assert_series_equal(expected, result)
result = s[4]
tm.assert_series_equal(expected, result)
result = s.loc[[4]]
tm.assert_series_equal(expected, result)
result = s[[4]]
tm.assert_series_equal(expected, result)
# interval
expected = 0
result = s.loc[Interval(1, 5)]
result == expected
result = s[Interval(1, 5)]
result == expected
expected = s
result = s.loc[[Interval(1, 5), Interval(3, 7)]]
tm.assert_series_equal(expected, result)
result = s[[Interval(1, 5), Interval(3, 7)]]
tm.assert_series_equal(expected, result)
with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
s.loc[Interval(3, 5)]
with pytest.raises(KeyError, match="^$"):
s.loc[[Interval(3, 5)]]
with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
s[Interval(3, 5)]
with pytest.raises(KeyError, match="^$"):
s[[Interval(3, 5)]]
# slices with interval (only exact matches)
expected = s
result = s.loc[Interval(1, 5) : Interval(3, 7)]
tm.assert_series_equal(expected, result)
result = s[Interval(1, 5) : Interval(3, 7)]
tm.assert_series_equal(expected, result)
msg = "'can only get slices from an IntervalIndex if bounds are"
" non-overlapping and all monotonic increasing or decreasing'"
with pytest.raises(KeyError, match=msg):
s.loc[Interval(1, 6) : Interval(3, 8)]
with pytest.raises(KeyError, match=msg):
s[Interval(1, 6) : Interval(3, 8)]
# slices with scalar raise for overlapping intervals
# TODO KeyError is the appropriate error?
with pytest.raises(KeyError, match=msg):
s.loc[1:4]
def test_non_unique(self):
idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
s = Series(range(len(idx)), index=idx)
result = s.loc[Interval(1, 3)]
assert result == 0
result = s.loc[[Interval(1, 3)]]
expected = s.iloc[0:1]
tm.assert_series_equal(expected, result)
def test_non_unique_moar(self):
idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
s = Series(range(len(idx)), index=idx)
expected = s.iloc[[0, 1]]
result = s.loc[Interval(1, 3)]
tm.assert_series_equal(expected, result)
expected = s
result = s.loc[Interval(1, 3) :]
tm.assert_series_equal(expected, result)
expected = s
result = s[Interval(1, 3) :]
tm.assert_series_equal(expected, result)
expected = s.iloc[[0, 1]]
result = s[[Interval(1, 3)]]
tm.assert_series_equal(expected, result)

View File

@@ -0,0 +1,30 @@
import numpy as np
import pytest
from pandas import DataFrame, Index, MultiIndex
from pandas.util import testing as tm
@pytest.fixture
def multiindex_dataframe_random_data():
"""DataFrame with 2 level MultiIndex with random data"""
index = MultiIndex(
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=["first", "second"],
)
return DataFrame(
np.random.randn(10, 3), index=index, columns=Index(["A", "B", "C"], name="exp")
)
@pytest.fixture
def multiindex_year_month_day_dataframe_random_data():
"""DataFrame with 3 level MultiIndex (year, month, day) covering
first 100 business days from 2000-01-01 with random data"""
tdf = tm.makeTimeDataFrame(100)
ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
# use Int64Index, to make sure things work
ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels], inplace=True)
ymd.index.set_names(["year", "month", "day"], inplace=True)
return ymd

View File

@@ -0,0 +1,64 @@
import numpy as np
import pytest
from pandas import DataFrame, MultiIndex, Series
from pandas.core import common as com
import pandas.util.testing as tm
def test_detect_chained_assignment():
# Inplace ops, originally from:
# http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
a = [12, 23]
b = [123, None]
c = [1234, 2345]
d = [12345, 23456]
tuples = [("eyes", "left"), ("eyes", "right"), ("ears", "left"), ("ears", "right")]
events = {
("eyes", "left"): a,
("eyes", "right"): b,
("ears", "left"): c,
("ears", "right"): d,
}
multiind = MultiIndex.from_tuples(tuples, names=["part", "side"])
zed = DataFrame(events, index=["a", "b"], columns=multiind)
with pytest.raises(com.SettingWithCopyError):
zed["eyes"]["right"].fillna(value=555, inplace=True)
def test_cache_updating():
# 5216
# make sure that we don't try to set a dead cache
a = np.random.rand(10, 3)
df = DataFrame(a, columns=["x", "y", "z"])
tuples = [(i, j) for i in range(5) for j in range(2)]
index = MultiIndex.from_tuples(tuples)
df.index = index
# setting via chained assignment
# but actually works, since everything is a view
df.loc[0]["z"].iloc[0] = 1.0
result = df.loc[(0, 0), "z"]
assert result == 1
# correct setting
df.loc[(0, 0), "z"] = 2
result = df.loc[(0, 0), "z"]
assert result == 2
def test_indexer_caching():
# GH5727
# make sure that indexers are in the _internal_names_set
n = 1000001
arrays = (range(n), range(n))
index = MultiIndex.from_tuples(zip(*arrays))
s = Series(np.zeros(n), index=index)
str(s)
# setitem
expected = Series(np.ones(n), index=index)
s = Series(np.zeros(n), index=index)
s[s == 0] = 1
tm.assert_series_equal(s, expected)

View File

@@ -0,0 +1,22 @@
from datetime import datetime
import numpy as np
from pandas import Index, Period, Series, period_range
def test_multiindex_period_datetime():
# GH4861, using datetime in period of multiindex raises exception
idx1 = Index(["a", "a", "a", "b", "b"])
idx2 = period_range("2012-01", periods=len(idx1), freq="M")
s = Series(np.random.randn(len(idx1)), [idx1, idx2])
# try Period as index
expected = s.iloc[0]
result = s.loc["a", Period("2012-01")]
assert result == expected
# try datetime as index
result = s.loc["a", datetime(2012, 1, 1)]
assert result == expected

View File

@@ -0,0 +1,252 @@
import numpy as np
import pytest
from pandas import DataFrame, Index, MultiIndex, Series
from pandas.core.indexing import IndexingError
from pandas.util import testing as tm
# ----------------------------------------------------------------------------
# test indexing of Series with multi-level Index
# ----------------------------------------------------------------------------
@pytest.mark.parametrize(
"access_method",
[lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)],
)
@pytest.mark.parametrize(
"level1_value, expected",
[(0, Series([1], index=[0])), (1, Series([2, 3], index=[1, 2]))],
)
def test_series_getitem_multiindex(access_method, level1_value, expected):
# GH 6018
# series regression getitem with a multi-index
s = Series([1, 2, 3])
s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)])
result = access_method(s, level1_value)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("level0_value", ["D", "A"])
def test_series_getitem_duplicates_multiindex(level0_value):
# GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise
# the appropriate error, only in PY3 of course!
index = MultiIndex(
levels=[[level0_value, "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
names=["tag", "day"],
)
arr = np.random.randn(len(index), 1)
df = DataFrame(arr, index=index, columns=["val"])
# confirm indexing on missing value raises KeyError
if level0_value != "A":
with pytest.raises(KeyError, match=r"^'A'$"):
df.val["A"]
with pytest.raises(KeyError, match=r"^'X'$"):
df.val["X"]
result = df.val[level0_value]
expected = Series(
arr.ravel()[0:3], name="val", index=Index([26, 37, 57], name="day")
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("indexer", [lambda s: s[2000, 3], lambda s: s.loc[2000, 3]])
def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer):
s = multiindex_year_month_day_dataframe_random_data["A"]
expected = s.reindex(s.index[42:65])
expected.index = expected.index.droplevel(0).droplevel(0)
result = indexer(s)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"indexer", [lambda s: s[2000, 3, 10], lambda s: s.loc[2000, 3, 10]]
)
def test_series_getitem_returns_scalar(
multiindex_year_month_day_dataframe_random_data, indexer
):
s = multiindex_year_month_day_dataframe_random_data["A"]
expected = s.iloc[49]
result = indexer(s)
assert result == expected
@pytest.mark.parametrize(
"indexer,expected_error,expected_error_msg",
[
(lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^\(2000, 3, 4\)$"),
(lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
(lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
(lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"),
(lambda s: s.__getitem__(len(s)), IndexError, "index out of bounds"),
(lambda s: s[len(s)], IndexError, "index out of bounds"),
(
lambda s: s.iloc[len(s)],
IndexError,
"single positional indexer is out-of-bounds",
),
],
)
def test_series_getitem_indexing_errors(
multiindex_year_month_day_dataframe_random_data,
indexer,
expected_error,
expected_error_msg,
):
s = multiindex_year_month_day_dataframe_random_data["A"]
with pytest.raises(expected_error, match=expected_error_msg):
indexer(s)
def test_series_getitem_corner_generator(
multiindex_year_month_day_dataframe_random_data
):
s = multiindex_year_month_day_dataframe_random_data["A"]
result = s[(x > 0 for x in s)]
expected = s[s > 0]
tm.assert_series_equal(result, expected)
# ----------------------------------------------------------------------------
# test indexing of DataFrame with multi-level Index
# ----------------------------------------------------------------------------
def test_getitem_simple(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data.T
expected = df.values[:, 0]
result = df["foo", "one"].values
tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize(
"indexer,expected_error_msg",
[
(lambda df: df[("foo", "four")], r"^\('foo', 'four'\)$"),
(lambda df: df["foobar"], r"^'foobar'$"),
],
)
def test_frame_getitem_simple_key_error(
multiindex_dataframe_random_data, indexer, expected_error_msg
):
df = multiindex_dataframe_random_data.T
with pytest.raises(KeyError, match=expected_error_msg):
indexer(df)
def test_frame_getitem_multicolumn_empty_level():
df = DataFrame({"a": ["1", "2", "3"], "b": ["2", "3", "4"]})
df.columns = [
["level1 item1", "level1 item2"],
["", "level2 item2"],
["level3 item1", "level3 item2"],
]
result = df["level1 item1"]
expected = DataFrame(
[["1"], ["2"], ["3"]], index=df.index, columns=["level3 item1"]
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"indexer,expected_slice",
[
(lambda df: df["foo"], slice(3)),
(lambda df: df["bar"], slice(3, 5)),
(lambda df: df.loc[:, "bar"], slice(3, 5)),
],
)
def test_frame_getitem_toplevel(
multiindex_dataframe_random_data, indexer, expected_slice
):
df = multiindex_dataframe_random_data.T
expected = df.reindex(columns=df.columns[expected_slice])
expected.columns = expected.columns.droplevel(0)
result = indexer(df)
tm.assert_frame_equal(result, expected)
def test_frame_mixed_depth_get():
arrays = [
["a", "top", "top", "routine1", "routine1", "routine2"],
["", "OD", "OD", "result1", "result2", "result1"],
["", "wx", "wy", "", "", ""],
]
tuples = sorted(zip(*arrays))
index = MultiIndex.from_tuples(tuples)
df = DataFrame(np.random.randn(4, 6), columns=index)
result = df["a"]
expected = df["a", "", ""].rename("a")
tm.assert_series_equal(result, expected)
result = df["routine1", "result1"]
expected = df["routine1", "result1", ""]
expected = expected.rename(("routine1", "result1"))
tm.assert_series_equal(result, expected)
# ----------------------------------------------------------------------------
# test indexing of DataFrame with multi-level Index with duplicates
# ----------------------------------------------------------------------------
@pytest.fixture
def dataframe_with_duplicate_index():
"""Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
data = [["a", "d", "e", "c", "f", "b"], [1, 4, 5, 3, 6, 2], [1, 4, 5, 3, 6, 2]]
index = ["h1", "h3", "h5"]
columns = MultiIndex(
levels=[["A", "B"], ["A1", "A2", "B1", "B2"]],
codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
names=["main", "sub"],
)
return DataFrame(data, index=index, columns=columns)
@pytest.mark.parametrize(
"indexer", [lambda df: df[("A", "A1")], lambda df: df.loc[:, ("A", "A1")]]
)
def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
# GH 4145
df = dataframe_with_duplicate_index
index = Index(["h1", "h3", "h5"])
columns = MultiIndex.from_tuples([("A", "A1")], names=["main", "sub"])
expected = DataFrame([["a", 1, 1]], index=columns, columns=index).T
result = indexer(df)
tm.assert_frame_equal(result, expected)
def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
# GH 4146, not returning a block manager when selecting a unique index
# from a duplicate index
# as of 4879, this returns a Series (which is similar to what happens
# with a non-unique)
df = dataframe_with_duplicate_index
expected = Series(["a", 1, 1], index=["h1", "h3", "h5"], name="A1")
result = df["A"]["A1"]
tm.assert_series_equal(result, expected)
def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
# selecting a non_unique from the 2nd level
df = dataframe_with_duplicate_index
expected = DataFrame(
[["d", 4, 4], ["e", 5, 5]],
index=Index(["B2", "B2"], name="sub"),
columns=["h1", "h3", "h5"],
).T
result = df["A"]["B2"]
tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,171 @@
import numpy as np
import pytest
from pandas import DataFrame, MultiIndex, Series
from pandas.util import testing as tm
@pytest.fixture
def simple_multiindex_dataframe():
"""
Factory function to create simple 3 x 3 dataframe with
both columns and row MultiIndex using supplied data or
random data by default.
"""
def _simple_multiindex_dataframe(data=None):
if data is None:
data = np.random.randn(3, 3)
return DataFrame(
data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]]
)
return _simple_multiindex_dataframe
@pytest.mark.parametrize(
"indexer, expected",
[
(
lambda df: df.iloc[0],
lambda arr: Series(arr[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8)),
),
(
lambda df: df.iloc[2],
lambda arr: Series(arr[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12)),
),
(
lambda df: df.iloc[:, 2],
lambda arr: Series(arr[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10)),
),
],
)
def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe):
arr = np.random.randn(3, 3)
df = simple_multiindex_dataframe(arr)
result = indexer(df)
expected = expected(arr)
tm.assert_series_equal(result, expected)
def test_iloc_returns_dataframe(simple_multiindex_dataframe):
df = simple_multiindex_dataframe()
result = df.iloc[[0, 1]]
expected = df.xs(4, drop_level=False)
tm.assert_frame_equal(result, expected)
def test_iloc_returns_scalar(simple_multiindex_dataframe):
arr = np.random.randn(3, 3)
df = simple_multiindex_dataframe(arr)
result = df.iloc[2, 2]
expected = arr[2, 2]
assert result == expected
def test_iloc_getitem_multiple_items():
# GH 5528
tup = zip(*[["a", "a", "b", "b"], ["x", "y", "x", "y"]])
index = MultiIndex.from_tuples(tup)
df = DataFrame(np.random.randn(4, 4), index=index)
result = df.iloc[[2, 3]]
expected = df.xs("b", drop_level=False)
tm.assert_frame_equal(result, expected)
def test_iloc_getitem_labels():
# this is basically regular indexing
arr = np.random.randn(4, 3)
df = DataFrame(
arr,
columns=[["i", "i", "j"], ["A", "A", "B"]],
index=[["i", "i", "j", "k"], ["X", "X", "Y", "Y"]],
)
result = df.iloc[2, 2]
expected = arr[2, 2]
assert result == expected
def test_frame_getitem_slice(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.iloc[:4]
expected = df[:4]
tm.assert_frame_equal(result, expected)
def test_frame_setitem_slice(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
df.iloc[:4] = 0
assert (df.values[:4] == 0).all()
assert (df.values[4:] != 0).all()
def test_indexing_ambiguity_bug_1678():
# GH 1678
columns = MultiIndex.from_tuples(
[("Ohio", "Green"), ("Ohio", "Red"), ("Colorado", "Green")]
)
index = MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)])
df = DataFrame(np.arange(12).reshape((4, 3)), index=index, columns=columns)
result = df.iloc[:, 1]
expected = df.loc[:, ("Ohio", "Red")]
tm.assert_series_equal(result, expected)
def test_iloc_integer_locations():
# GH 13797
data = [
["str00", "str01"],
["str10", "str11"],
["str20", "srt21"],
["str30", "str31"],
["str40", "str41"],
]
index = MultiIndex.from_tuples(
[("CC", "A"), ("CC", "B"), ("CC", "B"), ("BB", "a"), ("BB", "b")]
)
expected = DataFrame(data)
df = DataFrame(data, index=index)
result = DataFrame([[df.iloc[r, c] for c in range(2)] for r in range(5)])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"data, indexes, values, expected_k",
[
# test without indexer value in first level of MultiIndex
([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
# test like code sample 1 in the issue
([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], [755, 1066]),
# test like code sample 2 in the issue
([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
# test like code sample 3 in the issue
([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], [8, 15, 13]),
],
)
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
# GH17148
df = DataFrame(data=data, columns=["i", "j", "k"])
df = df.set_index(["i", "j"])
series = df.k.copy()
for i, v in zip(indexes, values):
series.iloc[i] += v
df["k"] = expected_k
expected = df.k
tm.assert_series_equal(series, expected)
def test_getitem_iloc(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.iloc[2]
expected = df.xs(df.index[2])
tm.assert_series_equal(result, expected)

View File

@@ -0,0 +1,92 @@
import warnings
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, MultiIndex, Series
import pandas.util.testing as tm
@pytest.mark.slow
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_multiindex_get_loc(): # GH7724, GH2646
with warnings.catch_warnings(record=True):
# test indexing into a multi-index before & past the lexsort depth
from numpy.random import randint, choice, randn
cols = ["jim", "joe", "jolie", "joline", "jolia"]
def validate(mi, df, key):
mask = np.ones(len(df)).astype("bool")
# test for all partials of this key
for i, k in enumerate(key):
mask &= df.iloc[:, i] == k
if not mask.any():
assert key[: i + 1] not in mi.index
continue
assert key[: i + 1] in mi.index
right = df[mask].copy()
if i + 1 != len(key): # partial key
right.drop(cols[: i + 1], axis=1, inplace=True)
right.set_index(cols[i + 1 : -1], inplace=True)
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
else: # full key
right.set_index(cols[:-1], inplace=True)
if len(right) == 1: # single hit
right = Series(
right["jolia"].values, name=right.index[0], index=["jolia"]
)
tm.assert_series_equal(mi.loc[key[: i + 1]], right)
else: # multi hit
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
def loop(mi, df, keys):
for key in keys:
validate(mi, df, key)
n, m = 1000, 50
vals = [
randint(0, 10, n),
choice(list("abcdefghij"), n),
choice(pd.date_range("20141009", periods=10).tolist(), n),
choice(list("ZYXWVUTSRQ"), n),
randn(n),
]
vals = list(map(tuple, zip(*vals)))
# bunch of keys for testing
keys = [
randint(0, 11, m),
choice(list("abcdefghijk"), m),
choice(pd.date_range("20141009", periods=11).tolist(), m),
choice(list("ZYXWVUTSRQP"), m),
]
keys = list(map(tuple, zip(*keys)))
keys += list(map(lambda t: t[:-1], vals[:: n // m]))
# covers both unique index and non-unique index
df = DataFrame(vals, columns=cols)
a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1])
for frame in a, b:
for i in range(5): # lexsort depth
df = frame.copy() if i == 0 else frame.sort_values(by=cols[:i])
mi = df.set_index(cols[:-1])
assert not mi.index.lexsort_depth < i
loop(mi, df, keys)
@pytest.mark.slow
def test_large_mi_dataframe_indexing():
# GH10645
result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)])
assert not (10 ** 6, 0) in result

View File

@@ -0,0 +1,75 @@
from warnings import catch_warnings, simplefilter
import numpy as np
import pytest
from pandas.errors import PerformanceWarning
from pandas import DataFrame, MultiIndex
from pandas.util import testing as tm
@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning")
class TestMultiIndexIx:
def test_frame_setitem_ix(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
frame.loc[("bar", "two"), "B"] = 5
assert frame.loc[("bar", "two"), "B"] == 5
# with integer labels
df = frame.copy()
df.columns = list(range(3))
df.loc[("bar", "two"), 1] = 7
assert df.loc[("bar", "two"), 1] == 7
with catch_warnings(record=True):
simplefilter("ignore", FutureWarning)
df = frame.copy()
df.columns = list(range(3))
df.ix[("bar", "two"), 1] = 7
assert df.loc[("bar", "two"), 1] == 7
def test_ix_general(self):
# ix general issues
# GH 2817
data = {
"amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
"col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
"year": {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012},
}
df = DataFrame(data).set_index(keys=["col", "year"])
key = 4.0, 2012
# emits a PerformanceWarning, ok
with tm.assert_produces_warning(PerformanceWarning):
tm.assert_frame_equal(df.loc[key], df.iloc[2:])
# this is ok
df.sort_index(inplace=True)
res = df.loc[key]
# col has float dtype, result should be Float64Index
index = MultiIndex.from_arrays([[4.0] * 3, [2012] * 3], names=["col", "year"])
expected = DataFrame({"amount": [222, 333, 444]}, index=index)
tm.assert_frame_equal(res, expected)
def test_ix_multiindex_missing_label_raises(self):
# GH 21593
df = DataFrame(
np.random.randn(3, 3),
columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]],
)
with pytest.raises(KeyError, match=r"^2$"):
df.ix[2]
def test_series_ix_getitem_fancy(
self, multiindex_year_month_day_dataframe_random_data
):
s = multiindex_year_month_day_dataframe_random_data["A"]
expected = s.reindex(s.index[49:51])
result = s.ix[[(2000, 3, 10), (2000, 3, 13)]]
tm.assert_series_equal(result, expected)

View File

@@ -0,0 +1,392 @@
import itertools
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series
from pandas.core.indexing import IndexingError
from pandas.util import testing as tm
@pytest.fixture
def single_level_multiindex():
"""single level MultiIndex"""
return MultiIndex(
levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
)
@pytest.fixture
def frame_random_data_integer_multi_index():
levels = [[0, 1], [0, 1, 2]]
codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
index = MultiIndex(levels=levels, codes=codes)
return DataFrame(np.random.randn(6, 2), index=index)
class TestMultiIndexLoc:
def test_loc_getitem_series(self):
# GH14730
# passing a series as a key with a MultiIndex
index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
x = Series(index=index, data=range(9), dtype=np.float64)
y = Series([1, 3])
expected = Series(
data=[0, 1, 2, 6, 7, 8],
index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
dtype=np.float64,
)
result = x.loc[y]
tm.assert_series_equal(result, expected)
result = x.loc[[1, 3]]
tm.assert_series_equal(result, expected)
# GH15424
y1 = Series([1, 3], index=[1, 2])
result = x.loc[y1]
tm.assert_series_equal(result, expected)
empty = Series(data=[], dtype=np.float64)
expected = Series(
[], index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64)
)
result = x.loc[empty]
tm.assert_series_equal(result, expected)
def test_loc_getitem_array(self):
# GH15434
# passing an array as a key with a MultiIndex
index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
x = Series(index=index, data=range(9), dtype=np.float64)
y = np.array([1, 3])
expected = Series(
data=[0, 1, 2, 6, 7, 8],
index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
dtype=np.float64,
)
result = x.loc[y]
tm.assert_series_equal(result, expected)
# empty array:
empty = np.array([])
expected = Series(
[], index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64)
)
result = x.loc[empty]
tm.assert_series_equal(result, expected)
# 0-dim array (scalar):
scalar = np.int64(1)
expected = Series(data=[0, 1, 2], index=["A", "B", "C"], dtype=np.float64)
result = x.loc[scalar]
tm.assert_series_equal(result, expected)
def test_loc_multiindex_labels(self):
df = DataFrame(
np.random.randn(3, 3),
columns=[["i", "i", "j"], ["A", "A", "B"]],
index=[["i", "i", "j"], ["X", "X", "Y"]],
)
# the first 2 rows
expected = df.iloc[[0, 1]].droplevel(0)
result = df.loc["i"]
tm.assert_frame_equal(result, expected)
# 2nd (last) column
expected = df.iloc[:, [2]].droplevel(0, axis=1)
result = df.loc[:, "j"]
tm.assert_frame_equal(result, expected)
# bottom right corner
expected = df.iloc[[2], [2]].droplevel(0).droplevel(0, axis=1)
result = df.loc["j"].loc[:, "j"]
tm.assert_frame_equal(result, expected)
# with a tuple
expected = df.iloc[[0, 1]]
result = df.loc[("i", "X")]
tm.assert_frame_equal(result, expected)
def test_loc_multiindex_ints(self):
df = DataFrame(
np.random.randn(3, 3),
columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]],
)
expected = df.iloc[[0, 1]].droplevel(0)
result = df.loc[4]
tm.assert_frame_equal(result, expected)
def test_loc_multiindex_missing_label_raises(self):
df = DataFrame(
np.random.randn(3, 3),
columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]],
)
with pytest.raises(KeyError, match=r"^2$"):
df.loc[2]
@pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])])
def test_loc_multiindex_list_missing_label(self, key, pos):
# GH 27148 - lists with missing labels do not raise:
df = DataFrame(
np.random.randn(3, 3),
columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]],
)
expected = df.iloc[pos]
result = df.loc[key]
tm.assert_frame_equal(result, expected)
def test_loc_multiindex_too_many_dims_raises(self):
# GH 14885
s = Series(
range(8),
index=MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
)
with pytest.raises(KeyError, match=r"^\('a', 'b'\)$"):
s.loc["a", "b"]
with pytest.raises(KeyError, match=r"^\('a', 'd', 'g'\)$"):
s.loc["a", "d", "g"]
with pytest.raises(IndexingError, match="Too many indexers"):
s.loc["a", "d", "g", "j"]
def test_loc_multiindex_indexer_none(self):
# GH6788
# multi-index indexer is None (meaning take all)
attributes = ["Attribute" + str(i) for i in range(1)]
attribute_values = ["Value" + str(i) for i in range(5)]
index = MultiIndex.from_product([attributes, attribute_values])
df = 0.1 * np.random.randn(10, 1 * 5) + 0.5
df = DataFrame(df, columns=index)
result = df[attributes]
tm.assert_frame_equal(result, df)
# GH 7349
# loc with a multi-index seems to be doing fallback
df = DataFrame(
np.arange(12).reshape(-1, 1),
index=MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]]),
)
expected = df.loc[([1, 2],), :]
result = df.loc[[1, 2]]
tm.assert_frame_equal(result, expected)
def test_loc_multiindex_incomplete(self):
# GH 7399
# incomplete indexers
s = Series(
np.arange(15, dtype="int64"),
MultiIndex.from_product([range(5), ["a", "b", "c"]]),
)
expected = s.loc[:, "a":"c"]
result = s.loc[0:4, "a":"c"]
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)
result = s.loc[:4, "a":"c"]
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)
result = s.loc[0:, "a":"c"]
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)
# GH 7400
# multiindexer gettitem with list of indexers skips wrong element
s = Series(
np.arange(15, dtype="int64"),
MultiIndex.from_product([range(5), ["a", "b", "c"]]),
)
expected = s.iloc[[6, 7, 8, 12, 13, 14]]
result = s.loc[2:4:2, "a":"c"]
tm.assert_series_equal(result, expected)
def test_get_loc_single_level(self, single_level_multiindex):
single_level = single_level_multiindex
s = Series(np.random.randn(len(single_level)), index=single_level)
for k in single_level.values:
s[k]
def test_loc_getitem_int_slice(self):
# GH 3053
# loc should treat integer slices like label slices
index = MultiIndex.from_tuples(
[t for t in itertools.product([6, 7, 8], ["a", "b"])]
)
df = DataFrame(np.random.randn(6, 6), index, index)
result = df.loc[6:8, :]
expected = df
tm.assert_frame_equal(result, expected)
index = MultiIndex.from_tuples(
[t for t in itertools.product([10, 20, 30], ["a", "b"])]
)
df = DataFrame(np.random.randn(6, 6), index, index)
result = df.loc[20:30, :]
expected = df.iloc[2:]
tm.assert_frame_equal(result, expected)
# doc examples
result = df.loc[10, :]
expected = df.iloc[0:2]
expected.index = ["a", "b"]
tm.assert_frame_equal(result, expected)
result = df.loc[:, 10]
expected = df[10]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"indexer_type_1", (list, tuple, set, slice, np.ndarray, Series, Index)
)
@pytest.mark.parametrize(
"indexer_type_2", (list, tuple, set, slice, np.ndarray, Series, Index)
)
def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
# GH #19686
# .loc should work with nested indexers which can be
# any list-like objects (see `pandas.api.types.is_list_like`) or slices
def convert_nested_indexer(indexer_type, keys):
if indexer_type == np.ndarray:
return np.array(keys)
if indexer_type == slice:
return slice(*keys)
return indexer_type(keys)
a = [10, 20, 30]
b = [1, 2, 3]
index = MultiIndex.from_product([a, b])
df = DataFrame(
np.arange(len(index), dtype="int64"), index=index, columns=["Data"]
)
keys = ([10, 20], [2, 3])
types = (indexer_type_1, indexer_type_2)
# check indexers with all the combinations of nested objects
# of all the valid types
indexer = tuple(
convert_nested_indexer(indexer_type, k)
for indexer_type, k in zip(types, keys)
)
result = df.loc[indexer, "Data"]
expected = Series(
[1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys)
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"indexer, pos",
[
([], []), # empty ok
(["A"], slice(3)),
(["A", "D"], slice(3)),
(["D", "E"], []), # no values found - fine
(["D"], []), # same, with single item list: GH 27148
(pd.IndexSlice[:, ["foo"]], slice(2, None, 3)),
(pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)),
],
)
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
# GH 7866
# multi-index slicing with missing indexers
idx = MultiIndex.from_product(
[["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"]
)
s = Series(np.arange(9, dtype="int64"), index=idx).sort_index()
expected = s.iloc[pos]
result = s.loc[indexer]
tm.assert_series_equal(result, expected)
def test_series_loc_getitem_fancy(multiindex_year_month_day_dataframe_random_data):
s = multiindex_year_month_day_dataframe_random_data["A"]
expected = s.reindex(s.index[49:51])
result = s.loc[[(2000, 3, 10), (2000, 3, 13)]]
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("columns_indexer", [([], slice(None)), (["foo"], [])])
def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
# GH 8737
# empty indexer
multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"]))
df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index)
df = df.sort_index(level=0, axis=1)
expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0])
result = df.loc[:, columns_indexer]
tm.assert_frame_equal(result, expected)
def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
# regression from < 0.14.0
# GH 7914
df = DataFrame(
[[np.mean, np.median], ["mean", "median"]],
columns=MultiIndex.from_tuples([("functs", "mean"), ("functs", "median")]),
index=["function", "name"],
)
result = df.loc["function", ("functs", "mean")]
expected = np.mean
assert result == expected
def test_loc_getitem_tuple_plus_slice():
# GH 671
df = DataFrame(
{
"a": np.arange(10),
"b": np.arange(10),
"c": np.random.randn(10),
"d": np.random.randn(10),
}
).set_index(["a", "b"])
expected = df.loc[0, 0]
result = df.loc[(0, 0), :]
tm.assert_series_equal(result, expected)
def test_loc_getitem_int(frame_random_data_integer_multi_index):
df = frame_random_data_integer_multi_index
result = df.loc[1]
expected = df[-3:]
expected.index = expected.index.droplevel(0)
tm.assert_frame_equal(result, expected)
def test_loc_getitem_int_raises_exception(frame_random_data_integer_multi_index):
df = frame_random_data_integer_multi_index
with pytest.raises(KeyError, match=r"^3$"):
df.loc[3]
def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
# test setup - check key not in dataframe
with pytest.raises(KeyError, match=r"^\('bar', 'three'\)$"):
df.loc[("bar", "three"), "B"]
# in theory should be inserting in a sorted space????
df.loc[("bar", "three"), "B"] = 0
expected = 0
result = df.sort_index().loc[("bar", "three"), "B"]
assert result == expected

View File

@@ -0,0 +1,106 @@
import numpy as np
import pytest
import pandas._libs.index as _index
from pandas.errors import PerformanceWarning
import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series
from pandas.util import testing as tm
class TestMultiIndexBasic:
def test_multiindex_perf_warn(self):
df = DataFrame(
{
"jim": [0, 0, 1, 1],
"joe": ["x", "x", "z", "y"],
"jolie": np.random.rand(4),
}
).set_index(["jim", "joe"])
with tm.assert_produces_warning(PerformanceWarning, clear=[pd.core.index]):
df.loc[(1, "z")]
df = df.iloc[[2, 1, 3, 0]]
with tm.assert_produces_warning(PerformanceWarning):
df.loc[(0,)]
def test_multiindex_contains_dropped(self):
# GH 19027
# test that dropped MultiIndex levels are not in the MultiIndex
# despite continuing to be in the MultiIndex's levels
idx = MultiIndex.from_product([[1, 2], [3, 4]])
assert 2 in idx
idx = idx.drop(2)
# drop implementation keeps 2 in the levels
assert 2 in idx.levels[0]
# but it should no longer be in the index itself
assert 2 not in idx
# also applies to strings
idx = MultiIndex.from_product([["a", "b"], ["c", "d"]])
assert "a" in idx
idx = idx.drop("a")
assert "a" in idx.levels[0]
assert "a" not in idx
@pytest.mark.parametrize(
"data, expected",
[
(MultiIndex.from_product([(), ()]), True),
(MultiIndex.from_product([(1, 2), (3, 4)]), True),
(MultiIndex.from_product([("a", "b"), (1, 2)]), False),
],
)
def test_multiindex_is_homogeneous_type(self, data, expected):
assert data._is_homogeneous_type is expected
def test_indexing_over_hashtable_size_cutoff(self):
n = 10000
old_cutoff = _index._SIZE_CUTOFF
_index._SIZE_CUTOFF = 20000
s = Series(np.arange(n), MultiIndex.from_arrays((["a"] * n, np.arange(n))))
# hai it works!
assert s[("a", 5)] == 5
assert s[("a", 6)] == 6
assert s[("a", 7)] == 7
_index._SIZE_CUTOFF = old_cutoff
def test_multi_nan_indexing(self):
# GH 3588
df = DataFrame(
{
"a": ["R1", "R2", np.nan, "R4"],
"b": ["C1", "C2", "C3", "C4"],
"c": [10, 15, np.nan, 20],
}
)
result = df.set_index(["a", "b"], drop=False)
expected = DataFrame(
{
"a": ["R1", "R2", np.nan, "R4"],
"b": ["C1", "C2", "C3", "C4"],
"c": [10, 15, np.nan, 20],
},
index=[
Index(["R1", "R2", np.nan, "R4"], name="a"),
Index(["C1", "C2", "C3", "C4"], name="b"),
],
)
tm.assert_frame_equal(result, expected)
def test_contains(self):
# GH 24570
tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min")
idx = MultiIndex.from_arrays([tx, np.arange(len(tx))])
assert tx[0] in idx
assert "element_not_exit" not in idx
assert "0 day 09:30:00" in idx

View File

@@ -0,0 +1,201 @@
from warnings import catch_warnings, simplefilter
import numpy as np
import pytest
from pandas import DataFrame, MultiIndex
from pandas.util import testing as tm
class TestMultiIndexPartial:
def test_getitem_partial_int(self):
# GH 12416
# with single item
l1 = [10, 20]
l2 = ["a", "b"]
df = DataFrame(index=range(2), columns=MultiIndex.from_product([l1, l2]))
expected = DataFrame(index=range(2), columns=l2)
result = df[20]
tm.assert_frame_equal(result, expected)
# with list
expected = DataFrame(
index=range(2), columns=MultiIndex.from_product([l1[1:], l2])
)
result = df[[20]]
tm.assert_frame_equal(result, expected)
# missing item:
with pytest.raises(KeyError, match="1"):
df[1]
with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
df[[1]]
def test_series_slice_partial(self):
pass
def test_xs_partial(
self,
multiindex_dataframe_random_data,
multiindex_year_month_day_dataframe_random_data,
):
frame = multiindex_dataframe_random_data
ymd = multiindex_year_month_day_dataframe_random_data
result = frame.xs("foo")
result2 = frame.loc["foo"]
expected = frame.T["foo"].T
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, result2)
result = ymd.xs((2000, 4))
expected = ymd.loc[2000, 4]
tm.assert_frame_equal(result, expected)
# ex from #1796
index = MultiIndex(
levels=[["foo", "bar"], ["one", "two"], [-1, 1]],
codes=[
[0, 0, 0, 0, 1, 1, 1, 1],
[0, 0, 1, 1, 0, 0, 1, 1],
[0, 1, 0, 1, 0, 1, 0, 1],
],
)
df = DataFrame(np.random.randn(8, 4), index=index, columns=list("abcd"))
result = df.xs(["foo", "one"])
expected = df.loc["foo", "one"]
tm.assert_frame_equal(result, expected)
def test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data):
ymd = multiindex_year_month_day_dataframe_random_data
ymd = ymd.T
result = ymd[2000, 2]
expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1])
expected.columns = expected.columns.droplevel(0).droplevel(0)
tm.assert_frame_equal(result, expected)
def test_fancy_slice_partial(
self,
multiindex_dataframe_random_data,
multiindex_year_month_day_dataframe_random_data,
):
frame = multiindex_dataframe_random_data
result = frame.loc["bar":"baz"]
expected = frame[3:7]
tm.assert_frame_equal(result, expected)
ymd = multiindex_year_month_day_dataframe_random_data
result = ymd.loc[(2000, 2):(2000, 4)]
lev = ymd.index.codes[1]
expected = ymd[(lev >= 1) & (lev <= 3)]
tm.assert_frame_equal(result, expected)
def test_getitem_partial_column_select(self):
idx = MultiIndex(
codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
levels=[["a", "b"], ["x", "y"], ["p", "q"]],
)
df = DataFrame(np.random.rand(3, 2), index=idx)
result = df.loc[("a", "y"), :]
expected = df.loc[("a", "y")]
tm.assert_frame_equal(result, expected)
result = df.loc[("a", "y"), [1, 0]]
expected = df.loc[("a", "y")][[1, 0]]
tm.assert_frame_equal(result, expected)
with catch_warnings(record=True):
simplefilter("ignore", FutureWarning)
result = df.ix[("a", "y"), [1, 0]]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match=r"\('a', 'foo'\)"):
df.loc[("a", "foo"), :]
def test_partial_set(self, multiindex_year_month_day_dataframe_random_data):
# GH #397
ymd = multiindex_year_month_day_dataframe_random_data
df = ymd.copy()
exp = ymd.copy()
df.loc[2000, 4] = 0
exp.loc[2000, 4].values[:] = 0
tm.assert_frame_equal(df, exp)
df["A"].loc[2000, 4] = 1
exp["A"].loc[2000, 4].values[:] = 1
tm.assert_frame_equal(df, exp)
df.loc[2000] = 5
exp.loc[2000].values[:] = 5
tm.assert_frame_equal(df, exp)
# this works...for now
df["A"].iloc[14] = 5
assert df["A"][14] == 5
# ---------------------------------------------------------------------
# AMBIGUOUS CASES!
def test_partial_loc_missing(self, multiindex_year_month_day_dataframe_random_data):
pytest.skip("skipping for now")
ymd = multiindex_year_month_day_dataframe_random_data
result = ymd.loc[2000, 0]
expected = ymd.loc[2000]["A"]
tm.assert_series_equal(result, expected)
# need to put in some work here
# self.ymd.loc[2000, 0] = 0
# assert (self.ymd.loc[2000]['A'] == 0).all()
# Pretty sure the second (and maybe even the first) is already wrong.
with pytest.raises(Exception):
ymd.loc[(2000, 6)]
with pytest.raises(Exception):
ymd.loc[(2000, 6), 0]
# ---------------------------------------------------------------------
def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
expected = frame.copy()
result = frame.copy()
result.loc[["foo", "bar"]] = 0
expected.loc["foo"] = 0
expected.loc["bar"] = 0
tm.assert_frame_equal(result, expected)
expected = frame.copy()
result = frame.copy()
result.loc["foo":"bar"] = 0
expected.loc["foo"] = 0
expected.loc["bar"] = 0
tm.assert_frame_equal(result, expected)
expected = frame["A"].copy()
result = frame["A"].copy()
result.loc[["foo", "bar"]] = 0
expected.loc["foo"] = 0
expected.loc["bar"] = 0
tm.assert_series_equal(result, expected)
expected = frame["A"].copy()
result = frame["A"].copy()
result.loc["foo":"bar"] = 0
expected.loc["foo"] = 0
expected.loc["bar"] = 0
tm.assert_series_equal(result, expected)
def test_loc_getitem_partial_both_axis():
# gh-12660
iterables = [["a", "b"], [2, 1]]
columns = MultiIndex.from_product(iterables, names=["col1", "col2"])
rows = MultiIndex.from_product(iterables, names=["row1", "row2"])
df = DataFrame(np.random.randn(4, 4), index=rows, columns=columns)
expected = df.iloc[:2, 2:].droplevel("row1").droplevel("col1", axis=1)
result = df.loc["a", "b"]
tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,41 @@
from numpy.random import randn
from pandas import DataFrame, MultiIndex, Series
from pandas.util import testing as tm
class TestMultiIndexSetOps:
def test_multiindex_symmetric_difference(self):
# GH 13490
idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"])
result = idx ^ idx
assert result.names == idx.names
idx2 = idx.copy().rename(["A", "B"])
result = idx ^ idx2
assert result.names == [None, None]
def test_mixed_depth_insert(self):
arrays = [
["a", "top", "top", "routine1", "routine1", "routine2"],
["", "OD", "OD", "result1", "result2", "result1"],
["", "wx", "wy", "", "", ""],
]
tuples = sorted(zip(*arrays))
index = MultiIndex.from_tuples(tuples)
df = DataFrame(randn(4, 6), columns=index)
result = df.copy()
expected = df.copy()
result["b"] = [1, 2, 3, 4]
expected["b", "", ""] = [1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
def test_dataframe_insert_column_all_na(self):
# GH #1534
mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")])
df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
s = Series({(1, 1): 1, (1, 2): 2})
df["new"] = s
assert df["new"].isna().all()

View File

@@ -0,0 +1,475 @@
from warnings import catch_warnings, simplefilter
import numpy as np
from numpy.random import randn
import pytest
import pandas as pd
from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna
import pandas.core.common as com
from pandas.util import testing as tm
@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning")
class TestMultiIndexSetItem:
def test_setitem_multiindex(self):
with catch_warnings(record=True):
for index_fn in ("ix", "loc"):
def assert_equal(a, b):
assert a == b
def check(target, indexers, value, compare_fn, expected=None):
fn = getattr(target, index_fn)
fn.__setitem__(indexers, value)
result = fn.__getitem__(indexers)
if expected is None:
expected = value
compare_fn(result, expected)
# GH7190
index = MultiIndex.from_product(
[np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"]
)
t, n = 0, 2
df = DataFrame(
np.nan,
columns=["A", "w", "l", "a", "x", "X", "d", "profit"],
index=index,
)
check(
target=df, indexers=((t, n), "X"), value=0, compare_fn=assert_equal
)
df = DataFrame(
-999,
columns=["A", "w", "l", "a", "x", "X", "d", "profit"],
index=index,
)
check(
target=df, indexers=((t, n), "X"), value=1, compare_fn=assert_equal
)
df = DataFrame(
columns=["A", "w", "l", "a", "x", "X", "d", "profit"], index=index
)
check(
target=df, indexers=((t, n), "X"), value=2, compare_fn=assert_equal
)
# gh-7218: assigning with 0-dim arrays
df = DataFrame(
-999,
columns=["A", "w", "l", "a", "x", "X", "d", "profit"],
index=index,
)
check(
target=df,
indexers=((t, n), "X"),
value=np.array(3),
compare_fn=assert_equal,
expected=3,
)
# GH5206
df = DataFrame(
np.arange(25).reshape(5, 5),
columns="A,B,C,D,E".split(","),
dtype=float,
)
df["F"] = 99
row_selection = df["A"] % 2 == 0
col_selection = ["B", "C"]
with catch_warnings(record=True):
df.ix[row_selection, col_selection] = df["F"]
output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"])
with catch_warnings(record=True):
tm.assert_frame_equal(df.ix[row_selection, col_selection], output)
check(
target=df,
indexers=(row_selection, col_selection),
value=df["F"],
compare_fn=tm.assert_frame_equal,
expected=output,
)
# GH11372
idx = MultiIndex.from_product(
[["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")]
)
cols = MultiIndex.from_product(
[["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")]
)
df = DataFrame(np.random.random((12, 4)), index=idx, columns=cols)
subidx = MultiIndex.from_tuples(
[("A", Timestamp("2015-01-01")), ("A", Timestamp("2015-02-01"))]
)
subcols = MultiIndex.from_tuples(
[("foo", Timestamp("2016-01-01")), ("foo", Timestamp("2016-02-01"))]
)
vals = DataFrame(
np.random.random((2, 2)), index=subidx, columns=subcols
)
check(
target=df,
indexers=(subidx, subcols),
value=vals,
compare_fn=tm.assert_frame_equal,
)
# set all columns
vals = DataFrame(np.random.random((2, 4)), index=subidx, columns=cols)
check(
target=df,
indexers=(subidx, slice(None, None, None)),
value=vals,
compare_fn=tm.assert_frame_equal,
)
# identity
copy = df.copy()
check(
target=df,
indexers=(df.index, df.columns),
value=df,
compare_fn=tm.assert_frame_equal,
expected=copy,
)
def test_multiindex_setitem(self):
# GH 3738
# setting with a multi-index right hand side
arrays = [
np.array(["bar", "bar", "baz", "qux", "qux", "bar"]),
np.array(["one", "two", "one", "one", "two", "one"]),
np.arange(0, 6, 1),
]
df_orig = DataFrame(
np.random.randn(6, 3), index=arrays, columns=["A", "B", "C"]
).sort_index()
expected = df_orig.loc[["bar"]] * 2
df = df_orig.copy()
df.loc[["bar"]] *= 2
tm.assert_frame_equal(df.loc[["bar"]], expected)
# raise because these have differing levels
with pytest.raises(TypeError):
df.loc["bar"] *= 2
# from SO
# http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
df_orig = DataFrame.from_dict(
{
"price": {
("DE", "Coal", "Stock"): 2,
("DE", "Gas", "Stock"): 4,
("DE", "Elec", "Demand"): 1,
("FR", "Gas", "Stock"): 5,
("FR", "Solar", "SupIm"): 0,
("FR", "Wind", "SupIm"): 0,
}
}
)
df_orig.index = MultiIndex.from_tuples(
df_orig.index, names=["Sit", "Com", "Type"]
)
expected = df_orig.copy()
expected.iloc[[0, 2, 3]] *= 2
idx = pd.IndexSlice
df = df_orig.copy()
df.loc[idx[:, :, "Stock"], :] *= 2
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[idx[:, :, "Stock"], "price"] *= 2
tm.assert_frame_equal(df, expected)
def test_multiindex_assignment(self):
# GH3777 part 2
# mixed dtype
df = DataFrame(
np.random.randint(5, 10, size=9).reshape(3, 3),
columns=list("abc"),
index=[[4, 4, 8], [8, 10, 12]],
)
df["d"] = np.nan
arr = np.array([0.0, 1.0])
with catch_warnings(record=True):
df.ix[4, "d"] = arr
tm.assert_series_equal(df.ix[4, "d"], Series(arr, index=[8, 10], name="d"))
# single dtype
df = DataFrame(
np.random.randint(5, 10, size=9).reshape(3, 3),
columns=list("abc"),
index=[[4, 4, 8], [8, 10, 12]],
)
with catch_warnings(record=True):
df.ix[4, "c"] = arr
exp = Series(arr, index=[8, 10], name="c", dtype="float64")
tm.assert_series_equal(df.ix[4, "c"], exp)
# scalar ok
with catch_warnings(record=True):
df.ix[4, "c"] = 10
exp = Series(10, index=[8, 10], name="c", dtype="float64")
tm.assert_series_equal(df.ix[4, "c"], exp)
# invalid assignments
with pytest.raises(ValueError):
with catch_warnings(record=True):
df.ix[4, "c"] = [0, 1, 2, 3]
with pytest.raises(ValueError):
with catch_warnings(record=True):
df.ix[4, "c"] = [0]
# groupby example
NUM_ROWS = 100
NUM_COLS = 10
col_names = ["A" + num for num in map(str, np.arange(NUM_COLS).tolist())]
index_cols = col_names[:5]
df = DataFrame(
np.random.randint(5, size=(NUM_ROWS, NUM_COLS)),
dtype=np.int64,
columns=col_names,
)
df = df.set_index(index_cols).sort_index()
grp = df.groupby(level=index_cols[:4])
df["new_col"] = np.nan
f_index = np.arange(5)
def f(name, df2):
return Series(np.arange(df2.shape[0]), name=df2.index.values[0]).reindex(
f_index
)
# TODO(wesm): unused?
# new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T
# we are actually operating on a copy here
# but in this case, that's ok
for name, df2 in grp:
new_vals = np.arange(df2.shape[0])
with catch_warnings(record=True):
df.ix[name, "new_col"] = new_vals
def test_series_setitem(self, multiindex_year_month_day_dataframe_random_data):
ymd = multiindex_year_month_day_dataframe_random_data
s = ymd["A"]
s[2000, 3] = np.nan
assert isna(s.values[42:65]).all()
assert notna(s.values[:42]).all()
assert notna(s.values[65:]).all()
s[2000, 3, 10] = np.nan
assert isna(s[49])
def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.T.copy()
values = df.values
result = df[df > 0]
expected = df.where(df > 0)
tm.assert_frame_equal(result, expected)
df[df > 0] = 5
values[values > 0] = 5
tm.assert_almost_equal(df.values, values)
df[df == 5] = 0
values[values == 5] = 0
tm.assert_almost_equal(df.values, values)
# a df that needs alignment first
df[df[:-1] < 0] = 2
np.putmask(values[:-1], values[:-1] < 0, 2)
tm.assert_almost_equal(df.values, values)
with pytest.raises(TypeError, match="boolean values only"):
df[df * 0] = 2
def test_frame_getitem_setitem_multislice(self):
levels = [["t1", "t2"], ["a", "b", "c"]]
codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"])
df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx)
result = df.loc[:, "value"]
tm.assert_series_equal(df["value"], result)
with catch_warnings(record=True):
simplefilter("ignore", FutureWarning)
result = df.ix[:, "value"]
tm.assert_series_equal(df["value"], result)
result = df.loc[df.index[1:3], "value"]
tm.assert_series_equal(df["value"][1:3], result)
result = df.loc[:, :]
tm.assert_frame_equal(df, result)
result = df
df.loc[:, "value"] = 10
result["value"] = 10
tm.assert_frame_equal(df, result)
df.loc[:, :] = 10
tm.assert_frame_equal(df, result)
def test_frame_setitem_multi_column(self):
df = DataFrame(randn(10, 4), columns=[["a", "a", "b", "b"], [0, 1, 0, 1]])
cp = df.copy()
cp["a"] = cp["b"]
tm.assert_frame_equal(cp["a"], cp["b"])
# set with ndarray
cp = df.copy()
cp["a"] = cp["b"].values
tm.assert_frame_equal(cp["a"], cp["b"])
# ---------------------------------------
# #1803
columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")])
df = DataFrame(index=[1, 3, 5], columns=columns)
# Works, but adds a column instead of updating the two existing ones
df["A"] = 0.0 # Doesn't work
assert (df["A"].values == 0).all()
# it broadcasts
df["B", "1"] = [1, 2, 3]
df["A"] = df["B", "1"]
sliced_a1 = df["A", "1"]
sliced_a2 = df["A", "2"]
sliced_b1 = df["B", "1"]
tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
assert sliced_a1.name == ("A", "1")
assert sliced_a2.name == ("A", "2")
assert sliced_b1.name == ("B", "1")
def test_getitem_setitem_tuple_plus_columns(
self, multiindex_year_month_day_dataframe_random_data
):
# GH #1013
ymd = multiindex_year_month_day_dataframe_random_data
df = ymd[:5]
result = df.loc[(2000, 1, 6), ["A", "B", "C"]]
expected = df.loc[2000, 1, 6][["A", "B", "C"]]
tm.assert_series_equal(result, expected)
def test_getitem_setitem_slice_integers(self):
index = MultiIndex(
levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
)
frame = DataFrame(
np.random.randn(len(index), 4), index=index, columns=["a", "b", "c", "d"]
)
res = frame.loc[1:2]
exp = frame.reindex(frame.index[2:])
tm.assert_frame_equal(res, exp)
frame.loc[1:2] = 7
assert (frame.loc[1:2] == 7).values.all()
series = Series(np.random.randn(len(index)), index=index)
res = series.loc[1:2]
exp = series.reindex(series.index[2:])
tm.assert_series_equal(res, exp)
series.loc[1:2] = 7
assert (series.loc[1:2] == 7).values.all()
def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
dft = frame.T
s = dft["foo", "two"]
dft["foo", "two"] = s > s.median()
tm.assert_series_equal(dft["foo", "two"], s > s.median())
# assert isinstance(dft._data.blocks[1].items, MultiIndex)
reindexed = dft.reindex(columns=[("foo", "two")])
tm.assert_series_equal(reindexed["foo", "two"], s > s.median())
def test_set_column_scalar_with_ix(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
subset = frame.index[[1, 4, 5]]
frame.loc[subset] = 99
assert (frame.loc[subset].values == 99).all()
col = frame["B"]
col[subset] = 97
assert (frame.loc[subset, "B"] == 97).all()
def test_nonunique_assignment_1750(self):
df = DataFrame(
[[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD")
)
df = df.set_index(["A", "B"])
ix = MultiIndex.from_tuples([(1, 1)])
df.loc[ix, "C"] = "_"
assert (df.xs((1, 1))["C"] == "_").all()
def test_astype_assignment_with_dups(self):
# GH 4686
# assignment with dups that has a dtype change
cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")])
df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object)
index = df.index.copy()
df["A"] = df["A"].astype(np.float64)
tm.assert_index_equal(df.index, index)
def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
# this works because we are modifying the underlying array
# really a no-no
df = multiindex_dataframe_random_data.T
df["foo"].values[:] = 0
assert (df["foo"].values == 0).all()
def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
# will raise/warn as its chained assignment
df = multiindex_dataframe_random_data.T
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
df["foo"]["one"] = 2
def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data.T
expected = frame
df = frame.copy()
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
df["foo"]["one"] = 2
result = df
tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,707 @@
from warnings import catch_warnings
import numpy as np
import pytest
from pandas.errors import UnsortedIndexError
import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series, Timestamp
from pandas.core.indexing import _non_reducing_slice
from pandas.tests.indexing.common import _mklbl
from pandas.util import testing as tm
@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning")
class TestMultiIndexSlicers:
def test_per_axis_per_level_getitem(self):
# GH6134
# example test case
ix = MultiIndex.from_product(
[_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)]
)
df = DataFrame(np.arange(len(ix.to_numpy())), index=ix)
result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
expected = df.loc[
[
tuple([a, b, c, d])
for a, b, c, d in df.index.values
if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3")
]
]
tm.assert_frame_equal(result, expected)
expected = df.loc[
[
tuple([a, b, c, d])
for a, b, c, d in df.index.values
if (a == "A1" or a == "A2" or a == "A3")
and (c == "C1" or c == "C2" or c == "C3")
]
]
result = df.loc[(slice("A1", "A3"), slice(None), slice("C1", "C3")), :]
tm.assert_frame_equal(result, expected)
# test multi-index slicing with per axis and per index controls
index = MultiIndex.from_tuples(
[("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
)
columns = MultiIndex.from_tuples(
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
names=["lvl0", "lvl1"],
)
df = DataFrame(
np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
)
df = df.sort_index(axis=0).sort_index(axis=1)
# identity
result = df.loc[(slice(None), slice(None)), :]
tm.assert_frame_equal(result, df)
result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))]
tm.assert_frame_equal(result, df)
result = df.loc[:, (slice(None), slice(None))]
tm.assert_frame_equal(result, df)
# index
result = df.loc[(slice(None), [1]), :]
expected = df.iloc[[0, 3]]
tm.assert_frame_equal(result, expected)
result = df.loc[(slice(None), 1), :]
expected = df.iloc[[0, 3]]
tm.assert_frame_equal(result, expected)
# columns
result = df.loc[:, (slice(None), ["foo"])]
expected = df.iloc[:, [1, 3]]
tm.assert_frame_equal(result, expected)
# both
result = df.loc[(slice(None), 1), (slice(None), ["foo"])]
expected = df.iloc[[0, 3], [1, 3]]
tm.assert_frame_equal(result, expected)
result = df.loc["A", "a"]
expected = DataFrame(
dict(bar=[1, 5, 9], foo=[0, 4, 8]),
index=Index([1, 2, 3], name="two"),
columns=Index(["bar", "foo"], name="lvl1"),
)
tm.assert_frame_equal(result, expected)
result = df.loc[(slice(None), [1, 2]), :]
expected = df.iloc[[0, 1, 3]]
tm.assert_frame_equal(result, expected)
# multi-level series
s = Series(np.arange(len(ix.to_numpy())), index=ix)
result = s.loc["A1":"A3", :, ["C1", "C3"]]
expected = s.loc[
[
tuple([a, b, c, d])
for a, b, c, d in s.index.values
if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3")
]
]
tm.assert_series_equal(result, expected)
# boolean indexers
result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :]
expected = df.iloc[[2, 3]]
tm.assert_frame_equal(result, expected)
with pytest.raises(ValueError):
df.loc[(slice(None), np.array([True, False])), :]
# ambiguous notation
# this is interpreted as slicing on both axes (GH #16396)
result = df.loc[slice(None), [1]]
expected = df.iloc[:, []]
tm.assert_frame_equal(result, expected)
result = df.loc[(slice(None), [1]), :]
expected = df.iloc[[0, 3]]
tm.assert_frame_equal(result, expected)
# not lexsorted
assert df.index.lexsort_depth == 2
df = df.sort_index(level=1, axis=0)
assert df.index.lexsort_depth == 0
msg = (
"MultiIndex slicing requires the index to be "
r"lexsorted: slicing on levels \[1\], lexsort depth 0"
)
with pytest.raises(UnsortedIndexError, match=msg):
df.loc[(slice(None), slice("bar")), :]
# GH 16734: not sorted, but no real slicing
result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :]
tm.assert_frame_equal(result, df.iloc[[1, 3], :])
def test_multiindex_slicers_non_unique(self):
# GH 7106
# non-unique mi index support
df = (
DataFrame(
dict(
A=["foo", "foo", "foo", "foo"],
B=["a", "a", "a", "a"],
C=[1, 2, 1, 3],
D=[1, 2, 3, 4],
)
)
.set_index(["A", "B", "C"])
.sort_index()
)
assert not df.index.is_unique
expected = (
DataFrame(dict(A=["foo", "foo"], B=["a", "a"], C=[1, 1], D=[1, 3]))
.set_index(["A", "B", "C"])
.sort_index()
)
result = df.loc[(slice(None), slice(None), 1), :]
tm.assert_frame_equal(result, expected)
# this is equivalent of an xs expression
result = df.xs(1, level=2, drop_level=False)
tm.assert_frame_equal(result, expected)
df = (
DataFrame(
dict(
A=["foo", "foo", "foo", "foo"],
B=["a", "a", "a", "a"],
C=[1, 2, 1, 2],
D=[1, 2, 3, 4],
)
)
.set_index(["A", "B", "C"])
.sort_index()
)
assert not df.index.is_unique
expected = (
DataFrame(dict(A=["foo", "foo"], B=["a", "a"], C=[1, 1], D=[1, 3]))
.set_index(["A", "B", "C"])
.sort_index()
)
result = df.loc[(slice(None), slice(None), 1), :]
assert not result.index.is_unique
tm.assert_frame_equal(result, expected)
# GH12896
# numpy-implementation dependent bug
ints = [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
12,
13,
14,
14,
16,
17,
18,
19,
200000,
200000,
]
n = len(ints)
idx = MultiIndex.from_arrays([["a"] * n, ints])
result = Series([1] * n, index=idx)
result = result.sort_index()
result = result.loc[(slice(None), slice(100000))]
expected = Series([1] * (n - 2), index=idx[:-2]).sort_index()
tm.assert_series_equal(result, expected)
def test_multiindex_slicers_datetimelike(self):
# GH 7429
# buggy/inconsistent behavior when slicing with datetime-like
import datetime
dates = [
datetime.datetime(2012, 1, 1, 12, 12, 12) + datetime.timedelta(days=i)
for i in range(6)
]
freq = [1, 2]
index = MultiIndex.from_product([dates, freq], names=["date", "frequency"])
df = DataFrame(
np.arange(6 * 2 * 4, dtype="int64").reshape(-1, 4),
index=index,
columns=list("ABCD"),
)
# multi-axis slicing
idx = pd.IndexSlice
expected = df.iloc[[0, 2, 4], [0, 1]]
result = df.loc[
(
slice(
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
),
slice(1, 1),
),
slice("A", "B"),
]
tm.assert_frame_equal(result, expected)
result = df.loc[
(
idx[
Timestamp("2012-01-01 12:12:12") : Timestamp("2012-01-03 12:12:12")
],
idx[1:1],
),
slice("A", "B"),
]
tm.assert_frame_equal(result, expected)
result = df.loc[
(
slice(
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
),
1,
),
slice("A", "B"),
]
tm.assert_frame_equal(result, expected)
# with strings
result = df.loc[
(slice("2012-01-01 12:12:12", "2012-01-03 12:12:12"), slice(1, 1)),
slice("A", "B"),
]
tm.assert_frame_equal(result, expected)
result = df.loc[
(idx["2012-01-01 12:12:12":"2012-01-03 12:12:12"], 1), idx["A", "B"]
]
tm.assert_frame_equal(result, expected)
def test_multiindex_slicers_edges(self):
# GH 8132
# various edge cases
df = DataFrame(
{
"A": ["A0"] * 5 + ["A1"] * 5 + ["A2"] * 5,
"B": ["B0", "B0", "B1", "B1", "B2"] * 3,
"DATE": [
"2013-06-11",
"2013-07-02",
"2013-07-09",
"2013-07-30",
"2013-08-06",
"2013-06-11",
"2013-07-02",
"2013-07-09",
"2013-07-30",
"2013-08-06",
"2013-09-03",
"2013-10-01",
"2013-07-09",
"2013-08-06",
"2013-09-03",
],
"VALUES": [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2],
}
)
df["DATE"] = pd.to_datetime(df["DATE"])
df1 = df.set_index(["A", "B", "DATE"])
df1 = df1.sort_index()
# A1 - Get all values under "A0" and "A1"
result = df1.loc[(slice("A1")), :]
expected = df1.iloc[0:10]
tm.assert_frame_equal(result, expected)
# A2 - Get all values from the start to "A2"
result = df1.loc[(slice("A2")), :]
expected = df1
tm.assert_frame_equal(result, expected)
# A3 - Get all values under "B1" or "B2"
result = df1.loc[(slice(None), slice("B1", "B2")), :]
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
tm.assert_frame_equal(result, expected)
# A4 - Get all values between 2013-07-02 and 2013-07-09
result = df1.loc[(slice(None), slice(None), slice("20130702", "20130709")), :]
expected = df1.iloc[[1, 2, 6, 7, 12]]
tm.assert_frame_equal(result, expected)
# B1 - Get all values in B0 that are also under A0, A1 and A2
result = df1.loc[(slice("A2"), slice("B0")), :]
expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
tm.assert_frame_equal(result, expected)
# B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for
# the As)
result = df1.loc[(slice(None), slice("B2")), :]
expected = df1
tm.assert_frame_equal(result, expected)
# B3 - Get all values from B1 to B2 and up to 2013-08-06
result = df1.loc[(slice(None), slice("B1", "B2"), slice("2013-08-06")), :]
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
tm.assert_frame_equal(result, expected)
# B4 - Same as A4 but the start of the date slice is not a key.
# shows indexing on a partial selection slice
result = df1.loc[(slice(None), slice(None), slice("20130701", "20130709")), :]
expected = df1.iloc[[1, 2, 6, 7, 12]]
tm.assert_frame_equal(result, expected)
def test_per_axis_per_level_doc_examples(self):
# test index maker
idx = pd.IndexSlice
# from indexing.rst / advanced
index = MultiIndex.from_product(
[_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
)
columns = MultiIndex.from_tuples(
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
names=["lvl0", "lvl1"],
)
df = DataFrame(
np.arange(len(index) * len(columns), dtype="int64").reshape(
(len(index), len(columns))
),
index=index,
columns=columns,
)
result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
expected = df.loc[
[
tuple([a, b, c, d])
for a, b, c, d in df.index.values
if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3")
]
]
tm.assert_frame_equal(result, expected)
result = df.loc[idx["A1":"A3", :, ["C1", "C3"]], :]
tm.assert_frame_equal(result, expected)
result = df.loc[(slice(None), slice(None), ["C1", "C3"]), :]
expected = df.loc[
[
tuple([a, b, c, d])
for a, b, c, d in df.index.values
if (c == "C1" or c == "C3")
]
]
tm.assert_frame_equal(result, expected)
result = df.loc[idx[:, :, ["C1", "C3"]], :]
tm.assert_frame_equal(result, expected)
# not sorted
with pytest.raises(UnsortedIndexError):
df.loc["A1", ("a", slice("foo"))]
# GH 16734: not sorted, but no real slicing
tm.assert_frame_equal(
df.loc["A1", (slice(None), "foo")], df.loc["A1"].iloc[:, [0, 2]]
)
df = df.sort_index(axis=1)
# slicing
df.loc["A1", (slice(None), "foo")]
df.loc[(slice(None), slice(None), ["C1", "C3"]), (slice(None), "foo")]
# setitem
df.loc(axis=0)[:, :, ["C1", "C3"]] = -10
def test_loc_axis_arguments(self):
index = MultiIndex.from_product(
[_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
)
columns = MultiIndex.from_tuples(
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
names=["lvl0", "lvl1"],
)
df = (
DataFrame(
np.arange(len(index) * len(columns), dtype="int64").reshape(
(len(index), len(columns))
),
index=index,
columns=columns,
)
.sort_index()
.sort_index(axis=1)
)
# axis 0
result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]]
expected = df.loc[
[
tuple([a, b, c, d])
for a, b, c, d in df.index.values
if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3")
]
]
tm.assert_frame_equal(result, expected)
result = df.loc(axis="index")[:, :, ["C1", "C3"]]
expected = df.loc[
[
tuple([a, b, c, d])
for a, b, c, d in df.index.values
if (c == "C1" or c == "C3")
]
]
tm.assert_frame_equal(result, expected)
# axis 1
result = df.loc(axis=1)[:, "foo"]
expected = df.loc[:, (slice(None), "foo")]
tm.assert_frame_equal(result, expected)
result = df.loc(axis="columns")[:, "foo"]
expected = df.loc[:, (slice(None), "foo")]
tm.assert_frame_equal(result, expected)
# invalid axis
with pytest.raises(ValueError):
df.loc(axis=-1)[:, :, ["C1", "C3"]]
with pytest.raises(ValueError):
df.loc(axis=2)[:, :, ["C1", "C3"]]
with pytest.raises(ValueError):
df.loc(axis="foo")[:, :, ["C1", "C3"]]
def test_per_axis_per_level_setitem(self):
# test index maker
idx = pd.IndexSlice
# test multi-index slicing with per axis and per index controls
index = MultiIndex.from_tuples(
[("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
)
columns = MultiIndex.from_tuples(
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
names=["lvl0", "lvl1"],
)
df_orig = DataFrame(
np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
)
df_orig = df_orig.sort_index(axis=0).sort_index(axis=1)
# identity
df = df_orig.copy()
df.loc[(slice(None), slice(None)), :] = 100
expected = df_orig.copy()
expected.iloc[:, :] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc(axis=0)[:, :] = 100
expected = df_orig.copy()
expected.iloc[:, :] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100
expected = df_orig.copy()
expected.iloc[:, :] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[:, (slice(None), slice(None))] = 100
expected = df_orig.copy()
expected.iloc[:, :] = 100
tm.assert_frame_equal(df, expected)
# index
df = df_orig.copy()
df.loc[(slice(None), [1]), :] = 100
expected = df_orig.copy()
expected.iloc[[0, 3]] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[(slice(None), 1), :] = 100
expected = df_orig.copy()
expected.iloc[[0, 3]] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc(axis=0)[:, 1] = 100
expected = df_orig.copy()
expected.iloc[[0, 3]] = 100
tm.assert_frame_equal(df, expected)
# columns
df = df_orig.copy()
df.loc[:, (slice(None), ["foo"])] = 100
expected = df_orig.copy()
expected.iloc[:, [1, 3]] = 100
tm.assert_frame_equal(df, expected)
# both
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ["foo"])] = 100
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[idx[:, 1], idx[:, ["foo"]]] = 100
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc["A", "a"] = 100
expected = df_orig.copy()
expected.iloc[0:3, 0:2] = 100
tm.assert_frame_equal(df, expected)
# setting with a list-like
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
[[100, 100], [100, 100]], dtype="int64"
)
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] = 100
tm.assert_frame_equal(df, expected)
# not enough values
df = df_orig.copy()
with pytest.raises(ValueError):
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
[[100], [100, 100]], dtype="int64"
)
with pytest.raises(ValueError):
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
[100, 100, 100, 100], dtype="int64"
)
# with an alignable rhs
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ["foo"])] = (
df.loc[(slice(None), 1), (slice(None), ["foo"])] * 5
)
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ["foo"])] *= df.loc[
(slice(None), 1), (slice(None), ["foo"])
]
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
tm.assert_frame_equal(df, expected)
rhs = df_orig.loc[(slice(None), 1), (slice(None), ["foo"])].copy()
rhs.loc[:, ("c", "bah")] = 10
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ["foo"])] *= rhs
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
tm.assert_frame_equal(df, expected)
def test_multiindex_label_slicing_with_negative_step(self):
s = Series(
np.arange(20), MultiIndex.from_product([list("abcde"), np.arange(4)])
)
SLC = pd.IndexSlice
def assert_slices_equivalent(l_slc, i_slc):
tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc])
tm.assert_series_equal(s[l_slc], s.iloc[i_slc])
with catch_warnings(record=True):
tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc])
assert_slices_equivalent(SLC[::-1], SLC[::-1])
assert_slices_equivalent(SLC["d"::-1], SLC[15::-1])
assert_slices_equivalent(SLC[("d",)::-1], SLC[15::-1])
assert_slices_equivalent(SLC[:"d":-1], SLC[:11:-1])
assert_slices_equivalent(SLC[:("d",):-1], SLC[:11:-1])
assert_slices_equivalent(SLC["d":"b":-1], SLC[15:3:-1])
assert_slices_equivalent(SLC[("d",):"b":-1], SLC[15:3:-1])
assert_slices_equivalent(SLC["d":("b",):-1], SLC[15:3:-1])
assert_slices_equivalent(SLC[("d",):("b",):-1], SLC[15:3:-1])
assert_slices_equivalent(SLC["b":"d":-1], SLC[:0])
assert_slices_equivalent(SLC[("c", 2)::-1], SLC[10::-1])
assert_slices_equivalent(SLC[:("c", 2):-1], SLC[:9:-1])
assert_slices_equivalent(SLC[("e", 0):("c", 2):-1], SLC[16:9:-1])
def test_multiindex_slice_first_level(self):
# GH 12697
freq = ["a", "b", "c", "d"]
idx = MultiIndex.from_product([freq, np.arange(500)])
df = DataFrame(list(range(2000)), index=idx, columns=["Test"])
df_slice = df.loc[pd.IndexSlice[:, 30:70], :]
result = df_slice.loc["a"]
expected = DataFrame(list(range(30, 71)), columns=["Test"], index=range(30, 71))
tm.assert_frame_equal(result, expected)
result = df_slice.loc["d"]
expected = DataFrame(
list(range(1530, 1571)), columns=["Test"], index=range(30, 71)
)
tm.assert_frame_equal(result, expected)
def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_data):
ymd = multiindex_year_month_day_dataframe_random_data
s = ymd["A"]
result = s[5:]
expected = s.reindex(s.index[5:])
tm.assert_series_equal(result, expected)
exp = ymd["A"].copy()
s[5:] = 0
exp.values[5:] = 0
tm.assert_numpy_array_equal(s.values, exp.values)
result = ymd[5:]
expected = ymd.reindex(s.index[5:])
tm.assert_frame_equal(result, expected)
def test_non_reducing_slice_on_multiindex(self):
# GH 19861
dic = {
("a", "d"): [1, 4],
("a", "c"): [2, 3],
("b", "c"): [3, 2],
("b", "d"): [4, 1],
}
df = pd.DataFrame(dic, index=[0, 1])
idx = pd.IndexSlice
slice_ = idx[:, idx["b", "d"]]
tslice_ = _non_reducing_slice(slice_)
result = df.loc[tslice_]
expected = pd.DataFrame({("b", "d"): [4, 1]})
tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,97 @@
import numpy as np
from numpy.random import randn
from pandas import DataFrame, MultiIndex, Series
from pandas.util import testing as tm
class TestMultiIndexSorted:
def test_getitem_multilevel_index_tuple_not_sorted(self):
index_columns = list("abc")
df = DataFrame(
[[0, 1, 0, "x"], [0, 0, 1, "y"]], columns=index_columns + ["data"]
)
df = df.set_index(index_columns)
query_index = df.index[:1]
rs = df.loc[query_index, "data"]
xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=["a", "b", "c"])
xp = Series(["x"], index=xp_idx, name="data")
tm.assert_series_equal(rs, xp)
def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.sort_index(level=1).T
# buglet with int typechecking
result = df.iloc[:, : np.int32(3)]
expected = df.reindex(columns=df.columns[:3])
tm.assert_frame_equal(result, expected)
def test_frame_getitem_not_sorted2(self):
# 13431
df = DataFrame(
{
"col1": ["b", "d", "b", "a"],
"col2": [3, 1, 1, 2],
"data": ["one", "two", "three", "four"],
}
)
df2 = df.set_index(["col1", "col2"])
df2_original = df2.copy()
df2.index.set_levels(["b", "d", "a"], level="col1", inplace=True)
df2.index.set_codes([0, 1, 0, 2], level="col1", inplace=True)
assert not df2.index.is_lexsorted()
assert not df2.index.is_monotonic
assert df2_original.index.equals(df2.index)
expected = df2.sort_index()
assert expected.index.is_lexsorted()
assert expected.index.is_monotonic
result = df2.sort_index(level=0)
assert result.index.is_lexsorted()
assert result.index.is_monotonic
tm.assert_frame_equal(result, expected)
def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.T
df["foo", "four"] = "foo"
arrays = [np.array(x) for x in zip(*df.columns.values)]
result = df["foo"]
result2 = df.loc[:, "foo"]
expected = df.reindex(columns=df.columns[arrays[0] == "foo"])
expected.columns = expected.columns.droplevel(0)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result2, expected)
df = df.T
result = df.xs("foo")
result2 = df.loc["foo"]
expected = df.reindex(df.index[arrays[0] == "foo"])
expected.index = expected.index.droplevel(0)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result2, expected)
def test_series_getitem_not_sorted(self):
arrays = [
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
["one", "two", "one", "two", "one", "two", "one", "two"],
]
tuples = zip(*arrays)
index = MultiIndex.from_tuples(tuples)
s = Series(randn(8), index=index)
arrays = [np.array(x) for x in zip(*index.values)]
result = s["qux"]
result2 = s.loc["qux"]
expected = s[arrays[0] == "qux"]
expected.index = expected.index.droplevel(0)
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result2, expected)

View File

@@ -0,0 +1,249 @@
from itertools import product
import numpy as np
import pytest
from pandas import DataFrame, Index, MultiIndex, Series, concat, date_range
import pandas.core.common as com
from pandas.util import testing as tm
@pytest.fixture
def four_level_index_dataframe():
arr = np.array(
[
[-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
[0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
[-0.6662, -0.5243, -0.358, 0.89145, 2.5838],
]
)
index = MultiIndex(
levels=[["a", "x"], ["b", "q"], [10.0032, 20.0, 30.0], [3, 4, 5]],
codes=[[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]],
names=["one", "two", "three", "four"],
)
return DataFrame(arr, index=index, columns=list("ABCDE"))
@pytest.mark.parametrize(
"key, level, exp_arr, exp_index",
[
("a", "lvl0", lambda x: x[:, 0:2], Index(["bar", "foo"], name="lvl1")),
("foo", "lvl1", lambda x: x[:, 1:2], Index(["a"], name="lvl0")),
],
)
def test_xs_named_levels_axis_eq_1(key, level, exp_arr, exp_index):
# see gh-2903
arr = np.random.randn(4, 4)
index = MultiIndex(
levels=[["a", "b"], ["bar", "foo", "hello", "world"]],
codes=[[0, 0, 1, 1], [0, 1, 2, 3]],
names=["lvl0", "lvl1"],
)
df = DataFrame(arr, columns=index)
result = df.xs(key, level=level, axis=1)
expected = DataFrame(exp_arr(arr), columns=exp_index)
tm.assert_frame_equal(result, expected)
def test_xs_values(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.xs(("bar", "two")).values
expected = df.values[4]
tm.assert_almost_equal(result, expected)
def test_xs_loc_equality(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.xs(("bar", "two"))
expected = df.loc[("bar", "two")]
tm.assert_series_equal(result, expected)
def test_xs_missing_values_in_index():
# see gh-6574
# missing values in returned index should be preserved
acc = [
("a", "abcde", 1),
("b", "bbcde", 2),
("y", "yzcde", 25),
("z", "xbcde", 24),
("z", None, 26),
("z", "zbcde", 25),
("z", "ybcde", 26),
]
df = DataFrame(acc, columns=["a1", "a2", "cnt"]).set_index(["a1", "a2"])
expected = DataFrame(
{"cnt": [24, 26, 25, 26]},
index=Index(["xbcde", np.nan, "zbcde", "ybcde"], name="a2"),
)
result = df.xs("z", level="a1")
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])])
def test_xs_with_duplicates(key, level, multiindex_dataframe_random_data):
# see gh-13719
frame = multiindex_dataframe_random_data
df = concat([frame] * 2)
assert df.index.is_unique is False
expected = concat([frame.xs("one", level="second")] * 2)
result = df.xs(key, level=level)
tm.assert_frame_equal(result, expected)
def test_xs_level(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.xs("two", level="second")
expected = df[df.index.get_level_values(1) == "two"]
expected.index = Index(["foo", "bar", "baz", "qux"], name="first")
tm.assert_frame_equal(result, expected)
def test_xs_level_eq_2():
arr = np.random.randn(3, 5)
index = MultiIndex(
levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]],
codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]],
)
df = DataFrame(arr, index=index)
expected = DataFrame(arr[1:2], index=[["a"], ["b"]])
result = df.xs("c", level=2)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"indexer",
[
lambda df: df.xs(("a", 4), level=["one", "four"]),
lambda df: df.xs("a").xs(4, level="four"),
],
)
def test_xs_level_multiple(indexer, four_level_index_dataframe):
df = four_level_index_dataframe
expected_values = [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]]
expected_index = MultiIndex(
levels=[["q"], [20.0]], codes=[[0], [0]], names=["two", "three"]
)
expected = DataFrame(expected_values, index=expected_index, columns=list("ABCDE"))
result = indexer(df)
tm.assert_frame_equal(result, expected)
def test_xs_setting_with_copy_error(multiindex_dataframe_random_data):
# this is a copy in 0.14
df = multiindex_dataframe_random_data
result = df.xs("two", level="second")
# setting this will give a SettingWithCopyError
# as we are trying to write a view
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
result[:] = 10
def test_xs_setting_with_copy_error_multiple(four_level_index_dataframe):
# this is a copy in 0.14
df = four_level_index_dataframe
result = df.xs(("a", 4), level=["one", "four"])
# setting this will give a SettingWithCopyError
# as we are trying to write a view
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
result[:] = 10
def test_xs_integer_key():
# see gh-2107
dates = range(20111201, 20111205)
ids = "abcde"
index = MultiIndex.from_tuples(
[x for x in product(dates, ids)], names=["date", "secid"]
)
df = DataFrame(np.random.randn(len(index), 3), index, ["X", "Y", "Z"])
result = df.xs(20111201, level="date")
expected = df.loc[20111201, :]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"indexer", [lambda df: df.xs("a", level=0), lambda df: df.xs("a")]
)
def test_xs_level0(indexer, four_level_index_dataframe):
df = four_level_index_dataframe
expected_values = [
[-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
[0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
]
expected_index = MultiIndex(
levels=[["b", "q"], [10.0032, 20.0], [4, 5]],
codes=[[0, 1], [0, 1], [1, 0]],
names=["two", "three", "four"],
)
expected = DataFrame(expected_values, index=expected_index, columns=list("ABCDE"))
result = indexer(df)
tm.assert_frame_equal(result, expected)
def test_xs_level_series(multiindex_dataframe_random_data):
# this test is not explicitly testing .xs functionality
# TODO: move to another module or refactor
df = multiindex_dataframe_random_data
s = df["A"]
result = s[:, "two"]
expected = df.xs("two", level=1)["A"]
tm.assert_series_equal(result, expected)
def test_xs_level_series_ymd(multiindex_year_month_day_dataframe_random_data):
# this test is not explicitly testing .xs functionality
# TODO: move to another module or refactor
df = multiindex_year_month_day_dataframe_random_data
s = df["A"]
result = s[2000, 5]
expected = df.loc[2000, 5]["A"]
tm.assert_series_equal(result, expected)
def test_xs_level_series_slice_not_implemented(
multiindex_year_month_day_dataframe_random_data
):
# this test is not explicitly testing .xs functionality
# TODO: move to another module or refactor
# not implementing this for now
df = multiindex_year_month_day_dataframe_random_data
s = df["A"]
msg = r"\(2000, slice\(3, 4, None\)\)"
with pytest.raises(TypeError, match=msg):
s[2000, 3:4]
def test_series_getitem_multiindex_xs():
# GH6258
dt = list(date_range("20130903", periods=3))
idx = MultiIndex.from_product([list("AB"), dt])
s = Series([1, 3, 4, 1, 3, 4], index=idx)
expected = Series([1, 1], index=list("AB"))
result = s.xs("20130903", level=1)
tm.assert_series_equal(result, expected)
def test_series_getitem_multiindex_xs_by_label():
# GH5684
idx = MultiIndex.from_tuples(
[("a", "one"), ("a", "two"), ("b", "one"), ("b", "two")]
)
s = Series([1, 2, 3, 4], index=idx)
s.index.set_names(["L1", "L2"], inplace=True)
expected = Series([1, 3], index=["a", "b"])
expected.index.set_names(["L1"], inplace=True)
result = s.xs("one", level="L2")
tm.assert_series_equal(result, expected)

View File

@@ -0,0 +1,254 @@
import numpy as np
import pandas as pd
import pandas.util.testing as tm
class TestIndexingCallable:
def test_frame_loc_callable(self):
# GH 11485
df = pd.DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]})
# iloc cannot use boolean Series (see GH3635)
# return bool indexer
res = df.loc[lambda x: x.A > 2]
tm.assert_frame_equal(res, df.loc[df.A > 2])
res = df.loc[lambda x: x.A > 2]
tm.assert_frame_equal(res, df.loc[df.A > 2])
res = df.loc[lambda x: x.A > 2,] # noqa: E231
tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231
res = df.loc[lambda x: x.A > 2,] # noqa: E231
tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231
res = df.loc[lambda x: x.B == "b", :]
tm.assert_frame_equal(res, df.loc[df.B == "b", :])
res = df.loc[lambda x: x.B == "b", :]
tm.assert_frame_equal(res, df.loc[df.B == "b", :])
res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"]
tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]])
res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"]
tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]])
res = df.loc[lambda x: x.A > 2, lambda x: "B"]
tm.assert_series_equal(res, df.loc[df.A > 2, "B"])
res = df.loc[lambda x: x.A > 2, lambda x: "B"]
tm.assert_series_equal(res, df.loc[df.A > 2, "B"])
res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]]
tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]])
res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]]
tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]])
res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]]
tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]])
res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]]
tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]])
# scalar
res = df.loc[lambda x: 1, lambda x: "A"]
assert res == df.loc[1, "A"]
res = df.loc[lambda x: 1, lambda x: "A"]
assert res == df.loc[1, "A"]
def test_frame_loc_callable_mixture(self):
# GH 11485
df = pd.DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]})
res = df.loc[lambda x: x.A > 2, ["A", "B"]]
tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]])
res = df.loc[lambda x: x.A > 2, ["A", "B"]]
tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]])
res = df.loc[[2, 3], lambda x: ["A", "B"]]
tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]])
res = df.loc[[2, 3], lambda x: ["A", "B"]]
tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]])
res = df.loc[3, lambda x: ["A", "B"]]
tm.assert_series_equal(res, df.loc[3, ["A", "B"]])
res = df.loc[3, lambda x: ["A", "B"]]
tm.assert_series_equal(res, df.loc[3, ["A", "B"]])
def test_frame_loc_callable_labels(self):
# GH 11485
df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))
# return label
res = df.loc[lambda x: ["A", "C"]]
tm.assert_frame_equal(res, df.loc[["A", "C"]])
res = df.loc[lambda x: ["A", "C"],] # noqa: E231
tm.assert_frame_equal(res, df.loc[["A", "C"],]) # noqa: E231
res = df.loc[lambda x: ["A", "C"], :]
tm.assert_frame_equal(res, df.loc[["A", "C"], :])
res = df.loc[lambda x: ["A", "C"], lambda x: "X"]
tm.assert_series_equal(res, df.loc[["A", "C"], "X"])
res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]]
tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]])
# mixture
res = df.loc[["A", "C"], lambda x: "X"]
tm.assert_series_equal(res, df.loc[["A", "C"], "X"])
res = df.loc[["A", "C"], lambda x: ["X"]]
tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]])
res = df.loc[lambda x: ["A", "C"], "X"]
tm.assert_series_equal(res, df.loc[["A", "C"], "X"])
res = df.loc[lambda x: ["A", "C"], ["X"]]
tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]])
def test_frame_loc_callable_setitem(self):
# GH 11485
df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))
# return label
res = df.copy()
res.loc[lambda x: ["A", "C"]] = -20
exp = df.copy()
exp.loc[["A", "C"]] = -20
tm.assert_frame_equal(res, exp)
res = df.copy()
res.loc[lambda x: ["A", "C"], :] = 20
exp = df.copy()
exp.loc[["A", "C"], :] = 20
tm.assert_frame_equal(res, exp)
res = df.copy()
res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1
exp = df.copy()
exp.loc[["A", "C"], "X"] = -1
tm.assert_frame_equal(res, exp)
res = df.copy()
res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10]
exp = df.copy()
exp.loc[["A", "C"], ["X"]] = [5, 10]
tm.assert_frame_equal(res, exp)
# mixture
res = df.copy()
res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2])
exp = df.copy()
exp.loc[["A", "C"], "X"] = np.array([-1, -2])
tm.assert_frame_equal(res, exp)
res = df.copy()
res.loc[["A", "C"], lambda x: ["X"]] = 10
exp = df.copy()
exp.loc[["A", "C"], ["X"]] = 10
tm.assert_frame_equal(res, exp)
res = df.copy()
res.loc[lambda x: ["A", "C"], "X"] = -2
exp = df.copy()
exp.loc[["A", "C"], "X"] = -2
tm.assert_frame_equal(res, exp)
res = df.copy()
res.loc[lambda x: ["A", "C"], ["X"]] = -4
exp = df.copy()
exp.loc[["A", "C"], ["X"]] = -4
tm.assert_frame_equal(res, exp)
def test_frame_iloc_callable(self):
# GH 11485
df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))
# return location
res = df.iloc[lambda x: [1, 3]]
tm.assert_frame_equal(res, df.iloc[[1, 3]])
res = df.iloc[lambda x: [1, 3], :]
tm.assert_frame_equal(res, df.iloc[[1, 3], :])
res = df.iloc[lambda x: [1, 3], lambda x: 0]
tm.assert_series_equal(res, df.iloc[[1, 3], 0])
res = df.iloc[lambda x: [1, 3], lambda x: [0]]
tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])
# mixture
res = df.iloc[[1, 3], lambda x: 0]
tm.assert_series_equal(res, df.iloc[[1, 3], 0])
res = df.iloc[[1, 3], lambda x: [0]]
tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])
res = df.iloc[lambda x: [1, 3], 0]
tm.assert_series_equal(res, df.iloc[[1, 3], 0])
res = df.iloc[lambda x: [1, 3], [0]]
tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])
def test_frame_iloc_callable_setitem(self):
# GH 11485
df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))
# return location
res = df.copy()
res.iloc[lambda x: [1, 3]] = 0
exp = df.copy()
exp.iloc[[1, 3]] = 0
tm.assert_frame_equal(res, exp)
res = df.copy()
res.iloc[lambda x: [1, 3], :] = -1
exp = df.copy()
exp.iloc[[1, 3], :] = -1
tm.assert_frame_equal(res, exp)
res = df.copy()
res.iloc[lambda x: [1, 3], lambda x: 0] = 5
exp = df.copy()
exp.iloc[[1, 3], 0] = 5
tm.assert_frame_equal(res, exp)
res = df.copy()
res.iloc[lambda x: [1, 3], lambda x: [0]] = 25
exp = df.copy()
exp.iloc[[1, 3], [0]] = 25
tm.assert_frame_equal(res, exp)
# mixture
res = df.copy()
res.iloc[[1, 3], lambda x: 0] = -3
exp = df.copy()
exp.iloc[[1, 3], 0] = -3
tm.assert_frame_equal(res, exp)
res = df.copy()
res.iloc[[1, 3], lambda x: [0]] = -5
exp = df.copy()
exp.iloc[[1, 3], [0]] = -5
tm.assert_frame_equal(res, exp)
res = df.copy()
res.iloc[lambda x: [1, 3], 0] = 10
exp = df.copy()
exp.iloc[[1, 3], 0] = 10
tm.assert_frame_equal(res, exp)
res = df.copy()
res.iloc[lambda x: [1, 3], [0]] = [-5, -5]
exp = df.copy()
exp.iloc[[1, 3], [0]] = [-5, -5]
tm.assert_frame_equal(res, exp)

View File

@@ -0,0 +1,760 @@
import numpy as np
import pytest
from pandas.core.dtypes.common import is_categorical_dtype
from pandas.core.dtypes.dtypes import CategoricalDtype
import pandas as pd
from pandas import (
Categorical,
CategoricalIndex,
DataFrame,
Index,
Interval,
Series,
Timestamp,
)
from pandas.api.types import CategoricalDtype as CDT
from pandas.util import testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
class TestCategoricalIndex:
def setup_method(self, method):
self.df = DataFrame(
{
"A": np.arange(6, dtype="int64"),
"B": Series(list("aabbca")).astype(CDT(list("cab"))),
}
).set_index("B")
self.df2 = DataFrame(
{
"A": np.arange(6, dtype="int64"),
"B": Series(list("aabbca")).astype(CDT(list("cabe"))),
}
).set_index("B")
self.df3 = DataFrame(
{
"A": np.arange(6, dtype="int64"),
"B": (Series([1, 1, 2, 1, 3, 2]).astype(CDT([3, 2, 1], ordered=True))),
}
).set_index("B")
self.df4 = DataFrame(
{
"A": np.arange(6, dtype="int64"),
"B": (Series([1, 1, 2, 1, 3, 2]).astype(CDT([3, 2, 1], ordered=False))),
}
).set_index("B")
def test_loc_scalar(self):
result = self.df.loc["a"]
expected = DataFrame(
{"A": [0, 1, 5], "B": (Series(list("aaa")).astype(CDT(list("cab"))))}
).set_index("B")
assert_frame_equal(result, expected)
df = self.df.copy()
df.loc["a"] = 20
expected = DataFrame(
{
"A": [20, 20, 2, 3, 4, 20],
"B": (Series(list("aabbca")).astype(CDT(list("cab")))),
}
).set_index("B")
assert_frame_equal(df, expected)
# value not in the categories
with pytest.raises(KeyError, match=r"^'d'$"):
df.loc["d"]
msg = "cannot append a non-category item to a CategoricalIndex"
with pytest.raises(TypeError, match=msg):
df.loc["d"] = 10
msg = (
"cannot insert an item into a CategoricalIndex that is not"
" already an existing category"
)
with pytest.raises(TypeError, match=msg):
df.loc["d", "A"] = 10
with pytest.raises(TypeError, match=msg):
df.loc["d", "C"] = 10
def test_getitem_scalar(self):
cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
s = Series([1, 2], index=cats)
expected = s.iloc[0]
result = s[cats[0]]
assert result == expected
def test_slicing_directly(self):
cat = Categorical(["a", "b", "c", "d", "a", "b", "c"])
sliced = cat[3]
assert sliced == "d"
sliced = cat[3:5]
expected = Categorical(["d", "a"], categories=["a", "b", "c", "d"])
tm.assert_numpy_array_equal(sliced._codes, expected._codes)
tm.assert_index_equal(sliced.categories, expected.categories)
def test_slicing(self):
cat = Series(Categorical([1, 2, 3, 4]))
reversed = cat[::-1]
exp = np.array([4, 3, 2, 1], dtype=np.int64)
tm.assert_numpy_array_equal(reversed.__array__(), exp)
df = DataFrame({"value": (np.arange(100) + 1).astype("int64")})
df["D"] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])
expected = Series([11, Interval(0, 25)], index=["value", "D"], name=10)
result = df.iloc[10]
tm.assert_series_equal(result, expected)
expected = DataFrame(
{"value": np.arange(11, 21).astype("int64")},
index=np.arange(10, 20).astype("int64"),
)
expected["D"] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
result = df.iloc[10:20]
tm.assert_frame_equal(result, expected)
expected = Series([9, Interval(0, 25)], index=["value", "D"], name=8)
result = df.loc[8]
tm.assert_series_equal(result, expected)
def test_slicing_and_getting_ops(self):
# systematically test the slicing operations:
# for all slicing ops:
# - returning a dataframe
# - returning a column
# - returning a row
# - returning a single value
cats = Categorical(
["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"]
)
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
values = [1, 2, 3, 4, 5, 6, 7]
df = DataFrame({"cats": cats, "values": values}, index=idx)
# the expected values
cats2 = Categorical(["b", "c"], categories=["a", "b", "c"])
idx2 = Index(["j", "k"])
values2 = [3, 4]
# 2:4,: | "j":"k",:
exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2)
# :,"cats" | :,0
exp_col = Series(cats, index=idx, name="cats")
# "j",: | 2,:
exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", name="j")
# "j","cats | 2,0
exp_val = "b"
# iloc
# frame
res_df = df.iloc[2:4, :]
tm.assert_frame_equal(res_df, exp_df)
assert is_categorical_dtype(res_df["cats"])
# row
res_row = df.iloc[2, :]
tm.assert_series_equal(res_row, exp_row)
assert isinstance(res_row["cats"], str)
# col
res_col = df.iloc[:, 0]
tm.assert_series_equal(res_col, exp_col)
assert is_categorical_dtype(res_col)
# single value
res_val = df.iloc[2, 0]
assert res_val == exp_val
# loc
# frame
res_df = df.loc["j":"k", :]
tm.assert_frame_equal(res_df, exp_df)
assert is_categorical_dtype(res_df["cats"])
# row
res_row = df.loc["j", :]
tm.assert_series_equal(res_row, exp_row)
assert isinstance(res_row["cats"], str)
# col
res_col = df.loc[:, "cats"]
tm.assert_series_equal(res_col, exp_col)
assert is_categorical_dtype(res_col)
# single value
res_val = df.loc["j", "cats"]
assert res_val == exp_val
# ix
# frame
# res_df = df.loc["j":"k",[0,1]] # doesn't work?
res_df = df.loc["j":"k", :]
tm.assert_frame_equal(res_df, exp_df)
assert is_categorical_dtype(res_df["cats"])
# row
res_row = df.loc["j", :]
tm.assert_series_equal(res_row, exp_row)
assert isinstance(res_row["cats"], str)
# col
res_col = df.loc[:, "cats"]
tm.assert_series_equal(res_col, exp_col)
assert is_categorical_dtype(res_col)
# single value
res_val = df.loc["j", df.columns[0]]
assert res_val == exp_val
# iat
res_val = df.iat[2, 0]
assert res_val == exp_val
# at
res_val = df.at["j", "cats"]
assert res_val == exp_val
# fancy indexing
exp_fancy = df.iloc[[2]]
res_fancy = df[df["cats"] == "b"]
tm.assert_frame_equal(res_fancy, exp_fancy)
res_fancy = df[df["values"] == 3]
tm.assert_frame_equal(res_fancy, exp_fancy)
# get_value
res_val = df.at["j", "cats"]
assert res_val == exp_val
# i : int, slice, or sequence of integers
res_row = df.iloc[2]
tm.assert_series_equal(res_row, exp_row)
assert isinstance(res_row["cats"], str)
res_df = df.iloc[slice(2, 4)]
tm.assert_frame_equal(res_df, exp_df)
assert is_categorical_dtype(res_df["cats"])
res_df = df.iloc[[2, 3]]
tm.assert_frame_equal(res_df, exp_df)
assert is_categorical_dtype(res_df["cats"])
res_col = df.iloc[:, 0]
tm.assert_series_equal(res_col, exp_col)
assert is_categorical_dtype(res_col)
res_df = df.iloc[:, slice(0, 2)]
tm.assert_frame_equal(res_df, df)
assert is_categorical_dtype(res_df["cats"])
res_df = df.iloc[:, [0, 1]]
tm.assert_frame_equal(res_df, df)
assert is_categorical_dtype(res_df["cats"])
def test_slicing_doc_examples(self):
# GH 7918
cats = Categorical(
["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c"]
)
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
values = [1, 2, 2, 2, 3, 4, 5]
df = DataFrame({"cats": cats, "values": values}, index=idx)
result = df.iloc[2:4, :]
expected = DataFrame(
{
"cats": Categorical(["b", "b"], categories=["a", "b", "c"]),
"values": [2, 2],
},
index=["j", "k"],
)
tm.assert_frame_equal(result, expected)
result = df.iloc[2:4, :].dtypes
expected = Series(["category", "int64"], ["cats", "values"])
tm.assert_series_equal(result, expected)
result = df.loc["h":"j", "cats"]
expected = Series(
Categorical(["a", "b", "b"], categories=["a", "b", "c"]),
index=["h", "i", "j"],
name="cats",
)
tm.assert_series_equal(result, expected)
result = df.loc["h":"j", df.columns[0:1]]
expected = DataFrame(
{"cats": Categorical(["a", "b", "b"], categories=["a", "b", "c"])},
index=["h", "i", "j"],
)
tm.assert_frame_equal(result, expected)
def test_getitem_category_type(self):
# GH 14580
# test iloc() on Series with Categorical data
s = Series([1, 2, 3]).astype("category")
# get slice
result = s.iloc[0:2]
expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
tm.assert_series_equal(result, expected)
# get list of indexes
result = s.iloc[[0, 1]]
expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
tm.assert_series_equal(result, expected)
# get boolean array
result = s.iloc[[True, False, False]]
expected = Series([1]).astype(CategoricalDtype([1, 2, 3]))
tm.assert_series_equal(result, expected)
def test_loc_listlike(self):
# list of labels
result = self.df.loc[["c", "a"]]
expected = self.df.iloc[[4, 0, 1, 5]]
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.loc[["a", "b", "e"]]
exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B")
expected = DataFrame({"A": [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
assert_frame_equal(result, expected, check_index_type=True)
# element in the categories but not in the values
with pytest.raises(KeyError, match=r"^'e'$"):
self.df2.loc["e"]
# assign is ok
df = self.df2.copy()
df.loc["e"] = 20
result = df.loc[["a", "b", "e"]]
exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B")
expected = DataFrame({"A": [0, 1, 5, 2, 3, 20]}, index=exp_index)
assert_frame_equal(result, expected)
df = self.df2.copy()
result = df.loc[["a", "b", "e"]]
exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B")
expected = DataFrame({"A": [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
assert_frame_equal(result, expected, check_index_type=True)
# not all labels in the categories
with pytest.raises(
KeyError,
match="'a list-indexer must only include values that are in the"
" categories'",
):
self.df2.loc[["a", "d"]]
def test_loc_listlike_dtypes(self):
# GH 11586
# unique categories and codes
index = CategoricalIndex(["a", "b", "c"])
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index)
# unique slice
res = df.loc[["a", "b"]]
exp_index = CategoricalIndex(["a", "b"], categories=index.categories)
exp = DataFrame({"A": [1, 2], "B": [4, 5]}, index=exp_index)
tm.assert_frame_equal(res, exp, check_index_type=True)
# duplicated slice
res = df.loc[["a", "a", "b"]]
exp_index = CategoricalIndex(["a", "a", "b"], categories=index.categories)
exp = DataFrame({"A": [1, 1, 2], "B": [4, 4, 5]}, index=exp_index)
tm.assert_frame_equal(res, exp, check_index_type=True)
msg = "a list-indexer must only include values that are in the categories"
with pytest.raises(KeyError, match=msg):
df.loc[["a", "x"]]
# duplicated categories and codes
index = CategoricalIndex(["a", "b", "a"])
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index)
# unique slice
res = df.loc[["a", "b"]]
exp = DataFrame(
{"A": [1, 3, 2], "B": [4, 6, 5]}, index=CategoricalIndex(["a", "a", "b"])
)
tm.assert_frame_equal(res, exp, check_index_type=True)
# duplicated slice
res = df.loc[["a", "a", "b"]]
exp = DataFrame(
{"A": [1, 3, 1, 3, 2], "B": [4, 6, 4, 6, 5]},
index=CategoricalIndex(["a", "a", "a", "a", "b"]),
)
tm.assert_frame_equal(res, exp, check_index_type=True)
msg = "a list-indexer must only include values that are in the categories"
with pytest.raises(KeyError, match=msg):
df.loc[["a", "x"]]
# contains unused category
index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde"))
df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index)
res = df.loc[["a", "b"]]
exp = DataFrame(
{"A": [1, 3, 2], "B": [5, 7, 6]},
index=CategoricalIndex(["a", "a", "b"], categories=list("abcde")),
)
tm.assert_frame_equal(res, exp, check_index_type=True)
res = df.loc[["a", "e"]]
exp = DataFrame(
{"A": [1, 3, np.nan], "B": [5, 7, np.nan]},
index=CategoricalIndex(["a", "a", "e"], categories=list("abcde")),
)
tm.assert_frame_equal(res, exp, check_index_type=True)
# duplicated slice
res = df.loc[["a", "a", "b"]]
exp = DataFrame(
{"A": [1, 3, 1, 3, 2], "B": [5, 7, 5, 7, 6]},
index=CategoricalIndex(["a", "a", "a", "a", "b"], categories=list("abcde")),
)
tm.assert_frame_equal(res, exp, check_index_type=True)
msg = "a list-indexer must only include values that are in the categories"
with pytest.raises(KeyError, match=msg):
df.loc[["a", "x"]]
def test_get_indexer_array(self):
arr = np.array(
[Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")],
dtype=object,
)
cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")
result = ci.get_indexer(arr)
expected = np.array([0, 1], dtype="intp")
tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_same_categories_same_order(self):
ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"]))
expected = np.array([1, 1], dtype="intp")
tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_same_categories_different_order(self):
# https://github.com/pandas-dev/pandas/issues/19551
ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"]))
expected = np.array([1, 1], dtype="intp")
tm.assert_numpy_array_equal(result, expected)
def test_getitem_with_listlike(self):
# GH 16115
cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
expected = DataFrame(
[[1, 0], [0, 1]], dtype="uint8", index=[0, 1], columns=cats
)
dummies = pd.get_dummies(cats)
result = dummies[[c for c in dummies.columns]]
assert_frame_equal(result, expected)
def test_setitem_listlike(self):
# GH 9469
# properly coerce the input indexers
np.random.seed(1)
c = Categorical(
np.random.randint(0, 5, size=150000).astype(np.int8)
).add_categories([-1000])
indexer = np.array([100000]).astype(np.int64)
c[indexer] = -1000
# we are asserting the code result here
# which maps to the -1000 category
result = c.codes[np.array([100000]).astype(np.int64)]
tm.assert_numpy_array_equal(result, np.array([5], dtype="int8"))
def test_ix_categorical_index(self):
# GH 12531
df = DataFrame(np.random.randn(3, 3), index=list("ABC"), columns=list("XYZ"))
cdf = df.copy()
cdf.index = CategoricalIndex(df.index)
cdf.columns = CategoricalIndex(df.columns)
expect = Series(df.loc["A", :], index=cdf.columns, name="A")
assert_series_equal(cdf.loc["A", :], expect)
expect = Series(df.loc[:, "X"], index=cdf.index, name="X")
assert_series_equal(cdf.loc[:, "X"], expect)
exp_index = CategoricalIndex(list("AB"), categories=["A", "B", "C"])
expect = DataFrame(df.loc[["A", "B"], :], columns=cdf.columns, index=exp_index)
assert_frame_equal(cdf.loc[["A", "B"], :], expect)
exp_columns = CategoricalIndex(list("XY"), categories=["X", "Y", "Z"])
expect = DataFrame(df.loc[:, ["X", "Y"]], index=cdf.index, columns=exp_columns)
assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect)
# non-unique
df = DataFrame(np.random.randn(3, 3), index=list("ABA"), columns=list("XYX"))
cdf = df.copy()
cdf.index = CategoricalIndex(df.index)
cdf.columns = CategoricalIndex(df.columns)
exp_index = CategoricalIndex(list("AA"), categories=["A", "B"])
expect = DataFrame(df.loc["A", :], columns=cdf.columns, index=exp_index)
assert_frame_equal(cdf.loc["A", :], expect)
exp_columns = CategoricalIndex(list("XX"), categories=["X", "Y"])
expect = DataFrame(df.loc[:, "X"], index=cdf.index, columns=exp_columns)
assert_frame_equal(cdf.loc[:, "X"], expect)
expect = DataFrame(
df.loc[["A", "B"], :],
columns=cdf.columns,
index=CategoricalIndex(list("AAB")),
)
assert_frame_equal(cdf.loc[["A", "B"], :], expect)
expect = DataFrame(
df.loc[:, ["X", "Y"]],
index=cdf.index,
columns=CategoricalIndex(list("XXY")),
)
assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect)
def test_read_only_source(self):
# GH 10043
rw_array = np.eye(10)
rw_df = DataFrame(rw_array)
ro_array = np.eye(10)
ro_array.setflags(write=False)
ro_df = DataFrame(ro_array)
assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]])
assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]])
assert_series_equal(rw_df.iloc[1], ro_df.iloc[1])
assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3])
assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]])
assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]])
assert_series_equal(rw_df.loc[1], ro_df.loc[1])
assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3])
def test_reindexing(self):
# reindexing
# convert to a regular index
result = self.df2.reindex(["a", "b", "e"])
expected = DataFrame(
{"A": [0, 1, 5, 2, 3, np.nan], "B": Series(list("aaabbe"))}
).set_index("B")
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(["a", "b"])
expected = DataFrame(
{"A": [0, 1, 5, 2, 3], "B": Series(list("aaabb"))}
).set_index("B")
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(["e"])
expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(["d"])
expected = DataFrame({"A": [np.nan], "B": Series(["d"])}).set_index("B")
assert_frame_equal(result, expected, check_index_type=True)
# since we are actually reindexing with a Categorical
# then return a Categorical
cats = list("cabe")
result = self.df2.reindex(Categorical(["a", "d"], categories=cats))
expected = DataFrame(
{"A": [0, 1, 5, np.nan], "B": Series(list("aaad")).astype(CDT(cats))}
).set_index("B")
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(Categorical(["a"], categories=cats))
expected = DataFrame(
{"A": [0, 1, 5], "B": Series(list("aaa")).astype(CDT(cats))}
).set_index("B")
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(["a", "b", "e"])
expected = DataFrame(
{"A": [0, 1, 5, 2, 3, np.nan], "B": Series(list("aaabbe"))}
).set_index("B")
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(["a", "b"])
expected = DataFrame(
{"A": [0, 1, 5, 2, 3], "B": Series(list("aaabb"))}
).set_index("B")
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(["e"])
expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
assert_frame_equal(result, expected, check_index_type=True)
# give back the type of categorical that we received
result = self.df2.reindex(
Categorical(["a", "d"], categories=cats, ordered=True)
)
expected = DataFrame(
{
"A": [0, 1, 5, np.nan],
"B": Series(list("aaad")).astype(CDT(cats, ordered=True)),
}
).set_index("B")
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(Categorical(["a", "d"], categories=["a", "d"]))
expected = DataFrame(
{"A": [0, 1, 5, np.nan], "B": Series(list("aaad")).astype(CDT(["a", "d"]))}
).set_index("B")
assert_frame_equal(result, expected, check_index_type=True)
# passed duplicate indexers are not allowed
msg = "cannot reindex with a non-unique indexer"
with pytest.raises(ValueError, match=msg):
self.df2.reindex(["a", "a"])
# args NotImplemented ATM
msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
with pytest.raises(NotImplementedError, match=msg.format("method")):
self.df2.reindex(["a"], method="ffill")
with pytest.raises(NotImplementedError, match=msg.format("level")):
self.df2.reindex(["a"], level=1)
with pytest.raises(NotImplementedError, match=msg.format("limit")):
self.df2.reindex(["a"], limit=2)
def test_loc_slice(self):
# slicing
# not implemented ATM
# GH9748
msg = (
"cannot do slice indexing on {klass} with these "
r"indexers \[1\] of {kind}".format(
klass=str(CategoricalIndex), kind=str(int)
)
)
with pytest.raises(TypeError, match=msg):
self.df.loc[1:5]
# result = df.loc[1:5]
# expected = df.iloc[[1,2,3,4]]
# assert_frame_equal(result, expected)
def test_loc_and_at_with_categorical_index(self):
# GH 20629
s = Series([1, 2, 3], index=pd.CategoricalIndex(["A", "B", "C"]))
assert s.loc["A"] == 1
assert s.at["A"] == 1
df = DataFrame(
[[1, 2], [3, 4], [5, 6]], index=pd.CategoricalIndex(["A", "B", "C"])
)
assert df.loc["B", 1] == 4
assert df.at["B", 1] == 4
def test_boolean_selection(self):
df3 = self.df3
df4 = self.df4
result = df3[df3.index == "a"]
expected = df3.iloc[[]]
assert_frame_equal(result, expected)
result = df4[df4.index == "a"]
expected = df4.iloc[[]]
assert_frame_equal(result, expected)
result = df3[df3.index == 1]
expected = df3.iloc[[0, 1, 3]]
assert_frame_equal(result, expected)
result = df4[df4.index == 1]
expected = df4.iloc[[0, 1, 3]]
assert_frame_equal(result, expected)
# since we have an ordered categorical
# CategoricalIndex([1, 1, 2, 1, 3, 2],
# categories=[3, 2, 1],
# ordered=True,
# name='B')
result = df3[df3.index < 2]
expected = df3.iloc[[4]]
assert_frame_equal(result, expected)
result = df3[df3.index > 1]
expected = df3.iloc[[]]
assert_frame_equal(result, expected)
# unordered
# cannot be compared
# CategoricalIndex([1, 1, 2, 1, 3, 2],
# categories=[3, 2, 1],
# ordered=False,
# name='B')
msg = "Unordered Categoricals can only compare equality or not"
with pytest.raises(TypeError, match=msg):
df4[df4.index < 2]
with pytest.raises(TypeError, match=msg):
df4[df4.index > 1]
def test_indexing_with_category(self):
# https://github.com/pandas-dev/pandas/issues/12564
# consistent result if comparing as Dataframe
cat = DataFrame({"A": ["foo", "bar", "baz"]})
exp = DataFrame({"A": [True, False, False]})
res = cat[["A"]] == "foo"
tm.assert_frame_equal(res, exp)
cat["A"] = cat["A"].astype("category")
res = cat[["A"]] == "foo"
tm.assert_frame_equal(res, exp)
def test_map_with_dict_or_series(self):
orig_values = ["a", "B", 1, "a"]
new_values = ["one", 2, 3.0, "one"]
cur_index = pd.CategoricalIndex(orig_values, name="XXX")
expected = pd.CategoricalIndex(
new_values, name="XXX", categories=[3.0, 2, "one"]
)
mapper = pd.Series(new_values[:-1], index=orig_values[:-1])
output = cur_index.map(mapper)
# Order of categories in output can be different
tm.assert_index_equal(expected, output)
mapper = {o: n for o, n in zip(orig_values[:-1], new_values[:-1])}
output = cur_index.map(mapper)
# Order of categories in output can be different
tm.assert_index_equal(expected, output)

View File

@@ -0,0 +1,406 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Series, Timestamp, date_range, option_context
from pandas.core import common as com
from pandas.util import testing as tm
class TestCaching:
def test_slice_consolidate_invalidate_item_cache(self):
# this is chained assignment, but will 'work'
with option_context("chained_assignment", None):
# #3970
df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5})
# Creates a second float block
df["cc"] = 0.0
# caches a reference to the 'bb' series
df["bb"]
# repr machinery triggers consolidation
repr(df)
# Assignment to wrong series
df["bb"].iloc[0] = 0.17
df._clear_item_cache()
tm.assert_almost_equal(df["bb"][0], 0.17)
def test_setitem_cache_updating(self):
# GH 5424
cont = ["one", "two", "three", "four", "five", "six", "seven"]
for do_ref in [False, False]:
df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)})
# ref the cache
if do_ref:
df.loc[0, "c"]
# set it
df.loc[7, "c"] = 1
assert df.loc[0, "c"] == 0.0
assert df.loc[7, "c"] == 1.0
# GH 7084
# not updating cache on series setting with slices
expected = DataFrame(
{"A": [600, 600, 600]}, index=date_range("5/7/2014", "5/9/2014")
)
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
df = DataFrame({"C": ["A", "A", "A"], "D": [100, 200, 300]})
# loop through df to update out
six = Timestamp("5/7/2014")
eix = Timestamp("5/9/2014")
for ix, row in df.iterrows():
out.loc[six:eix, row["C"]] = out.loc[six:eix, row["C"]] + row["D"]
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out["A"], expected["A"])
# try via a chain indexing
# this actually works
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
for ix, row in df.iterrows():
v = out[row["C"]][six:eix] + row["D"]
out[row["C"]][six:eix] = v
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out["A"], expected["A"])
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
for ix, row in df.iterrows():
out.loc[six:eix, row["C"]] += row["D"]
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out["A"], expected["A"])
class TestChaining:
def test_setitem_chained_setfault(self):
# GH6026
data = ["right", "left", "left", "left", "right", "left", "timeout"]
mdata = ["right", "left", "left", "left", "right", "left", "none"]
df = DataFrame({"response": np.array(data)})
mask = df.response == "timeout"
df.response[mask] = "none"
tm.assert_frame_equal(df, DataFrame({"response": mdata}))
recarray = np.rec.fromarrays([data], names=["response"])
df = DataFrame(recarray)
mask = df.response == "timeout"
df.response[mask] = "none"
tm.assert_frame_equal(df, DataFrame({"response": mdata}))
df = DataFrame({"response": data, "response1": data})
mask = df.response == "timeout"
df.response[mask] = "none"
tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data}))
# GH 6056
expected = DataFrame(dict(A=[np.nan, "bar", "bah", "foo", "bar"]))
df = DataFrame(dict(A=np.array(["foo", "bar", "bah", "foo", "bar"])))
df["A"].iloc[0] = np.nan
result = df.head()
tm.assert_frame_equal(result, expected)
df = DataFrame(dict(A=np.array(["foo", "bar", "bah", "foo", "bar"])))
df.A.iloc[0] = np.nan
result = df.head()
tm.assert_frame_equal(result, expected)
def test_detect_chained_assignment(self):
pd.set_option("chained_assignment", "raise")
# work with the chain
expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB"))
df = DataFrame(np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64")
assert df._is_copy is None
df["A"][0] = -5
df["A"][1] = -6
tm.assert_frame_equal(df, expected)
# test with the chaining
df = DataFrame(
{
"A": Series(range(2), dtype="int64"),
"B": np.array(np.arange(2, 4), dtype=np.float64),
}
)
assert df._is_copy is None
with pytest.raises(com.SettingWithCopyError):
df["A"][0] = -5
with pytest.raises(com.SettingWithCopyError):
df["A"][1] = np.nan
assert df["A"]._is_copy is None
# Using a copy (the chain), fails
df = DataFrame(
{
"A": Series(range(2), dtype="int64"),
"B": np.array(np.arange(2, 4), dtype=np.float64),
}
)
with pytest.raises(com.SettingWithCopyError):
df.loc[0]["A"] = -5
# Doc example
df = DataFrame(
{
"a": ["one", "one", "two", "three", "two", "one", "six"],
"c": Series(range(7), dtype="int64"),
}
)
assert df._is_copy is None
with pytest.raises(com.SettingWithCopyError):
indexer = df.a.str.startswith("o")
df[indexer]["c"] = 42
expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
with pytest.raises(com.SettingWithCopyError):
df["A"][0] = 111
with pytest.raises(com.SettingWithCopyError):
df.loc[0]["A"] = 111
df.loc[0, "A"] = 111
tm.assert_frame_equal(df, expected)
# gh-5475: Make sure that is_copy is picked up reconstruction
df = DataFrame({"A": [1, 2]})
assert df._is_copy is None
with tm.ensure_clean("__tmp__pickle") as path:
df.to_pickle(path)
df2 = pd.read_pickle(path)
df2["B"] = df2["A"]
df2["B"] = df2["A"]
# gh-5597: a spurious raise as we are setting the entire column here
from string import ascii_letters as letters
def random_text(nobs=100):
df = []
for i in range(nobs):
idx = np.random.randint(len(letters), size=2)
idx.sort()
df.append([letters[idx[0] : idx[1]]])
return DataFrame(df, columns=["letters"])
df = random_text(100000)
# Always a copy
x = df.iloc[[0, 1, 2]]
assert x._is_copy is not None
x = df.iloc[[0, 1, 2, 4]]
assert x._is_copy is not None
# Explicitly copy
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer].copy()
assert df._is_copy is None
df["letters"] = df["letters"].apply(str.lower)
# Implicitly take
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer]
assert df._is_copy is not None
df["letters"] = df["letters"].apply(str.lower)
# Implicitly take 2
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer]
assert df._is_copy is not None
df.loc[:, "letters"] = df["letters"].apply(str.lower)
# Should be ok even though it's a copy!
assert df._is_copy is None
df["letters"] = df["letters"].apply(str.lower)
assert df._is_copy is None
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df.loc[indexer, "letters"] = df.loc[indexer, "letters"].apply(str.lower)
# an identical take, so no copy
df = DataFrame({"a": [1]}).dropna()
assert df._is_copy is None
df["a"] += 1
df = DataFrame(np.random.randn(10, 4))
s = df.iloc[:, 0].sort_values()
tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
tm.assert_series_equal(s, df[0].sort_values())
# see gh-6025: false positives
df = DataFrame({"column1": ["a", "a", "a"], "column2": [4, 8, 9]})
str(df)
df["column1"] = df["column1"] + "b"
str(df)
df = df[df["column2"] != 8]
str(df)
df["column1"] = df["column1"] + "c"
str(df)
# from SO:
# http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
df = DataFrame(np.arange(0, 9), columns=["count"])
df["group"] = "b"
with pytest.raises(com.SettingWithCopyError):
df.iloc[0:5]["group"] = "a"
# Mixed type setting but same dtype & changing dtype
df = DataFrame(
dict(
A=date_range("20130101", periods=5),
B=np.random.randn(5),
C=np.arange(5, dtype="int64"),
D=list("abcde"),
)
)
with pytest.raises(com.SettingWithCopyError):
df.loc[2]["D"] = "foo"
with pytest.raises(com.SettingWithCopyError):
df.loc[2]["C"] = "foo"
with pytest.raises(com.SettingWithCopyError):
df["C"][2] = "foo"
def test_setting_with_copy_bug(self):
# operating on a copy
df = DataFrame(
{"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]}
)
mask = pd.isna(df.c)
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
df[["c"]][mask] = df[["b"]][mask]
# invalid warning as we are returning a new object
# GH 8730
df1 = DataFrame({"x": Series(["a", "b", "c"]), "y": Series(["d", "e", "f"])})
df2 = df1[["x"]]
# this should not raise
df2["y"] = ["g", "h", "i"]
def test_detect_chained_assignment_warnings(self):
with option_context("chained_assignment", "warn"):
df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
with tm.assert_produces_warning(com.SettingWithCopyWarning):
df.loc[0]["A"] = 111
def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
# xref gh-13017.
with option_context("chained_assignment", "warn"):
df = pd.DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"]
)
with tm.assert_produces_warning(com.SettingWithCopyWarning):
df.c.loc[df.c > 0] = None
expected = pd.DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"]
)
tm.assert_frame_equal(df, expected)
def test_chained_getitem_with_lists(self):
# GH6394
# Regression in chained getitem indexing with embedded list-like from
# 0.12
def check(result, expected):
tm.assert_numpy_array_equal(result, expected)
assert isinstance(result, np.ndarray)
df = DataFrame({"A": 5 * [np.zeros(3)], "B": 5 * [np.ones(3)]})
expected = df["A"].iloc[2]
result = df.loc[2, "A"]
check(result, expected)
result2 = df.iloc[2]["A"]
check(result2, expected)
result3 = df["A"].loc[2]
check(result3, expected)
result4 = df["A"].iloc[2]
check(result4, expected)
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_cache_updating(self):
# GH 4939, make sure to update the cache on setitem
df = tm.makeDataFrame()
df["A"] # cache series
df.ix["Hello Friend"] = df.ix[0]
assert "Hello Friend" in df["A"].index
assert "Hello Friend" in df["B"].index
# 10264
df = DataFrame(
np.zeros((5, 5), dtype="int64"),
columns=["a", "b", "c", "d", "e"],
index=range(5),
)
df["f"] = 0
df.f.values[3] = 1
# TODO(wesm): unused?
# y = df.iloc[np.arange(2, len(df))]
df.f.values[3] = 2
expected = DataFrame(
np.zeros((5, 6), dtype="int64"),
columns=["a", "b", "c", "d", "e", "f"],
index=range(5),
)
expected.at[3, "f"] = 2
tm.assert_frame_equal(df, expected)
expected = Series([0, 0, 0, 2, 0], name="f")
tm.assert_series_equal(df.f, expected)
def test_deprecate_is_copy(self):
# GH18801
df = DataFrame({"A": [1, 2, 3]})
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# getter
df.is_copy
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# setter
df.is_copy = "test deprecated is_copy"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,358 @@
from datetime import datetime, timedelta
from dateutil import tz
import numpy as np
import pandas as pd
from pandas import DataFrame, Index, Series, Timestamp, date_range
from pandas.util import testing as tm
class TestDatetimeIndex:
def test_setitem_with_datetime_tz(self):
# 16889
# support .loc with alignment and tz-aware DatetimeIndex
mask = np.array([True, False, True, False])
idx = date_range("20010101", periods=4, tz="UTC")
df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64")
result = df.copy()
result.loc[mask, :] = df.loc[mask, :]
tm.assert_frame_equal(result, df)
result = df.copy()
result.loc[mask] = df.loc[mask]
tm.assert_frame_equal(result, df)
idx = date_range("20010101", periods=4)
df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64")
result = df.copy()
result.loc[mask, :] = df.loc[mask, :]
tm.assert_frame_equal(result, df)
result = df.copy()
result.loc[mask] = df.loc[mask]
tm.assert_frame_equal(result, df)
def test_indexing_with_datetime_tz(self):
# GH#8260
# support datetime64 with tz
idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
dr = date_range("20130110", periods=3)
df = DataFrame({"A": idx, "B": dr})
df["C"] = idx
df.iloc[1, 1] = pd.NaT
df.iloc[1, 2] = pd.NaT
# indexing
result = df.iloc[1]
expected = Series(
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), np.nan, np.nan],
index=list("ABC"),
dtype="object",
name=1,
)
tm.assert_series_equal(result, expected)
result = df.loc[1]
expected = Series(
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), np.nan, np.nan],
index=list("ABC"),
dtype="object",
name=1,
)
tm.assert_series_equal(result, expected)
# indexing - fast_xs
df = DataFrame({"a": date_range("2014-01-01", periods=10, tz="UTC")})
result = df.iloc[5]
expected = Series(
[Timestamp("2014-01-06 00:00:00+0000", tz="UTC")], index=["a"], name=5
)
tm.assert_series_equal(result, expected)
result = df.loc[5]
tm.assert_series_equal(result, expected)
# indexing - boolean
result = df[df.a > df.a[3]]
expected = df.iloc[4:]
tm.assert_frame_equal(result, expected)
# indexing - setting an element
df = DataFrame(
data=pd.to_datetime(["2015-03-30 20:12:32", "2015-03-12 00:11:11"]),
columns=["time"],
)
df["new_col"] = ["new", "old"]
df.time = df.set_index("time").index.tz_localize("UTC")
v = df[df.new_col == "new"].set_index("time").index.tz_convert("US/Pacific")
# trying to set a single element on a part of a different timezone
# this converts to object
df2 = df.copy()
df2.loc[df2.new_col == "new", "time"] = v
expected = Series([v[0], df.loc[1, "time"]], name="time")
tm.assert_series_equal(df2.time, expected)
v = df.loc[df.new_col == "new", "time"] + pd.Timedelta("1s")
df.loc[df.new_col == "new", "time"] = v
tm.assert_series_equal(df.loc[df.new_col == "new", "time"], v)
def test_consistency_with_tz_aware_scalar(self):
# xef gh-12938
# various ways of indexing the same tz-aware scalar
df = Series([Timestamp("2016-03-30 14:35:25", tz="Europe/Brussels")]).to_frame()
df = pd.concat([df, df]).reset_index(drop=True)
expected = Timestamp("2016-03-30 14:35:25+0200", tz="Europe/Brussels")
result = df[0][0]
assert result == expected
result = df.iloc[0, 0]
assert result == expected
result = df.loc[0, 0]
assert result == expected
result = df.iat[0, 0]
assert result == expected
result = df.at[0, 0]
assert result == expected
result = df[0].loc[0]
assert result == expected
result = df[0].at[0]
assert result == expected
def test_indexing_with_datetimeindex_tz(self):
# GH 12050
# indexing on a series with a datetimeindex with tz
index = date_range("2015-01-01", periods=2, tz="utc")
ser = Series(range(2), index=index, dtype="int64")
# list-like indexing
for sel in (index, list(index)):
# getitem
tm.assert_series_equal(ser[sel], ser)
# setitem
result = ser.copy()
result[sel] = 1
expected = Series(1, index=index)
tm.assert_series_equal(result, expected)
# .loc getitem
tm.assert_series_equal(ser.loc[sel], ser)
# .loc setitem
result = ser.copy()
result.loc[sel] = 1
expected = Series(1, index=index)
tm.assert_series_equal(result, expected)
# single element indexing
# getitem
assert ser[index[1]] == 1
# setitem
result = ser.copy()
result[index[1]] = 5
expected = Series([0, 5], index=index)
tm.assert_series_equal(result, expected)
# .loc getitem
assert ser.loc[index[1]] == 1
# .loc setitem
result = ser.copy()
result.loc[index[1]] = 5
expected = Series([0, 5], index=index)
tm.assert_series_equal(result, expected)
def test_partial_setting_with_datetimelike_dtype(self):
# GH9478
# a datetimeindex alignment issue with partial setting
df = DataFrame(
np.arange(6.0).reshape(3, 2),
columns=list("AB"),
index=date_range("1/1/2000", periods=3, freq="1H"),
)
expected = df.copy()
expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT]
mask = df.A < 1
df.loc[mask, "C"] = df.loc[mask].index
tm.assert_frame_equal(df, expected)
def test_loc_setitem_datetime(self):
# GH 9516
dt1 = Timestamp("20130101 09:00:00")
dt2 = Timestamp("20130101 10:00:00")
for conv in [
lambda x: x,
lambda x: x.to_datetime64(),
lambda x: x.to_pydatetime(),
lambda x: np.datetime64(x),
]:
df = DataFrame()
df.loc[conv(dt1), "one"] = 100
df.loc[conv(dt2), "one"] = 200
expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2])
tm.assert_frame_equal(df, expected)
def test_series_partial_set_datetime(self):
# GH 11497
idx = date_range("2011-01-01", "2011-01-02", freq="D", name="idx")
ser = Series([0.1, 0.2], index=idx, name="s")
result = ser.loc[[Timestamp("2011-01-01"), Timestamp("2011-01-02")]]
exp = Series([0.1, 0.2], index=idx, name="s")
tm.assert_series_equal(result, exp, check_index_type=True)
keys = [
Timestamp("2011-01-02"),
Timestamp("2011-01-02"),
Timestamp("2011-01-01"),
]
exp = Series(
[0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name="idx"), name="s"
)
tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
keys = [
Timestamp("2011-01-03"),
Timestamp("2011-01-02"),
Timestamp("2011-01-03"),
]
exp = Series(
[np.nan, 0.2, np.nan], index=pd.DatetimeIndex(keys, name="idx"), name="s"
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
def test_series_partial_set_period(self):
# GH 11497
idx = pd.period_range("2011-01-01", "2011-01-02", freq="D", name="idx")
ser = Series([0.1, 0.2], index=idx, name="s")
result = ser.loc[
[pd.Period("2011-01-01", freq="D"), pd.Period("2011-01-02", freq="D")]
]
exp = Series([0.1, 0.2], index=idx, name="s")
tm.assert_series_equal(result, exp, check_index_type=True)
keys = [
pd.Period("2011-01-02", freq="D"),
pd.Period("2011-01-02", freq="D"),
pd.Period("2011-01-01", freq="D"),
]
exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name="idx"), name="s")
tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
keys = [
pd.Period("2011-01-03", freq="D"),
pd.Period("2011-01-02", freq="D"),
pd.Period("2011-01-03", freq="D"),
]
exp = Series(
[np.nan, 0.2, np.nan], index=pd.PeriodIndex(keys, name="idx"), name="s"
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[keys]
tm.assert_series_equal(result, exp)
def test_nanosecond_getitem_setitem_with_tz(self):
# GH 11679
data = ["2016-06-28 08:30:00.123456789"]
index = pd.DatetimeIndex(data, dtype="datetime64[ns, America/Chicago]")
df = DataFrame({"a": [10]}, index=index)
result = df.loc[df.index[0]]
expected = Series(10, index=["a"], name=df.index[0])
tm.assert_series_equal(result, expected)
result = df.copy()
result.loc[df.index[0], "a"] = -1
expected = DataFrame(-1, index=index, columns=["a"])
tm.assert_frame_equal(result, expected)
def test_loc_getitem_across_dst(self):
# GH 21846
idx = pd.date_range(
"2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min"
)
series2 = pd.Series([0, 1, 2, 3, 4], index=idx)
t_1 = pd.Timestamp(
"2017-10-29 02:30:00+02:00", tz="Europe/Berlin", freq="30min"
)
t_2 = pd.Timestamp(
"2017-10-29 02:00:00+01:00", tz="Europe/Berlin", freq="30min"
)
result = series2.loc[t_1:t_2]
expected = pd.Series([2, 3], index=idx[2:4])
tm.assert_series_equal(result, expected)
result = series2[t_1]
expected = 2
assert result == expected
def test_loc_incremental_setitem_with_dst(self):
# GH 20724
base = datetime(2015, 11, 1, tzinfo=tz.gettz("US/Pacific"))
idxs = [base + timedelta(seconds=i * 900) for i in range(16)]
result = pd.Series([0], index=[idxs[0]])
for ts in idxs:
result.loc[ts] = 1
expected = pd.Series(1, index=idxs)
tm.assert_series_equal(result, expected)
def test_loc_setitem_with_existing_dst(self):
# GH 18308
start = pd.Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid")
end = pd.Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid")
ts = pd.Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid")
idx = pd.date_range(start, end, closed="left", freq="H")
result = pd.DataFrame(index=idx, columns=["value"])
result.loc[ts, "value"] = 12
expected = pd.DataFrame(
[np.nan] * len(idx) + [12],
index=idx.append(pd.DatetimeIndex([ts])),
columns=["value"],
dtype=object,
)
tm.assert_frame_equal(result, expected)
def test_loc_str_slicing(self):
ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M")
ser = ix.to_series()
result = ser.loc[:"2017-12"]
expected = ser.iloc[:-1]
tm.assert_series_equal(result, expected)
def test_loc_label_slicing(self):
ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M")
ser = ix.to_series()
result = ser.loc[: ix[-2]]
expected = ser.iloc[:-1]
tm.assert_series_equal(result, expected)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,819 @@
""" test positional based indexing with iloc """
from warnings import catch_warnings, filterwarnings, simplefilter
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Series, concat, date_range, isna
from pandas.api.types import is_scalar
from pandas.tests.indexing.common import Base
from pandas.util import testing as tm
class TestiLoc(Base):
def test_iloc_exceeds_bounds(self):
# GH6296
# iloc should allow indexers that exceed the bounds
df = DataFrame(np.random.random_sample((20, 5)), columns=list("ABCDE"))
# lists of positions should raise IndexError!
msg = "positional indexers are out-of-bounds"
with pytest.raises(IndexError, match=msg):
df.iloc[:, [0, 1, 2, 3, 4, 5]]
with pytest.raises(IndexError, match=msg):
df.iloc[[1, 30]]
with pytest.raises(IndexError, match=msg):
df.iloc[[1, -30]]
with pytest.raises(IndexError, match=msg):
df.iloc[[100]]
s = df["A"]
with pytest.raises(IndexError, match=msg):
s.iloc[[100]]
with pytest.raises(IndexError, match=msg):
s.iloc[[-100]]
# still raise on a single indexer
msg = "single positional indexer is out-of-bounds"
with pytest.raises(IndexError, match=msg):
df.iloc[30]
with pytest.raises(IndexError, match=msg):
df.iloc[-30]
# GH10779
# single positive/negative indexer exceeding Series bounds should raise
# an IndexError
with pytest.raises(IndexError, match=msg):
s.iloc[30]
with pytest.raises(IndexError, match=msg):
s.iloc[-30]
# slices are ok
result = df.iloc[:, 4:10] # 0 < start < len < stop
expected = df.iloc[:, 4:]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, -4:-10] # stop < 0 < start < len
expected = df.iloc[:, :0]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down)
expected = df.iloc[:, :4:-1]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down)
expected = df.iloc[:, 4::-1]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, -10:4] # start < 0 < stop < len
expected = df.iloc[:, :4]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, 10:4] # 0 < stop < len < start
expected = df.iloc[:, :0]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down)
expected = df.iloc[:, :0]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, 10:11] # 0 < len < start < stop
expected = df.iloc[:, :0]
tm.assert_frame_equal(result, expected)
# slice bounds exceeding is ok
result = s.iloc[18:30]
expected = s.iloc[18:]
tm.assert_series_equal(result, expected)
result = s.iloc[30:]
expected = s.iloc[:0]
tm.assert_series_equal(result, expected)
result = s.iloc[30::-1]
expected = s.iloc[::-1]
tm.assert_series_equal(result, expected)
# doc example
def check(result, expected):
str(result)
result.dtypes
tm.assert_frame_equal(result, expected)
dfl = DataFrame(np.random.randn(5, 2), columns=list("AB"))
check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index))
check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
check(dfl.iloc[4:6], dfl.iloc[[4]])
msg = "positional indexers are out-of-bounds"
with pytest.raises(IndexError, match=msg):
dfl.iloc[[4, 5, 6]]
msg = "single positional indexer is out-of-bounds"
with pytest.raises(IndexError, match=msg):
dfl.iloc[:, 4]
@pytest.mark.parametrize("index,columns", [(np.arange(20), list("ABCDE"))])
@pytest.mark.parametrize(
"index_vals,column_vals",
[
([slice(None), ["A", "D"]]),
(["1", "2"], slice(None)),
([pd.datetime(2019, 1, 1)], slice(None)),
],
)
def test_iloc_non_integer_raises(self, index, columns, index_vals, column_vals):
# GH 25753
df = DataFrame(
np.random.randn(len(index), len(columns)), index=index, columns=columns
)
msg = ".iloc requires numeric indexers, got"
with pytest.raises(IndexError, match=msg):
df.iloc[index_vals, column_vals]
def test_iloc_getitem_int(self):
# integer
self.check_result(
"integer", "iloc", 2, "ix", {0: 4, 1: 6, 2: 8}, typs=["ints", "uints"]
)
self.check_result(
"integer",
"iloc",
2,
"indexer",
2,
typs=["labels", "mixed", "ts", "floats", "empty"],
fails=IndexError,
)
def test_iloc_getitem_neg_int(self):
# neg integer
self.check_result(
"neg int", "iloc", -1, "ix", {0: 6, 1: 9, 2: 12}, typs=["ints", "uints"]
)
self.check_result(
"neg int",
"iloc",
-1,
"indexer",
-1,
typs=["labels", "mixed", "ts", "floats", "empty"],
fails=IndexError,
)
@pytest.mark.parametrize("dims", [1, 2])
def test_iloc_getitem_invalid_scalar(self, dims):
# GH 21982
if dims == 1:
s = Series(np.arange(10))
else:
s = DataFrame(np.arange(100).reshape(10, 10))
with pytest.raises(TypeError, match="Cannot index by location index"):
s.iloc["a"]
def test_iloc_array_not_mutating_negative_indices(self):
# GH 21867
array_with_neg_numbers = np.array([1, 2, -1])
array_copy = array_with_neg_numbers.copy()
df = pd.DataFrame(
{"A": [100, 101, 102], "B": [103, 104, 105], "C": [106, 107, 108]},
index=[1, 2, 3],
)
df.iloc[array_with_neg_numbers]
tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)
df.iloc[:, array_with_neg_numbers]
tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)
def test_iloc_getitem_list_int(self):
# list of ints
self.check_result(
"list int",
"iloc",
[0, 1, 2],
"ix",
{0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]},
typs=["ints", "uints"],
)
self.check_result(
"list int",
"iloc",
[2],
"ix",
{0: [4], 1: [6], 2: [8]},
typs=["ints", "uints"],
)
self.check_result(
"list int",
"iloc",
[0, 1, 2],
"indexer",
[0, 1, 2],
typs=["labels", "mixed", "ts", "floats", "empty"],
fails=IndexError,
)
# array of ints (GH5006), make sure that a single indexer is returning
# the correct type
self.check_result(
"array int",
"iloc",
np.array([0, 1, 2]),
"ix",
{0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]},
typs=["ints", "uints"],
)
self.check_result(
"array int",
"iloc",
np.array([2]),
"ix",
{0: [4], 1: [6], 2: [8]},
typs=["ints", "uints"],
)
self.check_result(
"array int",
"iloc",
np.array([0, 1, 2]),
"indexer",
[0, 1, 2],
typs=["labels", "mixed", "ts", "floats", "empty"],
fails=IndexError,
)
def test_iloc_getitem_neg_int_can_reach_first_index(self):
# GH10547 and GH10779
# negative integers should be able to reach index 0
df = DataFrame({"A": [2, 3, 5], "B": [7, 11, 13]})
s = df["A"]
expected = df.iloc[0]
result = df.iloc[-3]
tm.assert_series_equal(result, expected)
expected = df.iloc[[0]]
result = df.iloc[[-3]]
tm.assert_frame_equal(result, expected)
expected = s.iloc[0]
result = s.iloc[-3]
assert result == expected
expected = s.iloc[[0]]
result = s.iloc[[-3]]
tm.assert_series_equal(result, expected)
# check the length 1 Series case highlighted in GH10547
expected = Series(["a"], index=["A"])
result = expected.iloc[[-1]]
tm.assert_series_equal(result, expected)
def test_iloc_getitem_dups(self):
self.check_result(
"list int (dups)",
"iloc",
[0, 1, 1, 3],
"ix",
{0: [0, 2, 2, 6], 1: [0, 3, 3, 9]},
objs=["series", "frame"],
typs=["ints", "uints"],
)
# GH 6766
df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
df = concat([df1, df2], axis=1)
# cross-sectional indexing
result = df.iloc[0, 0]
assert isna(result)
result = df.iloc[0, :]
expected = Series([np.nan, 1, 3, 3], index=["A", "B", "A", "B"], name=0)
tm.assert_series_equal(result, expected)
def test_iloc_getitem_array(self):
# array like
s = Series(index=range(1, 4))
self.check_result(
"array like",
"iloc",
s.index,
"ix",
{0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]},
typs=["ints", "uints"],
)
def test_iloc_getitem_bool(self):
# boolean indexers
b = [True, False, True, False]
self.check_result("bool", "iloc", b, "ix", b, typs=["ints", "uints"])
self.check_result(
"bool",
"iloc",
b,
"ix",
b,
typs=["labels", "mixed", "ts", "floats", "empty"],
fails=IndexError,
)
@pytest.mark.parametrize("index", [[True, False], [True, False, True, False]])
def test_iloc_getitem_bool_diff_len(self, index):
# GH26658
s = Series([1, 2, 3])
with pytest.raises(
IndexError,
match=("Item wrong length {} instead of {}.".format(len(index), len(s))),
):
_ = s.iloc[index]
def test_iloc_getitem_slice(self):
# slices
self.check_result(
"slice",
"iloc",
slice(1, 3),
"ix",
{0: [2, 4], 1: [3, 6], 2: [4, 8]},
typs=["ints", "uints"],
)
self.check_result(
"slice",
"iloc",
slice(1, 3),
"indexer",
slice(1, 3),
typs=["labels", "mixed", "ts", "floats", "empty"],
fails=IndexError,
)
def test_iloc_getitem_slice_dups(self):
df1 = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"])
df2 = DataFrame(
np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"]
)
# axis=1
df = concat([df1, df2], axis=1)
tm.assert_frame_equal(df.iloc[:, :4], df1)
tm.assert_frame_equal(df.iloc[:, 4:], df2)
df = concat([df2, df1], axis=1)
tm.assert_frame_equal(df.iloc[:, :2], df2)
tm.assert_frame_equal(df.iloc[:, 2:], df1)
exp = concat([df2, df1.iloc[:, [0]]], axis=1)
tm.assert_frame_equal(df.iloc[:, 0:3], exp)
# axis=0
df = concat([df, df], axis=0)
tm.assert_frame_equal(df.iloc[0:10, :2], df2)
tm.assert_frame_equal(df.iloc[0:10, 2:], df1)
tm.assert_frame_equal(df.iloc[10:, :2], df2)
tm.assert_frame_equal(df.iloc[10:, 2:], df1)
def test_iloc_setitem(self):
df = self.frame_ints
df.iloc[1, 1] = 1
result = df.iloc[1, 1]
assert result == 1
df.iloc[:, 2:3] = 0
expected = df.iloc[:, 2:3]
result = df.iloc[:, 2:3]
tm.assert_frame_equal(result, expected)
# GH5771
s = Series(0, index=[4, 5, 6])
s.iloc[1:2] += 1
expected = Series([0, 1, 0], index=[4, 5, 6])
tm.assert_series_equal(s, expected)
def test_iloc_setitem_list(self):
# setitem with an iloc list
df = DataFrame(
np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"]
)
df.iloc[[0, 1], [1, 2]]
df.iloc[[0, 1], [1, 2]] += 100
expected = DataFrame(
np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)),
index=["A", "B", "C"],
columns=["A", "B", "C"],
)
tm.assert_frame_equal(df, expected)
def test_iloc_setitem_pandas_object(self):
# GH 17193
s_orig = Series([0, 1, 2, 3])
expected = Series([0, -1, -2, 3])
s = s_orig.copy()
s.iloc[Series([1, 2])] = [-1, -2]
tm.assert_series_equal(s, expected)
s = s_orig.copy()
s.iloc[pd.Index([1, 2])] = [-1, -2]
tm.assert_series_equal(s, expected)
def test_iloc_setitem_dups(self):
# GH 6766
# iloc with a mask aligning from another iloc
df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
df = concat([df1, df2], axis=1)
expected = df.fillna(3)
expected["A"] = expected["A"].astype("float64")
inds = np.isnan(df.iloc[:, 0])
mask = inds[inds].index
df.iloc[mask, 0] = df.iloc[mask, 2]
tm.assert_frame_equal(df, expected)
# del a dup column across blocks
expected = DataFrame({0: [1, 2], 1: [3, 4]})
expected.columns = ["B", "B"]
del df["A"]
tm.assert_frame_equal(df, expected)
# assign back to self
df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]
tm.assert_frame_equal(df, expected)
# reversed x 2
df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True)
df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True)
tm.assert_frame_equal(df, expected)
def test_iloc_getitem_frame(self):
df = DataFrame(
np.random.randn(10, 4), index=range(0, 20, 2), columns=range(0, 8, 2)
)
result = df.iloc[2]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", FutureWarning)
exp = df.ix[4]
tm.assert_series_equal(result, exp)
result = df.iloc[2, 2]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", FutureWarning)
exp = df.ix[4, 4]
assert result == exp
# slice
result = df.iloc[4:8]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", FutureWarning)
expected = df.ix[8:14]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, 2:3]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", FutureWarning)
expected = df.ix[:, 4:5]
tm.assert_frame_equal(result, expected)
# list of integers
result = df.iloc[[0, 1, 3]]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", FutureWarning)
expected = df.ix[[0, 2, 6]]
tm.assert_frame_equal(result, expected)
result = df.iloc[[0, 1, 3], [0, 1]]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", FutureWarning)
expected = df.ix[[0, 2, 6], [0, 2]]
tm.assert_frame_equal(result, expected)
# neg indices
result = df.iloc[[-1, 1, 3], [-1, 1]]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", FutureWarning)
expected = df.ix[[18, 2, 6], [6, 2]]
tm.assert_frame_equal(result, expected)
# dups indices
result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", FutureWarning)
expected = df.ix[[18, 18, 2, 6], [6, 2]]
tm.assert_frame_equal(result, expected)
# with index-like
s = Series(index=range(1, 5))
result = df.iloc[s.index]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", FutureWarning)
expected = df.ix[[2, 4, 6, 8]]
tm.assert_frame_equal(result, expected)
def test_iloc_getitem_labelled_frame(self):
# try with labelled frame
df = DataFrame(
np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD")
)
result = df.iloc[1, 1]
exp = df.loc["b", "B"]
assert result == exp
result = df.iloc[:, 2:3]
expected = df.loc[:, ["C"]]
tm.assert_frame_equal(result, expected)
# negative indexing
result = df.iloc[-1, -1]
exp = df.loc["j", "D"]
assert result == exp
# out-of-bounds exception
msg = "single positional indexer is out-of-bounds"
with pytest.raises(IndexError, match=msg):
df.iloc[10, 5]
# trying to use a label
msg = (
r"Location based indexing can only have \[integer, integer"
r" slice \(START point is INCLUDED, END point is EXCLUDED\),"
r" listlike of integers, boolean array\] types"
)
with pytest.raises(ValueError, match=msg):
df.iloc["j", "D"]
def test_iloc_getitem_doc_issue(self):
# multi axis slicing issue with single block
# surfaced in GH 6059
arr = np.random.randn(6, 4)
index = date_range("20130101", periods=6)
columns = list("ABCD")
df = DataFrame(arr, index=index, columns=columns)
# defines ref_locs
df.describe()
result = df.iloc[3:5, 0:2]
str(result)
result.dtypes
expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=columns[0:2])
tm.assert_frame_equal(result, expected)
# for dups
df.columns = list("aaaa")
result = df.iloc[3:5, 0:2]
str(result)
result.dtypes
expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=list("aa"))
tm.assert_frame_equal(result, expected)
# related
arr = np.random.randn(6, 4)
index = list(range(0, 12, 2))
columns = list(range(0, 8, 2))
df = DataFrame(arr, index=index, columns=columns)
df._data.blocks[0].mgr_locs
result = df.iloc[1:5, 2:4]
str(result)
result.dtypes
expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4])
tm.assert_frame_equal(result, expected)
def test_iloc_setitem_series(self):
df = DataFrame(
np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD")
)
df.iloc[1, 1] = 1
result = df.iloc[1, 1]
assert result == 1
df.iloc[:, 2:3] = 0
expected = df.iloc[:, 2:3]
result = df.iloc[:, 2:3]
tm.assert_frame_equal(result, expected)
s = Series(np.random.randn(10), index=range(0, 20, 2))
s.iloc[1] = 1
result = s.iloc[1]
assert result == 1
s.iloc[:4] = 0
expected = s.iloc[:4]
result = s.iloc[:4]
tm.assert_series_equal(result, expected)
s = Series([-1] * 6)
s.iloc[0::2] = [0, 2, 4]
s.iloc[1::2] = [1, 3, 5]
result = s
expected = Series([0, 1, 2, 3, 4, 5])
tm.assert_series_equal(result, expected)
def test_iloc_setitem_list_of_lists(self):
# GH 7551
# list-of-list is set incorrectly in mixed vs. single dtyped frames
df = DataFrame(
dict(A=np.arange(5, dtype="int64"), B=np.arange(5, 10, dtype="int64"))
)
df.iloc[2:4] = [[10, 11], [12, 13]]
expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9]))
tm.assert_frame_equal(df, expected)
df = DataFrame(dict(A=list("abcde"), B=np.arange(5, 10, dtype="int64")))
df.iloc[2:4] = [["x", 11], ["y", 13]]
expected = DataFrame(dict(A=["a", "b", "x", "y", "e"], B=[5, 6, 11, 13, 9]))
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])])
@pytest.mark.parametrize("value", [["Z"], np.array(["Z"])])
def test_iloc_setitem_with_scalar_index(self, indexer, value):
# GH #19474
# assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
# elementwisely, not using "setter('A', ['Z'])".
df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
df.iloc[0, indexer] = value
result = df.iloc[0, 0]
assert is_scalar(result) and result == "Z"
def test_iloc_mask(self):
# GH 3631, iloc with a mask (of a series) should raise
df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"])
mask = df.a % 2 == 0
msg = "iLocation based boolean indexing cannot use an indexable as a mask"
with pytest.raises(ValueError, match=msg):
df.iloc[mask]
mask.index = range(len(mask))
msg = "iLocation based boolean indexing on an integer type is not available"
with pytest.raises(NotImplementedError, match=msg):
df.iloc[mask]
# ndarray ok
result = df.iloc[np.array([True] * len(mask), dtype=bool)]
tm.assert_frame_equal(result, df)
# the possibilities
locs = np.arange(4)
nums = 2 ** locs
reps = [bin(num) for num in nums]
df = DataFrame({"locs": locs, "nums": nums}, reps)
expected = {
(None, ""): "0b1100",
(None, ".loc"): "0b1100",
(None, ".iloc"): "0b1100",
("index", ""): "0b11",
("index", ".loc"): "0b11",
("index", ".iloc"): (
"iLocation based boolean indexing cannot use an indexable as a mask"
),
("locs", ""): "Unalignable boolean Series provided as indexer "
"(index of the boolean Series and of the indexed "
"object do not match).",
("locs", ".loc"): "Unalignable boolean Series provided as indexer "
"(index of the boolean Series and of the "
"indexed object do not match).",
("locs", ".iloc"): (
"iLocation based boolean indexing on an "
"integer type is not available"
),
}
# UserWarnings from reindex of a boolean mask
with catch_warnings(record=True):
simplefilter("ignore", UserWarning)
result = dict()
for idx in [None, "index", "locs"]:
mask = (df.nums > 2).values
if idx:
mask = Series(mask, list(reversed(getattr(df, idx))))
for method in ["", ".loc", ".iloc"]:
try:
if method:
accessor = getattr(df, method[1:])
else:
accessor = df
ans = str(bin(accessor[mask]["nums"].sum()))
except Exception as e:
ans = str(e)
key = tuple([idx, method])
r = expected.get(key)
if r != ans:
raise AssertionError(
"[{key}] does not match [{ans}], received [{r}]".format(
key=key, ans=ans, r=r
)
)
def test_iloc_non_unique_indexing(self):
# GH 4017, non-unique indexing (on the axis)
df = DataFrame({"A": [0.1] * 3000, "B": [1] * 3000})
idx = np.arange(30) * 99
expected = df.iloc[idx]
df3 = concat([df, 2 * df, 3 * df])
result = df3.iloc[idx]
tm.assert_frame_equal(result, expected)
df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000})
df2 = concat([df2, 2 * df2, 3 * df2])
sidx = df2.index.to_series()
expected = df2.iloc[idx[idx <= sidx.max()]]
new_list = []
for r, s in expected.iterrows():
new_list.append(s)
new_list.append(s * 2)
new_list.append(s * 3)
expected = DataFrame(new_list)
expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])], sort=True)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df2.loc[idx]
tm.assert_frame_equal(result, expected, check_index_type=False)
def test_iloc_empty_list_indexer_is_ok(self):
from pandas.util.testing import makeCustomDataframe as mkdf
df = mkdf(5, 2)
# vertical empty
tm.assert_frame_equal(
df.iloc[:, []],
df.iloc[:, :0],
check_index_type=True,
check_column_type=True,
)
# horizontal empty
tm.assert_frame_equal(
df.iloc[[], :],
df.iloc[:0, :],
check_index_type=True,
check_column_type=True,
)
# horizontal empty
tm.assert_frame_equal(
df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True
)
def test_identity_slice_returns_new_object(self):
# GH13873
original_df = DataFrame({"a": [1, 2, 3]})
sliced_df = original_df.iloc[:]
assert sliced_df is not original_df
# should be a shallow copy
original_df["a"] = [4, 4, 4]
assert (sliced_df["a"] == 4).all()
original_series = Series([1, 2, 3, 4, 5, 6])
sliced_series = original_series.iloc[:]
assert sliced_series is not original_series
# should also be a shallow copy
original_series[:3] = [7, 8, 9]
assert all(sliced_series[:3] == [7, 8, 9])
def test_indexing_zerodim_np_array(self):
# GH24919
df = DataFrame([[1, 2], [3, 4]])
result = df.iloc[np.array(0)]
s = pd.Series([1, 2], name=0)
tm.assert_series_equal(result, s)
def test_series_indexing_zerodim_np_array(self):
# GH24919
s = Series([1, 2])
result = s.iloc[np.array(0)]
assert result == 1

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,163 @@
import numpy as np
from pandas._libs import algos as libalgos, index as libindex
import pandas.util.testing as tm
class TestNumericEngine:
def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype
num = 1000
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
# monotonic increasing
engine = engine_type(lambda: arr, len(arr))
assert engine.is_monotonic_increasing is True
assert engine.is_monotonic_decreasing is False
# monotonic decreasing
engine = engine_type(lambda: arr[::-1], len(arr))
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is True
# neither monotonic increasing or decreasing
arr = np.array([1] * num + [2] * num + [1] * num, dtype=dtype)
engine = engine_type(lambda: arr[::-1], len(arr))
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is False
def test_is_unique(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype
# unique
arr = np.array([1, 3, 2], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
assert engine.is_unique is True
# not unique
arr = np.array([1, 2, 1], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
assert engine.is_unique is False
def test_get_loc(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype
# unique
arr = np.array([1, 2, 3], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
assert engine.get_loc(2) == 1
# monotonic
num = 1000
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
assert engine.get_loc(2) == slice(1000, 2000)
# not monotonic
arr = np.array([1, 2, 3] * num, dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
expected = np.array([False, True, False] * num, dtype=bool)
result = engine.get_loc(2)
assert (result == expected).all()
def test_get_backfill_indexer(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype
arr = np.array([1, 5, 10], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
new = np.arange(12, dtype=dtype)
result = engine.get_backfill_indexer(new)
expected = libalgos.backfill(arr, new)
tm.assert_numpy_array_equal(result, expected)
def test_get_pad_indexer(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype
arr = np.array([1, 5, 10], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
new = np.arange(12, dtype=dtype)
result = engine.get_pad_indexer(new)
expected = libalgos.pad(arr, new)
tm.assert_numpy_array_equal(result, expected)
class TestObjectEngine:
engine_type = libindex.ObjectEngine
dtype = np.object_
values = list("abc")
def test_is_monotonic(self):
num = 1000
arr = np.array(["a"] * num + ["a"] * num + ["c"] * num, dtype=self.dtype)
# monotonic increasing
engine = self.engine_type(lambda: arr, len(arr))
assert engine.is_monotonic_increasing is True
assert engine.is_monotonic_decreasing is False
# monotonic decreasing
engine = self.engine_type(lambda: arr[::-1], len(arr))
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is True
# neither monotonic increasing or decreasing
arr = np.array(["a"] * num + ["b"] * num + ["a"] * num, dtype=self.dtype)
engine = self.engine_type(lambda: arr[::-1], len(arr))
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is False
def test_is_unique(self):
# unique
arr = np.array(self.values, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
assert engine.is_unique is True
# not unique
arr = np.array(["a", "b", "a"], dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
assert engine.is_unique is False
def test_get_loc(self):
# unique
arr = np.array(self.values, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
assert engine.get_loc("b") == 1
# monotonic
num = 1000
arr = np.array(["a"] * num + ["b"] * num + ["c"] * num, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
assert engine.get_loc("b") == slice(1000, 2000)
# not monotonic
arr = np.array(self.values * num, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
expected = np.array([False, True, False] * num, dtype=bool)
result = engine.get_loc("b")
assert (result == expected).all()
def test_get_backfill_indexer(self):
arr = np.array(["a", "e", "j"], dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
new = np.array(list("abcdefghij"), dtype=self.dtype)
result = engine.get_backfill_indexer(new)
expected = libalgos.backfill["object"](arr, new)
tm.assert_numpy_array_equal(result, expected)
def test_get_pad_indexer(self):
arr = np.array(["a", "e", "j"], dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
new = np.array(list("abcdefghij"), dtype=self.dtype)
result = engine.get_pad_indexer(new)
expected = libalgos.pad["object"](arr, new)
tm.assert_numpy_array_equal(result, expected)

View File

@@ -0,0 +1,14 @@
import pytest
from pandas import DataFrame
import pandas.util.testing as tm
class TestIndexingSlow:
@pytest.mark.slow
def test_large_dataframe_indexing(self):
# GH10692
result = DataFrame({"x": range(10 ** 6)}, dtype="int64")
result.loc[len(result)] = len(result) + 1
expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64")
tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,345 @@
""" test indexing with ix """
from warnings import catch_warnings
import numpy as np
import pytest
from pandas.core.dtypes.common import is_scalar
import pandas as pd
from pandas import DataFrame, Series, option_context
from pandas.util import testing as tm
def test_ix_deprecation():
# GH 15114
df = DataFrame({"A": [1, 2, 3]})
with tm.assert_produces_warning(FutureWarning, check_stacklevel=True):
df.ix[1, "A"]
@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning")
class TestIX:
def test_ix_loc_setitem_consistency(self):
# GH 5771
# loc with slice and series
s = Series(0, index=[4, 5, 6])
s.loc[4:5] += 1
expected = Series([1, 1, 0], index=[4, 5, 6])
tm.assert_series_equal(s, expected)
# GH 5928
# chained indexing assignment
df = DataFrame({"a": [0, 1, 2]})
expected = df.copy()
with catch_warnings(record=True):
expected.ix[[0, 1, 2], "a"] = -expected.ix[[0, 1, 2], "a"]
with catch_warnings(record=True):
df["a"].ix[[0, 1, 2]] = -df["a"].ix[[0, 1, 2]]
tm.assert_frame_equal(df, expected)
df = DataFrame({"a": [0, 1, 2], "b": [0, 1, 2]})
with catch_warnings(record=True):
df["a"].ix[[0, 1, 2]] = -df["a"].ix[[0, 1, 2]].astype("float64") + 0.5
expected = DataFrame({"a": [0.5, -0.5, -1.5], "b": [0, 1, 2]})
tm.assert_frame_equal(df, expected)
# GH 8607
# ix setitem consistency
df = DataFrame(
{
"delta": [1174, 904, 161],
"elapsed": [7673, 9277, 1470],
"timestamp": [1413840976, 1413842580, 1413760580],
}
)
expected = DataFrame(
{
"delta": [1174, 904, 161],
"elapsed": [7673, 9277, 1470],
"timestamp": pd.to_datetime(
[1413840976, 1413842580, 1413760580], unit="s"
),
}
)
df2 = df.copy()
df2["timestamp"] = pd.to_datetime(df["timestamp"], unit="s")
tm.assert_frame_equal(df2, expected)
df2 = df.copy()
df2.loc[:, "timestamp"] = pd.to_datetime(df["timestamp"], unit="s")
tm.assert_frame_equal(df2, expected)
df2 = df.copy()
with catch_warnings(record=True):
df2.ix[:, 2] = pd.to_datetime(df["timestamp"], unit="s")
tm.assert_frame_equal(df2, expected)
def test_ix_loc_consistency(self):
# GH 8613
# some edge cases where ix/loc should return the same
# this is not an exhaustive case
def compare(result, expected):
if is_scalar(expected):
assert result == expected
else:
assert expected.equals(result)
# failure cases for .loc, but these work for .ix
df = DataFrame(np.random.randn(5, 4), columns=list("ABCD"))
for key in [
slice(1, 3),
tuple([slice(0, 2), slice(0, 2)]),
tuple([slice(0, 2), df.columns[0:2]]),
]:
for index in [
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeDateIndex,
tm.makePeriodIndex,
tm.makeTimedeltaIndex,
]:
df.index = index(len(df.index))
with catch_warnings(record=True):
df.ix[key]
msg = (
r"cannot do slice indexing"
r" on {klass} with these indexers \[(0|1)\] of"
r" {kind}".format(klass=type(df.index), kind=str(int))
)
with pytest.raises(TypeError, match=msg):
df.loc[key]
df = DataFrame(
np.random.randn(5, 4),
columns=list("ABCD"),
index=pd.date_range("2012-01-01", periods=5),
)
for key in [
"2012-01-03",
"2012-01-31",
slice("2012-01-03", "2012-01-03"),
slice("2012-01-03", "2012-01-04"),
slice("2012-01-03", "2012-01-06", 2),
slice("2012-01-03", "2012-01-31"),
tuple([[True, True, True, False, True]]),
]:
# getitem
# if the expected raises, then compare the exceptions
try:
with catch_warnings(record=True):
expected = df.ix[key]
except KeyError:
with pytest.raises(KeyError, match=r"^'2012-01-31'$"):
df.loc[key]
continue
result = df.loc[key]
compare(result, expected)
# setitem
df1 = df.copy()
df2 = df.copy()
with catch_warnings(record=True):
df1.ix[key] = 10
df2.loc[key] = 10
compare(df2, df1)
# edge cases
s = Series([1, 2, 3, 4], index=list("abde"))
result1 = s["a":"c"]
with catch_warnings(record=True):
result2 = s.ix["a":"c"]
result3 = s.loc["a":"c"]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
# now work rather than raising KeyError
s = Series(range(5), [-2, -1, 1, 2, 3])
with catch_warnings(record=True):
result1 = s.ix[-10:3]
result2 = s.loc[-10:3]
tm.assert_series_equal(result1, result2)
with catch_warnings(record=True):
result1 = s.ix[0:3]
result2 = s.loc[0:3]
tm.assert_series_equal(result1, result2)
def test_ix_weird_slicing(self):
# http://stackoverflow.com/q/17056560/1240268
df = DataFrame({"one": [1, 2, 3, np.nan, np.nan], "two": [1, 2, 3, 4, 5]})
df.loc[df["one"] > 1, "two"] = -df["two"]
expected = DataFrame(
{
"one": {0: 1.0, 1: 2.0, 2: 3.0, 3: np.nan, 4: np.nan},
"two": {0: 1, 1: -2, 2: -3, 3: 4, 4: 5},
}
)
tm.assert_frame_equal(df, expected)
def test_ix_assign_column_mixed(self, float_frame):
# GH #1142
df = float_frame
df["foo"] = "bar"
orig = df.loc[:, "B"].copy()
df.loc[:, "B"] = df.loc[:, "B"] + 1
tm.assert_series_equal(df.B, orig + 1)
# GH 3668, mixed frame with series value
df = DataFrame({"x": np.arange(10), "y": np.arange(10, 20), "z": "bar"})
expected = df.copy()
for i in range(5):
indexer = i * 2
v = 1000 + i * 200
expected.loc[indexer, "y"] = v
assert expected.loc[indexer, "y"] == v
df.loc[df.x % 2 == 0, "y"] = df.loc[df.x % 2 == 0, "y"] * 100
tm.assert_frame_equal(df, expected)
# GH 4508, making sure consistency of assignments
df = DataFrame({"a": [1, 2, 3], "b": [0, 1, 2]})
df.loc[[0, 2], "b"] = [100, -100]
expected = DataFrame({"a": [1, 2, 3], "b": [100, 1, -100]})
tm.assert_frame_equal(df, expected)
df = DataFrame({"a": list(range(4))})
df["b"] = np.nan
df.loc[[1, 3], "b"] = [100, -100]
expected = DataFrame({"a": [0, 1, 2, 3], "b": [np.nan, 100, np.nan, -100]})
tm.assert_frame_equal(df, expected)
# ok, but chained assignments are dangerous
# if we turn off chained assignment it will work
with option_context("chained_assignment", None):
df = DataFrame({"a": list(range(4))})
df["b"] = np.nan
df["b"].loc[[1, 3]] = [100, -100]
tm.assert_frame_equal(df, expected)
def test_ix_get_set_consistency(self):
# GH 4544
# ix/loc get/set not consistent when
# a mixed int/string index
df = DataFrame(
np.arange(16).reshape((4, 4)),
columns=["a", "b", 8, "c"],
index=["e", 7, "f", "g"],
)
with catch_warnings(record=True):
assert df.ix["e", 8] == 2
assert df.loc["e", 8] == 2
with catch_warnings(record=True):
df.ix["e", 8] = 42
assert df.ix["e", 8] == 42
assert df.loc["e", 8] == 42
df.loc["e", 8] = 45
with catch_warnings(record=True):
assert df.ix["e", 8] == 45
assert df.loc["e", 8] == 45
def test_ix_slicing_strings(self):
# see gh-3836
data = {
"Classification": ["SA EQUITY CFD", "bbb", "SA EQUITY", "SA SSF", "aaa"],
"Random": [1, 2, 3, 4, 5],
"X": ["correct", "wrong", "correct", "correct", "wrong"],
}
df = DataFrame(data)
x = df[~df.Classification.isin(["SA EQUITY CFD", "SA EQUITY", "SA SSF"])]
with catch_warnings(record=True):
df.ix[x.index, "X"] = df["Classification"]
expected = DataFrame(
{
"Classification": {
0: "SA EQUITY CFD",
1: "bbb",
2: "SA EQUITY",
3: "SA SSF",
4: "aaa",
},
"Random": {0: 1, 1: 2, 2: 3, 3: 4, 4: 5},
"X": {0: "correct", 1: "bbb", 2: "correct", 3: "correct", 4: "aaa"},
}
) # bug was 4: 'bbb'
tm.assert_frame_equal(df, expected)
def test_ix_setitem_out_of_bounds_axis_0(self):
df = DataFrame(
np.random.randn(2, 5),
index=["row{i}".format(i=i) for i in range(2)],
columns=["col{i}".format(i=i) for i in range(5)],
)
with catch_warnings(record=True):
msg = "cannot set by positional indexing with enlargement"
with pytest.raises(ValueError, match=msg):
df.ix[2, 0] = 100
def test_ix_setitem_out_of_bounds_axis_1(self):
df = DataFrame(
np.random.randn(5, 2),
index=["row{i}".format(i=i) for i in range(5)],
columns=["col{i}".format(i=i) for i in range(2)],
)
with catch_warnings(record=True):
msg = "cannot set by positional indexing with enlargement"
with pytest.raises(ValueError, match=msg):
df.ix[0, 2] = 100
def test_ix_empty_list_indexer_is_ok(self):
with catch_warnings(record=True):
from pandas.util.testing import makeCustomDataframe as mkdf
df = mkdf(5, 2)
# vertical empty
tm.assert_frame_equal(
df.ix[:, []],
df.iloc[:, :0],
check_index_type=True,
check_column_type=True,
)
# horizontal empty
tm.assert_frame_equal(
df.ix[[], :],
df.iloc[:0, :],
check_index_type=True,
check_column_type=True,
)
# horizontal empty
tm.assert_frame_equal(
df.ix[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True
)
def test_ix_duplicate_returns_series(self):
df = DataFrame(
np.random.randn(3, 3), index=[0.1, 0.2, 0.2], columns=list("abc")
)
with catch_warnings(record=True):
r = df.ix[0.2, "a"]
e = df.loc[0.2, "a"]
tm.assert_series_equal(r, e)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,583 @@
"""
test setting *parts* of objects both positionally and label based
TOD: these should be split among the indexer tests
"""
from warnings import catch_warnings
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Index, Series, date_range
from pandas.util import testing as tm
class TestPartialSetting:
@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning")
def test_partial_setting(self):
# GH2578, allow ix and friends to partially set
# series
s_orig = Series([1, 2, 3])
s = s_orig.copy()
s[5] = 5
expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
tm.assert_series_equal(s, expected)
s = s_orig.copy()
s.loc[5] = 5
expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
tm.assert_series_equal(s, expected)
s = s_orig.copy()
s[5] = 5.0
expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
tm.assert_series_equal(s, expected)
s = s_orig.copy()
s.loc[5] = 5.0
expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
tm.assert_series_equal(s, expected)
# iloc/iat raise
s = s_orig.copy()
with pytest.raises(IndexError):
s.iloc[3] = 5.0
with pytest.raises(IndexError):
s.iat[3] = 5.0
# ## frame ##
df_orig = DataFrame(
np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64"
)
# iloc/iat raise
df = df_orig.copy()
with pytest.raises(IndexError):
df.iloc[4, 2] = 5.0
with pytest.raises(IndexError):
df.iat[4, 2] = 5.0
# row setting where it exists
expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]}))
df = df_orig.copy()
df.iloc[1] = df.iloc[2]
tm.assert_frame_equal(df, expected)
expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]}))
df = df_orig.copy()
df.loc[1] = df.loc[2]
tm.assert_frame_equal(df, expected)
# like 2578, partial setting with dtype preservation
expected = DataFrame(dict({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]}))
df = df_orig.copy()
df.loc[3] = df.loc[2]
tm.assert_frame_equal(df, expected)
# single dtype frame, overwrite
expected = DataFrame(dict({"A": [0, 2, 4], "B": [0, 2, 4]}))
df = df_orig.copy()
with catch_warnings(record=True):
df.ix[:, "B"] = df.ix[:, "A"]
tm.assert_frame_equal(df, expected)
# mixed dtype frame, overwrite
expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 2, 4])}))
df = df_orig.copy()
df["B"] = df["B"].astype(np.float64)
with catch_warnings(record=True):
df.ix[:, "B"] = df.ix[:, "A"]
tm.assert_frame_equal(df, expected)
# single dtype frame, partial setting
expected = df_orig.copy()
expected["C"] = df["A"]
df = df_orig.copy()
with catch_warnings(record=True):
df.ix[:, "C"] = df.ix[:, "A"]
tm.assert_frame_equal(df, expected)
# mixed frame, partial setting
expected = df_orig.copy()
expected["C"] = df["A"]
df = df_orig.copy()
with catch_warnings(record=True):
df.ix[:, "C"] = df.ix[:, "A"]
tm.assert_frame_equal(df, expected)
# GH 8473
dates = date_range("1/1/2000", periods=8)
df_orig = DataFrame(
np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"]
)
expected = pd.concat(
[df_orig, DataFrame({"A": 7}, index=[dates[-1] + dates.freq])], sort=True
)
df = df_orig.copy()
df.loc[dates[-1] + dates.freq, "A"] = 7
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.at[dates[-1] + dates.freq, "A"] = 7
tm.assert_frame_equal(df, expected)
exp_other = DataFrame({0: 7}, index=[dates[-1] + dates.freq])
expected = pd.concat([df_orig, exp_other], axis=1)
df = df_orig.copy()
df.loc[dates[-1] + dates.freq, 0] = 7
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.at[dates[-1] + dates.freq, 0] = 7
tm.assert_frame_equal(df, expected)
def test_partial_setting_mixed_dtype(self):
# in a mixed dtype environment, try to preserve dtypes
# by appending
df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"])
s = df.loc[1].copy()
s.name = 2
expected = df.append(s)
df.loc[2] = df.loc[1]
tm.assert_frame_equal(df, expected)
# columns will align
df = DataFrame(columns=["A", "B"])
df.loc[0] = Series(1, index=range(4))
tm.assert_frame_equal(df, DataFrame(columns=["A", "B"], index=[0]))
# columns will align
df = DataFrame(columns=["A", "B"])
df.loc[0] = Series(1, index=["B"])
exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64")
tm.assert_frame_equal(df, exp)
# list-like must conform
df = DataFrame(columns=["A", "B"])
with pytest.raises(ValueError):
df.loc[0] = [1, 2, 3]
# TODO: #15657, these are left as object and not coerced
df = DataFrame(columns=["A", "B"])
df.loc[3] = [6, 7]
exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object")
tm.assert_frame_equal(df, exp)
def test_series_partial_set(self):
# partial set with new index
# Regression from GH4825
ser = Series([0.1, 0.2], index=[1, 2])
# loc equiv to .reindex
expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[3, 2, 3]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = ser.reindex([3, 2, 3])
tm.assert_series_equal(result, expected, check_index_type=True)
expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[3, 2, 3, "x"]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = ser.reindex([3, 2, 3, "x"])
tm.assert_series_equal(result, expected, check_index_type=True)
expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
result = ser.loc[[2, 2, 1]]
tm.assert_series_equal(result, expected, check_index_type=True)
expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[2, 2, "x", 1]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = ser.reindex([2, 2, "x", 1])
tm.assert_series_equal(result, expected, check_index_type=True)
# raises as nothing in in the index
msg = (
r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are"
r" in the \[index\]\""
)
with pytest.raises(KeyError, match=msg):
ser.loc[[3, 3, 3]]
expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[2, 2, 3]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = ser.reindex([2, 2, 3])
tm.assert_series_equal(result, expected, check_index_type=True)
s = Series([0.1, 0.2, 0.3], index=[1, 2, 3])
expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[[3, 4, 4]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = s.reindex([3, 4, 4])
tm.assert_series_equal(result, expected, check_index_type=True)
s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[[5, 3, 3]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = s.reindex([5, 3, 3])
tm.assert_series_equal(result, expected, check_index_type=True)
s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[[5, 4, 4]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = s.reindex([5, 4, 4])
tm.assert_series_equal(result, expected, check_index_type=True)
s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7])
expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[[7, 2, 2]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = s.reindex([7, 2, 2])
tm.assert_series_equal(result, expected, check_index_type=True)
s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[[4, 5, 5]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = s.reindex([4, 5, 5])
tm.assert_series_equal(result, expected, check_index_type=True)
# iloc
expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
result = ser.iloc[[1, 1, 0, 0]]
tm.assert_series_equal(result, expected, check_index_type=True)
def test_series_partial_set_with_name(self):
# GH 11497
idx = Index([1, 2], dtype="int64", name="idx")
ser = Series([0.1, 0.2], index=idx, name="s")
# loc
exp_idx = Index([3, 2, 3], dtype="int64", name="idx")
expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name="s")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[3, 2, 3]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([3, 2, 3, "x"], dtype="object", name="idx")
expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, name="s")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[3, 2, 3, "x"]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([2, 2, 1], dtype="int64", name="idx")
expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s")
result = ser.loc[[2, 2, 1]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([2, 2, "x", 1], dtype="object", name="idx")
expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name="s")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[2, 2, "x", 1]]
tm.assert_series_equal(result, expected, check_index_type=True)
# raises as nothing in in the index
msg = (
r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64',"
r" name='idx'\)\] are in the \[index\]\""
)
with pytest.raises(KeyError, match=msg):
ser.loc[[3, 3, 3]]
exp_idx = Index([2, 2, 3], dtype="int64", name="idx")
expected = Series([0.2, 0.2, np.nan], index=exp_idx, name="s")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[2, 2, 3]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([3, 4, 4], dtype="int64", name="idx")
expected = Series([0.3, np.nan, np.nan], index=exp_idx, name="s")
idx = Index([1, 2, 3], dtype="int64", name="idx")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([5, 3, 3], dtype="int64", name="idx")
expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name="s")
idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([5, 4, 4], dtype="int64", name="idx")
expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name="s")
idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([7, 2, 2], dtype="int64", name="idx")
expected = Series([0.4, np.nan, np.nan], index=exp_idx, name="s")
idx = Index([4, 5, 6, 7], dtype="int64", name="idx")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([4, 5, 5], dtype="int64", name="idx")
expected = Series([0.4, np.nan, np.nan], index=exp_idx, name="s")
idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]]
tm.assert_series_equal(result, expected, check_index_type=True)
# iloc
exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx")
expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s")
result = ser.iloc[[1, 1, 0, 0]]
tm.assert_series_equal(result, expected, check_index_type=True)
@pytest.mark.filterwarnings("ignore:\\n.ix")
def test_partial_set_invalid(self):
# GH 4940
# allow only setting of 'valid' values
orig = tm.makeTimeDataFrame()
df = orig.copy()
# don't allow not string inserts
with pytest.raises(TypeError):
with catch_warnings(record=True):
df.loc[100.0, :] = df.ix[0]
with pytest.raises(TypeError):
with catch_warnings(record=True):
df.loc[100, :] = df.ix[0]
with pytest.raises(TypeError):
with catch_warnings(record=True):
df.ix[100.0, :] = df.ix[0]
with pytest.raises(ValueError):
with catch_warnings(record=True):
df.ix[100, :] = df.ix[0]
# allow object conversion here
df = orig.copy()
with catch_warnings(record=True):
df.loc["a", :] = df.ix[0]
exp = orig.append(Series(df.ix[0], name="a"))
tm.assert_frame_equal(df, exp)
tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"]))
assert df.index.dtype == "object"
def test_partial_set_empty_series(self):
# GH5226
# partially set with an empty object series
s = Series()
s.loc[1] = 1
tm.assert_series_equal(s, Series([1], index=[1]))
s.loc[3] = 3
tm.assert_series_equal(s, Series([1, 3], index=[1, 3]))
s = Series()
s.loc[1] = 1.0
tm.assert_series_equal(s, Series([1.0], index=[1]))
s.loc[3] = 3.0
tm.assert_series_equal(s, Series([1.0, 3.0], index=[1, 3]))
s = Series()
s.loc["foo"] = 1
tm.assert_series_equal(s, Series([1], index=["foo"]))
s.loc["bar"] = 3
tm.assert_series_equal(s, Series([1, 3], index=["foo", "bar"]))
s.loc[3] = 4
tm.assert_series_equal(s, Series([1, 3, 4], index=["foo", "bar", 3]))
def test_partial_set_empty_frame(self):
# partially set with an empty object
# frame
df = DataFrame()
with pytest.raises(ValueError):
df.loc[1] = 1
with pytest.raises(ValueError):
df.loc[1] = Series([1], index=["foo"])
with pytest.raises(ValueError):
df.loc[:, 1] = 1
# these work as they don't really change
# anything but the index
# GH5632
expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
def f():
df = DataFrame()
df["foo"] = Series([], dtype="object")
return df
tm.assert_frame_equal(f(), expected)
def f():
df = DataFrame()
df["foo"] = Series(df.index)
return df
tm.assert_frame_equal(f(), expected)
def f():
df = DataFrame()
df["foo"] = df.index
return df
tm.assert_frame_equal(f(), expected)
expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
expected["foo"] = expected["foo"].astype("float64")
def f():
df = DataFrame()
df["foo"] = []
return df
tm.assert_frame_equal(f(), expected)
def f():
df = DataFrame()
df["foo"] = Series(np.arange(len(df)), dtype="float64")
return df
tm.assert_frame_equal(f(), expected)
def f():
df = DataFrame()
tm.assert_index_equal(df.index, Index([], dtype="object"))
df["foo"] = range(len(df))
return df
expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
expected["foo"] = expected["foo"].astype("float64")
tm.assert_frame_equal(f(), expected)
df = DataFrame()
tm.assert_index_equal(df.columns, Index([], dtype=object))
df2 = DataFrame()
df2[1] = Series([1], index=["foo"])
df.loc[:, 1] = Series([1], index=["foo"])
tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1]))
tm.assert_frame_equal(df, df2)
# no index to start
expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0])
df = DataFrame(columns=["A", "B"])
df[0] = Series(1, index=range(4))
df.dtypes
str(df)
tm.assert_frame_equal(df, expected)
df = DataFrame(columns=["A", "B"])
df.loc[:, 0] = Series(1, index=range(4))
df.dtypes
str(df)
tm.assert_frame_equal(df, expected)
def test_partial_set_empty_frame_row(self):
# GH5720, GH5744
# don't create rows when empty
expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64"))
expected["A"] = expected["A"].astype("int64")
expected["B"] = expected["B"].astype("float64")
expected["New"] = expected["New"].astype("float64")
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
y = df[df.A > 5]
y["New"] = np.nan
tm.assert_frame_equal(y, expected)
# tm.assert_frame_equal(y,expected)
expected = DataFrame(columns=["a", "b", "c c", "d"])
expected["d"] = expected["d"].astype("int64")
df = DataFrame(columns=["a", "b", "c c"])
df["d"] = 3
tm.assert_frame_equal(df, expected)
tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object))
# reindex columns is ok
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
y = df[df.A > 5]
result = y.reindex(columns=["A", "B", "C"])
expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64"))
expected["A"] = expected["A"].astype("int64")
expected["B"] = expected["B"].astype("float64")
expected["C"] = expected["C"].astype("float64")
tm.assert_frame_equal(result, expected)
def test_partial_set_empty_frame_set_series(self):
# GH 5756
# setting with empty Series
df = DataFrame(Series())
tm.assert_frame_equal(df, DataFrame({0: Series()}))
df = DataFrame(Series(name="foo"))
tm.assert_frame_equal(df, DataFrame({"foo": Series()}))
def test_partial_set_empty_frame_empty_copy_assignment(self):
# GH 5932
# copy on empty with assignment fails
df = DataFrame(index=[0])
df = df.copy()
df["a"] = 0
expected = DataFrame(0, index=[0], columns=["a"])
tm.assert_frame_equal(df, expected)
def test_partial_set_empty_frame_empty_consistencies(self):
# GH 6171
# consistency on empty frames
df = DataFrame(columns=["x", "y"])
df["x"] = [1, 2]
expected = DataFrame(dict(x=[1, 2], y=[np.nan, np.nan]))
tm.assert_frame_equal(df, expected, check_dtype=False)
df = DataFrame(columns=["x", "y"])
df["x"] = ["1", "2"]
expected = DataFrame(dict(x=["1", "2"], y=[np.nan, np.nan]), dtype=object)
tm.assert_frame_equal(df, expected)
df = DataFrame(columns=["x", "y"])
df.loc[0, "x"] = 1
expected = DataFrame(dict(x=[1], y=[np.nan]))
tm.assert_frame_equal(df, expected, check_dtype=False)

View File

@@ -0,0 +1,247 @@
""" test scalar indexing, including at and iat """
import numpy as np
import pytest
from pandas import DataFrame, Series, Timedelta, Timestamp, date_range
from pandas.tests.indexing.common import Base
from pandas.util import testing as tm
class TestScalar(Base):
def test_at_and_iat_get(self):
def _check(f, func, values=False):
if f is not None:
indicies = self.generate_indices(f, values)
for i in indicies:
result = getattr(f, func)[i]
expected = self.get_value(f, i, values)
tm.assert_almost_equal(result, expected)
for o in self._objs:
d = getattr(self, o)
# iat
for f in [d["ints"], d["uints"]]:
_check(f, "iat", values=True)
for f in [d["labels"], d["ts"], d["floats"]]:
if f is not None:
msg = "iAt based indexing can only have integer indexers"
with pytest.raises(ValueError, match=msg):
self.check_values(f, "iat")
# at
for f in [d["ints"], d["uints"], d["labels"], d["ts"], d["floats"]]:
_check(f, "at")
def test_at_and_iat_set(self):
def _check(f, func, values=False):
if f is not None:
indicies = self.generate_indices(f, values)
for i in indicies:
getattr(f, func)[i] = 1
expected = self.get_value(f, i, values)
tm.assert_almost_equal(expected, 1)
for t in self._objs:
d = getattr(self, t)
# iat
for f in [d["ints"], d["uints"]]:
_check(f, "iat", values=True)
for f in [d["labels"], d["ts"], d["floats"]]:
if f is not None:
msg = "iAt based indexing can only have integer indexers"
with pytest.raises(ValueError, match=msg):
_check(f, "iat")
# at
for f in [d["ints"], d["uints"], d["labels"], d["ts"], d["floats"]]:
_check(f, "at")
def test_at_iat_coercion(self):
# as timestamp is not a tuple!
dates = date_range("1/1/2000", periods=8)
df = DataFrame(np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"])
s = df["A"]
result = s.at[dates[5]]
xp = s.values[5]
assert result == xp
# GH 7729
# make sure we are boxing the returns
s = Series(["2014-01-01", "2014-02-02"], dtype="datetime64[ns]")
expected = Timestamp("2014-02-02")
for r in [lambda: s.iat[1], lambda: s.iloc[1]]:
result = r()
assert result == expected
s = Series(["1 days", "2 days"], dtype="timedelta64[ns]")
expected = Timedelta("2 days")
for r in [lambda: s.iat[1], lambda: s.iloc[1]]:
result = r()
assert result == expected
def test_iat_invalid_args(self):
pass
def test_imethods_with_dups(self):
# GH6493
# iat/iloc with dups
s = Series(range(5), index=[1, 1, 2, 2, 3], dtype="int64")
result = s.iloc[2]
assert result == 2
result = s.iat[2]
assert result == 2
msg = "index 10 is out of bounds for axis 0 with size 5"
with pytest.raises(IndexError, match=msg):
s.iat[10]
msg = "index -10 is out of bounds for axis 0 with size 5"
with pytest.raises(IndexError, match=msg):
s.iat[-10]
result = s.iloc[[2, 3]]
expected = Series([2, 3], [2, 2], dtype="int64")
tm.assert_series_equal(result, expected)
df = s.to_frame()
result = df.iloc[2]
expected = Series(2, index=[0], name=2)
tm.assert_series_equal(result, expected)
result = df.iat[2, 0]
assert result == 2
def test_at_to_fail(self):
# at should not fallback
# GH 7814
s = Series([1, 2, 3], index=list("abc"))
result = s.at["a"]
assert result == 1
msg = (
"At based indexing on an non-integer index can only have"
" non-integer indexers"
)
with pytest.raises(ValueError, match=msg):
s.at[0]
df = DataFrame({"A": [1, 2, 3]}, index=list("abc"))
result = df.at["a", "A"]
assert result == 1
with pytest.raises(ValueError, match=msg):
df.at["a", 0]
s = Series([1, 2, 3], index=[3, 2, 1])
result = s.at[1]
assert result == 3
msg = "At based indexing on an integer index can only have integer indexers"
with pytest.raises(ValueError, match=msg):
s.at["a"]
df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1])
result = df.at[1, 0]
assert result == 3
with pytest.raises(ValueError, match=msg):
df.at["a", 0]
# GH 13822, incorrect error string with non-unique columns when missing
# column is accessed
df = DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]})
df.columns = ["x", "x", "z"]
# Check that we get the correct value in the KeyError
with pytest.raises(KeyError, match=r"\['y'\] not in index"):
df[["x", "y", "z"]]
def test_at_with_tz(self):
# gh-15822
df = DataFrame(
{
"name": ["John", "Anderson"],
"date": [
Timestamp(2017, 3, 13, 13, 32, 56),
Timestamp(2017, 2, 16, 12, 10, 3),
],
}
)
df["date"] = df["date"].dt.tz_localize("Asia/Shanghai")
expected = Timestamp("2017-03-13 13:32:56+0800", tz="Asia/Shanghai")
result = df.loc[0, "date"]
assert result == expected
result = df.at[0, "date"]
assert result == expected
def test_series_set_tz_timestamp(self, tz_naive_fixture):
# GH 25506
ts = Timestamp("2017-08-05 00:00:00+0100", tz=tz_naive_fixture)
result = Series(ts)
result.at[1] = ts
expected = Series([ts, ts])
tm.assert_series_equal(result, expected)
def test_mixed_index_at_iat_loc_iloc_series(self):
# GH 19860
s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])
for el, item in s.items():
assert s.at[el] == s.loc[el] == item
for i in range(len(s)):
assert s.iat[i] == s.iloc[i] == i + 1
with pytest.raises(KeyError, match="^4$"):
s.at[4]
with pytest.raises(KeyError, match="^4$"):
s.loc[4]
def test_mixed_index_at_iat_loc_iloc_dataframe(self):
# GH 19860
df = DataFrame(
[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], columns=["a", "b", "c", 1, 2]
)
for rowIdx, row in df.iterrows():
for el, item in row.items():
assert df.at[rowIdx, el] == df.loc[rowIdx, el] == item
for row in range(2):
for i in range(5):
assert df.iat[row, i] == df.iloc[row, i] == row * 5 + i
with pytest.raises(KeyError, match="^3$"):
df.at[0, 3]
with pytest.raises(KeyError, match="^3$"):
df.loc[0, 3]
def test_iat_setter_incompatible_assignment(self):
# GH 23236
result = DataFrame({"a": [0, 1], "b": [4, 5]})
result.iat[0, 0] = None
expected = DataFrame({"a": [None, 1], "b": [4, 5]})
tm.assert_frame_equal(result, expected)
def test_getitem_zerodim_np_array(self):
# GH24924
# dataframe __getitem__
df = DataFrame([[1, 2], [3, 4]])
result = df[np.array(0)]
expected = Series([1, 3], name=0)
tm.assert_series_equal(result, expected)
# series __getitem__
s = Series([1, 2])
result = s[np.array(0)]
assert result == 1

View File

@@ -0,0 +1,120 @@
import numpy as np
import pytest
import pandas as pd
from pandas.util import testing as tm
class TestTimedeltaIndexing:
def test_boolean_indexing(self):
# GH 14946
df = pd.DataFrame({"x": range(10)})
df.index = pd.to_timedelta(range(10), unit="s")
conditions = [df["x"] > 3, df["x"] == 3, df["x"] < 3]
expected_data = [
[0, 1, 2, 3, 10, 10, 10, 10, 10, 10],
[0, 1, 2, 10, 4, 5, 6, 7, 8, 9],
[10, 10, 10, 3, 4, 5, 6, 7, 8, 9],
]
for cond, data in zip(conditions, expected_data):
result = df.assign(x=df.mask(cond, 10).astype("int64"))
expected = pd.DataFrame(
data,
index=pd.to_timedelta(range(10), unit="s"),
columns=["x"],
dtype="int64",
)
tm.assert_frame_equal(expected, result)
@pytest.mark.parametrize(
"indexer, expected",
[
(0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
(slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]),
([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9]),
],
)
def test_list_like_indexing(self, indexer, expected):
# GH 16637
df = pd.DataFrame({"x": range(10)}, dtype="int64")
df.index = pd.to_timedelta(range(10), unit="s")
df.loc[df.index[indexer], "x"] = 20
expected = pd.DataFrame(
expected,
index=pd.to_timedelta(range(10), unit="s"),
columns=["x"],
dtype="int64",
)
tm.assert_frame_equal(expected, df)
def test_string_indexing(self):
# GH 16896
df = pd.DataFrame({"x": range(3)}, index=pd.to_timedelta(range(3), unit="days"))
expected = df.iloc[0]
sliced = df.loc["0 days"]
tm.assert_series_equal(sliced, expected)
@pytest.mark.parametrize("value", [None, pd.NaT, np.nan])
def test_masked_setitem(self, value):
# issue (#18586)
series = pd.Series([0, 1, 2], dtype="timedelta64[ns]")
series[series == series[0]] = value
expected = pd.Series([pd.NaT, 1, 2], dtype="timedelta64[ns]")
tm.assert_series_equal(series, expected)
@pytest.mark.parametrize("value", [None, pd.NaT, np.nan])
def test_listlike_setitem(self, value):
# issue (#18586)
series = pd.Series([0, 1, 2], dtype="timedelta64[ns]")
series.iloc[0] = value
expected = pd.Series([pd.NaT, 1, 2], dtype="timedelta64[ns]")
tm.assert_series_equal(series, expected)
@pytest.mark.parametrize(
"start,stop, expected_slice",
[
[np.timedelta64(0, "ns"), None, slice(0, 11)],
[np.timedelta64(1, "D"), np.timedelta64(6, "D"), slice(1, 7)],
[None, np.timedelta64(4, "D"), slice(0, 5)],
],
)
def test_numpy_timedelta_scalar_indexing(self, start, stop, expected_slice):
# GH 20393
s = pd.Series(range(11), pd.timedelta_range("0 days", "10 days"))
result = s.loc[slice(start, stop)]
expected = s.iloc[expected_slice]
tm.assert_series_equal(result, expected)
def test_roundtrip_thru_setitem(self):
# PR 23462
dt1 = pd.Timedelta(0)
dt2 = pd.Timedelta(28767471428571405)
df = pd.DataFrame({"dt": pd.Series([dt1, dt2])})
df_copy = df.copy()
s = pd.Series([dt1])
expected = df["dt"].iloc[1].value
df.loc[[True, False]] = s
result = df["dt"].iloc[1].value
assert expected == result
tm.assert_frame_equal(df, df_copy)
def test_loc_str_slicing(self):
ix = pd.timedelta_range(start="1 day", end="2 days", freq="1H")
ser = ix.to_series()
result = ser.loc[:"1 days"]
expected = ser.iloc[:-1]
tm.assert_series_equal(result, expected)
def test_loc_slicing(self):
ix = pd.timedelta_range(start="1 day", end="2 days", freq="1H")
ser = ix.to_series()
result = ser.loc[: ix[-2]]
expected = ser.iloc[:-1]
tm.assert_series_equal(result, expected)