8th day of python challenges 111-117
This commit is contained in:
292
venv/lib/python3.6/site-packages/pandas/tests/indexing/common.py
Normal file
292
venv/lib/python3.6/site-packages/pandas/tests/indexing/common.py
Normal file
@@ -0,0 +1,292 @@
|
||||
""" common utilities """
|
||||
|
||||
import itertools
|
||||
from warnings import catch_warnings, filterwarnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
from pandas import DataFrame, Float64Index, MultiIndex, Series, UInt64Index, date_range
|
||||
from pandas.util import testing as tm
|
||||
|
||||
from pandas.io.formats.printing import pprint_thing
|
||||
|
||||
_verbose = False
|
||||
|
||||
|
||||
def _mklbl(prefix, n):
|
||||
return ["{prefix}{i}".format(prefix=prefix, i=i) for i in range(n)]
|
||||
|
||||
|
||||
def _axify(obj, key, axis):
|
||||
# create a tuple accessor
|
||||
axes = [slice(None)] * obj.ndim
|
||||
axes[axis] = key
|
||||
return tuple(axes)
|
||||
|
||||
|
||||
class Base:
    """Comprehensive base class for the indexing tests.

    ``setup_method`` creates one Series and one DataFrame per index flavour
    named in ``_typs`` (as ``self.series_<typ>`` / ``self.frame_<typ>``) and
    then gathers them into the dicts ``self.series`` and ``self.frame``,
    keyed by flavour name.
    """

    _objs = {"series", "frame"}
    _typs = {
        "ints",
        "uints",
        "labels",
        "mixed",
        "ts",
        "floats",
        "empty",
        "ts_rev",
        "multi",
    }

    def setup_method(self, method):
        """Build the per-flavour fixtures and the ``series``/``frame`` dicts."""

        self.series_ints = Series(np.random.rand(4), index=np.arange(0, 8, 2))
        self.frame_ints = DataFrame(
            np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3)
        )

        self.series_uints = Series(
            np.random.rand(4), index=UInt64Index(np.arange(0, 8, 2))
        )
        self.frame_uints = DataFrame(
            np.random.randn(4, 4),
            index=UInt64Index(range(0, 8, 2)),
            columns=UInt64Index(range(0, 12, 3)),
        )

        self.series_floats = Series(
            np.random.rand(4), index=Float64Index(range(0, 8, 2))
        )
        self.frame_floats = DataFrame(
            np.random.randn(4, 4),
            index=Float64Index(range(0, 8, 2)),
            columns=Float64Index(range(0, 12, 3)),
        )

        m_idces = [
            MultiIndex.from_product([[1, 2], [3, 4]]),
            MultiIndex.from_product([[5, 6], [7, 8]]),
            MultiIndex.from_product([[9, 10], [11, 12]]),
        ]

        self.series_multi = Series(np.random.rand(4), index=m_idces[0])
        self.frame_multi = DataFrame(
            np.random.randn(4, 4), index=m_idces[0], columns=m_idces[1]
        )

        self.series_labels = Series(np.random.randn(4), index=list("abcd"))
        self.frame_labels = DataFrame(
            np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD")
        )

        self.series_mixed = Series(np.random.randn(4), index=[2, 4, "null", 8])
        self.frame_mixed = DataFrame(np.random.randn(4, 4), index=[2, 4, "null", 8])

        self.series_ts = Series(
            np.random.randn(4), index=date_range("20130101", periods=4)
        )
        self.frame_ts = DataFrame(
            np.random.randn(4, 4), index=date_range("20130101", periods=4)
        )

        dates_rev = date_range("20130101", periods=4).sort_values(ascending=False)
        self.series_ts_rev = Series(np.random.randn(4), index=dates_rev)
        self.frame_ts_rev = DataFrame(np.random.randn(4, 4), index=dates_rev)

        self.frame_empty = DataFrame()
        self.series_empty = Series()

        # form agglomerates: self.series / self.frame map typ -> object
        # (None when no attribute exists for that combination)
        for o in self._objs:
            d = {}
            for t in self._typs:
                d[t] = getattr(self, "{o}_{t}".format(o=o, t=t), None)

            setattr(self, o, d)

    def generate_indices(self, f, values=False):
        """Generate every index combination for ``f``.

        If ``values`` is True, each axis contributes its integer positions
        (``range(len(axis))``); if False, each axis contributes its labels.
        Returns the cartesian product over all axes.
        """

        axes = f.axes
        if values:
            axes = (list(range(len(a))) for a in axes)

        return itertools.product(*axes)

    def get_result(self, obj, method, key, axis):
        """Return the result for this obj with this key and this axis."""

        if isinstance(key, dict):
            key = key[axis]

        # use an artificial conversion to map the key as integers to the labels
        # so ix can work for comparisons
        if method == "indexer":
            method = "ix"
            key = obj._get_axis(axis)[key]

        # in case we actually want 0 index slicing
        with catch_warnings(record=True):
            try:
                xp = getattr(obj, method).__getitem__(_axify(obj, key, axis))
            except AttributeError:
                xp = getattr(obj, method).__getitem__(key)

        return xp

    def get_value(self, f, i, values=False):
        """Return the value for the location ``i``."""

        # check against values
        if values:
            return f.values[i]

        with catch_warnings(record=True):
            filterwarnings("ignore", "\\n.ix", FutureWarning)
            return f.ix[i]

    def check_values(self, f, func, values=False):
        """Compare ``getattr(f, func)[i]`` with the expected value for every ``i``.

        Does nothing when ``f`` is None.  With ``values=True`` the expected
        value is read from ``f.values``; otherwise it is obtained by chained
        ``__getitem__`` calls on ``f``.
        """

        if f is None:
            return

        # ``f.values[i]`` is positional, so the indices must be positions when
        # ``values`` is True; generate_indices makes that choice for us.
        indices = self.generate_indices(f, values)

        for i in indices:
            result = getattr(f, func)[i]

            # check against values
            if values:
                expected = f.values[i]
            else:
                expected = f
                for a in reversed(i):
                    expected = expected.__getitem__(a)

            tm.assert_almost_equal(result, expected)

    def check_result(
        self,
        name,
        method1,
        key1,
        method2,
        key2,
        typs=None,
        objs=None,
        axes=None,
        fails=None,
    ):
        """Check that ``method1[key1]`` and ``method2[key2]`` agree.

        The comparison is run for every requested object kind (``objs``),
        index flavour (``typs``) and axis (``axes``); each defaults to all
        known values (axes default to ``[0, 1]``).  ``fails`` is either
        ``True`` (a failed comparison counts as ok) or an exception class /
        tuple of classes that is accepted when raised.

        Raises
        ------
        AssertionError
            If a comparison fails, or an unexpected exception is raised.
        """

        def _eq(t, o, a, obj, k1, k2):
            """Compare equal for these 2 keys."""

            if a is not None and a > obj.ndim - 1:
                return

            def _print(result, error=None):
                # Format the one-line report; print it only in verbose mode.
                if error is not None:
                    error = str(error)
                v = (
                    "%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s,"
                    "key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s"
                    % (name, result, t, o, method1, method2, a, error or "")
                )
                if _verbose:
                    pprint_thing(v)
                # returned so callers can use the report as an error message
                return v

            try:
                rs = getattr(obj, method1).__getitem__(_axify(obj, k1, a))

                try:
                    xp = self.get_result(obj, method2, k2, a)
                except Exception:
                    # nothing to compare against
                    result = "no comp"
                    _print(result)
                    return

                detail = None

                try:
                    if is_scalar(rs) and is_scalar(xp):
                        assert rs == xp
                    elif xp.ndim == 1:
                        tm.assert_series_equal(rs, xp)
                    elif xp.ndim == 2:
                        tm.assert_frame_equal(rs, xp)
                    result = "ok"
                except AssertionError as e:
                    detail = str(e)
                    result = "fail"

                # reverse the checks
                if fails is True:
                    if result == "fail":
                        result = "ok (fail)"

                _print(result)
                if not result.startswith("ok"):
                    raise AssertionError(detail)

            except AssertionError:
                raise
            except Exception as detail:

                # if we are in fails, then ok, otherwise raise it
                if fails is not None:
                    if isinstance(detail, fails):
                        result = "ok ({0.__name__})".format(type(detail))
                        _print(result)
                        return

                result = type(detail).__name__
                raise AssertionError(_print(result, error=detail)) from detail

        if typs is None:
            typs = self._typs

        if objs is None:
            objs = self._objs

        if axes is not None:
            if not isinstance(axes, (tuple, list)):
                axes = [axes]
            else:
                axes = list(axes)
        else:
            axes = [0, 1]

        # check
        for o in objs:
            if o not in self._objs:
                continue

            d = getattr(self, o)
            for a in axes:
                for t in typs:
                    if t not in self._typs:
                        continue

                    obj = d[t]
                    if obj is None:
                        continue

                    # compare on a copy so the shared fixture is never mutated
                    _eq(t, o, a, obj.copy(), key1, key2)
|
||||
@@ -0,0 +1,23 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        # e.g. "UInt64" -> (libindex.UInt64Engine, np.uint64)
        (getattr(libindex, prefix + "Engine"), getattr(np, prefix.lower()))
        for prefix in (
            "Int64",
            "Int32",
            "Int16",
            "Int8",
            "UInt64",
            "UInt32",
            "UInt16",
            "UInt8",
            "Float64",
            "Float32",
        )
    ],
    ids=lambda x: x[0].__name__,
)
def numeric_indexing_engine_type_and_dtype(request):
    """Yield each (index engine class, matching numpy dtype) pair in turn."""
    return request.param
|
||||
@@ -0,0 +1,114 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, IntervalIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestIntervalIndex:
    """Indexing tests for objects indexed by an ``IntervalIndex``."""

    def setup_method(self, method):
        # Series 0..4 indexed by the intervals built from the breaks 0..5
        breaks = np.arange(6)
        self.s = Series(np.arange(5), IntervalIndex.from_breaks(breaks))

    def test_getitem_with_scalar(self):
        ser = self.s

        # slices with scalar bounds
        first_three = ser.iloc[:3]
        for key in (slice(None, 3), slice(None, 2.5), slice(0.1, 2.5)):
            tm.assert_series_equal(first_three, ser[key])

        # lists of scalars
        middle = ser.iloc[1:4]
        for labels in ([1.5, 2.5, 3.5], [2, 3, 4], [1.5, 3, 4]):
            tm.assert_series_equal(middle, ser[labels])

        # boolean mask
        tail = ser.iloc[2:5]
        tm.assert_series_equal(tail, ser[ser >= 2])

    @pytest.mark.parametrize("direction", ["increasing", "decreasing"])
    def test_nonoverlapping_monotonic(self, direction, closed):
        tpls = [(0, 1), (2, 3), (4, 5)]
        if direction == "decreasing":
            tpls = tpls[::-1]

        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        s = Series(list("abc"), idx)

        # an endpoint is found only when the index is closed on that side
        sides = ((idx.left, idx.closed_left), (idx.right, idx.closed_right))
        for endpoints, side_is_closed in sides:
            for key, expected in zip(endpoints, s):
                if side_is_closed:
                    assert s[key] == expected
                    assert s.loc[key] == expected
                else:
                    with pytest.raises(KeyError, match=str(key)):
                        s[key]
                    with pytest.raises(KeyError, match=str(key)):
                        s.loc[key]

        # midpoints are always found
        for key, expected in zip(idx.mid, s):
            assert s[key] == expected
            assert s.loc[key] == expected

    def test_non_matching(self):
        ser = self.s

        # this is a departure from our current
        # indexing scheme, but simpler
        for labels in ([-1, 3, 4, 5], [-1, 3]):
            with pytest.raises(KeyError, match="^$"):
                ser.loc[labels]

    def test_large_series(self):
        big_index = IntervalIndex.from_breaks(np.arange(1000001))
        s = Series(np.arange(1000000), index=big_index)

        # the three spellings of the same slice must agree
        implicit_start = s.loc[:80000]
        explicit_start = s.loc[0:80000]
        explicit_step = s.loc[0:80000:1]
        tm.assert_series_equal(implicit_start, explicit_start)
        tm.assert_series_equal(implicit_start, explicit_step)

    def test_loc_getitem_frame(self):
        frame = DataFrame({"A": range(10)})
        frame["B"] = pd.cut(frame.A, 5)
        frame = frame.set_index("B")

        # scalar
        tm.assert_frame_equal(frame.loc[4], frame.iloc[4:6])

        with pytest.raises(KeyError, match="10"):
            frame.loc[10]

        # single list-like
        tm.assert_frame_equal(frame.loc[[4]], frame.iloc[4:6])

        # non-unique
        tm.assert_frame_equal(frame.loc[[4, 5]], frame.take([4, 5, 4, 5]))

        with pytest.raises(KeyError, match="^$"):
            frame.loc[[10]]

        # partial missing
        with pytest.raises(KeyError, match="^$"):
            frame.loc[[10, 4]]
|
||||
@@ -0,0 +1,248 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Interval, IntervalIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestIntervalIndex:
    """Tests for label-based indexing on a Series with an ``IntervalIndex``."""

    def setup_method(self, method):
        # Series 0..4 indexed by the intervals built from the breaks 0..5
        self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))

    def test_loc_with_interval(self):

        # loc with single label / list of labels:
        #   - Intervals: only exact matches
        #   - scalars: those that contain it

        s = self.s

        expected = 0
        result = s.loc[Interval(0, 1)]
        assert result == expected
        result = s[Interval(0, 1)]
        assert result == expected

        expected = s.iloc[3:5]
        result = s.loc[[Interval(3, 4), Interval(4, 5)]]
        tm.assert_series_equal(expected, result)
        result = s[[Interval(3, 4), Interval(4, 5)]]
        tm.assert_series_equal(expected, result)

        # missing or not exact: both .loc and [] must raise KeyError
        # naming the interval
        missing = [
            (Interval(3, 5, closed="left"), "Interval(3, 5, closed='left')"),
            (Interval(3, 5), "Interval(3, 5, closed='right')"),
            (Interval(-2, 0), "Interval(-2, 0, closed='right')"),
            (Interval(5, 6), "Interval(5, 6, closed='right')"),
        ]
        for key, text in missing:
            with pytest.raises(KeyError, match=re.escape(text)):
                s.loc[key]

            with pytest.raises(KeyError, match=re.escape(text)):
                s[key]

    def test_loc_with_scalar(self):

        # loc with single label / list of labels:
        #   - Intervals: only exact matches
        #   - scalars: those that contain it

        s = self.s

        assert s.loc[1] == 0
        assert s.loc[1.5] == 1
        assert s.loc[2] == 1

        assert s[1] == 0
        assert s[1.5] == 1
        assert s[2] == 1

        expected = s.iloc[1:4]
        tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]])
        tm.assert_series_equal(expected, s.loc[[2, 3, 4]])
        tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]])

        expected = s.iloc[[1, 1, 2, 1]]
        tm.assert_series_equal(expected, s.loc[[1.5, 2, 2.5, 1.5]])

        expected = s.iloc[2:5]
        tm.assert_series_equal(expected, s.loc[s >= 2])

    def test_loc_with_slices(self):

        # loc with slices:
        #   - Interval objects: only works with exact matches
        #   - scalars: only works for non-overlapping, monotonic intervals,
        #     and start/stop select location based on the interval that
        #     contains them:
        #    (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop))

        s = self.s

        # slice of interval

        expected = s.iloc[:3]
        result = s.loc[Interval(0, 1) : Interval(2, 3)]
        tm.assert_series_equal(expected, result)
        result = s[Interval(0, 1) : Interval(2, 3)]
        tm.assert_series_equal(expected, result)

        expected = s.iloc[3:]
        result = s.loc[Interval(3, 4) :]
        tm.assert_series_equal(expected, result)
        result = s[Interval(3, 4) :]
        tm.assert_series_equal(expected, result)

        msg = "Interval objects are not currently supported"
        with pytest.raises(NotImplementedError, match=msg):
            s.loc[Interval(3, 6) :]

        with pytest.raises(NotImplementedError, match=msg):
            s[Interval(3, 6) :]

        with pytest.raises(NotImplementedError, match=msg):
            s.loc[Interval(3, 4, closed="left") :]

        with pytest.raises(NotImplementedError, match=msg):
            s[Interval(3, 4, closed="left") :]

        # TODO with non-existing intervals ?
        # s.loc[Interval(-1, 0):Interval(2, 3)]

        # slice of scalar

        expected = s.iloc[:3]
        tm.assert_series_equal(expected, s.loc[:3])
        tm.assert_series_equal(expected, s.loc[:2.5])
        tm.assert_series_equal(expected, s.loc[0.1:2.5])
        tm.assert_series_equal(expected, s.loc[-1:3])

        tm.assert_series_equal(expected, s[:3])
        tm.assert_series_equal(expected, s[:2.5])
        tm.assert_series_equal(expected, s[0.1:2.5])

        # slice of scalar with step != 1
        with pytest.raises(ValueError):
            s[0:4:2]

    def test_loc_with_overlap(self):

        idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
        s = Series(range(len(idx)), index=idx)

        # scalar
        expected = s
        result = s.loc[4]
        tm.assert_series_equal(expected, result)

        result = s[4]
        tm.assert_series_equal(expected, result)

        result = s.loc[[4]]
        tm.assert_series_equal(expected, result)

        result = s[[4]]
        tm.assert_series_equal(expected, result)

        # interval (these comparisons were previously evaluated but
        # never asserted)
        expected = 0
        result = s.loc[Interval(1, 5)]
        assert result == expected

        result = s[Interval(1, 5)]
        assert result == expected

        expected = s
        result = s.loc[[Interval(1, 5), Interval(3, 7)]]
        tm.assert_series_equal(expected, result)

        result = s[[Interval(1, 5), Interval(3, 7)]]
        tm.assert_series_equal(expected, result)

        with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
            s.loc[Interval(3, 5)]

        with pytest.raises(KeyError, match="^$"):
            s.loc[[Interval(3, 5)]]

        with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
            s[Interval(3, 5)]

        with pytest.raises(KeyError, match="^$"):
            s[[Interval(3, 5)]]

        # slices with interval (only exact matches)
        expected = s
        result = s.loc[Interval(1, 5) : Interval(3, 7)]
        tm.assert_series_equal(expected, result)

        result = s[Interval(1, 5) : Interval(3, 7)]
        tm.assert_series_equal(expected, result)

        # parenthesised so both halves form one pattern; without the
        # parentheses the second literal is a no-op statement
        msg = (
            "'can only get slices from an IntervalIndex if bounds are"
            " non-overlapping and all monotonic increasing or decreasing'"
        )
        with pytest.raises(KeyError, match=msg):
            s.loc[Interval(1, 6) : Interval(3, 8)]

        with pytest.raises(KeyError, match=msg):
            s[Interval(1, 6) : Interval(3, 8)]

        # slices with scalar raise for overlapping intervals
        # TODO KeyError is the appropriate error?
        with pytest.raises(KeyError, match=msg):
            s.loc[1:4]

    def test_non_unique(self):

        idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
        s = Series(range(len(idx)), index=idx)

        result = s.loc[Interval(1, 3)]
        assert result == 0

        result = s.loc[[Interval(1, 3)]]
        expected = s.iloc[0:1]
        tm.assert_series_equal(expected, result)

    def test_non_unique_moar(self):

        idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
        s = Series(range(len(idx)), index=idx)

        expected = s.iloc[[0, 1]]
        result = s.loc[Interval(1, 3)]
        tm.assert_series_equal(expected, result)

        expected = s
        result = s.loc[Interval(1, 3) :]
        tm.assert_series_equal(expected, result)

        expected = s
        result = s[Interval(1, 3) :]
        tm.assert_series_equal(expected, result)

        expected = s.iloc[[0, 1]]
        result = s[[Interval(1, 3)]]
        tm.assert_series_equal(expected, result)
|
||||
@@ -0,0 +1,30 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Index, MultiIndex
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def multiindex_dataframe_random_data():
    """DataFrame with 2 level MultiIndex with random data"""
    row_index = MultiIndex(
        levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    values = np.random.randn(10, 3)
    column_index = Index(["A", "B", "C"], name="exp")
    return DataFrame(values, index=row_index, columns=column_index)
|
||||
|
||||
|
||||
@pytest.fixture
def multiindex_year_month_day_dataframe_random_data():
    """DataFrame with 3 level MultiIndex (year, month, day) covering
    first 100 business days from 2000-01-01 with random data"""
    tdf = tm.makeTimeDataFrame(100)
    ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
    # use Int64Index, to make sure things work
    # (reassign the index instead of mutating it with ``inplace=True``;
    # the resulting index is the same)
    ymd.index = ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels])
    ymd.index = ymd.index.set_names(["year", "month", "day"])
    return ymd
|
||||
@@ -0,0 +1,64 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
from pandas.core import common as com
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_detect_chained_assignment():
    """An in-place fillna through chained indexing must raise."""
    # Inplace ops, originally from:
    # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
    keys = [("eyes", "left"), ("eyes", "right"), ("ears", "left"), ("ears", "right")]
    column_values = [[12, 23], [123, None], [1234, 2345], [12345, 23456]]
    events = dict(zip(keys, column_values))

    columns = MultiIndex.from_tuples(keys, names=["part", "side"])
    zed = DataFrame(events, index=["a", "b"], columns=columns)

    with pytest.raises(com.SettingWithCopyError):
        zed["eyes"]["right"].fillna(value=555, inplace=True)
|
||||
|
||||
|
||||
def test_cache_updating():
    """Setting through chained and direct indexers must be visible via .loc."""
    # 5216
    # make sure that we don't try to set a dead cache
    df = DataFrame(np.random.rand(10, 3), columns=["x", "y", "z"])
    pairs = [(outer, inner) for outer in range(5) for inner in range(2)]
    df.index = MultiIndex.from_tuples(pairs)

    # setting via chained assignment
    # but actually works, since everything is a view
    df.loc[0]["z"].iloc[0] = 1.0
    assert df.loc[(0, 0), "z"] == 1

    # correct setting
    df.loc[(0, 0), "z"] = 2
    assert df.loc[(0, 0), "z"] == 2
|
||||
|
||||
|
||||
def test_indexer_caching():
    """Boolean setitem on a large MultiIndex-ed Series sets every element."""
    # GH5727
    # make sure that indexers are in the _internal_names_set
    size = 1000001
    index = MultiIndex.from_tuples(zip(range(size), range(size)))
    s = Series(np.zeros(size), index=index)
    str(s)

    # setitem
    all_ones = Series(np.ones(size), index=index)
    s = Series(np.zeros(size), index=index)
    s[s == 0] = 1
    tm.assert_series_equal(s, all_ones)
|
||||
@@ -0,0 +1,22 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import Index, Period, Series, period_range
|
||||
|
||||
|
||||
def test_multiindex_period_datetime():
    """A period level can be addressed with a Period or with a datetime."""
    # GH4861, using datetime in period of multiindex raises exception

    letters = Index(["a", "a", "a", "b", "b"])
    months = period_range("2012-01", periods=len(letters), freq="M")
    s = Series(np.random.randn(len(letters)), [letters, months])

    first_value = s.iloc[0]

    # try Period as index
    assert s.loc["a", Period("2012-01")] == first_value

    # try datetime as index
    assert s.loc["a", datetime(2012, 1, 1)] == first_value
|
||||
@@ -0,0 +1,252 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Index, MultiIndex, Series
|
||||
from pandas.core.indexing import IndexingError
|
||||
from pandas.util import testing as tm
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of Series with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "access_method",
    [lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)],
)
@pytest.mark.parametrize(
    "level1_value, expected",
    [(0, Series([1], index=[0])), (1, Series([2, 3], index=[1, 2]))],
)
def test_series_getitem_multiindex(access_method, level1_value, expected):
    """Selecting on the second level returns the matching rows."""
    # GH 6018
    # series regression getitem with a multi-index

    mi = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)])
    ser = Series([1, 2, 3])
    ser.index = mi

    tm.assert_series_equal(access_method(ser, level1_value), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level0_value", ["D", "A"])
def test_series_getitem_duplicates_multiindex(level0_value):
    """Missing first-level labels raise KeyError; present ones select rows."""
    # GH 5725 the 'A' happens to be a valid Timestamp so it doesn't raise
    # the appropriate error, only in PY3 of course!

    index = MultiIndex(
        levels=[[level0_value, "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
        codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
        names=["tag", "day"],
    )
    arr = np.random.randn(len(index), 1)
    df = DataFrame(arr, index=index, columns=["val"])

    # confirm indexing on missing value raises KeyError
    # ("A" is only missing when it is not the level value under test)
    absent = [("X", r"^'X'$")]
    if level0_value != "A":
        absent.insert(0, ("A", r"^'A'$"))
    for label, pattern in absent:
        with pytest.raises(KeyError, match=pattern):
            df.val[label]

    day_index = Index([26, 37, 57], name="day")
    expected = Series(arr.ravel()[0:3], name="val", index=day_index)
    tm.assert_series_equal(df.val[level0_value], expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("indexer", [lambda s: s[2000, 3], lambda s: s.loc[2000, 3]])
def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer):
    """Indexing with (year, month) returns that month with both levels dropped."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]

    expected = ser.reindex(ser.index[42:65])
    # drop the two outer levels one at a time
    without_first = expected.index.droplevel(0)
    expected.index = without_first.droplevel(0)

    tm.assert_series_equal(indexer(ser), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer", [lambda s: s[2000, 3, 10], lambda s: s.loc[2000, 3, 10]]
)
def test_series_getitem_returns_scalar(
    multiindex_year_month_day_dataframe_random_data, indexer
):
    """A fully specified key returns the same scalar as positional access."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]
    by_position = ser.iloc[49]

    assert indexer(ser) == by_position
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected_error,expected_error_msg",
    [
        (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"),
        (lambda s: s.__getitem__(len(s)), IndexError, "index out of bounds"),
        (lambda s: s[len(s)], IndexError, "index out of bounds"),
        (
            lambda s: s.iloc[len(s)],
            IndexError,
            "single positional indexer is out-of-bounds",
        ),
    ],
)
def test_series_getitem_indexing_errors(
    multiindex_year_month_day_dataframe_random_data,
    indexer,
    expected_error,
    expected_error_msg,
):
    """Each invalid access raises the expected error with the expected text."""
    column_a = multiindex_year_month_day_dataframe_random_data["A"]
    with pytest.raises(expected_error, match=expected_error_msg):
        indexer(column_a)
|
||||
|
||||
|
||||
def test_series_getitem_corner_generator(
    multiindex_year_month_day_dataframe_random_data
):
    """A generator of booleans selects the same rows as a boolean Series."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]
    via_generator = ser[(x > 0 for x in ser)]
    via_mask = ser[ser > 0]
    tm.assert_series_equal(via_generator, via_mask)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_getitem_simple(multiindex_dataframe_random_data):
    """A full column key returns the values of the first column."""
    transposed = multiindex_dataframe_random_data.T
    first_column = transposed.values[:, 0]
    selected = transposed["foo", "one"].values
    tm.assert_almost_equal(selected, first_column)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected_error_msg",
    [
        (lambda df: df[("foo", "four")], r"^\('foo', 'four'\)$"),
        (lambda df: df["foobar"], r"^'foobar'$"),
    ],
)
def test_frame_getitem_simple_key_error(
    multiindex_dataframe_random_data, indexer, expected_error_msg
):
    """Unknown column keys raise KeyError naming the key."""
    transposed = multiindex_dataframe_random_data.T
    with pytest.raises(KeyError, match=expected_error_msg):
        indexer(transposed)
|
||||
|
||||
|
||||
def test_frame_getitem_multicolumn_empty_level():
    """Selecting a top-level label skips an empty-string middle level."""
    df = DataFrame({"a": ["1", "2", "3"], "b": ["2", "3", "4"]})
    level1 = ["level1 item1", "level1 item2"]
    level2 = ["", "level2 item2"]
    level3 = ["level3 item1", "level3 item2"]
    df.columns = [level1, level2, level3]

    selected = df["level1 item1"]
    expected = DataFrame(
        [["1"], ["2"], ["3"]], index=df.index, columns=["level3 item1"]
    )
    tm.assert_frame_equal(selected, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected_slice",
    [
        (lambda df: df["foo"], slice(3)),
        (lambda df: df["bar"], slice(3, 5)),
        (lambda df: df.loc[:, "bar"], slice(3, 5)),
    ],
)
def test_frame_getitem_toplevel(
    multiindex_dataframe_random_data, indexer, expected_slice
):
    """A top-level column key returns its sub-frame with that level dropped."""
    transposed = multiindex_dataframe_random_data.T

    wanted_columns = transposed.columns[expected_slice]
    expected = transposed.reindex(columns=wanted_columns)
    expected.columns = expected.columns.droplevel(0)

    tm.assert_frame_equal(indexer(transposed), expected)
|
||||
|
||||
|
||||
def test_frame_mixed_depth_get():
    """Partial keys on mixed-depth columns match the empty-string padded keys."""
    arrays = [
        ["a", "top", "top", "routine1", "routine1", "routine2"],
        ["", "OD", "OD", "result1", "result2", "result1"],
        ["", "wx", "wy", "", "", ""],
    ]

    columns = MultiIndex.from_tuples(sorted(zip(*arrays)))
    df = DataFrame(np.random.randn(4, 6), columns=columns)

    # single-label key
    padded = df["a", "", ""].rename("a")
    tm.assert_series_equal(df["a"], padded)

    # two-label key
    padded = df["routine1", "result1", ""].rename(("routine1", "result1"))
    tm.assert_series_equal(df["routine1", "result1"], padded)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index with duplicates
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def dataframe_with_duplicate_index():
    """DataFrame with duplicated MultiIndex columns (tests for gh-4145, gh-4146)"""
    rows = [["a", "d", "e", "c", "f", "b"], [1, 4, 5, 3, 6, 2], [1, 4, 5, 3, 6, 2]]
    # The codes repeat the ("A", "B2") pair, so the columns are non-unique.
    duplicated_columns = MultiIndex(
        levels=[["A", "B"], ["A1", "A2", "B1", "B2"]],
        codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
        names=["main", "sub"],
    )
    return DataFrame(rows, index=["h1", "h3", "h5"], columns=duplicated_columns)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer", [lambda df: df[("A", "A1")], lambda df: df.loc[:, ("A", "A1")]]
)
def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
    """GH 4145: a full tuple key on the duplicated columns gives a DataFrame."""
    result = indexer(dataframe_with_duplicate_index)

    # The expectation is written as one row and then transposed.
    row_labels = Index(["h1", "h3", "h5"])
    single_column = MultiIndex.from_tuples([("A", "A1")], names=["main", "sub"])
    expected = DataFrame([["a", 1, 1]], index=single_column, columns=row_labels).T

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
    """GH 4146: chained selection of a unique sub-label yields a Series."""
    # Not returning a block manager when selecting a unique index from a
    # duplicate index; as of 4879 this returns a Series (which is similar to
    # what happens with a non-unique).
    result = dataframe_with_duplicate_index["A"]["A1"]
    expected = Series(["a", 1, 1], index=["h1", "h3", "h5"], name="A1")
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
    """Selecting a non-unique label from the 2nd level yields a DataFrame."""
    result = dataframe_with_duplicate_index["A"]["B2"]

    duplicated_sub = Index(["B2", "B2"], name="sub")
    expected = DataFrame(
        [["d", 4, 4], ["e", 5, 5]],
        index=duplicated_sub,
        columns=["h1", "h3", "h5"],
    ).T
    tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,171 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def simple_multiindex_dataframe():
    """
    Return a factory building a simple 3 x 3 dataframe whose rows and
    columns are both MultiIndexed; the factory uses the supplied data,
    or random data by default.
    """

    def _simple_multiindex_dataframe(data=None):
        values = np.random.randn(3, 3) if data is None else data
        row_levels = [[4, 4, 8], [8, 10, 12]]
        column_levels = [[2, 2, 4], [6, 8, 10]]
        return DataFrame(values, columns=column_levels, index=row_levels)

    return _simple_multiindex_dataframe
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer, expected",
    [
        (
            lambda df: df.iloc[0],
            lambda arr: Series(arr[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8)),
        ),
        (
            lambda df: df.iloc[2],
            lambda arr: Series(arr[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12)),
        ),
        (
            lambda df: df.iloc[:, 2],
            lambda arr: Series(arr[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10)),
        ),
    ],
)
def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe):
    """A single positional row or column is returned as a Series.

    ``expected`` is a builder that turns the raw array into the Series the
    matching ``indexer`` should produce.
    """
    values = np.random.randn(3, 3)
    frame = simple_multiindex_dataframe(values)
    tm.assert_series_equal(indexer(frame), expected(values))
|
||||
|
||||
|
||||
def test_iloc_returns_dataframe(simple_multiindex_dataframe):
    """A list of row positions matches ``xs(4, drop_level=False)``."""
    frame = simple_multiindex_dataframe()
    by_position = frame.iloc[[0, 1]]
    by_label = frame.xs(4, drop_level=False)
    tm.assert_frame_equal(by_position, by_label)
|
||||
|
||||
|
||||
def test_iloc_returns_scalar(simple_multiindex_dataframe):
    """Two integer positions address a single cell of the source array."""
    values = np.random.randn(3, 3)
    frame = simple_multiindex_dataframe(values)
    assert frame.iloc[2, 2] == values[2, 2]
|
||||
|
||||
|
||||
def test_iloc_getitem_multiple_items():
    """GH 5528: a list of positions selects whole rows of a MultiIndexed frame."""
    outer = ["a", "a", "b", "b"]
    inner = ["x", "y", "x", "y"]
    index = MultiIndex.from_tuples(zip(outer, inner))
    df = DataFrame(np.random.randn(4, 4), index=index)

    # Positions 2 and 3 are the rows under the outer label "b".
    tm.assert_frame_equal(df.iloc[[2, 3]], df.xs("b", drop_level=False))
|
||||
|
||||
|
||||
def test_iloc_getitem_labels():
    """Positional scalar access on a frame with repeated MultiIndex labels."""
    # this is basically regular indexing
    values = np.random.randn(4, 3)
    df = DataFrame(
        values,
        columns=[["i", "i", "j"], ["A", "A", "B"]],
        index=[["i", "i", "j", "k"], ["X", "X", "Y", "Y"]],
    )
    assert df.iloc[2, 2] == values[2, 2]
|
||||
|
||||
|
||||
def test_frame_getitem_slice(multiindex_dataframe_random_data):
    """``iloc[:4]`` and plain ``[:4]`` slicing select the same rows."""
    frame = multiindex_dataframe_random_data
    tm.assert_frame_equal(frame.iloc[:4], frame[:4])
|
||||
|
||||
|
||||
def test_frame_setitem_slice(multiindex_dataframe_random_data):
    """Assigning through ``iloc[:4]`` zeroes only the first four rows."""
    frame = multiindex_dataframe_random_data
    frame.iloc[:4] = 0

    data = frame.values
    assert (data[:4] == 0).all()
    assert (data[4:] != 0).all()
|
||||
|
||||
|
||||
def test_indexing_ambiguity_bug_1678():
    """GH 1678: positional column access matches the tuple-label lookup."""
    column_tuples = [("Ohio", "Green"), ("Ohio", "Red"), ("Colorado", "Green")]
    row_tuples = [("a", 1), ("a", 2), ("b", 1), ("b", 2)]
    df = DataFrame(
        np.arange(12).reshape((4, 3)),
        index=MultiIndex.from_tuples(row_tuples),
        columns=MultiIndex.from_tuples(column_tuples),
    )

    by_position = df.iloc[:, 1]
    by_label = df.loc[:, ("Ohio", "Red")]
    tm.assert_series_equal(by_position, by_label)
|
||||
|
||||
|
||||
def test_iloc_integer_locations():
    """GH 13797: cell-by-cell ``iloc`` reads reproduce the input data."""
    data = [
        ["str00", "str01"],
        ["str10", "str11"],
        ["str20", "srt21"],
        ["str30", "str31"],
        ["str40", "str41"],
    ]
    index = MultiIndex.from_tuples(
        [("CC", "A"), ("CC", "B"), ("CC", "B"), ("BB", "a"), ("BB", "b")]
    )
    df = DataFrame(data, index=index)

    # Read every cell by integer position and rebuild a plain frame from it.
    n_rows, n_cols = 5, 2
    cells = [[df.iloc[row, col] for col in range(n_cols)] for row in range(n_rows)]

    tm.assert_frame_equal(DataFrame(cells), DataFrame(data))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, indexes, values, expected_k",
    [
        # test without indexer value in first level of MultiIndex
        ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
        # test like code sample 1 in the issue
        ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], [755, 1066]),
        # test like code sample 2 in the issue
        ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
        # test like code sample 3 in the issue
        ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], [8, 15, 13]),
    ],
)
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
    """GH17148: in-place ``iloc`` additions on a Series with a MultiIndex.

    Each position in ``indexes`` is incremented by the matching entry of
    ``values``; the outcome is compared with the "k" column set to
    ``expected_k``.
    """
    df = DataFrame(data=data, columns=["i", "j", "k"]).set_index(["i", "j"])

    series = df.k.copy()
    for position, increment in zip(indexes, values):
        series.iloc[position] += increment

    df["k"] = expected_k
    tm.assert_series_equal(series, df.k)
|
||||
|
||||
|
||||
def test_getitem_iloc(multiindex_dataframe_random_data):
    """``iloc[2]`` equals the cross-section taken at the third index label."""
    frame = multiindex_dataframe_random_data
    by_position = frame.iloc[2]
    by_label = frame.xs(frame.index[2])
    tm.assert_series_equal(by_position, by_label)
|
||||
@@ -0,0 +1,92 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_multiindex_get_loc():  # GH7724, GH2646
    """Check ``.loc`` with full and partial keys at several sort depths.

    Random rows are indexed by the first four columns; every prefix of every
    test key is looked up through ``.loc`` and compared with a boolean-mask
    selection on the flat frame.
    """
    with warnings.catch_warnings(record=True):

        # test indexing into a multi-index before & past the lexsort depth
        from numpy.random import randint, choice, randn

        cols = ["jim", "joe", "jolie", "joline", "jolia"]

        def validate(mi, df, key):
            mask = np.ones(len(df)).astype("bool")

            # test for all partials of this key
            for i, k in enumerate(key):
                mask &= df.iloc[:, i] == k
                partial = key[: i + 1]

                if not mask.any():
                    assert partial not in mi.index
                    continue

                assert partial in mi.index
                right = df[mask].copy()

                if i + 1 != len(key):  # partial key
                    right.drop(cols[: i + 1], axis=1, inplace=True)
                    right.set_index(cols[i + 1 : -1], inplace=True)
                    tm.assert_frame_equal(mi.loc[partial], right)

                else:  # full key
                    right.set_index(cols[:-1], inplace=True)
                    if len(right) == 1:  # single hit
                        right = Series(
                            right["jolia"].values, name=right.index[0], index=["jolia"]
                        )
                        tm.assert_series_equal(mi.loc[partial], right)
                    else:  # multi hit
                        tm.assert_frame_equal(mi.loc[partial], right)

        n, m = 1000, 50

        vals = [
            randint(0, 10, n),
            choice(list("abcdefghij"), n),
            choice(pd.date_range("20141009", periods=10).tolist(), n),
            choice(list("ZYXWVUTSRQ"), n),
            randn(n),
        ]
        vals = list(zip(*vals))

        # bunch of keys for testing; each column has one more candidate than
        # the data, so some keys cannot be present
        keys = [
            randint(0, 11, m),
            choice(list("abcdefghijk"), m),
            choice(pd.date_range("20141009", periods=11).tolist(), m),
            choice(list("ZYXWVUTSRQP"), m),
        ]
        keys = list(zip(*keys))
        # plus keys taken from actual rows (value column dropped)
        keys += [row[:-1] for row in vals[:: n // m]]

        # covers both unique index and non-unique index
        df = DataFrame(vals, columns=cols)
        a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1])

        for frame in a, b:
            for depth in range(5):  # lexsort depth
                if depth == 0:
                    candidate = frame.copy()
                else:
                    candidate = frame.sort_values(by=cols[:depth])
                mi = candidate.set_index(cols[:-1])
                assert mi.index.lexsort_depth >= depth
                for key in keys:
                    validate(mi, candidate, key)
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_large_mi_dataframe_indexing():
    """GH10645: membership test on a MultiIndex with 10 ** 6 entries."""
    result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)])
    # Both levels run from 0 to 10 ** 6 - 1, so this tuple is not a member.
    assert (10 ** 6, 0) not in result
|
||||
@@ -0,0 +1,75 @@
|
||||
from warnings import catch_warnings, simplefilter
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
from pandas import DataFrame, MultiIndex
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning")
class TestMultiIndexIx:
    """Tests that exercise the ``.ix`` indexer on MultiIndexed objects."""

    def test_frame_setitem_ix(self, multiindex_dataframe_random_data):
        """Scalar assignment through a (row tuple, column) key."""
        frame = multiindex_dataframe_random_data
        row_key = ("bar", "two")
        frame.loc[row_key, "B"] = 5
        assert frame.loc[row_key, "B"] == 5

        # with integer labels
        int_cols = frame.copy()
        int_cols.columns = list(range(3))
        int_cols.loc[row_key, 1] = 7
        assert int_cols.loc[row_key, 1] == 7

        with catch_warnings(record=True):
            simplefilter("ignore", FutureWarning)
            via_ix = frame.copy()
            via_ix.columns = list(range(3))
            via_ix.ix[row_key, 1] = 7
            assert via_ix.loc[row_key, 1] == 7

    def test_ix_general(self):
        """GH 2817: tuple-key lookup before and after sorting the index."""
        # ix general issues
        data = {
            "amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
            "col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
            "year": {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012},
        }
        df = DataFrame(data).set_index(keys=["col", "year"])
        key = 4.0, 2012

        # emits a PerformanceWarning, ok
        with tm.assert_produces_warning(PerformanceWarning):
            tm.assert_frame_equal(df.loc[key], df.iloc[2:])

        # this is ok
        df.sort_index(inplace=True)
        res = df.loc[key]

        # col has float dtype, result should be Float64Index
        expected_index = MultiIndex.from_arrays(
            [[4.0] * 3, [2012] * 3], names=["col", "year"]
        )
        expected = DataFrame({"amount": [222, 333, 444]}, index=expected_index)
        tm.assert_frame_equal(res, expected)

    def test_ix_multiindex_missing_label_raises(self):
        """GH 21593: a label absent from the row index raises KeyError."""
        df = DataFrame(
            np.random.randn(3, 3),
            columns=[[2, 2, 4], [6, 8, 10]],
            index=[[4, 4, 8], [8, 10, 12]],
        )

        with pytest.raises(KeyError, match=r"^2$"):
            df.ix[2]

    def test_series_ix_getitem_fancy(
        self, multiindex_year_month_day_dataframe_random_data
    ):
        """A list of full tuple keys selects the matching Series entries."""
        s = multiindex_year_month_day_dataframe_random_data["A"]
        wanted = [(2000, 3, 10), (2000, 3, 13)]
        expected = s.reindex(s.index[49:51])
        tm.assert_series_equal(s.ix[wanted], expected)
|
||||
@@ -0,0 +1,392 @@
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, MultiIndex, Series
|
||||
from pandas.core.indexing import IndexingError
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def single_level_multiindex():
    """MultiIndex with a single level named "first" and four labels"""
    labels = ["foo", "bar", "baz", "qux"]
    return MultiIndex(levels=[labels], codes=[[0, 1, 2, 3]], names=["first"])
|
||||
|
||||
|
||||
@pytest.fixture
def frame_random_data_integer_multi_index():
    """6 x 2 random DataFrame indexed by a two-level integer MultiIndex"""
    integer_index = MultiIndex(
        levels=[[0, 1], [0, 1, 2]],
        codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
    )
    return DataFrame(np.random.randn(6, 2), index=integer_index)
|
||||
|
||||
|
||||
class TestMultiIndexLoc:
    """Label-based (``.loc``) selection on MultiIndexed Series and DataFrames."""

    def test_loc_getitem_series(self):
        """GH14730: passing a Series as a key with a MultiIndex."""
        index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = Series([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
            dtype=np.float64,
        )
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        result = x.loc[[1, 3]]
        tm.assert_series_equal(result, expected)

        # GH15424
        y1 = Series([1, 3], index=[1, 2])
        result = x.loc[y1]
        tm.assert_series_equal(result, expected)

        empty = Series(data=[], dtype=np.float64)
        # The dtype describes the expected values, so it is given to the
        # Series rather than to the MultiIndex constructor.
        expected = Series(
            [],
            index=MultiIndex(levels=index.levels, codes=[[], []]),
            dtype=np.float64,
        )
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

    def test_loc_getitem_array(self):
        """GH15434: passing an array as a key with a MultiIndex."""
        index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = np.array([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
            dtype=np.float64,
        )
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        # empty array:
        empty = np.array([])
        # dtype is given to the Series, not to the MultiIndex constructor.
        expected = Series(
            [],
            index=MultiIndex(levels=index.levels, codes=[[], []]),
            dtype=np.float64,
        )
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

        # 0-dim array (scalar):
        scalar = np.int64(1)
        expected = Series(data=[0, 1, 2], index=["A", "B", "C"], dtype=np.float64)
        result = x.loc[scalar]
        tm.assert_series_equal(result, expected)

    def test_loc_multiindex_labels(self):
        """String labels on rows and columns, including a tuple key."""
        df = DataFrame(
            np.random.randn(3, 3),
            columns=[["i", "i", "j"], ["A", "A", "B"]],
            index=[["i", "i", "j"], ["X", "X", "Y"]],
        )

        # the first 2 rows
        expected = df.iloc[[0, 1]].droplevel(0)
        result = df.loc["i"]
        tm.assert_frame_equal(result, expected)

        # 2nd (last) column
        expected = df.iloc[:, [2]].droplevel(0, axis=1)
        result = df.loc[:, "j"]
        tm.assert_frame_equal(result, expected)

        # bottom right corner
        expected = df.iloc[[2], [2]].droplevel(0).droplevel(0, axis=1)
        result = df.loc["j"].loc[:, "j"]
        tm.assert_frame_equal(result, expected)

        # with a tuple
        expected = df.iloc[[0, 1]]
        result = df.loc[("i", "X")]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_ints(self):
        """An integer label on the first row level selects its rows."""
        df = DataFrame(
            np.random.randn(3, 3),
            columns=[[2, 2, 4], [6, 8, 10]],
            index=[[4, 4, 8], [8, 10, 12]],
        )
        expected = df.iloc[[0, 1]].droplevel(0)
        result = df.loc[4]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_missing_label_raises(self):
        """A scalar label absent from the row index raises KeyError."""
        df = DataFrame(
            np.random.randn(3, 3),
            columns=[[2, 2, 4], [6, 8, 10]],
            index=[[4, 4, 8], [8, 10, 12]],
        )

        with pytest.raises(KeyError, match=r"^2$"):
            df.loc[2]

    @pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])])
    def test_loc_multiindex_list_missing_label(self, key, pos):
        """GH 27148: lists with missing labels do not raise."""
        df = DataFrame(
            np.random.randn(3, 3),
            columns=[[2, 2, 4], [6, 8, 10]],
            index=[[4, 4, 8], [8, 10, 12]],
        )

        expected = df.iloc[pos]
        result = df.loc[key]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_too_many_dims_raises(self):
        """GH 14885: missing keys raise KeyError, excess indexers IndexingError."""
        s = Series(
            range(8),
            index=MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
        )

        with pytest.raises(KeyError, match=r"^\('a', 'b'\)$"):
            s.loc["a", "b"]
        with pytest.raises(KeyError, match=r"^\('a', 'd', 'g'\)$"):
            s.loc["a", "d", "g"]
        with pytest.raises(IndexingError, match="Too many indexers"):
            s.loc["a", "d", "g", "j"]

    def test_loc_multiindex_indexer_none(self):
        """GH6788 and GH 7349: list keys covering whole first-level groups."""

        # GH6788
        # multi-index indexer is None (meaning take all)
        attributes = ["Attribute" + str(i) for i in range(1)]
        attribute_values = ["Value" + str(i) for i in range(5)]

        index = MultiIndex.from_product([attributes, attribute_values])
        df = 0.1 * np.random.randn(10, 1 * 5) + 0.5
        df = DataFrame(df, columns=index)
        result = df[attributes]
        tm.assert_frame_equal(result, df)

        # GH 7349
        # loc with a multi-index seems to be doing fallback
        df = DataFrame(
            np.arange(12).reshape(-1, 1),
            index=MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]]),
        )

        expected = df.loc[([1, 2],), :]
        result = df.loc[[1, 2]]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_incomplete(self):
        """GH 7399 and GH 7400: slice indexers on both levels of a Series."""

        # GH 7399
        # incomplete indexers
        s = Series(
            np.arange(15, dtype="int64"),
            MultiIndex.from_product([range(5), ["a", "b", "c"]]),
        )
        expected = s.loc[:, "a":"c"]

        result = s.loc[0:4, "a":"c"]
        tm.assert_series_equal(result, expected)

        result = s.loc[:4, "a":"c"]
        tm.assert_series_equal(result, expected)

        result = s.loc[0:, "a":"c"]
        tm.assert_series_equal(result, expected)

        # GH 7400
        # multiindexer getitem with list of indexers skips wrong element
        s = Series(
            np.arange(15, dtype="int64"),
            MultiIndex.from_product([range(5), ["a", "b", "c"]]),
        )
        expected = s.iloc[[6, 7, 8, 12, 13, 14]]
        result = s.loc[2:4:2, "a":"c"]
        tm.assert_series_equal(result, expected)

    def test_get_loc_single_level(self, single_level_multiindex):
        """Smoke test: every label of a one-level MultiIndex can be looked up."""
        single_level = single_level_multiindex
        s = Series(np.random.randn(len(single_level)), index=single_level)
        for k in single_level.values:
            # Only checks that the lookup does not raise.
            s[k]

    def test_loc_getitem_int_slice(self):
        """GH 3053: loc should treat integer slices like label slices."""

        index = MultiIndex.from_tuples(list(itertools.product([6, 7, 8], ["a", "b"])))
        df = DataFrame(np.random.randn(6, 6), index, index)
        result = df.loc[6:8, :]
        expected = df
        tm.assert_frame_equal(result, expected)

        index = MultiIndex.from_tuples(
            list(itertools.product([10, 20, 30], ["a", "b"]))
        )
        df = DataFrame(np.random.randn(6, 6), index, index)
        result = df.loc[20:30, :]
        expected = df.iloc[2:]
        tm.assert_frame_equal(result, expected)

        # doc examples
        result = df.loc[10, :]
        expected = df.iloc[0:2]
        expected.index = ["a", "b"]
        tm.assert_frame_equal(result, expected)

        result = df.loc[:, 10]
        expected = df[10]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "indexer_type_1", (list, tuple, set, slice, np.ndarray, Series, Index)
    )
    @pytest.mark.parametrize(
        "indexer_type_2", (list, tuple, set, slice, np.ndarray, Series, Index)
    )
    def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
        """GH #19686: nested list-like or slice indexers inside a tuple key."""
        # .loc should work with nested indexers which can be
        # any list-like objects (see `pandas.api.types.is_list_like`) or slices

        def convert_nested_indexer(indexer_type, keys):
            # ndarray and slice need dedicated constructors; every other
            # type is called directly on the list of keys.
            if indexer_type == np.ndarray:
                return np.array(keys)
            if indexer_type == slice:
                return slice(*keys)
            return indexer_type(keys)

        a = [10, 20, 30]
        b = [1, 2, 3]
        index = MultiIndex.from_product([a, b])
        df = DataFrame(
            np.arange(len(index), dtype="int64"), index=index, columns=["Data"]
        )

        keys = ([10, 20], [2, 3])
        types = (indexer_type_1, indexer_type_2)

        # check indexers with all the combinations of nested objects
        # of all the valid types
        indexer = tuple(
            convert_nested_indexer(indexer_type, k)
            for indexer_type, k in zip(types, keys)
        )

        result = df.loc[indexer, "Data"]
        expected = Series(
            [1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys)
        )

        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer, pos",
    [
        ([], []),  # empty ok
        (["A"], slice(3)),
        (["A", "D"], slice(3)),
        (["D", "E"], []),  # no values found - fine
        (["D"], []),  # same, with single item list: GH 27148
        (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)),
        (pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)),
    ],
)
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
    """GH 7866: multi-index slicing with missing indexers.

    ``pos`` holds the positions in the sorted Series that ``indexer`` is
    expected to select.
    """
    level_values = [["A", "B", "C"], ["foo", "bar", "baz"]]
    idx = MultiIndex.from_product(level_values, names=["one", "two"])
    s = Series(np.arange(9, dtype="int64"), index=idx).sort_index()

    tm.assert_series_equal(s.loc[indexer], s.iloc[pos])
|
||||
|
||||
|
||||
def test_series_loc_getitem_fancy(multiindex_year_month_day_dataframe_random_data):
    """A list of full tuple keys selects the matching Series entries."""
    s = multiindex_year_month_day_dataframe_random_data["A"]
    wanted = [(2000, 3, 10), (2000, 3, 13)]
    expected = s.reindex(s.index[49:51])
    tm.assert_series_equal(s.loc[wanted], expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("columns_indexer", [([], slice(None)), (["foo"], [])])
def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
    """GH 8737: a column indexer with an empty part selects no columns."""
    multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"]))
    df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index)
    df = df.sort_index(level=0, axis=1)

    # Reindexing against an empty list gives the empty column index used
    # for the expected frame.
    empty_columns = multi_index.reindex([])[0]
    expected = DataFrame(index=range(5), columns=empty_columns)

    tm.assert_frame_equal(df.loc[:, columns_indexer], expected)
|
||||
|
||||
|
||||
def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
    """GH 7914 (regression from < 0.14.0): a cell may hold a function object."""
    columns = MultiIndex.from_tuples([("functs", "mean"), ("functs", "median")])
    df = DataFrame(
        [[np.mean, np.median], ["mean", "median"]],
        columns=columns,
        index=["function", "name"],
    )
    assert df.loc["function", ("functs", "mean")] == np.mean
|
||||
|
||||
|
||||
def test_loc_getitem_tuple_plus_slice():
    """GH 671: ``loc[(0, 0), :]`` matches ``loc[0, 0]`` on a two-level index."""
    flat = DataFrame(
        {
            "a": np.arange(10),
            "b": np.arange(10),
            "c": np.random.randn(10),
            "d": np.random.randn(10),
        }
    )
    df = flat.set_index(["a", "b"])

    expected = df.loc[0, 0]
    result = df.loc[(0, 0), :]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_int(frame_random_data_integer_multi_index):
    """An integer first-level label selects its rows and drops that level."""
    frame = frame_random_data_integer_multi_index
    result = frame.loc[1]

    # The expectation is the last three rows with the outer level removed.
    expected = frame[-3:]
    expected.index = expected.index.droplevel(0)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_int_raises_exception(frame_random_data_integer_multi_index):
    """Looking up the integer label 3 raises a KeyError naming that label."""
    frame = frame_random_data_integer_multi_index
    with pytest.raises(KeyError, match=r"^3$"):
        frame.loc[3]
|
||||
|
||||
|
||||
def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
    """Assigning to a missing (row tuple, column) key makes it retrievable."""
    frame = multiindex_dataframe_random_data
    new_row = ("bar", "three")

    # test setup - check key not in dataframe
    with pytest.raises(KeyError, match=r"^\('bar', 'three'\)$"):
        frame.loc[new_row, "B"]

    # in theory should be inserting in a sorted space????
    frame.loc[new_row, "B"] = 0
    assert frame.sort_index().loc[new_row, "B"] == 0
|
||||
@@ -0,0 +1,106 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._libs.index as _index
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexBasic:
    """Basic MultiIndex checks: warnings, membership, NaN labels, size cutoff."""

    def test_multiindex_perf_warn(self):
        """Lookups in this frame must emit a PerformanceWarning."""

        df = DataFrame(
            {
                "jim": [0, 0, 1, 1],
                "joe": ["x", "x", "z", "y"],
                "jolie": np.random.rand(4),
            }
        ).set_index(["jim", "joe"])

        with tm.assert_produces_warning(PerformanceWarning, clear=[pd.core.index]):
            df.loc[(1, "z")]

        df = df.iloc[[2, 1, 3, 0]]
        with tm.assert_produces_warning(PerformanceWarning):
            df.loc[(0,)]

    def test_multiindex_contains_dropped(self):
        """GH 19027: dropped labels stay in ``levels`` but not in the index."""
        # test that dropped MultiIndex levels are not in the MultiIndex
        # despite continuing to be in the MultiIndex's levels
        idx = MultiIndex.from_product([[1, 2], [3, 4]])
        assert 2 in idx
        idx = idx.drop(2)

        # drop implementation keeps 2 in the levels
        assert 2 in idx.levels[0]
        # but it should no longer be in the index itself
        assert 2 not in idx

        # also applies to strings
        idx = MultiIndex.from_product([["a", "b"], ["c", "d"]])
        assert "a" in idx
        idx = idx.drop("a")
        assert "a" in idx.levels[0]
        assert "a" not in idx

    @pytest.mark.parametrize(
        "data, expected",
        [
            (MultiIndex.from_product([(), ()]), True),
            (MultiIndex.from_product([(1, 2), (3, 4)]), True),
            (MultiIndex.from_product([("a", "b"), (1, 2)]), False),
        ],
    )
    def test_multiindex_is_homogeneous_type(self, data, expected):
        """``_is_homogeneous_type`` matches the expected flag for each index."""
        assert data._is_homogeneous_type is expected

    def test_indexing_over_hashtable_size_cutoff(self):
        """Index a 10000-row Series with ``_SIZE_CUTOFF`` temporarily at 20000."""
        n = 10000

        old_cutoff = _index._SIZE_CUTOFF
        _index._SIZE_CUTOFF = 20000
        try:
            s = Series(np.arange(n), MultiIndex.from_arrays((["a"] * n, np.arange(n))))

            # hai it works!
            assert s[("a", 5)] == 5
            assert s[("a", 6)] == 6
            assert s[("a", 7)] == 7
        finally:
            # Restore the module-level cutoff even when an assertion fails,
            # so the modified value does not leak into other tests.
            _index._SIZE_CUTOFF = old_cutoff

    def test_multi_nan_indexing(self):
        """GH 3588: ``set_index`` keeps a NaN label in the resulting index."""
        df = DataFrame(
            {
                "a": ["R1", "R2", np.nan, "R4"],
                "b": ["C1", "C2", "C3", "C4"],
                "c": [10, 15, np.nan, 20],
            }
        )
        result = df.set_index(["a", "b"], drop=False)
        expected = DataFrame(
            {
                "a": ["R1", "R2", np.nan, "R4"],
                "b": ["C1", "C2", "C3", "C4"],
                "c": [10, 15, np.nan, 20],
            },
            index=[
                Index(["R1", "R2", np.nan, "R4"], name="a"),
                Index(["C1", "C2", "C3", "C4"], name="b"),
            ],
        )
        tm.assert_frame_equal(result, expected)

    def test_contains(self):
        """GH 24570: membership tests against a timedelta first level."""
        tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min")
        idx = MultiIndex.from_arrays([tx, np.arange(len(tx))])
        assert tx[0] in idx
        assert "element_not_exit" not in idx
        assert "0 day 09:30:00" in idx
|
||||
@@ -0,0 +1,201 @@
|
||||
from warnings import catch_warnings, simplefilter
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, MultiIndex
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexPartial:
|
||||
def test_getitem_partial_int(self):
    """GH 12416: integer top-level keys on MultiIndex columns."""
    outer = [10, 20]
    inner = ["a", "b"]
    df = DataFrame(index=range(2), columns=MultiIndex.from_product([outer, inner]))

    # with single item
    expected = DataFrame(index=range(2), columns=inner)
    tm.assert_frame_equal(df[20], expected)

    # with list
    kept_columns = MultiIndex.from_product([outer[1:], inner])
    expected = DataFrame(index=range(2), columns=kept_columns)
    tm.assert_frame_equal(df[[20]], expected)

    # missing item:
    with pytest.raises(KeyError, match="1"):
        df[1]
    with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
        df[[1]]
|
||||
|
||||
def test_series_slice_partial(self):
|
||||
pass
|
||||
|
||||
def test_xs_partial(
    self,
    multiindex_dataframe_random_data,
    multiindex_year_month_day_dataframe_random_data,
):
    """``xs`` with a partial key agrees with the equivalent ``loc`` lookup."""
    frame = multiindex_dataframe_random_data
    ymd = multiindex_year_month_day_dataframe_random_data

    via_xs = frame.xs("foo")
    via_loc = frame.loc["foo"]
    via_transpose = frame.T["foo"].T
    tm.assert_frame_equal(via_xs, via_transpose)
    tm.assert_frame_equal(via_xs, via_loc)

    tm.assert_frame_equal(ymd.xs((2000, 4)), ymd.loc[2000, 4])

    # ex from #1796
    index = MultiIndex(
        levels=[["foo", "bar"], ["one", "two"], [-1, 1]],
        codes=[
            [0, 0, 0, 0, 1, 1, 1, 1],
            [0, 0, 1, 1, 0, 0, 1, 1],
            [0, 1, 0, 1, 0, 1, 0, 1],
        ],
    )
    df = DataFrame(np.random.randn(8, 4), index=index, columns=list("abcd"))

    result = df.xs(["foo", "one"])
    expected = df.loc["foo", "one"]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data):
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
ymd = ymd.T
|
||||
result = ymd[2000, 2]
|
||||
|
||||
expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1])
|
||||
expected.columns = expected.columns.droplevel(0).droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_fancy_slice_partial(
|
||||
self,
|
||||
multiindex_dataframe_random_data,
|
||||
multiindex_year_month_day_dataframe_random_data,
|
||||
):
|
||||
frame = multiindex_dataframe_random_data
|
||||
result = frame.loc["bar":"baz"]
|
||||
expected = frame[3:7]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
result = ymd.loc[(2000, 2):(2000, 4)]
|
||||
lev = ymd.index.codes[1]
|
||||
expected = ymd[(lev >= 1) & (lev <= 3)]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_partial_column_select(self):
|
||||
idx = MultiIndex(
|
||||
codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
|
||||
levels=[["a", "b"], ["x", "y"], ["p", "q"]],
|
||||
)
|
||||
df = DataFrame(np.random.rand(3, 2), index=idx)
|
||||
|
||||
result = df.loc[("a", "y"), :]
|
||||
expected = df.loc[("a", "y")]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[("a", "y"), [1, 0]]
|
||||
expected = df.loc[("a", "y")][[1, 0]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
with catch_warnings(record=True):
|
||||
simplefilter("ignore", FutureWarning)
|
||||
result = df.ix[("a", "y"), [1, 0]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
with pytest.raises(KeyError, match=r"\('a', 'foo'\)"):
|
||||
df.loc[("a", "foo"), :]
|
||||
|
||||
def test_partial_set(self, multiindex_year_month_day_dataframe_random_data):
|
||||
# GH #397
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
df = ymd.copy()
|
||||
exp = ymd.copy()
|
||||
df.loc[2000, 4] = 0
|
||||
exp.loc[2000, 4].values[:] = 0
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
df["A"].loc[2000, 4] = 1
|
||||
exp["A"].loc[2000, 4].values[:] = 1
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
df.loc[2000] = 5
|
||||
exp.loc[2000].values[:] = 5
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
# this works...for now
|
||||
df["A"].iloc[14] = 5
|
||||
assert df["A"][14] == 5
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AMBIGUOUS CASES!
|
||||
|
||||
def test_partial_loc_missing(self, multiindex_year_month_day_dataframe_random_data):
|
||||
pytest.skip("skipping for now")
|
||||
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
result = ymd.loc[2000, 0]
|
||||
expected = ymd.loc[2000]["A"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# need to put in some work here
|
||||
|
||||
# self.ymd.loc[2000, 0] = 0
|
||||
# assert (self.ymd.loc[2000]['A'] == 0).all()
|
||||
|
||||
# Pretty sure the second (and maybe even the first) is already wrong.
|
||||
with pytest.raises(Exception):
|
||||
ymd.loc[(2000, 6)]
|
||||
with pytest.raises(Exception):
|
||||
ymd.loc[(2000, 6), 0]
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
expected = frame.copy()
|
||||
result = frame.copy()
|
||||
result.loc[["foo", "bar"]] = 0
|
||||
expected.loc["foo"] = 0
|
||||
expected.loc["bar"] = 0
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = frame.copy()
|
||||
result = frame.copy()
|
||||
result.loc["foo":"bar"] = 0
|
||||
expected.loc["foo"] = 0
|
||||
expected.loc["bar"] = 0
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = frame["A"].copy()
|
||||
result = frame["A"].copy()
|
||||
result.loc[["foo", "bar"]] = 0
|
||||
expected.loc["foo"] = 0
|
||||
expected.loc["bar"] = 0
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = frame["A"].copy()
|
||||
result = frame["A"].copy()
|
||||
result.loc["foo":"bar"] = 0
|
||||
expected.loc["foo"] = 0
|
||||
expected.loc["bar"] = 0
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_partial_both_axis():
    """Partial ``.loc`` keys on both axes match a positional selection (gh-12660)."""
    level_values = [["a", "b"], [2, 1]]
    row_index = MultiIndex.from_product(level_values, names=["row1", "row2"])
    col_index = MultiIndex.from_product(level_values, names=["col1", "col2"])
    df = DataFrame(np.random.randn(4, 4), index=row_index, columns=col_index)

    result = df.loc["a", "b"]

    # First two rows / last two columns, with the outer level removed on each axis.
    expected = df.iloc[:2, 2:]
    expected = expected.droplevel("row1")
    expected = expected.droplevel("col1", axis=1)
    tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,41 @@
|
||||
from numpy.random import randn
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexSetOps:
|
||||
def test_multiindex_symmetric_difference(self):
|
||||
# GH 13490
|
||||
idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"])
|
||||
result = idx ^ idx
|
||||
assert result.names == idx.names
|
||||
|
||||
idx2 = idx.copy().rename(["A", "B"])
|
||||
result = idx ^ idx2
|
||||
assert result.names == [None, None]
|
||||
|
||||
def test_mixed_depth_insert(self):
|
||||
arrays = [
|
||||
["a", "top", "top", "routine1", "routine1", "routine2"],
|
||||
["", "OD", "OD", "result1", "result2", "result1"],
|
||||
["", "wx", "wy", "", "", ""],
|
||||
]
|
||||
|
||||
tuples = sorted(zip(*arrays))
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
df = DataFrame(randn(4, 6), columns=index)
|
||||
|
||||
result = df.copy()
|
||||
expected = df.copy()
|
||||
result["b"] = [1, 2, 3, 4]
|
||||
expected["b", "", ""] = [1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_dataframe_insert_column_all_na(self):
|
||||
# GH #1534
|
||||
mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")])
|
||||
df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
|
||||
s = Series({(1, 1): 1, (1, 2): 2})
|
||||
df["new"] = s
|
||||
assert df["new"].isna().all()
|
||||
@@ -0,0 +1,475 @@
|
||||
from warnings import catch_warnings, simplefilter
|
||||
|
||||
import numpy as np
|
||||
from numpy.random import randn
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna
|
||||
import pandas.core.common as com
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning")
class TestMultiIndexSetItem:
    """Assignment through ``[]``, ``.loc`` and ``.ix`` with MultiIndex labels."""

    def test_setitem_multiindex(self):
        """Values set through ``.ix`` and ``.loc`` read back as expected."""

        def assert_equal(a, b):
            assert a == b

        def check(index_fn, target, indexers, value, compare_fn, expected=None):
            # Set through the named indexer, read back through the same indexer
            # object, then compare with ``expected`` (defaults to ``value``).
            # ``index_fn`` is passed explicitly instead of being captured from
            # the enclosing loop.
            fn = getattr(target, index_fn)
            fn.__setitem__(indexers, value)
            result = fn.__getitem__(indexers)
            if expected is None:
                expected = value
            compare_fn(result, expected)

        columns = ["A", "w", "l", "a", "x", "X", "d", "profit"]

        # One recording context covers every ``.ix`` access below.
        with catch_warnings(record=True):
            for index_fn in ("ix", "loc"):
                # GH7190
                index = MultiIndex.from_product(
                    [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"]
                )
                cell = ((0, 2), "X")

                df = DataFrame(np.nan, columns=columns, index=index)
                check(index_fn, df, cell, value=0, compare_fn=assert_equal)

                df = DataFrame(-999, columns=columns, index=index)
                check(index_fn, df, cell, value=1, compare_fn=assert_equal)

                df = DataFrame(columns=columns, index=index)
                check(index_fn, df, cell, value=2, compare_fn=assert_equal)

                # gh-7218: assigning with 0-dim arrays
                df = DataFrame(-999, columns=columns, index=index)
                check(
                    index_fn,
                    df,
                    cell,
                    value=np.array(3),
                    compare_fn=assert_equal,
                    expected=3,
                )

                # GH5206
                df = DataFrame(
                    np.arange(25).reshape(5, 5),
                    columns="A,B,C,D,E".split(","),
                    dtype=float,
                )
                df["F"] = 99
                row_selection = df["A"] % 2 == 0
                col_selection = ["B", "C"]
                df.ix[row_selection, col_selection] = df["F"]
                output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"])
                tm.assert_frame_equal(df.ix[row_selection, col_selection], output)
                check(
                    index_fn,
                    df,
                    (row_selection, col_selection),
                    value=df["F"],
                    compare_fn=tm.assert_frame_equal,
                    expected=output,
                )

                # GH11372
                idx = MultiIndex.from_product(
                    [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")]
                )
                cols = MultiIndex.from_product(
                    [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")]
                )
                df = DataFrame(np.random.random((12, 4)), index=idx, columns=cols)

                subidx = MultiIndex.from_tuples(
                    [("A", Timestamp("2015-01-01")), ("A", Timestamp("2015-02-01"))]
                )
                subcols = MultiIndex.from_tuples(
                    [("foo", Timestamp("2016-01-01")), ("foo", Timestamp("2016-02-01"))]
                )

                vals = DataFrame(
                    np.random.random((2, 2)), index=subidx, columns=subcols
                )
                check(
                    index_fn,
                    df,
                    (subidx, subcols),
                    value=vals,
                    compare_fn=tm.assert_frame_equal,
                )

                # set all columns
                vals = DataFrame(np.random.random((2, 4)), index=subidx, columns=cols)
                check(
                    index_fn,
                    df,
                    (subidx, slice(None, None, None)),
                    value=vals,
                    compare_fn=tm.assert_frame_equal,
                )

                # identity
                before = df.copy()
                check(
                    index_fn,
                    df,
                    (df.index, df.columns),
                    value=df,
                    compare_fn=tm.assert_frame_equal,
                    expected=before,
                )

    def test_multiindex_setitem(self):
        """In-place arithmetic through ``.loc`` with MultiIndex keys (GH 3738)."""
        # setting with a multi-index right hand side
        arrays = [
            np.array(["bar", "bar", "baz", "qux", "qux", "bar"]),
            np.array(["one", "two", "one", "one", "two", "one"]),
            np.arange(0, 6, 1),
        ]

        df_orig = DataFrame(
            np.random.randn(6, 3), index=arrays, columns=["A", "B", "C"]
        ).sort_index()

        expected = df_orig.loc[["bar"]] * 2
        df = df_orig.copy()
        df.loc[["bar"]] *= 2
        tm.assert_frame_equal(df.loc[["bar"]], expected)

        # raise because these have differing levels
        with pytest.raises(TypeError):
            df.loc["bar"] *= 2

        # from SO
        # http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
        df_orig = DataFrame.from_dict(
            {
                "price": {
                    ("DE", "Coal", "Stock"): 2,
                    ("DE", "Gas", "Stock"): 4,
                    ("DE", "Elec", "Demand"): 1,
                    ("FR", "Gas", "Stock"): 5,
                    ("FR", "Solar", "SupIm"): 0,
                    ("FR", "Wind", "SupIm"): 0,
                }
            }
        )
        df_orig.index = MultiIndex.from_tuples(
            df_orig.index, names=["Sit", "Com", "Type"]
        )

        expected = df_orig.copy()
        expected.iloc[[0, 2, 3]] *= 2

        idx = pd.IndexSlice
        df = df_orig.copy()
        df.loc[idx[:, :, "Stock"], :] *= 2
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc[idx[:, :, "Stock"], "price"] *= 2
        tm.assert_frame_equal(df, expected)

    def test_multiindex_assignment(self):
        """Array and scalar assignment to a partial row key (GH3777 part 2)."""
        # mixed dtype
        df = DataFrame(
            np.random.randint(5, 10, size=9).reshape(3, 3),
            columns=list("abc"),
            index=[[4, 4, 8], [8, 10, 12]],
        )
        df["d"] = np.nan
        arr = np.array([0.0, 1.0])

        with catch_warnings(record=True):
            df.ix[4, "d"] = arr
            tm.assert_series_equal(df.ix[4, "d"], Series(arr, index=[8, 10], name="d"))

        # single dtype
        df = DataFrame(
            np.random.randint(5, 10, size=9).reshape(3, 3),
            columns=list("abc"),
            index=[[4, 4, 8], [8, 10, 12]],
        )

        with catch_warnings(record=True):
            df.ix[4, "c"] = arr
            exp = Series(arr, index=[8, 10], name="c", dtype="float64")
            tm.assert_series_equal(df.ix[4, "c"], exp)

        # scalar ok
        with catch_warnings(record=True):
            df.ix[4, "c"] = 10
            exp = Series(10, index=[8, 10], name="c", dtype="float64")
            tm.assert_series_equal(df.ix[4, "c"], exp)

        # invalid assignments
        with pytest.raises(ValueError):
            with catch_warnings(record=True):
                df.ix[4, "c"] = [0, 1, 2, 3]

        with pytest.raises(ValueError):
            with catch_warnings(record=True):
                df.ix[4, "c"] = [0]

        # groupby example
        num_rows = 100
        num_cols = 10
        col_names = ["A{}".format(i) for i in range(num_cols)]
        index_cols = col_names[:5]

        df = DataFrame(
            np.random.randint(5, size=(num_rows, num_cols)),
            dtype=np.int64,
            columns=col_names,
        )
        df = df.set_index(index_cols).sort_index()
        grp = df.groupby(level=index_cols[:4])
        df["new_col"] = np.nan

        # we are actually operating on a copy here
        # but in this case, that's ok
        # (no value is asserted; this only checks the assignments do not raise)
        for name, df2 in grp:
            new_vals = np.arange(df2.shape[0])
            with catch_warnings(record=True):
                df.ix[name, "new_col"] = new_vals

    def test_series_setitem(self, multiindex_year_month_day_dataframe_random_data):
        """Setting a Series through partial and full tuple keys."""
        ymd = multiindex_year_month_day_dataframe_random_data
        s = ymd["A"]

        # Partial key: only positions 42..64 are expected to become NaN.
        s[2000, 3] = np.nan
        assert isna(s.values[42:65]).all()
        assert notna(s.values[:42]).all()
        assert notna(s.values[65:]).all()

        s[2000, 3, 10] = np.nan
        assert isna(s[49])

    def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data):
        """Boolean-frame get/set matches the same masking on a plain ndarray."""
        frame = multiindex_dataframe_random_data
        df = frame.T.copy()
        # Copy so the reference array is updated independently of ``df``;
        # ``df.values`` may share memory with the frame, which would make the
        # comparisons below trivially true.
        values = df.values.copy()

        result = df[df > 0]
        expected = df.where(df > 0)
        tm.assert_frame_equal(result, expected)

        df[df > 0] = 5
        values[values > 0] = 5
        tm.assert_almost_equal(df.values, values)

        df[df == 5] = 0
        values[values == 5] = 0
        tm.assert_almost_equal(df.values, values)

        # a df that needs alignment first
        df[df[:-1] < 0] = 2
        np.putmask(values[:-1], values[:-1] < 0, 2)
        tm.assert_almost_equal(df.values, values)

        with pytest.raises(TypeError, match="boolean values only"):
            df[df * 0] = 2

    def test_frame_getitem_setitem_multislice(self):
        """``.loc`` / ``.ix`` column access and assignment with full slices."""
        levels = [["t1", "t2"], ["a", "b", "c"]]
        codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
        midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"])
        df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx)

        result = df.loc[:, "value"]
        tm.assert_series_equal(df["value"], result)

        with catch_warnings(record=True):
            simplefilter("ignore", FutureWarning)
            result = df.ix[:, "value"]
        tm.assert_series_equal(df["value"], result)

        result = df.loc[df.index[1:3], "value"]
        tm.assert_series_equal(df["value"][1:3], result)

        result = df.loc[:, :]
        tm.assert_frame_equal(df, result)

        # Use an independent copy as the reference: binding ``result`` to ``df``
        # itself would compare the frame with itself.
        result = df.copy()
        df.loc[:, "value"] = 10
        result["value"] = 10
        tm.assert_frame_equal(df, result)

        df.loc[:, :] = 10
        tm.assert_frame_equal(df, result)

    def test_frame_setitem_multi_column(self):
        """Assigning to a top-level column key updates every sub-column."""
        df = DataFrame(randn(10, 4), columns=[["a", "a", "b", "b"], [0, 1, 0, 1]])

        cp = df.copy()
        cp["a"] = cp["b"]
        tm.assert_frame_equal(cp["a"], cp["b"])

        # set with ndarray
        cp = df.copy()
        cp["a"] = cp["b"].values
        tm.assert_frame_equal(cp["a"], cp["b"])

        # ---------------------------------------
        # #1803
        columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")])
        df = DataFrame(index=[1, 3, 5], columns=columns)

        # The original report (as quoted in this test's history) was that this
        # added a column instead of updating the two existing ones; the
        # assertion requires every value under "A" to be updated.
        df["A"] = 0.0
        assert (df["A"].values == 0).all()

        # it broadcasts
        df["B", "1"] = [1, 2, 3]
        df["A"] = df["B", "1"]

        sliced_a1 = df["A", "1"]
        sliced_a2 = df["A", "2"]
        sliced_b1 = df["B", "1"]
        tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
        tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
        assert sliced_a1.name == ("A", "1")
        assert sliced_a2.name == ("A", "2")
        assert sliced_b1.name == ("B", "1")

    def test_getitem_setitem_tuple_plus_columns(
        self, multiindex_year_month_day_dataframe_random_data
    ):
        """A full row tuple plus a column list equals the two-step lookup (GH #1013)."""
        ymd = multiindex_year_month_day_dataframe_random_data
        df = ymd[:5]

        result = df.loc[(2000, 1, 6), ["A", "B", "C"]]
        expected = df.loc[2000, 1, 6][["A", "B", "C"]]
        tm.assert_series_equal(result, expected)

    def test_getitem_setitem_slice_integers(self):
        """Integer label slices on the outer level select and assign rows."""
        index = MultiIndex(
            levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
        )

        frame = DataFrame(
            np.random.randn(len(index), 4), index=index, columns=["a", "b", "c", "d"]
        )
        res = frame.loc[1:2]
        exp = frame.reindex(frame.index[2:])
        tm.assert_frame_equal(res, exp)

        frame.loc[1:2] = 7
        assert (frame.loc[1:2] == 7).values.all()

        series = Series(np.random.randn(len(index)), index=index)

        res = series.loc[1:2]
        exp = series.reindex(series.index[2:])
        tm.assert_series_equal(res, exp)

        series.loc[1:2] = 7
        assert (series.loc[1:2] == 7).values.all()

    def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
        """Replacing a float column with a boolean Series is read back intact."""
        frame = multiindex_dataframe_random_data
        dft = frame.T
        s = dft["foo", "two"]
        dft["foo", "two"] = s > s.median()
        tm.assert_series_equal(dft["foo", "two"], s > s.median())

        reindexed = dft.reindex(columns=[("foo", "two")])
        tm.assert_series_equal(reindexed["foo", "two"], s > s.median())

    def test_set_column_scalar_with_ix(self, multiindex_dataframe_random_data):
        """Scalar assignment to a subset of MultiIndex rows, on frame and column."""
        frame = multiindex_dataframe_random_data
        subset = frame.index[[1, 4, 5]]

        frame.loc[subset] = 99
        assert (frame.loc[subset].values == 99).all()

        # The write goes through the extracted column and is then checked on
        # the frame itself.
        col = frame["B"]
        col[subset] = 97
        assert (frame.loc[subset, "B"] == 97).all()

    def test_nonunique_assignment_1750(self):
        """Assigning via a MultiIndex key sets every duplicated matching row."""
        df = DataFrame(
            [[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD")
        )

        df = df.set_index(["A", "B"])
        ix = MultiIndex.from_tuples([(1, 1)])

        df.loc[ix, "C"] = "_"

        assert (df.xs((1, 1))["C"] == "_").all()

    def test_astype_assignment_with_dups(self):
        """Dtype-changing assignment to a top-level key keeps the row index (GH 4686)."""
        # assignment with dups that has a dtype change
        cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")])
        df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object)
        index = df.index.copy()

        df["A"] = df["A"].astype(np.float64)
        tm.assert_index_equal(df.index, index)
|
||||
|
||||
|
||||
def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
|
||||
# this works because we are modifying the underlying array
|
||||
# really a no-no
|
||||
df = multiindex_dataframe_random_data.T
|
||||
df["foo"].values[:] = 0
|
||||
assert (df["foo"].values == 0).all()
|
||||
|
||||
|
||||
def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
    """Chained assignment ``df["foo"]["one"] = ...`` raises SettingWithCopyError."""
    # will raise/warn as its chained assignment
    transposed = multiindex_dataframe_random_data.T
    pattern = "A value is trying to be set on a copy of a slice from a DataFrame"
    with pytest.raises(com.SettingWithCopyError, match=pattern):
        transposed["foo"]["one"] = 2
|
||||
|
||||
|
||||
def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data):
    """A rejected chained assignment leaves the frame's data unchanged."""
    original = multiindex_dataframe_random_data.T
    working = original.copy()
    pattern = "A value is trying to be set on a copy of a slice from a DataFrame"
    with pytest.raises(com.SettingWithCopyError, match=pattern):
        working["foo"]["one"] = 2

    # After the failed write the copy must still equal the untouched original.
    tm.assert_frame_equal(working, original)
|
||||
@@ -0,0 +1,707 @@
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import UnsortedIndexError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, MultiIndex, Series, Timestamp
|
||||
from pandas.core.indexing import _non_reducing_slice
|
||||
from pandas.tests.indexing.common import _mklbl
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning")
|
||||
class TestMultiIndexSlicers:
|
||||
def test_per_axis_per_level_getitem(self):
    """Per-axis, per-level ``.loc`` selection on MultiIndex data (GH6134)."""

    def keys_matching(obj, first_labels, third_labels):
        # Index tuples whose first and third entries are among the given labels.
        return [
            (a, b, c, d)
            for a, b, c, d in obj.index.values
            if a in first_labels and c in third_labels
        ]

    everything = slice(None)
    a1_to_a3 = ("A1", "A2", "A3")

    # example test case
    ix = MultiIndex.from_product(
        [_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)]
    )
    df = DataFrame(np.arange(len(ix.to_numpy())), index=ix)

    result = df.loc[(slice("A1", "A3"), everything, ["C1", "C3"]), :]
    expected = df.loc[keys_matching(df, a1_to_a3, ("C1", "C3"))]
    tm.assert_frame_equal(result, expected)

    expected = df.loc[keys_matching(df, a1_to_a3, ("C1", "C2", "C3"))]
    result = df.loc[(slice("A1", "A3"), everything, slice("C1", "C3")), :]
    tm.assert_frame_equal(result, expected)

    # test multi-index slicing with per axis and per index controls
    index = MultiIndex.from_tuples(
        [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
    )
    columns = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    df = DataFrame(
        np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
    )
    df = df.sort_index(axis=0).sort_index(axis=1)

    # identity
    tm.assert_frame_equal(df.loc[(everything, everything), :], df)
    tm.assert_frame_equal(
        df.loc[(everything, everything), (everything, everything)], df
    )
    tm.assert_frame_equal(df.loc[:, (everything, everything)], df)

    # index
    tm.assert_frame_equal(df.loc[(everything, [1]), :], df.iloc[[0, 3]])
    tm.assert_frame_equal(df.loc[(everything, 1), :], df.iloc[[0, 3]])

    # columns
    tm.assert_frame_equal(df.loc[:, (everything, ["foo"])], df.iloc[:, [1, 3]])

    # both
    result = df.loc[(everything, 1), (everything, ["foo"])]
    tm.assert_frame_equal(result, df.iloc[[0, 3], [1, 3]])

    result = df.loc["A", "a"]
    expected = DataFrame(
        {"bar": [1, 5, 9], "foo": [0, 4, 8]},
        index=Index([1, 2, 3], name="two"),
        columns=Index(["bar", "foo"], name="lvl1"),
    )
    tm.assert_frame_equal(result, expected)

    tm.assert_frame_equal(df.loc[(everything, [1, 2]), :], df.iloc[[0, 1, 3]])

    # multi-level series
    s = Series(np.arange(len(ix.to_numpy())), index=ix)
    result = s.loc["A1":"A3", :, ["C1", "C3"]]
    expected = s.loc[keys_matching(s, a1_to_a3, ("C1", "C3"))]
    tm.assert_series_equal(result, expected)

    # boolean indexers
    result = df.loc[(everything, df.loc[:, ("a", "bar")] > 5), :]
    tm.assert_frame_equal(result, df.iloc[[2, 3]])

    with pytest.raises(ValueError):
        df.loc[(everything, np.array([True, False])), :]

    # ambiguous notation
    # this is interpreted as slicing on both axes (GH #16396)
    tm.assert_frame_equal(df.loc[everything, [1]], df.iloc[:, []])
    tm.assert_frame_equal(df.loc[(everything, [1]), :], df.iloc[[0, 3]])

    # not lexsorted
    assert df.index.lexsort_depth == 2
    df = df.sort_index(level=1, axis=0)
    assert df.index.lexsort_depth == 0

    msg = (
        "MultiIndex slicing requires the index to be "
        r"lexsorted: slicing on levels \[1\], lexsort depth 0"
    )
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc[(everything, slice("bar")), :]

    # GH 16734: not sorted, but no real slicing
    result = df.loc[(everything, df.loc[:, ("a", "bar")] > 5), :]
    tm.assert_frame_equal(result, df.iloc[[1, 3], :])
|
||||
|
||||
def test_multiindex_slicers_non_unique(self):
|
||||
|
||||
# GH 7106
|
||||
# non-unique mi index support
|
||||
df = (
|
||||
DataFrame(
|
||||
dict(
|
||||
A=["foo", "foo", "foo", "foo"],
|
||||
B=["a", "a", "a", "a"],
|
||||
C=[1, 2, 1, 3],
|
||||
D=[1, 2, 3, 4],
|
||||
)
|
||||
)
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
assert not df.index.is_unique
|
||||
expected = (
|
||||
DataFrame(dict(A=["foo", "foo"], B=["a", "a"], C=[1, 1], D=[1, 3]))
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
result = df.loc[(slice(None), slice(None), 1), :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# this is equivalent of an xs expression
|
||||
result = df.xs(1, level=2, drop_level=False)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = (
|
||||
DataFrame(
|
||||
dict(
|
||||
A=["foo", "foo", "foo", "foo"],
|
||||
B=["a", "a", "a", "a"],
|
||||
C=[1, 2, 1, 2],
|
||||
D=[1, 2, 3, 4],
|
||||
)
|
||||
)
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
assert not df.index.is_unique
|
||||
expected = (
|
||||
DataFrame(dict(A=["foo", "foo"], B=["a", "a"], C=[1, 1], D=[1, 3]))
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
result = df.loc[(slice(None), slice(None), 1), :]
|
||||
assert not result.index.is_unique
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# GH12896
|
||||
# numpy-implementation dependent bug
|
||||
ints = [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
14,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
200000,
|
||||
200000,
|
||||
]
|
||||
n = len(ints)
|
||||
idx = MultiIndex.from_arrays([["a"] * n, ints])
|
||||
result = Series([1] * n, index=idx)
|
||||
result = result.sort_index()
|
||||
result = result.loc[(slice(None), slice(100000))]
|
||||
expected = Series([1] * (n - 2), index=idx[:-2]).sort_index()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_multiindex_slicers_datetimelike(self):
|
||||
|
||||
# GH 7429
|
||||
# buggy/inconsistent behavior when slicing with datetime-like
|
||||
import datetime
|
||||
|
||||
dates = [
|
||||
datetime.datetime(2012, 1, 1, 12, 12, 12) + datetime.timedelta(days=i)
|
||||
for i in range(6)
|
||||
]
|
||||
freq = [1, 2]
|
||||
index = MultiIndex.from_product([dates, freq], names=["date", "frequency"])
|
||||
|
||||
df = DataFrame(
|
||||
np.arange(6 * 2 * 4, dtype="int64").reshape(-1, 4),
|
||||
index=index,
|
||||
columns=list("ABCD"),
|
||||
)
|
||||
|
||||
# multi-axis slicing
|
||||
idx = pd.IndexSlice
|
||||
expected = df.iloc[[0, 2, 4], [0, 1]]
|
||||
result = df.loc[
|
||||
(
|
||||
slice(
|
||||
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
|
||||
),
|
||||
slice(1, 1),
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(
|
||||
idx[
|
||||
Timestamp("2012-01-01 12:12:12") : Timestamp("2012-01-03 12:12:12")
|
||||
],
|
||||
idx[1:1],
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(
|
||||
slice(
|
||||
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
|
||||
),
|
||||
1,
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# with strings
|
||||
result = df.loc[
|
||||
(slice("2012-01-01 12:12:12", "2012-01-03 12:12:12"), slice(1, 1)),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(idx["2012-01-01 12:12:12":"2012-01-03 12:12:12"], 1), idx["A", "B"]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_multiindex_slicers_edges(self):
|
||||
# GH 8132
|
||||
# various edge cases
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": ["A0"] * 5 + ["A1"] * 5 + ["A2"] * 5,
|
||||
"B": ["B0", "B0", "B1", "B1", "B2"] * 3,
|
||||
"DATE": [
|
||||
"2013-06-11",
|
||||
"2013-07-02",
|
||||
"2013-07-09",
|
||||
"2013-07-30",
|
||||
"2013-08-06",
|
||||
"2013-06-11",
|
||||
"2013-07-02",
|
||||
"2013-07-09",
|
||||
"2013-07-30",
|
||||
"2013-08-06",
|
||||
"2013-09-03",
|
||||
"2013-10-01",
|
||||
"2013-07-09",
|
||||
"2013-08-06",
|
||||
"2013-09-03",
|
||||
],
|
||||
"VALUES": [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2],
|
||||
}
|
||||
)
|
||||
|
||||
df["DATE"] = pd.to_datetime(df["DATE"])
|
||||
df1 = df.set_index(["A", "B", "DATE"])
|
||||
df1 = df1.sort_index()
|
||||
|
||||
# A1 - Get all values under "A0" and "A1"
|
||||
result = df1.loc[(slice("A1")), :]
|
||||
expected = df1.iloc[0:10]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A2 - Get all values from the start to "A2"
|
||||
result = df1.loc[(slice("A2")), :]
|
||||
expected = df1
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A3 - Get all values under "B1" or "B2"
|
||||
result = df1.loc[(slice(None), slice("B1", "B2")), :]
|
||||
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A4 - Get all values between 2013-07-02 and 2013-07-09
|
||||
result = df1.loc[(slice(None), slice(None), slice("20130702", "20130709")), :]
|
||||
expected = df1.iloc[[1, 2, 6, 7, 12]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B1 - Get all values in B0 that are also under A0, A1 and A2
|
||||
result = df1.loc[(slice("A2"), slice("B0")), :]
|
||||
expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for
|
||||
# the As)
|
||||
result = df1.loc[(slice(None), slice("B2")), :]
|
||||
expected = df1
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B3 - Get all values from B1 to B2 and up to 2013-08-06
|
||||
result = df1.loc[(slice(None), slice("B1", "B2"), slice("2013-08-06")), :]
|
||||
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B4 - Same as A4 but the start of the date slice is not a key.
|
||||
# shows indexing on a partial selection slice
|
||||
result = df1.loc[(slice(None), slice(None), slice("20130701", "20130709")), :]
|
||||
expected = df1.iloc[[1, 2, 6, 7, 12]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_per_axis_per_level_doc_examples(self):
    """Check the per-axis / per-level ``.loc`` doc examples.

    Selections written as tuples of slices and as ``pd.IndexSlice``
    expressions are compared against a frame selected through an explicit
    list of matching row keys.  Column slicing is then exercised both while
    the columns are unsorted and after ``sort_index(axis=1)``.
    """
    ixs = pd.IndexSlice  # index-maker shorthand

    # frame layout from indexing.rst / advanced
    level_specs = [("A", 4), ("B", 2), ("C", 4), ("D", 2)]
    row_index = MultiIndex.from_product(
        [_mklbl(prefix, count) for prefix, count in level_specs]
    )
    col_index = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    n_rows, n_cols = len(row_index), len(col_index)
    df = DataFrame(
        np.arange(n_rows * n_cols, dtype="int64").reshape((n_rows, n_cols)),
        index=row_index,
        columns=col_index,
    )

    # first level A1..A3 combined with third level C1 / C3
    result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
    matching_keys = [
        (a, b, c, d)
        for a, b, c, d in df.index.values
        if a in ("A1", "A2", "A3") and c in ("C1", "C3")
    ]
    expected = df.loc[matching_keys]
    tm.assert_frame_equal(result, expected)
    tm.assert_frame_equal(df.loc[ixs["A1":"A3", :, ["C1", "C3"]], :], expected)

    # third level C1 / C3 only, every other level unrestricted
    result = df.loc[(slice(None), slice(None), ["C1", "C3"]), :]
    matching_keys = [
        (a, b, c, d) for a, b, c, d in df.index.values if c in ("C1", "C3")
    ]
    expected = df.loc[matching_keys]
    tm.assert_frame_equal(result, expected)
    tm.assert_frame_equal(df.loc[ixs[:, :, ["C1", "C3"]], :], expected)

    # not sorted
    with pytest.raises(UnsortedIndexError):
        df.loc["A1", ("a", slice("foo"))]

    # GH 16734: not sorted, but no real slicing
    foo_of_a1 = df.loc["A1", (slice(None), "foo")]
    tm.assert_frame_equal(foo_of_a1, df.loc["A1"].iloc[:, [0, 2]])

    df = df.sort_index(axis=1)

    # slicing (results are not inspected; these only have to run)
    df.loc["A1", (slice(None), "foo")]
    df.loc[(slice(None), slice(None), ["C1", "C3"]), (slice(None), "foo")]

    # setitem
    df.loc(axis=0)[:, :, ["C1", "C3"]] = -10
|
||||
|
||||
def test_loc_axis_arguments(self):
    """Check ``df.loc(axis=...)`` for row axes, column axes and invalid axes.

    Axis values ``0`` / ``"index"`` and ``1`` / ``"columns"`` are compared
    with equivalent plain ``.loc`` selections; ``-1``, ``2`` and ``"foo"``
    must raise ``ValueError``.
    """
    level_specs = [("A", 4), ("B", 2), ("C", 4), ("D", 2)]
    row_index = MultiIndex.from_product(
        [_mklbl(prefix, count) for prefix, count in level_specs]
    )
    col_index = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    n_rows, n_cols = len(row_index), len(col_index)
    unsorted = DataFrame(
        np.arange(n_rows * n_cols, dtype="int64").reshape((n_rows, n_cols)),
        index=row_index,
        columns=col_index,
    )
    df = unsorted.sort_index().sort_index(axis=1)

    # axis 0
    result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]]
    matching_keys = [
        (a, b, c, d)
        for a, b, c, d in df.index.values
        if a in ("A1", "A2", "A3") and c in ("C1", "C3")
    ]
    tm.assert_frame_equal(result, df.loc[matching_keys])

    result = df.loc(axis="index")[:, :, ["C1", "C3"]]
    matching_keys = [
        (a, b, c, d) for a, b, c, d in df.index.values if c in ("C1", "C3")
    ]
    tm.assert_frame_equal(result, df.loc[matching_keys])

    # axis 1, addressed by number and then by name
    for column_axis in (1, "columns"):
        result = df.loc(axis=column_axis)[:, "foo"]
        expected = df.loc[:, (slice(None), "foo")]
        tm.assert_frame_equal(result, expected)

    # invalid axis
    for bad_axis in (-1, 2, "foo"):
        with pytest.raises(ValueError):
            df.loc(axis=bad_axis)[:, :, ["C1", "C3"]]
|
||||
|
||||
def test_per_axis_per_level_setitem(self):
    """Assign through per-axis / per-level ``.loc`` keys and verify via ``.iloc``.

    Every ``.loc`` spelling (tuple of slices, ``loc(axis=0)``,
    ``pd.IndexSlice``) is applied to a fresh copy of the same sorted 4x4
    frame and compared with a copy changed through the corresponding
    positional ``.iloc`` assignment.
    """
    ixs = pd.IndexSlice  # index-maker shorthand
    every = slice(None)

    # multi-index slicing with per axis and per index controls
    row_index = MultiIndex.from_tuples(
        [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
    )
    col_index = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    base = DataFrame(
        np.arange(16, dtype="int64").reshape(4, 4),
        index=row_index,
        columns=col_index,
    )
    base = base.sort_index(axis=0).sort_index(axis=1)

    def fresh():
        # every scenario starts from an untouched copy of the sorted frame
        return base.copy()

    # keys reused below: rows whose level "two" is 1, columns labelled "foo"
    rows_two_is_1 = (every, 1)
    foo_columns = (every, ["foo"])

    # identity
    expected = fresh()
    expected.iloc[:, :] = 100

    df = fresh()
    df.loc[(every, every), :] = 100
    tm.assert_frame_equal(df, expected)

    df = fresh()
    df.loc(axis=0)[:, :] = 100
    tm.assert_frame_equal(df, expected)

    df = fresh()
    df.loc[(every, every), (every, every)] = 100
    tm.assert_frame_equal(df, expected)

    df = fresh()
    df.loc[:, (every, every)] = 100
    tm.assert_frame_equal(df, expected)

    # index
    expected = fresh()
    expected.iloc[[0, 3]] = 100

    df = fresh()
    df.loc[(every, [1]), :] = 100
    tm.assert_frame_equal(df, expected)

    df = fresh()
    df.loc[rows_two_is_1, :] = 100
    tm.assert_frame_equal(df, expected)

    df = fresh()
    df.loc(axis=0)[:, 1] = 100
    tm.assert_frame_equal(df, expected)

    # columns
    df = fresh()
    df.loc[:, foo_columns] = 100
    expected = fresh()
    expected.iloc[:, [1, 3]] = 100
    tm.assert_frame_equal(df, expected)

    # both
    expected_cells = fresh()
    expected_cells.iloc[[0, 3], [1, 3]] = 100

    df = fresh()
    df.loc[rows_two_is_1, foo_columns] = 100
    tm.assert_frame_equal(df, expected_cells)

    df = fresh()
    df.loc[ixs[:, 1], ixs[:, ["foo"]]] = 100
    tm.assert_frame_equal(df, expected_cells)

    df = fresh()
    df.loc["A", "a"] = 100
    expected = fresh()
    expected.iloc[0:3, 0:2] = 100
    tm.assert_frame_equal(df, expected)

    # setting with a list-like
    df = fresh()
    df.loc[rows_two_is_1, foo_columns] = np.array(
        [[100, 100], [100, 100]], dtype="int64"
    )
    tm.assert_frame_equal(df, expected_cells)

    # not enough values
    df = fresh()

    with pytest.raises(ValueError):
        df.loc[rows_two_is_1, foo_columns] = np.array(
            [[100], [100, 100]], dtype="int64"
        )

    with pytest.raises(ValueError):
        df.loc[rows_two_is_1, foo_columns] = np.array(
            [100, 100, 100, 100], dtype="int64"
        )

    # with an alignable rhs
    df = fresh()
    df.loc[rows_two_is_1, foo_columns] = df.loc[rows_two_is_1, foo_columns] * 5
    expected = fresh()
    expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5
    tm.assert_frame_equal(df, expected)

    expected_squared = fresh()
    expected_squared.iloc[[0, 3], [1, 3]] *= expected_squared.iloc[[0, 3], [1, 3]]

    df = fresh()
    df.loc[rows_two_is_1, foo_columns] *= df.loc[rows_two_is_1, foo_columns]
    tm.assert_frame_equal(df, expected_squared)

    # rhs carries an extra ("c", "bah") column that the target does not have
    rhs = base.loc[rows_two_is_1, foo_columns].copy()
    rhs.loc[:, ("c", "bah")] = 10
    df = fresh()
    df.loc[rows_two_is_1, foo_columns] *= rhs
    tm.assert_frame_equal(df, expected_squared)
|
||||
|
||||
def test_multiindex_label_slicing_with_negative_step(self):
|
||||
s = Series(
|
||||
np.arange(20), MultiIndex.from_product([list("abcde"), np.arange(4)])
|
||||
)
|
||||
SLC = pd.IndexSlice
|
||||
|
||||
def assert_slices_equivalent(l_slc, i_slc):
|
||||
tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc])
|
||||
tm.assert_series_equal(s[l_slc], s.iloc[i_slc])
|
||||
with catch_warnings(record=True):
|
||||
tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc])
|
||||
|
||||
assert_slices_equivalent(SLC[::-1], SLC[::-1])
|
||||
|
||||
assert_slices_equivalent(SLC["d"::-1], SLC[15::-1])
|
||||
assert_slices_equivalent(SLC[("d",)::-1], SLC[15::-1])
|
||||
|
||||
assert_slices_equivalent(SLC[:"d":-1], SLC[:11:-1])
|
||||
assert_slices_equivalent(SLC[:("d",):-1], SLC[:11:-1])
|
||||
|
||||
assert_slices_equivalent(SLC["d":"b":-1], SLC[15:3:-1])
|
||||
assert_slices_equivalent(SLC[("d",):"b":-1], SLC[15:3:-1])
|
||||
assert_slices_equivalent(SLC["d":("b",):-1], SLC[15:3:-1])
|
||||
assert_slices_equivalent(SLC[("d",):("b",):-1], SLC[15:3:-1])
|
||||
assert_slices_equivalent(SLC["b":"d":-1], SLC[:0])
|
||||
|
||||
assert_slices_equivalent(SLC[("c", 2)::-1], SLC[10::-1])
|
||||
assert_slices_equivalent(SLC[:("c", 2):-1], SLC[:9:-1])
|
||||
assert_slices_equivalent(SLC[("e", 0):("c", 2):-1], SLC[16:9:-1])
|
||||
|
||||
def test_multiindex_slice_first_level(self):
|
||||
# GH 12697
|
||||
freq = ["a", "b", "c", "d"]
|
||||
idx = MultiIndex.from_product([freq, np.arange(500)])
|
||||
df = DataFrame(list(range(2000)), index=idx, columns=["Test"])
|
||||
df_slice = df.loc[pd.IndexSlice[:, 30:70], :]
|
||||
result = df_slice.loc["a"]
|
||||
expected = DataFrame(list(range(30, 71)), columns=["Test"], index=range(30, 71))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df_slice.loc["d"]
|
||||
expected = DataFrame(
|
||||
list(range(1530, 1571)), columns=["Test"], index=range(30, 71)
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_data):
    """Integer slices on the fixture's column "A" and on the frame itself.

    Covers reading ``[5:]`` from the Series, writing ``0`` through the same
    slice, and reading ``[5:]`` from the DataFrame.
    """
    frame = multiindex_year_month_day_dataframe_random_data
    col = frame["A"]
    tail_labels = col.index[5:]

    # getitem: the slice equals a reindex on the trailing labels
    tm.assert_series_equal(col[5:], col.reindex(tail_labels))

    # setitem: compare with a copy (taken before the write) zeroed via .values
    reference = frame["A"].copy()
    col[5:] = 0
    reference.values[5:] = 0
    tm.assert_numpy_array_equal(col.values, reference.values)

    # the same slice applied to the frame
    tm.assert_frame_equal(frame[5:], frame.reindex(tail_labels))
|
||||
|
||||
def test_non_reducing_slice_on_multiindex(self):
    """``_non_reducing_slice`` output used with ``.loc`` keeps a DataFrame (GH 19861)."""
    column_data = {
        ("a", "d"): [1, 4],
        ("a", "c"): [2, 3],
        ("b", "c"): [3, 2],
        ("b", "d"): [4, 1],
    }
    df = pd.DataFrame(column_data, index=[0, 1])

    # all rows, the single column ("b", "d")
    ixs = pd.IndexSlice
    wrapped_key = _non_reducing_slice(ixs[:, ixs["b", "d"]])

    tm.assert_frame_equal(df.loc[wrapped_key], pd.DataFrame({("b", "d"): [4, 1]}))
|
||||
@@ -0,0 +1,97 @@
|
||||
import numpy as np
|
||||
from numpy.random import randn
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexSorted:
|
||||
def test_getitem_multilevel_index_tuple_not_sorted(self):
|
||||
index_columns = list("abc")
|
||||
df = DataFrame(
|
||||
[[0, 1, 0, "x"], [0, 0, 1, "y"]], columns=index_columns + ["data"]
|
||||
)
|
||||
df = df.set_index(index_columns)
|
||||
query_index = df.index[:1]
|
||||
rs = df.loc[query_index, "data"]
|
||||
|
||||
xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=["a", "b", "c"])
|
||||
xp = Series(["x"], index=xp_idx, name="data")
|
||||
tm.assert_series_equal(rs, xp)
|
||||
|
||||
def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
df = frame.sort_index(level=1).T
|
||||
|
||||
# buglet with int typechecking
|
||||
result = df.iloc[:, : np.int32(3)]
|
||||
expected = df.reindex(columns=df.columns[:3])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_frame_getitem_not_sorted2(self):
|
||||
# 13431
|
||||
df = DataFrame(
|
||||
{
|
||||
"col1": ["b", "d", "b", "a"],
|
||||
"col2": [3, 1, 1, 2],
|
||||
"data": ["one", "two", "three", "four"],
|
||||
}
|
||||
)
|
||||
|
||||
df2 = df.set_index(["col1", "col2"])
|
||||
df2_original = df2.copy()
|
||||
|
||||
df2.index.set_levels(["b", "d", "a"], level="col1", inplace=True)
|
||||
df2.index.set_codes([0, 1, 0, 2], level="col1", inplace=True)
|
||||
assert not df2.index.is_lexsorted()
|
||||
assert not df2.index.is_monotonic
|
||||
|
||||
assert df2_original.index.equals(df2.index)
|
||||
expected = df2.sort_index()
|
||||
assert expected.index.is_lexsorted()
|
||||
assert expected.index.is_monotonic
|
||||
|
||||
result = df2.sort_index(level=0)
|
||||
assert result.index.is_lexsorted()
|
||||
assert result.index.is_monotonic
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
df = frame.T
|
||||
df["foo", "four"] = "foo"
|
||||
|
||||
arrays = [np.array(x) for x in zip(*df.columns.values)]
|
||||
|
||||
result = df["foo"]
|
||||
result2 = df.loc[:, "foo"]
|
||||
expected = df.reindex(columns=df.columns[arrays[0] == "foo"])
|
||||
expected.columns = expected.columns.droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(result2, expected)
|
||||
|
||||
df = df.T
|
||||
result = df.xs("foo")
|
||||
result2 = df.loc["foo"]
|
||||
expected = df.reindex(df.index[arrays[0] == "foo"])
|
||||
expected.index = expected.index.droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(result2, expected)
|
||||
|
||||
def test_series_getitem_not_sorted(self):
|
||||
arrays = [
|
||||
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
|
||||
["one", "two", "one", "two", "one", "two", "one", "two"],
|
||||
]
|
||||
tuples = zip(*arrays)
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
s = Series(randn(8), index=index)
|
||||
|
||||
arrays = [np.array(x) for x in zip(*index.values)]
|
||||
|
||||
result = s["qux"]
|
||||
result2 = s.loc["qux"]
|
||||
expected = s[arrays[0] == "qux"]
|
||||
expected.index = expected.index.droplevel(0)
|
||||
tm.assert_series_equal(result, expected)
|
||||
tm.assert_series_equal(result2, expected)
|
||||
@@ -0,0 +1,249 @@
|
||||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Index, MultiIndex, Series, concat, date_range
|
||||
import pandas.core.common as com
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def four_level_index_dataframe():
    """Return a 3x5 float frame with columns A-E and a four-level row index.

    The index levels are named "one", "two", "three" and "four".
    """
    values = np.array(
        [
            [-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
            [0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
            [-0.6662, -0.5243, -0.358, 0.89145, 2.5838],
        ]
    )
    level_values = [["a", "x"], ["b", "q"], [10.0032, 20.0, 30.0], [3, 4, 5]]
    level_codes = [[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]]
    row_index = MultiIndex(
        levels=level_values,
        codes=level_codes,
        names=["one", "two", "three", "four"],
    )
    return DataFrame(values, index=row_index, columns=list("ABCDE"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "key, level, exp_arr, exp_index",
    [
        ("a", "lvl0", lambda x: x[:, 0:2], Index(["bar", "foo"], name="lvl1")),
        ("foo", "lvl1", lambda x: x[:, 1:2], Index(["a"], name="lvl0")),
    ],
)
def test_xs_named_levels_axis_eq_1(key, level, exp_arr, exp_index):
    """``xs`` along ``axis=1`` addressed by level name (see gh-2903).

    ``exp_arr`` slices the raw value array down to the expected columns and
    ``exp_index`` is the column index expected to remain.
    """
    raw = np.random.randn(4, 4)
    named_columns = MultiIndex(
        levels=[["a", "b"], ["bar", "foo", "hello", "world"]],
        codes=[[0, 0, 1, 1], [0, 1, 2, 3]],
        names=["lvl0", "lvl1"],
    )
    df = DataFrame(raw, columns=named_columns)

    result = df.xs(key, level=level, axis=1)
    tm.assert_frame_equal(result, DataFrame(exp_arr(raw), columns=exp_index))
|
||||
|
||||
|
||||
def test_xs_values(multiindex_dataframe_random_data):
    """``xs`` with the full key ("bar", "two") is checked against row 4's values."""
    frame = multiindex_dataframe_random_data
    row_values = frame.xs(("bar", "two")).values
    tm.assert_almost_equal(row_values, frame.values[4])
|
||||
|
||||
|
||||
def test_xs_loc_equality(multiindex_dataframe_random_data):
    """``xs`` and ``.loc`` must return the same Series for the same full key."""
    frame = multiindex_dataframe_random_data
    full_key = ("bar", "two")
    via_xs = frame.xs(full_key)
    via_loc = frame.loc[full_key]
    tm.assert_series_equal(via_xs, via_loc)
|
||||
|
||||
|
||||
def test_xs_missing_values_in_index():
    """Missing values in the returned index should be preserved (see gh-6574)."""
    records = [
        ("a", "abcde", 1),
        ("b", "bbcde", 2),
        ("y", "yzcde", 25),
        ("z", "xbcde", 24),
        ("z", None, 26),
        ("z", "zbcde", 25),
        ("z", "ybcde", 26),
    ]
    df = DataFrame(records, columns=["a1", "a2", "cnt"])
    df = df.set_index(["a1", "a2"])

    # the four "z" rows, with the None key showing up as NaN
    remaining_level = Index(["xbcde", np.nan, "zbcde", "ybcde"], name="a2")
    expected = DataFrame({"cnt": [24, 26, 25, 26]}, index=remaining_level)

    tm.assert_frame_equal(df.xs("z", level="a1"), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])])
def test_xs_with_duplicates(key, level, multiindex_dataframe_random_data):
    """``xs`` on a frame whose index holds every key twice (see gh-13719)."""
    frame = multiindex_dataframe_random_data

    doubled = concat([frame, frame])
    assert doubled.index.is_unique is False

    # expected: the cross-section of the original frame, stacked twice
    single = frame.xs("one", level="second")
    expected = concat([single, single])

    tm.assert_frame_equal(doubled.xs(key, level=level), expected)
|
||||
|
||||
|
||||
def test_xs_level(multiindex_dataframe_random_data):
    """``xs`` on the level named "second" versus a boolean-mask selection."""
    frame = multiindex_dataframe_random_data
    result = frame.xs("two", level="second")

    # the same rows via a mask on level 1, relabelled with the first-level names
    second_is_two = frame.index.get_level_values(1) == "two"
    expected = frame[second_is_two]
    expected.index = Index(["foo", "bar", "baz", "qux"], name="first")

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_xs_level_eq_2():
    """``xs`` addressed by the integer level number 2 on an unnamed index."""
    raw = np.random.randn(3, 5)
    three_levels = MultiIndex(
        levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]],
        codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]],
    )
    df = DataFrame(raw, index=three_levels)

    # "c" in level 2 belongs to the second row only (codes are 2, 0, 1)
    expected = DataFrame(raw[1:2], index=[["a"], ["b"]])
    tm.assert_frame_equal(df.xs("c", level=2), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer",
    [
        lambda df: df.xs(("a", 4), level=["one", "four"]),
        lambda df: df.xs("a").xs(4, level="four"),
    ],
)
def test_xs_level_multiple(indexer, four_level_index_dataframe):
    """Two-level ``xs`` in one call and as two chained calls give the same frame."""
    frame = four_level_index_dataframe

    remaining_levels = MultiIndex(
        levels=[["q"], [20.0]], codes=[[0], [0]], names=["two", "three"]
    )
    expected = DataFrame(
        [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]],
        index=remaining_levels,
        columns=list("ABCDE"),
    )

    tm.assert_frame_equal(indexer(frame), expected)
|
||||
|
||||
|
||||
def test_xs_setting_with_copy_error(multiindex_dataframe_random_data):
    """Writing into the result of a level ``xs`` raises ``SettingWithCopyError``."""
    # this is a copy in 0.14
    frame = multiindex_dataframe_random_data
    cross_section = frame.xs("two", level="second")

    # setting this will give a SettingWithCopyError
    # as we are trying to write a view
    expected_message = (
        "A value is trying to be set on a copy of a slice from a DataFrame"
    )
    with pytest.raises(com.SettingWithCopyError, match=expected_message):
        cross_section[:] = 10
|
||||
|
||||
|
||||
def test_xs_setting_with_copy_error_multiple(four_level_index_dataframe):
    """Writing into a two-level ``xs`` result raises ``SettingWithCopyError``."""
    # this is a copy in 0.14
    frame = four_level_index_dataframe
    cross_section = frame.xs(("a", 4), level=["one", "four"])

    # setting this will give a SettingWithCopyError
    # as we are trying to write a view
    expected_message = (
        "A value is trying to be set on a copy of a slice from a DataFrame"
    )
    with pytest.raises(com.SettingWithCopyError, match=expected_message):
        cross_section[:] = 10
|
||||
|
||||
|
||||
def test_xs_integer_key():
    """``xs`` with an integer key on an integer-valued level (see gh-2107)."""
    date_codes = range(20111201, 20111205)
    security_ids = "abcde"
    index = MultiIndex.from_tuples(
        list(product(date_codes, security_ids)), names=["date", "secid"]
    )
    df = DataFrame(
        np.random.randn(len(index), 3), index=index, columns=["X", "Y", "Z"]
    )

    result = df.xs(20111201, level="date")
    tm.assert_frame_equal(result, df.loc[20111201, :])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer", [lambda df: df.xs("a", level=0), lambda df: df.xs("a")]
)
def test_xs_level0(indexer, four_level_index_dataframe):
    """``xs("a")`` with and without an explicit ``level=0`` give the same frame."""
    frame = four_level_index_dataframe

    remaining_levels = MultiIndex(
        levels=[["b", "q"], [10.0032, 20.0], [4, 5]],
        codes=[[0, 1], [0, 1], [1, 0]],
        names=["two", "three", "four"],
    )
    expected = DataFrame(
        [
            [-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
            [0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
        ],
        index=remaining_levels,
        columns=list("ABCDE"),
    )

    tm.assert_frame_equal(indexer(frame), expected)
|
||||
|
||||
|
||||
def test_xs_level_series(multiindex_dataframe_random_data):
    """Series ``[:, "two"]`` equals column "A" of the frame's level-1 ``xs``."""
    # this test is not explicitly testing .xs functionality
    # TODO: move to another module or refactor
    frame = multiindex_dataframe_random_data
    column_a = frame["A"]

    via_getitem = column_a[:, "two"]
    via_xs = frame.xs("two", level=1)["A"]
    tm.assert_series_equal(via_getitem, via_xs)
|
||||
|
||||
|
||||
def test_xs_level_series_ymd(multiindex_year_month_day_dataframe_random_data):
    """Series ``[2000, 5]`` equals column "A" of the frame's ``.loc[2000, 5]``."""
    # this test is not explicitly testing .xs functionality
    # TODO: move to another module or refactor
    frame = multiindex_year_month_day_dataframe_random_data
    column_a = frame["A"]

    via_getitem = column_a[2000, 5]
    via_loc = frame.loc[2000, 5]["A"]
    tm.assert_series_equal(via_getitem, via_loc)
|
||||
|
||||
|
||||
def test_xs_level_series_slice_not_implemented(
    multiindex_year_month_day_dataframe_random_data
):
    """Indexing the Series with ``[2000, 3:4]`` must raise ``TypeError``.

    The error message is expected to contain the offending key.
    """
    # this test is not explicitly testing .xs functionality
    # TODO: move to another module or refactor
    # not implementing this for now
    frame = multiindex_year_month_day_dataframe_random_data
    column_a = frame["A"]

    key_pattern = r"\(2000, slice\(3, 4, None\)\)"
    with pytest.raises(TypeError, match=key_pattern):
        column_a[2000, 3:4]
|
||||
|
||||
|
||||
def test_series_getitem_multiindex_xs():
    """``Series.xs`` with a date string on a datetime level (GH6258)."""
    stamps = list(date_range("20130903", periods=3))
    index = MultiIndex.from_product([list("AB"), stamps])
    ser = Series([1, 3, 4, 1, 3, 4], index=index)

    # one entry per first-level label for the first date
    expected = Series([1, 1], index=list("AB"))

    tm.assert_series_equal(ser.xs("20130903", level=1), expected)
|
||||
|
||||
|
||||
def test_series_getitem_multiindex_xs_by_label():
    """``Series.xs`` addressed by level name keeps the other level's name (GH5684)."""
    index = MultiIndex.from_tuples(
        [("a", "one"), ("a", "two"), ("b", "one"), ("b", "two")]
    )
    ser = Series([1, 2, 3, 4], index=index)
    ser.index.set_names(["L1", "L2"], inplace=True)

    expected = Series([1, 3], index=["a", "b"])
    expected.index.set_names(["L1"], inplace=True)

    tm.assert_series_equal(ser.xs("one", level="L2"), expected)
|
||||
@@ -0,0 +1,254 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestIndexingCallable:
    """Callables used as ``.loc`` / ``.iloc`` keys (GH 11485).

    Every test compares indexing with a callable against indexing with the
    value that callable returns.
    """

    def test_frame_loc_callable(self):
        """``.loc`` getitem with callables on the row axis, column axis or both."""
        # GH 11485
        df = pd.DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]})
        # iloc cannot use boolean Series (see GH3635)

        # return bool indexer
        res = df.loc[lambda x: x.A > 2]
        tm.assert_frame_equal(res, df.loc[df.A > 2])

        # the trailing comma makes the key a one-element tuple
        res = df.loc[lambda x: x.A > 2,]  # noqa: E231
        tm.assert_frame_equal(res, df.loc[df.A > 2,])  # noqa: E231

        res = df.loc[lambda x: x.B == "b", :]
        tm.assert_frame_equal(res, df.loc[df.B == "b", :])

        res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"]
        tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]])

        res = df.loc[lambda x: x.A > 2, lambda x: "B"]
        tm.assert_series_equal(res, df.loc[df.A > 2, "B"])

        res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]]
        tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]])

        res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]]
        tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]])

        # scalar
        res = df.loc[lambda x: 1, lambda x: "A"]
        assert res == df.loc[1, "A"]

    def test_frame_loc_callable_mixture(self):
        """``.loc`` getitem mixing a callable on one axis with a plain key."""
        # GH 11485
        df = pd.DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]})

        res = df.loc[lambda x: x.A > 2, ["A", "B"]]
        tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]])

        res = df.loc[[2, 3], lambda x: ["A", "B"]]
        tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]])

        res = df.loc[3, lambda x: ["A", "B"]]
        tm.assert_series_equal(res, df.loc[3, ["A", "B"]])

    def test_frame_loc_callable_labels(self):
        """``.loc`` getitem with callables that return labels."""
        # GH 11485
        df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))

        # return label
        res = df.loc[lambda x: ["A", "C"]]
        tm.assert_frame_equal(res, df.loc[["A", "C"]])

        res = df.loc[lambda x: ["A", "C"],]  # noqa: E231
        tm.assert_frame_equal(res, df.loc[["A", "C"],])  # noqa: E231

        res = df.loc[lambda x: ["A", "C"], :]
        tm.assert_frame_equal(res, df.loc[["A", "C"], :])

        res = df.loc[lambda x: ["A", "C"], lambda x: "X"]
        tm.assert_series_equal(res, df.loc[["A", "C"], "X"])

        res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]]
        tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]])

        # mixture
        res = df.loc[["A", "C"], lambda x: "X"]
        tm.assert_series_equal(res, df.loc[["A", "C"], "X"])

        res = df.loc[["A", "C"], lambda x: ["X"]]
        tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]])

        res = df.loc[lambda x: ["A", "C"], "X"]
        tm.assert_series_equal(res, df.loc[["A", "C"], "X"])

        res = df.loc[lambda x: ["A", "C"], ["X"]]
        tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]])

    def test_frame_loc_callable_setitem(self):
        """``.loc`` setitem with callables that return labels."""
        # GH 11485
        df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))

        # return label
        res = df.copy()
        res.loc[lambda x: ["A", "C"]] = -20
        exp = df.copy()
        exp.loc[["A", "C"]] = -20
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.loc[lambda x: ["A", "C"], :] = 20
        exp = df.copy()
        exp.loc[["A", "C"], :] = 20
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1
        exp = df.copy()
        exp.loc[["A", "C"], "X"] = -1
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10]
        exp = df.copy()
        exp.loc[["A", "C"], ["X"]] = [5, 10]
        tm.assert_frame_equal(res, exp)

        # mixture
        res = df.copy()
        res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2])
        exp = df.copy()
        exp.loc[["A", "C"], "X"] = np.array([-1, -2])
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.loc[["A", "C"], lambda x: ["X"]] = 10
        exp = df.copy()
        exp.loc[["A", "C"], ["X"]] = 10
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.loc[lambda x: ["A", "C"], "X"] = -2
        exp = df.copy()
        exp.loc[["A", "C"], "X"] = -2
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.loc[lambda x: ["A", "C"], ["X"]] = -4
        exp = df.copy()
        exp.loc[["A", "C"], ["X"]] = -4
        tm.assert_frame_equal(res, exp)

    def test_frame_iloc_callable(self):
        """``.iloc`` getitem with callables that return positions."""
        # GH 11485
        df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))

        # return location
        res = df.iloc[lambda x: [1, 3]]
        tm.assert_frame_equal(res, df.iloc[[1, 3]])

        res = df.iloc[lambda x: [1, 3], :]
        tm.assert_frame_equal(res, df.iloc[[1, 3], :])

        res = df.iloc[lambda x: [1, 3], lambda x: 0]
        tm.assert_series_equal(res, df.iloc[[1, 3], 0])

        res = df.iloc[lambda x: [1, 3], lambda x: [0]]
        tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])

        # mixture
        res = df.iloc[[1, 3], lambda x: 0]
        tm.assert_series_equal(res, df.iloc[[1, 3], 0])

        res = df.iloc[[1, 3], lambda x: [0]]
        tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])

        res = df.iloc[lambda x: [1, 3], 0]
        tm.assert_series_equal(res, df.iloc[[1, 3], 0])

        res = df.iloc[lambda x: [1, 3], [0]]
        tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])

    def test_frame_iloc_callable_setitem(self):
        """``.iloc`` setitem with callables that return positions."""
        # GH 11485
        df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))

        # return location
        res = df.copy()
        res.iloc[lambda x: [1, 3]] = 0
        exp = df.copy()
        exp.iloc[[1, 3]] = 0
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], :] = -1
        exp = df.copy()
        exp.iloc[[1, 3], :] = -1
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], lambda x: 0] = 5
        exp = df.copy()
        exp.iloc[[1, 3], 0] = 5
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], lambda x: [0]] = 25
        exp = df.copy()
        exp.iloc[[1, 3], [0]] = 25
        tm.assert_frame_equal(res, exp)

        # mixture
        res = df.copy()
        res.iloc[[1, 3], lambda x: 0] = -3
        exp = df.copy()
        exp.iloc[[1, 3], 0] = -3
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[[1, 3], lambda x: [0]] = -5
        exp = df.copy()
        exp.iloc[[1, 3], [0]] = -5
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], 0] = 10
        exp = df.copy()
        exp.iloc[[1, 3], 0] = 10
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], [0]] = [-5, -5]
        exp = df.copy()
        exp.iloc[[1, 3], [0]] = [-5, -5]
        tm.assert_frame_equal(res, exp)
|
||||
@@ -0,0 +1,760 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_categorical_dtype
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Index,
|
||||
Interval,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
from pandas.api.types import CategoricalDtype as CDT
|
||||
from pandas.util import testing as tm
|
||||
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
|
||||
class TestCategoricalIndex:
|
||||
def setup_method(self, method):
    """Build four six-row frames indexed by a categorical column ``B``.

    ``df``/``df2`` use string labels (``df2`` carries the extra, unused
    category "e"); ``df3``/``df4`` use integer labels with categories
    ``[3, 2, 1]``, ordered and unordered respectively.
    """

    def build(labels, dtype):
        # Column "A" is always 0..5; "B" is cast to ``dtype`` and becomes the index.
        frame = DataFrame(
            {
                "A": np.arange(6, dtype="int64"),
                "B": Series(labels).astype(dtype),
            }
        )
        return frame.set_index("B")

    letters = list("aabbca")
    numbers = [1, 1, 2, 1, 3, 2]

    self.df = build(letters, CDT(list("cab")))
    self.df2 = build(letters, CDT(list("cabe")))
    self.df3 = build(numbers, CDT([3, 2, 1], ordered=True))
    self.df4 = build(numbers, CDT([3, 2, 1], ordered=False))
|
||||
|
||||
def test_loc_scalar(self):
    """Scalar ``loc`` get/set on a CategoricalIndex, including unknown labels."""
    cab = CDT(list("cab"))

    def frame_with(a_values, labels):
        # Expected frame: column "A" plus a categorical "B" index over "cab".
        return DataFrame(
            {"A": a_values, "B": Series(list(labels)).astype(cab)}
        ).set_index("B")

    # getting a label that occurs several times returns every matching row
    assert_frame_equal(self.df.loc["a"], frame_with([0, 1, 5], "aaa"))

    # setting broadcasts the scalar to every matching row
    df = self.df.copy()
    df.loc["a"] = 20
    assert_frame_equal(df, frame_with([20, 20, 2, 3, 4, 20], "aabbca"))

    # value not in the categories
    with pytest.raises(KeyError, match=r"^'d'$"):
        df.loc["d"]

    append_msg = "cannot append a non-category item to a CategoricalIndex"
    with pytest.raises(TypeError, match=append_msg):
        df.loc["d"] = 10

    insert_msg = (
        "cannot insert an item into a CategoricalIndex that is not"
        " already an existing category"
    )
    for column in ("A", "C"):
        with pytest.raises(TypeError, match=insert_msg):
            df.loc["d", column] = 10
|
||||
|
||||
def test_getitem_scalar(self):
|
||||
|
||||
cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
|
||||
|
||||
s = Series([1, 2], index=cats)
|
||||
|
||||
expected = s.iloc[0]
|
||||
result = s[cats[0]]
|
||||
assert result == expected
|
||||
|
||||
def test_slicing_directly(self):
|
||||
cat = Categorical(["a", "b", "c", "d", "a", "b", "c"])
|
||||
sliced = cat[3]
|
||||
assert sliced == "d"
|
||||
sliced = cat[3:5]
|
||||
expected = Categorical(["d", "a"], categories=["a", "b", "c", "d"])
|
||||
tm.assert_numpy_array_equal(sliced._codes, expected._codes)
|
||||
tm.assert_index_equal(sliced.categories, expected.categories)
|
||||
|
||||
def test_slicing(self):
|
||||
cat = Series(Categorical([1, 2, 3, 4]))
|
||||
reversed = cat[::-1]
|
||||
exp = np.array([4, 3, 2, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(reversed.__array__(), exp)
|
||||
|
||||
df = DataFrame({"value": (np.arange(100) + 1).astype("int64")})
|
||||
df["D"] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])
|
||||
|
||||
expected = Series([11, Interval(0, 25)], index=["value", "D"], name=10)
|
||||
result = df.iloc[10]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = DataFrame(
|
||||
{"value": np.arange(11, 21).astype("int64")},
|
||||
index=np.arange(10, 20).astype("int64"),
|
||||
)
|
||||
expected["D"] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
|
||||
result = df.iloc[10:20]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = Series([9, Interval(0, 25)], index=["value", "D"], name=8)
|
||||
result = df.loc[8]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_slicing_and_getting_ops(self):
|
||||
|
||||
# systematically test the slicing operations:
|
||||
# for all slicing ops:
|
||||
# - returning a dataframe
|
||||
# - returning a column
|
||||
# - returning a row
|
||||
# - returning a single value
|
||||
|
||||
cats = Categorical(
|
||||
["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"]
|
||||
)
|
||||
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||||
values = [1, 2, 3, 4, 5, 6, 7]
|
||||
df = DataFrame({"cats": cats, "values": values}, index=idx)
|
||||
|
||||
# the expected values
|
||||
cats2 = Categorical(["b", "c"], categories=["a", "b", "c"])
|
||||
idx2 = Index(["j", "k"])
|
||||
values2 = [3, 4]
|
||||
|
||||
# 2:4,: | "j":"k",:
|
||||
exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2)
|
||||
|
||||
# :,"cats" | :,0
|
||||
exp_col = Series(cats, index=idx, name="cats")
|
||||
|
||||
# "j",: | 2,:
|
||||
exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", name="j")
|
||||
|
||||
# "j","cats | 2,0
|
||||
exp_val = "b"
|
||||
|
||||
# iloc
|
||||
# frame
|
||||
res_df = df.iloc[2:4, :]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
# row
|
||||
res_row = df.iloc[2, :]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], str)
|
||||
|
||||
# col
|
||||
res_col = df.iloc[:, 0]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert is_categorical_dtype(res_col)
|
||||
|
||||
# single value
|
||||
res_val = df.iloc[2, 0]
|
||||
assert res_val == exp_val
|
||||
|
||||
# loc
|
||||
# frame
|
||||
res_df = df.loc["j":"k", :]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
# row
|
||||
res_row = df.loc["j", :]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], str)
|
||||
|
||||
# col
|
||||
res_col = df.loc[:, "cats"]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert is_categorical_dtype(res_col)
|
||||
|
||||
# single value
|
||||
res_val = df.loc["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# ix
|
||||
# frame
|
||||
# res_df = df.loc["j":"k",[0,1]] # doesn't work?
|
||||
res_df = df.loc["j":"k", :]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
# row
|
||||
res_row = df.loc["j", :]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], str)
|
||||
|
||||
# col
|
||||
res_col = df.loc[:, "cats"]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert is_categorical_dtype(res_col)
|
||||
|
||||
# single value
|
||||
res_val = df.loc["j", df.columns[0]]
|
||||
assert res_val == exp_val
|
||||
|
||||
# iat
|
||||
res_val = df.iat[2, 0]
|
||||
assert res_val == exp_val
|
||||
|
||||
# at
|
||||
res_val = df.at["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# fancy indexing
|
||||
exp_fancy = df.iloc[[2]]
|
||||
|
||||
res_fancy = df[df["cats"] == "b"]
|
||||
tm.assert_frame_equal(res_fancy, exp_fancy)
|
||||
res_fancy = df[df["values"] == 3]
|
||||
tm.assert_frame_equal(res_fancy, exp_fancy)
|
||||
|
||||
# get_value
|
||||
res_val = df.at["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# i : int, slice, or sequence of integers
|
||||
res_row = df.iloc[2]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], str)
|
||||
|
||||
res_df = df.iloc[slice(2, 4)]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
res_df = df.iloc[[2, 3]]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
res_col = df.iloc[:, 0]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert is_categorical_dtype(res_col)
|
||||
|
||||
res_df = df.iloc[:, slice(0, 2)]
|
||||
tm.assert_frame_equal(res_df, df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
res_df = df.iloc[:, [0, 1]]
|
||||
tm.assert_frame_equal(res_df, df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
def test_slicing_doc_examples(self):
|
||||
|
||||
# GH 7918
|
||||
cats = Categorical(
|
||||
["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c"]
|
||||
)
|
||||
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||||
values = [1, 2, 2, 2, 3, 4, 5]
|
||||
df = DataFrame({"cats": cats, "values": values}, index=idx)
|
||||
|
||||
result = df.iloc[2:4, :]
|
||||
expected = DataFrame(
|
||||
{
|
||||
"cats": Categorical(["b", "b"], categories=["a", "b", "c"]),
|
||||
"values": [2, 2],
|
||||
},
|
||||
index=["j", "k"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.iloc[2:4, :].dtypes
|
||||
expected = Series(["category", "int64"], ["cats", "values"])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc["h":"j", "cats"]
|
||||
expected = Series(
|
||||
Categorical(["a", "b", "b"], categories=["a", "b", "c"]),
|
||||
index=["h", "i", "j"],
|
||||
name="cats",
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc["h":"j", df.columns[0:1]]
|
||||
expected = DataFrame(
|
||||
{"cats": Categorical(["a", "b", "b"], categories=["a", "b", "c"])},
|
||||
index=["h", "i", "j"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_category_type(self):
|
||||
# GH 14580
|
||||
# test iloc() on Series with Categorical data
|
||||
|
||||
s = Series([1, 2, 3]).astype("category")
|
||||
|
||||
# get slice
|
||||
result = s.iloc[0:2]
|
||||
expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# get list of indexes
|
||||
result = s.iloc[[0, 1]]
|
||||
expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# get boolean array
|
||||
result = s.iloc[[True, False, False]]
|
||||
expected = Series([1]).astype(CategoricalDtype([1, 2, 3]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_listlike(self):
    """List-like ``loc`` on a CategoricalIndex, including unused categories."""
    wanted_labels = ["a", "b", "e"]

    def abe_frame(last_value):
        # Expected result of selecting a, b, e from df2; "e" fills the last row.
        index = CategoricalIndex(
            list("aaabbe"), categories=list("cabe"), name="B"
        )
        return DataFrame({"A": [0, 1, 5, 2, 3, last_value]}, index=index)

    # list of labels
    assert_frame_equal(
        self.df.loc[["c", "a"]], self.df.iloc[[4, 0, 1, 5]], check_index_type=True
    )

    assert_frame_equal(
        self.df2.loc[wanted_labels], abe_frame(np.nan), check_index_type=True
    )

    # element in the categories but not in the values
    with pytest.raises(KeyError, match=r"^'e'$"):
        self.df2.loc["e"]

    # assign is ok
    df = self.df2.copy()
    df.loc["e"] = 20
    assert_frame_equal(df.loc[wanted_labels], abe_frame(20))

    # a fresh copy is unaffected by the assignment above
    df = self.df2.copy()
    assert_frame_equal(df.loc[wanted_labels], abe_frame(np.nan), check_index_type=True)

    # not all labels in the categories
    with pytest.raises(
        KeyError,
        match="'a list-indexer must only include values that are in the"
        " categories'",
    ):
        self.df2.loc[["a", "d"]]
|
||||
|
||||
def test_loc_listlike_dtypes(self):
    """List-like ``loc`` keeps a CategoricalIndex with the right categories."""
    # GH 11586
    bad_label_msg = (
        "a list-indexer must only include values that are in the categories"
    )

    def check(frame, labels, a_values, b_values, exp_index):
        res = frame.loc[labels]
        exp = DataFrame({"A": a_values, "B": b_values}, index=exp_index)
        tm.assert_frame_equal(res, exp, check_index_type=True)

    def check_unknown_label(frame):
        with pytest.raises(KeyError, match=bad_label_msg):
            frame.loc[["a", "x"]]

    # unique categories and codes
    index = CategoricalIndex(["a", "b", "c"])
    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index)

    # unique slice
    check(
        df,
        ["a", "b"],
        [1, 2],
        [4, 5],
        CategoricalIndex(["a", "b"], categories=index.categories),
    )
    # duplicated slice
    check(
        df,
        ["a", "a", "b"],
        [1, 1, 2],
        [4, 4, 5],
        CategoricalIndex(["a", "a", "b"], categories=index.categories),
    )
    check_unknown_label(df)

    # duplicated categories and codes
    index = CategoricalIndex(["a", "b", "a"])
    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index)

    # unique slice
    check(df, ["a", "b"], [1, 3, 2], [4, 6, 5], CategoricalIndex(["a", "a", "b"]))
    # duplicated slice
    check(
        df,
        ["a", "a", "b"],
        [1, 3, 1, 3, 2],
        [4, 6, 4, 6, 5],
        CategoricalIndex(["a", "a", "a", "a", "b"]),
    )
    check_unknown_label(df)

    # contains unused category
    all_cats = list("abcde")
    index = CategoricalIndex(["a", "b", "a", "c"], categories=all_cats)
    df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index)

    check(
        df,
        ["a", "b"],
        [1, 3, 2],
        [5, 7, 6],
        CategoricalIndex(["a", "a", "b"], categories=all_cats),
    )
    check(
        df,
        ["a", "e"],
        [1, 3, np.nan],
        [5, 7, np.nan],
        CategoricalIndex(["a", "a", "e"], categories=all_cats),
    )
    # duplicated slice
    check(
        df,
        ["a", "a", "b"],
        [1, 3, 1, 3, 2],
        [5, 7, 5, 7, 6],
        CategoricalIndex(["a", "a", "a", "a", "b"], categories=all_cats),
    )
    check_unknown_label(df)
|
||||
|
||||
def test_get_indexer_array(self):
|
||||
arr = np.array(
|
||||
[Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")],
|
||||
dtype=object,
|
||||
)
|
||||
cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
|
||||
ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")
|
||||
result = ci.get_indexer(arr)
|
||||
expected = np.array([0, 1], dtype="intp")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_get_indexer_same_categories_same_order(self):
|
||||
ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
|
||||
|
||||
result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"]))
|
||||
expected = np.array([1, 1], dtype="intp")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_get_indexer_same_categories_different_order(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/19551
|
||||
ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
|
||||
|
||||
result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"]))
|
||||
expected = np.array([1, 1], dtype="intp")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_getitem_with_listlike(self):
|
||||
# GH 16115
|
||||
cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
|
||||
|
||||
expected = DataFrame(
|
||||
[[1, 0], [0, 1]], dtype="uint8", index=[0, 1], columns=cats
|
||||
)
|
||||
dummies = pd.get_dummies(cats)
|
||||
result = dummies[[c for c in dummies.columns]]
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
def test_setitem_listlike(self):
|
||||
|
||||
# GH 9469
|
||||
# properly coerce the input indexers
|
||||
np.random.seed(1)
|
||||
c = Categorical(
|
||||
np.random.randint(0, 5, size=150000).astype(np.int8)
|
||||
).add_categories([-1000])
|
||||
indexer = np.array([100000]).astype(np.int64)
|
||||
c[indexer] = -1000
|
||||
|
||||
# we are asserting the code result here
|
||||
# which maps to the -1000 category
|
||||
result = c.codes[np.array([100000]).astype(np.int64)]
|
||||
tm.assert_numpy_array_equal(result, np.array([5], dtype="int8"))
|
||||
|
||||
def test_ix_categorical_index(self):
|
||||
# GH 12531
|
||||
df = DataFrame(np.random.randn(3, 3), index=list("ABC"), columns=list("XYZ"))
|
||||
cdf = df.copy()
|
||||
cdf.index = CategoricalIndex(df.index)
|
||||
cdf.columns = CategoricalIndex(df.columns)
|
||||
|
||||
expect = Series(df.loc["A", :], index=cdf.columns, name="A")
|
||||
assert_series_equal(cdf.loc["A", :], expect)
|
||||
|
||||
expect = Series(df.loc[:, "X"], index=cdf.index, name="X")
|
||||
assert_series_equal(cdf.loc[:, "X"], expect)
|
||||
|
||||
exp_index = CategoricalIndex(list("AB"), categories=["A", "B", "C"])
|
||||
expect = DataFrame(df.loc[["A", "B"], :], columns=cdf.columns, index=exp_index)
|
||||
assert_frame_equal(cdf.loc[["A", "B"], :], expect)
|
||||
|
||||
exp_columns = CategoricalIndex(list("XY"), categories=["X", "Y", "Z"])
|
||||
expect = DataFrame(df.loc[:, ["X", "Y"]], index=cdf.index, columns=exp_columns)
|
||||
assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect)
|
||||
|
||||
# non-unique
|
||||
df = DataFrame(np.random.randn(3, 3), index=list("ABA"), columns=list("XYX"))
|
||||
cdf = df.copy()
|
||||
cdf.index = CategoricalIndex(df.index)
|
||||
cdf.columns = CategoricalIndex(df.columns)
|
||||
|
||||
exp_index = CategoricalIndex(list("AA"), categories=["A", "B"])
|
||||
expect = DataFrame(df.loc["A", :], columns=cdf.columns, index=exp_index)
|
||||
assert_frame_equal(cdf.loc["A", :], expect)
|
||||
|
||||
exp_columns = CategoricalIndex(list("XX"), categories=["X", "Y"])
|
||||
expect = DataFrame(df.loc[:, "X"], index=cdf.index, columns=exp_columns)
|
||||
assert_frame_equal(cdf.loc[:, "X"], expect)
|
||||
|
||||
expect = DataFrame(
|
||||
df.loc[["A", "B"], :],
|
||||
columns=cdf.columns,
|
||||
index=CategoricalIndex(list("AAB")),
|
||||
)
|
||||
assert_frame_equal(cdf.loc[["A", "B"], :], expect)
|
||||
|
||||
expect = DataFrame(
|
||||
df.loc[:, ["X", "Y"]],
|
||||
index=cdf.index,
|
||||
columns=CategoricalIndex(list("XXY")),
|
||||
)
|
||||
assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect)
|
||||
|
||||
def test_read_only_source(self):
|
||||
# GH 10043
|
||||
rw_array = np.eye(10)
|
||||
rw_df = DataFrame(rw_array)
|
||||
|
||||
ro_array = np.eye(10)
|
||||
ro_array.setflags(write=False)
|
||||
ro_df = DataFrame(ro_array)
|
||||
|
||||
assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]])
|
||||
assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]])
|
||||
assert_series_equal(rw_df.iloc[1], ro_df.iloc[1])
|
||||
assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3])
|
||||
|
||||
assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]])
|
||||
assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]])
|
||||
assert_series_equal(rw_df.loc[1], ro_df.loc[1])
|
||||
assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3])
|
||||
|
||||
def test_reindexing(self):
    """Reindex a categorical-indexed frame with list and Categorical targets."""

    def plain(a_values, labels):
        # Expected frame whose "B" index is built from an uncast Series.
        return DataFrame({"A": a_values, "B": Series(labels)}).set_index("B")

    def categorical(a_values, labels, dtype):
        # Expected frame whose "B" index is cast to ``dtype``.
        return DataFrame(
            {"A": a_values, "B": Series(labels).astype(dtype)}
        ).set_index("B")

    def check(target, expected):
        result = self.df2.reindex(target)
        assert_frame_equal(result, expected, check_index_type=True)

    # reindexing with list targets: convert to a regular index
    check(["a", "b", "e"], plain([0, 1, 5, 2, 3, np.nan], list("aaabbe")))
    check(["a", "b"], plain([0, 1, 5, 2, 3], list("aaabb")))
    check(["e"], plain([np.nan], ["e"]))
    check(["d"], plain([np.nan], ["d"]))

    # since we are actually reindexing with a Categorical
    # then return a Categorical
    cats = list("cabe")

    check(
        Categorical(["a", "d"], categories=cats),
        categorical([0, 1, 5, np.nan], list("aaad"), CDT(cats)),
    )
    check(
        Categorical(["a"], categories=cats),
        categorical([0, 1, 5], list("aaa"), CDT(cats)),
    )

    # list targets once more
    check(["a", "b", "e"], plain([0, 1, 5, 2, 3, np.nan], list("aaabbe")))
    check(["a", "b"], plain([0, 1, 5, 2, 3], list("aaabb")))
    check(["e"], plain([np.nan], ["e"]))

    # give back the type of categorical that we received
    check(
        Categorical(["a", "d"], categories=cats, ordered=True),
        categorical([0, 1, 5, np.nan], list("aaad"), CDT(cats, ordered=True)),
    )
    check(
        Categorical(["a", "d"], categories=["a", "d"]),
        categorical([0, 1, 5, np.nan], list("aaad"), CDT(["a", "d"])),
    )

    # passed duplicate indexers are not allowed
    with pytest.raises(ValueError, match="cannot reindex with a non-unique indexer"):
        self.df2.reindex(["a", "a"])

    # args NotImplemented ATM
    template = r"argument {} is not implemented for CategoricalIndex\.reindex"
    for keyword, value in (("method", "ffill"), ("level", 1), ("limit", 2)):
        with pytest.raises(NotImplementedError, match=template.format(keyword)):
            self.df2.reindex(["a"], **{keyword: value})
|
||||
|
||||
def test_loc_slice(self):
    """Integer slices through ``loc`` on a CategoricalIndex raise TypeError."""
    # slicing is not implemented ATM, see GH9748; once it is, the expected
    # result of df.loc[1:5] was sketched as df.iloc[[1, 2, 3, 4]]
    template = (
        "cannot do slice indexing on {klass} with these "
        r"indexers \[1\] of {kind}"
    )
    msg = template.format(klass=str(CategoricalIndex), kind=str(int))

    with pytest.raises(TypeError, match=msg):
        self.df.loc[1:5]
|
||||
|
||||
def test_loc_and_at_with_categorical_index(self):
|
||||
# GH 20629
|
||||
s = Series([1, 2, 3], index=pd.CategoricalIndex(["A", "B", "C"]))
|
||||
assert s.loc["A"] == 1
|
||||
assert s.at["A"] == 1
|
||||
df = DataFrame(
|
||||
[[1, 2], [3, 4], [5, 6]], index=pd.CategoricalIndex(["A", "B", "C"])
|
||||
)
|
||||
assert df.loc["B", 1] == 4
|
||||
assert df.at["B", 1] == 4
|
||||
|
||||
def test_boolean_selection(self):
    """Boolean masks built by comparing ordered and unordered CategoricalIndexes."""
    ordered, unordered = self.df3, self.df4

    # equality works for both; a label outside the categories matches nothing
    for label, positions in (("a", []), (1, [0, 1, 3])):
        for frame in (ordered, unordered):
            result = frame[frame.index == label]
            assert_frame_equal(result, frame.iloc[positions])

    # ordered: CategoricalIndex([1, 1, 2, 1, 3, 2], categories=[3, 2, 1],
    # ordered=True, name='B'), so inequalities follow the category order
    assert_frame_equal(ordered[ordered.index < 2], ordered.iloc[[4]])
    assert_frame_equal(ordered[ordered.index > 1], ordered.iloc[[]])

    # unordered: same values and categories with ordered=False,
    # which cannot be compared by inequality
    msg = "Unordered Categoricals can only compare equality or not"
    with pytest.raises(TypeError, match=msg):
        unordered[unordered.index < 2]
    with pytest.raises(TypeError, match=msg):
        unordered[unordered.index > 1]
|
||||
|
||||
def test_indexing_with_category(self):
|
||||
|
||||
# https://github.com/pandas-dev/pandas/issues/12564
|
||||
# consistent result if comparing as Dataframe
|
||||
|
||||
cat = DataFrame({"A": ["foo", "bar", "baz"]})
|
||||
exp = DataFrame({"A": [True, False, False]})
|
||||
|
||||
res = cat[["A"]] == "foo"
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
cat["A"] = cat["A"].astype("category")
|
||||
|
||||
res = cat[["A"]] == "foo"
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
def test_map_with_dict_or_series(self):
|
||||
orig_values = ["a", "B", 1, "a"]
|
||||
new_values = ["one", 2, 3.0, "one"]
|
||||
cur_index = pd.CategoricalIndex(orig_values, name="XXX")
|
||||
expected = pd.CategoricalIndex(
|
||||
new_values, name="XXX", categories=[3.0, 2, "one"]
|
||||
)
|
||||
|
||||
mapper = pd.Series(new_values[:-1], index=orig_values[:-1])
|
||||
output = cur_index.map(mapper)
|
||||
# Order of categories in output can be different
|
||||
tm.assert_index_equal(expected, output)
|
||||
|
||||
mapper = {o: n for o, n in zip(orig_values[:-1], new_values[:-1])}
|
||||
output = cur_index.map(mapper)
|
||||
# Order of categories in output can be different
|
||||
tm.assert_index_equal(expected, output)
|
||||
@@ -0,0 +1,406 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series, Timestamp, date_range, option_context
|
||||
from pandas.core import common as com
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestCaching:
|
||||
def test_slice_consolidate_invalidate_item_cache(self):
|
||||
|
||||
# this is chained assignment, but will 'work'
|
||||
with option_context("chained_assignment", None):
|
||||
|
||||
# #3970
|
||||
df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5})
|
||||
|
||||
# Creates a second float block
|
||||
df["cc"] = 0.0
|
||||
|
||||
# caches a reference to the 'bb' series
|
||||
df["bb"]
|
||||
|
||||
# repr machinery triggers consolidation
|
||||
repr(df)
|
||||
|
||||
# Assignment to wrong series
|
||||
df["bb"].iloc[0] = 0.17
|
||||
df._clear_item_cache()
|
||||
tm.assert_almost_equal(df["bb"][0], 0.17)
|
||||
|
||||
def test_setitem_cache_updating(self):
|
||||
# GH 5424
|
||||
cont = ["one", "two", "three", "four", "five", "six", "seven"]
|
||||
|
||||
for do_ref in [False, False]:
|
||||
df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)})
|
||||
|
||||
# ref the cache
|
||||
if do_ref:
|
||||
df.loc[0, "c"]
|
||||
|
||||
# set it
|
||||
df.loc[7, "c"] = 1
|
||||
|
||||
assert df.loc[0, "c"] == 0.0
|
||||
assert df.loc[7, "c"] == 1.0
|
||||
|
||||
# GH 7084
|
||||
# not updating cache on series setting with slices
|
||||
expected = DataFrame(
|
||||
{"A": [600, 600, 600]}, index=date_range("5/7/2014", "5/9/2014")
|
||||
)
|
||||
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
|
||||
df = DataFrame({"C": ["A", "A", "A"], "D": [100, 200, 300]})
|
||||
|
||||
# loop through df to update out
|
||||
six = Timestamp("5/7/2014")
|
||||
eix = Timestamp("5/9/2014")
|
||||
for ix, row in df.iterrows():
|
||||
out.loc[six:eix, row["C"]] = out.loc[six:eix, row["C"]] + row["D"]
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out["A"], expected["A"])
|
||||
|
||||
# try via a chain indexing
|
||||
# this actually works
|
||||
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
|
||||
for ix, row in df.iterrows():
|
||||
v = out[row["C"]][six:eix] + row["D"]
|
||||
out[row["C"]][six:eix] = v
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out["A"], expected["A"])
|
||||
|
||||
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
|
||||
for ix, row in df.iterrows():
|
||||
out.loc[six:eix, row["C"]] += row["D"]
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out["A"], expected["A"])
|
||||
|
||||
|
||||
class TestChaining:
|
||||
def test_setitem_chained_setfault(self):
|
||||
|
||||
# GH6026
|
||||
data = ["right", "left", "left", "left", "right", "left", "timeout"]
|
||||
mdata = ["right", "left", "left", "left", "right", "left", "none"]
|
||||
|
||||
df = DataFrame({"response": np.array(data)})
|
||||
mask = df.response == "timeout"
|
||||
df.response[mask] = "none"
|
||||
tm.assert_frame_equal(df, DataFrame({"response": mdata}))
|
||||
|
||||
recarray = np.rec.fromarrays([data], names=["response"])
|
||||
df = DataFrame(recarray)
|
||||
mask = df.response == "timeout"
|
||||
df.response[mask] = "none"
|
||||
tm.assert_frame_equal(df, DataFrame({"response": mdata}))
|
||||
|
||||
df = DataFrame({"response": data, "response1": data})
|
||||
mask = df.response == "timeout"
|
||||
df.response[mask] = "none"
|
||||
tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data}))
|
||||
|
||||
# GH 6056
|
||||
expected = DataFrame(dict(A=[np.nan, "bar", "bah", "foo", "bar"]))
|
||||
df = DataFrame(dict(A=np.array(["foo", "bar", "bah", "foo", "bar"])))
|
||||
df["A"].iloc[0] = np.nan
|
||||
result = df.head()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = DataFrame(dict(A=np.array(["foo", "bar", "bah", "foo", "bar"])))
|
||||
df.A.iloc[0] = np.nan
|
||||
result = df.head()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_detect_chained_assignment(self):
    """Exercise SettingWithCopy detection with ``chained_assignment="raise"``.

    Walks through chained writes that are expected to succeed, chained
    writes that must raise ``SettingWithCopyError``, and several
    whole-column assignments on frames produced by indexing that must not
    raise.

    The option is applied through ``option_context`` so that the previous
    setting is restored when the test ends, rather than being left at
    ``"raise"`` for whatever runs afterwards.
    """
    with option_context("chained_assignment", "raise"):

        # work with the chain
        expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB"))
        df = DataFrame(
            np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64"
        )
        assert df._is_copy is None

        df["A"][0] = -5
        df["A"][1] = -6
        tm.assert_frame_equal(df, expected)

        # test with the chaining
        df = DataFrame(
            {
                "A": Series(range(2), dtype="int64"),
                "B": np.array(np.arange(2, 4), dtype=np.float64),
            }
        )
        assert df._is_copy is None

        with pytest.raises(com.SettingWithCopyError):
            df["A"][0] = -5

        with pytest.raises(com.SettingWithCopyError):
            df["A"][1] = np.nan

        assert df["A"]._is_copy is None

        # Using a copy (the chain), fails
        df = DataFrame(
            {
                "A": Series(range(2), dtype="int64"),
                "B": np.array(np.arange(2, 4), dtype=np.float64),
            }
        )

        with pytest.raises(com.SettingWithCopyError):
            df.loc[0]["A"] = -5

        # Doc example
        df = DataFrame(
            {
                "a": ["one", "one", "two", "three", "two", "one", "six"],
                "c": Series(range(7), dtype="int64"),
            }
        )
        assert df._is_copy is None

        with pytest.raises(com.SettingWithCopyError):
            indexer = df.a.str.startswith("o")
            df[indexer]["c"] = 42

        expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
        df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})

        with pytest.raises(com.SettingWithCopyError):
            df["A"][0] = 111

        with pytest.raises(com.SettingWithCopyError):
            df.loc[0]["A"] = 111

        # the single-step .loc write is the supported spelling
        df.loc[0, "A"] = 111
        tm.assert_frame_equal(df, expected)

        # gh-5475: Make sure that is_copy is picked up reconstruction
        df = DataFrame({"A": [1, 2]})
        assert df._is_copy is None

        with tm.ensure_clean("__tmp__pickle") as path:
            df.to_pickle(path)
            df2 = pd.read_pickle(path)
            # the assignment is deliberately performed twice
            df2["B"] = df2["A"]
            df2["B"] = df2["A"]

        # gh-5597: a spurious raise as we are setting the entire column here
        from string import ascii_letters as letters

        def random_text(nobs=100):
            """Return a one-column frame of ``nobs`` random letter slices."""
            rows = []
            for _ in range(nobs):
                idx = np.random.randint(len(letters), size=2)
                idx.sort()

                rows.append([letters[idx[0] : idx[1]]])

            return DataFrame(rows, columns=["letters"])

        df = random_text(100000)

        # Always a copy
        x = df.iloc[[0, 1, 2]]
        assert x._is_copy is not None

        x = df.iloc[[0, 1, 2, 4]]
        assert x._is_copy is not None

        # Explicitly copy
        indexer = df.letters.apply(lambda x: len(x) > 10)
        df = df.loc[indexer].copy()

        assert df._is_copy is None
        df["letters"] = df["letters"].apply(str.lower)

        # Implicitly take
        df = random_text(100000)
        indexer = df.letters.apply(lambda x: len(x) > 10)
        df = df.loc[indexer]

        assert df._is_copy is not None
        df["letters"] = df["letters"].apply(str.lower)

        # Implicitly take 2
        df = random_text(100000)
        indexer = df.letters.apply(lambda x: len(x) > 10)

        df = df.loc[indexer]
        assert df._is_copy is not None
        df.loc[:, "letters"] = df["letters"].apply(str.lower)

        # Should be ok even though it's a copy!
        assert df._is_copy is None

        df["letters"] = df["letters"].apply(str.lower)
        assert df._is_copy is None

        df = random_text(100000)
        indexer = df.letters.apply(lambda x: len(x) > 10)
        df.loc[indexer, "letters"] = df.loc[indexer, "letters"].apply(str.lower)

        # an identical take, so no copy
        df = DataFrame({"a": [1]}).dropna()
        assert df._is_copy is None
        df["a"] += 1

        df = DataFrame(np.random.randn(10, 4))
        s = df.iloc[:, 0].sort_values()

        tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
        tm.assert_series_equal(s, df[0].sort_values())

        # see gh-6025: false positives
        df = DataFrame({"column1": ["a", "a", "a"], "column2": [4, 8, 9]})
        str(df)

        df["column1"] = df["column1"] + "b"
        str(df)

        df = df[df["column2"] != 8]
        str(df)

        df["column1"] = df["column1"] + "c"
        str(df)

        # from SO:
        # http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
        df = DataFrame(np.arange(0, 9), columns=["count"])
        df["group"] = "b"

        with pytest.raises(com.SettingWithCopyError):
            df.iloc[0:5]["group"] = "a"

        # Mixed type setting but same dtype & changing dtype
        df = DataFrame(
            dict(
                A=date_range("20130101", periods=5),
                B=np.random.randn(5),
                C=np.arange(5, dtype="int64"),
                D=list("abcde"),
            )
        )

        with pytest.raises(com.SettingWithCopyError):
            df.loc[2]["D"] = "foo"

        with pytest.raises(com.SettingWithCopyError):
            df.loc[2]["C"] = "foo"

        with pytest.raises(com.SettingWithCopyError):
            df["C"][2] = "foo"
|
||||
|
||||
def test_setting_with_copy_bug(self):
    """Chained write on a list-selected frame raises; adding a column does not."""
    # operating on a copy
    frame = DataFrame(
        {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]}
    )
    missing_c = pd.isna(frame.c)

    expected_msg = (
        "A value is trying to be set on a copy of a slice from a DataFrame"
    )
    with pytest.raises(com.SettingWithCopyError, match=expected_msg):
        frame[["c"]][missing_c] = frame[["b"]][missing_c]

    # invalid warning as we are returning a new object
    # GH 8730
    source = DataFrame(
        {"x": Series(["a", "b", "c"]), "y": Series(["d", "e", "f"])}
    )
    only_x = source[["x"]]

    # this should not raise
    only_x["y"] = ["g", "h", "i"]
|
||||
|
||||
def test_detect_chained_assignment_warnings(self):
    """In "warn" mode a chained ``.loc[...][...]`` write must emit a warning."""
    contents = {"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}
    with option_context("chained_assignment", "warn"):
        frame = DataFrame(contents)

        with tm.assert_produces_warning(com.SettingWithCopyWarning):
            frame.loc[0]["A"] = 111
|
||||
|
||||
def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
    """Chained filtered write on a frame with duplicate column labels warns."""
    # xref gh-13017.
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, -9]]
    labels = ["a", "a", "c"]
    with option_context("chained_assignment", "warn"):
        frame = pd.DataFrame(rows, columns=labels)

        with tm.assert_produces_warning(com.SettingWithCopyWarning):
            frame.c.loc[frame.c > 0] = None

        # the frame is compared against one rebuilt from the same literals
        rebuilt = pd.DataFrame(rows, columns=labels)
        tm.assert_frame_equal(frame, rebuilt)
|
||||
|
||||
def test_chained_getitem_with_lists(self):
|
||||
|
||||
# GH6394
|
||||
# Regression in chained getitem indexing with embedded list-like from
|
||||
# 0.12
|
||||
def check(result, expected):
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
assert isinstance(result, np.ndarray)
|
||||
|
||||
df = DataFrame({"A": 5 * [np.zeros(3)], "B": 5 * [np.ones(3)]})
|
||||
expected = df["A"].iloc[2]
|
||||
result = df.loc[2, "A"]
|
||||
check(result, expected)
|
||||
result2 = df.iloc[2]["A"]
|
||||
check(result2, expected)
|
||||
result3 = df["A"].loc[2]
|
||||
check(result3, expected)
|
||||
result4 = df["A"].iloc[2]
|
||||
check(result4, expected)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_cache_updating(self):
    """Columns fetched from a frame must reflect later writes to that frame."""
    # GH 4939, make sure to update the cache on setitem
    frame = tm.makeDataFrame()
    frame["A"]  # cache series
    frame.ix["Hello Friend"] = frame.ix[0]
    for column in ("A", "B"):
        assert "Hello Friend" in frame[column].index

    # 10264
    frame = DataFrame(
        np.zeros((5, 5), dtype="int64"), columns=list("abcde"), index=range(5)
    )
    frame["f"] = 0
    frame.f.values[3] = 1

    # TODO(wesm): unused?
    # y = df.iloc[np.arange(2, len(df))]

    frame.f.values[3] = 2
    expected = DataFrame(
        np.zeros((5, 6), dtype="int64"), columns=list("abcdef"), index=range(5)
    )
    expected.at[3, "f"] = 2
    tm.assert_frame_equal(frame, expected)
    tm.assert_series_equal(frame.f, Series([0, 0, 0, 2, 0], name="f"))
|
||||
|
||||
def test_deprecate_is_copy(self):
    """Reading or assigning ``is_copy`` must emit a FutureWarning."""
    # GH18801
    frame = DataFrame({"A": [1, 2, 3]})

    # getter
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        frame.is_copy

    # setter
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        frame.is_copy = "test deprecated is_copy"
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,358 @@
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from dateutil import tz
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, Series, Timestamp, date_range
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestDatetimeIndex:
|
||||
def test_setitem_with_datetime_tz(self):
|
||||
# 16889
|
||||
# support .loc with alignment and tz-aware DatetimeIndex
|
||||
mask = np.array([True, False, True, False])
|
||||
|
||||
idx = date_range("20010101", periods=4, tz="UTC")
|
||||
df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64")
|
||||
|
||||
result = df.copy()
|
||||
result.loc[mask, :] = df.loc[mask, :]
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
result = df.copy()
|
||||
result.loc[mask] = df.loc[mask]
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
idx = date_range("20010101", periods=4)
|
||||
df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64")
|
||||
|
||||
result = df.copy()
|
||||
result.loc[mask, :] = df.loc[mask, :]
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
result = df.copy()
|
||||
result.loc[mask] = df.loc[mask]
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
def test_indexing_with_datetime_tz(self):
    """Row lookup, boolean filtering and element setting on tz-aware columns."""
    # GH#8260
    # support datetime64 with tz
    aware = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
    naive = date_range("20130110", periods=3)
    frame = DataFrame({"A": aware, "B": naive})
    frame["C"] = aware
    frame.iloc[1, 1] = pd.NaT
    frame.iloc[1, 2] = pd.NaT

    # indexing: the same row is expected positionally and by label
    expected_row = Series(
        [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), np.nan, np.nan],
        index=list("ABC"),
        dtype="object",
        name=1,
    )
    tm.assert_series_equal(frame.iloc[1], expected_row)
    tm.assert_series_equal(frame.loc[1], expected_row)

    # indexing - fast_xs
    frame = DataFrame({"a": date_range("2014-01-01", periods=10, tz="UTC")})
    expected_row = Series(
        [Timestamp("2014-01-06 00:00:00+0000", tz="UTC")], index=["a"], name=5
    )
    tm.assert_series_equal(frame.iloc[5], expected_row)
    tm.assert_series_equal(frame.loc[5], expected_row)

    # indexing - boolean
    later = frame[frame.a > frame.a[3]]
    tm.assert_frame_equal(later, frame.iloc[4:])

    # indexing - setting an element
    frame = DataFrame(
        data=pd.to_datetime(["2015-03-30 20:12:32", "2015-03-12 00:11:11"]),
        columns=["time"],
    )
    frame["new_col"] = ["new", "old"]
    frame.time = frame.set_index("time").index.tz_localize("UTC")
    pacific = (
        frame[frame.new_col == "new"]
        .set_index("time")
        .index.tz_convert("US/Pacific")
    )

    # trying to set a single element on a part of a different timezone
    # this converts to object
    patched = frame.copy()
    patched.loc[patched.new_col == "new", "time"] = pacific

    expected = Series([pacific[0], frame.loc[1, "time"]], name="time")
    tm.assert_series_equal(patched.time, expected)

    # new_col is not modified below, so one mask serves all three lookups
    is_new = frame.new_col == "new"
    shifted = frame.loc[is_new, "time"] + pd.Timedelta("1s")
    frame.loc[is_new, "time"] = shifted
    tm.assert_series_equal(frame.loc[is_new, "time"], shifted)
|
||||
|
||||
def test_consistency_with_tz_aware_scalar(self):
|
||||
# xef gh-12938
|
||||
# various ways of indexing the same tz-aware scalar
|
||||
df = Series([Timestamp("2016-03-30 14:35:25", tz="Europe/Brussels")]).to_frame()
|
||||
|
||||
df = pd.concat([df, df]).reset_index(drop=True)
|
||||
expected = Timestamp("2016-03-30 14:35:25+0200", tz="Europe/Brussels")
|
||||
|
||||
result = df[0][0]
|
||||
assert result == expected
|
||||
|
||||
result = df.iloc[0, 0]
|
||||
assert result == expected
|
||||
|
||||
result = df.loc[0, 0]
|
||||
assert result == expected
|
||||
|
||||
result = df.iat[0, 0]
|
||||
assert result == expected
|
||||
|
||||
result = df.at[0, 0]
|
||||
assert result == expected
|
||||
|
||||
result = df[0].loc[0]
|
||||
assert result == expected
|
||||
|
||||
result = df[0].at[0]
|
||||
assert result == expected
|
||||
|
||||
def test_indexing_with_datetimeindex_tz(self):
|
||||
|
||||
# GH 12050
|
||||
# indexing on a series with a datetimeindex with tz
|
||||
index = date_range("2015-01-01", periods=2, tz="utc")
|
||||
|
||||
ser = Series(range(2), index=index, dtype="int64")
|
||||
|
||||
# list-like indexing
|
||||
|
||||
for sel in (index, list(index)):
|
||||
# getitem
|
||||
tm.assert_series_equal(ser[sel], ser)
|
||||
|
||||
# setitem
|
||||
result = ser.copy()
|
||||
result[sel] = 1
|
||||
expected = Series(1, index=index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# .loc getitem
|
||||
tm.assert_series_equal(ser.loc[sel], ser)
|
||||
|
||||
# .loc setitem
|
||||
result = ser.copy()
|
||||
result.loc[sel] = 1
|
||||
expected = Series(1, index=index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# single element indexing
|
||||
|
||||
# getitem
|
||||
assert ser[index[1]] == 1
|
||||
|
||||
# setitem
|
||||
result = ser.copy()
|
||||
result[index[1]] = 5
|
||||
expected = Series([0, 5], index=index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# .loc getitem
|
||||
assert ser.loc[index[1]] == 1
|
||||
|
||||
# .loc setitem
|
||||
result = ser.copy()
|
||||
result.loc[index[1]] = 5
|
||||
expected = Series([0, 5], index=index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_partial_setting_with_datetimelike_dtype(self):
    """Masked ``.loc`` creation of a new column from index values."""
    # GH9478
    # a datetimeindex alignment issue with partial setting
    hourly = date_range("1/1/2000", periods=3, freq="1H")
    frame = DataFrame(
        np.arange(6.0).reshape(3, 2), columns=list("AB"), index=hourly
    )
    expected = frame.copy()
    expected["C"] = [expected.index[0], pd.NaT, pd.NaT]

    below_one = frame.A < 1
    frame.loc[below_one, "C"] = frame.loc[below_one].index
    tm.assert_frame_equal(frame, expected)
|
||||
|
||||
def test_loc_setitem_datetime(self):
    """Enlarge an empty frame via ``.loc`` using several datetime key types."""
    # GH 9516
    first = Timestamp("20130101 09:00:00")
    second = Timestamp("20130101 10:00:00")

    # each converter turns a Timestamp into a different datetime-like key
    converters = (
        lambda ts: ts,
        lambda ts: ts.to_datetime64(),
        lambda ts: ts.to_pydatetime(),
        lambda ts: np.datetime64(ts),
    )
    for convert in converters:
        frame = DataFrame()
        for stamp, value in ((first, 100), (second, 200)):
            frame.loc[convert(stamp), "one"] = value

        expected = DataFrame({"one": [100.0, 200.0]}, index=[first, second])
        tm.assert_frame_equal(frame, expected)
|
||||
|
||||
def test_series_partial_set_datetime(self):
    """List-of-Timestamp ``.loc`` lookups, including keys absent from the index."""
    # GH 11497
    idx = date_range("2011-01-01", "2011-01-02", freq="D", name="idx")
    ser = Series([0.1, 0.2], index=idx, name="s")

    def stamps(*days):
        """Return a list of Timestamps, one per date string."""
        return [Timestamp(day) for day in days]

    # both keys present, original order
    found = ser.loc[stamps("2011-01-01", "2011-01-02")]
    exp = Series([0.1, 0.2], index=idx, name="s")
    tm.assert_series_equal(found, exp, check_index_type=True)

    # repeated and reordered keys
    keys = stamps("2011-01-02", "2011-01-02", "2011-01-01")
    exp = Series(
        [0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name="idx"), name="s"
    )
    tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)

    # 2011-01-03 is not in the index: NaN result plus a FutureWarning
    keys = stamps("2011-01-03", "2011-01-02", "2011-01-03")
    exp = Series(
        [np.nan, 0.2, np.nan], index=pd.DatetimeIndex(keys, name="idx"), name="s"
    )
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
|
||||
|
||||
def test_series_partial_set_period(self):
    """List-of-Period ``.loc`` lookups, including keys absent from the index."""
    # GH 11497
    idx = pd.period_range("2011-01-01", "2011-01-02", freq="D", name="idx")
    ser = Series([0.1, 0.2], index=idx, name="s")

    def periods(*days):
        """Return a list of daily Periods, one per date string."""
        return [pd.Period(day, freq="D") for day in days]

    # both keys present, original order
    found = ser.loc[periods("2011-01-01", "2011-01-02")]
    exp = Series([0.1, 0.2], index=idx, name="s")
    tm.assert_series_equal(found, exp, check_index_type=True)

    # repeated and reordered keys
    keys = periods("2011-01-02", "2011-01-02", "2011-01-01")
    exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name="idx"), name="s")
    tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)

    # 2011-01-03 is not in the index: NaN result plus a FutureWarning
    keys = periods("2011-01-03", "2011-01-02", "2011-01-03")
    exp = Series(
        [np.nan, 0.2, np.nan], index=pd.PeriodIndex(keys, name="idx"), name="s"
    )
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        found = ser.loc[keys]
    tm.assert_series_equal(found, exp)
|
||||
|
||||
def test_nanosecond_getitem_setitem_with_tz(self):
|
||||
# GH 11679
|
||||
data = ["2016-06-28 08:30:00.123456789"]
|
||||
index = pd.DatetimeIndex(data, dtype="datetime64[ns, America/Chicago]")
|
||||
df = DataFrame({"a": [10]}, index=index)
|
||||
result = df.loc[df.index[0]]
|
||||
expected = Series(10, index=["a"], name=df.index[0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.copy()
|
||||
result.loc[df.index[0], "a"] = -1
|
||||
expected = DataFrame(-1, index=index, columns=["a"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_getitem_across_dst(self):
    """Slice and scalar lookups with Timestamps carrying different UTC offsets."""
    # GH 21846
    idx = pd.date_range(
        "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min"
    )
    half_hours = pd.Series(list(range(5)), index=idx)

    stamp_options = {"tz": "Europe/Berlin", "freq": "30min"}
    # start carries a +02:00 offset, stop a +01:00 offset
    start = pd.Timestamp("2017-10-29 02:30:00+02:00", **stamp_options)
    stop = pd.Timestamp("2017-10-29 02:00:00+01:00", **stamp_options)

    window = half_hours.loc[start:stop]
    tm.assert_series_equal(window, pd.Series([2, 3], index=idx[2:4]))

    assert half_hours[start] == 2
|
||||
|
||||
def test_loc_incremental_setitem_with_dst(self):
    """Grow a Series label by label across 16 quarter-hour tz-aware datetimes."""
    # GH 20724
    start = datetime(2015, 11, 1, tzinfo=tz.gettz("US/Pacific"))
    quarter_hours = [start + timedelta(minutes=15 * step) for step in range(16)]

    grown = pd.Series([0], index=[quarter_hours[0]])
    for moment in quarter_hours:
        grown.loc[moment] = 1

    tm.assert_series_equal(grown, pd.Series(1, index=quarter_hours))
|
||||
|
||||
def test_loc_setitem_with_existing_dst(self):
    """Append a row via ``.loc`` to a frame indexed by tz-aware hourly stamps."""
    # GH 18308
    zone = "Europe/Madrid"
    first = pd.Timestamp("2017-10-29 00:00:00+0200", tz=zone)
    last = pd.Timestamp("2017-10-29 03:00:00+0100", tz=zone)
    extra = pd.Timestamp("2016-10-10 03:00:00", tz=zone)

    hours = pd.date_range(first, last, closed="left", freq="H")
    frame = pd.DataFrame(index=hours, columns=["value"])
    frame.loc[extra, "value"] = 12

    # all original rows stay NaN; the new label is appended at the end
    expected = pd.DataFrame(
        [np.nan] * len(hours) + [12],
        index=hours.append(pd.DatetimeIndex([extra])),
        columns=["value"],
        dtype=object,
    )
    tm.assert_frame_equal(frame, expected)
|
||||
|
||||
def test_loc_str_slicing(self):
|
||||
ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M")
|
||||
ser = ix.to_series()
|
||||
result = ser.loc[:"2017-12"]
|
||||
expected = ser.iloc[:-1]
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_label_slicing(self):
|
||||
ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M")
|
||||
ser = ix.to_series()
|
||||
result = ser.loc[: ix[-2]]
|
||||
expected = ser.iloc[:-1]
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,819 @@
|
||||
""" test positional based indexing with iloc """
|
||||
|
||||
from warnings import catch_warnings, filterwarnings, simplefilter
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series, concat, date_range, isna
|
||||
from pandas.api.types import is_scalar
|
||||
from pandas.tests.indexing.common import Base
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestiLoc(Base):
|
||||
def test_iloc_exceeds_bounds(self):
    """Out-of-bounds ``iloc`` keys: lists and scalars raise, slices are clipped."""
    # GH6296
    # iloc should allow indexers that exceed the bounds
    df = DataFrame(np.random.random_sample((20, 5)), columns=list("ABCDE"))
    everything = slice(None)

    # lists of positions should raise IndexError!
    msg = "positional indexers are out-of-bounds"
    frame_keys = ((everything, [0, 1, 2, 3, 4, 5]), [1, 30], [1, -30], [100])
    for key in frame_keys:
        with pytest.raises(IndexError, match=msg):
            df.iloc[key]

    s = df["A"]
    for key in ([100], [-100]):
        with pytest.raises(IndexError, match=msg):
            s.iloc[key]

    # still raise on a single indexer
    # GH10779
    # single positive/negative indexer exceeding Series bounds should raise
    # an IndexError
    msg = "single positional indexer is out-of-bounds"
    for obj in (df, s):
        for position in (30, -30):
            with pytest.raises(IndexError, match=msg):
                obj.iloc[position]

    # slices are ok: (out-of-bounds column slice, equivalent in-bounds slice)
    column_cases = (
        (slice(4, 10), slice(4, None)),  # 0 < start < len < stop
        (slice(-4, -10), slice(None, 0)),  # stop < 0 < start < len
        (slice(10, 4, -1), slice(None, 4, -1)),  # 0 < stop < len < start (down)
        (slice(4, -10, -1), slice(4, None, -1)),  # stop < 0 < start < len (down)
        (slice(-10, 4), slice(None, 4)),  # start < 0 < stop < len
        (slice(10, 4), slice(None, 0)),  # 0 < stop < len < start
        (slice(-10, -11, -1), slice(None, 0)),  # stop < start < 0 < len (down)
        (slice(10, 11), slice(None, 0)),  # 0 < len < start < stop
    )
    for given, equivalent in column_cases:
        tm.assert_frame_equal(df.iloc[:, given], df.iloc[:, equivalent])

    # slice bounds exceeding is ok
    series_cases = (
        (slice(18, 30), slice(18, None)),
        (slice(30, None), slice(None, 0)),
        (slice(30, None, -1), slice(None, None, -1)),
    )
    for given, equivalent in series_cases:
        tm.assert_series_equal(s.iloc[given], s.iloc[equivalent])

    # doc example
    def check(result, expected):
        # repr and dtypes are computed before the comparison
        str(result)
        result.dtypes
        tm.assert_frame_equal(result, expected)

    dfl = DataFrame(np.random.randn(5, 2), columns=list("AB"))
    check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index))
    check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
    check(dfl.iloc[4:6], dfl.iloc[[4]])

    with pytest.raises(IndexError, match="positional indexers are out-of-bounds"):
        dfl.iloc[[4, 5, 6]]
    with pytest.raises(
        IndexError, match="single positional indexer is out-of-bounds"
    ):
        dfl.iloc[:, 4]
|
||||
|
||||
@pytest.mark.parametrize("index,columns", [(np.arange(20), list("ABCDE"))])
@pytest.mark.parametrize(
    "index_vals,column_vals",
    [
        (slice(None), ["A", "D"]),
        (["1", "2"], slice(None)),
        ([pd.datetime(2019, 1, 1)], slice(None)),
    ],
)
def test_iloc_non_integer_raises(self, index, columns, index_vals, column_vals):
    """``iloc`` with string or datetime keys raises IndexError."""
    # GH 25753
    shape = (len(index), len(columns))
    frame = DataFrame(np.random.randn(*shape), index=index, columns=columns)

    expected_msg = ".iloc requires numeric indexers, got"
    with pytest.raises(IndexError, match=expected_msg):
        frame.iloc[index_vals, column_vals]
|
||||
|
||||
def test_iloc_getitem_int(self):
    """Run ``check_result`` for a positive integer ``iloc`` key."""
    # integer
    position = 2

    # compared against "ix" on the ints/uints fixtures
    self.check_result(
        "integer",
        "iloc",
        position,
        "ix",
        {0: 4, 1: 6, 2: 8},
        typs=["ints", "uints"],
    )

    # compared against "indexer" on the other fixtures, with fails=IndexError
    self.check_result(
        "integer",
        "iloc",
        position,
        "indexer",
        position,
        typs=["labels", "mixed", "ts", "floats", "empty"],
        fails=IndexError,
    )
|
||||
|
||||
def test_iloc_getitem_neg_int(self):
    """Run ``check_result`` for a negative integer ``iloc`` key."""
    # neg integer
    position = -1

    # compared against "ix" on the ints/uints fixtures
    self.check_result(
        "neg int",
        "iloc",
        position,
        "ix",
        {0: 6, 1: 9, 2: 12},
        typs=["ints", "uints"],
    )

    # compared against "indexer" on the other fixtures, with fails=IndexError
    self.check_result(
        "neg int",
        "iloc",
        position,
        "indexer",
        position,
        typs=["labels", "mixed", "ts", "floats", "empty"],
        fails=IndexError,
    )
|
||||
|
||||
@pytest.mark.parametrize("dims", [1, 2])
def test_iloc_getitem_invalid_scalar(self, dims):
    """A string key passed to ``iloc`` raises TypeError for Series and frame."""
    # GH 21982
    obj = (
        Series(np.arange(10))
        if dims == 1
        else DataFrame(np.arange(100).reshape(10, 10))
    )

    with pytest.raises(TypeError, match="Cannot index by location index"):
        obj.iloc["a"]
|
||||
|
||||
def test_iloc_array_not_mutating_negative_indices(self):
    """``iloc`` must leave an index array with negative entries unchanged."""
    # GH 21867
    positions = np.array([1, 2, -1])
    snapshot = positions.copy()
    frame = pd.DataFrame(
        {"A": [100, 101, 102], "B": [103, 104, 105], "C": [106, 107, 108]},
        index=[1, 2, 3],
    )

    # first as a row indexer, then as a column indexer
    for key in (positions, (slice(None), positions)):
        frame.iloc[key]
        tm.assert_numpy_array_equal(positions, snapshot)
|
||||
|
||||
def test_iloc_getitem_list_int(self):
    """Run ``check_result`` for integer keys given as a list and as an ndarray."""
    int_typs = ("ints", "uints")
    other_typs = ("labels", "mixed", "ts", "floats", "empty")

    # list of ints first, then
    # array of ints (GH5006), make sure that a single indexer is returning
    # the correct type
    for label, container in (("list int", list), ("array int", np.array)):
        self.check_result(
            label,
            "iloc",
            container([0, 1, 2]),
            "ix",
            {0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]},
            typs=list(int_typs),
        )
        self.check_result(
            label,
            "iloc",
            container([2]),
            "ix",
            {0: [4], 1: [6], 2: [8]},
            typs=list(int_typs),
        )
        self.check_result(
            label,
            "iloc",
            container([0, 1, 2]),
            "indexer",
            [0, 1, 2],
            typs=list(other_typs),
            fails=IndexError,
        )
|
||||
|
||||
def test_iloc_getitem_neg_int_can_reach_first_index(self):
    """Position ``-len(obj)`` must address the same element as position 0."""
    # GH10547 and GH10779
    # negative integers should be able to reach index 0
    frame = DataFrame({"A": [2, 3, 5], "B": [7, 11, 13]})
    column = frame["A"]

    # frame: scalar key, then list key
    first_row = frame.iloc[0]
    tm.assert_series_equal(frame.iloc[-3], first_row)

    first_row_frame = frame.iloc[[0]]
    tm.assert_frame_equal(frame.iloc[[-3]], first_row_frame)

    # Series: scalar key, then list key
    first_value = column.iloc[0]
    assert column.iloc[-3] == first_value

    first_value_series = column.iloc[[0]]
    tm.assert_series_equal(column.iloc[[-3]], first_value_series)

    # check the length 1 Series case highlighted in GH10547
    single = Series(["a"], index=["A"])
    tm.assert_series_equal(single.iloc[[-1]], single)
|
||||
|
||||
def test_iloc_getitem_dups(self):
    """Repeated positions in a list key, and frames with duplicate columns."""
    self.check_result(
        "list int (dups)",
        "iloc",
        [0, 1, 1, 3],
        "ix",
        {0: [0, 2, 2, 6], 1: [0, 3, 3, 9]},
        objs=["series", "frame"],
        typs=["ints", "uints"],
    )

    # GH 6766
    left = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
    right = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
    joined = concat([left, right], axis=1)

    # cross-sectional indexing
    assert isna(joined.iloc[0, 0])

    first_row = joined.iloc[0, :]
    expected = Series([np.nan, 1, 3, 3], index=["A", "B", "A", "B"], name=0)
    tm.assert_series_equal(first_row, expected)
|
||||
|
||||
def test_iloc_getitem_array(self):
    """Run ``check_result`` with an Index object used as the ``iloc`` key."""
    # array like
    positions = Series(index=range(1, 4)).index
    self.check_result(
        "array like",
        "iloc",
        positions,
        "ix",
        {0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]},
        typs=["ints", "uints"],
    )
|
||||
|
||||
def test_iloc_getitem_bool(self):
    """Run ``check_result`` for a boolean list key."""
    # boolean indexers
    mask = [True, False, True, False]

    # first call: ints/uints fixtures; second call: the rest, fails=IndexError
    option_sets = (
        {"typs": ["ints", "uints"]},
        {
            "typs": ["labels", "mixed", "ts", "floats", "empty"],
            "fails": IndexError,
        },
    )
    for options in option_sets:
        self.check_result("bool", "iloc", mask, "ix", mask, **options)
|
||||
|
||||
@pytest.mark.parametrize("index", [[True, False], [True, False, True, False]])
def test_iloc_getitem_bool_diff_len(self, index):
    """A boolean key whose length differs from the Series raises IndexError."""
    # GH26658
    ser = Series([1, 2, 3])
    expected_msg = "Item wrong length {} instead of {}.".format(
        len(index), len(ser)
    )
    with pytest.raises(IndexError, match=expected_msg):
        _ = ser.iloc[index]
|
||||
|
||||
def test_iloc_getitem_slice(self):
    """Run ``check_result`` for a slice ``iloc`` key."""
    # slices

    # compared against "ix" on the ints/uints fixtures
    self.check_result(
        "slice",
        "iloc",
        slice(1, 3),
        "ix",
        {0: [2, 4], 1: [3, 6], 2: [4, 8]},
        typs=["ints", "uints"],
    )

    # compared against "indexer" on the other fixtures, with fails=IndexError
    self.check_result(
        "slice",
        "iloc",
        slice(1, 3),
        "indexer",
        slice(1, 3),
        typs=["labels", "mixed", "ts", "floats", "empty"],
        fails=IndexError,
    )
|
||||
|
||||
def test_iloc_getitem_slice_dups(self):
|
||||
|
||||
df1 = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"])
|
||||
df2 = DataFrame(
|
||||
np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"]
|
||||
)
|
||||
|
||||
# axis=1
|
||||
df = concat([df1, df2], axis=1)
|
||||
tm.assert_frame_equal(df.iloc[:, :4], df1)
|
||||
tm.assert_frame_equal(df.iloc[:, 4:], df2)
|
||||
|
||||
df = concat([df2, df1], axis=1)
|
||||
tm.assert_frame_equal(df.iloc[:, :2], df2)
|
||||
tm.assert_frame_equal(df.iloc[:, 2:], df1)
|
||||
|
||||
exp = concat([df2, df1.iloc[:, [0]]], axis=1)
|
||||
tm.assert_frame_equal(df.iloc[:, 0:3], exp)
|
||||
|
||||
# axis=0
|
||||
df = concat([df, df], axis=0)
|
||||
tm.assert_frame_equal(df.iloc[0:10, :2], df2)
|
||||
tm.assert_frame_equal(df.iloc[0:10, 2:], df1)
|
||||
tm.assert_frame_equal(df.iloc[10:, :2], df2)
|
||||
tm.assert_frame_equal(df.iloc[10:, 2:], df1)
|
||||
|
||||
def test_iloc_setitem(self):
|
||||
df = self.frame_ints
|
||||
|
||||
df.iloc[1, 1] = 1
|
||||
result = df.iloc[1, 1]
|
||||
assert result == 1
|
||||
|
||||
df.iloc[:, 2:3] = 0
|
||||
expected = df.iloc[:, 2:3]
|
||||
result = df.iloc[:, 2:3]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# GH5771
|
||||
s = Series(0, index=[4, 5, 6])
|
||||
s.iloc[1:2] += 1
|
||||
expected = Series([0, 1, 0], index=[4, 5, 6])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
def test_iloc_setitem_list(self):
|
||||
|
||||
# setitem with an iloc list
|
||||
df = DataFrame(
|
||||
np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"]
|
||||
)
|
||||
df.iloc[[0, 1], [1, 2]]
|
||||
df.iloc[[0, 1], [1, 2]] += 100
|
||||
|
||||
expected = DataFrame(
|
||||
np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)),
|
||||
index=["A", "B", "C"],
|
||||
columns=["A", "B", "C"],
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_iloc_setitem_pandas_object(self):
|
||||
# GH 17193
|
||||
s_orig = Series([0, 1, 2, 3])
|
||||
expected = Series([0, -1, -2, 3])
|
||||
|
||||
s = s_orig.copy()
|
||||
s.iloc[Series([1, 2])] = [-1, -2]
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
s = s_orig.copy()
|
||||
s.iloc[pd.Index([1, 2])] = [-1, -2]
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
def test_iloc_setitem_dups(self):
    """``iloc`` assignment on a frame whose column labels are duplicated.

    GH 6766: iloc with a mask aligning from another iloc.
    """
    left = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
    right = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
    df = concat([left, right], axis=1)

    expected = df.fillna(3)
    expected["A"] = expected["A"].astype("float64")
    # row labels at which the first column is NaN
    is_missing = np.isnan(df.iloc[:, 0])
    missing_rows = is_missing[is_missing].index
    df.iloc[missing_rows, 0] = df.iloc[missing_rows, 2]
    tm.assert_frame_equal(df, expected)

    # del a dup column across blocks
    expected = DataFrame({0: [1, 2], 1: [3, 4]})
    expected.columns = ["B", "B"]
    del df["A"]
    tm.assert_frame_equal(df, expected)

    # assign back to self
    df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]
    tm.assert_frame_equal(df, expected)

    # reversed x 2: the same reversed-row assignment is performed twice
    for _ in range(2):
        df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True)
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_iloc_getitem_frame(self):
    """Compare ``iloc`` reads on an integer-labelled frame with ``ix`` reads.

    Row labels are ``range(0, 20, 2)`` and column labels are ``range(0, 8, 2)``,
    so each positional key below has a matching label key.
    """
    df = DataFrame(
        np.random.randn(10, 4), index=range(0, 20, 2), columns=range(0, 8, 2)
    )

    def via_ix(key):
        # ``df.ix[key]`` with the ".ix" FutureWarning filtered out
        with catch_warnings(record=True):
            filterwarnings("ignore", "\\n.ix", FutureWarning)
            return df.ix[key]

    # single row
    tm.assert_series_equal(df.iloc[2], via_ix(4))

    # single cell
    assert df.iloc[2, 2] == via_ix((4, 4))

    # (positional key, label key) pairs that must select the same sub-frame
    frame_cases = [
        # slice
        (slice(4, 8), slice(8, 14)),
        ((slice(None), slice(2, 3)), (slice(None), slice(4, 5))),
        # list of integers
        ([0, 1, 3], [0, 2, 6]),
        (([0, 1, 3], [0, 1]), ([0, 2, 6], [0, 2])),
        # neg indices
        (([-1, 1, 3], [-1, 1]), ([18, 2, 6], [6, 2])),
        # dups indices
        (([-1, -1, 1, 3], [-1, 1]), ([18, 18, 2, 6], [6, 2])),
    ]
    for positions, labels in frame_cases:
        tm.assert_frame_equal(df.iloc[positions], via_ix(labels))

    # with index-like
    s = Series(index=range(1, 5))
    tm.assert_frame_equal(df.iloc[s.index], via_ix([2, 4, 6, 8]))
|
||||
|
||||
def test_iloc_getitem_labelled_frame(self):
    """Positional reads on a frame whose axes carry string labels."""
    df = DataFrame(
        np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD")
    )

    # scalar by position vs. scalar by label
    assert df.iloc[1, 1] == df.loc["b", "B"]

    # one-column slice by position vs. one-column list by label
    tm.assert_frame_equal(df.iloc[:, 2:3], df.loc[:, ["C"]])

    # negative indexing
    assert df.iloc[-1, -1] == df.loc["j", "D"]

    # out-of-bounds exception
    with pytest.raises(
        IndexError, match="single positional indexer is out-of-bounds"
    ):
        df.iloc[10, 5]

    # trying to use a label
    label_msg = (
        r"Location based indexing can only have \[integer, integer"
        r" slice \(START point is INCLUDED, END point is EXCLUDED\),"
        r" listlike of integers, boolean array\] types"
    )
    with pytest.raises(ValueError, match=label_msg):
        df.iloc["j", "D"]
|
||||
|
||||
def test_iloc_getitem_doc_issue(self):
    """Two-axis ``iloc`` slices compared with slices of the source array.

    multi axis slicing issue with single block, surfaced in GH 6059
    """

    def touch(frame):
        # render the frame and read its dtypes; both results are discarded
        str(frame)
        frame.dtypes

    values = np.random.randn(6, 4)
    dates = date_range("20130101", periods=6)
    names = list("ABCD")
    df = DataFrame(values, index=dates, columns=names)

    # defines ref_locs
    df.describe()

    result = df.iloc[3:5, 0:2]
    touch(result)
    expected = DataFrame(values[3:5, 0:2], index=dates[3:5], columns=names[0:2])
    tm.assert_frame_equal(result, expected)

    # for dups
    df.columns = list("aaaa")
    result = df.iloc[3:5, 0:2]
    touch(result)
    expected = DataFrame(values[3:5, 0:2], index=dates[3:5], columns=list("aa"))
    tm.assert_frame_equal(result, expected)

    # related
    values = np.random.randn(6, 4)
    row_labels = list(range(0, 12, 2))
    col_labels = list(range(0, 8, 2))
    df = DataFrame(values, index=row_labels, columns=col_labels)

    df._data.blocks[0].mgr_locs
    result = df.iloc[1:5, 2:4]
    touch(result)
    expected = DataFrame(
        values[1:5, 2:4], index=row_labels[1:5], columns=col_labels[2:4]
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_iloc_setitem_series(self):
|
||||
df = DataFrame(
|
||||
np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD")
|
||||
)
|
||||
|
||||
df.iloc[1, 1] = 1
|
||||
result = df.iloc[1, 1]
|
||||
assert result == 1
|
||||
|
||||
df.iloc[:, 2:3] = 0
|
||||
expected = df.iloc[:, 2:3]
|
||||
result = df.iloc[:, 2:3]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
s = Series(np.random.randn(10), index=range(0, 20, 2))
|
||||
|
||||
s.iloc[1] = 1
|
||||
result = s.iloc[1]
|
||||
assert result == 1
|
||||
|
||||
s.iloc[:4] = 0
|
||||
expected = s.iloc[:4]
|
||||
result = s.iloc[:4]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = Series([-1] * 6)
|
||||
s.iloc[0::2] = [0, 2, 4]
|
||||
s.iloc[1::2] = [1, 3, 5]
|
||||
result = s
|
||||
expected = Series([0, 1, 2, 3, 4, 5])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_iloc_setitem_list_of_lists(self):
|
||||
|
||||
# GH 7551
|
||||
# list-of-list is set incorrectly in mixed vs. single dtyped frames
|
||||
df = DataFrame(
|
||||
dict(A=np.arange(5, dtype="int64"), B=np.arange(5, 10, dtype="int64"))
|
||||
)
|
||||
df.iloc[2:4] = [[10, 11], [12, 13]]
|
||||
expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9]))
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(dict(A=list("abcde"), B=np.arange(5, 10, dtype="int64")))
|
||||
df.iloc[2:4] = [["x", 11], ["y", 13]]
|
||||
expected = DataFrame(dict(A=["a", "b", "x", "y", "e"], B=[5, 6, 11, 13, 9]))
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])])
@pytest.mark.parametrize("value", [["Z"], np.array(["Z"])])
def test_iloc_setitem_with_scalar_index(self, indexer, value):
    """A one-element value assigned to one cell is stored as a scalar.

    GH #19474: assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
    elementwisely, not using "setter('A', ['Z'])".
    """
    frame = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
    frame.iloc[0, indexer] = value
    cell = frame.iloc[0, 0]

    assert is_scalar(cell)
    assert cell == "Z"
|
||||
|
||||
def test_iloc_mask(self):
    """Boolean masks passed to ``iloc``, ``loc`` and plain ``[]``.

    GH 3631: iloc with a mask (of a series) should raise.

    The second half tabulates, for every combination of mask index and
    accessor, either the binary representation of the summed ``nums`` column
    or the message of the exception that was raised.
    """
    df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"])
    mask = df.a % 2 == 0
    msg = "iLocation based boolean indexing cannot use an indexable as a mask"
    with pytest.raises(ValueError, match=msg):
        df.iloc[mask]
    mask.index = range(len(mask))
    msg = "iLocation based boolean indexing on an integer type is not available"
    with pytest.raises(NotImplementedError, match=msg):
        df.iloc[mask]

    # ndarray ok
    result = df.iloc[np.array([True] * len(mask), dtype=bool)]
    tm.assert_frame_equal(result, df)

    # the possibilities
    locs = np.arange(4)
    nums = 2 ** locs
    reps = [bin(num) for num in nums]
    df = DataFrame({"locs": locs, "nums": nums}, reps)

    expected = {
        (None, ""): "0b1100",
        (None, ".loc"): "0b1100",
        (None, ".iloc"): "0b1100",
        ("index", ""): "0b11",
        ("index", ".loc"): "0b11",
        ("index", ".iloc"): (
            "iLocation based boolean indexing cannot use an indexable as a mask"
        ),
        ("locs", ""): "Unalignable boolean Series provided as indexer "
        "(index of the boolean Series and of the indexed "
        "object do not match).",
        ("locs", ".loc"): "Unalignable boolean Series provided as indexer "
        "(index of the boolean Series and of the "
        "indexed object do not match).",
        ("locs", ".iloc"): (
            "iLocation based boolean indexing on an "
            "integer type is not available"
        ),
    }

    # UserWarnings from reindex of a boolean mask
    with catch_warnings(record=True):
        simplefilter("ignore", UserWarning)
        for idx in [None, "index", "locs"]:
            mask = (df.nums > 2).values
            if idx:
                # wrap the mask in a Series indexed by the reversed values
                mask = Series(mask, list(reversed(getattr(df, idx))))
            for method in ["", ".loc", ".iloc"]:
                try:
                    accessor = getattr(df, method[1:]) if method else df
                    ans = str(bin(accessor[mask]["nums"].sum()))
                except Exception as e:
                    # deliberate: the exception message is the outcome that
                    # gets compared against the table
                    ans = str(e)

                key = (idx, method)
                r = expected.get(key)
                if r != ans:
                    # r is the tabulated expectation, ans is what was observed
                    raise AssertionError(
                        "[{key}] does not match [{r}], received [{ans}]".format(
                            key=key, ans=ans, r=r
                        )
                    )
|
||||
|
||||
def test_iloc_non_unique_indexing(self):
    """Positional and label lookups on frames whose row labels repeat.

    GH 4017, non-unique indexing (on the axis).
    """
    df = DataFrame({"A": [0.1] * 3000, "B": [1] * 3000})
    idx = np.arange(30) * 99
    expected = df.iloc[idx]

    # positional reads on the stacked frame
    stacked = concat([df, 2 * df, 3 * df])
    tm.assert_frame_equal(stacked.iloc[idx], expected)

    df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000})
    df2 = concat([df2, 2 * df2, 3 * df2])

    top_label = df2.index.to_series().max()
    expected = df2.iloc[idx[idx <= top_label]]

    # each selected row, followed by the same row times 2 and times 3
    rows = []
    for _, row in expected.iterrows():
        rows.extend([row, row * 2, row * 3])

    expected = DataFrame(rows)
    # requested labels above the largest existing one get an all-empty row
    expected = concat([expected, DataFrame(index=idx[idx > top_label])], sort=True)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = df2.loc[idx]
    tm.assert_frame_equal(result, expected, check_index_type=False)
|
||||
|
||||
def test_iloc_empty_list_indexer_is_ok(self):
    """An empty list key gives the same frame as the equivalent empty slice."""
    from pandas.util.testing import makeCustomDataframe as mkdf

    df = mkdf(5, 2)
    type_checks = {"check_index_type": True, "check_column_type": True}

    # vertical empty
    tm.assert_frame_equal(df.iloc[:, []], df.iloc[:, :0], **type_checks)
    # horizontal empty
    tm.assert_frame_equal(df.iloc[[], :], df.iloc[:0, :], **type_checks)
    # horizontal empty
    tm.assert_frame_equal(df.iloc[[]], df.iloc[:0, :], **type_checks)
|
||||
|
||||
def test_identity_slice_returns_new_object(self):
    """``iloc[:]`` returns a distinct object that reflects later writes.

    GH13873.
    """
    frame = DataFrame({"a": [1, 2, 3]})
    frame_view = frame.iloc[:]
    assert frame_view is not frame

    # should be a shallow copy
    frame["a"] = [4, 4, 4]
    assert (frame_view["a"] == 4).all()

    series = Series([1, 2, 3, 4, 5, 6])
    series_view = series.iloc[:]
    assert series_view is not series

    # should also be a shallow copy
    series[:3] = [7, 8, 9]
    assert all(series_view[:3] == [7, 8, 9])
|
||||
|
||||
def test_indexing_zerodim_np_array(self):
|
||||
# GH24919
|
||||
df = DataFrame([[1, 2], [3, 4]])
|
||||
result = df.iloc[np.array(0)]
|
||||
s = pd.Series([1, 2], name=0)
|
||||
tm.assert_series_equal(result, s)
|
||||
|
||||
def test_series_indexing_zerodim_np_array(self):
|
||||
# GH24919
|
||||
s = Series([1, 2])
|
||||
result = s.iloc[np.array(0)]
|
||||
assert result == 1
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,163 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import algos as libalgos, index as libindex
|
||||
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestNumericEngine:
    """Tests of numeric index engines.

    Every test receives ``numeric_indexing_engine_type_and_dtype`` and unpacks
    it into an engine class and a NumPy dtype. Engines are constructed from a
    zero-argument callable returning the values, plus the number of values.
    """

    def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype):
        """Monotonicity flags for increasing, decreasing and mixed values."""
        engine_type, dtype = numeric_indexing_engine_type_and_dtype
        run = 1000
        rising = np.array([1] * run + [2] * run + [3] * run, dtype=dtype)

        # monotonic increasing
        engine = engine_type(lambda: rising, len(rising))
        assert engine.is_monotonic_increasing is True
        assert engine.is_monotonic_decreasing is False

        # monotonic decreasing
        engine = engine_type(lambda: rising[::-1], len(rising))
        assert engine.is_monotonic_increasing is False
        assert engine.is_monotonic_decreasing is True

        # neither monotonic increasing or decreasing
        mixed = np.array([1] * run + [2] * run + [1] * run, dtype=dtype)
        engine = engine_type(lambda: mixed[::-1], len(mixed))
        assert engine.is_monotonic_increasing is False
        assert engine.is_monotonic_decreasing is False

    def test_is_unique(self, numeric_indexing_engine_type_and_dtype):
        """``is_unique`` is True without repeats and False with a repeat."""
        engine_type, dtype = numeric_indexing_engine_type_and_dtype

        # unique
        distinct = np.array([1, 3, 2], dtype=dtype)
        engine = engine_type(lambda: distinct, len(distinct))
        assert engine.is_unique is True

        # not unique
        repeated = np.array([1, 2, 1], dtype=dtype)
        engine = engine_type(lambda: repeated, len(repeated))
        assert engine.is_unique is False

    def test_get_loc(self, numeric_indexing_engine_type_and_dtype):
        """``get_loc`` gives an int, a slice or a boolean mask."""
        engine_type, dtype = numeric_indexing_engine_type_and_dtype

        # unique
        distinct = np.array([1, 2, 3], dtype=dtype)
        engine = engine_type(lambda: distinct, len(distinct))
        assert engine.get_loc(2) == 1

        # monotonic
        run = 1000
        runs = np.array([1] * run + [2] * run + [3] * run, dtype=dtype)
        engine = engine_type(lambda: runs, len(runs))
        assert engine.get_loc(2) == slice(1000, 2000)

        # not monotonic
        cycled = np.array([1, 2, 3] * run, dtype=dtype)
        engine = engine_type(lambda: cycled, len(cycled))
        expected = np.array([False, True, False] * run, dtype=bool)
        result = engine.get_loc(2)
        assert (result == expected).all()

    def test_get_backfill_indexer(self, numeric_indexing_engine_type_and_dtype):
        """``get_backfill_indexer`` agrees with ``libalgos.backfill``."""
        engine_type, dtype = numeric_indexing_engine_type_and_dtype

        values = np.array([1, 5, 10], dtype=dtype)
        engine = engine_type(lambda: values, len(values))

        targets = np.arange(12, dtype=dtype)
        result = engine.get_backfill_indexer(targets)

        expected = libalgos.backfill(values, targets)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_pad_indexer(self, numeric_indexing_engine_type_and_dtype):
        """``get_pad_indexer`` agrees with ``libalgos.pad``."""
        engine_type, dtype = numeric_indexing_engine_type_and_dtype

        values = np.array([1, 5, 10], dtype=dtype)
        engine = engine_type(lambda: values, len(values))

        targets = np.arange(12, dtype=dtype)
        result = engine.get_pad_indexer(targets)

        expected = libalgos.pad(values, targets)
        tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestObjectEngine:
    """Tests of ``libindex.ObjectEngine`` on arrays of single-letter strings.

    Engines are constructed from a zero-argument callable returning the
    values, plus the number of values.
    """

    engine_type = libindex.ObjectEngine
    dtype = np.object_
    values = list("abc")

    def test_is_monotonic(self):
        """Monotonicity flags for increasing, decreasing and mixed values."""
        run = 1000
        # NOTE(review): the middle run repeats "a"; the numeric counterpart of
        # this test uses three distinct values -- confirm whether "b" was meant.
        rising = np.array(["a"] * run + ["a"] * run + ["c"] * run, dtype=self.dtype)

        # monotonic increasing
        engine = self.engine_type(lambda: rising, len(rising))
        assert engine.is_monotonic_increasing is True
        assert engine.is_monotonic_decreasing is False

        # monotonic decreasing
        engine = self.engine_type(lambda: rising[::-1], len(rising))
        assert engine.is_monotonic_increasing is False
        assert engine.is_monotonic_decreasing is True

        # neither monotonic increasing or decreasing
        mixed = np.array(["a"] * run + ["b"] * run + ["a"] * run, dtype=self.dtype)
        engine = self.engine_type(lambda: mixed[::-1], len(mixed))
        assert engine.is_monotonic_increasing is False
        assert engine.is_monotonic_decreasing is False

    def test_is_unique(self):
        """``is_unique`` is True without repeats and False with a repeat."""
        # unique
        distinct = np.array(self.values, dtype=self.dtype)
        engine = self.engine_type(lambda: distinct, len(distinct))
        assert engine.is_unique is True

        # not unique
        repeated = np.array(["a", "b", "a"], dtype=self.dtype)
        engine = self.engine_type(lambda: repeated, len(repeated))
        assert engine.is_unique is False

    def test_get_loc(self):
        """``get_loc`` gives an int, a slice or a boolean mask."""
        # unique
        distinct = np.array(self.values, dtype=self.dtype)
        engine = self.engine_type(lambda: distinct, len(distinct))
        assert engine.get_loc("b") == 1

        # monotonic
        run = 1000
        runs = np.array(["a"] * run + ["b"] * run + ["c"] * run, dtype=self.dtype)
        engine = self.engine_type(lambda: runs, len(runs))
        assert engine.get_loc("b") == slice(1000, 2000)

        # not monotonic
        cycled = np.array(self.values * run, dtype=self.dtype)
        engine = self.engine_type(lambda: cycled, len(cycled))
        expected = np.array([False, True, False] * run, dtype=bool)
        result = engine.get_loc("b")
        assert (result == expected).all()

    def test_get_backfill_indexer(self):
        """``get_backfill_indexer`` agrees with ``libalgos.backfill["object"]``."""
        values = np.array(["a", "e", "j"], dtype=self.dtype)
        engine = self.engine_type(lambda: values, len(values))

        targets = np.array(list("abcdefghij"), dtype=self.dtype)
        result = engine.get_backfill_indexer(targets)

        expected = libalgos.backfill["object"](values, targets)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_pad_indexer(self):
        """``get_pad_indexer`` agrees with ``libalgos.pad["object"]``."""
        values = np.array(["a", "e", "j"], dtype=self.dtype)
        engine = self.engine_type(lambda: values, len(values))

        targets = np.array(list("abcdefghij"), dtype=self.dtype)
        result = engine.get_pad_indexer(targets)

        expected = libalgos.pad["object"](values, targets)
        tm.assert_numpy_array_equal(result, expected)
|
||||
@@ -0,0 +1,14 @@
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestIndexingSlow:
    """Indexing tests that carry the ``slow`` pytest mark."""

    @pytest.mark.slow
    def test_large_dataframe_indexing(self):
        """Append one row to a million-row frame by assigning to a new label.

        GH10692.
        """
        result = DataFrame({"x": range(10 ** 6)}, dtype="int64")
        # The new row's value equals its label (10 ** 6), matching the last
        # element of ``expected``. The value used to be ``len(result) + 1``,
        # which differs from ``expected`` by one and can only pass when the
        # frame comparison is approximate.
        result.loc[len(result)] = len(result)
        expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64")
        tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,345 @@
|
||||
""" test indexing with ix """
|
||||
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series, option_context
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
def test_ix_deprecation():
    """Reading through ``.ix`` produces a FutureWarning.

    GH 15114.
    """
    frame = DataFrame({"A": [1, 2, 3]})
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=True):
        frame.ix[1, "A"]
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning")
|
||||
class TestIX:
|
||||
def test_ix_loc_setitem_consistency(self):
|
||||
|
||||
# GH 5771
|
||||
# loc with slice and series
|
||||
s = Series(0, index=[4, 5, 6])
|
||||
s.loc[4:5] += 1
|
||||
expected = Series([1, 1, 0], index=[4, 5, 6])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
# GH 5928
|
||||
# chained indexing assignment
|
||||
df = DataFrame({"a": [0, 1, 2]})
|
||||
expected = df.copy()
|
||||
with catch_warnings(record=True):
|
||||
expected.ix[[0, 1, 2], "a"] = -expected.ix[[0, 1, 2], "a"]
|
||||
|
||||
with catch_warnings(record=True):
|
||||
df["a"].ix[[0, 1, 2]] = -df["a"].ix[[0, 1, 2]]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame({"a": [0, 1, 2], "b": [0, 1, 2]})
|
||||
with catch_warnings(record=True):
|
||||
df["a"].ix[[0, 1, 2]] = -df["a"].ix[[0, 1, 2]].astype("float64") + 0.5
|
||||
expected = DataFrame({"a": [0.5, -0.5, -1.5], "b": [0, 1, 2]})
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# GH 8607
|
||||
# ix setitem consistency
|
||||
df = DataFrame(
|
||||
{
|
||||
"delta": [1174, 904, 161],
|
||||
"elapsed": [7673, 9277, 1470],
|
||||
"timestamp": [1413840976, 1413842580, 1413760580],
|
||||
}
|
||||
)
|
||||
expected = DataFrame(
|
||||
{
|
||||
"delta": [1174, 904, 161],
|
||||
"elapsed": [7673, 9277, 1470],
|
||||
"timestamp": pd.to_datetime(
|
||||
[1413840976, 1413842580, 1413760580], unit="s"
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
df2 = df.copy()
|
||||
df2["timestamp"] = pd.to_datetime(df["timestamp"], unit="s")
|
||||
tm.assert_frame_equal(df2, expected)
|
||||
|
||||
df2 = df.copy()
|
||||
df2.loc[:, "timestamp"] = pd.to_datetime(df["timestamp"], unit="s")
|
||||
tm.assert_frame_equal(df2, expected)
|
||||
|
||||
df2 = df.copy()
|
||||
with catch_warnings(record=True):
|
||||
df2.ix[:, 2] = pd.to_datetime(df["timestamp"], unit="s")
|
||||
tm.assert_frame_equal(df2, expected)
|
||||
|
||||
def test_ix_loc_consistency(self):
|
||||
|
||||
# GH 8613
|
||||
# some edge cases where ix/loc should return the same
|
||||
# this is not an exhaustive case
|
||||
|
||||
def compare(result, expected):
|
||||
if is_scalar(expected):
|
||||
assert result == expected
|
||||
else:
|
||||
assert expected.equals(result)
|
||||
|
||||
# failure cases for .loc, but these work for .ix
|
||||
df = DataFrame(np.random.randn(5, 4), columns=list("ABCD"))
|
||||
for key in [
|
||||
slice(1, 3),
|
||||
tuple([slice(0, 2), slice(0, 2)]),
|
||||
tuple([slice(0, 2), df.columns[0:2]]),
|
||||
]:
|
||||
|
||||
for index in [
|
||||
tm.makeStringIndex,
|
||||
tm.makeUnicodeIndex,
|
||||
tm.makeDateIndex,
|
||||
tm.makePeriodIndex,
|
||||
tm.makeTimedeltaIndex,
|
||||
]:
|
||||
df.index = index(len(df.index))
|
||||
with catch_warnings(record=True):
|
||||
df.ix[key]
|
||||
|
||||
msg = (
|
||||
r"cannot do slice indexing"
|
||||
r" on {klass} with these indexers \[(0|1)\] of"
|
||||
r" {kind}".format(klass=type(df.index), kind=str(int))
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
df.loc[key]
|
||||
|
||||
df = DataFrame(
|
||||
np.random.randn(5, 4),
|
||||
columns=list("ABCD"),
|
||||
index=pd.date_range("2012-01-01", periods=5),
|
||||
)
|
||||
|
||||
for key in [
|
||||
"2012-01-03",
|
||||
"2012-01-31",
|
||||
slice("2012-01-03", "2012-01-03"),
|
||||
slice("2012-01-03", "2012-01-04"),
|
||||
slice("2012-01-03", "2012-01-06", 2),
|
||||
slice("2012-01-03", "2012-01-31"),
|
||||
tuple([[True, True, True, False, True]]),
|
||||
]:
|
||||
|
||||
# getitem
|
||||
|
||||
# if the expected raises, then compare the exceptions
|
||||
try:
|
||||
with catch_warnings(record=True):
|
||||
expected = df.ix[key]
|
||||
except KeyError:
|
||||
with pytest.raises(KeyError, match=r"^'2012-01-31'$"):
|
||||
df.loc[key]
|
||||
continue
|
||||
|
||||
result = df.loc[key]
|
||||
compare(result, expected)
|
||||
|
||||
# setitem
|
||||
df1 = df.copy()
|
||||
df2 = df.copy()
|
||||
|
||||
with catch_warnings(record=True):
|
||||
df1.ix[key] = 10
|
||||
df2.loc[key] = 10
|
||||
compare(df2, df1)
|
||||
|
||||
# edge cases
|
||||
s = Series([1, 2, 3, 4], index=list("abde"))
|
||||
|
||||
result1 = s["a":"c"]
|
||||
with catch_warnings(record=True):
|
||||
result2 = s.ix["a":"c"]
|
||||
result3 = s.loc["a":"c"]
|
||||
tm.assert_series_equal(result1, result2)
|
||||
tm.assert_series_equal(result1, result3)
|
||||
|
||||
# now work rather than raising KeyError
|
||||
s = Series(range(5), [-2, -1, 1, 2, 3])
|
||||
|
||||
with catch_warnings(record=True):
|
||||
result1 = s.ix[-10:3]
|
||||
result2 = s.loc[-10:3]
|
||||
tm.assert_series_equal(result1, result2)
|
||||
|
||||
with catch_warnings(record=True):
|
||||
result1 = s.ix[0:3]
|
||||
result2 = s.loc[0:3]
|
||||
tm.assert_series_equal(result1, result2)
|
||||
|
||||
def test_ix_weird_slicing(self):
|
||||
# http://stackoverflow.com/q/17056560/1240268
|
||||
df = DataFrame({"one": [1, 2, 3, np.nan, np.nan], "two": [1, 2, 3, 4, 5]})
|
||||
df.loc[df["one"] > 1, "two"] = -df["two"]
|
||||
|
||||
expected = DataFrame(
|
||||
{
|
||||
"one": {0: 1.0, 1: 2.0, 2: 3.0, 3: np.nan, 4: np.nan},
|
||||
"two": {0: 1, 1: -2, 2: -3, 3: 4, 4: 5},
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_ix_assign_column_mixed(self, float_frame):
|
||||
# GH #1142
|
||||
df = float_frame
|
||||
df["foo"] = "bar"
|
||||
|
||||
orig = df.loc[:, "B"].copy()
|
||||
df.loc[:, "B"] = df.loc[:, "B"] + 1
|
||||
tm.assert_series_equal(df.B, orig + 1)
|
||||
|
||||
# GH 3668, mixed frame with series value
|
||||
df = DataFrame({"x": np.arange(10), "y": np.arange(10, 20), "z": "bar"})
|
||||
expected = df.copy()
|
||||
|
||||
for i in range(5):
|
||||
indexer = i * 2
|
||||
v = 1000 + i * 200
|
||||
expected.loc[indexer, "y"] = v
|
||||
assert expected.loc[indexer, "y"] == v
|
||||
|
||||
df.loc[df.x % 2 == 0, "y"] = df.loc[df.x % 2 == 0, "y"] * 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# GH 4508, making sure consistency of assignments
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [0, 1, 2]})
|
||||
df.loc[[0, 2], "b"] = [100, -100]
|
||||
expected = DataFrame({"a": [1, 2, 3], "b": [100, 1, -100]})
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame({"a": list(range(4))})
|
||||
df["b"] = np.nan
|
||||
df.loc[[1, 3], "b"] = [100, -100]
|
||||
expected = DataFrame({"a": [0, 1, 2, 3], "b": [np.nan, 100, np.nan, -100]})
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# ok, but chained assignments are dangerous
|
||||
# if we turn off chained assignment it will work
|
||||
with option_context("chained_assignment", None):
|
||||
df = DataFrame({"a": list(range(4))})
|
||||
df["b"] = np.nan
|
||||
df["b"].loc[[1, 3]] = [100, -100]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_ix_get_set_consistency(self):
|
||||
|
||||
# GH 4544
|
||||
# ix/loc get/set not consistent when
|
||||
# a mixed int/string index
|
||||
df = DataFrame(
|
||||
np.arange(16).reshape((4, 4)),
|
||||
columns=["a", "b", 8, "c"],
|
||||
index=["e", 7, "f", "g"],
|
||||
)
|
||||
|
||||
with catch_warnings(record=True):
|
||||
assert df.ix["e", 8] == 2
|
||||
assert df.loc["e", 8] == 2
|
||||
|
||||
with catch_warnings(record=True):
|
||||
df.ix["e", 8] = 42
|
||||
assert df.ix["e", 8] == 42
|
||||
assert df.loc["e", 8] == 42
|
||||
|
||||
df.loc["e", 8] = 45
|
||||
with catch_warnings(record=True):
|
||||
assert df.ix["e", 8] == 45
|
||||
assert df.loc["e", 8] == 45
|
||||
|
||||
def test_ix_slicing_strings(self):
|
||||
# see gh-3836
|
||||
data = {
|
||||
"Classification": ["SA EQUITY CFD", "bbb", "SA EQUITY", "SA SSF", "aaa"],
|
||||
"Random": [1, 2, 3, 4, 5],
|
||||
"X": ["correct", "wrong", "correct", "correct", "wrong"],
|
||||
}
|
||||
df = DataFrame(data)
|
||||
x = df[~df.Classification.isin(["SA EQUITY CFD", "SA EQUITY", "SA SSF"])]
|
||||
with catch_warnings(record=True):
|
||||
df.ix[x.index, "X"] = df["Classification"]
|
||||
|
||||
expected = DataFrame(
|
||||
{
|
||||
"Classification": {
|
||||
0: "SA EQUITY CFD",
|
||||
1: "bbb",
|
||||
2: "SA EQUITY",
|
||||
3: "SA SSF",
|
||||
4: "aaa",
|
||||
},
|
||||
"Random": {0: 1, 1: 2, 2: 3, 3: 4, 4: 5},
|
||||
"X": {0: "correct", 1: "bbb", 2: "correct", 3: "correct", 4: "aaa"},
|
||||
}
|
||||
) # bug was 4: 'bbb'
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_ix_setitem_out_of_bounds_axis_0(self):
|
||||
df = DataFrame(
|
||||
np.random.randn(2, 5),
|
||||
index=["row{i}".format(i=i) for i in range(2)],
|
||||
columns=["col{i}".format(i=i) for i in range(5)],
|
||||
)
|
||||
with catch_warnings(record=True):
|
||||
msg = "cannot set by positional indexing with enlargement"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.ix[2, 0] = 100
|
||||
|
||||
def test_ix_setitem_out_of_bounds_axis_1(self):
|
||||
df = DataFrame(
|
||||
np.random.randn(5, 2),
|
||||
index=["row{i}".format(i=i) for i in range(5)],
|
||||
columns=["col{i}".format(i=i) for i in range(2)],
|
||||
)
|
||||
with catch_warnings(record=True):
|
||||
msg = "cannot set by positional indexing with enlargement"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.ix[0, 2] = 100
|
||||
|
||||
def test_ix_empty_list_indexer_is_ok(self):
    """An empty list passed to ``.ix`` must match the empty ``.iloc`` slice."""
    # NOTE(review): the scraped source lost its indentation; everything is
    # kept inside the warnings context because each comparison touches .ix.
    with catch_warnings(record=True):
        from pandas.util.testing import makeCustomDataframe as mkdf

        df = mkdf(5, 2)
        # vertical empty
        tm.assert_frame_equal(
            df.ix[:, []],
            df.iloc[:, :0],
            check_index_type=True,
            check_column_type=True,
        )
        # horizontal empty
        tm.assert_frame_equal(
            df.ix[[], :],
            df.iloc[:0, :],
            check_index_type=True,
            check_column_type=True,
        )
        # horizontal empty, row indexer only
        tm.assert_frame_equal(
            df.ix[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True
        )
|
||||
|
||||
def test_ix_duplicate_returns_series(self):
    """``.ix`` on a duplicated float label must return the same Series as ``.loc``."""
    df = DataFrame(
        np.random.randn(3, 3), index=[0.1, 0.2, 0.2], columns=list("abc")
    )
    # only the deprecated .ix access needs its warning recorded
    with catch_warnings(record=True):
        r = df.ix[0.2, "a"]
    e = df.loc[0.2, "a"]
    tm.assert_series_equal(r, e)
|
||||
1083
venv/lib/python3.6/site-packages/pandas/tests/indexing/test_loc.py
Normal file
1083
venv/lib/python3.6/site-packages/pandas/tests/indexing/test_loc.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,583 @@
|
||||
"""
|
||||
test setting *parts* of objects both positionally and label based
|
||||
|
||||
TODO: these should be split among the indexer tests
|
||||
"""
|
||||
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, Series, date_range
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestPartialSetting:
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning")
def test_partial_setting(self):
    """Exercise setting with enlargement on Series and DataFrame (GH2578).

    Label-based setters (``[]``, ``.loc``, ``.at``, ``.ix``) are expected to
    add a missing label, while the positional setters ``.iloc`` / ``.iat``
    are expected to raise ``IndexError``.
    """
    # ## series ##
    s_orig = Series([1, 2, 3])

    s = s_orig.copy()
    s[5] = 5
    expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
    tm.assert_series_equal(s, expected)

    s = s_orig.copy()
    s.loc[5] = 5
    expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
    tm.assert_series_equal(s, expected)

    s = s_orig.copy()
    s[5] = 5.0
    expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
    tm.assert_series_equal(s, expected)

    s = s_orig.copy()
    s.loc[5] = 5.0
    expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
    tm.assert_series_equal(s, expected)

    # iloc/iat raise
    s = s_orig.copy()

    with pytest.raises(IndexError):
        s.iloc[3] = 5.0

    with pytest.raises(IndexError):
        s.iat[3] = 5.0

    # ## frame ##
    df_orig = DataFrame(
        np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64"
    )

    # iloc/iat raise
    df = df_orig.copy()

    with pytest.raises(IndexError):
        df.iloc[4, 2] = 5.0

    with pytest.raises(IndexError):
        df.iat[4, 2] = 5.0

    # row setting where it exists
    expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
    df = df_orig.copy()
    df.iloc[1] = df.iloc[2]
    tm.assert_frame_equal(df, expected)

    expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
    df = df_orig.copy()
    df.loc[1] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # like 2578, partial setting with dtype preservation
    expected = DataFrame({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})
    df = df_orig.copy()
    df.loc[3] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # single dtype frame, overwrite
    expected = DataFrame({"A": [0, 2, 4], "B": [0, 2, 4]})
    df = df_orig.copy()
    with catch_warnings(record=True):
        df.ix[:, "B"] = df.ix[:, "A"]
    tm.assert_frame_equal(df, expected)

    # mixed dtype frame, overwrite
    expected = DataFrame({"A": [0, 2, 4], "B": Series([0, 2, 4])})
    df = df_orig.copy()
    df["B"] = df["B"].astype(np.float64)
    with catch_warnings(record=True):
        df.ix[:, "B"] = df.ix[:, "A"]
    tm.assert_frame_equal(df, expected)

    # single dtype frame, partial setting
    # NOTE: ``df`` here is still the frame left over from the previous section
    expected = df_orig.copy()
    expected["C"] = df["A"]
    df = df_orig.copy()
    with catch_warnings(record=True):
        df.ix[:, "C"] = df.ix[:, "A"]
    tm.assert_frame_equal(df, expected)

    # mixed frame, partial setting
    expected = df_orig.copy()
    expected["C"] = df["A"]
    df = df_orig.copy()
    with catch_warnings(record=True):
        df.ix[:, "C"] = df.ix[:, "A"]
    tm.assert_frame_equal(df, expected)

    # GH 8473
    dates = date_range("1/1/2000", periods=8)
    df_orig = DataFrame(
        np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"]
    )
    # first label past the end of the existing index
    new_label = dates[-1] + dates.freq

    expected = pd.concat(
        [df_orig, DataFrame({"A": 7}, index=[new_label])], sort=True
    )
    df = df_orig.copy()
    df.loc[new_label, "A"] = 7
    tm.assert_frame_equal(df, expected)
    df = df_orig.copy()
    df.at[new_label, "A"] = 7
    tm.assert_frame_equal(df, expected)

    # new row label and new column label at the same time
    exp_other = DataFrame({0: 7}, index=[new_label])
    expected = pd.concat([df_orig, exp_other], axis=1)

    df = df_orig.copy()
    df.loc[new_label, 0] = 7
    tm.assert_frame_equal(df, expected)
    df = df_orig.copy()
    df.at[new_label, 0] = 7
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_setting_mixed_dtype(self):
    """Row enlargement through ``.loc`` on mixed-dtype and empty frames."""
    # in a mixed dtype environment, try to preserve dtypes
    # by appending
    df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"])

    s = df.loc[1].copy()
    s.name = 2
    expected = df.append(s)

    df.loc[2] = df.loc[1]
    tm.assert_frame_equal(df, expected)

    # columns will align (here no Series label matches a column)
    df = DataFrame(columns=["A", "B"])
    df.loc[0] = Series(1, index=range(4))
    tm.assert_frame_equal(df, DataFrame(columns=["A", "B"], index=[0]))

    # columns will align (only "B" matches)
    df = DataFrame(columns=["A", "B"])
    df.loc[0] = Series(1, index=["B"])

    exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64")
    tm.assert_frame_equal(df, exp)

    # list-like must conform
    df = DataFrame(columns=["A", "B"])

    with pytest.raises(ValueError):
        df.loc[0] = [1, 2, 3]

    # TODO: #15657, these are left as object and not coerced
    df = DataFrame(columns=["A", "B"])
    df.loc[3] = [6, 7]

    exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object")
    tm.assert_frame_equal(df, exp)
|
||||
|
||||
def test_series_partial_set(self):
    """List lookups with missing labels on a Series (regression from GH4825).

    ``.loc`` with a list containing some missing labels is expected to emit a
    ``FutureWarning`` and give the same result as ``.reindex``; a list with
    no matching label at all raises ``KeyError``.
    """
    # partial set with new index
    ser = Series([0.1, 0.2], index=[1, 2])

    # loc equiv to .reindex
    expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ser.loc[[3, 2, 3]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    result = ser.reindex([3, 2, 3])
    tm.assert_series_equal(result, expected, check_index_type=True)

    expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ser.loc[[3, 2, 3, "x"]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    result = ser.reindex([3, 2, 3, "x"])
    tm.assert_series_equal(result, expected, check_index_type=True)

    # every label present: no warning expected
    expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
    result = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ser.loc[[2, 2, "x", 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    result = ser.reindex([2, 2, "x", 1])
    tm.assert_series_equal(result, expected, check_index_type=True)

    # raises as nothing is in the index
    msg = (
        r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are"
        r" in the \[index\]\""
    )
    with pytest.raises(KeyError, match=msg):
        ser.loc[[3, 3, 3]]

    expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ser.loc[[2, 2, 3]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    result = ser.reindex([2, 2, 3])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3], index=[1, 2, 3])
    expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = s.loc[[3, 4, 4]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    result = s.reindex([3, 4, 4])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
    expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = s.loc[[5, 3, 3]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    result = s.reindex([5, 3, 3])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
    expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = s.loc[[5, 4, 4]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    result = s.reindex([5, 4, 4])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7])
    expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = s.loc[[7, 2, 2]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    result = s.reindex([7, 2, 2])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
    expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = s.loc[[4, 5, 5]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    result = s.reindex([4, 5, 5])
    tm.assert_series_equal(result, expected, check_index_type=True)

    # iloc
    expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
    result = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(result, expected, check_index_type=True)
|
||||
|
||||
def test_series_partial_set_with_name(self):
    """Same lookups as ``test_series_partial_set`` with named index/Series (GH 11497).

    The expected results carry the index name ``"idx"`` and the Series name
    ``"s"`` through every ``.loc`` / ``.iloc`` list lookup.
    """
    idx = Index([1, 2], dtype="int64", name="idx")
    ser = Series([0.1, 0.2], index=idx, name="s")

    # loc
    exp_idx = Index([3, 2, 3], dtype="int64", name="idx")
    expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name="s")
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ser.loc[[3, 2, 3]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    exp_idx = Index([3, 2, 3, "x"], dtype="object", name="idx")
    expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, name="s")
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ser.loc[[3, 2, 3, "x"]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    # every label present: no warning expected
    exp_idx = Index([2, 2, 1], dtype="int64", name="idx")
    expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s")
    result = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    exp_idx = Index([2, 2, "x", 1], dtype="object", name="idx")
    expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name="s")
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ser.loc[[2, 2, "x", 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    # raises as nothing is in the index
    msg = (
        r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64',"
        r" name='idx'\)\] are in the \[index\]\""
    )
    with pytest.raises(KeyError, match=msg):
        ser.loc[[3, 3, 3]]

    exp_idx = Index([2, 2, 3], dtype="int64", name="idx")
    expected = Series([0.2, 0.2, np.nan], index=exp_idx, name="s")
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ser.loc[[2, 2, 3]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    exp_idx = Index([3, 4, 4], dtype="int64", name="idx")
    expected = Series([0.3, np.nan, np.nan], index=exp_idx, name="s")
    idx = Index([1, 2, 3], dtype="int64", name="idx")
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    exp_idx = Index([5, 3, 3], dtype="int64", name="idx")
    expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name="s")
    idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    exp_idx = Index([5, 4, 4], dtype="int64", name="idx")
    expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name="s")
    idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    exp_idx = Index([7, 2, 2], dtype="int64", name="idx")
    expected = Series([0.4, np.nan, np.nan], index=exp_idx, name="s")
    idx = Index([4, 5, 6, 7], dtype="int64", name="idx")
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    exp_idx = Index([4, 5, 5], dtype="int64", name="idx")
    expected = Series([0.4, np.nan, np.nan], index=exp_idx, name="s")
    idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    # iloc
    exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx")
    expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s")
    result = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(result, expected, check_index_type=True)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix")
def test_partial_set_invalid(self):
    """Only 'valid' new labels may enlarge a time-indexed frame (GH 4940)."""
    orig = tm.makeTimeDataFrame()
    df = orig.copy()

    # don't allow not string inserts
    with pytest.raises(TypeError):
        with catch_warnings(record=True):
            df.loc[100.0, :] = df.ix[0]

    with pytest.raises(TypeError):
        with catch_warnings(record=True):
            df.loc[100, :] = df.ix[0]

    with pytest.raises(TypeError):
        with catch_warnings(record=True):
            df.ix[100.0, :] = df.ix[0]

    # integer label through .ix is expected to fail with ValueError instead
    with pytest.raises(ValueError):
        with catch_warnings(record=True):
            df.ix[100, :] = df.ix[0]

    # allow object conversion here
    df = orig.copy()
    # NOTE(review): indentation was lost in the scraped source; ``exp`` is
    # built inside the context because it also touches the deprecated .ix
    with catch_warnings(record=True):
        df.loc["a", :] = df.ix[0]
        exp = orig.append(Series(df.ix[0], name="a"))
    tm.assert_frame_equal(df, exp)
    tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"]))
    assert df.index.dtype == "object"
|
||||
|
||||
def test_partial_set_empty_series(self):
    """Enlarge an initially empty Series one label at a time (GH5226)."""
    # partially set with an empty object series: integer values
    s = Series()
    s.loc[1] = 1
    tm.assert_series_equal(s, Series([1], index=[1]))
    s.loc[3] = 3
    tm.assert_series_equal(s, Series([1, 3], index=[1, 3]))

    # float values
    s = Series()
    s.loc[1] = 1.0
    tm.assert_series_equal(s, Series([1.0], index=[1]))
    s.loc[3] = 3.0
    tm.assert_series_equal(s, Series([1.0, 3.0], index=[1, 3]))

    # string labels, then a mixed string/integer index
    s = Series()
    s.loc["foo"] = 1
    tm.assert_series_equal(s, Series([1], index=["foo"]))
    s.loc["bar"] = 3
    tm.assert_series_equal(s, Series([1, 3], index=["foo", "bar"]))
    s.loc[3] = 4
    tm.assert_series_equal(s, Series([1, 3, 4], index=["foo", "bar", 3]))
|
||||
|
||||
def test_partial_set_empty_frame(self):
    """Setting rows and columns on a completely empty DataFrame.

    Row/column enlargement through ``.loc`` with scalars or a mismatched
    Series is expected to raise ``ValueError``; assigning whole columns with
    ``[]`` works and fixes the resulting index/dtype (GH5632).
    """
    # partially set with an empty object
    # frame
    df = DataFrame()

    with pytest.raises(ValueError):
        df.loc[1] = 1

    with pytest.raises(ValueError):
        df.loc[1] = Series([1], index=["foo"])

    with pytest.raises(ValueError):
        df.loc[:, 1] = 1

    # these work as they don't really change
    # anything but the index
    # GH5632
    expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))

    def f():
        df = DataFrame()
        df["foo"] = Series([], dtype="object")
        return df

    tm.assert_frame_equal(f(), expected)

    def f():
        df = DataFrame()
        df["foo"] = Series(df.index)
        return df

    tm.assert_frame_equal(f(), expected)

    def f():
        df = DataFrame()
        df["foo"] = df.index
        return df

    tm.assert_frame_equal(f(), expected)

    # same shape, but the new column is expected to be float64
    expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
    expected["foo"] = expected["foo"].astype("float64")

    def f():
        df = DataFrame()
        df["foo"] = []
        return df

    tm.assert_frame_equal(f(), expected)

    def f():
        df = DataFrame()
        df["foo"] = Series(np.arange(len(df)), dtype="float64")
        return df

    tm.assert_frame_equal(f(), expected)

    def f():
        df = DataFrame()
        tm.assert_index_equal(df.index, Index([], dtype="object"))
        df["foo"] = range(len(df))
        return df

    expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
    expected["foo"] = expected["foo"].astype("float64")
    tm.assert_frame_equal(f(), expected)

    # a Series assigned as a new column brings its own index
    df = DataFrame()
    tm.assert_index_equal(df.columns, Index([], dtype=object))
    df2 = DataFrame()
    df2[1] = Series([1], index=["foo"])
    df.loc[:, 1] = Series([1], index=["foo"])
    tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1]))
    tm.assert_frame_equal(df, df2)

    # no index to start
    expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0])

    df = DataFrame(columns=["A", "B"])
    df[0] = Series(1, index=range(4))
    # smoke checks: dtypes and repr must not raise
    df.dtypes
    str(df)
    tm.assert_frame_equal(df, expected)

    df = DataFrame(columns=["A", "B"])
    df.loc[:, 0] = Series(1, index=range(4))
    df.dtypes
    str(df)
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame_row(self):
    """Adding a column to a zero-row frame must not create rows (GH5720, GH5744)."""
    # don't create rows when empty
    expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64"))
    expected["A"] = expected["A"].astype("int64")
    expected["B"] = expected["B"].astype("float64")
    expected["New"] = expected["New"].astype("float64")

    df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
    # no value of A exceeds 5, so the selection has zero rows
    y = df[df.A > 5]
    y["New"] = np.nan
    tm.assert_frame_equal(y, expected)

    # scalar assigned as a new column of an empty, index-less frame
    expected = DataFrame(columns=["a", "b", "c c", "d"])
    expected["d"] = expected["d"].astype("int64")
    df = DataFrame(columns=["a", "b", "c c"])
    df["d"] = 3
    tm.assert_frame_equal(df, expected)
    tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object))

    # reindex columns is ok
    df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
    y = df[df.A > 5]
    result = y.reindex(columns=["A", "B", "C"])
    expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64"))
    expected["A"] = expected["A"].astype("int64")
    expected["B"] = expected["B"].astype("float64")
    expected["C"] = expected["C"].astype("float64")
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_partial_set_empty_frame_set_series(self):
|
||||
# GH 5756
|
||||
# setting with empty Series
|
||||
df = DataFrame(Series())
|
||||
tm.assert_frame_equal(df, DataFrame({0: Series()}))
|
||||
|
||||
df = DataFrame(Series(name="foo"))
|
||||
tm.assert_frame_equal(df, DataFrame({"foo": Series()}))
|
||||
|
||||
def test_partial_set_empty_frame_empty_copy_assignment(self):
|
||||
# GH 5932
|
||||
# copy on empty with assignment fails
|
||||
df = DataFrame(index=[0])
|
||||
df = df.copy()
|
||||
df["a"] = 0
|
||||
expected = DataFrame(0, index=[0], columns=["a"])
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame_empty_consistencies(self):
    """Setting into a frame that has columns but no rows (GH 6171)."""
    # consistency on empty frames: list of ints assigned to one column
    df = DataFrame(columns=["x", "y"])
    df["x"] = [1, 2]
    expected = DataFrame({"x": [1, 2], "y": [np.nan, np.nan]})
    tm.assert_frame_equal(df, expected, check_dtype=False)

    # list of strings: dtypes are compared here, everything is object
    df = DataFrame(columns=["x", "y"])
    df["x"] = ["1", "2"]
    expected = DataFrame({"x": ["1", "2"], "y": [np.nan, np.nan]}, dtype=object)
    tm.assert_frame_equal(df, expected)

    # single cell set through .loc creates the row
    df = DataFrame(columns=["x", "y"])
    df.loc[0, "x"] = 1
    expected = DataFrame({"x": [1], "y": [np.nan]})
    tm.assert_frame_equal(df, expected, check_dtype=False)
|
||||
@@ -0,0 +1,247 @@
|
||||
""" test scalar indexing, including at and iat """
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Series, Timedelta, Timestamp, date_range
|
||||
from pandas.tests.indexing.common import Base
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestScalar(Base):
|
||||
def test_at_and_iat_get(self):
    """Scalar reads through ``.at`` / ``.iat`` must agree with ``self.get_value``."""

    def _check(f, func, values=False):
        # compare the accessor against the reference lookup for every
        # index produced by generate_indices; None fixtures are skipped
        if f is not None:
            indices = self.generate_indices(f, values)
            for i in indices:
                result = getattr(f, func)[i]
                expected = self.get_value(f, i, values)
                tm.assert_almost_equal(result, expected)

    for o in self._objs:

        d = getattr(self, o)

        # iat
        for f in [d["ints"], d["uints"]]:
            _check(f, "iat", values=True)

        # label-type keys are expected to be rejected by iat
        for f in [d["labels"], d["ts"], d["floats"]]:
            if f is not None:
                msg = "iAt based indexing can only have integer indexers"
                with pytest.raises(ValueError, match=msg):
                    self.check_values(f, "iat")

        # at
        for f in [d["ints"], d["uints"], d["labels"], d["ts"], d["floats"]]:
            _check(f, "at")
|
||||
|
||||
def test_at_and_iat_set(self):
    """Scalar writes through ``.at`` / ``.iat`` must be visible to ``self.get_value``."""

    def _check(f, func, values=False):
        # write 1 at every generated index and read it back through the
        # reference lookup; None fixtures are skipped
        if f is not None:
            indices = self.generate_indices(f, values)
            for i in indices:
                getattr(f, func)[i] = 1
                expected = self.get_value(f, i, values)
                tm.assert_almost_equal(expected, 1)

    for t in self._objs:

        d = getattr(self, t)

        # iat
        for f in [d["ints"], d["uints"]]:
            _check(f, "iat", values=True)

        # label-type keys are expected to be rejected by iat
        for f in [d["labels"], d["ts"], d["floats"]]:
            if f is not None:
                msg = "iAt based indexing can only have integer indexers"
                with pytest.raises(ValueError, match=msg):
                    _check(f, "iat")

        # at
        for f in [d["ints"], d["uints"], d["labels"], d["ts"], d["floats"]]:
            _check(f, "at")
|
||||
|
||||
def test_at_iat_coercion(self):
|
||||
|
||||
# as timestamp is not a tuple!
|
||||
dates = date_range("1/1/2000", periods=8)
|
||||
df = DataFrame(np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"])
|
||||
s = df["A"]
|
||||
|
||||
result = s.at[dates[5]]
|
||||
xp = s.values[5]
|
||||
assert result == xp
|
||||
|
||||
# GH 7729
|
||||
# make sure we are boxing the returns
|
||||
s = Series(["2014-01-01", "2014-02-02"], dtype="datetime64[ns]")
|
||||
expected = Timestamp("2014-02-02")
|
||||
|
||||
for r in [lambda: s.iat[1], lambda: s.iloc[1]]:
|
||||
result = r()
|
||||
assert result == expected
|
||||
|
||||
s = Series(["1 days", "2 days"], dtype="timedelta64[ns]")
|
||||
expected = Timedelta("2 days")
|
||||
|
||||
for r in [lambda: s.iat[1], lambda: s.iloc[1]]:
|
||||
result = r()
|
||||
assert result == expected
|
||||
|
||||
def test_iat_invalid_args(self):
|
||||
pass
|
||||
|
||||
def test_imethods_with_dups(self):
    """``.iat`` / ``.iloc`` stay positional when the index has duplicates (GH6493)."""
    # iat/iloc with dups
    s = Series(range(5), index=[1, 1, 2, 2, 3], dtype="int64")
    result = s.iloc[2]
    assert result == 2
    result = s.iat[2]
    assert result == 2

    # positions outside [-5, 5) must raise, in both directions
    msg = "index 10 is out of bounds for axis 0 with size 5"
    with pytest.raises(IndexError, match=msg):
        s.iat[10]
    msg = "index -10 is out of bounds for axis 0 with size 5"
    with pytest.raises(IndexError, match=msg):
        s.iat[-10]

    # positions 2 and 3 both carry the duplicated label 2
    result = s.iloc[[2, 3]]
    expected = Series([2, 3], [2, 2], dtype="int64")
    tm.assert_series_equal(result, expected)

    df = s.to_frame()
    result = df.iloc[2]
    expected = Series(2, index=[0], name=2)
    tm.assert_series_equal(result, expected)

    result = df.iat[2, 0]
    assert result == 2
|
||||
|
||||
def test_at_to_fail(self):
    """``.at`` must not fall back to another key type (GH 7814)."""
    # at should not fallback: integer key on a string index
    s = Series([1, 2, 3], index=list("abc"))
    result = s.at["a"]
    assert result == 1
    msg = (
        "At based indexing on an non-integer index can only have"
        " non-integer indexers"
    )
    with pytest.raises(ValueError, match=msg):
        s.at[0]

    df = DataFrame({"A": [1, 2, 3]}, index=list("abc"))
    result = df.at["a", "A"]
    assert result == 1
    with pytest.raises(ValueError, match=msg):
        df.at["a", 0]

    # string key on an integer index
    s = Series([1, 2, 3], index=[3, 2, 1])
    result = s.at[1]
    assert result == 3
    msg = "At based indexing on an integer index can only have integer indexers"
    with pytest.raises(ValueError, match=msg):
        s.at["a"]

    df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1])
    result = df.at[1, 0]
    assert result == 3
    with pytest.raises(ValueError, match=msg):
        df.at["a", 0]

    # GH 13822, incorrect error string with non-unique columns when missing
    # column is accessed
    df = DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]})
    df.columns = ["x", "x", "z"]

    # Check that we get the correct value in the KeyError
    with pytest.raises(KeyError, match=r"\['y'\] not in index"):
        df[["x", "y", "z"]]
|
||||
|
||||
def test_at_with_tz(self):
|
||||
# gh-15822
|
||||
df = DataFrame(
|
||||
{
|
||||
"name": ["John", "Anderson"],
|
||||
"date": [
|
||||
Timestamp(2017, 3, 13, 13, 32, 56),
|
||||
Timestamp(2017, 2, 16, 12, 10, 3),
|
||||
],
|
||||
}
|
||||
)
|
||||
df["date"] = df["date"].dt.tz_localize("Asia/Shanghai")
|
||||
|
||||
expected = Timestamp("2017-03-13 13:32:56+0800", tz="Asia/Shanghai")
|
||||
|
||||
result = df.loc[0, "date"]
|
||||
assert result == expected
|
||||
|
||||
result = df.at[0, "date"]
|
||||
assert result == expected
|
||||
|
||||
def test_series_set_tz_timestamp(self, tz_naive_fixture):
    """Appending a Timestamp through ``.at`` to a one-element Series (GH 25506).

    ``tz_naive_fixture`` is passed as the ``tz`` argument of ``Timestamp``.
    """
    ts = Timestamp("2017-08-05 00:00:00+0100", tz=tz_naive_fixture)
    result = Series(ts)
    # label 1 does not exist yet, so this enlarges the Series
    result.at[1] = ts
    expected = Series([ts, ts])
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_mixed_index_at_iat_loc_iloc_series(self):
    """Scalar accessors agree on a Series with a mixed str/int index (GH 19860)."""
    s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])
    # label-based accessors
    for el, item in s.items():
        assert s.at[el] == s.loc[el] == item
    # position-based accessors; values are 1..5 in order
    for i in range(len(s)):
        assert s.iat[i] == s.iloc[i] == i + 1

    # 4 is not a label, so it must not be treated as a position
    with pytest.raises(KeyError, match="^4$"):
        s.at[4]
    with pytest.raises(KeyError, match="^4$"):
        s.loc[4]
|
||||
|
||||
def test_mixed_index_at_iat_loc_iloc_dataframe(self):
    """Scalar accessors agree on a frame with mixed str/int column labels (GH 19860)."""
    df = DataFrame(
        [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], columns=["a", "b", "c", 1, 2]
    )
    # label-based accessors
    for rowIdx, row in df.iterrows():
        for el, item in row.items():
            assert df.at[rowIdx, el] == df.loc[rowIdx, el] == item

    # position-based accessors; the data is 0..9 in row-major order
    for row in range(2):
        for i in range(5):
            assert df.iat[row, i] == df.iloc[row, i] == row * 5 + i

    # 3 is not a column label, so it must not be treated as a position
    with pytest.raises(KeyError, match="^3$"):
        df.at[0, 3]
    with pytest.raises(KeyError, match="^3$"):
        df.loc[0, 3]
|
||||
|
||||
def test_iat_setter_incompatible_assignment(self):
    """Assign ``None`` through ``.iat`` into an integer column (GH 23236)."""
    result = DataFrame({"a": [0, 1], "b": [4, 5]})
    result.iat[0, 0] = None
    expected = DataFrame({"a": [None, 1], "b": [4, 5]})
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_zerodim_np_array(self):
|
||||
# GH24924
|
||||
# dataframe __getitem__
|
||||
df = DataFrame([[1, 2], [3, 4]])
|
||||
result = df[np.array(0)]
|
||||
expected = Series([1, 3], name=0)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# series __getitem__
|
||||
s = Series([1, 2])
|
||||
result = s[np.array(0)]
|
||||
assert result == 1
|
||||
@@ -0,0 +1,120 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestTimedeltaIndexing:
|
||||
def test_boolean_indexing(self):
|
||||
# GH 14946
|
||||
df = pd.DataFrame({"x": range(10)})
|
||||
df.index = pd.to_timedelta(range(10), unit="s")
|
||||
conditions = [df["x"] > 3, df["x"] == 3, df["x"] < 3]
|
||||
expected_data = [
|
||||
[0, 1, 2, 3, 10, 10, 10, 10, 10, 10],
|
||||
[0, 1, 2, 10, 4, 5, 6, 7, 8, 9],
|
||||
[10, 10, 10, 3, 4, 5, 6, 7, 8, 9],
|
||||
]
|
||||
for cond, data in zip(conditions, expected_data):
|
||||
result = df.assign(x=df.mask(cond, 10).astype("int64"))
|
||||
expected = pd.DataFrame(
|
||||
data,
|
||||
index=pd.to_timedelta(range(10), unit="s"),
|
||||
columns=["x"],
|
||||
dtype="int64",
|
||||
)
|
||||
tm.assert_frame_equal(expected, result)
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer, expected",
    [
        (0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]),
        ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9]),
    ],
)
def test_list_like_indexing(self, indexer, expected):
    """Set values through ``.loc`` using labels taken from a timedelta index (GH 16637).

    ``indexer`` selects positions of ``df.index`` (scalar, slice or list);
    ``expected`` is the resulting content of column ``"x"``.
    """
    df = pd.DataFrame({"x": range(10)}, dtype="int64")
    df.index = pd.to_timedelta(range(10), unit="s")

    df.loc[df.index[indexer], "x"] = 20

    expected = pd.DataFrame(
        expected,
        index=pd.to_timedelta(range(10), unit="s"),
        columns=["x"],
        dtype="int64",
    )

    tm.assert_frame_equal(expected, df)
|
||||
|
||||
def test_string_indexing(self):
|
||||
# GH 16896
|
||||
df = pd.DataFrame({"x": range(3)}, index=pd.to_timedelta(range(3), unit="days"))
|
||||
expected = df.iloc[0]
|
||||
sliced = df.loc["0 days"]
|
||||
tm.assert_series_equal(sliced, expected)
|
||||
|
||||
@pytest.mark.parametrize("value", [None, pd.NaT, np.nan])
|
||||
def test_masked_setitem(self, value):
|
||||
# issue (#18586)
|
||||
series = pd.Series([0, 1, 2], dtype="timedelta64[ns]")
|
||||
series[series == series[0]] = value
|
||||
expected = pd.Series([pd.NaT, 1, 2], dtype="timedelta64[ns]")
|
||||
tm.assert_series_equal(series, expected)
|
||||
|
||||
@pytest.mark.parametrize("value", [None, pd.NaT, np.nan])
|
||||
def test_listlike_setitem(self, value):
|
||||
# issue (#18586)
|
||||
series = pd.Series([0, 1, 2], dtype="timedelta64[ns]")
|
||||
series.iloc[0] = value
|
||||
expected = pd.Series([pd.NaT, 1, 2], dtype="timedelta64[ns]")
|
||||
tm.assert_series_equal(series, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"start,stop, expected_slice",
|
||||
[
|
||||
[np.timedelta64(0, "ns"), None, slice(0, 11)],
|
||||
[np.timedelta64(1, "D"), np.timedelta64(6, "D"), slice(1, 7)],
|
||||
[None, np.timedelta64(4, "D"), slice(0, 5)],
|
||||
],
|
||||
)
|
||||
def test_numpy_timedelta_scalar_indexing(self, start, stop, expected_slice):
|
||||
# GH 20393
|
||||
s = pd.Series(range(11), pd.timedelta_range("0 days", "10 days"))
|
||||
result = s.loc[slice(start, stop)]
|
||||
expected = s.iloc[expected_slice]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_roundtrip_thru_setitem(self):
|
||||
# PR 23462
|
||||
dt1 = pd.Timedelta(0)
|
||||
dt2 = pd.Timedelta(28767471428571405)
|
||||
df = pd.DataFrame({"dt": pd.Series([dt1, dt2])})
|
||||
df_copy = df.copy()
|
||||
s = pd.Series([dt1])
|
||||
|
||||
expected = df["dt"].iloc[1].value
|
||||
df.loc[[True, False]] = s
|
||||
result = df["dt"].iloc[1].value
|
||||
|
||||
assert expected == result
|
||||
tm.assert_frame_equal(df, df_copy)
|
||||
|
||||
def test_loc_str_slicing(self):
|
||||
ix = pd.timedelta_range(start="1 day", end="2 days", freq="1H")
|
||||
ser = ix.to_series()
|
||||
result = ser.loc[:"1 days"]
|
||||
expected = ser.iloc[:-1]
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_slicing(self):
|
||||
ix = pd.timedelta_range(start="1 day", end="2 days", freq="1H")
|
||||
ser = ix.to_series()
|
||||
result = ser.loc[: ix[-2]]
|
||||
expected = ser.iloc[:-1]
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
Reference in New Issue
Block a user