8th day of python challenges 111-117

This commit is contained in:
abd.shallal
2019-08-04 15:26:35 +03:00
parent b04c1b055f
commit 627802c383
3215 changed files with 760227 additions and 491 deletions

View File

@@ -0,0 +1,85 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Index, MultiIndex
@pytest.fixture
def idx():
    """
    Return the two-level, six-row MultiIndex shared by the general tests.

    The levels are named "first" and "second", and the index is assembled
    directly from levels and codes with ``verify_integrity=False``.
    """
    first_level = Index(["foo", "bar", "baz", "qux"])
    second_level = Index(["one", "two"])
    first_codes = np.array([0, 0, 1, 2, 3, 3])
    second_codes = np.array([0, 1, 0, 1, 0, 1])
    return MultiIndex(
        levels=[first_level, second_level],
        codes=[first_codes, second_codes],
        names=["first", "second"],
        verify_integrity=False,
    )
@pytest.fixture
def idx_dup():
    """
    Return a two-level MultiIndex whose codes repeat some (first, second) pairs.

    Compare tests/indexes/multi/conftest.py.
    """
    first_level = Index(["foo", "bar", "baz", "qux"])
    second_level = Index(["one", "two"])
    # Rows 1 and 3 both encode (0, 1); rows 2 and 4 both encode (1, 0).
    first_codes = np.array([0, 0, 1, 0, 1, 1])
    second_codes = np.array([0, 1, 0, 1, 0, 1])
    return MultiIndex(
        levels=[first_level, second_level],
        codes=[first_codes, second_codes],
        names=["first", "second"],
        verify_integrity=False,
    )
@pytest.fixture
def index_names():
    """
    Return the level names used by the ``idx`` fixture, for testing equality
    of the names assigned to that index.
    """
    names = ["first", "second"]
    return names
@pytest.fixture
def holder():
    """Return the MultiIndex constructor used by the pickle-compatibility test."""
    constructor = MultiIndex
    return constructor
@pytest.fixture
def compat_props():
    """Return the names of the properties a MultiIndex must have."""
    required = ["shape", "ndim", "size"]
    return required
@pytest.fixture
def narrow_multi_index():
    """
    Return a MultiIndex that is narrower than the display (<80 characters).
    """
    half = 1000
    # First half of the rows is "a", second half is "abc".
    cat_level = pd.CategoricalIndex(["a"] * half + ["abc"] * half)
    time_level = pd.date_range("2000-01-01", freq="s", periods=half * 2)
    arrays = [cat_level, cat_level.codes + 9, time_level]
    return pd.MultiIndex.from_arrays(arrays, names=["a", "b", "dti"])
@pytest.fixture
def wide_multi_index():
    """
    Return a MultiIndex that is wider than the display (>80 characters).
    """
    half = 1000
    # First half of the rows is "a", second half is "abc".
    cat_level = pd.CategoricalIndex(["a"] * half + ["abc"] * half)
    time_level = pd.date_range("2000-01-01", freq="s", periods=half * 2)
    # The datetime level is repeated three times to widen the index.
    arrays = [cat_level, cat_level.codes + 9] + [time_level] * 3
    return pd.MultiIndex.from_arrays(
        arrays, names=["a", "b", "dti_1", "dti_2", "dti_3"]
    )

View File

@@ -0,0 +1,356 @@
import numpy as np
import pytest
from pandas.compat.numpy import _np_version_under1p17
import pandas as pd
from pandas import Index, MultiIndex, date_range, period_range
import pandas.util.testing as tm
def test_shift(idx):
    """MultiIndex.shift raises NotImplementedError (GH8083, base-class shift)."""
    expected_msg = "Not supported for type MultiIndex"
    for shift_args in [(1,), (1, 2)]:
        with pytest.raises(NotImplementedError, match=expected_msg):
            idx.shift(*shift_args)
def test_groupby(idx):
    """Check groupby with an array of keys and with the index itself as keys."""
    by_array = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
    tuples = idx.tolist()
    tm.assert_dict_equal(by_array, {1: tuples[:3], 2: tuples[3:]})

    # GH5620: grouping by the index itself maps each key to a one-item list
    by_self = idx.groupby(idx)
    tm.assert_dict_equal(by_self, {key: [key] for key in idx})
def test_truncate():
    """
    Check which first-level labels survive MultiIndex.truncate.

    The index is built with integer levels (0-3 and 0-1). Truncation is
    tried with only ``before``, only ``after`` and both bounds, and finally
    with ``after < before``, which must raise ValueError.
    """
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))
    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])
    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )

    result = index.truncate(before=1)
    # Level 0 holds the integers 0-3, so the label cut off by before=1 is 0.
    # (Checking for the string "foo" here would pass for any result.)
    assert 0 not in result.levels[0]
    assert 1 in result.levels[0]

    result = index.truncate(after=1)
    assert 2 not in result.levels[0]
    assert 1 in result.levels[0]

    result = index.truncate(before=1, after=2)
    assert len(result.levels[0]) == 2

    msg = "after < before"
    with pytest.raises(ValueError, match=msg):
        index.truncate(3, 1)
def test_where():
    """MultiIndex.where with a scalar condition raises NotImplementedError."""
    mi = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    with pytest.raises(
        NotImplementedError,
        match=r"\.where is not supported for MultiIndex operations",
    ):
        mi.where(True)
@pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
def test_where_array_like(klass):
    """MultiIndex.where raises NotImplementedError for every array-like condition."""
    mi = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    mask = [False, True]
    expected_msg = r"\.where is not supported for MultiIndex operations"
    with pytest.raises(NotImplementedError, match=expected_msg):
        mi.where(klass(mask))
# TODO: reshape
def test_reorder_levels(idx):
    """Passing three level positions to reorder_levels raises IndexError."""
    too_many = [2, 1, 0]
    # this blows up
    with pytest.raises(IndexError, match="^Too many levels"):
        idx.reorder_levels(too_many)
def test_numpy_repeat():
    """np.repeat on a MultiIndex repeats each entry and rejects ``axis``."""
    repeats = 2
    ints = [1, 2, 3]
    # The same array serves as the second level's values and as the names.
    labels = np.array(["foo", "bar"])

    mi = MultiIndex.from_product([ints, labels], names=labels)
    repeated = MultiIndex.from_product(
        [ints, labels.repeat(repeats)], names=labels
    )
    tm.assert_index_equal(np.repeat(mi, repeats), repeated)

    with pytest.raises(ValueError, match="the 'axis' parameter is not supported"):
        np.repeat(mi, repeats, axis=1)
def test_append_mixed_dtypes():
    """
    Append to a six-level MultiIndex of mixed dtypes (GH 13660).

    The index is appended first to itself and then to an index whose six
    levels all hold the strings "x", "y", "z".
    """
    # GH 13660
    naive = date_range("2011-01-01", freq="M", periods=3)
    aware = date_range("2011-01-01", freq="M", periods=3, tz="US/Eastern")
    periods = period_range("2011-01", freq="M", periods=3)
    ints = [1, 2, 3]
    floats = [1.1, np.nan, 3.3]
    letters = ["a", "b", "c"]

    mi = MultiIndex.from_arrays([ints, floats, letters, naive, aware, periods])
    assert mi.nlevels == 6

    # appending the index to itself
    doubled = MultiIndex.from_arrays(
        [
            ints * 2,
            floats * 2,
            letters * 2,
            naive.append(naive),
            aware.append(aware),
            periods.append(periods),
        ]
    )
    tm.assert_index_equal(mi.append(mi), doubled)

    # appending an index whose levels are all strings
    xyz = ["x", "y", "z"]
    other = MultiIndex.from_arrays([xyz] * 6)
    xyz_index = pd.Index(xyz)
    mixed = MultiIndex.from_arrays(
        [
            ints + xyz,
            floats + xyz,
            letters + xyz,
            naive.append(xyz_index),
            aware.append(xyz_index),
            periods.append(xyz_index),
        ]
    )
    tm.assert_index_equal(mi.append(other), mixed)
def test_take(idx):
    """take() with a list of positions equals positional indexing; no ``freq``."""
    positions = [4, 3, 0, 2]
    taken = idx.take(positions)
    assert taken.equals(idx[positions])

    # GH 10791: accessing ``freq`` on a MultiIndex raises AttributeError
    with pytest.raises(
        AttributeError, match="'MultiIndex' object has no attribute 'freq'"
    ):
        idx.freq
def test_take_invalid_kwargs(idx):
    """
    MultiIndex.take rejects unknown and unsupported keyword arguments.

    An unknown keyword raises TypeError; the ``out`` and ``mode``
    parameters raise ValueError.
    """
    indices = [1, 2]

    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, mode="clip")
def test_take_fill_value():
    """
    Check how MultiIndex.take treats -1 with and without ``fill_value`` (GH 12631).

    Without ``fill_value``, or with ``allow_fill=False``, -1 selects the last
    entry. With ``fill_value=True`` it yields a missing row. Indices below -1
    raise ValueError when filling, and out-of-bounds indices raise IndexError.
    """
    # GH 12631
    day1 = pd.Timestamp("2011-01-01")
    day2 = pd.Timestamp("2011-01-02")
    level_names = ["str", "dt"]
    idx = pd.MultiIndex.from_product([["A", "B"], [day1, day2]], names=level_names)
    positions = np.array([1, 0, -1])

    # no fill_value: -1 is the last entry
    result = idx.take(positions)
    expected = pd.MultiIndex.from_tuples(
        [("A", day2), ("A", day1), ("B", day2)], names=level_names
    )
    tm.assert_index_equal(result, expected)

    # fill_value
    result = idx.take(positions, fill_value=True)
    expected = pd.MultiIndex.from_tuples(
        [("A", day2), ("A", day1), (np.nan, pd.NaT)], names=level_names
    )
    tm.assert_index_equal(result, expected)

    # allow_fill=False
    result = idx.take(positions, allow_fill=False, fill_value=True)
    expected = pd.MultiIndex.from_tuples(
        [("A", day2), ("A", day1), ("B", day2)], names=level_names
    )
    tm.assert_index_equal(result, expected)

    msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1"
    for bad_position in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, bad_position]), fill_value=True)

    msg = "index -5 is out of bounds for size 4"
    with pytest.raises(IndexError, match=msg):
        idx.take(np.array([1, -5]))
def test_iter(idx):
    """Iterating the ``idx`` fixture yields its six (first, second) tuples in order."""
    assert list(idx) == [
        ("foo", "one"),
        ("foo", "two"),
        ("bar", "one"),
        ("baz", "two"),
        ("qux", "one"),
        ("qux", "two"),
    ]
def test_sub(idx):
    """Subtraction involving a MultiIndex raises TypeError in either direction."""
    tail = idx[-3:]

    # - now raises (previously was set op difference)
    sub_msg = "cannot perform __sub__ with this index type: MultiIndex"
    with pytest.raises(TypeError, match=sub_msg):
        idx - tail
    with pytest.raises(TypeError, match=sub_msg):
        tail - idx
    with pytest.raises(TypeError, match=sub_msg):
        tail - idx.tolist()

    rsub_msg = "cannot perform __rsub__ with this index type: MultiIndex"
    with pytest.raises(TypeError, match=rsub_msg):
        idx.tolist() - tail
def test_map(idx):
    """Mapping the identity callable over the index gives back an equal index."""
    # callable
    # we don't infer UInt64
    expected = idx.astype("int64") if isinstance(idx, pd.UInt64Index) else idx
    mapped = idx.map(lambda x: x)
    tm.assert_index_equal(mapped, expected)
@pytest.mark.parametrize(
    "mapper",
    [
        lambda values, idx: {i: e for e, i in zip(values, idx)},
        lambda values, idx: pd.Series(values, idx),
    ],
)
def test_map_dictlike(idx, mapper):
    """
    Map the index through a dict or Series built by ``mapper``.

    An identity mapping must return an equal index; a mapping whose values
    are all NaN must return an Index of NaN.
    """
    skipped_types = (pd.CategoricalIndex, pd.IntervalIndex)
    if isinstance(idx, skipped_types):
        pytest.skip("skipping tests for {}".format(type(idx)))

    identity = mapper(idx.values, idx)

    # we don't infer to UInt64 for a dict
    needs_int64 = isinstance(idx, pd.UInt64Index) and isinstance(identity, dict)
    expected = idx.astype("int64") if needs_int64 else idx
    tm.assert_index_equal(idx.map(identity), expected)

    # empty mappable
    all_nan = pd.Index([np.nan] * len(idx))
    tm.assert_index_equal(idx.map(mapper(all_nan, idx)), all_nan)
@pytest.mark.parametrize(
    "func",
    [
        np.exp,
        np.exp2,
        np.expm1,
        np.log,
        np.log2,
        np.log10,
        np.log1p,
        np.sqrt,
        np.sin,
        np.cos,
        np.tan,
        np.arcsin,
        np.arccos,
        np.arctan,
        np.sinh,
        np.cosh,
        np.tanh,
        np.arcsinh,
        np.arccosh,
        np.arctanh,
        np.deg2rad,
        np.rad2deg,
    ],
    ids=lambda func: func.__name__,
)
def test_numpy_ufuncs(idx, func):
    """
    Applying a NumPy math ufunc to a MultiIndex raises.

    The expected exception type and message are chosen from
    ``_np_version_under1p17``.
    """
    # test ufuncs of numpy. see:
    # http://docs.scipy.org/doc/numpy/reference/ufuncs.html
    ufunc_name = func.__name__
    if _np_version_under1p17:
        error_type = AttributeError
        pattern = "'tuple' object has no attribute '{}'".format(ufunc_name)
    else:
        error_type = TypeError
        pattern = (
            "loop of ufunc does not support argument 0 of type tuple which"
            " has no callable {} method"
        ).format(ufunc_name)

    with pytest.raises(error_type, match=pattern):
        func(idx)
@pytest.mark.parametrize(
    "func",
    [np.isfinite, np.isinf, np.isnan, np.signbit],
    ids=lambda func: func.__name__,
)
def test_numpy_type_funcs(idx, func):
    """NumPy type-inspection ufuncs raise TypeError on a MultiIndex."""
    template = (
        "ufunc '{}' not supported for the input types, and the inputs"
        " could not be safely coerced to any supported types according to"
        " the casting rule ''safe''"
    )
    with pytest.raises(TypeError, match=template.format(func.__name__)):
        func(idx)

View File

@@ -0,0 +1,30 @@
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.util.testing import assert_copy
def test_astype(idx):
    """astype("O") matches a copy of the index; astype(int) raises TypeError."""
    reference = idx.copy()
    as_object = idx.astype("O")

    assert_copy(as_object.levels, reference.levels)
    assert_copy(as_object.codes, reference.codes)
    level_names = [lev.name for lev in as_object.levels]
    assert level_names == list(reference.names)

    with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
        idx.astype(np.dtype(int))
@pytest.mark.parametrize("ordered", [True, False])
def test_astype_category(idx, ordered):
    """Casting a MultiIndex to a categorical dtype raises NotImplementedError."""
    # GH 18630
    expected_msg = "> 1 ndim Categorical are not supported at this time"
    with pytest.raises(NotImplementedError, match=expected_msg):
        idx.astype(CategoricalDtype(ordered=ordered))

    if ordered is not False:
        return

    # dtype='category' defaults to ordered=False, so only test once
    with pytest.raises(NotImplementedError, match=expected_msg):
        idx.astype("category")

View File

@@ -0,0 +1,123 @@
import numpy as np
import pytest
from pandas import MultiIndex
import pandas.util.testing as tm
def test_numeric_compat(idx):
    """Multiplication and both divisions raise TypeError in either operand order."""
    # multiplication
    with pytest.raises(TypeError, match="cannot perform __mul__"):
        idx * 1
    with pytest.raises(TypeError, match="cannot perform __rmul__"):
        1 * idx

    # true division; the reflected message is derived from the forward one
    truediv_msg = "cannot perform __truediv__"
    with pytest.raises(TypeError, match=truediv_msg):
        idx / 1
    rtruediv_msg = truediv_msg.replace(" __", " __r")
    with pytest.raises(TypeError, match=rtruediv_msg):
        1 / idx

    # floor division
    with pytest.raises(TypeError, match="cannot perform __floordiv__"):
        idx // 1
    with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
        1 // idx
@pytest.mark.parametrize("method", ["all", "any"])
def test_logical_compat(idx, method):
    """Calling ``all`` or ``any`` on a MultiIndex raises TypeError."""
    reducer = getattr(idx, method)
    expected_msg = "cannot perform {method}".format(method=method)
    with pytest.raises(TypeError, match=expected_msg):
        reducer()
def test_boolean_context_compat(idx):
    """Converting a MultiIndex to bool raises ValueError."""
    with pytest.raises(ValueError):
        bool(idx)
def test_boolean_context_compat2():
    """Converting the intersection of two MultiIndexes to bool raises ValueError."""
    # boolean context compat
    # GH7897
    left = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    right = MultiIndex.from_tuples([("A", 1), ("A", 3)])
    shared = left.intersection(right)

    with pytest.raises(ValueError):
        bool(shared)
def test_inplace_mutation_resets_values():
    """
    Check ``values`` and ``_tuples`` around set_levels and set_codes.

    The non-inplace calls must leave the original index unchanged, while
    the ``inplace=True`` calls must be reflected in ``values``.
    """
    str_levels = [["a", "b", "c"], [4]]
    int_levels = [[1, 2, 3], ["a"]]
    shared_codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]

    mi1 = MultiIndex(levels=str_levels, codes=shared_codes)
    mi2 = MultiIndex(levels=int_levels, codes=shared_codes)
    mi1_values = mi1.values.copy()
    mi2_values = mi2.values.copy()

    assert mi1._tuples is not None

    # Make sure level setting works
    relevelled = mi1.set_levels(int_levels).values
    tm.assert_almost_equal(mi2_values, relevelled)

    # Non-inplace doesn't kill _tuples [implementation detail]
    tm.assert_almost_equal(mi1._tuples, mi1_values)

    # ...and values is still same too
    tm.assert_almost_equal(mi1.values, mi1_values)

    # Inplace should kill _tuples
    mi1.set_levels(int_levels, inplace=True)
    tm.assert_almost_equal(mi1.values, mi2_values)

    # Make sure label setting works too
    zero_codes = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
    expected_values = np.empty((6,), dtype=object)
    expected_values[:] = [(1, "a")] * 6

    # Must be 1d array of tuples
    assert expected_values.shape == (6,)
    recoded = mi2.set_codes(zero_codes).values

    # Not inplace shouldn't change
    tm.assert_almost_equal(mi2._tuples, mi2_values)

    # Should have correct values
    tm.assert_almost_equal(expected_values, recoded)

    # ...and again setting inplace should kill _tuples, etc
    mi2.set_codes(zero_codes, inplace=True)
    tm.assert_almost_equal(mi2.values, recoded)
def test_ndarray_compat_properties(idx, compat_props):
    """
    Transposing leaves the index equal to itself, and every property named
    in ``compat_props`` agrees between the index and its ``values`` array.
    """
    for transposed in (idx.T, idx.transpose()):
        assert transposed.equals(idx)

    raw = idx.values
    for prop_name in compat_props:
        assert getattr(idx, prop_name) == getattr(raw, prop_name)

    # test for validity
    idx.nbytes
    idx.values.nbytes
def test_compat(indices):
    """``tolist()`` and ``list()`` produce the same list for the index."""
    via_builtin = list(indices)
    assert indices.tolist() == via_builtin
def test_pickle_compat_construction(holder):
    """Calling the ``holder`` constructor with no arguments raises TypeError."""
    # this is testing for pickle compat
    # need an object to create with
    expected_msg = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=expected_msg):
        holder()

View File

@@ -0,0 +1,672 @@
from collections import OrderedDict
import numpy as np
import pytest
from pandas._libs.tslib import Timestamp
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
import pandas as pd
from pandas import Index, MultiIndex, date_range
import pandas.util.testing as tm
def test_constructor_single_level():
    """A MultiIndex built from one level keeps that level's values and name."""
    labels = ["foo", "bar", "baz", "qux"]
    mi = MultiIndex(levels=[labels], codes=[[0, 1, 2, 3]], names=["first"])

    assert isinstance(mi, MultiIndex)
    tm.assert_index_equal(mi.levels[0], Index(labels, name="first"))
    assert mi.names == ["first"]
def test_constructor_no_levels():
    """Empty levels/codes raise ValueError; passing only one of them raises TypeError."""
    with pytest.raises(ValueError, match="non-zero number of levels/codes"):
        MultiIndex(levels=[], codes=[])

    both_required = "Must pass both levels and codes"
    for partial_kwargs in ({"levels": []}, {"codes": []}):
        with pytest.raises(TypeError, match=both_required):
            MultiIndex(**partial_kwargs)
def test_constructor_nonhashable_names():
    """
    Non-hashable level names raise TypeError (GH 20527).

    Checked through the constructor, ``rename`` and ``set_names``.
    """
    # GH 20527
    level_values = [[1, 2], ["one", "two"]]
    level_codes = [[0, 0, 1, 1], [0, 1, 0, 1]]
    expected_msg = r"MultiIndex\.name must be a hashable type"

    # Through the constructor: each name is a list
    list_names = (["foo"], ["bar"])
    with pytest.raises(TypeError, match=expected_msg):
        MultiIndex(levels=level_values, codes=level_codes, names=list_names)

    mi = MultiIndex(
        levels=[[1, 2], ["one", "two"]],
        codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
        names=("foo", "bar"),
    )
    new_names = [["foor"], ["barr"]]

    # With .rename()
    with pytest.raises(TypeError, match=expected_msg):
        mi.rename(names=new_names)

    # With .set_names()
    with pytest.raises(TypeError, match=expected_msg):
        mi.set_names(names=new_names)
def test_constructor_mismatched_codes_levels(idx):
    """
    Inconsistent levels and codes raise ValueError.

    Checked through the constructor and through ``set_levels`` and
    ``set_codes``. ``set_codes`` with ``verify_integrity=False`` must not
    raise.
    """
    with pytest.raises(
        ValueError, match="Length of levels and codes must be the same"
    ):
        MultiIndex(
            levels=["a"], codes=[np.array([1]), np.array([2]), np.array([3])]
        )

    too_large_code = (
        r"On level 0, code max \(3\) >= length of level \(1\)\."
        " NOTE: this index is in an inconsistent state"
    )
    unequal_lengths = r"Unequal code lengths: \[4, 2\]"
    too_small_code = r"On level 0, code value \(-2\) < -1"
    one_item_levels = [["a"], ["b"]]
    ragged_codes = [[0, 0, 0, 0], [0, 0]]

    # important to check that it's looking at the right thing.
    with pytest.raises(ValueError, match=too_large_code):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]])

    with pytest.raises(ValueError, match=unequal_lengths):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]])

    # external API
    with pytest.raises(ValueError, match=too_large_code):
        idx.copy().set_levels(one_item_levels)

    with pytest.raises(ValueError, match=unequal_lengths):
        idx.copy().set_codes(ragged_codes)

    # test set_codes with verify_integrity=False
    # the setting should not raise any value error
    idx.copy().set_codes(codes=ragged_codes, verify_integrity=False)

    # code value smaller than -1
    with pytest.raises(ValueError, match=too_small_code):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]])
def test_na_levels():
    """
    Codes that point at a missing level value are reassigned to -1 (GH26408).

    Covers NaN, NaT and None in the levels, via the constructor and via
    ``set_levels`` and ``set_codes``.
    """
    # GH26408
    # test if codes are re-assigned value -1 for levels
    # with missing values (NaN, NaT, None)
    input_codes = [[0, -1, 1, 2, 3, 4]]

    leading_na = [[np.nan, None, pd.NaT, 128, 2]]
    result = MultiIndex(levels=leading_na, codes=input_codes)
    expected = MultiIndex(levels=leading_na, codes=[[-1, -1, -1, -1, 3, 4]])
    tm.assert_index_equal(result, expected)

    scattered_na = [[np.nan, "s", pd.NaT, 128, None]]
    result = MultiIndex(levels=scattered_na, codes=input_codes)
    expected = MultiIndex(levels=scattered_na, codes=[[-1, -1, 1, -1, 3, -1]])
    tm.assert_index_equal(result, expected)

    # verify set_levels and set_codes
    via_set_levels = MultiIndex(
        levels=[[1, 2, 3, 4, 5]], codes=input_codes
    ).set_levels(scattered_na)
    tm.assert_index_equal(via_set_levels, expected)

    via_set_codes = MultiIndex(
        levels=scattered_na, codes=[[1, 2, 2, 2, 2, 2]]
    ).set_codes(input_codes)
    tm.assert_index_equal(via_set_codes, expected)
def test_labels_deprecated(idx):
    """The ``labels`` keyword and the ``labels`` attribute emit FutureWarning (GH23752)."""
    # GH23752
    constructor_kwargs = dict(
        levels=[["foo", "bar", "baz", "qux"]],
        labels=[[0, 1, 2, 3]],
        names=["first"],
    )
    with tm.assert_produces_warning(FutureWarning):
        MultiIndex(**constructor_kwargs)

    with tm.assert_produces_warning(FutureWarning):
        idx.labels
def test_copy_in_constructor():
    """With ``copy=True``, later edits to the input arrays do not reach the index."""
    level_values = np.array(["a", "b", "c"])
    code_values = np.array([1, 1, 2, 0, 0, 1, 1])
    first_code = code_values[0]

    mi = MultiIndex(
        levels=[level_values, level_values],
        codes=[code_values, code_values],
        copy=True,
    )
    assert mi.codes[0][0] == first_code

    # Overwrite the source codes; the index must keep the old value.
    code_values[0] = 15
    assert mi.codes[0][0] == first_code

    # Same check for the source levels.
    first_label = level_values[0]
    level_values[0] = "PANDA"
    assert mi.levels[0][0] == first_label
# ----------------------------------------------------------------------------
# from_arrays
# ----------------------------------------------------------------------------
def test_from_arrays(idx):
    """from_arrays rebuilds ``idx`` from its arrays and infers levels from raw lists."""
    per_level = []
    for lev, level_codes in zip(idx.levels, idx.codes):
        per_level.append(np.asarray(lev).take(level_codes))

    # list of arrays as input
    rebuilt = MultiIndex.from_arrays(per_level, names=idx.names)
    tm.assert_index_equal(rebuilt, idx)

    # infer correctly
    inferred = MultiIndex.from_arrays(
        [[pd.NaT, Timestamp("20130101")], ["a", "b"]]
    )
    assert inferred.levels[0].equals(Index([Timestamp("20130101")]))
    assert inferred.levels[1].equals(Index(["a", "b"]))
def test_from_arrays_iterator(idx):
    """from_arrays accepts an iterator of arrays and rejects a non-iterable (GH 18434)."""
    # GH 18434
    per_level = []
    for lev, level_codes in zip(idx.levels, idx.codes):
        per_level.append(np.asarray(lev).take(level_codes))

    # iterator as input
    rebuilt = MultiIndex.from_arrays(iter(per_level), names=idx.names)
    tm.assert_index_equal(rebuilt, idx)

    # invalid iterator input
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of array-likes."
    ):
        MultiIndex.from_arrays(0)
def test_from_arrays_tuples(idx):
    """from_arrays accepts a tuple of tuples as input."""
    per_level = []
    for lev, level_codes in zip(idx.levels, idx.codes):
        per_level.append(tuple(np.asarray(lev).take(level_codes)))

    # tuple of tuples as input
    rebuilt = MultiIndex.from_arrays(tuple(per_level), names=idx.names)
    tm.assert_index_equal(rebuilt, idx)
def test_from_arrays_index_series_datetimetz():
    """from_arrays gives the same result for tz-aware indexes passed as Index or Series."""
    eastern = pd.date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern")
    tokyo = pd.date_range("2015-01-01 10:00", freq="H", periods=3, tz="Asia/Tokyo")

    from_indexes = pd.MultiIndex.from_arrays([eastern, tokyo])
    tm.assert_index_equal(from_indexes.get_level_values(0), eastern)
    tm.assert_index_equal(from_indexes.get_level_values(1), tokyo)

    from_series = pd.MultiIndex.from_arrays([pd.Series(eastern), pd.Series(tokyo)])
    tm.assert_index_equal(from_series.get_level_values(0), eastern)
    tm.assert_index_equal(from_series.get_level_values(1), tokyo)

    tm.assert_index_equal(from_indexes, from_series)
def test_from_arrays_index_series_timedelta():
    """from_arrays gives the same result for timedelta indexes passed as Index or Series."""
    daily = pd.timedelta_range("1 days", freq="D", periods=3)
    hourly = pd.timedelta_range("2 hours", freq="H", periods=3)

    from_indexes = pd.MultiIndex.from_arrays([daily, hourly])
    tm.assert_index_equal(from_indexes.get_level_values(0), daily)
    tm.assert_index_equal(from_indexes.get_level_values(1), hourly)

    from_series = pd.MultiIndex.from_arrays([pd.Series(daily), pd.Series(hourly)])
    tm.assert_index_equal(from_series.get_level_values(0), daily)
    tm.assert_index_equal(from_series.get_level_values(1), hourly)

    tm.assert_index_equal(from_indexes, from_series)
def test_from_arrays_index_series_period():
    """from_arrays gives the same result for period indexes passed as Index or Series."""
    daily = pd.period_range("2011-01-01", freq="D", periods=3)
    hourly = pd.period_range("2015-01-01", freq="H", periods=3)

    from_indexes = pd.MultiIndex.from_arrays([daily, hourly])
    tm.assert_index_equal(from_indexes.get_level_values(0), daily)
    tm.assert_index_equal(from_indexes.get_level_values(1), hourly)

    from_series = pd.MultiIndex.from_arrays([pd.Series(daily), pd.Series(hourly)])
    tm.assert_index_equal(from_series.get_level_values(0), daily)
    tm.assert_index_equal(from_series.get_level_values(1), hourly)

    tm.assert_index_equal(from_indexes, from_series)
def test_from_arrays_index_datetimelike_mixed():
    """
    from_arrays keeps each of four datetime-like levels intact.

    The levels are a tz-aware DatetimeIndex, a naive DatetimeIndex, a
    TimedeltaIndex and a PeriodIndex, passed once as indexes and once as
    Series.
    """
    sources = [
        pd.date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
        pd.date_range("2015-01-01 10:00", freq="H", periods=3),
        pd.timedelta_range("1 days", freq="D", periods=3),
        pd.period_range("2011-01-01", freq="D", periods=3),
    ]

    from_indexes = pd.MultiIndex.from_arrays(sources)
    for level_number, source in enumerate(sources):
        tm.assert_index_equal(from_indexes.get_level_values(level_number), source)

    from_series = pd.MultiIndex.from_arrays([pd.Series(source) for source in sources])
    for level_number, source in enumerate(sources):
        tm.assert_index_equal(from_series.get_level_values(level_number), source)

    tm.assert_index_equal(from_indexes, from_series)
def test_from_arrays_index_series_categorical():
    """
    from_arrays keeps categorical levels intact (GH13743).

    The unordered and ordered CategoricalIndex are passed as indexes, as
    Series and as their underlying ``values``.
    """
    # GH13743
    letters = list("abcaab")
    categories = list("bac")
    unordered = pd.CategoricalIndex(letters, categories=categories, ordered=False)
    ordered = pd.CategoricalIndex(letters, categories=categories, ordered=True)

    from_indexes = pd.MultiIndex.from_arrays([unordered, ordered])
    tm.assert_index_equal(from_indexes.get_level_values(0), unordered)
    tm.assert_index_equal(from_indexes.get_level_values(1), ordered)

    from_series = pd.MultiIndex.from_arrays([pd.Series(unordered), pd.Series(ordered)])
    tm.assert_index_equal(from_series.get_level_values(0), unordered)
    tm.assert_index_equal(from_series.get_level_values(1), ordered)

    from_values = pd.MultiIndex.from_arrays([unordered.values, ordered.values])
    tm.assert_index_equal(from_values.get_level_values(0), unordered)
    tm.assert_index_equal(from_values.get_level_values(1), ordered)
def test_from_arrays_empty():
    """from_arrays with zero arrays raises; with empty arrays it builds empty levels."""
    # 0 levels
    with pytest.raises(
        ValueError, match="Must pass non-zero number of levels/codes"
    ):
        MultiIndex.from_arrays(arrays=[])

    # 1 level
    single = MultiIndex.from_arrays(arrays=[[]], names=["A"])
    assert isinstance(single, MultiIndex)
    tm.assert_index_equal(single.levels[0], Index([], name="A"))

    # N levels
    for n_levels in (2, 3):
        level_names = list("ABC")[:n_levels]
        built = MultiIndex.from_arrays(arrays=[[]] * n_levels, names=level_names)
        wanted = MultiIndex(
            levels=[[]] * n_levels, codes=[[]] * n_levels, names=level_names
        )
        tm.assert_index_equal(built, wanted)
@pytest.mark.parametrize(
    "invalid_sequence_of_arrays",
    [
        # scalars and lists
        1,
        [1],
        [1, 2],
        [[1], 2],
        [1, [2]],
        "a",
        ["a"],
        ["a", "b"],
        [["a"], "b"],
        # tuples (the bare string "a" is already listed above)
        (1,),
        (1, 2),
        ([1], 2),
        (1, [2]),
        ("a",),
        ("a", "b"),
        (["a"], "b"),
        # lists and tuples mixing scalars with tuples
        [(1,), 2],
        [1, (2,)],
        [("a",), "b"],
        ((1,), 2),
        (1, (2,)),
        (("a",), "b"),
    ],
)
def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
    """
    from_arrays raises TypeError for input that is not a sequence of array-likes.

    Each parametrized case is either not a sequence at all or contains at
    least one scalar element.
    """
    msg = "Input must be a list / sequence of array-likes"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)
@pytest.mark.parametrize(
    "idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])]
)
def test_from_arrays_different_lengths(idx1, idx2):
    """from_arrays raises ValueError when the arrays differ in length (gh-13599)."""
    # see gh-13599
    unequal = [idx1, idx2]
    with pytest.raises(ValueError, match="^all arrays must be same length$"):
        MultiIndex.from_arrays(unequal)
# ----------------------------------------------------------------------------
# from_tuples
# ----------------------------------------------------------------------------
def test_from_tuples():
    """from_tuples raises on an empty list without names and builds from tuples."""
    with pytest.raises(
        TypeError, match="Cannot infer number of levels from empty list"
    ):
        MultiIndex.from_tuples([])

    level_names = ["a", "b"]
    wanted = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=level_names
    )

    # input tuples
    built = MultiIndex.from_tuples(((1, 2), (3, 4)), names=level_names)
    tm.assert_index_equal(built, wanted)
def test_from_tuples_iterator():
    """from_tuples accepts an iterator of tuples and rejects a non-iterable (GH 18434)."""
    # GH 18434
    # input iterator for tuples
    level_names = ["a", "b"]
    wanted = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=level_names
    )
    built = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=level_names)
    tm.assert_index_equal(built, wanted)

    # input non-iterables
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of tuple-likes."
    ):
        MultiIndex.from_tuples(0)
def test_from_tuples_empty():
    """from_tuples with no tuples but with names equals from_arrays with empty arrays."""
    # GH 16777
    level_names = ["a", "b"]
    built = MultiIndex.from_tuples([], names=level_names)
    wanted = MultiIndex.from_arrays(arrays=[[], []], names=level_names)
    tm.assert_index_equal(built, wanted)
def test_from_tuples_index_values(idx):
    """from_tuples applied to a MultiIndex reproduces its values."""
    rebuilt = MultiIndex.from_tuples(idx)
    same = rebuilt.values == idx.values
    assert same.all()
def test_tuples_with_name_string():
    """Building an Index from tuples with a plain string ``name`` raises ValueError."""
    # GH 15110 and GH 14848
    tuples = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    expected_msg = "Names should be list-like for a MultiIndex"
    for string_name in ("abc", "a"):
        with pytest.raises(ValueError, match=expected_msg):
            pd.Index(tuples, name=string_name)
def test_from_tuples_with_tuple_label():
    """A tuple used as a label gives the same frame via from_tuples and set_index (GH 15457)."""
    # GH 15457
    via_set_index = pd.DataFrame(
        [[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"]
    ).set_index(["a", "b"])

    mi = pd.MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b"))
    via_from_tuples = pd.DataFrame([2, 3], columns=["c"], index=mi)

    tm.assert_frame_equal(via_set_index, via_from_tuples)
# ----------------------------------------------------------------------------
# from_product
# ----------------------------------------------------------------------------
def test_from_product_empty_zero_levels():
    """from_product with no iterables raises ValueError."""
    # 0 levels
    with pytest.raises(
        ValueError, match="Must pass non-zero number of levels/codes"
    ):
        MultiIndex.from_product([])
def test_from_product_empty_one_level():
    """from_product with one empty iterable gives one empty, named level."""
    built = MultiIndex.from_product([[]], names=["A"])
    tm.assert_index_equal(built.levels[0], pd.Index([], name="A"))
@pytest.mark.parametrize(
    "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])]
)
def test_from_product_empty_two_levels(first, second):
    """from_product with an empty iterable keeps both levels but has no rows."""
    level_names = ["A", "B"]
    built = MultiIndex.from_product([first, second], names=level_names)
    wanted = MultiIndex(levels=[first, second], codes=[[], []], names=level_names)
    tm.assert_index_equal(built, wanted)
@pytest.mark.parametrize("N", list(range(4)))
def test_from_product_empty_three_levels(N):
    """from_product with empty outer iterables has no rows, whatever the middle level (GH12258)."""
    # GH12258
    level_names = ["A", "B", "C"]
    middle = list(range(N))
    built = MultiIndex.from_product([[], middle, []], names=level_names)
    wanted = MultiIndex(
        levels=[[], middle, []], codes=[[], [], []], names=level_names
    )
    tm.assert_index_equal(built, wanted)
@pytest.mark.parametrize(
    "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]]
)
def test_from_product_invalid_input(invalid_input):
    """from_product raises TypeError for input that is not a sequence of iterables."""
    either_message = (
        r"Input must be a list / sequence of iterables|Input must be list-like"
    )
    with pytest.raises(TypeError, match=either_message):
        MultiIndex.from_product(iterables=invalid_input)
def test_from_product_datetimeindex():
    """from_product of integers and a DatetimeIndex yields (int, Timestamp) tuples."""
    dates = date_range("2000-01-01", periods=2)
    mi = pd.MultiIndex.from_product([[1, 2], dates])

    day1 = pd.Timestamp("2000-01-01")
    day2 = pd.Timestamp("2000-01-02")
    wanted = construct_1d_object_array_from_listlike(
        [(1, day1), (1, day2), (2, day1), (2, day2)]
    )
    tm.assert_numpy_array_equal(mi.values, wanted)
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize("f", [lambda x: x, lambda x: pd.Series(x), lambda x: x.values])
def test_from_product_index_series_categorical(ordered, f):
    """
    from_product keeps a categorical second level categorical (GH13743).

    The CategoricalIndex is passed as is, as a Series, or as its ``values``,
    depending on ``f``.
    """
    # GH13743
    outer = ["foo", "bar"]
    letters = list("abcaab")
    categories = list("bac")

    cat_index = pd.CategoricalIndex(letters, categories=categories, ordered=ordered)
    # The inner level repeats once per outer label.
    wanted = pd.CategoricalIndex(
        letters + letters, categories=categories, ordered=ordered
    )

    built = pd.MultiIndex.from_product([outer, f(cat_index)])
    tm.assert_index_equal(built.get_level_values(1), wanted)
def test_from_product():
    """from_product of two lists equals from_tuples of their nine ordered pairs."""
    outer = ["foo", "bar", "buz"]
    inner = ["a", "b", "c"]
    level_names = ["first", "second"]

    built = MultiIndex.from_product([outer, inner], names=level_names)
    wanted = MultiIndex.from_tuples(
        [
            ("foo", "a"),
            ("foo", "b"),
            ("foo", "c"),
            ("bar", "a"),
            ("bar", "b"),
            ("bar", "c"),
            ("buz", "a"),
            ("buz", "b"),
            ("buz", "c"),
        ],
        names=level_names,
    )
    tm.assert_index_equal(built, wanted)
def test_from_product_iterator():
    """from_product accepts an iterator of iterables and rejects a non-iterable (GH 18434)."""
    # GH 18434
    outer = ["foo", "bar", "buz"]
    inner = ["a", "b", "c"]
    level_names = ["first", "second"]
    wanted = MultiIndex.from_tuples(
        [
            ("foo", "a"),
            ("foo", "b"),
            ("foo", "c"),
            ("bar", "a"),
            ("bar", "b"),
            ("bar", "c"),
            ("buz", "a"),
            ("buz", "b"),
            ("buz", "c"),
        ],
        names=level_names,
    )

    # iterator as input
    built = MultiIndex.from_product(iter([outer, inner]), names=level_names)
    tm.assert_index_equal(built, wanted)

    # Invalid non-iterable input
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of iterables."
    ):
        MultiIndex.from_product(0)
def test_create_index_existing_name(idx):
    """
    pd.Index built from a MultiIndex inherits its names unless new ones are given.

    GH11193: when an existing index is passed and a new name is not
    specified, the new index should inherit the previous object name.
    """
    tuples = [
        ("foo", "one"),
        ("foo", "two"),
        ("bar", "one"),
        ("baz", "two"),
        ("qux", "one"),
        ("qux", "two"),
    ]

    # Names are set on the fixture itself before it is wrapped.
    idx.names = ["foo", "bar"]

    # no names passed: the existing ones are kept
    inherited = pd.Index(idx)
    wanted = Index(Index(tuples, dtype="object"), names=["foo", "bar"])
    tm.assert_index_equal(inherited, wanted)

    # explicit names are used instead
    renamed = pd.Index(idx, names=["A", "B"])
    wanted = Index(Index(tuples, dtype="object"), names=["A", "B"])
    tm.assert_index_equal(renamed, wanted)
# ----------------------------------------------------------------------------
# from_frame
# ----------------------------------------------------------------------------
def test_from_frame():
    """from_frame turns rows into tuples and column labels into level names (GH 22420)."""
    # GH 22420
    rows = [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]]
    columns = ["L1", "L2"]
    frame = pd.DataFrame(rows, columns=columns)

    wanted = pd.MultiIndex.from_tuples(
        [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"]
    )
    tm.assert_index_equal(wanted, pd.MultiIndex.from_frame(frame))
@pytest.mark.parametrize(
    "non_frame",
    [
        pd.Series([1, 2, 3, 4]),
        [1, 2, 3, 4],
        [[1, 2], [3, 4], [5, 6]],
        pd.Index([1, 2, 3, 4]),
        np.array([[1, 2], [3, 4], [5, 6]]),
        27,
    ],
)
def test_from_frame_error(non_frame):
    """from_frame raises TypeError for anything that is not a DataFrame (GH 22420)."""
    # GH 22420
    expected_msg = "Input must be a DataFrame"
    with pytest.raises(TypeError, match=expected_msg):
        pd.MultiIndex.from_frame(non_frame)
def test_from_frame_dtype_fidelity():
    """Level dtypes after from_frame must equal the frame's column dtypes (GH 22420)."""

    def make_columns():
        # Built fresh on every call so the frame and the expectation do not
        # share any objects.
        return [
            ("dates", pd.date_range("19910905", periods=6, tz="US/Eastern")),
            ("a", [1, 1, 1, 2, 2, 2]),
            ("b", pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True)),
            ("c", ["x", "x", "y", "z", "x", "y"]),
        ]

    frame = pd.DataFrame(OrderedDict(make_columns()))
    frame_dtypes = frame.dtypes.to_dict()

    want = pd.MultiIndex.from_arrays(
        [values for _, values in make_columns()],
        names=[label for label, _ in make_columns()],
    )
    got = pd.MultiIndex.from_frame(frame)
    level_dtypes = dict(zip(got.names, (level.dtype for level in got.levels)))

    tm.assert_index_equal(want, got)
    assert frame_dtypes == level_dtypes
@pytest.mark.parametrize(
    "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])]
)
def test_from_frame_valid_names(names_in, names_out):
    """Check the resulting names for ``names=None`` and for an explicit list (GH 22420)."""
    columns = pd.MultiIndex.from_tuples([("L1", "x"), ("L2", "y")])
    frame = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=columns
    )
    built = pd.MultiIndex.from_frame(frame, names=names_in)
    assert built.names == names_out
@pytest.mark.parametrize(
    "names,expected_error_msg",
    [
        ("bad_input", "Names should be list-like for a MultiIndex"),
        (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"),
    ],
)
def test_from_frame_invalid_names(names, expected_error_msg):
    """from_frame must raise ValueError for non-list-like or wrong-length names (GH 22420)."""
    columns = pd.MultiIndex.from_tuples([("L1", "x"), ("L2", "y")])
    frame = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=columns
    )
    with pytest.raises(ValueError, match=expected_error_msg):
        pd.MultiIndex.from_frame(frame, names=names)

View File

@@ -0,0 +1,100 @@
import numpy as np
import pytest
from pandas.compat import PYPY
import pandas as pd
from pandas import MultiIndex
import pandas.util.testing as tm
def test_contains_top_level():
    """A first-level label is ``in`` the MultiIndex but not ``in`` its ``_engine``."""
    label = "A"
    midx = MultiIndex.from_product([[label, "B"], [1, 2]])
    assert label in midx
    assert label not in midx._engine
def test_contains_with_nat():
    """Membership checks on a MultiIndex whose second level holds a NaT entry."""
    dates = pd.date_range("2012-01-01", periods=5)
    # MI with a NaT (the leading -1 code in the second level)
    mi = MultiIndex(
        levels=[["C"], dates],
        codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
        names=[None, "B"],
    )
    assert ("C", pd.Timestamp("2012-01-01")) in mi
    # every stored tuple must be found again
    assert all(entry in mi for entry in mi.values)
def test_contains(idx):
    """Full-tuple membership: a present key, an absent key, and ``None``."""
    present, absent = ("foo", "two"), ("bar", "two")
    assert present in idx
    assert absent not in idx
    assert None not in idx
@pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy")
def test_isin_nan_pypy():
    """On PyPy a ("bar", nan) probe is expected to match the ("bar", nan) entry."""
    mi = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])
    # first the numpy NaN singleton, then a freshly built float NaN
    for nan in (np.nan, float("nan")):
        tm.assert_numpy_array_equal(mi.isin([("bar", nan)]), np.array([False, True]))
def test_isin():
    """isin with full tuples, including the dtype returned for an empty index."""
    probes = [("foo", 2), ("bar", 3), ("quux", 4)]

    mi = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
    tm.assert_numpy_array_equal(
        mi.isin(probes), np.array([False, False, True, True])
    )

    # empty, return dtype bool
    mask = MultiIndex.from_arrays([[], []]).isin(probes)
    assert len(mask) == 0
    assert mask.dtype == np.bool_
@pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy")
def test_isin_nan_not_pypy():
    """Off PyPy a ("bar", nan) probe is expected to match nothing."""
    mi = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])
    # first the numpy NaN singleton, then a freshly built float NaN
    for nan in (np.nan, float("nan")):
        tm.assert_numpy_array_equal(mi.isin([("bar", nan)]), np.array([False, False]))
def test_isin_level_kwarg():
    """isin(level=...) with positional, negative, invalid and named levels."""
    mi = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
    outer_vals = ["foo", "bar", "quux"]
    inner_vals = [2, 3, 10]
    want = np.array([False, False, True, True])

    # integer levels, counted from either end
    by_position = [(outer_vals, 0), (outer_vals, -2), (inner_vals, 1), (inner_vals, -1)]
    for vals, level in by_position:
        tm.assert_numpy_array_equal(want, mi.isin(vals, level=level))

    # integer levels outside the valid range
    out_of_range = [
        (5, "Too many levels: Index has only 2 levels, not 6"),
        (-5, "Too many levels: Index has only 2 levels, -5 is not a valid level number"),
    ]
    for level, msg in out_of_range:
        with pytest.raises(IndexError, match=msg):
            mi.isin(outer_vals, level=level)

    # level identifiers that are neither valid positions nor names
    with pytest.raises(KeyError, match=r"'Level 1\.0 not found'"):
        mi.isin(outer_vals, level=1.0)
    with pytest.raises(KeyError, match=r"'Level -1\.0 not found'"):
        mi.isin(inner_vals, level=-1.0)
    with pytest.raises(KeyError, match="'Level A not found'"):
        mi.isin(inner_vals, level="A")

    # once the levels are named, the names are usable
    mi.names = ["A", "B"]
    tm.assert_numpy_array_equal(want, mi.isin(outer_vals, level="A"))
    tm.assert_numpy_array_equal(want, mi.isin(inner_vals, level="B"))
    with pytest.raises(KeyError, match="'Level C not found'"):
        mi.isin(inner_vals, level="C")

View File

@@ -0,0 +1,252 @@
from collections import OrderedDict
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, MultiIndex, date_range
import pandas.util.testing as tm
def test_tolist(idx):
    """tolist() must equal ``list`` applied to ``.values``."""
    assert idx.tolist() == list(idx.values)
def test_to_numpy(idx):
    """to_numpy() must equal ``.values``."""
    tm.assert_numpy_array_equal(idx.to_numpy(), idx.values)
def test_to_frame():
    """Exercise MultiIndex.to_frame with ``index=``, level names and ``name=``."""
    pairs = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]

    # unnamed levels
    mi = MultiIndex.from_tuples(pairs)
    want = DataFrame(pairs)
    tm.assert_frame_equal(mi.to_frame(index=False), want)
    want.index = mi
    tm.assert_frame_equal(mi.to_frame(), want)

    # named levels become the column labels
    mi = MultiIndex.from_tuples(pairs, names=["first", "second"])
    want = DataFrame(pairs)
    want.columns = ["first", "second"]
    tm.assert_frame_equal(mi.to_frame(index=False), want)
    want.index = mi
    tm.assert_frame_equal(mi.to_frame(), want)

    # See GH-22580: column labels supplied through ``name``
    mi = MultiIndex.from_tuples(pairs)
    want = DataFrame(pairs)
    want.columns = ["first", "second"]
    tm.assert_frame_equal(mi.to_frame(index=False, name=["first", "second"]), want)
    frame = mi.to_frame(name=["first", "second"])
    want.index = mi
    want.columns = ["first", "second"]
    tm.assert_frame_equal(frame, want)

    # invalid ``name`` arguments
    with pytest.raises(
        TypeError, match="'name' must be a list / sequence of column names."
    ):
        mi.to_frame(name="first")
    with pytest.raises(
        ValueError,
        match="'name' should have same length as number of levels on index.",
    ):
        mi.to_frame(name=["first"])

    # Tests for datetime index
    mi = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
    frame = mi.to_frame(index=False)
    want = DataFrame(
        {
            0: np.repeat(np.arange(5, dtype="int64"), 3),
            1: np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(frame, want)
    frame = mi.to_frame()
    want.index = mi
    tm.assert_frame_equal(frame, want)

    # See GH-22580
    frame = mi.to_frame(index=False, name=["first", "second"])
    want = DataFrame(
        {
            "first": np.repeat(np.arange(5, dtype="int64"), 3),
            "second": np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(frame, want)
    frame = mi.to_frame(name=["first", "second"])
    want.index = mi
    tm.assert_frame_equal(frame, want)
def test_to_frame_dtype_fidelity():
    """Column dtypes after to_frame must equal the index's level dtypes (GH 22420)."""

    def make_columns():
        # Built fresh on every call so the index and the expected frame do not
        # share any objects.
        return [
            ("dates", pd.date_range("19910905", periods=6, tz="US/Eastern")),
            ("a", [1, 1, 1, 2, 2, 2]),
            ("b", pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True)),
            ("c", ["x", "x", "y", "z", "x", "y"]),
        ]

    mi = pd.MultiIndex.from_arrays(
        [values for _, values in make_columns()],
        names=[label for label, _ in make_columns()],
    )
    level_dtypes = dict(zip(mi.names, (level.dtype for level in mi.levels)))

    want = pd.DataFrame(OrderedDict(make_columns()))
    frame = mi.to_frame(index=False)
    frame_dtypes = frame.dtypes.to_dict()

    tm.assert_frame_equal(frame, want)
    assert level_dtypes == frame_dtypes
def test_to_frame_resulting_column_order():
    """to_frame columns must come out in level order, not sorted (GH 22420)."""
    level_names = ["z", 0, "a"]
    arrays = [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]]
    mi = pd.MultiIndex.from_arrays(arrays, names=level_names)
    assert mi.to_frame().columns.tolist() == level_names
def test_to_hierarchical():
    """to_hierarchical must warn (FutureWarning) and repeat entries as expected."""
    levels = [[1, 2], ["one", "two"]]
    mi = MultiIndex.from_tuples([(1, "one"), (1, "two"), (2, "one"), (2, "two")])

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        repeated = mi.to_hierarchical(3)
    want = MultiIndex(
        levels=levels,
        codes=[
            [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
            [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1],
        ],
    )
    tm.assert_index_equal(repeated, want)
    assert repeated.names == mi.names

    # K > 1
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        repeated = mi.to_hierarchical(3, 2)
    want = MultiIndex(
        levels=levels,
        codes=[
            [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
            [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
        ],
    )
    tm.assert_index_equal(repeated, want)
    assert repeated.names == mi.names

    # non-sorted
    unsorted_rows = [(2, "c"), (1, "b"), (2, "a"), (2, "b")]
    mi = MultiIndex.from_tuples(unsorted_rows, names=["N1", "N2"])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        repeated = mi.to_hierarchical(2)
    want = MultiIndex.from_tuples(
        [
            (2, "c"),
            (2, "c"),
            (1, "b"),
            (1, "b"),
            (2, "a"),
            (2, "a"),
            (2, "b"),
            (2, "b"),
        ],
        names=["N1", "N2"],
    )
    tm.assert_index_equal(repeated, want)
    assert repeated.names == mi.names
def test_roundtrip_pickle_with_tz():
    """Pickle round-trip of a MultiIndex with a tz-aware level keeps its levels (GH 8367).

    The body previously sat behind an unconditional ``return`` and never ran,
    so the test passed without checking anything. The early return is removed
    so the round-trip is actually exercised.
    """
    # round-trip of timezone
    index = MultiIndex.from_product(
        [[1, 2], ["a", "b"], date_range("20130101", periods=3, tz="US/Eastern")],
        names=["one", "two", "three"],
    )
    unpickled = tm.round_trip_pickle(index)
    assert index.equal_levels(unpickled)
def test_pickle(indices):
    """Pickle round-trip must yield an equal index, with and without a name set.

    The body previously sat behind an unconditional ``return`` and never ran.
    The early return is removed, and the temporary rename is wrapped in
    ``try``/``finally`` so the name is put back even if an assertion fails.

    Parameters
    ----------
    indices : the ``indices`` fixture
        NOTE(review): presumably one index object per parametrized run, defined
        outside this file -- confirm every variant round-trips before relying
        on this.
    """
    unpickled = tm.round_trip_pickle(indices)
    assert indices.equals(unpickled)

    original_name = indices.name
    indices.name = "foo"
    try:
        unpickled = tm.round_trip_pickle(indices)
        assert indices.equals(unpickled)
    finally:
        indices.name = original_name
def test_to_series(idx):
    """to_series() must hand back new values and a new index, keeping the name."""
    # assert that we are creating a copy of the index
    ser = idx.to_series()
    assert ser.values is not idx.values
    assert ser.index is not idx
    assert ser.name == idx.name
def test_to_series_with_arguments(idx):
    """to_series() with the ``index`` and ``name`` keyword arguments (GH18699)."""
    # index kwarg: the passed object itself becomes the Series index
    with_index = idx.to_series(index=idx)
    assert with_index.values is not idx.values
    assert with_index.index is idx
    assert with_index.name == idx.name

    # name kwarg: the Series name differs from the index name
    with_name = idx.to_series(name="__test")
    assert with_name.values is not idx.values
    assert with_name.index is not idx
    assert with_name.name != idx.name
def test_to_flat_index(idx):
    """to_flat_index() must equal an Index of tuples built with tupleize_cols=False."""
    flat_rows = (
        ("foo", "one"),
        ("foo", "two"),
        ("bar", "one"),
        ("baz", "two"),
        ("qux", "one"),
        ("qux", "two"),
    )
    want = pd.Index(flat_rows, tupleize_cols=False)
    tm.assert_index_equal(idx.to_flat_index(), want)

View File

@@ -0,0 +1,94 @@
from copy import copy, deepcopy
import pytest
from pandas import MultiIndex
import pandas.util.testing as tm
def assert_multiindex_copied(copy, original):
    """Assert that ``copy`` looks like a copy of the MultiIndex ``original``.

    Checks levels, codes, names and sortorder. The codes value comparison used
    to be written twice in a row; the redundant second call is dropped and the
    stale "Labels" comment now says "Codes".

    Parameters
    ----------
    copy : MultiIndex
        The object produced by the copy operation under test.
    original : MultiIndex
        The object it was produced from.

    Raises
    ------
    AssertionError
        If any of the checks fails.
    """
    # Levels should be (at least, shallow copied)
    tm.assert_copy(copy.levels, original.levels)

    # Codes: equal values, held in a different container object
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes

    # Names: equal values, held in a different container object
    assert copy.names == original.names
    assert copy.names is not original.names

    # Sort order should be copied
    assert copy.sortorder == original.sortorder
def test_copy(idx):
    """Run the shared copy checks on the result of ``copy()``."""
    assert_multiindex_copied(idx.copy(), idx)
def test_shallow_copy(idx):
    """Run the shared copy checks on the result of ``_shallow_copy()``."""
    assert_multiindex_copied(idx._shallow_copy(), idx)
def test_labels_deprecated(idx):
    """Passing ``labels=`` to copy() must emit a FutureWarning (GH23752)."""
    expected_warning = FutureWarning
    with tm.assert_produces_warning(expected_warning):
        idx.copy(labels=idx.codes)
def test_view(idx):
    """Run the shared copy checks on the result of ``view()``."""
    assert_multiindex_copied(idx.view(), idx)
@pytest.mark.parametrize("func", [copy, deepcopy])
def test_copy_and_deepcopy(func):
    """copy.copy / copy.deepcopy must give a distinct but equal MultiIndex."""
    source = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    duplicate = func(source)
    assert duplicate is not source
    assert duplicate.equals(source)
@pytest.mark.parametrize("deep", [True, False])
def test_copy_method(deep):
    """MultiIndex.copy(deep=...) must give an equal index for both settings."""
    source = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    assert source.copy(deep=deep).equals(source)
@pytest.mark.parametrize("deep", [True, False])
@pytest.mark.parametrize(
    "kwarg, value",
    [
        ("names", ["thrid", "fourth"]),
        ("levels", [["foo2", "bar2"], ["fizz2", "buzz2"]]),
        ("codes", [[1, 0, 0, 0], [1, 1, 0, 0]]),
    ],
)
def test_copy_method_kwargs(deep, kwarg, value):
    """copy() must honour ``names``/``levels``/``codes`` overrides (gh-12309).

    A bare ``return`` used to sit right after the index was built, so the copy
    call and both assertions were unreachable. It is removed so the overrides
    are actually checked.

    Parameters
    ----------
    deep : bool
        Forwarded to ``copy`` as ``deep``.
    kwarg : str
        Name of the attribute to override in the copy.
    value : list
        Value passed for ``kwarg`` and expected back on the copy.
    """
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = idx.copy(**{kwarg: value, "deep": deep})
    if kwarg == "names":
        assert getattr(idx_copy, kwarg) == value
    else:
        # levels/codes come back as array-likes; compare them as plain lists
        assert [list(i) for i in getattr(idx_copy, kwarg)] == value

View File

@@ -0,0 +1,141 @@
import numpy as np
import pytest
from pandas.errors import PerformanceWarning
import pandas as pd
from pandas import Index, MultiIndex
import pandas.util.testing as tm
def test_drop(idx):
    """MultiIndex.drop with full keys, partial keys, mixtures and errors='ignore'."""
    # full keys, given as a list of tuples and as a MultiIndex
    by_tuples = idx.drop([("foo", "two"), ("qux", "one")])
    by_index = idx.drop(MultiIndex.from_tuples([("foo", "two"), ("qux", "one")]))
    remaining = idx[[0, 2, 3, 5]]
    tm.assert_index_equal(by_tuples, remaining)
    tm.assert_index_equal(by_index, remaining)

    # first-level labels only
    tm.assert_index_equal(idx.drop(["bar"]), idx[[0, 1, 3, 4, 5]])
    tm.assert_index_equal(idx.drop("foo"), idx[[2, 3, 4, 5]])

    # keys that are not present
    absent = MultiIndex.from_tuples([("bar", "two")])
    with pytest.raises(KeyError, match=r"^10$"):
        idx.drop([("bar", "two")])
    with pytest.raises(KeyError, match=r"^10$"):
        idx.drop(absent)
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(["foo", "two"])

    # partially correct argument
    half_present = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
    with pytest.raises(KeyError, match=r"^10$"):
        idx.drop(half_present)

    # error='ignore'
    tm.assert_index_equal(
        idx.drop(absent, errors="ignore"), idx[[0, 1, 2, 3, 4, 5]]
    )
    tm.assert_index_equal(
        idx.drop(half_present, errors="ignore"), idx[[0, 1, 2, 3, 5]]
    )
    tm.assert_index_equal(
        idx.drop(["foo", "two"], errors="ignore"), idx[[2, 3, 4, 5]]
    )

    # mixed partial / full drop
    tm.assert_index_equal(idx.drop(["foo", ("qux", "one")]), idx[[2, 3, 5]])

    # mixed partial / full drop / error='ignore'
    mixed_keys = ["foo", ("qux", "one"), "two"]
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(mixed_keys)
    tm.assert_index_equal(idx.drop(mixed_keys, errors="ignore"), idx[[2, 3, 5]])
def test_droplevel_with_names(idx):
    """droplevel by position and by name, checking the surviving names."""
    foo_rows = idx[idx.get_loc("foo")]
    assert foo_rows.droplevel(0).name == "second"

    three_level = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )
    assert three_level.droplevel(0).names == ("two", "three")

    # dropping by name must agree with dropping by position
    by_name = three_level.droplevel("two")
    by_position = three_level.droplevel(1)
    assert by_name.equals(by_position)
def test_droplevel_list():
    """droplevel with a list of names: valid, empty, all levels, unknown name."""
    full = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )

    # a list of names equals dropping the same levels one at a time
    together = full[:2].droplevel(["three", "one"])
    one_by_one = full[:2].droplevel(2).droplevel(0)
    assert together.equals(one_by_one)

    # an empty list drops nothing
    untouched = full[:2].droplevel([])
    assert untouched.equals(full[:2])

    all_levels_msg = (
        "Cannot remove 3 levels from an index with 3 levels: at least one"
        " level must be left"
    )
    with pytest.raises(ValueError, match=all_levels_msg):
        full[:2].droplevel(["one", "two", "three"])

    with pytest.raises(KeyError, match="'Level four not found'"):
        full[:2].droplevel(["one", "four"])
def test_drop_not_lexsorted():
    """drop() on a non-lexsorted MultiIndex warns and matches the lexsorted result (GH 12078)."""
    # the lexsorted version of the multi-index
    sorted_mi = MultiIndex.from_tuples(
        [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"]
    )
    assert sorted_mi.is_lexsorted()

    # the not-lexsorted version, taken from a pivoted frame's columns
    frame = pd.DataFrame(
        columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
    )
    pivoted = frame.pivot_table(index="a", columns=["b", "c"], values="d")
    unsorted_mi = pivoted.reset_index().columns
    assert not unsorted_mi.is_lexsorted()

    # compare the results
    tm.assert_index_equal(sorted_mi, unsorted_mi)
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_index_equal(sorted_mi.drop("a"), unsorted_mi.drop("a"))

View File

@@ -0,0 +1,283 @@
from itertools import product
import numpy as np
import pytest
from pandas._libs import hashtable
from pandas import DatetimeIndex, MultiIndex
import pandas.util.testing as tm
@pytest.mark.parametrize("names", [None, ["first", "second"]])
def test_unique(names):
    """unique() on integer, string and empty MultiIndexes, named and unnamed."""
    # (input arrays, arrays of the expected unique index)
    cases = [
        ([[1, 2, 1, 2], [1, 1, 1, 2]], [[1, 2, 2], [1, 1, 2]]),
        ([list("aaaa"), list("abab")], [list("aa"), list("ab")]),
        ([list("aaaa"), list("aaaa")], [["a"], ["a"]]),
    ]
    for arrays, unique_arrays in cases:
        mi = MultiIndex.from_arrays(arrays, names=names)
        deduped = mi.unique()
        want = MultiIndex.from_arrays(unique_arrays, names=mi.names)
        tm.assert_index_equal(deduped, want)

    # GH #20568 - empty MI
    empty = MultiIndex.from_arrays([[], []], names=names)
    tm.assert_index_equal(empty, empty.unique())
def test_unique_datetimelike():
    """unique() on a MultiIndex of a naive and a tz-aware DatetimeIndex, both with NaT."""
    naive = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-01", "2015-01-01", "NaT", "NaT"]
    )
    aware = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "NaT", "2015-01-01"],
        tz="Asia/Tokyo",
    )
    deduped = MultiIndex.from_arrays([naive, aware]).unique()

    want_naive = DatetimeIndex(["2015-01-01", "2015-01-01", "NaT", "NaT"])
    want_aware = DatetimeIndex(
        ["2015-01-01", "2015-01-02", "NaT", "2015-01-01"], tz="Asia/Tokyo"
    )
    tm.assert_index_equal(deduped, MultiIndex.from_arrays([want_naive, want_aware]))
@pytest.mark.parametrize("level", [0, "first", 1, "second"])
def test_unique_level(idx, level):
    """unique(level=...) must equal get_level_values(level).unique() (GH #17896).

    Covers the fixture index, an index whose levels are already unique, and an
    empty index. The empty case used to compute ``result`` and ``expected``
    without comparing them; the missing assertion is added.

    Parameters
    ----------
    idx : MultiIndex
        The ``idx`` fixture; its levels are addressed as "first"/"second".
    level : int or str
        Level position or name under test.
    """
    # with level= argument
    result = idx.unique(level=level)
    expected = idx.get_level_values(level).unique()
    tm.assert_index_equal(result, expected)

    # With already unique level
    mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], names=["first", "second"])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)

    # With empty MI
    mi = MultiIndex.from_arrays([[], []], names=["first", "second"])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("dropna", [True, False])
def test_get_unique_index(idx, dropna):
    """_get_unique_index must return the distinct entries in first-seen order.

    The uniqueness check used to be ``assert result.unique``, which tests the
    bound method object and is therefore always true. It now checks the
    ``is_unique`` property.

    Parameters
    ----------
    idx : MultiIndex
        The ``idx`` fixture.
    dropna : bool
        Forwarded to ``_get_unique_index``.
    """
    # positions 0 and 1 repeated several times
    mi = idx[[0, 1, 0, 1, 1, 0, 0]]
    expected = mi._shallow_copy(mi[[0, 1]])

    result = mi._get_unique_index(dropna=dropna)
    assert result.is_unique
    tm.assert_index_equal(result, expected)
def test_duplicate_multiindex_codes():
    """Duplicate values inside a level must raise ValueError (GH 17464)."""
    # Make sure that a MultiIndex with duplicate levels throws a ValueError
    with pytest.raises(ValueError):
        MultiIndex([["A"] * 10, range(10)], [[0] * 10, range(10)])

    # And that using set_levels with duplicate levels fails
    valid = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    duplicated_levels = [["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]]
    with pytest.raises(ValueError):
        valid.set_levels(duplicated_levels, inplace=True)
@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]])
def test_duplicate_level_names(names):
    """Duplicate level names are accepted by the constructor and by rename (GH18872, GH19029)."""
    product_input = [[0, 1]] * 3

    # names given at construction
    named = MultiIndex.from_product(product_input, names=names)
    assert named.names == names

    # With .rename()
    renamed = MultiIndex.from_product(product_input).rename(names)
    assert renamed.names == names

    # With .rename(., level=)
    renamed.rename(names[1], level=1, inplace=True)
    renamed = renamed.rename([names[0], names[2]], level=[0, 2])
    assert renamed.names == names
def test_duplicate_meta_data():
    """drop_duplicates() must keep the level names, whatever they are (GH 10115)."""
    base = MultiIndex(
        levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
    )
    name_sets = ([None, None], [None, "Num"], ["Upper", "Num"])
    variants = [base] + [base.set_names(names) for names in name_sets]
    for variant in variants:
        assert variant.has_duplicates
        assert variant.drop_duplicates().names == variant.names
def test_has_duplicates(idx, idx_dup):
    """is_unique / has_duplicates on the fixtures and on indexes with missing codes."""
    # see fixtures
    assert idx.is_unique is True
    assert idx.has_duplicates is False
    assert idx_dup.is_unique is False
    assert idx_dup.has_duplicates is True

    repeated = MultiIndex(
        levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
    )
    assert repeated.is_unique is False
    assert repeated.has_duplicates is True

    # single instance of NaN
    one_nan = MultiIndex(
        levels=[["a", "b"], [0, 1]], codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]]
    )
    assert one_nan.is_unique is True
    assert one_nan.has_duplicates is False

    # multiple instances of NaN
    two_nans = MultiIndex(
        levels=[["a", "b"], [0, 1]], codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]]
    )
    assert two_nans.is_unique is False
    assert two_nans.has_duplicates is True
def test_has_duplicates_from_tuples():
    """An 8-level index built from distinct tuples must report no duplicates (GH 9075)."""
    # only the 4th and 8th fields vary between rows
    varying = [
        (5, 169),
        (7, 119),
        (9, 135),
        (13, 145),
        (14, 158),
        (16, 122),
        (17, 160),
        (18, 180),
        (20, 143),
        (21, 128),
        (22, 129),
        (25, 111),
        (28, 114),
        (29, 121),
        (31, 126),
        (32, 155),
        (33, 123),
        (12, 144),
    ]
    rows = [("x", "out", "z", out_n, "y", "in", "z", in_n) for out_n, in_n in varying]
    mi = MultiIndex.from_tuples(rows)
    assert not mi.has_duplicates
def test_has_duplicates_overflow():
    """has_duplicates on many-level indexes, with and without missing codes."""
    # handle int64 overflow if possible

    def check(nlevels, with_nulls):
        base_codes = np.tile(np.arange(500), 2)
        level = np.arange(500)

        if with_nulls:  # inject some null values
            base_codes[500] = -1  # common nan value
            codes = [base_codes.copy() for _ in range(nlevels)]
            for pos, level_codes in enumerate(codes):
                level_codes[500 + pos - nlevels // 2] = -1
            codes += [np.array([-1, 1]).repeat(500)]
        else:
            codes = [base_codes] * nlevels + [np.arange(2).repeat(500)]

        levels = [level] * nlevels + [[0, 1]]

        # no dups
        mi = MultiIndex(levels=levels, codes=codes)
        assert not mi.has_duplicates

        # with a dup: repeat the first entry at the end
        if with_nulls:
            codes = [np.insert(arr, 1000, arr[0]) for arr in codes]
            mi = MultiIndex(levels=levels, codes=codes)
        else:
            rows = mi.values.tolist()
            mi = MultiIndex.from_tuples(rows + [rows[0]])

        assert mi.has_duplicates

    # 4 levels: no overflow; 8 levels: overflow possible
    for nlevels in (4, 8):
        for with_nulls in (False, True):
            check(nlevels, with_nulls)
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", np.array([False, False, False, True, True, False])),
        ("last", np.array([False, True, True, False, False, False])),
        (False, np.array([False, True, True, True, True, False])),
    ],
)
def test_duplicated(idx_dup, keep, expected):
    """duplicated(keep=...) on the duplicate-bearing fixture."""
    mask = idx_dup.duplicated(keep=keep)
    tm.assert_numpy_array_equal(mask, expected)
@pytest.mark.parametrize("keep", ["first", "last", False])
def test_duplicated_large(keep):
    """duplicated() on a large random index must agree with hashtable.duplicated_object (GH 9125)."""
    n_values, repeats = 200, 5000
    levels = [
        np.arange(n_values),
        tm.makeStringIndex(n_values),
        1000 + np.arange(n_values),
    ]
    # one random code array per level
    codes = [np.random.choice(n_values, repeats * n_values) for _ in levels]
    mi = MultiIndex(levels=levels, codes=codes)

    got = mi.duplicated(keep=keep)
    want = hashtable.duplicated_object(mi.values, keep=keep)
    tm.assert_numpy_array_equal(got, want)
def test_get_duplicates():
    """Duplicate-free indexes (GH5873): get_duplicates() is empty and warns."""
    empty = [[], []]

    for second in [101, 102]:
        mi = MultiIndex.from_arrays([[101, second], [3.5, np.nan]])
        assert not mi.has_duplicates

        with tm.assert_produces_warning(FutureWarning):
            # Deprecated - see GH20239
            assert mi.get_duplicates().equals(MultiIndex.from_arrays(empty))

        tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool"))

    # n: 1st level shape, m: 2nd level shape
    for n, m in product(range(1, 6), range(1, 5)):
        # all possible unique combinations, including nan
        all_codes = product(range(-1, n), range(-1, m))
        mi = MultiIndex(
            levels=[list("abcde")[:n], list("WXYZ")[:m]],
            codes=np.random.permutation(list(all_codes)).T,
        )
        assert len(mi) == (n + 1) * (m + 1)
        assert not mi.has_duplicates

        with tm.assert_produces_warning(FutureWarning):
            # Deprecated - see GH20239
            assert mi.get_duplicates().equals(MultiIndex.from_arrays(empty))

        tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(len(mi), dtype="bool"))

View File

@@ -0,0 +1,223 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Index, MultiIndex, Series
import pandas.util.testing as tm
def test_equals(idx):
    """equals() against itself, copies, object casts and non-Index containers."""
    for same in (idx, idx.copy(), idx.astype(object)):
        assert idx.equals(same)

    for different in (list(idx), np.array(idx)):
        assert not idx.equals(different)

    as_object_index = Index(idx, dtype=object)
    assert idx.equals(as_object_index)
    assert as_object_index.equals(idx)

    if idx.nlevels == 1:
        # do not test MultiIndex
        assert not idx.equals(pd.Series(idx))
def test_equals_op(idx):
    """Element-wise ``==`` against indexes, arrays and Series (GH9947, GH10637)."""
    full = idx
    size = len(full)
    shorter = full[0:-1]
    last_differs = full[0:-1].append(full[-2:-1])
    single = full[0:1]

    all_true = np.array([True] * size)
    last_false = np.array([True] * (size - 1) + [False])

    # the same three checks against Index, numpy array and Series operands
    converters = [
        lambda obj: obj,
        np.array,
        lambda obj: Series(np.array(obj)),
    ]
    for convert in converters:
        with pytest.raises(ValueError, match="Lengths must match"):
            full == convert(shorter)
        tm.assert_numpy_array_equal(full == convert(full), all_true)
        tm.assert_numpy_array_equal(full == convert(last_differs), last_false)

    # cases where length is 1 for one of them
    array_single = np.array(single)
    series_full = Series(np.array(full))
    series_single = Series(array_single)
    for other in (single, series_single, array_single):
        with pytest.raises(ValueError, match="Lengths must match"):
            full == other

    with pytest.raises(
        ValueError, match="Can only compare identically-labeled Series objects"
    ):
        series_full == series_single
    with pytest.raises(ValueError, match="Lengths must match"):
        series_full == array_single

    # comparing with a scalar should broadcast; note that we are excluding
    # MultiIndex because in this case each item in the index is a tuple of
    # length 2, and therefore is considered an array of length 2 in the
    # comparison instead of a scalar
    if not isinstance(full, MultiIndex):
        only_second_to_last = np.array([False] * (len(full) - 2) + [True, False])
        # assuming the 2nd to last item is unique in the data
        item = full[-2]
        tm.assert_numpy_array_equal(full == item, only_second_to_last)
        tm.assert_series_equal(series_full == item, Series(only_second_to_last))
def test_equals_multi(idx):
    """equals() / equal_levels() against indexes that differ in levels or codes."""
    assert idx.equals(idx)
    assert not idx.equals(idx.values)
    assert idx.equals(Index(idx.values))

    assert idx.equal_levels(idx)
    assert not idx.equals(idx[:-1])
    assert not idx.equals(idx[-1])

    # different number of levels
    three_level = MultiIndex(
        levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
    )
    two_level = MultiIndex(
        levels=three_level.levels[:-1], codes=three_level.codes[:-1]
    )
    assert not three_level.equals(two_level)
    assert not three_level.equal_levels(two_level)

    # levels are different
    other_levels = MultiIndex(
        levels=[Index(list(range(4))), Index(list(range(2)))],
        codes=[np.array([0, 0, 1, 2, 2, 3]), np.array([0, 1, 0, 0, 1, 0])],
    )
    assert not idx.equals(other_levels)
    assert not idx.equal_levels(other_levels)

    # some of the labels are different
    other_codes = MultiIndex(
        levels=[Index(["foo", "bar", "baz", "qux"]), Index(["one", "two"])],
        codes=[np.array([0, 0, 2, 2, 3, 3]), np.array([0, 1, 0, 1, 0, 1])],
    )
    assert not idx.equals(other_codes)
def test_identical(idx):
    """identical() is checked across renames and rebuilds; equals() across both."""
    left = idx.copy()
    right = idx.copy()
    assert left.identical(right)

    # rename one side only
    left = left.set_names(["new1", "new2"])
    assert left.equals(right)
    assert not left.identical(right)

    # rename the other side to match
    right = right.set_names(["new1", "new2"])
    assert left.identical(right)

    # rebuilt from tuples, with and without tupleizing
    rebuilt = Index(left.tolist(), names=left.names)
    rebuilt_flat = Index(left.tolist(), names=left.names, tupleize_cols=False)
    assert left.identical(rebuilt)
    assert not left.identical(rebuilt_flat)
    assert left.equals(rebuilt_flat)
def test_equals_operator(idx):
    """``idx == idx`` must be True at every position (GH9785)."""
    elementwise = idx == idx
    assert elementwise.all()
def test_equals_missing_values():
    """A length-1 slice must not equal the bare tuple at the same position."""
    # make sure take is not using -1
    mi = pd.MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])
    assert not mi[0:1].equals(mi[0])
    assert not mi[1:2].equals(mi[1])
def test_is_():
mi = MultiIndex.from_tuples(zip(range(10), range(10)))
assert mi.is_(mi)
assert mi.is_(mi.view())
assert mi.is_(mi.view().view().view().view())
mi2 = mi.view()
# names are metadata, they don't change id
mi2.names = ["A", "B"]
assert mi2.is_(mi)
assert mi.is_(mi2)
assert not mi.is_(mi.set_names(["C", "D"]))
mi2 = mi.view()
mi2.set_names(["E", "F"], inplace=True)
assert mi.is_(mi2)
# levels are inherent properties, they change identity
mi3 = mi2.set_levels([list(range(10)), list(range(10))])
assert not mi3.is_(mi2)
# shouldn't change
assert mi2.is_(mi)
mi4 = mi3.view()
# GH 17464 - Remove duplicate MultiIndex levels
mi4.set_levels([list(range(10)), list(range(10))], inplace=True)
assert not mi4.is_(mi3)
mi5 = mi.view()
mi5.set_levels(mi5.levels, inplace=True)
assert not mi5.is_(mi)
def test_is_all_dates(idx):
    """``is_all_dates`` must be falsy for the fixture index."""
    flag = idx.is_all_dates
    assert not flag
def test_is_numeric(idx):
    """``is_numeric()`` must be falsy for the fixture index."""
    # MultiIndex is never numeric
    numeric = idx.is_numeric()
    assert not numeric
def test_multiindex_compare():
    """Comparison operators on a single-level MultiIndex (GH 21149)."""
    # Ensure comparison operations for MultiIndex with nlevels == 1
    # behave consistently with those for MultiIndex with nlevels > 1
    midx = pd.MultiIndex.from_product([[0, 1]])

    # Equality self-test: MultiIndex object vs self
    tm.assert_series_equal(pd.Series(midx == midx), pd.Series([True, True]))

    # Greater than comparison: MultiIndex object vs self
    tm.assert_series_equal(pd.Series(midx > midx), pd.Series([False, False]))

View File

@@ -0,0 +1,204 @@
import warnings
import pytest
import pandas as pd
from pandas import MultiIndex
import pandas.util.testing as tm
def test_dtype_str(indices):
    """``dtype_str`` must warn (FutureWarning) and equal ``str(dtype)``."""
    with tm.assert_produces_warning(FutureWarning):
        as_text = indices.dtype_str
        assert isinstance(as_text, str)
        assert as_text == str(indices.dtype)
def test_format(idx):
    """Smoke test: format() runs on the full index and on an empty slice of it."""
    for target in (idx, idx[:0]):
        target.format()
def test_format_integer_names():
    """Smoke test: format(names=True) runs when the level names are integers."""
    int_named = MultiIndex(
        levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]
    )
    int_named.format(names=True)
def test_format_sparse_config(idx):
    """format() with ``display.multi_sparse`` switched off (GH1538).

    Global state is now scoped with context managers:

    * ``warnings.catch_warnings()`` snapshots the filter list and restores it
      on exit. The earlier code kept only a reference to ``warnings.filters``,
      which ``filterwarnings`` mutates in place, so assigning that reference
      back restored nothing.
    * ``pd.option_context`` restores ``display.multi_sparse`` even when the
      assertion fails. The earlier code only reset options after a passing
      assertion (and reset every display option, not just this one).

    Parameters
    ----------
    idx : MultiIndex
        The ``idx`` fixture.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning, module=".*format")
        with pd.option_context("display.multi_sparse", False):
            result = idx.format()
            # NOTE(review): expected string reproduced verbatim from the
            # source; confirm the spacing between the two labels.
            assert result[1] == "foo two"
def test_format_sparse_display():
index = MultiIndex(
levels=[[0, 1], [0, 1], [0, 1], [0]],
codes=[
[0, 0, 0, 1, 1, 1],
[0, 0, 1, 0, 0, 1],
[0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0],
],
)
result = index.format()
assert result[3] == "1 0 0 0"
def test_repr_with_unicode_data():
    """The repr of an index holding non-ASCII text contains no backslash."""
    with pd.option_context("display.encoding", "UTF-8"):
        data = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
        frame = pd.DataFrame(data)
        unicode_index = frame.set_index(["a", "b"]).index
        rendered = repr(unicode_index)
        assert "\\" not in rendered  # we don't want unicode-escaped
def test_repr_roundtrip_raises():
    """Evaluating a MultiIndex repr as Python source raises ``TypeError``."""
    level_values = [list("ab"), range(3)]
    mi = MultiIndex.from_product(level_values, names=["first", "second"])
    rendered = repr(mi)
    # eval is applied only to a repr produced by this test itself.
    with pytest.raises(TypeError):
        eval(rendered)
def test_unicode_string_with_unicode():
    """Smoke test: ``str()`` of an index holding non-ASCII text must not raise."""
    data = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    frame = pd.DataFrame(data)
    unicode_index = frame.set_index(["a", "b"]).index
    str(unicode_index)
def test_repr_max_seq_item_setting(idx):
# GH10182
idx = idx.repeat(50)
with pd.option_context("display.max_seq_items", None):
repr(idx)
assert "..." not in str(idx)
class TestRepr:
def test_repr(self, idx):
result = idx[:1].__repr__()
expected = """\
MultiIndex([('foo', 'one')],
names=['first', 'second'])"""
assert result == expected
result = idx.__repr__()
expected = """\
MultiIndex([('foo', 'one'),
('foo', 'two'),
('bar', 'one'),
('baz', 'two'),
('qux', 'one'),
('qux', 'two')],
names=['first', 'second'])"""
assert result == expected
with pd.option_context("display.max_seq_items", 5):
result = idx.__repr__()
expected = """\
MultiIndex([('foo', 'one'),
('foo', 'two'),
...
('qux', 'one'),
('qux', 'two')],
names=['first', 'second'], length=6)"""
assert result == expected
def test_rjust(self, narrow_multi_index):
mi = narrow_multi_index
result = mi[:1].__repr__()
expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
names=['a', 'b', 'dti'])"""
assert result == expected
result = mi[::500].__repr__()
expected = """\
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
( 'a', 9, '2000-01-01 00:08:20'),
('abc', 10, '2000-01-01 00:16:40'),
('abc', 10, '2000-01-01 00:25:00')],
names=['a', 'b', 'dti'])"""
assert result == expected
result = mi.__repr__()
expected = """\
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
( 'a', 9, '2000-01-01 00:00:01'),
( 'a', 9, '2000-01-01 00:00:02'),
( 'a', 9, '2000-01-01 00:00:03'),
( 'a', 9, '2000-01-01 00:00:04'),
( 'a', 9, '2000-01-01 00:00:05'),
( 'a', 9, '2000-01-01 00:00:06'),
( 'a', 9, '2000-01-01 00:00:07'),
( 'a', 9, '2000-01-01 00:00:08'),
( 'a', 9, '2000-01-01 00:00:09'),
...
('abc', 10, '2000-01-01 00:33:10'),
('abc', 10, '2000-01-01 00:33:11'),
('abc', 10, '2000-01-01 00:33:12'),
('abc', 10, '2000-01-01 00:33:13'),
('abc', 10, '2000-01-01 00:33:14'),
('abc', 10, '2000-01-01 00:33:15'),
('abc', 10, '2000-01-01 00:33:16'),
('abc', 10, '2000-01-01 00:33:17'),
('abc', 10, '2000-01-01 00:33:18'),
('abc', 10, '2000-01-01 00:33:19')],
names=['a', 'b', 'dti'], length=2000)"""
assert result == expected
def test_tuple_width(self, wide_multi_index):
mi = wide_multi_index
result = mi[:1].__repr__()
expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
assert result == expected
result = mi[:10].__repr__()
expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
assert result == expected
result = mi.__repr__()
expected = """\
MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
...
('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" # noqa
assert result == expected

View File

@@ -0,0 +1,438 @@
import numpy as np
import pytest
import pandas as pd
from pandas import CategoricalIndex, Index, MultiIndex
import pandas.util.testing as tm
def assert_matching(actual, expected, check_dtype=False):
    """Assert that two sequences of array-likes match element by element.

    Both sequences must have the same length. Each pair of entries is
    converted with ``np.asarray`` and compared through
    ``tm.assert_numpy_array_equal``, with ``check_dtype`` passed through.
    """
    # avoid specifying internal representation
    # as much as possible
    assert len(actual) == len(expected)
    for got, wanted in zip(actual, expected):
        tm.assert_numpy_array_equal(
            np.asarray(got), np.asarray(wanted), check_dtype=check_dtype
        )
def test_get_level_number_integer(idx):
idx.names = [1, 0]
assert idx._get_level_number(1) == 0
assert idx._get_level_number(0) == 1
msg = "Too many levels: Index has only 2 levels, not 3"
with pytest.raises(IndexError, match=msg):
idx._get_level_number(2)
with pytest.raises(KeyError, match="Level fourth not found"):
idx._get_level_number("fourth")
def test_get_level_values(idx):
result = idx.get_level_values(0)
expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first")
tm.assert_index_equal(result, expected)
assert result.name == "first"
result = idx.get_level_values("first")
expected = idx.get_level_values(0)
tm.assert_index_equal(result, expected)
# GH 10460
index = MultiIndex(
levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])],
codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])],
)
exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"])
tm.assert_index_equal(index.get_level_values(0), exp)
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
tm.assert_index_equal(index.get_level_values(1), exp)
def test_get_value_duplicates():
index = MultiIndex(
levels=[["D", "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
names=["tag", "day"],
)
assert index.get_loc("D") == slice(0, 3)
with pytest.raises(KeyError, match=r"^'D'$"):
index._engine.get_value(np.array([]), "D")
def test_get_level_values_all_na():
    """Check ``get_level_values`` when one level is entirely NaN."""
    # GH 17924 when level entirely consists of nan
    all_nan = [np.nan, np.nan, np.nan]
    mixed = ["a", np.nan, 1]
    index = pd.MultiIndex.from_arrays([all_nan, mixed])

    tm.assert_index_equal(
        index.get_level_values(0),
        pd.Index([np.nan, np.nan, np.nan], dtype=np.float64),
    )
    tm.assert_index_equal(
        index.get_level_values(1), pd.Index(["a", np.nan, 1], dtype=object)
    )
def test_get_level_values_int_with_na():
    """Check ``get_level_values`` for an integer level containing NaN."""
    # GH 17924
    outer = ["a", "b", "b"]
    for inner in ([1, np.nan, 2], [np.nan, np.nan, 2]):
        index = pd.MultiIndex.from_arrays([outer, inner])
        tm.assert_index_equal(index.get_level_values(1), Index(inner))
def test_get_level_values_na():
    """Check ``get_level_values`` with NaN, NaT and empty levels."""
    nan_mixed = pd.MultiIndex.from_arrays(
        [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
    )
    tm.assert_index_equal(
        nan_mixed.get_level_values(0), pd.Index([np.nan, np.nan, np.nan])
    )
    tm.assert_index_equal(nan_mixed.get_level_values(1), pd.Index(["a", np.nan, 1]))

    # Datetime level holding NaT
    with_nat = pd.MultiIndex.from_arrays(
        [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])]
    )
    tm.assert_index_equal(
        with_nat.get_level_values(1), pd.DatetimeIndex([0, 1, pd.NaT])
    )

    # No rows at all
    empty = pd.MultiIndex.from_arrays([[], []])
    tm.assert_index_equal(empty.get_level_values(0), pd.Index([], dtype=object))
def test_set_name_methods(idx, index_names):
# so long as these are synonyms, we don't need to test set_names
assert idx.rename == idx.set_names
new_names = [name + "SUFFIX" for name in index_names]
ind = idx.set_names(new_names)
assert idx.names == index_names
assert ind.names == new_names
msg = "Length of names must match number of levels in MultiIndex"
with pytest.raises(ValueError, match=msg):
ind.set_names(new_names + new_names)
new_names2 = [name + "SUFFIX2" for name in new_names]
res = ind.set_names(new_names2, inplace=True)
assert res is None
assert ind.names == new_names2
# set names for specific level (# GH7792)
ind = idx.set_names(new_names[0], level=0)
assert idx.names == index_names
assert ind.names == [new_names[0], index_names[1]]
res = ind.set_names(new_names2[0], level=0, inplace=True)
assert res is None
assert ind.names == [new_names2[0], index_names[1]]
# set names for multiple levels
ind = idx.set_names(new_names, level=[0, 1])
assert idx.names == index_names
assert ind.names == new_names
res = ind.set_names(new_names2, level=[0, 1], inplace=True)
assert res is None
assert ind.names == new_names2
def test_set_levels_codes_directly(idx):
# setting levels/codes directly raises AttributeError
levels = idx.levels
new_levels = [[lev + "a" for lev in level] for level in levels]
codes = idx.codes
major_codes, minor_codes = codes
major_codes = [(x + 1) % 3 for x in major_codes]
minor_codes = [(x + 1) % 1 for x in minor_codes]
new_codes = [major_codes, minor_codes]
msg = "can't set attribute"
with pytest.raises(AttributeError, match=msg):
idx.levels = new_levels
with pytest.raises(AttributeError, match=msg):
idx.codes = new_codes
def test_set_levels(idx):
    """Check ``set_levels`` with and without ``inplace`` and ``level``.

    Each legal variant is checked for the new levels on the result and,
    where the source asserts it, for unchanged levels on ``idx``. The
    illegal calls at the end must raise and leave ``idx`` untouched.
    """
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    levels = idx.levels
    new_levels = [[lev + "a" for lev in level] for level in levels]
    # Expected levels after replacing only level 0 / only level 1.
    single_level_expected = {
        0: [new_levels[0], levels[1]],
        1: [levels[0], new_levels[1]],
    }

    # level changing [w/o mutation]
    changed = idx.set_levels(new_levels)
    assert_matching(changed.levels, new_levels)
    assert_matching(idx.levels, levels)

    # level changing [w/ mutation]
    changed = idx.copy()
    returned = changed.set_levels(new_levels, inplace=True)
    assert returned is None
    assert_matching(changed.levels, new_levels)

    # level changing specific level [w/o mutation]
    for lvl in (0, 1):
        changed = idx.set_levels(new_levels[lvl], level=lvl)
        assert_matching(changed.levels, single_level_expected[lvl])
        assert_matching(idx.levels, levels)

    # level changing multiple levels [w/o mutation]
    changed = idx.set_levels(new_levels, level=[0, 1])
    assert_matching(changed.levels, new_levels)
    assert_matching(idx.levels, levels)

    # level changing specific level [w/ mutation]
    for lvl in (0, 1):
        changed = idx.copy()
        returned = changed.set_levels(new_levels[lvl], level=lvl, inplace=True)
        assert returned is None
        assert_matching(changed.levels, single_level_expected[lvl])
        assert_matching(idx.levels, levels)

    # level changing multiple levels [w/ mutation]
    changed = idx.copy()
    returned = changed.set_levels(new_levels, level=[0, 1], inplace=True)
    assert returned is None
    assert_matching(changed.levels, new_levels)
    assert_matching(idx.levels, levels)

    # illegal level changing should not change levels
    # GH 13754
    pristine = idx.copy()
    for inplace in [True, False]:
        with pytest.raises(ValueError, match="^On"):
            idx.set_levels(["c"], level=0, inplace=inplace)
        assert_matching(idx.levels, pristine.levels, check_dtype=True)

        with pytest.raises(ValueError, match="^On"):
            idx.set_codes([0, 1, 2, 3, 4, 5], level=0, inplace=inplace)
        assert_matching(idx.codes, pristine.codes, check_dtype=True)

        with pytest.raises(TypeError, match="^Levels"):
            idx.set_levels("c", level=0, inplace=inplace)
        assert_matching(idx.levels, pristine.levels, check_dtype=True)

        with pytest.raises(TypeError, match="^Codes"):
            idx.set_codes(1, level=0, inplace=inplace)
        assert_matching(idx.codes, pristine.codes, check_dtype=True)
def test_set_codes(idx):
    """Check ``set_codes`` with and without ``inplace`` and ``level``.

    Also covers a 130-entry level and the FutureWarning raised by the
    ``labels=`` keyword.
    """
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    codes = idx.codes
    old_major, old_minor = codes
    new_codes = [
        [(code + 1) % 3 for code in old_major],
        [(code + 1) % 1 for code in old_minor],
    ]
    # Expected codes after replacing only level 0 / only level 1.
    single_level_expected = {
        0: [new_codes[0], codes[1]],
        1: [codes[0], new_codes[1]],
    }

    # changing codes w/o mutation
    changed = idx.set_codes(new_codes)
    assert_matching(changed.codes, new_codes)
    assert_matching(idx.codes, codes)

    # changing label w/ mutation
    changed = idx.copy()
    returned = changed.set_codes(new_codes, inplace=True)
    assert returned is None
    assert_matching(changed.codes, new_codes)

    # codes changing specific level w/o mutation
    for lvl in (0, 1):
        changed = idx.set_codes(new_codes[lvl], level=lvl)
        assert_matching(changed.codes, single_level_expected[lvl])
        assert_matching(idx.codes, codes)

    # codes changing multiple levels w/o mutation
    changed = idx.set_codes(new_codes, level=[0, 1])
    assert_matching(changed.codes, new_codes)
    assert_matching(idx.codes, codes)

    # label changing specific level w/ mutation
    for lvl in (0, 1):
        changed = idx.copy()
        returned = changed.set_codes(new_codes[lvl], level=lvl, inplace=True)
        assert returned is None
        assert_matching(changed.codes, single_level_expected[lvl])
        assert_matching(idx.codes, codes)

    # codes changing multiple levels [w/ mutation]
    changed = idx.copy()
    returned = changed.set_codes(new_codes, level=[0, 1], inplace=True)
    assert returned is None
    assert_matching(changed.codes, new_codes)
    assert_matching(idx.codes, codes)

    # label changing for levels of different magnitude of categories
    ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)])
    new_codes = range(129, -1, -1)
    expected = pd.MultiIndex.from_tuples([(0, i) for i in new_codes])

    # [w/o mutation]
    reversed_copy = ind.set_codes(codes=new_codes, level=1)
    assert reversed_copy.equals(expected)

    # [w/ mutation]
    reversed_inplace = ind.copy()
    reversed_inplace.set_codes(codes=new_codes, level=1, inplace=True)
    assert reversed_inplace.equals(expected)

    with tm.assert_produces_warning(FutureWarning):
        ind.set_codes(labels=new_codes, level=1)
def test_set_labels_deprecated():
# GH23752
ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)])
new_labels = range(129, -1, -1)
expected = pd.MultiIndex.from_tuples([(0, i) for i in new_labels])
# [w/o mutation]
with tm.assert_produces_warning(FutureWarning):
result = ind.set_labels(labels=new_labels, level=1)
assert result.equals(expected)
# [w/ mutation]
result = ind.copy()
with tm.assert_produces_warning(FutureWarning):
result.set_labels(labels=new_labels, level=1, inplace=True)
assert result.equals(expected)
def test_set_levels_codes_names_bad_input(idx):
levels, codes = idx.levels, idx.codes
names = idx.names
with pytest.raises(ValueError, match="Length of levels"):
idx.set_levels([levels[0]])
with pytest.raises(ValueError, match="Length of codes"):
idx.set_codes([codes[0]])
with pytest.raises(ValueError, match="Length of names"):
idx.set_names([names[0]])
# shouldn't scalar data error, instead should demand list-like
with pytest.raises(TypeError, match="list of lists-like"):
idx.set_levels(levels[0])
# shouldn't scalar data error, instead should demand list-like
with pytest.raises(TypeError, match="list of lists-like"):
idx.set_codes(codes[0])
# shouldn't scalar data error, instead should demand list-like
with pytest.raises(TypeError, match="list-like"):
idx.set_names(names[0])
# should have equal lengths
with pytest.raises(TypeError, match="list of lists-like"):
idx.set_levels(levels[0], level=[0, 1])
with pytest.raises(TypeError, match="list-like"):
idx.set_levels(levels, level=0)
# should have equal lengths
with pytest.raises(TypeError, match="list of lists-like"):
idx.set_codes(codes[0], level=[0, 1])
with pytest.raises(TypeError, match="list-like"):
idx.set_codes(codes, level=0)
# should have equal lengths
with pytest.raises(ValueError, match="Length of names"):
idx.set_names(names[0], level=[0, 1])
with pytest.raises(TypeError, match="Names must be a"):
idx.set_names(names, level=0)
@pytest.mark.parametrize("inplace", [True, False])
def test_set_names_with_nlevel_1(inplace):
    """``set_names`` on a single-level MultiIndex renames it without error."""
    # GH 21149
    # Ensure that .set_names for MultiIndex with
    # nlevels == 1 does not raise any errors
    expected = pd.MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"])
    single_level = pd.MultiIndex.from_product([[0, 1]])
    returned = single_level.set_names("first", level=0, inplace=inplace)

    # When renaming in place, the receiver itself is what gets compared.
    renamed = single_level if inplace else returned
    tm.assert_index_equal(renamed, expected)
@pytest.mark.parametrize("ordered", [True, False])
def test_set_levels_categorical(ordered):
# GH13854
index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
cidx = CategoricalIndex(list("bac"), ordered=ordered)
result = index.set_levels(cidx, 0)
expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], codes=index.codes)
tm.assert_index_equal(result, expected)
result_lvl = result.get_level_values(0)
expected_lvl = CategoricalIndex(
list("bacb"), categories=cidx.categories, ordered=cidx.ordered
)
tm.assert_index_equal(result_lvl, expected_lvl)
def test_set_value_keeps_names():
    """Adding a row through ``.at`` keeps the index level names."""
    # motivating example from #3742
    people = ["hans", "hans", "hans", "grethe", "grethe", "grethe"]
    numbers = ["1", "2", "3"] * 2
    idx = pd.MultiIndex.from_arrays([people, numbers], names=["Name", "Number"])
    columns = ["one", "two", "three", "four"]
    df = pd.DataFrame(np.random.randn(6, 4), columns=columns, index=idx)
    df = df.sort_index()

    wanted_names = ("Name", "Number")
    assert df._is_copy is None
    assert df.index.names == wanted_names

    # ("grethe", "4") is not among the keys built above, so this adds a row.
    df.at[("grethe", "4"), "one"] = 99.34
    assert df._is_copy is None
    assert df.index.names == wanted_names
def test_set_levels_with_iterable():
    """``set_levels`` accepts a lazy iterable such as ``map``."""
    # GH23273
    colors = ["black"] * 3
    level_names = ["size", "color"]
    index = pd.MultiIndex.from_arrays([[1, 2, 3], colors], names=level_names)

    lazy_sizes = map(int, ["3", "2", "1"])
    result = index.set_levels(lazy_sizes, level="size")

    expected = pd.MultiIndex.from_arrays([[3, 2, 1], colors], names=level_names)
    tm.assert_index_equal(result, expected)

View File

@@ -0,0 +1,441 @@
from datetime import timedelta
import numpy as np
import pytest
import pandas as pd
from pandas import (
Categorical,
CategoricalIndex,
Index,
IntervalIndex,
MultiIndex,
date_range,
)
from pandas.core.indexes.base import InvalidIndexError
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal
def test_slice_locs_partial(idx):
sorted_idx, _ = idx.sortlevel(0)
result = sorted_idx.slice_locs(("foo", "two"), ("qux", "one"))
assert result == (1, 5)
result = sorted_idx.slice_locs(None, ("qux", "one"))
assert result == (0, 5)
result = sorted_idx.slice_locs(("foo", "two"), None)
assert result == (1, len(sorted_idx))
result = sorted_idx.slice_locs("bar", "baz")
assert result == (2, 4)
def test_slice_locs():
    """``slice_locs`` on a stacked frame's index selects the expected rows."""
    df = tm.makeTimeDataFrame()
    stacked = df.stack()
    idx = stacked.index

    # Bounds that are labels of df.index select rows 5 through 15.
    start, stop = idx.slice_locs(df.index[5], df.index[15])
    sliced = stacked[slice(start, stop)]
    expected = df[5:16].stack()
    tm.assert_almost_equal(sliced.values, expected.values)

    # Bounds shifted 30 seconds inwards select rows 6 through 14.
    half_minute = timedelta(seconds=30)
    start, stop = idx.slice_locs(df.index[5] + half_minute, df.index[15] - half_minute)
    sliced = stacked[slice(start, stop)]
    expected = df[6:15].stack()
    tm.assert_almost_equal(sliced.values, expected.values)
def test_slice_locs_with_type_mismatch():
    """Bounds of the wrong type make ``slice_locs`` raise ``TypeError``."""
    mismatch = "^Level type mismatch"

    time_frame = tm.makeTimeDataFrame()
    time_idx = time_frame.stack().index
    with pytest.raises(TypeError, match=mismatch):
        time_idx.slice_locs((1, 3))
    with pytest.raises(TypeError, match=mismatch):
        time_idx.slice_locs(time_frame.index[5] + timedelta(seconds=30), (5, 2))

    custom_frame = tm.makeCustomDataframe(5, 5)
    custom_idx = custom_frame.stack().index
    with pytest.raises(TypeError, match=mismatch):
        custom_idx.slice_locs(timedelta(seconds=30))
    # TODO: Try creating a UnicodeDecodeError in exception message
    with pytest.raises(TypeError, match=mismatch):
        custom_idx.slice_locs(custom_frame.index[1], (16, "a"))
def test_slice_locs_not_sorted():
    """``slice_locs`` raises ``KeyError`` here until the index is sorted."""
    three_levels = [Index(np.arange(4)) for _ in range(3)]
    index = MultiIndex(
        levels=three_levels,
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
    )
    lower, upper = (1, 0, 1), (2, 1, 0)

    depth_msg = "[Kk]ey length.*greater than MultiIndex lexsort depth"
    with pytest.raises(KeyError, match=depth_msg):
        index.slice_locs(lower, upper)

    # works
    sorted_index, _ = index.sortlevel(0)
    # should there be a test case here???
    sorted_index.slice_locs(lower, upper)
def test_slice_locs_not_contained():
# some searchsorted action
index = MultiIndex(
levels=[[0, 2, 4, 6], [0, 2, 4]],
codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3], [0, 1, 2, 1, 2, 2, 0, 1, 2]],
sortorder=0,
)
result = index.slice_locs((1, 0), (5, 2))
assert result == (3, 6)
result = index.slice_locs(1, 5)
assert result == (3, 6)
result = index.slice_locs((2, 2), (5, 2))
assert result == (3, 6)
result = index.slice_locs(2, 5)
assert result == (3, 6)
result = index.slice_locs((1, 0), (6, 3))
assert result == (3, 8)
result = index.slice_locs(-1, 10)
assert result == (0, len(index))
def test_putmask_with_wrong_mask(idx):
# GH18368
msg = "putmask: mask and data must be the same size"
with pytest.raises(ValueError, match=msg):
idx.putmask(np.ones(len(idx) + 1, np.bool), 1)
with pytest.raises(ValueError, match=msg):
idx.putmask(np.ones(len(idx) - 1, np.bool), 1)
with pytest.raises(ValueError, match=msg):
idx.putmask("foo", 1)
def test_get_indexer():
    """Check ``get_indexer`` for exact, pad/ffill and backfill/bfill matching."""
    index = MultiIndex(
        levels=[Index(np.arange(4)), Index(np.arange(2))],
        codes=[
            np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp),
            np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp),
        ],
    )
    head = index[:5]
    picked = index[[1, 3, 5]]

    # exact matching: the last picked entry is not part of ``head``
    exact = head.get_indexer(picked)
    assert_almost_equal(exact, np.array([1, 3, -1], dtype=np.intp))

    # forward fill, under both spellings
    padded = picked.get_indexer(head, method="pad")
    pad_expected = np.array([-1, 0, 0, 1, 1], dtype=np.intp)
    assert_almost_equal(padded, pad_expected)

    padded_reversed = picked.get_indexer(head[::-1], method="pad")
    assert_almost_equal(padded_reversed, pad_expected[::-1])

    ffilled = picked.get_indexer(head, method="ffill")
    assert_almost_equal(padded, ffilled)

    # backward fill, under both spellings
    backfilled = picked.get_indexer(head, method="backfill")
    backfill_expected = np.array([0, 0, 1, 1, 2], dtype=np.intp)
    assert_almost_equal(backfilled, backfill_expected)

    backfilled_reversed = picked.get_indexer(head[::-1], method="backfill")
    assert_almost_equal(backfilled_reversed, backfill_expected[::-1])

    bfilled = picked.get_indexer(head, method="bfill")
    assert_almost_equal(backfilled, bfilled)

    # pass non-MultiIndex
    from_values = head.get_indexer(picked.values)
    assert_almost_equal(from_values, head.get_indexer(picked))

    from_scalars = head.get_indexer([1, 2, 3])
    assert (from_scalars == [-1, -1, -1]).all()

    # create index with duplicates
    duplicated = Index(list(range(10)) + list(range(10)))
    target = Index(list(range(20)))

    msg = "Reindexing only valid with uniquely valued Index objects"
    with pytest.raises(InvalidIndexError, match=msg):
        duplicated.get_indexer(target)
def test_get_indexer_nearest():
    """``method='nearest'`` and ``tolerance`` raise ``NotImplementedError``."""
    midx = MultiIndex.from_tuples([("a", 1), ("b", 2)])

    nearest_msg = (
        "method='nearest' not implemented yet for MultiIndex; see GitHub issue 9365"
    )
    with pytest.raises(NotImplementedError, match=nearest_msg):
        midx.get_indexer(["a"], method="nearest")

    tolerance_msg = "tolerance not implemented yet for MultiIndex"
    with pytest.raises(NotImplementedError, match=tolerance_msg):
        midx.get_indexer(["a"], method="pad", tolerance=2)
def test_getitem(idx):
# scalar
assert idx[2] == ("bar", "one")
# slice
result = idx[2:5]
expected = idx[[2, 3, 4]]
assert result.equals(expected)
# boolean
result = idx[[True, False, True, False, True, True]]
result2 = idx[np.array([True, False, True, False, True, True])]
expected = idx[[0, 2, 4, 5]]
assert result.equals(expected)
assert result2.equals(expected)
def test_getitem_group_select(idx):
sorted_idx, _ = idx.sortlevel(0)
assert sorted_idx.get_loc("baz") == slice(3, 4)
assert sorted_idx.get_loc("foo") == slice(0, 2)
def test_get_indexer_consistency(idx):
# See GH 16819
if isinstance(idx, IntervalIndex):
pass
if idx.is_unique or isinstance(idx, CategoricalIndex):
indexer = idx.get_indexer(idx[0:2])
assert isinstance(indexer, np.ndarray)
assert indexer.dtype == np.intp
else:
e = "Reindexing only valid with uniquely valued Index objects"
with pytest.raises(InvalidIndexError, match=e):
idx.get_indexer(idx[0:2])
indexer, _ = idx.get_indexer_non_unique(idx[0:2])
assert isinstance(indexer, np.ndarray)
assert indexer.dtype == np.intp
@pytest.mark.parametrize("ind1", [[True] * 5, pd.Index([True] * 5)])
@pytest.mark.parametrize(
    "ind2",
    [[True, False, True, False, False], pd.Index([True, False, True, False, False])],
)
def test_getitem_bool_index_all(ind1, ind2):
    """Boolean lists and boolean Index objects select rows identically."""
    # GH#22533
    pairs = [(10, 1), (20, 2), (30, 3), (40, 4), (50, 5)]
    idx = MultiIndex.from_tuples(pairs)

    # an all-True mask keeps every row
    tm.assert_index_equal(idx[ind1], idx)

    # a partial mask keeps the rows at positions 0 and 2
    kept = MultiIndex.from_tuples([(10, 1), (30, 3)])
    tm.assert_index_equal(idx[ind2], kept)
@pytest.mark.parametrize("ind1", [[True], pd.Index([True])])
@pytest.mark.parametrize("ind2", [[False], pd.Index([False])])
def test_getitem_bool_index_single(ind1, ind2):
    """Boolean indexing on a one-row MultiIndex keeps or drops that row."""
    # GH#22533
    idx = MultiIndex.from_tuples([(10, 1)])
    tm.assert_index_equal(idx[ind1], idx)

    # Dropping the only row leaves empty int64 levels and no codes.
    empty_levels = [np.array([], dtype=np.int64), np.array([], dtype=np.int64)]
    emptied = pd.MultiIndex(levels=empty_levels, codes=[[], []])
    tm.assert_index_equal(idx[ind2], emptied)
def test_get_loc(idx):
assert idx.get_loc(("foo", "two")) == 1
assert idx.get_loc(("baz", "two")) == 3
with pytest.raises(KeyError, match=r"^10$"):
idx.get_loc(("bar", "two"))
with pytest.raises(KeyError, match=r"^'quux'$"):
idx.get_loc("quux")
msg = "only the default get_loc method is currently supported for MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
idx.get_loc("foo", method="nearest")
# 3 levels
index = MultiIndex(
levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))],
codes=[
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
],
)
with pytest.raises(KeyError, match=r"^\(1, 1\)$"):
index.get_loc((1, 1))
assert index.get_loc((2, 0)) == slice(3, 5)
def test_get_loc_duplicates():
    """``get_loc`` on a flat Index gives a slice for a run and an int for one hit."""
    all_twos = Index([2, 2, 2, 2])
    assert all_twos.get_loc(2) == slice(0, 4)

    letters = Index(["c", "a", "a", "b", "b"])
    assert letters.get_loc("c") == 0
def test_get_loc_level():
    """Check ``get_loc_level`` for partial, full, missing and slice keys."""
    three_levels = [Index(np.arange(4)) for _ in range(3)]
    index = MultiIndex(
        levels=three_levels,
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
    )

    # partial key: a slice plus the index with the matched levels dropped
    loc, remainder = index.get_loc_level((0, 1))
    wanted_loc = slice(1, 2)
    wanted_remainder = index[wanted_loc].droplevel(0).droplevel(0)
    assert loc == wanted_loc
    assert remainder.equals(wanted_remainder)

    # full key: an integer position and no remaining index
    loc, remainder = index.get_loc_level((0, 1, 0))
    assert loc == 1
    assert remainder is None

    with pytest.raises(KeyError, match=r"^\(2, 2\)$"):
        index.get_loc_level((2, 2))
    # GH 22221: unused label
    with pytest.raises(KeyError, match=r"^2$"):
        index.drop(2).get_loc_level(2)
    # Unused label on unsorted level:
    with pytest.raises(KeyError, match=r"^2$"):
        index.drop(1, level=2).get_loc_level(2, level=2)

    index = MultiIndex(
        levels=[[2000], list(range(4))],
        codes=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])],
    )
    loc, remainder = index.get_loc_level((2000, slice(None, None)))
    assert loc == slice(None, None)
    assert remainder.equals(index.droplevel(0))
@pytest.mark.parametrize("dtype1", [int, float, bool, str])
@pytest.mark.parametrize("dtype2", [int, float, bool, str])
def test_get_loc_multiple_dtypes(dtype1, dtype2):
    """``get_loc`` finds an entry whatever the dtypes of the two levels."""
    # GH 18520
    base = np.array([0, 1])
    idx = pd.MultiIndex.from_product([base.astype(dtype1), base.astype(dtype2)])
    entry = idx[2]
    assert idx.get_loc(entry) == 2
@pytest.mark.parametrize("level", [0, 1])
@pytest.mark.parametrize("dtypes", [[int, float], [float, int]])
def test_get_loc_implicit_cast(level, dtypes):
    """An int key matches a float level, and a float key an int level."""
    # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa
    lev_dtype, key_dtype = dtypes

    levels = [["a", "b"], ["c", "d"]]
    levels[level] = np.array([0, 1], dtype=lev_dtype)

    # The key entry for that level has the other numeric type.
    key = ["b", "d"]
    key[level] = key_dtype(1)

    idx = MultiIndex.from_product(levels)
    assert idx.get_loc(tuple(key)) == 3
def test_get_loc_cast_bool():
# GH 19086 : int is casted to bool, but not vice-versa
levels = [[False, True], np.arange(2, dtype="int64")]
idx = MultiIndex.from_product(levels)
assert idx.get_loc((0, 1)) == 1
assert idx.get_loc((1, 0)) == 2
with pytest.raises(KeyError, match=r"^\(False, True\)$"):
idx.get_loc((False, True))
with pytest.raises(KeyError, match=r"^\(True, False\)$"):
idx.get_loc((True, False))
@pytest.mark.parametrize("level", [0, 1])
def test_get_loc_nan(level, nulls_fixture):
# GH 18485 : NaN in MultiIndex
levels = [["a", "b"], ["c", "d"]]
key = ["b", "d"]
levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture))
key[level] = nulls_fixture
idx = MultiIndex.from_product(levels)
assert idx.get_loc(tuple(key)) == 3
def test_get_loc_missing_nan():
# GH 8569
idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]])
assert isinstance(idx.get_loc(1), slice)
with pytest.raises(KeyError, match=r"^3\.0$"):
idx.get_loc(3)
with pytest.raises(KeyError, match=r"^nan$"):
idx.get_loc(np.nan)
with pytest.raises(KeyError, match=r"^\[nan\]$"):
idx.get_loc([np.nan])
def test_get_indexer_categorical_time():
    """``get_indexer`` of a categorical MultiIndex on itself is ``arange(9)``."""
    # https://github.com/pandas-dev/pandas/issues/21390
    letters = Categorical(["a", "b", "c"])
    hours = Categorical(date_range("2012-01-01", periods=3, freq="H"))
    midx = MultiIndex.from_product([letters, hours])

    positions = midx.get_indexer(midx)
    tm.assert_numpy_array_equal(positions, np.arange(9, dtype=np.intp))
def test_timestamp_multiindex_indexer():
# https://github.com/pandas-dev/pandas/issues/26944
idx = pd.MultiIndex.from_product(
[
pd.date_range("2019-01-01T00:15:33", periods=100, freq="H", name="date"),
["x"],
[3],
]
)
df = pd.DataFrame({"foo": np.arange(len(idx))}, idx)
result = df.loc[pd.IndexSlice["2019-1-2":, "x", :], "foo"]
qidx = pd.MultiIndex.from_product(
[
pd.date_range(
start="2019-01-02T00:15:33",
end="2019-01-05T02:15:33",
freq="H",
name="date",
),
["x"],
[3],
]
)
should_be = pd.Series(data=np.arange(24, len(qidx) + 24), index=qidx, name="foo")
tm.assert_series_equal(result, should_be)

View File

@@ -0,0 +1,297 @@
import re
import numpy as np
import pytest
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
import pandas as pd
from pandas import IntervalIndex, MultiIndex, RangeIndex
import pandas.util.testing as tm
def test_labels_dtypes():
    """Codes use int8, int16 or int32 depending on the number of level values."""
    # GH 8456
    small = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    for level_codes in (small.codes[0], small.codes[1]):
        assert level_codes.dtype == "int8"

    # (number of inner values, expected dtype of the inner codes)
    for n_values, wanted_dtype in ((40, "int8"), (400, "int16"), (40000, "int32")):
        product = MultiIndex.from_product([["a"], range(n_values)])
        assert product.codes[1].dtype == wanted_dtype

    # no code may be negative
    thousand = pd.MultiIndex.from_product([["a"], range(1000)])
    assert (thousand.codes[0] >= 0).all()
    assert (thousand.codes[1] >= 0).all()
def test_values_boxed():
    """``.values`` yields the original tuples, consistently under slicing."""
    stamp = pd.Timestamp
    tuples = [
        (1, stamp("2000-01-01")),
        (2, pd.NaT),
        (3, stamp("2000-01-03")),
        (1, stamp("2000-01-04")),
        (2, stamp("2000-01-02")),
        (3, stamp("2000-01-03")),
    ]
    mi = pd.MultiIndex.from_tuples(tuples)
    wanted = construct_1d_object_array_from_listlike(tuples)
    tm.assert_numpy_array_equal(mi.values, wanted)

    # Check that code branches for boxed values produce identical results
    values_then_slice = mi.values[:4]
    slice_then_values = mi[:4].values
    tm.assert_numpy_array_equal(values_then_slice, slice_then_values)
def test_values_multiindex_datetimeindex():
# Test to ensure we hit the boxing / nobox part of MI.values
ints = np.arange(10 ** 18, 10 ** 18 + 5)
naive = pd.DatetimeIndex(ints)
# TODO(GH-24559): Remove the FutureWarning
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
aware = pd.DatetimeIndex(ints, tz="US/Central")
idx = pd.MultiIndex.from_arrays([naive, aware])
result = idx.values
outer = pd.DatetimeIndex([x[0] for x in result])
tm.assert_index_equal(outer, naive)
inner = pd.DatetimeIndex([x[1] for x in result])
tm.assert_index_equal(inner, aware)
# n_lev > n_lab
result = idx[:2].values
outer = pd.DatetimeIndex([x[0] for x in result])
tm.assert_index_equal(outer, naive[:2])
inner = pd.DatetimeIndex([x[1] for x in result])
tm.assert_index_equal(inner, aware[:2])
def test_values_multiindex_periodindex():
# Test to ensure we hit the boxing / nobox part of MI.values
ints = np.arange(2007, 2012)
pidx = pd.PeriodIndex(ints, freq="D")
idx = pd.MultiIndex.from_arrays([ints, pidx])
result = idx.values
outer = pd.Int64Index([x[0] for x in result])
tm.assert_index_equal(outer, pd.Int64Index(ints))
inner = pd.PeriodIndex([x[1] for x in result])
tm.assert_index_equal(inner, pidx)
# n_lev > n_lab
result = idx[:2].values
outer = pd.Int64Index([x[0] for x in result])
tm.assert_index_equal(outer, pd.Int64Index(ints[:2]))
inner = pd.PeriodIndex([x[1] for x in result])
tm.assert_index_equal(inner, pidx[:2])
def test_consistency():
    """Build a 70000-row MultiIndex, then check that repeated rows are not unique."""
    # need to construct an overflow
    n_major, n_minor = 70000, 10
    levels = [list(range(n_major)), list(range(n_minor))]

    # the fact that it works means it's consistent
    large_codes = [np.arange(n_major), np.repeat(range(n_minor), 7000)]
    index = MultiIndex(levels=levels, codes=large_codes)

    # inconsistent
    repeated_codes = [
        np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]),
        np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]),
    ]
    index = MultiIndex(levels=levels, codes=repeated_codes)

    assert index.is_unique is False
def test_hash_collisions():
    """Non-smoke test that a 1000 x 1000 product index has no hash collisions."""
    side = np.arange(1000)
    index = MultiIndex.from_product([side, np.arange(1000)], names=["one", "two"])

    # Every entry must map back to its own position.
    positions = index.get_indexer(index.values)
    tm.assert_numpy_array_equal(positions, np.arange(len(index), dtype="intp"))

    # Spot-check scalar lookups at both ends of the index.
    size = len(index)
    for position in (0, 1, size - 2, size - 1):
        located = index.get_loc(index[position])
        assert located == position
def test_dims():
    """Placeholder test with an empty body; it asserts nothing."""
def test_take_invalid_kwargs():
    """``MultiIndex.take`` rejects unknown keywords and the ``out``/``mode`` options.

    The function was originally named ``take_invalid_kwargs``; without the
    ``test_`` prefix pytest's default discovery never collected it.
    """
    vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]]
    idx = pd.MultiIndex.from_product(vals, names=["str", "dt"])
    indices = [1, 2]

    # An unknown keyword is a plain TypeError from the call itself.
    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)

    # ``out`` and ``mode`` are accepted by the signature but rejected by value.
    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, mode="clip")


# Keep the old name importable; pytest does not collect it (no ``test_`` prefix).
take_invalid_kwargs = test_take_invalid_kwargs
def test_isna_behavior(idx):
    """``pd.isna`` on a MultiIndex raises ``NotImplementedError`` (should not segfault, GH5123)."""
    # NOTE: if MI representation changes, may make sense to allow
    # isna(MI)
    expected_message = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=expected_message):
        pd.isna(idx)
def test_large_multiindex_error():
    """Missing keys raise ``KeyError`` just below and just above one million rows.

    GH12527
    """
    missing_keys = (((-1, 0), r"^\(-1, 0\)$"), ((3, 0), r"^\(3, 0\)$"))

    # 2 * 499999 rows stays below 1,000,000; 2 * 500001 rows goes above it.
    for second_level_size in (499999, 500001):
        frame = pd.DataFrame(
            1,
            index=pd.MultiIndex.from_product([[1, 2], range(second_level_size)]),
            columns=["dest"],
        )
        for key, pattern in missing_keys:
            with pytest.raises(KeyError, match=pattern):
                frame.loc[key, "dest"]
def test_million_record_attribute_error():
    """A missing Series attribute raises ``AttributeError`` on a million-row MultiIndex.

    GH 18165
    """
    values = list(range(1000000))
    index = pd.MultiIndex.from_tuples([(value, value) for value in values])
    frame = pd.DataFrame({"a": values, "b": values}, index=index)

    with pytest.raises(AttributeError, match="'Series' object has no attribute 'foo'"):
        frame["a"].foo()
def test_can_hold_identifiers(idx):
key = idx[0]
assert idx._can_hold_identifiers_and_holds_name(key) is True
def test_metadata_immutable(idx):
    """Item assignment on levels, codes and names raises ``TypeError``."""
    levels, codes = idx.levels, idx.codes
    names = idx.names
    mutable_regex = re.compile("does not support mutable operations")

    # shouldn't be able to set at either the top level or base level;
    # ditto for labels, and for names
    for container in (levels, levels[0], codes, codes[0], names):
        with pytest.raises(TypeError, match=mutable_regex):
            container[0] = container[0]
def test_level_setting_resets_attributes():
ind = pd.MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
assert ind.is_monotonic
ind.set_levels([["A", "B"], [1, 3, 2]], inplace=True)
# if this fails, probably didn't reset the cache correctly.
assert not ind.is_monotonic
def test_rangeindex_fallback_coercion_bug():
# GH 12893
foo = pd.DataFrame(np.arange(100).reshape((10, 10)))
bar = pd.DataFrame(np.arange(100).reshape((10, 10)))
df = pd.concat({"foo": foo.stack(), "bar": bar.stack()}, axis=1)
df.index.names = ["fizz", "buzz"]
str(df)
expected = pd.DataFrame(
{"bar": np.arange(100), "foo": np.arange(100)},
index=pd.MultiIndex.from_product(
[range(10), range(10)], names=["fizz", "buzz"]
),
)
tm.assert_frame_equal(df, expected, check_like=True)
result = df.index.get_level_values("fizz")
expected = pd.Int64Index(np.arange(10), name="fizz").repeat(10)
tm.assert_index_equal(result, expected)
result = df.index.get_level_values("buzz")
expected = pd.Int64Index(np.tile(np.arange(10), 10), name="buzz")
tm.assert_index_equal(result, expected)
def test_hash_error(indices):
    """Hashing an index raises ``TypeError`` naming the index class."""
    pattern = "unhashable type: {0.__name__!r}".format(type(indices))
    with pytest.raises(TypeError, match=pattern):
        hash(indices)
def test_mutability(indices):
    """Item assignment on a non-empty index raises ``TypeError``."""
    # An empty index has no element 0 to assign to, so there is nothing to check.
    if len(indices):
        with pytest.raises(
            TypeError, match="Index does not support mutable operations"
        ):
            indices[0] = indices[0]
def test_wrong_number_names(indices):
    """Assigning a three-element ``names`` list raises ``ValueError`` ("Length...")."""
    three_names = ["apple", "banana", "carrot"]
    with pytest.raises(ValueError, match="^Length"):
        indices.names = three_names
def test_memory_usage(idx):
result = idx.memory_usage()
if len(idx):
idx.get_loc(idx[0])
result2 = idx.memory_usage()
result3 = idx.memory_usage(deep=True)
# RangeIndex, IntervalIndex
# don't have engines
if not isinstance(idx, (RangeIndex, IntervalIndex)):
assert result2 > result
if idx.inferred_type == "object":
assert result3 > result2
else:
# we report 0 for no-length
assert result == 0
def test_nlevels(idx):
assert idx.nlevels == 2

View File

@@ -0,0 +1,89 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Index, MultiIndex
import pandas.util.testing as tm
@pytest.mark.parametrize(
    "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])]
)
def test_join_level(idx, other, join_type):
    """Join a flat Index against the ``"second"`` level of a MultiIndex."""
    joined, left_indexer, right_indexer = other.join(
        idx, how=join_type, level="second", return_indexers=True
    )

    expected_second = other.join(idx.levels[1], how=join_type)
    assert joined.levels[0].equals(idx.levels[0])
    assert joined.levels[1].equals(expected_second)

    # pare down levels
    keep = np.array([entry[1] in expected_second for entry in idx], dtype=bool)
    expected_values = idx.values[keep]
    tm.assert_numpy_array_equal(joined.values, expected_values)

    if join_type not in ("outer", "inner"):
        return

    # Joining from the MultiIndex side returns the two indexers in swapped order.
    mirrored, right_indexer2, left_indexer2 = idx.join(
        other, how=join_type, level="second", return_indexers=True
    )
    assert joined.equals(mirrored)
    tm.assert_numpy_array_equal(left_indexer, left_indexer2)
    tm.assert_numpy_array_equal(right_indexer, right_indexer2)
    tm.assert_numpy_array_equal(mirrored.values, expected_values)
def test_join_level_corner_case(idx):
    """Corner cases of level joins: result type, and an ambiguous MI-to-MI join."""
    flat = Index(["three", "one", "two"])
    joined = flat.join(idx, level="second")
    assert isinstance(joined, MultiIndex)

    # Joining two MultiIndexes on a level is rejected as ambiguous.
    with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"):
        idx.join(idx, level=1)
def test_join_self(idx, join_type):
joined = idx.join(idx, how=join_type)
assert idx is joined
def test_join_multi():
    """Join a MultiIndex with a flat Index that shares one level name.

    GH 10665
    """
    midx = pd.MultiIndex.from_product([np.arange(4), np.arange(4)], names=["a", "b"])
    flat = pd.Index([1, 2, 5], name="b")

    # inner
    expected_index = pd.MultiIndex.from_product(
        [np.arange(4), [1, 2]], names=["a", "b"]
    )
    expected_multi_pos = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
    expected_flat_pos = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)

    joined, multi_pos, flat_pos = midx.join(flat, how="inner", return_indexers=True)
    tm.assert_index_equal(joined, expected_index)
    tm.assert_numpy_array_equal(multi_pos, expected_multi_pos)
    tm.assert_numpy_array_equal(flat_pos, expected_flat_pos)

    # flip
    joined, flat_pos, multi_pos = flat.join(midx, how="inner", return_indexers=True)
    tm.assert_index_equal(joined, expected_index)
    tm.assert_numpy_array_equal(multi_pos, expected_multi_pos)
    tm.assert_numpy_array_equal(flat_pos, expected_flat_pos)

    # keep MultiIndex
    expected_flat_pos = np.array(
        [-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp
    )
    joined, multi_pos, flat_pos = midx.join(flat, how="left", return_indexers=True)
    tm.assert_index_equal(joined, midx)
    assert multi_pos is None
    tm.assert_numpy_array_equal(flat_pos, expected_flat_pos)

    # flip
    joined, flat_pos, multi_pos = flat.join(midx, how="right", return_indexers=True)
    tm.assert_index_equal(joined, midx)
    assert multi_pos is None
    tm.assert_numpy_array_equal(flat_pos, expected_flat_pos)
def test_join_self_unique(idx, join_type):
if idx.is_unique:
joined = idx.join(idx, how=join_type)
assert (idx == joined).all()

View File

@@ -0,0 +1,143 @@
import numpy as np
import pytest
from pandas._libs.tslib import iNaT
import pandas as pd
from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
import pandas.util.testing as tm
def test_fillna(idx):
    """``fillna`` raises for MultiIndex; the other branches cover flat index types.

    GH 11343
    """
    # TODO: Remove or Refactor. Not Implemented for MultiIndex
    for _label, candidate in [("idx", idx)]:
        if len(candidate) == 0:
            continue

        if isinstance(candidate, MultiIndex):
            work = candidate.copy()
            with pytest.raises(
                NotImplementedError, match="isna is not defined for MultiIndex"
            ):
                work.fillna(work[0])
            continue

        # Filling an index that holds no NaN returns an equal but distinct object.
        work = candidate.copy()
        filled = work.fillna(work[0])
        tm.assert_index_equal(filled, work)
        assert filled is not work

        with pytest.raises(TypeError, match="'value' must be a scalar, passed: "):
            work.fillna([work[0]])

        # Plant a missing value at position 1 and rebuild the index from it.
        work = candidate.copy()
        values = work.values

        if isinstance(candidate, DatetimeIndexOpsMixin):
            values[1] = iNaT
        elif isinstance(candidate, (Int64Index, UInt64Index)):
            continue
        else:
            values[1] = np.nan

        if isinstance(candidate, PeriodIndex):
            work = candidate.__class__(values, freq=candidate.freq)
        else:
            work = candidate.__class__(values)

        expected = np.array([False] * len(work), dtype=bool)
        expected[1] = True
        tm.assert_numpy_array_equal(work._isnan, expected)
        assert work.hasnans is True
def test_dropna():
    """``MultiIndex.dropna`` with ``how="any"`` (default) and ``how="all"``.

    GH 6194
    """
    from_arrays = pd.MultiIndex.from_arrays(
        [
            [1, np.nan, 3, np.nan, 5],
            [1, 2, np.nan, np.nan, 5],
            ["a", "b", "c", np.nan, "e"],
        ]
    )

    # Default and how="any": keep only rows without any missing entry.
    no_missing = pd.MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]])
    tm.assert_index_equal(from_arrays.dropna(), no_missing)
    tm.assert_index_equal(from_arrays.dropna(how="any"), no_missing)

    # how="all": only the row that is missing in every level goes away.
    not_all_missing = pd.MultiIndex.from_arrays(
        [[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]]
    )
    tm.assert_index_equal(from_arrays.dropna(how="all"), not_all_missing)

    with pytest.raises(ValueError, match="invalid how option: xxx"):
        from_arrays.dropna(how="xxx")

    # GH26408
    # test if missing values are dropped for multiindex constructed
    # from codes and values
    from_codes = MultiIndex(
        levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]],
        codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]],
    )
    no_missing = MultiIndex.from_arrays([["128", 2], ["128", 2]])
    tm.assert_index_equal(from_codes.dropna(), no_missing)
    tm.assert_index_equal(from_codes.dropna(how="any"), no_missing)

    not_all_missing = MultiIndex.from_arrays(
        [[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]]
    )
    tm.assert_index_equal(from_codes.dropna(how="all"), not_all_missing)
def test_nulls(idx):
    """Smoke test: ``MultiIndex.isna`` raises ``NotImplementedError``."""
    # this is really a smoke test for the methods
    # as these are adequately tested for function elsewhere
    with pytest.raises(
        NotImplementedError, match="isna is not defined for MultiIndex"
    ):
        idx.isna()
@pytest.mark.xfail
def test_hasnans_isnans(idx):
    """Check ``_isnan`` / ``hasnans`` without and with a NaN (marked xfail).

    GH 11343, added tests for hasnans / isnans
    """
    # cases in indices doesn't include NaN
    clean = idx.copy()
    no_nan_mask = np.array([False] * len(clean), dtype=bool)
    tm.assert_numpy_array_equal(clean._isnan, no_nan_mask)
    assert clean.hasnans is False

    # Put a NaN at position 1 of the values and rebuild the index from them.
    source = idx.copy()
    values = source.values
    values[1] = np.nan
    rebuilt = idx.__class__(values)

    one_nan_mask = np.array([False] * len(rebuilt), dtype=bool)
    one_nan_mask[1] = True
    tm.assert_numpy_array_equal(rebuilt._isnan, one_nan_mask)
    assert rebuilt.hasnans is True
def test_nan_stays_float():
    """Missing entries of an empty level stay missing through joins and arithmetic.

    GH 7031
    """
    left_index = pd.MultiIndex(
        levels=[["A", "B"], []], codes=[[1, 0], [-1, -1]], names=[0, 1]
    )
    right_index = pd.MultiIndex(
        levels=[["C"], ["D"]], codes=[[0], [0]], names=[0, 1]
    )

    joined_index = left_index.join(right_index, how="outer")
    assert pd.isna(left_index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(joined_index.get_level_values(1)[:-1]).all()

    left_frame = pd.DataFrame([[1, 2]], index=left_index)
    right_frame = pd.DataFrame([[3, 4]], index=right_index)
    difference = left_frame - right_frame
    assert pd.isna(left_frame.index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(difference.index.get_level_values(1)[:-1]).all()

View File

@@ -0,0 +1,230 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Index, IntervalIndex, MultiIndex
from pandas.api.types import is_scalar
def test_is_monotonic_increasing():
i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=["one", "two"])
assert i.is_monotonic is True
assert i._is_strictly_monotonic_increasing is True
assert Index(i.values).is_monotonic is True
assert i._is_strictly_monotonic_increasing is True
i = MultiIndex.from_product(
[np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
)
assert i.is_monotonic is False
assert i._is_strictly_monotonic_increasing is False
assert Index(i.values).is_monotonic is False
assert Index(i.values)._is_strictly_monotonic_increasing is False
i = MultiIndex.from_product(
[np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
)
assert i.is_monotonic is False
assert i._is_strictly_monotonic_increasing is False
assert Index(i.values).is_monotonic is False
assert Index(i.values)._is_strictly_monotonic_increasing is False
i = MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]])
assert i.is_monotonic is False
assert i._is_strictly_monotonic_increasing is False
assert Index(i.values).is_monotonic is False
assert Index(i.values)._is_strictly_monotonic_increasing is False
# string ordering
i = MultiIndex(
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=["first", "second"],
)
assert i.is_monotonic is False
assert Index(i.values).is_monotonic is False
assert i._is_strictly_monotonic_increasing is False
assert Index(i.values)._is_strictly_monotonic_increasing is False
i = MultiIndex(
levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=["first", "second"],
)
assert i.is_monotonic is True
assert Index(i.values).is_monotonic is True
assert i._is_strictly_monotonic_increasing is True
assert Index(i.values)._is_strictly_monotonic_increasing is True
# mixed levels, hits the TypeError
i = MultiIndex(
levels=[
[1, 2, 3, 4],
[
"gb00b03mlx29",
"lu0197800237",
"nl0000289783",
"nl0000289965",
"nl0000301109",
],
],
codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
names=["household_id", "asset_id"],
)
assert i.is_monotonic is False
assert i._is_strictly_monotonic_increasing is False
# empty
i = MultiIndex.from_arrays([[], []])
assert i.is_monotonic is True
assert Index(i.values).is_monotonic is True
assert i._is_strictly_monotonic_increasing is True
assert Index(i.values)._is_strictly_monotonic_increasing is True
def test_is_monotonic_decreasing():
    """Check ``is_monotonic_decreasing`` and the strict variant for several MultiIndexes.

    Each stanza builds one MultiIndex and checks the flags both on it and on
    ``Index(i.values)``, the index rebuilt from its tuples.
    """
    i = MultiIndex.from_product(
        [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    # Previously this line re-checked ``i`` itself; check the rebuilt Index,
    # as the stanzas below do.
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # first level ascending
    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # second level ascending
    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # NaN inside the first level
    i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]])
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # string ordering
    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[
            [4, 3, 2, 1],
            [
                "nl0000301109",
                "nl0000289965",
                "nl0000289783",
                "lu0197800237",
                "gb00b03mlx29",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True
def test_is_strictly_monotonic_increasing():
    """A repeated row keeps the index increasing but not strictly increasing."""
    levels = [["bar", "baz"], ["mom", "next"]]
    # The first two rows carry the same pair of codes.
    codes = [[0, 0, 1, 1], [0, 0, 0, 1]]
    mi = pd.MultiIndex(levels=levels, codes=codes)

    assert mi.is_monotonic_increasing is True
    assert mi._is_strictly_monotonic_increasing is False
def test_is_strictly_monotonic_decreasing():
    """A repeated row keeps the index decreasing but not strictly decreasing."""
    levels = [["baz", "bar"], ["next", "mom"]]
    # The first two rows carry the same pair of codes.
    codes = [[0, 0, 1, 1], [0, 0, 0, 1]]
    mi = pd.MultiIndex(levels=levels, codes=codes)

    assert mi.is_monotonic_decreasing is True
    assert mi._is_strictly_monotonic_decreasing is False
def test_searchsorted_monotonic(indices):
# GH17271
# not implemented for tuple searches in MultiIndex
# or Intervals searches in IntervalIndex
if isinstance(indices, (MultiIndex, IntervalIndex)):
return
# nothing to test if the index is empty
if indices.empty:
return
value = indices[0]
# determine the expected results (handle dupes for 'right')
expected_left, expected_right = 0, (indices == value).argmin()
if expected_right == 0:
# all values are the same, expected_right should be length
expected_right = len(indices)
# test _searchsorted_monotonic in all cases
# test searchsorted only for increasing
if indices.is_monotonic_increasing:
ssm_left = indices._searchsorted_monotonic(value, side="left")
assert is_scalar(ssm_left)
assert expected_left == ssm_left
ssm_right = indices._searchsorted_monotonic(value, side="right")
assert is_scalar(ssm_right)
assert expected_right == ssm_right
ss_left = indices.searchsorted(value, side="left")
assert is_scalar(ss_left)
assert expected_left == ss_left
ss_right = indices.searchsorted(value, side="right")
assert is_scalar(ss_right)
assert expected_right == ss_right
elif indices.is_monotonic_decreasing:
ssm_left = indices._searchsorted_monotonic(value, side="left")
assert is_scalar(ssm_left)
assert expected_left == ssm_left
ssm_right = indices._searchsorted_monotonic(value, side="right")
assert is_scalar(ssm_right)
assert expected_right == ssm_right
else:
# non-monotonic should raise.
with pytest.raises(ValueError):
indices._searchsorted_monotonic(value, side="left")

View File

@@ -0,0 +1,123 @@
import pytest
import pandas as pd
from pandas import MultiIndex
import pandas.util.testing as tm
def check_level_names(index, names):
    """Assert that the ``name`` of each level of ``index`` matches ``names`` in order."""
    actual = [level.name for level in index.levels]
    expected = list(names)
    assert actual == expected
def test_slice_keep_name():
    """Slicing a MultiIndex keeps its level names."""
    tuples = [("a", "b"), (1, 2), ("c", "d")]
    mi = MultiIndex.from_tuples(tuples, names=["x", "y"])
    tail = mi[1:]
    assert tail.names == mi.names
def test_index_name_retained():
    """Adding a row through ``.loc`` keeps the name of the index.

    GH9857
    """
    grown = pd.DataFrame({"x": [1, 2, 6], "y": [2, 2, 8], "z": [-5, 0, 5]})
    grown = grown.set_index("z")
    # Label 10 does not exist yet, so this appends a new row.
    grown.loc[10] = [9, 10]

    expected = pd.DataFrame(
        {"x": [1, 2, 6, 9], "y": [2, 2, 8, 10], "z": [-5, 0, 5, 10]}
    ).set_index("z")
    tm.assert_frame_equal(grown, expected)
def test_changing_names(idx):
    """Renaming an index changes its own level names but not those of its copies."""
    # names should be applied to levels
    original_names = [level.name for level in idx.levels]
    check_level_names(idx, idx.names)

    view = idx.view()
    copy = idx.copy()
    shallow_copy = idx._shallow_copy()

    # changing names should change level names on object
    renamed = [name + "a" for name in idx.names]
    idx.names = renamed
    check_level_names(idx, renamed)

    # but not on copies
    for untouched in (view, copy, shallow_copy):
        check_level_names(untouched, original_names)

    # and copies shouldn't change original
    shallow_copy.names = [name + "c" for name in shallow_copy.names]
    check_level_names(idx, renamed)
def test_take_preserve_name(idx):
taken = idx.take([3, 0, 1])
assert taken.names == idx.names
def test_copy_names():
# Check that adding a "names" parameter to the copy is honored
# GH14302
multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"])
multi_idx1 = multi_idx.copy()
assert multi_idx.equals(multi_idx1)
assert multi_idx.names == ["MyName1", "MyName2"]
assert multi_idx1.names == ["MyName1", "MyName2"]
multi_idx2 = multi_idx.copy(names=["NewName1", "NewName2"])
assert multi_idx.equals(multi_idx2)
assert multi_idx.names == ["MyName1", "MyName2"]
assert multi_idx2.names == ["NewName1", "NewName2"]
multi_idx3 = multi_idx.copy(name=["NewName1", "NewName2"])
assert multi_idx.equals(multi_idx3)
assert multi_idx.names == ["MyName1", "MyName2"]
assert multi_idx3.names == ["NewName1", "NewName2"]
def test_names(idx, index_names):
    """Names propagate to the levels; wrong-length names are rejected."""
    # names are assigned in setup
    assert index_names == [level.name for level in idx.levels]

    # setting bad names on existing
    for bad_names in (list(idx.names) + ["third"], []):
        with pytest.raises(ValueError, match="^Length of names"):
            idx.names = bad_names

    # initializing with bad names (should always be equivalent)
    major_axis, minor_axis = idx.levels
    major_codes, minor_codes = idx.codes
    for bad_names in (["first"], ["first", "second", "third"]):
        with pytest.raises(ValueError, match="^Length of names"):
            MultiIndex(
                levels=[major_axis, minor_axis],
                codes=[major_codes, minor_codes],
                names=bad_names,
            )

    # names are assigned
    idx.names = ["a", "b"]
    assert list(idx.names) == [level.name for level in idx.levels]
def test_duplicate_level_names_access_raises(idx):
    """Looking up a level by a name used twice raises ``ValueError``.

    GH19029
    """
    duplicated = ["foo", "foo"]
    idx.names = duplicated
    with pytest.raises(ValueError, match="name foo occurs multiple times"):
        idx._get_level_number("foo")

View File

@@ -0,0 +1,96 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, MultiIndex, date_range
import pandas.util.testing as tm
def test_partial_string_timestamp_multiindex():
    """Partial date-string selection on a datetime level of a MultiIndex.

    GH10331
    """
    stamps = pd.date_range("2016-01-01", "2016-01-03", freq="12H")
    letters = ["a", "b", "c"]
    df = pd.DataFrame(
        {"c1": range(0, 15)}, index=pd.MultiIndex.from_product([stamps, letters])
    )
    ixs = pd.IndexSlice

    #                        c1
    # 2016-01-01 00:00:00 a   0
    #                     b   1
    #                     c   2
    # 2016-01-01 12:00:00 a   3
    #                     b   4
    #                     c   5
    # 2016-01-02 00:00:00 a   6
    #                     b   7
    #                     c   8
    # 2016-01-02 12:00:00 a   9
    #                     b  10
    #                     c  11
    # 2016-01-03 00:00:00 a  12
    #                     b  13
    #                     c  14

    # partial string matching on a single index
    for df_swap in (df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)):
        df_swap = df_swap.sort_index()
        only_a = df_swap.loc["a"]
        result = only_a.loc["2016-01-01"]
        expected = df.loc[ixs[:, "a"], :].iloc[0:2]
        expected.index = expected.index.droplevel(1)
        tm.assert_frame_equal(result, expected)

    # indexing with IndexSlice
    result = df.loc[ixs["2016-01-01":"2016-02-01", :], :]
    tm.assert_frame_equal(result, df)

    # match on secondary index (df_swap is the last frame from the loop above)
    result = df_swap.loc[ixs[:, "2016-01-01":"2016-01-01"], :]
    tm.assert_frame_equal(result, df_swap.iloc[[0, 1, 5, 6, 10, 11]])

    # Even though this syntax works on a single index, this is somewhat
    # ambiguous and we don't want to extend this behavior forward to work
    # in multi-indexes. This would amount to selecting a scalar from a
    # column.
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df["2016-01-01"]

    # partial string match on year only
    result = df.loc["2016"]
    tm.assert_frame_equal(result, df)

    # partial string match on date
    result = df.loc["2016-01-01"]
    tm.assert_frame_equal(result, df.iloc[0:6])

    # partial string match on date and hour, from middle
    result = df.loc["2016-01-02 12"]
    tm.assert_frame_equal(result, df.iloc[9:12])

    # partial string match on secondary index
    result = df_swap.loc[ixs[:, "2016-01-02"], :]
    tm.assert_frame_equal(result, df_swap.iloc[[2, 3, 7, 8, 12, 13]])

    # tuple selector with partial string match on date
    result = df.loc[("2016-01-01", "a"), :]
    tm.assert_frame_equal(result, df.iloc[[0, 3]])

    # Slicing date on first level should break (of course)
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df_swap.loc["2016-01-01"]

    # GH12685 (partial string with daily resolution or below)
    days = date_range("2013-01-01", periods=100, freq="D")
    daily_index = MultiIndex.from_product([days, ["a", "b"]])
    df = DataFrame(np.random.randn(200, 1), columns=["A"], index=daily_index)

    # Two rows per day: March 2013 covers rows 118 up to (not including) 180.
    result = df.loc[ixs["2013-03":"2013-03", :], :]
    tm.assert_frame_equal(result, df.iloc[118:180])

View File

@@ -0,0 +1,105 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Index, MultiIndex
import pandas.util.testing as tm
def check_level_names(index, names):
    """Assert that the ``name`` of each level of ``index`` matches ``names`` in order."""
    actual = [level.name for level in index.levels]
    expected = list(names)
    assert actual == expected
def test_reindex(idx):
    """Reindexing against a list of tuples returns a MultiIndex with the same names."""
    head = idx[:4]
    reindexed, indexer = idx.reindex(list(head))
    assert isinstance(reindexed, MultiIndex)
    check_level_names(reindexed, head.names)

    # Reindexing against every entry of the index returns no indexer.
    reindexed, indexer = idx.reindex(list(idx))
    assert isinstance(reindexed, MultiIndex)
    assert indexer is None
    check_level_names(reindexed, idx.names)
def test_reindex_level(idx):
    """Level-wise ``reindex`` agrees with the corresponding level-wise ``join``."""
    flat = Index(["one"])

    target, indexer = idx.reindex(flat, level="second")
    target2, indexer2 = flat.reindex(idx, level="second")

    expected_target = idx.join(flat, level="second", how="right")
    expected_target2 = idx.join(flat, level="second", how="left")

    assert target.equals(expected_target)
    tm.assert_numpy_array_equal(indexer, np.array([0, 2, 4]), check_dtype=False)

    assert target2.equals(expected_target2)
    tm.assert_numpy_array_equal(
        indexer2, np.array([0, -1, 0, -1, 0, -1]), check_dtype=False
    )

    # A fill method cannot be combined with ``level``.
    with pytest.raises(TypeError, match="Fill method not supported"):
        idx.reindex(idx, method="pad", level="second")

    with pytest.raises(TypeError, match="Fill method not supported"):
        flat.reindex(flat, method="bfill", level="first")
def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
# GH6552
idx = idx.copy()
target = idx.copy()
idx.names = target.names = [None, None]
other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]])
# list & ndarray cases
assert idx.reindex([])[0].names == [None, None]
assert idx.reindex(np.array([]))[0].names == [None, None]
assert idx.reindex(target.tolist())[0].names == [None, None]
assert idx.reindex(target.values)[0].names == [None, None]
assert idx.reindex(other_dtype.tolist())[0].names == [None, None]
assert idx.reindex(other_dtype.values)[0].names == [None, None]
idx.names = ["foo", "bar"]
assert idx.reindex([])[0].names == ["foo", "bar"]
assert idx.reindex(np.array([]))[0].names == ["foo", "bar"]
assert idx.reindex(target.tolist())[0].names == ["foo", "bar"]
assert idx.reindex(target.values)[0].names == ["foo", "bar"]
assert idx.reindex(other_dtype.tolist())[0].names == ["foo", "bar"]
assert idx.reindex(other_dtype.values)[0].names == ["foo", "bar"]
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
    """Level-wise reindex against an empty list keeps both names.

    GH7774
    """
    mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"])
    for level in (0, 1):
        reindexed = mi.reindex([], level=level)[0]
        assert reindexed.names == ["foo", "bar"]
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array():
    """Level-wise reindex against an empty list keeps the dtype of that level.

    GH7774
    """
    mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]])
    for level, expected_type in ((0, np.int64), (1, np.object_)):
        reindexed = mi.reindex([], level=level)[0]
        assert reindexed.levels[level].dtype.type == expected_type
def test_reindex_base(idx):
    """``get_indexer`` against itself is the identity; unknown methods are rejected."""
    positions = np.arange(idx.size, dtype=np.intp)
    found = idx.get_indexer(idx)
    tm.assert_numpy_array_equal(positions, found)

    with pytest.raises(ValueError, match="Invalid fill method"):
        idx.get_indexer(idx, method="invalid")
def test_reindex_non_unique():
    """Reindexing a Series whose MultiIndex has a repeated tuple raises ``ValueError``."""
    # (1, 1) appears twice, so the index is not unique.
    duplicated = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
    series = pd.Series(np.arange(4), index=duplicated)
    unique_target = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])

    with pytest.raises(ValueError, match="cannot handle a non-unique multi-index!"):
        series.reindex(unique_target)

View File

@@ -0,0 +1,129 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Index, MultiIndex
import pandas.util.testing as tm
def test_insert(idx):
    """``MultiIndex.insert`` plus setting-with-enlargement through ``.loc``."""
    # key contained in all levels
    inserted = idx.insert(0, ("bar", "two"))
    assert inserted.equal_levels(idx)
    assert inserted[0] == ("bar", "two")

    # key not contained in all levels
    inserted = idx.insert(0, ("abc", "three"))

    expected_first = Index(list(idx.levels[0]) + ["abc"], name="first")
    tm.assert_index_equal(inserted.levels[0], expected_first)

    expected_second = Index(list(idx.levels[1]) + ["three"], name="second")
    tm.assert_index_equal(inserted.levels[1], expected_second)
    assert inserted[0] == ("abc", "three")

    # key wrong length
    with pytest.raises(
        ValueError, match="Item must have length equal to number of levels"
    ):
        idx.insert(0, ("foo2",))

    columns = ["1st", "2nd", "3rd"]
    base_rows = [["a", "b", 0], ["b", "d", 1]]
    # New (1st, 2nd) keys and the value stored under each, in insertion order.
    additions = [
        (("b", "x"), 2),
        (("b", "a"), -1),
        (("b", "b"), 3),
        (("a", "x"), 4),
        (("a", "w"), 5),
        (("a", "a"), 6),
    ]

    left = pd.DataFrame(base_rows, columns=columns)
    left.set_index(["1st", "2nd"], inplace=True)
    ts = left["3rd"].copy(deep=True)

    for key, value in additions:
        left.loc[key, "3rd"] = value
    for key, value in additions:
        ts.loc[key] = value

    right = pd.DataFrame(
        base_rows + [[key[0], key[1], value] for key, value in additions],
        columns=columns,
    )
    right.set_index(["1st", "2nd"], inplace=True)

    # FIXME data types changes to float because
    # of intermediate nan insertion;
    tm.assert_frame_equal(left, right, check_dtype=False)
    tm.assert_series_equal(ts, right["3rd"])

    # GH9250
    keys = (
        [("test1", i) for i in range(5)]
        + [("test2", i) for i in range(6)]
        + [("test", 17), ("test", 18)]
    )

    left = pd.Series(np.linspace(0, 10, 11), pd.MultiIndex.from_tuples(keys[:-2]))
    left.loc[("test", 17)] = 11
    left.loc[("test", 18)] = 12

    right = pd.Series(np.linspace(0, 12, 13), pd.MultiIndex.from_tuples(keys))
    tm.assert_series_equal(left, right)
def test_append(idx):
result = idx[:3].append(idx[3:])
assert result.equals(idx)
foos = [idx[:1], idx[1:3], idx[3:]]
result = foos[0].append(foos[1:])
assert result.equals(idx)
# empty
result = idx.append([])
assert result.equals(idx)
def test_repeat():
    """``MultiIndex.repeat`` must match a product built from repeated labels."""
    reps = 2
    names = np.array(["foo", "bar"])
    numbers = [1, 2, 3]

    original = MultiIndex.from_product([numbers, names], names=names)
    repeated_labels = names.repeat(reps)
    expected = MultiIndex.from_product([numbers, repeated_labels], names=names)

    tm.assert_index_equal(original.repeat(reps), expected)
def test_insert_base(idx):
result = idx[1:4]
# test 0th element
assert idx[0:4].equals(result.insert(0, idx[0]))
def test_delete_base(idx):
    """Deleting the first or last position must equal slicing it off."""
    for position, expected in [(0, idx[1:]), (-1, idx[:-1])]:
        result = idx.delete(position)
        assert result.equals(expected)
        assert result.name == expected.name

    # One past the end is out of bounds.
    with pytest.raises((IndexError, ValueError)):
        # Exception raised depends on NumPy version.
        idx.delete(len(idx))

View File

@@ -0,0 +1,363 @@
import numpy as np
import pytest
import pandas as pd
from pandas import MultiIndex, Series
import pandas.util.testing as tm
@pytest.mark.parametrize("case", [0.5, "xxx"])
@pytest.mark.parametrize("sort", [None, False])
@pytest.mark.parametrize(
    "method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_set_ops_error_cases(idx, case, sort, method):
    """Every set operation must raise ``TypeError`` for the given bad inputs."""
    set_op = getattr(idx, method)

    # non-iterable input
    msg = "Input must be Index or array-like"
    with pytest.raises(TypeError, match=msg):
        set_op(case, sort=sort)
@pytest.mark.parametrize("sort", [None, False])
def test_intersection_base(idx, sort):
    """Intersect ``idx[:5]`` with ``idx[:3]`` given as several container types."""
    first, second = idx[:5], idx[:3]

    def check(result):
        # Ordering is only asserted when sort=None; contents always.
        if sort is None:
            tm.assert_index_equal(result, second.sort_values())
        assert tm.equalContents(result, second)

    check(first.intersection(second, sort=sort))

    # GH 10149
    for klass in (np.array, Series, list):
        check(first.intersection(klass(second.values), sort=sort))

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.intersection([1, 2, 3], sort=sort)
@pytest.mark.parametrize("sort", [None, False])
def test_union_base(idx, sort):
    """Union of two overlapping slices must contain every entry of ``idx``."""
    first, second = idx[3:], idx[:5]
    everything = idx

    def check(result):
        # Ordering is only asserted when sort=None; contents always.
        if sort is None:
            tm.assert_index_equal(result, everything.sort_values())
        assert tm.equalContents(result, everything)

    check(first.union(second, sort=sort))

    # GH 10149
    for klass in (np.array, Series, list):
        check(first.union(klass(second.values), sort=sort))

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.union([1, 2, 3], sort=sort)
@pytest.mark.parametrize("sort", [None, False])
def test_difference_base(idx, sort):
    """Removing ``idx[4:]`` from ``idx`` must leave ``idx[:4]``."""
    second = idx[4:]
    # With sort=None the expected answer is compared in sorted order.
    answer = idx[:4].sort_values() if sort is None else idx[:4]

    result = idx.difference(second, sort=sort)
    assert result.equals(answer)
    tm.assert_index_equal(result, answer)

    # GH 10149
    for klass in (np.array, Series, list):
        result = idx.difference(klass(second.values), sort=sort)
        tm.assert_index_equal(result, answer)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        idx.difference([1, 2, 3], sort=sort)
@pytest.mark.parametrize("sort", [None, False])
def test_symmetric_difference(idx, sort):
    """``idx[1:]`` and ``idx[:-1]`` differ only in the first and last entries."""
    first, second = idx[1:], idx[:-1]
    answer = idx[[-1, 0]]
    if sort is None:
        answer = answer.sort_values()

    tm.assert_index_equal(first.symmetric_difference(second, sort=sort), answer)

    # GH 10149
    for klass in (np.array, Series, list):
        result = first.symmetric_difference(klass(second.values), sort=sort)
        tm.assert_index_equal(result, answer)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.symmetric_difference([1, 2, 3], sort=sort)
def test_empty(idx):
# GH 15270
assert not idx.empty
assert idx[:0].empty
@pytest.mark.parametrize("sort", [None, False])
def test_difference(idx, sort):
    """Exercise ``MultiIndex.difference`` on regular and degenerate inputs.

    Covers a plain difference, several ways of producing an empty result,
    the names of the result, array-like ``other`` arguments, and the
    ``TypeError`` raised for a list that is not made of tuples.
    """
    first = idx
    result = first.difference(idx[-3:], sort=sort)
    vals = idx[:-3].values
    if sort is None:
        vals = sorted(vals)
    expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names)

    assert isinstance(result, MultiIndex)
    assert result.equals(expected)
    assert result.names == idx.names
    tm.assert_index_equal(result, expected)

    # empty difference: reflexive
    result = idx.difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: superset
    result = idx[-3:].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: degenerate
    result = idx[:0].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # names not the same
    chunklet = idx[-3:]
    chunklet.names = ["foo", "baz"]
    result = first.difference(chunklet, sort=sort)
    assert result.names == (None, None)

    # empty, but non-equal
    result = idx.difference(idx.sortlevel(1)[0], sort=sort)
    assert len(result) == 0

    # called with an array of tuples instead of a MultiIndex
    result = first.difference(first.values, sort=sort)
    assert result.equals(first[:0])

    # name from empty array
    result = first.difference([], sort=sort)
    assert first.equals(result)
    assert first.names == result.names

    # name from non-empty array
    result = first.difference([("foo", "one")], sort=sort)
    expected = pd.MultiIndex.from_tuples(
        [("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")]
    )
    expected.names = first.names
    assert first.names == result.names
    # ``expected`` used to be built without ever being compared to ``result``.
    # It is listed in sorted order, so the ordered comparison only applies to
    # sort=None; the contents are compared in both cases.
    if sort is None:
        tm.assert_index_equal(result, expected)
    assert tm.equalContents(result, expected)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.difference([1, 2, 3, 4, 5], sort=sort)
def test_difference_sort_special():
    """Difference with an empty list must return the index unchanged."""
    # GH-24959
    unsorted_product = pd.MultiIndex.from_product([[1, 0], ["a", "b"]])

    # sort=None, the default
    tm.assert_index_equal(unsorted_product.difference([]), unsorted_product)
@pytest.mark.xfail(reason="Not implemented.")
def test_difference_sort_special_true():
    """Expected ``sort=True`` result for a difference with an empty list."""
    # TODO decide on True behaviour
    unsorted_product = pd.MultiIndex.from_product([[1, 0], ["a", "b"]])
    sorted_product = pd.MultiIndex.from_product([[0, 1], ["a", "b"]])

    result = unsorted_product.difference([], sort=True)
    tm.assert_index_equal(result, sorted_product)
def test_difference_sort_incomparable():
    """Difference of indexes whose first level mixes ints and a Timestamp."""
    # GH-24959
    stamp = pd.Timestamp("2000")
    idx = pd.MultiIndex.from_product([[1, stamp, 2], ["a", "b"]])
    other = pd.MultiIndex.from_product([[3, stamp, 4], ["c", "d"]])

    # sort=None, the default
    # MultiIndex.difference deviates here from other difference
    # implementations in not catching the TypeError
    with pytest.raises(TypeError):
        idx.difference(other)

    # sort=False
    unsorted_result = idx.difference(other, sort=False)
    tm.assert_index_equal(unsorted_result, idx)
@pytest.mark.xfail(reason="Not implemented.")
def test_difference_sort_incomparable_true():
    """Expected ``sort=True`` behaviour for incomparable level values."""
    # TODO decide on True behaviour
    # # sort=True, raises
    stamp = pd.Timestamp("2000")
    idx = pd.MultiIndex.from_product([[1, stamp, 2], ["a", "b"]])
    other = pd.MultiIndex.from_product([[3, stamp, 4], ["c", "d"]])

    with pytest.raises(TypeError):
        idx.difference(other, sort=True)
@pytest.mark.parametrize("sort", [None, False])
def test_union(idx, sort):
    """Union of overlapping pieces, plus the self/empty corner cases."""
    reversed_head = idx[:5][::-1]
    tail = idx[3:]

    the_union = reversed_head.union(tail, sort=sort)
    if sort is None:
        tm.assert_index_equal(the_union, idx.sort_values())
    assert tm.equalContents(the_union, idx)

    # corner case, pass self or empty thing:
    for other in (idx, idx[:0]):
        assert idx.union(other, sort=sort) is idx

    # won't work in python 3
    # tuples = _index.values
    # result = _index[:4] | tuples[4:]
    # assert result.equals(tuples)
# not valid for python 3
# def test_union_with_regular_index(self):
# other = Index(['A', 'B', 'C'])
# result = other.union(idx)
# assert ('foo', 'one') in result
# assert 'B' in result
# result2 = _index.union(other)
# assert result.equals(result2)
@pytest.mark.parametrize("sort", [None, False])
def test_intersection(idx, sort):
    """Intersection of overlapping pieces, of self, and of disjoint pieces."""
    reversed_head = idx[:5][::-1]
    tail = idx[3:]
    overlap = idx[3:5]

    the_int = reversed_head.intersection(tail, sort=sort)
    if sort is None:
        tm.assert_index_equal(the_int, overlap)
    assert tm.equalContents(the_int, overlap)

    # corner case, pass self
    assert idx.intersection(idx, sort=sort) is idx

    # empty intersection: disjoint
    disjoint = idx[:2].intersection(idx[2:], sort=sort)
    assert disjoint.equals(idx[:0])

    # can't do in python 3
    # tuples = _index.values
    # result = _index & tuples
    # assert result.equals(tuples)
def test_intersect_equal_sort():
    """Self-intersection must return the index for sort=False and sort=None."""
    # GH-24959
    unsorted_product = pd.MultiIndex.from_product([[1, 0], ["a", "b"]])
    for sort in (False, None):
        result = unsorted_product.intersection(unsorted_product, sort=sort)
        tm.assert_index_equal(result, unsorted_product)
@pytest.mark.xfail(reason="Not implemented.")
def test_intersect_equal_sort_true():
    """Expected ``sort=True`` result for a self-intersection."""
    # TODO decide on True behaviour
    unsorted_product = pd.MultiIndex.from_product([[1, 0], ["a", "b"]])
    sorted_ = pd.MultiIndex.from_product([[0, 1], ["a", "b"]])

    result = unsorted_product.intersection(unsorted_product, sort=True)
    tm.assert_index_equal(result, sorted_)
@pytest.mark.parametrize("slice_", [slice(None), slice(0)])
def test_union_sort_other_empty(slice_):
    """Union with itself or with an empty slice must return the index as is."""
    # https://github.com/pandas-dev/pandas/issues/24959
    unsorted_product = pd.MultiIndex.from_product([[1, 0], ["a", "b"]])
    other = unsorted_product[slice_]

    # default, sort=None
    tm.assert_index_equal(unsorted_product.union(other), unsorted_product)

    # MultiIndex does not special case empty.union(idx)
    # tm.assert_index_equal(other.union(idx), idx)

    # sort=False
    unsorted_union = unsorted_product.union(other, sort=False)
    tm.assert_index_equal(unsorted_union, unsorted_product)
@pytest.mark.xfail(reason="Not implemented.")
def test_union_sort_other_empty_sort(slice_=None):
    """Expected ``sort=True`` result for a union with an empty index.

    Parameters
    ----------
    slice_ : object, optional
        Unused. It is kept with a default only for signature compatibility.
        Without a default, pytest resolves the argument as a fixture; no
        ``slice_`` fixture or parametrization is visible for this test
        (NOTE(review): confirm none exists in conftest), so the test would
        fail during setup and the body below would never execute.
    """
    # TODO decide on True behaviour
    # # sort=True
    idx = pd.MultiIndex.from_product([[1, 0], ["a", "b"]])
    other = idx[:0]
    result = idx.union(other, sort=True)
    expected = pd.MultiIndex.from_product([[0, 1], ["a", "b"]])
    tm.assert_index_equal(result, expected)
def test_union_sort_other_incomparable():
    """Union with a sub-slice when the first level mixes an int and a Timestamp."""
    # https://github.com/pandas-dev/pandas/issues/24959
    mixed = pd.MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
    first_entry = mixed[:1]

    # default, sort=None
    tm.assert_index_equal(mixed.union(first_entry), mixed)

    # sort=False
    tm.assert_index_equal(mixed.union(first_entry, sort=False), mixed)
@pytest.mark.xfail(reason="Not implemented.")
def test_union_sort_other_incomparable_sort():
    """Expected ``sort=True`` behaviour for a union of incomparable values."""
    # TODO decide on True behaviour
    # # sort=True
    mixed = pd.MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
    first_entry = mixed[:1]

    with pytest.raises(TypeError, match="Cannot compare"):
        mixed.union(first_entry, sort=True)
@pytest.mark.parametrize(
    "method", ["union", "intersection", "difference", "symmetric_difference"]
)
def test_setops_disallow_true(method):
    """Every set operation must reject ``sort=True`` with ``ValueError``."""
    left = pd.MultiIndex.from_product([["a", "b"], [1, 2]])
    right = pd.MultiIndex.from_product([["b", "c"], [1, 2]])

    with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
        getattr(left, method)(right, sort=True)

View File

@@ -0,0 +1,276 @@
import numpy as np
import pytest
from pandas.errors import PerformanceWarning, UnsortedIndexError
import pandas as pd
from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, RangeIndex
import pandas.util.testing as tm
def test_sortlevel(idx):
import random
tuples = list(idx)
random.shuffle(tuples)
index = MultiIndex.from_tuples(tuples)
sorted_idx, _ = index.sortlevel(0)
expected = MultiIndex.from_tuples(sorted(tuples))
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(0, ascending=False)
assert sorted_idx.equals(expected[::-1])
sorted_idx, _ = index.sortlevel(1)
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
expected = MultiIndex.from_tuples(by1)
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(1, ascending=False)
assert sorted_idx.equals(expected[::-1])
def test_sortlevel_not_sort_remaining():
    """With ``sort_remaining=False`` the ties on level "A" keep their order."""
    rows = [[1, 1, 3], [1, 1, 1]]
    mi = MultiIndex.from_tuples(rows, names=list("ABC"))

    sorted_idx, _ = mi.sortlevel("A", sort_remaining=False)
    assert sorted_idx.equals(mi)
def test_sortlevel_deterministic():
    """``sortlevel`` on a fixed, unsorted set of tuples."""
    tuples = [
        ("bar", "one"),
        ("foo", "two"),
        ("qux", "two"),
        ("foo", "one"),
        ("baz", "two"),
        ("qux", "one"),
    ]
    index = MultiIndex.from_tuples(tuples)

    def check(level, ordered):
        # Ascending must equal ``ordered``; descending must be its reverse.
        expected = MultiIndex.from_tuples(ordered)
        ascending, _ = index.sortlevel(level)
        assert ascending.equals(expected)
        descending, _ = index.sortlevel(level, ascending=False)
        assert descending.equals(expected[::-1])

    check(0, sorted(tuples))
    check(1, sorted(tuples, key=lambda x: (x[1], x[0])))
def test_sort(indices):
    """Calling ``sort()`` on the index must raise ``TypeError``."""
    sort_in_place = indices.sort
    with pytest.raises(TypeError):
        sort_in_place()
def test_numpy_argsort(idx):
result = np.argsort(idx)
expected = idx.argsort()
tm.assert_numpy_array_equal(result, expected)
# these are the only two types that perform
# pandas compatibility input validation - the
# rest already perform separate (or no) such
# validation via their 'values' attribute as
# defined in pandas.core.indexes/base.py - they
# cannot be changed at the moment due to
# backwards compatibility concerns
if isinstance(type(idx), (CategoricalIndex, RangeIndex)):
msg = "the 'axis' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.argsort(idx, axis=1)
msg = "the 'kind' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.argsort(idx, kind="mergesort")
msg = "the 'order' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.argsort(idx, order=("a", "b"))
def test_unsortedindex():
    """Label lookups on a frame with an unsorted MultiIndex, then sorted."""
    # GH 11897
    tuples = [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")]
    mi = pd.MultiIndex.from_tuples(tuples, names=["one", "two"])
    data = [[i, 10 * i] for i in range(6)]
    df = pd.DataFrame(data, index=mi, columns=["one", "two"])

    # GH 16734: not sorted, but no real slicing
    exact_row = df.loc(axis=0)["z", "a"]
    tm.assert_series_equal(exact_row, df.iloc[0])

    # Slicing on the second level is refused while the index is unsorted.
    with pytest.raises(UnsortedIndexError):
        df.loc(axis=0)["z", slice("a")]

    df.sort_index(inplace=True)
    assert len(df.loc(axis=0)["z", :]) == 2

    with pytest.raises(KeyError, match="'q'"):
        df.loc(axis=0)["q", :]
def test_unsortedindex_doc_examples():
    """Replay the documentation example on sorting a MultiIndex."""
    # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex  # noqa
    columns = {
        "jim": [0, 0, 1, 1],
        "joe": ["x", "x", "z", "y"],
        "jolie": np.random.rand(4),
    }
    dfm = DataFrame(columns).set_index(["jim", "joe"])

    # Unsorted: a scalar lookup warns and a slice is refused.
    with tm.assert_produces_warning(PerformanceWarning):
        dfm.loc[(1, "z")]

    with pytest.raises(UnsortedIndexError):
        dfm.loc[(0, "y"):(1, "z")]

    unsorted_index = dfm.index
    assert not unsorted_index.is_lexsorted()
    assert unsorted_index.lexsort_depth == 1

    # sort it
    dfm = dfm.sort_index()
    dfm.loc[(1, "z")]
    dfm.loc[(0, "y"):(1, "z")]

    sorted_index = dfm.index
    assert sorted_index.is_lexsorted()
    assert sorted_index.lexsort_depth == 2
def test_reconstruct_sort():
# starts off lexsorted & monotonic
mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
assert mi.is_lexsorted()
assert mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert recons.is_lexsorted()
assert recons.is_monotonic
assert mi is recons
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
# cannot convert to lexsorted
mi = pd.MultiIndex.from_tuples(
[("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")],
names=["one", "two"],
)
assert not mi.is_lexsorted()
assert not mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert not recons.is_lexsorted()
assert not recons.is_monotonic
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
# cannot convert to lexsorted
mi = MultiIndex(
levels=[["b", "d", "a"], [1, 2, 3]],
codes=[[0, 1, 0, 2], [2, 0, 0, 1]],
names=["col1", "col2"],
)
assert not mi.is_lexsorted()
assert not mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert not recons.is_lexsorted()
assert not recons.is_monotonic
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
def test_reconstruct_remove_unused():
    """``remove_unused_levels`` drops labels that no row refers to."""
    # xref to GH 2770
    rows = [["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]]
    df = DataFrame(rows, columns=["first", "second", "third"])
    indexed = df.set_index(["first", "second"], drop=False)
    filtered = indexed[indexed["first"] != "deleteMe"]
    names = ["first", "second"]

    # removed levels are there
    with_unused = MultiIndex(
        levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]],
        codes=[[1, 2], [1, 2]],
        names=names,
    )
    tm.assert_index_equal(filtered.index, with_unused)

    without_unused = MultiIndex(
        levels=[["keepMe", "keepMeToo"], [2, 3]],
        codes=[[0, 1], [0, 1]],
        names=names,
    )
    result = filtered.index.remove_unused_levels()
    tm.assert_index_equal(result, without_unused)

    # idempotent
    result2 = result.remove_unused_levels()
    tm.assert_index_equal(result2, without_unused)
    assert result2.is_(result)
@pytest.mark.parametrize(
    "first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")]
)
def test_remove_unused_levels_large(first_type, second_type):
    """``remove_unused_levels`` on a large index built from a groupby."""
    # GH16556

    # because tests should be deterministic (and this test in particular
    # checks that levels are removed, which is not the case for every
    # random input):
    rng = np.random.RandomState(4)  # seed is arbitrary value that works

    size = 1 << 16
    # The three draws are made in this order so the random stream is unchanged.
    first = rng.randint(0, 1 << 13, size).astype(first_type)
    second = rng.randint(0, 1 << 10, size).astype(second_type)
    third = rng.rand(size)
    df = DataFrame(dict(first=first, second=second, third=third))

    summed = df.groupby(["first", "second"]).sum()
    df = summed[summed.third < 0.1]

    result = df.index.remove_unused_levels()
    for level_number in (0, 1):
        assert len(result.levels[level_number]) < len(df.index.levels[level_number])
    assert result.equals(df.index)

    expected = df.reset_index().set_index(["first", "second"]).index
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]])
@pytest.mark.parametrize(
    "level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]]
)
def test_remove_unused_nan(level0, level1):
    """``remove_unused_levels`` when the first level's codes contain -1."""
    # GH 18417
    codes = [[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]
    mi = pd.MultiIndex(levels=[level0, level1], codes=codes)

    result = mi.remove_unused_levels()
    tm.assert_index_equal(result, mi)
    for level_number in (0, 1):
        cleaned_level = result.levels[level_number]
        assert "unused" not in cleaned_level
def test_argsort(idx):
result = idx.argsort()
expected = idx.values.argsort()
tm.assert_numpy_array_equal(result, expected)