8th day of python challenges 111-117
This commit is contained in:
910
venv/lib/python3.6/site-packages/pandas/tests/indexes/common.py
Normal file
910
venv/lib/python3.6/site-packages/pandas/tests/indexes/common.py
Normal file
@@ -0,0 +1,910 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslib import iNaT
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Int64Index,
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
PeriodIndex,
|
||||
RangeIndex,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
UInt64Index,
|
||||
isna,
|
||||
)
|
||||
from pandas.core.indexes.base import InvalidIndexError
|
||||
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class Base:
    """ base class for index sub-class tests """

    # Callable invoked with no arguments by test_pickle_compat_construction;
    # left as None in this base class.
    _holder = None
    # Attribute names that test_ndarray_compat_properties compares between an
    # index and its ``.values``.
    _compat_props = ["shape", "ndim", "size", "nbytes"]
|
||||
|
||||
def setup_indices(self):
    """Publish every entry of ``self.indices`` as an attribute of the same name."""
    for attr_name, index_obj in self.indices.items():
        setattr(self, attr_name, index_obj)
|
||||
|
||||
def test_pickle_compat_construction(self):
    """Calling the holder class with no data must raise a TypeError."""
    holder = self._holder
    # the wording differs between constructors, so accept any of these
    expected_error = (
        r"Index\(\.\.\.\) must be called with a collection of some"
        r" kind, None was passed|"
        r"__new__\(\) missing 1 required positional argument: 'data'|"
        r"__new__\(\) takes at least 2 arguments \(1 given\)"
    )
    with pytest.raises(TypeError, match=expected_error):
        holder()
|
||||
|
||||
def test_to_series(self):
|
||||
# assert that we are creating a copy of the index
|
||||
|
||||
idx = self.create_index()
|
||||
s = idx.to_series()
|
||||
assert s.values is not idx.values
|
||||
assert s.index is not idx
|
||||
assert s.name == idx.name
|
||||
|
||||
def test_to_series_with_arguments(self):
|
||||
# GH18699
|
||||
|
||||
# index kwarg
|
||||
idx = self.create_index()
|
||||
s = idx.to_series(index=idx)
|
||||
|
||||
assert s.values is not idx.values
|
||||
assert s.index is idx
|
||||
assert s.name == idx.name
|
||||
|
||||
# name kwarg
|
||||
idx = self.create_index()
|
||||
s = idx.to_series(name="__test")
|
||||
|
||||
assert s.values is not idx.values
|
||||
assert s.index is not idx
|
||||
assert s.name != idx.name
|
||||
|
||||
@pytest.mark.parametrize("name", [None, "new_name"])
def test_to_frame(self, name):
    """``to_frame`` builds a one-column frame named after the index (GH-15230, GH-22580)."""
    idx = self.create_index()

    # without an explicit name, fall back to the index's own name, then to 0
    idx_name = name if name else (idx.name or 0)

    frame = idx.to_frame(name=idx_name)

    assert frame.index is idx
    assert len(frame.columns) == 1
    assert frame.columns[0] == idx_name
    assert frame[idx_name].values is not idx.values

    frame = idx.to_frame(index=False, name=idx_name)
    assert frame.index is not idx
|
||||
|
||||
def test_to_frame_datetime_tz(self):
    """``to_frame`` on a UTC-localized daily range matches an explicit DataFrame (GH 25809)."""
    naive = pd.date_range(start="2019-01-01", end="2019-01-30", freq="D")
    localized = naive.tz_localize("UTC")
    result = localized.to_frame()
    expected = pd.DataFrame(localized, index=localized)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_shift(self):
    """The base ``shift`` implementation raises NotImplementedError (GH8083)."""
    idx = self.create_index()
    msg = "Not supported for type {}".format(type(idx).__name__)
    # with and without a second positional argument
    for shift_args in [(1,), (1, 2)]:
        with pytest.raises(NotImplementedError, match=msg):
            idx.shift(*shift_args)
|
||||
|
||||
def test_create_index_existing_name(self):
    """Check name handling when ``pd.Index`` is built from an existing index."""

    # GH11193, when an existing index is passed, and a new name is not
    # specified, the new index should inherit the previous object name
    expected = self.create_index()
    if not isinstance(expected, MultiIndex):
        # single-level case: the name is inherited unless ``name=`` is given
        expected.name = "foo"
        result = pd.Index(expected)
        tm.assert_index_equal(result, expected)

        result = pd.Index(expected, name="bar")
        expected.name = "bar"
        tm.assert_index_equal(result, expected)
    else:
        # MultiIndex case: compared against an Index built from a hard-coded
        # list of tuples
        expected.names = ["foo", "bar"]
        result = pd.Index(expected)
        tm.assert_index_equal(
            result,
            Index(
                Index(
                    [
                        ("foo", "one"),
                        ("foo", "two"),
                        ("bar", "one"),
                        ("baz", "two"),
                        ("qux", "one"),
                        ("qux", "two"),
                    ],
                    dtype="object",
                ),
                names=["foo", "bar"],
            ),
        )

        # explicit ``names=`` replaces the names set above
        result = pd.Index(expected, names=["A", "B"])
        tm.assert_index_equal(
            result,
            Index(
                Index(
                    [
                        ("foo", "one"),
                        ("foo", "two"),
                        ("bar", "one"),
                        ("baz", "two"),
                        ("qux", "one"),
                        ("qux", "two"),
                    ],
                    dtype="object",
                ),
                names=["A", "B"],
            ),
        )
|
||||
|
||||
def test_numeric_compat(self):
    """Multiplication and division, in either operand order, raise TypeError."""
    index = self.create_index()

    with pytest.raises(TypeError, match="cannot perform __mul__"):
        index * 1
    with pytest.raises(TypeError, match="cannot perform __rmul__"):
        1 * index

    truediv_msg = "cannot perform __truediv__"
    with pytest.raises(TypeError, match=truediv_msg):
        index / 1

    # the reflected message is the same text with an "r" prefixed to the dunder
    rtruediv_msg = truediv_msg.replace(" __", " __r")
    with pytest.raises(TypeError, match=rtruediv_msg):
        1 / index

    with pytest.raises(TypeError, match="cannot perform __floordiv__"):
        index // 1
    with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
        1 // index
|
||||
|
||||
def test_logical_compat(self):
    """``all`` and ``any`` raise TypeError on the index created by the subclass."""
    index = self.create_index()

    with pytest.raises(TypeError, match="cannot perform all"):
        index.all()

    with pytest.raises(TypeError, match="cannot perform any"):
        index.any()
|
||||
|
||||
def test_boolean_context_compat(self):
    """Evaluating an index for truth raises ValueError."""
    index = self.create_index()

    with pytest.raises(ValueError, match="The truth value of a"):
        # explicit truth test, same protocol an ``if`` statement would use
        bool(index)
|
||||
|
||||
def test_reindex_base(self):
    """``get_indexer`` against itself yields 0..n-1; an unknown method is rejected."""
    index = self.create_index()
    positions = np.arange(index.size, dtype=np.intp)

    indexer = index.get_indexer(index)
    tm.assert_numpy_array_equal(positions, indexer)

    with pytest.raises(ValueError, match="Invalid fill method"):
        index.get_indexer(index, method="invalid")
|
||||
|
||||
def test_get_indexer_consistency(self):
    """``get_indexer`` / ``get_indexer_non_unique`` return intp arrays (GH 16819)."""
    for _name, index in self.indices.items():
        if isinstance(index, IntervalIndex):
            continue

        head = index[0:2]
        if index.is_unique or isinstance(index, CategoricalIndex):
            indexer = index.get_indexer(head)
            assert isinstance(indexer, np.ndarray)
            assert indexer.dtype == np.intp
        else:
            # duplicated labels: get_indexer must refuse
            e = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=e):
                index.get_indexer(head)

        indexer, _ = index.get_indexer_non_unique(index[0:2])
        assert isinstance(indexer, np.ndarray)
        assert indexer.dtype == np.intp
|
||||
|
||||
def test_ndarray_compat_properties(self):
|
||||
idx = self.create_index()
|
||||
assert idx.T.equals(idx)
|
||||
assert idx.transpose().equals(idx)
|
||||
|
||||
values = idx.values
|
||||
for prop in self._compat_props:
|
||||
assert getattr(idx, prop) == getattr(values, prop)
|
||||
|
||||
# test for validity
|
||||
idx.nbytes
|
||||
idx.values.nbytes
|
||||
|
||||
def test_repr_roundtrip(self):
    """Evaluating ``repr(idx)`` rebuilds an equal index."""
    index = self.create_index()
    rebuilt = eval(repr(index))
    tm.assert_index_equal(rebuilt, index)
|
||||
|
||||
def test_str(self):
|
||||
|
||||
# test the string repr
|
||||
idx = self.create_index()
|
||||
idx.name = "foo"
|
||||
assert "'foo'" in str(idx)
|
||||
assert idx.__class__.__name__ in str(idx)
|
||||
|
||||
def test_repr_max_seq_item_setting(self):
|
||||
# GH10182
|
||||
idx = self.create_index()
|
||||
idx = idx.repeat(50)
|
||||
with pd.option_context("display.max_seq_items", None):
|
||||
repr(idx)
|
||||
assert "..." not in str(idx)
|
||||
|
||||
def test_copy_name(self):
    """The ``name`` given at construction survives copies and arithmetic (gh-12309)."""
    for _name, index in self.indices.items():
        if isinstance(index, MultiIndex):
            continue

        index_cls = index.__class__
        first = index_cls(index, copy=True, name="mario")
        second = first.__class__(first, copy=False)

        # Even though "copy=False", we want a new object.
        assert first is not second

        # Not using tm.assert_index_equal() since names differ.
        assert index.equals(first)

        assert first.name == "mario"
        assert second.name == "mario"

        s1 = Series(2, index=first)
        s2 = Series(3, index=second[:-1])

        # See gh-13365: the product is not checked for CategoricalIndex
        if isinstance(index, CategoricalIndex):
            continue
        s3 = s1 * s2
        assert s3.index.name == "mario"
|
||||
|
||||
def test_ensure_copied_data(self):
    """Verify that each index constructor honours its ``copy`` argument."""
    # Check the "copy" argument of each Index.__new__ is honoured
    # GH12309
    for name, index in self.indices.items():
        init_kwargs = {}
        if isinstance(index, PeriodIndex):
            # Needs "freq" specification:
            init_kwargs["freq"] = index.freq
        elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)):
            # RangeIndex cannot be initialized from data
            # MultiIndex and CategoricalIndex are tested separately
            continue

        # copy=True: equal contents, checked with check_same="copy"
        index_type = index.__class__
        result = index_type(index.values, copy=True, **init_kwargs)
        tm.assert_index_equal(index, result)
        tm.assert_numpy_array_equal(
            index._ndarray_values, result._ndarray_values, check_same="copy"
        )

        # copy=False: checked with check_same="same"
        if isinstance(index, PeriodIndex):
            # .values an object array of Period, thus copied
            result = index_type(ordinal=index.asi8, copy=False, **init_kwargs)
            tm.assert_numpy_array_equal(
                index._ndarray_values, result._ndarray_values, check_same="same"
            )
        elif isinstance(index, IntervalIndex):
            # checked in test_interval.py
            pass
        else:
            result = index_type(index.values, copy=False, **init_kwargs)
            tm.assert_numpy_array_equal(
                index.values, result.values, check_same="same"
            )
            tm.assert_numpy_array_equal(
                index._ndarray_values, result._ndarray_values, check_same="same"
            )
|
||||
|
||||
def test_memory_usage(self):
    """``memory_usage`` is 0 for empty indexes and grows after a ``get_loc`` call."""
    for _name, index in self.indices.items():
        baseline = index.memory_usage()
        if not len(index):
            # we report 0 for no-length
            assert baseline == 0
            continue

        index.get_loc(index[0])
        after_lookup = index.memory_usage()
        deep_usage = index.memory_usage(deep=True)

        # RangeIndex, IntervalIndex
        # don't have engines
        if not isinstance(index, (RangeIndex, IntervalIndex)):
            assert after_lookup > baseline

        if index.inferred_type == "object":
            assert deep_usage > after_lookup
|
||||
|
||||
def test_argsort(self):
    """``Index.argsort`` agrees with ``argsort`` on the equivalent numpy array."""
    for key, index in self.indices.items():
        # separately tested
        if key == "catIndex":
            continue

        actual = index.argsort()
        wanted = np.array(index).argsort()
        tm.assert_numpy_array_equal(actual, wanted, check_dtype=False)
|
||||
|
||||
def test_numpy_argsort(self):
    """Check that ``np.argsort(index)`` matches ``index.argsort()``."""
    for k, ind in self.indices.items():
        result = np.argsort(ind)
        expected = ind.argsort()
        tm.assert_numpy_array_equal(result, expected)

        # these are the only two types that perform
        # pandas compatibility input validation - the
        # rest already perform separate (or no) such
        # validation via their 'values' attribute as
        # defined in pandas.core.indexes/base.py - they
        # cannot be changed at the moment due to
        # backwards compatibility concerns
        # NOTE(review): ``type(ind)`` is a class object, so this isinstance
        # check is made against the class rather than the index instance and
        # the block below looks unreachable. Confirm whether
        # ``isinstance(ind, ...)`` was intended before enabling it.
        if isinstance(type(ind), (CategoricalIndex, RangeIndex)):
            msg = "the 'axis' parameter is not supported"
            with pytest.raises(ValueError, match=msg):
                np.argsort(ind, axis=1)

            msg = "the 'kind' parameter is not supported"
            with pytest.raises(ValueError, match=msg):
                np.argsort(ind, kind="mergesort")

            msg = "the 'order' parameter is not supported"
            with pytest.raises(ValueError, match=msg):
                np.argsort(ind, order=("a", "b"))
|
||||
|
||||
def test_take(self):
    """``take`` with a positional list equals fancy indexing with the same list."""
    positions = [4, 3, 0, 2]
    skipped = ["boolIndex", "tuples", "empty"]
    for key, index in self.indices.items():
        # separate
        if key in skipped:
            continue

        taken = index.take(positions)
        wanted = index[positions]
        assert taken.equals(wanted)

        if not isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
            # GH 10791
            with pytest.raises(AttributeError):
                index.freq
|
||||
|
||||
def test_take_invalid_kwargs(self):
    """``take`` rejects unknown keywords and the ``out`` / ``mode`` arguments."""
    index = self.create_index()
    positions = [1, 2]

    unknown_kw_msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=unknown_kw_msg):
        index.take(positions, foo=2)

    out_msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=out_msg):
        index.take(positions, out=positions)

    mode_msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=mode_msg):
        index.take(positions, mode="clip")
|
||||
|
||||
def test_repeat(self):
    """``Index.repeat`` matches repeating the underlying values, keeping the name."""
    # scalar repeat count
    index = self.create_index()
    wanted = pd.Index(index.values.repeat(2), name=index.name)
    tm.assert_index_equal(index.repeat(2), wanted)

    # one repeat count per element
    index = self.create_index()
    counts = np.arange(len(index))
    wanted = pd.Index(index.values.repeat(counts), name=index.name)
    tm.assert_index_equal(index.repeat(counts), wanted)
|
||||
|
||||
def test_numpy_repeat(self):
    """``np.repeat`` on an index matches ``Index.repeat``; ``axis`` is rejected."""
    count = 2
    index = self.create_index()
    wanted = index.repeat(count)
    tm.assert_index_equal(np.repeat(index, count), wanted)

    axis_msg = "the 'axis' parameter is not supported"
    with pytest.raises(ValueError, match=axis_msg):
        np.repeat(index, count, axis=0)
|
||||
|
||||
@pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
def test_where(self, klass):
    """``where`` keeps entries under a True mask and inserts ``_na_value`` otherwise."""
    index = self.create_index()

    # all-True mask leaves the index unchanged
    all_true = [True] * len(index)
    result = index.where(klass(all_true))
    tm.assert_index_equal(result, index)

    # masking only the first element
    first_masked = [False] + [True] * len(index[1:])
    expected = pd.Index([index._na_value] + index[1:].tolist(), dtype=index.dtype)
    result = index.where(klass(first_masked))
    tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("case", [0.5, "xxx"])
@pytest.mark.parametrize(
    "method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_set_ops_error_cases(self, case, method):
    """Each set operation raises TypeError when handed a non-iterable input."""
    msg = "Input must be Index or array-like"
    for _name, index in self.indices.items():
        # non-iterable input
        set_op = getattr(index, method)
        with pytest.raises(TypeError, match=msg):
            set_op(case)
|
||||
|
||||
def test_intersection_base(self):
    """Intersecting ``idx[:5]`` with ``idx[:3]`` (in several containers) gives ``idx[:3]``."""
    for _name, idx in self.indices.items():
        first = idx[:5]
        second = idx[:3]
        intersect = first.intersection(second)
        is_categorical = isinstance(idx, CategoricalIndex)

        if not is_categorical:
            assert tm.equalContents(intersect, second)

        # GH 10149
        cases = [klass(second.values) for klass in [np.array, Series, list]]
        for case in cases:
            if not is_categorical:
                result = first.intersection(case)
                assert tm.equalContents(result, second)

        if isinstance(idx, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with pytest.raises(TypeError, match=msg):
                first.intersection([1, 2, 3])
|
||||
|
||||
def test_union_base(self):
    """The union of ``idx[3:]`` and ``idx[:5]`` (in several containers) has the contents of ``idx``."""
    for _name, idx in self.indices.items():
        first = idx[3:]
        second = idx[:5]
        everything = idx
        combined = first.union(second)
        assert tm.equalContents(combined, everything)

        # GH 10149
        cases = [klass(second.values) for klass in [np.array, Series, list]]
        is_categorical = isinstance(idx, CategoricalIndex)
        for case in cases:
            if not is_categorical:
                result = first.union(case)
                assert tm.equalContents(result, everything)

        if isinstance(idx, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with pytest.raises(TypeError, match=msg):
                first.union([1, 2, 3])
|
||||
|
||||
@pytest.mark.parametrize("sort", [None, False])
def test_difference_base(self, sort):
    """Check that ``idx[2:].difference(idx[:4])`` has the contents of ``idx[4:]``."""
    for name, idx in self.indices.items():
        first = idx[2:]
        second = idx[:4]
        answer = idx[4:]
        result = first.difference(second, sort)

        if isinstance(idx, CategoricalIndex):
            pass
        else:
            assert tm.equalContents(result, answer)

        # GH 10149
        cases = [klass(second.values) for klass in [np.array, Series, list]]
        for case in cases:
            if isinstance(idx, CategoricalIndex):
                pass
            elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
                # NOTE(review): this branch does not recompute ``result`` from
                # ``case``; it re-checks the value computed before the loop.
                # Confirm whether that is intended.
                assert result.__class__ == answer.__class__
                tm.assert_numpy_array_equal(
                    result.sort_values().asi8, answer.sort_values().asi8
                )
            else:
                result = first.difference(case, sort)
                assert tm.equalContents(result, answer)

        if isinstance(idx, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with pytest.raises(TypeError, match=msg):
                first.difference([1, 2, 3], sort)
|
||||
|
||||
def test_symmetric_difference(self):
    """``idx[1:]`` and ``idx[:-1]`` differ symmetrically in the first and last element."""
    for _name, idx in self.indices.items():
        first = idx[1:]
        second = idx[:-1]
        is_categorical = isinstance(idx, CategoricalIndex)
        if not is_categorical:
            answer = idx[[0, -1]]
            result = first.symmetric_difference(second)
            assert tm.equalContents(result, answer)

        # GH 10149
        cases = [klass(second.values) for klass in [np.array, Series, list]]
        for case in cases:
            if not is_categorical:
                result = first.symmetric_difference(case)
                assert tm.equalContents(result, answer)

        if isinstance(idx, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with pytest.raises(TypeError, match=msg):
                first.symmetric_difference([1, 2, 3])
|
||||
|
||||
def test_insert_base(self):
|
||||
|
||||
for name, idx in self.indices.items():
|
||||
result = idx[1:4]
|
||||
|
||||
if not len(idx):
|
||||
continue
|
||||
|
||||
# test 0th element
|
||||
assert idx[0:4].equals(result.insert(0, idx[0]))
|
||||
|
||||
def test_delete_base(self):
    """Deleting the first or last position matches slicing; out of range raises."""
    for _name, idx in self.indices.items():
        # empty indexes have nothing to delete; RangeIndex is tested in class
        if not len(idx) or isinstance(idx, RangeIndex):
            continue

        without_first = idx[1:]
        result = idx.delete(0)
        assert result.equals(without_first)
        assert result.name == without_first.name

        without_last = idx[:-1]
        result = idx.delete(-1)
        assert result.equals(without_last)
        assert result.name == without_last.name

        with pytest.raises((IndexError, ValueError)):
            # either depending on numpy version
            idx.delete(len(idx))
|
||||
|
||||
def test_equals(self):
|
||||
|
||||
for name, idx in self.indices.items():
|
||||
assert idx.equals(idx)
|
||||
assert idx.equals(idx.copy())
|
||||
assert idx.equals(idx.astype(object))
|
||||
|
||||
assert not idx.equals(list(idx))
|
||||
assert not idx.equals(np.array(idx))
|
||||
|
||||
# Cannot pass in non-int64 dtype to RangeIndex
|
||||
if not isinstance(idx, RangeIndex):
|
||||
same_values = Index(idx, dtype=object)
|
||||
assert idx.equals(same_values)
|
||||
assert same_values.equals(idx)
|
||||
|
||||
if idx.nlevels == 1:
|
||||
# do not test MultiIndex
|
||||
assert not idx.equals(pd.Series(idx))
|
||||
|
||||
def test_equals_op(self):
    """Exercise ``==`` between an index and indexes, arrays, Series and a scalar."""
    # GH9947, GH10637
    index_a = self.create_index()
    if isinstance(index_a, PeriodIndex):
        pytest.skip("Skip check for PeriodIndex")

    n = len(index_a)
    # index_b: one element shorter than index_a
    index_b = index_a[0:-1]
    # index_c: same length as index_a, last element replaced by the
    # second-to-last one
    index_c = index_a[0:-1].append(index_a[-2:-1])
    # index_d: only the first element
    index_d = index_a[0:1]

    msg = "Lengths must match|could not be broadcast"
    with pytest.raises(ValueError, match=msg):
        index_a == index_b
    expected1 = np.array([True] * n)
    expected2 = np.array([True] * (n - 1) + [False])
    tm.assert_numpy_array_equal(index_a == index_a, expected1)
    tm.assert_numpy_array_equal(index_a == index_c, expected2)

    # test comparisons with numpy arrays
    array_a = np.array(index_a)
    array_b = np.array(index_a[0:-1])
    array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
    array_d = np.array(index_a[0:1])
    with pytest.raises(ValueError, match=msg):
        index_a == array_b
    tm.assert_numpy_array_equal(index_a == array_a, expected1)
    tm.assert_numpy_array_equal(index_a == array_c, expected2)

    # test comparisons with Series
    series_a = Series(array_a)
    series_b = Series(array_b)
    series_c = Series(array_c)
    series_d = Series(array_d)
    with pytest.raises(ValueError, match=msg):
        index_a == series_b

    tm.assert_numpy_array_equal(index_a == series_a, expected1)
    tm.assert_numpy_array_equal(index_a == series_c, expected2)

    # cases where length is 1 for one of them
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == index_d
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == series_d
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == array_d
    # Series-vs-Series comparison is matched against a different message
    msg = "Can only compare identically-labeled Series objects"
    with pytest.raises(ValueError, match=msg):
        series_a == series_d
    with pytest.raises(ValueError, match="Lengths must match"):
        series_a == array_d

    # comparing with a scalar should broadcast; note that we are excluding
    # MultiIndex because in this case each item in the index is a tuple of
    # length 2, and therefore is considered an array of length 2 in the
    # comparison instead of a scalar
    if not isinstance(index_a, MultiIndex):
        expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
        # assuming the 2nd to last item is unique in the data
        item = index_a[-2]
        tm.assert_numpy_array_equal(index_a == item, expected3)
        tm.assert_series_equal(series_a == item, Series(expected3))
|
||||
|
||||
def test_hasnans_isnans(self):
    """Check ``_isnan`` / ``hasnans`` before and after placing a missing value at position 1."""
    # GH 11343, added tests for hasnans / isnans

    for name, index in self.indices.items():
        if isinstance(index, MultiIndex):
            pass
        else:
            idx = index.copy()

            # cases in indices doesn't include NaN
            expected = np.array([False] * len(idx), dtype=bool)
            tm.assert_numpy_array_equal(idx._isnan, expected)
            assert idx.hasnans is False

            idx = index.copy()
            values = np.asarray(idx.values)

            # write a missing-value marker into position 1; empty and
            # integer indexes are skipped
            if len(index) == 0:
                continue
            elif isinstance(index, DatetimeIndexOpsMixin):
                values[1] = iNaT
            elif isinstance(index, (Int64Index, UInt64Index)):
                continue
            else:
                values[1] = np.nan

            # rebuild an index of the same class from the modified values
            if isinstance(index, PeriodIndex):
                idx = index.__class__(values, freq=index.freq)
            else:
                idx = index.__class__(values)

            expected = np.array([False] * len(idx), dtype=bool)
            expected[1] = True
            tm.assert_numpy_array_equal(idx._isnan, expected)
            assert idx.hasnans is True
|
||||
|
||||
def test_fillna(self):
    """Check ``fillna`` on each index in ``self.indices``."""
    # GH 11343
    for name, index in self.indices.items():
        if len(index) == 0:
            pass
        elif isinstance(index, MultiIndex):
            idx = index.copy()
            msg = "isna is not defined for MultiIndex"
            with pytest.raises(NotImplementedError, match=msg):
                idx.fillna(idx[0])
        else:
            # filling must return an equal index that is a different object
            idx = index.copy()
            result = idx.fillna(idx[0])
            tm.assert_index_equal(result, idx)
            assert result is not idx

            # a list is rejected as fill value
            msg = "'value' must be a scalar, passed: "
            with pytest.raises(TypeError, match=msg):
                idx.fillna([idx[0]])

            idx = index.copy()
            values = np.asarray(idx.values)

            # write a missing-value marker into position 1; integer
            # indexes are skipped
            if isinstance(index, DatetimeIndexOpsMixin):
                values[1] = iNaT
            elif isinstance(index, (Int64Index, UInt64Index)):
                continue
            else:
                values[1] = np.nan

            # rebuild an index of the same class from the modified values
            if isinstance(index, PeriodIndex):
                idx = index.__class__(values, freq=index.freq)
            else:
                idx = index.__class__(values)

            expected = np.array([False] * len(idx), dtype=bool)
            expected[1] = True
            tm.assert_numpy_array_equal(idx._isnan, expected)
            assert idx.hasnans is True
|
||||
|
||||
def test_nulls(self):
    """Smoke test for ``isna`` / ``notna``; their behaviour is covered elsewhere."""
    for _name, index in self.indices.items():
        if len(index) == 0:
            tm.assert_numpy_array_equal(index.isna(), np.array([], dtype=bool))
            continue

        if isinstance(index, MultiIndex):
            idx = index.copy()
            msg = "isna is not defined for MultiIndex"
            with pytest.raises(NotImplementedError, match=msg):
                idx.isna()
            continue

        if not index.hasnans:
            tm.assert_numpy_array_equal(
                index.isna(), np.zeros(len(index), dtype=bool)
            )
            tm.assert_numpy_array_equal(
                index.notna(), np.ones(len(index), dtype=bool)
            )
        else:
            mask = isna(index)
            tm.assert_numpy_array_equal(index.isna(), mask)
            tm.assert_numpy_array_equal(index.notna(), ~mask)
|
||||
|
||||
def test_empty(self):
|
||||
# GH 15270
|
||||
index = self.create_index()
|
||||
assert not index.empty
|
||||
assert index[:0].empty
|
||||
|
||||
def test_join_self_unique(self, join_type):
|
||||
index = self.create_index()
|
||||
if index.is_unique:
|
||||
joined = index.join(index, how=join_type)
|
||||
assert (index == joined).all()
|
||||
|
||||
def test_map(self):
    """Mapping with the identity callable returns an equal index."""
    # callable
    idx = self.create_index()

    # we don't infer UInt64
    expected = idx.astype("int64") if isinstance(idx, pd.UInt64Index) else idx

    mapped = idx.map(lambda x: x)
    tm.assert_index_equal(mapped, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "mapper",
    [
        lambda values, index: {i: e for e, i in zip(values, index)},
        lambda values, index: pd.Series(values, index),
    ],
)
def test_map_dictlike(self, mapper):
    """Mapping through a dict or Series built by ``mapper`` behaves like a lookup."""
    idx = self.create_index()
    if isinstance(idx, (pd.CategoricalIndex, pd.IntervalIndex)):
        pytest.skip("skipping tests for {}".format(type(idx)))

    identity = mapper(idx.values, idx)

    # we don't infer to UInt64 for a dict
    expected = (
        idx.astype("int64")
        if isinstance(idx, pd.UInt64Index) and isinstance(identity, dict)
        else idx
    )

    mapped = idx.map(identity)
    tm.assert_index_equal(mapped, expected)

    # empty mappable
    expected = pd.Index([np.nan] * len(idx))
    mapped = idx.map(mapper(expected, idx))
    tm.assert_index_equal(mapped, expected)
|
||||
|
||||
def test_putmask_with_wrong_mask(self):
    """``putmask`` raises ValueError when the mask does not fit the index (GH18368).

    Covers a mask one element too long, one element too short, and a
    non-array mask.
    """
    index = self.create_index()

    # ``np.bool`` was only an alias for the builtin and has been removed from
    # newer NumPy releases; the builtin ``bool`` gives the same dtype.
    with pytest.raises(ValueError):
        index.putmask(np.ones(len(index) + 1, bool), 1)

    with pytest.raises(ValueError):
        index.putmask(np.ones(len(index) - 1, bool), 1)

    with pytest.raises(ValueError):
        index.putmask("foo", 1)
|
||||
|
||||
@pytest.mark.parametrize("copy", [True, False])
@pytest.mark.parametrize("name", [None, "foo"])
@pytest.mark.parametrize("ordered", [True, False])
def test_astype_category(self, copy, name, ordered):
    """``astype`` to a categorical dtype matches building a CategoricalIndex (GH 18630)."""
    idx = self.create_index()
    if name:
        idx = idx.rename(name)

    # standard categories
    cat_dtype = CategoricalDtype(ordered=ordered)
    converted = idx.astype(cat_dtype, copy=copy)
    wanted = CategoricalIndex(idx.values, name=name, ordered=ordered)
    tm.assert_index_equal(converted, wanted)

    # non-standard categories
    cat_dtype = CategoricalDtype(idx.unique().tolist()[:-1], ordered)
    converted = idx.astype(cat_dtype, copy=copy)
    wanted = CategoricalIndex(idx.values, name=name, dtype=cat_dtype)
    tm.assert_index_equal(converted, wanted)

    if ordered is False:
        # dtype='category' defaults to ordered=False, so only test once
        converted = idx.astype("category", copy=copy)
        wanted = CategoricalIndex(idx.values, name=name)
        tm.assert_index_equal(converted, wanted)
|
||||
|
||||
def test_is_unique(self):
|
||||
# initialize a unique index
|
||||
index = self.create_index().drop_duplicates()
|
||||
assert index.is_unique is True
|
||||
|
||||
# empty index should be unique
|
||||
index_empty = index[:0]
|
||||
assert index_empty.is_unique is True
|
||||
|
||||
# test basic dupes
|
||||
index_dup = index.insert(0, index[0])
|
||||
assert index_dup.is_unique is False
|
||||
|
||||
# single NA should be unique
|
||||
index_na = index.insert(0, np.nan)
|
||||
assert index_na.is_unique is True
|
||||
|
||||
# multiple NA should not be unique
|
||||
index_na_dup = index_na.insert(0, np.nan)
|
||||
assert index_na_dup.is_unique is False
|
||||
@@ -0,0 +1,51 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.indexes.api import Index, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
# Index objects used as the parameters of the ``indices`` fixture: one entry
# per ``tm.make*Index`` helper, plus a boolean, an empty, a MultiIndex and a
# duplicated-values index built directly.
indices_list = [
    tm.makeUnicodeIndex(100),
    tm.makeStringIndex(100),
    tm.makeDateIndex(100),
    tm.makePeriodIndex(100),
    tm.makeTimedeltaIndex(100),
    tm.makeIntIndex(100),
    tm.makeUIntIndex(100),
    tm.makeRangeIndex(100),
    tm.makeFloatIndex(100),
    Index([True, False]),
    tm.makeCategoricalIndex(100),
    tm.makeIntervalIndex(100),
    Index([]),
    MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),
    Index([0, 0, 1, 1, 2, 2]),
]
|
||||
|
||||
|
||||
@pytest.fixture(params=indices_list, ids=lambda x: type(x).__name__)
def indices(request):
    """Yield each entry of ``indices_list`` in turn; test ids are the class names."""
    current = request.param
    return current
|
||||
|
||||
|
||||
@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def one(request):
    """Provide the value one, as a plain int and as a zero-dim int64 array."""
    # zero-dim integer array behaves like an integer
    value = request.param
    return value
|
||||
|
||||
|
||||
# Parameters for the ``zero`` fixture: length-5 zero vectors (pd.Index and
# np.array, for each of int64 / uint64 / float64) ...
zeros = [
    box([0] * 5, dtype=dtype)
    for box in [pd.Index, np.array]
    for dtype in [np.int64, np.uint64, np.float64]
]
# ... zero-dim numpy zeros of the same three dtypes ...
zeros.extend([np.array(0, dtype=dtype) for dtype in [np.int64, np.uint64, np.float64]])
# ... and the plain Python scalars.
zeros.extend([0, 0.0])
|
||||
|
||||
|
||||
@pytest.fixture(params=zeros)
def zero(request):
    """Provide each entry of ``zeros`` in turn."""
    # For testing division by (or of) zero for Index with length 5, this
    # gives several scalar-zeros and length-5 vector-zeros
    value = request.param
    return value
|
||||
@@ -0,0 +1,102 @@
|
||||
""" generic datetimelike tests """
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import Base
|
||||
|
||||
|
||||
class DatetimeLike(Base):
    """Generic tests for datetime-like indexes, layered on top of ``Base``.

    The tests call ``self.create_index()`` and ``self._holder`` and read
    ``self.index``.  NOTE(review): ``create_index`` and ``index`` are not
    defined in this class; presumably the concrete subclasses provide
    them -- confirm.
    """

    def test_argmax_axis_invalid(self):
        # GH#23081
        rng = self.create_index()
        with pytest.raises(ValueError):
            rng.argmax(axis=1)
        with pytest.raises(ValueError):
            rng.argmin(axis=2)
        with pytest.raises(ValueError):
            rng.min(axis=-2)
        with pytest.raises(ValueError):
            rng.max(axis=-3)

    def test_can_hold_identifiers(self):
        idx = self.create_index()
        key = idx[0]
        assert idx._can_hold_identifiers_and_holds_name(key) is False

    def test_shift_identity(self):
        idx = self.create_index()
        # shifting by zero periods must give back an equal index
        tm.assert_index_equal(idx, idx.shift(0))

    def test_str(self):
        # test the string repr
        idx = self.create_index()
        idx.name = "foo"
        rendered = str(idx)

        assert "length={}".format(len(idx)) not in rendered
        assert "'foo'" in rendered
        assert idx.__class__.__name__ in rendered

        if getattr(idx, "tz", None) is not None:
            # ``tz`` is a tzinfo object; ``obj in some_str`` raises TypeError
            # unless ``obj`` is itself a str, so compare its string form.
            assert str(idx.tz) in rendered
        if hasattr(idx, "freq"):
            assert "freq='{idx.freqstr}'".format(idx=idx) in rendered

    def test_view(self):
        idx = self.create_index()

        # NOTE(review): the "i8" view is created but never compared; only the
        # round trip through ``_holder`` is asserted -- confirm intent.
        idx.view("i8")
        result = self._holder(idx)
        tm.assert_index_equal(result, idx)

        holder_view = idx.view(self._holder)
        result = self._holder(idx)
        tm.assert_index_equal(result, holder_view)

    def test_map_callable(self):
        expected = self.index + self.index.freq
        result = self.index.map(lambda x: x + x.freq)
        tm.assert_index_equal(result, expected)

        # map to NaT
        result = self.index.map(lambda x: pd.NaT if x == self.index[0] else x)
        expected = pd.Index([pd.NaT] + self.index[1:].tolist())
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "mapper",
        [
            lambda values, index: {i: e for e, i in zip(values, index)},
            lambda values, index: pd.Series(values, index),
        ],
    )
    def test_map_dictlike(self, mapper):
        # ``mapper`` builds a dict or a Series keyed by ``index``
        expected = self.index + self.index.freq

        # don't compare the freqs
        if isinstance(expected, pd.DatetimeIndex):
            expected.freq = None

        result = self.index.map(mapper(expected, self.index))
        tm.assert_index_equal(result, expected)

        expected = pd.Index([pd.NaT] + self.index[1:].tolist())
        result = self.index.map(mapper(expected, self.index))
        tm.assert_index_equal(result, expected)

        # empty map; these map to np.nan because we cannot know
        # to re-infer things
        expected = pd.Index([np.nan] * len(self.index))
        result = self.index.map(mapper([], []))
        tm.assert_index_equal(result, expected)

    def test_asobject_deprecated(self):
        # GH18572
        d = self.create_index()
        # reading ``asobject`` must emit a FutureWarning
        with tm.assert_produces_warning(FutureWarning):
            i = d.asobject
        assert isinstance(i, pd.Index)
|
||||
@@ -0,0 +1,123 @@
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas.errors import NullFrequencyError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DatetimeIndex, Series, date_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestDatetimeIndexArithmetic:
    """Shift tests for ``DatetimeIndex`` and for ``Series`` indexed by one."""

    # -------------------------------------------------------------
    # DatetimeIndex.shift is used in integer addition

    def test_dti_shift_tzaware(self, tz_naive_fixture):
        # GH#9903
        tz = tz_naive_fixture

        def build(stamps):
            return pd.DatetimeIndex(stamps, name="xxx", tz=tz)

        # an empty index is unchanged by any shift
        empty = build([])
        for periods in (0, 3):
            tm.assert_index_equal(empty.shift(periods, freq="H"), empty)

        base = build(["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"])
        tm.assert_index_equal(base.shift(0, freq="H"), base)

        shifted = [
            (3, ["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"]),
            (-3, ["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"]),
        ]
        for periods, stamps in shifted:
            tm.assert_index_equal(base.shift(periods, freq="H"), build(stamps))

    def test_dti_shift_freqs(self):
        # test shift for DatetimeIndex and non DatetimeIndex
        # GH#8083
        drange = pd.date_range("20130101", periods=5)

        # (positional args, keyword args, expected days)
        scenarios = [
            (
                (1,),
                {},
                ["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"],
            ),
            (
                (-1,),
                {},
                ["2012-12-31", "2013-01-01", "2013-01-02", "2013-01-03", "2013-01-04"],
            ),
            (
                (3,),
                {"freq": "2D"},
                ["2013-01-07", "2013-01-08", "2013-01-09", "2013-01-10", "2013-01-11"],
            ),
        ]
        for args, kwargs, days in scenarios:
            result = drange.shift(*args, **kwargs)
            expected = pd.DatetimeIndex(days, freq="D")
            tm.assert_index_equal(result, expected)

    def test_dti_shift_int(self):
        rng = date_range("1/1/2000", periods=20)

        # GH#22535: integer addition warns and matches a forward shift
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            added = rng + 5
        tm.assert_index_equal(added, rng.shift(5))

        # GH#22535: integer subtraction warns and matches a backward shift
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            subtracted = rng - 5
        tm.assert_index_equal(subtracted, rng.shift(-5))

    def test_dti_shift_no_freq(self):
        # GH#19147
        no_freq = pd.DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None)
        with pytest.raises(NullFrequencyError):
            no_freq.shift(2)

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_shift_localized(self, tzstr):
        naive = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
        localized = naive.tz_localize(tzstr)

        shifted = localized.shift(1, "10T")
        assert shifted.tz == localized.tz

    def test_dti_shift_across_dst(self):
        # GH 8616
        idx = date_range("2013-11-03", tz="America/Chicago", periods=7, freq="H")
        ser = Series(index=idx[:-1])
        result = ser.shift(freq="H")
        expected = Series(index=idx[1:])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "shift, result_time",
        [
            [0, "2014-11-14 00:00:00"],
            [-1, "2014-11-13 23:00:00"],
            [1, "2014-11-14 01:00:00"],
        ],
    )
    def test_dti_shift_near_midnight(self, shift, result_time):
        # GH 8616
        midnight = datetime(2014, 11, 14, 0)
        midnight_est = pytz.timezone("EST").localize(midnight)
        ser = Series(data=[1], index=[midnight_est])
        result = ser.shift(shift, freq="H")
        expected = Series(1, index=DatetimeIndex([result_time], tz="EST"))
        tm.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,378 @@
|
||||
from datetime import datetime
|
||||
|
||||
import dateutil
|
||||
from dateutil.tz import tzlocal
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Int64Index,
|
||||
NaT,
|
||||
Period,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestDatetimeIndex:
    """Tests for ``DatetimeIndex.astype`` and ``DatetimeIndex.to_pydatetime``."""

    @staticmethod
    def _check_rng(rng):
        """Assert ``rng.to_pydatetime()`` matches ``rng`` element by element.

        Shared by the ``test_index_convert_to_datetime_array*`` tests, which
        differ only in how the time zones are specified.
        """
        converted = rng.to_pydatetime()
        assert isinstance(converted, np.ndarray)
        for x, stamp in zip(converted, rng):
            assert isinstance(x, datetime)
            assert x == stamp.to_pydatetime()
            assert x.tzinfo == stamp.tzinfo

    def test_astype(self):
        # GH 13149, GH 13209
        idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])

        result = idx.astype(object)
        expected = Index([Timestamp("2016-05-16")] + [NaT] * 3, dtype=object)
        tm.assert_index_equal(result, expected)

        # the three missing entries are expected as -9223372036854775808
        result = idx.astype(int)
        expected = Int64Index(
            [1463356800000000000] + [-9223372036854775808] * 3, dtype=np.int64
        )
        tm.assert_index_equal(result, expected)

        rng = date_range("1/1/2000", periods=10)
        result = rng.astype("i8")
        tm.assert_index_equal(result, Index(rng.asi8))
        tm.assert_numpy_array_equal(result.values, rng.asi8)

    def test_astype_uint(self):
        arr = date_range("2000", periods=2)
        expected = pd.UInt64Index(
            np.array([946684800000000000, 946771200000000000], dtype="uint64")
        )

        # both unsigned targets are compared against the same UInt64Index
        tm.assert_index_equal(arr.astype("uint64"), expected)
        tm.assert_index_equal(arr.astype("uint32"), expected)

    def test_astype_with_tz(self):
        # with tz
        rng = date_range("1/1/2000", periods=10, tz="US/Eastern")
        result = rng.astype("datetime64[ns]")
        expected = (
            date_range("1/1/2000", periods=10, tz="US/Eastern")
            .tz_convert("UTC")
            .tz_localize(None)
        )
        tm.assert_index_equal(result, expected)

        # BUG#10442 : testing astype(str) is correct for Series/DatetimeIndex
        result = pd.Series(pd.date_range("2012-01-01", periods=3)).astype(str)
        expected = pd.Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object)
        tm.assert_series_equal(result, expected)

        result = Series(pd.date_range("2012-01-01", periods=3, tz="US/Eastern")).astype(
            str
        )
        expected = Series(
            [
                "2012-01-01 00:00:00-05:00",
                "2012-01-02 00:00:00-05:00",
                "2012-01-03 00:00:00-05:00",
            ],
            dtype=object,
        )
        tm.assert_series_equal(result, expected)

        # GH 18951: tz-aware to tz-aware
        idx = date_range("20170101", periods=4, tz="US/Pacific")
        result = idx.astype("datetime64[ns, US/Eastern]")
        expected = date_range("20170101 03:00:00", periods=4, tz="US/Eastern")
        tm.assert_index_equal(result, expected)

        # GH 18951: tz-naive to tz-aware
        idx = date_range("20170101", periods=4)
        result = idx.astype("datetime64[ns, US/Eastern]")
        expected = date_range("20170101", periods=4, tz="US/Eastern")
        tm.assert_index_equal(result, expected)

    def test_astype_str_compat(self):
        # GH 13149, GH 13209
        # verify that we are returning NaT as a string (and not unicode)
        idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])
        result = idx.astype(str)
        expected = Index(["2016-05-16", "NaT", "NaT", "NaT"], dtype=object)
        tm.assert_index_equal(result, expected)

    def test_astype_str(self):
        # test astype string - #10442
        result = date_range("2012-01-01", periods=4, name="test_name").astype(str)
        expected = Index(
            ["2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04"],
            name="test_name",
            dtype=object,
        )
        tm.assert_index_equal(result, expected)

        # test astype string with tz and name
        result = date_range(
            "2012-01-01", periods=3, name="test_name", tz="US/Eastern"
        ).astype(str)
        expected = Index(
            [
                "2012-01-01 00:00:00-05:00",
                "2012-01-02 00:00:00-05:00",
                "2012-01-03 00:00:00-05:00",
            ],
            name="test_name",
            dtype=object,
        )
        tm.assert_index_equal(result, expected)

        # test astype string with freqH and name
        result = date_range("1/1/2011", periods=3, freq="H", name="test_name").astype(
            str
        )
        expected = Index(
            ["2011-01-01 00:00:00", "2011-01-01 01:00:00", "2011-01-01 02:00:00"],
            name="test_name",
            dtype=object,
        )
        tm.assert_index_equal(result, expected)

        # test astype string with freqH and timezone
        result = date_range(
            "3/6/2012 00:00", periods=2, freq="H", tz="Europe/London", name="test_name"
        ).astype(str)
        expected = Index(
            ["2012-03-06 00:00:00+00:00", "2012-03-06 01:00:00+00:00"],
            dtype=object,
            name="test_name",
        )
        tm.assert_index_equal(result, expected)

    def test_astype_datetime64(self):
        # GH 13149, GH 13209
        idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])

        # default astype returns an equal but distinct object
        result = idx.astype("datetime64[ns]")
        tm.assert_index_equal(result, idx)
        assert result is not idx

        # copy=False hands back the very same object
        result = idx.astype("datetime64[ns]", copy=False)
        tm.assert_index_equal(result, idx)
        assert result is idx

        idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan], tz="EST")
        result = idx_tz.astype("datetime64[ns]")
        expected = DatetimeIndex(
            ["2016-05-16 05:00:00", "NaT", "NaT", "NaT"], dtype="datetime64[ns]"
        )
        tm.assert_index_equal(result, expected)

    def test_astype_object(self):
        rng = date_range("1/1/2000", periods=20)

        casted = rng.astype("O")
        exp_values = list(rng)

        tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_))
        assert casted.tolist() == exp_values

    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
    def test_astype_object_tz(self, tz):
        idx = pd.date_range(start="2013-01-01", periods=4, freq="M", name="idx", tz=tz)
        expected_list = [
            Timestamp("2013-01-31", tz=tz),
            Timestamp("2013-02-28", tz=tz),
            Timestamp("2013-03-31", tz=tz),
            Timestamp("2013-04-30", tz=tz),
        ]
        expected = pd.Index(expected_list, dtype=object, name="idx")
        result = idx.astype(object)
        tm.assert_index_equal(result, expected)
        assert idx.tolist() == expected_list

    def test_astype_object_with_nat(self):
        idx = DatetimeIndex(
            [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, datetime(2013, 1, 4)],
            name="idx",
        )
        expected_list = [
            Timestamp("2013-01-01"),
            Timestamp("2013-01-02"),
            pd.NaT,
            Timestamp("2013-01-04"),
        ]
        expected = pd.Index(expected_list, dtype=object, name="idx")
        result = idx.astype(object)
        tm.assert_index_equal(result, expected)
        assert idx.tolist() == expected_list

    @pytest.mark.parametrize(
        "dtype",
        [float, "timedelta64", "timedelta64[ns]", "datetime64", "datetime64[D]"],
    )
    def test_astype_raises(self, dtype):
        # GH 13149, GH 13209
        idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])
        msg = "Cannot cast DatetimeArray to dtype"
        with pytest.raises(TypeError, match=msg):
            idx.astype(dtype)

    def test_index_convert_to_datetime_array(self):
        # time zones given as plain strings
        rng = date_range("20090415", "20090519")
        rng_eastern = date_range("20090415", "20090519", tz="US/Eastern")
        rng_utc = date_range("20090415", "20090519", tz="utc")

        self._check_rng(rng)
        self._check_rng(rng_eastern)
        self._check_rng(rng_utc)

    def test_index_convert_to_datetime_array_explicit_pytz(self):
        # time zones given as pytz objects
        rng = date_range("20090415", "20090519")
        rng_eastern = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern"))
        rng_utc = date_range("20090415", "20090519", tz=pytz.utc)

        self._check_rng(rng)
        self._check_rng(rng_eastern)
        self._check_rng(rng_utc)

    def test_index_convert_to_datetime_array_dateutil(self):
        # time zones given via dateutil
        rng = date_range("20090415", "20090519")
        rng_eastern = date_range("20090415", "20090519", tz="dateutil/US/Eastern")
        rng_utc = date_range("20090415", "20090519", tz=dateutil.tz.tzutc())

        self._check_rng(rng)
        self._check_rng(rng_eastern)
        self._check_rng(rng_utc)

    @pytest.mark.parametrize(
        "tz, dtype",
        [["US/Pacific", "datetime64[ns, US/Pacific]"], [None, "datetime64[ns]"]],
    )
    def test_integer_index_astype_datetime(self, tz, dtype):
        # GH 20997, 20964, 24559
        val = [pd.Timestamp("2018-01-01", tz=tz).value]
        result = pd.Index(val).astype(dtype)
        expected = pd.DatetimeIndex(["2018-01-01"], tz=tz)
        tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
class TestToPeriod:
    """Tests for ``DatetimeIndex.to_period`` plus two ``astype`` fallbacks."""

    def setup_method(self, method):
        # two UTC stamps with microsecond precision, shared by the
        # millisecond/microsecond tests
        data = [
            Timestamp("2007-01-01 10:11:12.123456Z"),
            Timestamp("2007-01-01 10:11:13.789123Z"),
        ]
        self.index = DatetimeIndex(data)

    def test_to_period_millisecond(self):
        index = self.index

        with tm.assert_produces_warning(UserWarning):
            # warning that timezone info will be lost
            period = index.to_period(freq="L")
        assert 2 == len(period)
        assert period[0] == Period("2007-01-01 10:11:12.123Z", "L")
        assert period[1] == Period("2007-01-01 10:11:13.789Z", "L")

    def test_to_period_microsecond(self):
        index = self.index

        with tm.assert_produces_warning(UserWarning):
            # warning that timezone info will be lost
            period = index.to_period(freq="U")
        assert 2 == len(period)
        assert period[0] == Period("2007-01-01 10:11:12.123456Z", "U")
        assert period[1] == Period("2007-01-01 10:11:13.789123Z", "U")

    @pytest.mark.parametrize(
        "tz",
        ["US/Eastern", pytz.utc, tzlocal(), "dateutil/US/Eastern", dateutil.tz.tzutc()],
    )
    def test_to_period_tz(self, tz):
        ts = date_range("1/1/2000", "2/1/2000", tz=tz)

        with tm.assert_produces_warning(UserWarning):
            # GH#21333 warning that timezone info will be lost
            result = ts.to_period()[0]
            expected = ts[0].to_period()

        assert result == expected

        expected = date_range("1/1/2000", "2/1/2000").to_period()

        with tm.assert_produces_warning(UserWarning):
            # GH#21333 warning that timezone info will be lost
            result = ts.to_period()

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", ["Etc/GMT-1", "Etc/GMT+1"])
    def test_to_period_tz_utc_offset_consistency(self, tz):
        # GH 22905
        # use the parametrized ``tz``; hard-coding one zone here would make
        # both parametrized cases exercise the same offset
        ts = pd.date_range("1/1/2000", "2/1/2000", tz=tz)
        with tm.assert_produces_warning(UserWarning):
            result = ts.to_period()[0]
            expected = ts[0].to_period()
            assert result == expected

    def test_to_period_nofreq(self):
        # irregular spacing: to_period without a freq must raise
        idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"])
        with pytest.raises(ValueError):
            idx.to_period()

        idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="infer")
        assert idx.freqstr == "D"
        expected = pd.PeriodIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="D")
        tm.assert_index_equal(idx.to_period(), expected)

        # GH 7606
        idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])
        assert idx.freqstr is None
        tm.assert_index_equal(idx.to_period(), expected)

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_astype_category(self, tz):
        obj = pd.date_range("2000", periods=2, tz=tz)
        result = obj.astype("category")
        expected = pd.CategoricalIndex(
            [pd.Timestamp("2000-01-01", tz=tz), pd.Timestamp("2000-01-02", tz=tz)]
        )
        tm.assert_index_equal(result, expected)

        # same cast on the underlying array
        result = obj._data.astype("category")
        expected = expected.values
        tm.assert_categorical_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_astype_array_fallback(self, tz):
        obj = pd.date_range("2000", periods=2, tz=tz)
        result = obj.astype(bool)
        expected = pd.Index(np.array([True, True]))
        tm.assert_index_equal(result, expected)

        # same cast on the underlying array
        result = obj._data.astype(bool)
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(result, expected)
|
||||
@@ -0,0 +1,975 @@
|
||||
from datetime import timedelta
|
||||
from functools import partial
|
||||
from operator import attrgetter
|
||||
|
||||
import dateutil
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._libs.tslibs import OutOfBoundsDatetime, conversion
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Timestamp,
|
||||
date_range,
|
||||
datetime,
|
||||
offsets,
|
||||
to_datetime,
|
||||
)
|
||||
from pandas.core.arrays import DatetimeArray, period_array
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestDatetimeIndex:
|
||||
@pytest.mark.parametrize("dt_cls", [DatetimeIndex, DatetimeArray._from_sequence])
def test_freq_validation_with_nat(self, dt_cls):
    # GH#11587 make sure we get a useful error message when generate_range
    # raises
    msg = (
        "Inferred frequency None from passed values does not conform "
        "to passed frequency D"
    )
    # second element once as a Timestamp, once as its integer ``.value``
    for as_value in (False, True):
        with pytest.raises(ValueError, match=msg):
            stamp = pd.Timestamp("2011-01-01")
            dt_cls([pd.NaT, stamp.value if as_value else stamp], freq="D")
|
||||
|
||||
def test_categorical_preserves_tz(self):
    # GH#18664 retain tz when going DTI-->Categorical-->DTI
    # TODO: parametrize over DatetimeIndex/DatetimeArray
    #  once CategoricalIndex(DTA) works
    dti = pd.DatetimeIndex(
        [pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"], tz="US/Eastern"
    )

    cat_index = pd.CategoricalIndex(dti)
    cat_array = pd.Categorical(dti)
    cat_series = pd.Series(cat_index)

    # each categorical container must convert back to an equal index
    for categorical in (cat_index, cat_array, cat_series):
        round_tripped = pd.DatetimeIndex(categorical)
        tm.assert_index_equal(round_tripped, dti)
|
||||
|
||||
def test_dti_with_period_data_raises(self):
    # GH#23675
    data = pd.PeriodIndex(["2016Q1", "2016Q2"], freq="Q")

    # The input is built inside the ``raises`` context, first as the
    # PeriodIndex itself and then wrapped by ``period_array``; each form is
    # fed to both converters.
    for make_input in (lambda: data, lambda: period_array(data)):
        for convert in (DatetimeIndex, to_datetime):
            with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
                convert(make_input())
|
||||
|
||||
def test_dti_with_timedelta64_data_deprecation(self):
    # GH#23675
    data = np.array([0], dtype="m8[ns]")

    # (converter, whether the warning's stack level is checked)
    converters = [(DatetimeIndex, True), (to_datetime, False)]

    # The input is built inside the warning context: first the raw m8 array,
    # then the same data wrapped in a TimedeltaIndex.
    for make_input in (lambda: data, lambda: pd.TimedeltaIndex(data)):
        for convert, check_stacklevel in converters:
            with tm.assert_produces_warning(
                FutureWarning, check_stacklevel=check_stacklevel
            ):
                result = convert(make_input())

            assert result[0] == Timestamp("1970-01-01")
|
||||
|
||||
def test_construction_caching(self):
    # frame mixing naive, tz-aware, NaT-containing and nanosecond-freq
    # datetime columns
    columns = {
        "dt": pd.date_range("20130101", periods=3),
        "dttz": pd.date_range("20130101", periods=3, tz="US/Eastern"),
        "dt_with_null": [
            pd.Timestamp("20130101"),
            pd.NaT,
            pd.Timestamp("20130103"),
        ],
        "dtns": pd.date_range("20130101", periods=3, freq="ns"),
    }
    df = pd.DataFrame(columns)

    # the tz-aware column must keep its zone
    assert df.dttz.dtype.tz.zone == "US/Eastern"
|
||||
|
||||
@pytest.mark.parametrize(
    "kwargs",
    [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
)
def test_construction_with_alt(self, kwargs, tz_aware_fixture):
    tz = tz_aware_fixture
    i = pd.date_range("20130101", periods=5, freq="H", tz=tz)

    # the parametrized values are attribute paths; resolve them against ``i``
    resolved = {}
    for key, attr_path in kwargs.items():
        resolved[key] = attrgetter(attr_path)(i)

    result = DatetimeIndex(i, **resolved)
    tm.assert_index_equal(i, result)
|
||||
|
||||
@pytest.mark.parametrize(
    "kwargs",
    [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
)
def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
    tz = tz_aware_fixture
    i = pd.date_range("20130101", periods=5, freq="H", tz=tz)

    # the parametrized values are attribute paths; resolve them against ``i``
    resolved = {}
    for key, attr_path in kwargs.items():
        resolved[key] = attrgetter(attr_path)(i)
    kwargs = resolved

    def wall_i8():
        # i8 values of ``i`` after dropping its tz
        return i.tz_localize(None).asi8

    # no warning is expected for the UTC spellings, FutureWarning otherwise
    warn = None if str(tz) in ("UTC", "tzutc()", "UTC+00:00") else FutureWarning

    with tm.assert_produces_warning(warn, check_stacklevel=False):
        result = DatetimeIndex(wall_i8(), **kwargs)
    expected = DatetimeIndex(i, **kwargs)
    tm.assert_index_equal(result, expected)

    # localize into the provided tz
    i2 = DatetimeIndex(wall_i8(), tz="UTC")
    expected = i.tz_localize(None).tz_localize("UTC")
    tm.assert_index_equal(i2, expected)

    # incompat tz/dtype
    msg = "cannot supply both a tz and a dtype with a tz"
    with pytest.raises(ValueError, match=msg):
        DatetimeIndex(wall_i8(), dtype=i.dtype, tz="US/Pacific")
|
||||
|
||||
def test_construction_index_with_mixed_timezones(self):
    tokyo = "Asia/Tokyo"
    eastern = "US/Eastern"

    def check_datetime_index(result, exp, tz_aware):
        # ``Index`` must have produced a DatetimeIndex identical to ``exp``
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        if tz_aware:
            assert result.tz is not None
            assert result.tz == exp.tz
        else:
            assert result.tz is None

    def check_object_index(result, exp):
        # ``Index`` must have stayed a plain (non-datetime) Index
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

    # gh-11488: no tz results in DatetimeIndex
    naive = [Timestamp("2011-01-01"), Timestamp("2011-01-02")]
    check_datetime_index(
        Index(naive, name="idx"), DatetimeIndex(naive, name="idx"), tz_aware=False
    )

    # same tz results in DatetimeIndex
    result = Index(
        [
            Timestamp("2011-01-01 10:00", tz=tokyo),
            Timestamp("2011-01-02 10:00", tz=tokyo),
        ],
        name="idx",
    )
    exp = DatetimeIndex(
        [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
        tz=tokyo,
        name="idx",
    )
    check_datetime_index(result, exp, tz_aware=True)

    # same tz results in DatetimeIndex (DST)
    result = Index(
        [
            Timestamp("2011-01-01 10:00", tz=eastern),
            Timestamp("2011-08-01 10:00", tz=eastern),
        ],
        name="idx",
    )
    exp = DatetimeIndex(
        [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
        tz=eastern,
        name="idx",
    )
    check_datetime_index(result, exp, tz_aware=True)

    # Different tz results in Index(dtype=object)
    naive_and_eastern = [
        Timestamp("2011-01-01 10:00"),
        Timestamp("2011-01-02 10:00", tz=eastern),
    ]
    check_object_index(
        Index(naive_and_eastern, name="idx"),
        Index(naive_and_eastern, dtype="object", name="idx"),
    )

    tokyo_and_eastern = [
        Timestamp("2011-01-01 10:00", tz=tokyo),
        Timestamp("2011-01-02 10:00", tz=eastern),
    ]
    check_object_index(
        Index(tokyo_and_eastern, name="idx"),
        Index(tokyo_and_eastern, dtype="object", name="idx"),
    )

    # length = 1
    single = [Timestamp("2011-01-01")]
    check_datetime_index(
        Index(single, name="idx"), DatetimeIndex(single, name="idx"), tz_aware=False
    )

    # length = 1 with tz
    result = Index([Timestamp("2011-01-01 10:00", tz=tokyo)], name="idx")
    exp = DatetimeIndex([Timestamp("2011-01-01 10:00")], tz=tokyo, name="idx")
    check_datetime_index(result, exp, tz_aware=True)
|
||||
|
||||
def test_construction_index_with_mixed_timezones_with_NaT(self):
    # see gh-11488
    nat = pd.NaT
    tokyo = "Asia/Tokyo"
    eastern = "US/Eastern"

    def check_datetime_index(result, exp, tz_aware):
        # ``Index`` must have produced a DatetimeIndex identical to ``exp``
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        if tz_aware:
            assert result.tz is not None
            assert result.tz == exp.tz
        else:
            assert result.tz is None

    def check_object_index(result, exp):
        # ``Index`` must have stayed a plain (non-datetime) Index
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

    # tz-naive stamps interleaved with NaT
    naive = [nat, Timestamp("2011-01-01"), nat, Timestamp("2011-01-02")]
    check_datetime_index(
        Index(naive, name="idx"), DatetimeIndex(naive, name="idx"), tz_aware=False
    )

    # Same tz results in DatetimeIndex
    result = Index(
        [
            nat,
            Timestamp("2011-01-01 10:00", tz=tokyo),
            nat,
            Timestamp("2011-01-02 10:00", tz=tokyo),
        ],
        name="idx",
    )
    exp = DatetimeIndex(
        [
            nat,
            Timestamp("2011-01-01 10:00"),
            nat,
            Timestamp("2011-01-02 10:00"),
        ],
        tz=tokyo,
        name="idx",
    )
    check_datetime_index(result, exp, tz_aware=True)

    # same tz results in DatetimeIndex (DST)
    result = Index(
        [
            Timestamp("2011-01-01 10:00", tz=eastern),
            nat,
            Timestamp("2011-08-01 10:00", tz=eastern),
        ],
        name="idx",
    )
    exp = DatetimeIndex(
        [Timestamp("2011-01-01 10:00"), nat, Timestamp("2011-08-01 10:00")],
        tz=eastern,
        name="idx",
    )
    check_datetime_index(result, exp, tz_aware=True)

    # different tz results in Index(dtype=object)
    naive_and_eastern = [
        nat,
        Timestamp("2011-01-01 10:00"),
        nat,
        Timestamp("2011-01-02 10:00", tz=eastern),
    ]
    check_object_index(
        Index(naive_and_eastern, name="idx"),
        Index(naive_and_eastern, dtype="object", name="idx"),
    )

    tokyo_and_eastern = [
        nat,
        Timestamp("2011-01-01 10:00", tz=tokyo),
        nat,
        Timestamp("2011-01-02 10:00", tz=eastern),
    ]
    check_object_index(
        Index(tokyo_and_eastern, name="idx"),
        Index(tokyo_and_eastern, dtype="object", name="idx"),
    )

    # all NaT
    check_datetime_index(
        Index([nat, nat], name="idx"),
        DatetimeIndex([nat, nat], name="idx"),
        tz_aware=False,
    )

    # all NaT with tz
    check_datetime_index(
        Index([nat, nat], tz=tokyo, name="idx"),
        DatetimeIndex([nat, nat], tz=tokyo, name="idx"),
        tz_aware=True,
    )
|
||||
|
||||
def test_construction_dti_with_mixed_timezones(self):
    """Build DatetimeIndex from naive, same-tz and mixed-tz Timestamps."""
    # GH 11488 (not changed, added explicit tests)

    # no tz results in DatetimeIndex
    naive_days = [Timestamp("2011-01-01"), Timestamp("2011-01-02")]
    built = DatetimeIndex(naive_days, name="idx")
    wanted = DatetimeIndex(naive_days, name="idx")
    tm.assert_index_equal(built, wanted, exact=True)
    assert isinstance(built, DatetimeIndex)

    # same tz results in DatetimeIndex (the US/Eastern pair is the DST case)
    same_tz_cases = [
        ("Asia/Tokyo", "2011-01-02 10:00"),
        ("US/Eastern", "2011-08-01 10:00"),
    ]
    for zone, later in same_tz_cases:
        built = DatetimeIndex(
            [Timestamp("2011-01-01 10:00", tz=zone), Timestamp(later, tz=zone)],
            name="idx",
        )
        wanted = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp(later)], tz=zone, name="idx"
        )
        tm.assert_index_equal(built, wanted, exact=True)
        assert isinstance(built, DatetimeIndex)

    # every tz mismatch below is expected to raise ValueError
    tokyo_first = Timestamp("2011-01-01 10:00", tz="Asia/Tokyo")
    naive_first = Timestamp("2011-01-01 10:00")
    eastern_second = Timestamp("2011-01-02 10:00", tz="US/Eastern")
    msg = "cannot be converted to datetime64"

    with pytest.raises(ValueError):
        DatetimeIndex([tokyo_first, eastern_second], name="idx")

    with pytest.raises(ValueError, match=msg):
        DatetimeIndex([naive_first, eastern_second], tz="Asia/Tokyo", name="idx")

    with pytest.raises(ValueError):
        DatetimeIndex([tokyo_first, eastern_second], tz="US/Eastern", name="idx")

    with pytest.raises(ValueError, match=msg):
        # the base Index constructor is given tz too; the same ValueError
        # message is expected for the mismatching values
        Index(
            [pd.NaT, naive_first, pd.NaT, eastern_second],
            tz="Asia/Tokyo",
            name="idx",
        )
|
||||
|
||||
def test_construction_base_constructor(self):
    """pd.Index and pd.DatetimeIndex agree for list and ndarray input."""
    samples = (
        [pd.Timestamp("2011-01-01"), pd.NaT, pd.Timestamp("2011-01-03")],
        [np.nan, pd.NaT, pd.Timestamp("2011-01-03")],
    )
    for values in samples:
        # plain list input
        tm.assert_index_equal(pd.Index(values), pd.DatetimeIndex(values))
        # the same values boxed by np.array
        tm.assert_index_equal(
            pd.Index(np.array(values)), pd.DatetimeIndex(np.array(values))
        )
|
||||
|
||||
def test_construction_outofbounds(self):
    """Years 3000-6000 give an object Index; DatetimeIndex raises."""
    # GH 13663
    far_future = [datetime(year, 1, 1) for year in (3000, 4000, 5000, 6000)]

    # coerces to object
    as_object = Index(far_future, dtype=object)
    tm.assert_index_equal(Index(far_future), as_object)

    # can't create DatetimeIndex
    with pytest.raises(OutOfBoundsDatetime):
        DatetimeIndex(far_future)
|
||||
|
||||
def test_construction_with_ndarray(self):
    """Rebuild a business-day DatetimeIndex from its ``.values`` array."""
    # GH 5152
    weekdays = [datetime(2013, 10, day) for day in (7, 8, 9)]
    raw_values = DatetimeIndex(weekdays, freq=pd.offsets.BDay()).values

    rebuilt = DatetimeIndex(raw_values, freq=pd.offsets.BDay())
    wanted = DatetimeIndex(["2013-10-07", "2013-10-08", "2013-10-09"], freq="B")
    tm.assert_index_equal(rebuilt, wanted)
|
||||
|
||||
def test_verify_integrity_deprecated(self):
    """Passing ``verify_integrity`` to DatetimeIndex emits a FutureWarning."""
    # GH#23919
    deprecated_kwargs = {"verify_integrity": False}
    with tm.assert_produces_warning(FutureWarning):
        DatetimeIndex(["1/1/2000"], **deprecated_kwargs)
|
||||
|
||||
def test_range_kwargs_deprecated(self):
    """Passing start/end/freq to DatetimeIndex emits a FutureWarning."""
    # GH#23919
    range_kwargs = {"start": "1/1/2000", "end": "1/10/2000", "freq": "D"}
    with tm.assert_produces_warning(FutureWarning):
        DatetimeIndex(**range_kwargs)
|
||||
|
||||
def test_integer_values_and_tz_deprecated(self):
    """Integer data with a non-UTC tz warns; integer data with UTC does not.

    Both branches also compare the constructed index against an index built
    from the string "2000-01-01T00:00:00" in the same timezone.
    """
    # GH-24559
    values = np.array([946684800000000000])
    with tm.assert_produces_warning(FutureWarning):
        result = DatetimeIndex(values, tz="US/Central")
    expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central")
    tm.assert_index_equal(result, expected)

    # but UTC is *not* deprecated.
    with tm.assert_produces_warning(None):
        result = DatetimeIndex(values, tz="UTC")
    # Previously `expected` was built with tz="US/Central" and never compared,
    # so the UTC result itself went unchecked.
    expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="UTC")
    tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_constructor_coverage(self):
    """Exercise assorted DatetimeIndex / date_range constructor inputs."""
    # periods=10.5 is expected to give the same range as periods=10
    tm.assert_index_equal(
        date_range("1/1/2000", periods=10.5), date_range("1/1/2000", periods=10)
    )

    with pytest.raises(TypeError, match="periods must be a number, got foo"):
        date_range(start="1/1/2000", periods="foo", freq="D")

    with pytest.raises(ValueError):
        with tm.assert_produces_warning(FutureWarning):
            DatetimeIndex(start="1/1/2000", end="1/10/2000")

    with pytest.raises(TypeError):
        DatetimeIndex("1/1/2000")

    # generator expression
    first_day = datetime(2000, 1, 1)
    from_gen = DatetimeIndex(first_day + timedelta(i) for i in range(10))
    from_list = DatetimeIndex([first_day + timedelta(i) for i in range(10)])
    tm.assert_index_equal(from_gen, from_list)

    # NumPy string array, first with three dates, then with a trailing "NaT"
    for last in ("2000-01-03", "NaT"):
        strings = np.array(["2000-01-01", "2000-01-02", last])
        from_strings = DatetimeIndex(strings)
        from_objects = DatetimeIndex(strings.astype("O"))
        tm.assert_index_equal(from_strings, from_objects)

        # the i8 values rebuild an equal index
        tm.assert_index_equal(DatetimeIndex(from_objects.asi8), from_objects)

    # non-conforming
    nonconforming = (
        "Inferred frequency None from passed values "
        "does not conform to passed frequency D"
    )
    with pytest.raises(ValueError, match=nonconforming):
        DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"], freq="D")

    # only two of start/end/periods/freq are supplied in each call
    underspecified = (
        "Of the four parameters: start, end, periods, and freq, "
        "exactly three must be specified"
    )
    incomplete_kwargs = (
        {"start": "2011-01-01", "freq": "b"},
        {"end": "2011-01-01", "freq": "B"},
        {"periods": 10, "freq": "D"},
    )
    for kwargs in incomplete_kwargs:
        with pytest.raises(ValueError, match=underspecified):
            date_range(**kwargs)
|
||||
|
||||
@pytest.mark.parametrize("freq", ["AS", "W-SUN"])
def test_constructor_datetime64_tzformat(self, freq):
    """Offset-suffixed start/end strings match fixed-offset tz-aware ranges."""
    # Each case: (start, end, fixed offset in minutes, named zone used for
    # the i8 comparison).
    cases = [
        # see GH#6572: ISO 8601 format results in pytz.FixedOffset
        # (America/Lima is used because `US/Eastern` is unusable due to DST)
        ("2013-01-01T00:00:00-05:00", "2016-01-01T23:59:59-05:00", -300, "America/Lima"),
        ("2013-01-01T00:00:00+09:00", "2016-01-01T23:59:59+09:00", 540, "Asia/Tokyo"),
        # Non ISO 8601 format results in dateutil.tz.tzoffset
        ("2013/1/1 0:00:00-5:00", "2016/1/1 23:59:59-5:00", -300, "America/Lima"),
        ("2013/1/1 0:00:00+9:00", "2016/1/1 23:59:59+09:00", 540, "Asia/Tokyo"),
    ]
    naive_start, naive_end = "2013-01-01T00:00:00", "2016-01-01T23:59:59"

    for start, end, offset_minutes, zone in cases:
        idx = date_range(start, end, freq=freq)

        expected = date_range(
            naive_start, naive_end, freq=freq, tz=pytz.FixedOffset(offset_minutes)
        )
        tm.assert_index_equal(idx, expected)

        expected_i8 = date_range(naive_start, naive_end, freq=freq, tz=zone)
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
|
||||
|
||||
def test_constructor_dtype(self):
    """Check how the ``dtype`` and ``tz`` arguments of DatetimeIndex interact."""
    days = ["2013-01-01", "2013-01-02"]
    eastern_dtype = "datetime64[ns, US/Eastern]"

    # passing a dtype with a tz should localize
    expected = DatetimeIndex(days).tz_localize("US/Eastern")
    tm.assert_index_equal(DatetimeIndex(days, dtype=eastern_dtype), expected)
    tm.assert_index_equal(DatetimeIndex(days, tz="US/Eastern"), expected)

    # if we already have a tz and its not the same, then raise
    idx = DatetimeIndex(days, dtype=eastern_dtype)
    failing = [
        (
            ValueError,
            r"cannot supply both a tz and a timezone-naive dtype"
            r" \(i\.e\. datetime64\[ns\]\)",
            {"dtype": "datetime64[ns]"},
        ),
        # this is effectively trying to convert tz's
        (
            TypeError,
            "data is already tz-aware US/Eastern, unable to set specified tz: CET",
            {"dtype": "datetime64[ns, CET]"},
        ),
        (
            ValueError,
            "cannot supply both a tz and a dtype with a tz",
            {"tz": "CET", "dtype": eastern_dtype},
        ),
    ]
    for exc_type, msg, kwargs in failing:
        with pytest.raises(exc_type, match=msg):
            DatetimeIndex(idx, **kwargs)

    # re-passing the dtype the index already has is accepted
    result = DatetimeIndex(idx, dtype=eastern_dtype)
    tm.assert_index_equal(idx, result)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [object, np.int32, np.int64])
def test_constructor_invalid_dtype_raises(self, dtype):
    """object and integer dtypes are rejected by DatetimeIndex with ValueError."""
    # GH 23986
    plain_ints = [1, 2]
    with pytest.raises(ValueError):
        DatetimeIndex(plain_ints, dtype=dtype)
|
||||
|
||||
def test_constructor_name(self):
    """date_range stores the ``name`` it is given."""
    label = "TEST"
    named = date_range(start="2000-01-01", periods=1, freq="A", name=label)
    assert named.name == "TEST"
|
||||
|
||||
def test_000constructor_resolution(self):
    """The nanosecond field of a Timestamp is kept inside a DatetimeIndex."""
    # 2252
    # seconds since epoch plus one millisecond, one microsecond, one nanosecond
    nanos = 1352934390 * 10 ** 9 + 10 ** 6 + 10 ** 3 + 1
    stamp = Timestamp(nanos)
    wrapped = DatetimeIndex([stamp])

    assert wrapped.nanosecond[0] == stamp.nanosecond
|
||||
|
||||
def test_disallow_setting_tz(self):
    """Assigning to ``DatetimeIndex.tz`` raises AttributeError."""
    # GH 3746
    utc_index = DatetimeIndex(["2010"], tz="UTC")
    replacement = pytz.timezone("US/Pacific")
    with pytest.raises(AttributeError):
        utc_index.tz = replacement
|
||||
|
||||
@pytest.mark.parametrize(
    "tz",
    [
        None,
        "America/Los_Angeles",
        pytz.timezone("America/Los_Angeles"),
        Timestamp("2000", tz="America/Los_Angeles").tz,
    ],
)
def test_constructor_start_end_with_tz(self, tz):
    """date_range with tz-aware endpoints ends up in the endpoints' timezone."""
    # GH 18595
    zone = "America/Los_Angeles"
    bounds = ["2013-01-01 06:00:00", "2013-01-02 06:00:00"]
    start, end = (Timestamp(text, tz=zone) for text in bounds)

    result = date_range(freq="D", start=start, end=end, tz=tz)
    expected = DatetimeIndex(bounds, tz=zone)
    tm.assert_index_equal(result, expected)

    # Especially assert that the timezone is consistent for pytz
    assert pytz.timezone(zone) is result.tz
|
||||
|
||||
@pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"])
def test_constructor_with_non_normalized_pytz(self, tz):
    """A tz object taken from a Timestamp yields ``pytz.timezone(tz)`` on the index."""
    # GH 18595
    tz_from_stamp = Timestamp("2010", tz=tz).tz
    built = DatetimeIndex(["2010"], tz=tz_from_stamp)
    assert pytz.timezone(tz) is built.tz
|
||||
|
||||
def test_constructor_timestamp_near_dst(self):
    """Timestamps and their ``to_pydatetime()`` forms build equal indexes."""
    # GH 20854
    # same wall time, written with the +0300 and the +0200 offset
    stamps = [
        Timestamp("2016-10-30 03:00:00{}".format(offset), tz="Europe/Helsinki")
        for offset in ("+0300", "+0200")
    ]
    from_stamps = DatetimeIndex(stamps)
    from_pydatetimes = DatetimeIndex([stamp.to_pydatetime() for stamp in stamps])
    tm.assert_index_equal(from_stamps, from_pydatetimes)
|
||||
|
||||
# TODO(GH-24559): Remove the xfail for the tz-aware case.
@pytest.mark.parametrize("klass", [Index, DatetimeIndex])
@pytest.mark.parametrize("box", [np.array, partial(np.array, dtype=object), list])
@pytest.mark.parametrize(
    "tz, dtype",
    [
        pytest.param(
            "US/Pacific",
            "datetime64[ns, US/Pacific]",
            marks=[
                pytest.mark.xfail(),
                pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning"),
            ],
        ),
        [None, "datetime64[ns]"],
    ],
)
def test_constructor_with_int_tz(self, klass, box, tz, dtype):
    """An index built from ``Timestamp.value`` plus dtype equals one built from the Timestamp."""
    # GH 20997, 20964
    stamp = Timestamp("2018-01-01", tz=tz)
    from_int = klass(box([stamp.value]), dtype=dtype)
    from_stamp = klass([stamp])
    # single-element comparison, so the boolean result is unambiguous
    assert from_int == from_stamp
|
||||
|
||||
# This is the desired future behavior
@pytest.mark.xfail(reason="Future behavior", strict=False)
@pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning")
def test_construction_int_rountrip(self, tz_naive_fixture):
    """An integer passed with a tz should come back unchanged from ``asi8``."""
    # GH 12619
    # TODO(GH-24559): Remove xfail
    epoch_ns = 1293858000000000000
    roundtripped = DatetimeIndex([epoch_ns], tz=tz_naive_fixture).asi8[0]
    assert epoch_ns == roundtripped
|
||||
|
||||
def test_construction_from_replaced_timestamps_with_dst(self):
    """Timestamps altered with ``replace`` rebuild a tz-aware DatetimeIndex."""
    # GH 18785
    melbourne = "Australia/Melbourne"
    monthly = pd.date_range(
        pd.Timestamp(2000, 1, 1), pd.Timestamp(2005, 1, 1), freq="MS", tz=melbourne
    )
    frame = pd.DataFrame({"data": range(len(monthly))}, index=monthly)
    yearly = frame.resample("Y").mean()

    # move every yearly label to June 1st of the same year
    result = pd.DatetimeIndex(
        [stamp.replace(month=6, day=1) for stamp in yearly.index]
    )
    expected = pd.DatetimeIndex(
        ["{}-06-01 00:00:00".format(year) for year in range(2000, 2006)],
        tz=melbourne,
    )
    tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_construction_with_tz_and_tz_aware_dti(self):
    """Passing a different tz alongside a tz-aware index raises TypeError."""
    # GH 23579
    central = date_range("2016-01-01", periods=3, tz="US/Central")
    with pytest.raises(TypeError):
        DatetimeIndex(central, tz="Asia/Tokyo")
|
||||
|
||||
def test_construction_with_nat_and_tzlocal(self):
    """Strings with "NaT" plus a tzlocal tz match a Timestamp/NaT list."""
    local_tz = dateutil.tz.tzlocal()
    from_strings = DatetimeIndex(["2018", "NaT"], tz=local_tz)
    from_objects = DatetimeIndex([Timestamp("2018", tz=local_tz), pd.NaT])
    tm.assert_index_equal(from_strings, from_objects)
|
||||
|
||||
def test_constructor_no_precision_warns(self):
    """dtype="datetime64" without a unit warns and gives a datetime64[ns] index."""
    # GH-24753, GH-24739
    expected = pd.DatetimeIndex(["2000"], dtype="datetime64[ns]")

    # we set the stacklevel for DatetimeIndex, so only that constructor is
    # run with the stacklevel check enabled
    constructors = ((pd.DatetimeIndex, True), (pd.Index, False))
    for constructor, check_stacklevel in constructors:
        with tm.assert_produces_warning(
            FutureWarning, check_stacklevel=check_stacklevel
        ):
            result = constructor(["2000"], dtype="datetime64")
        tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_constructor_wrong_precision_raises(self):
    """dtype="datetime64[us]" is rejected by DatetimeIndex with ValueError."""
    microsecond_dtype = "datetime64[us]"
    with pytest.raises(ValueError):
        pd.DatetimeIndex(["2000"], dtype=microsecond_dtype)
|
||||
|
||||
|
||||
class TestTimeSeries:
    """DatetimeIndex constructor and date_range checks."""

    def test_dti_constructor_preserve_dti_freq(self):
        """Wrapping a date_range in DatetimeIndex keeps its freq."""
        source = date_range("1/1/2000", "1/2/2000", freq="5min")
        rewrapped = DatetimeIndex(source)
        assert source.freq == rewrapped.freq

    def test_dti_constructor_years_only(self, tz_naive_fixture):
        """Year-only start/end strings match ranges with spelled-out dates."""
        tz = tz_naive_fixture
        # GH 6961
        # (end year, freq, expected start, expected end); start is always "2014"
        cases = [
            ("2015", "M", "2014-01-31", "2014-12-31"),
            ("2015", "MS", "2014-01-01", "2015-01-01"),
            ("2020", "A", "2014-12-31", "2019-12-31"),
            ("2020", "AS", "2014-01-01", "2020-01-01"),
        ]
        pairs = [
            (
                date_range("2014", end_year, freq=freq, tz=tz),
                date_range(exp_start, exp_end, freq=freq, tz=tz),
            )
            for end_year, freq, exp_start, exp_end in cases
        ]
        for rng, expected in pairs:
            tm.assert_index_equal(rng, expected)

    def test_dti_constructor_small_int(self, any_int_dtype):
        """Integer arrays of any int dtype are accepted by DatetimeIndex."""
        # see gh-13721
        exp = DatetimeIndex(
            ["1970-01-01 00:00:00.0000000{}".format(digit) for digit in range(3)]
        )

        small_ints = np.array([0, 10, 20], dtype=any_int_dtype)
        tm.assert_index_equal(DatetimeIndex(small_ints), exp)

    def test_ctor_str_intraday(self):
        """A string with a seconds component keeps that second."""
        first_stamp = DatetimeIndex(["1-1-2000 00:00:01"])[0]
        assert first_stamp.second == 1

    def test_is_(self):
        """``is_`` is true for the index itself and a view, false for a copy."""
        monthly = date_range(start="1/1/2005", end="12/1/2005", freq="M")
        assert monthly.is_(monthly)

        as_view = monthly.view()
        assert monthly.is_(as_view)

        as_copy = monthly.copy()
        assert not monthly.is_(as_copy)

    def test_index_cast_datetime64_other_units(self):
        """Index values of an M8[D] array equal ``ensure_datetime64ns`` of it."""
        day_values = np.arange(0, 100, 10, dtype=np.int64).view("M8[D]")
        idx = Index(day_values)

        as_ns = conversion.ensure_datetime64ns(day_values)
        assert (idx.values == as_ns).all()

    def test_constructor_int64_nocopy(self):
        """Without ``copy`` the index sees later writes to the int64 array."""
        # GH#1624
        shared = np.arange(1000, dtype=np.int64)
        sharing_index = DatetimeIndex(shared)

        shared[50:100] = -1
        assert (sharing_index.asi8[50:100] == -1).all()

        # with copy=True the same write is not visible through the index
        private = np.arange(1000, dtype=np.int64)
        copied_index = DatetimeIndex(private, copy=True)

        private[50:100] = -1
        assert (copied_index.asi8[50:100] != -1).all()

    @pytest.mark.parametrize(
        "freq", ["M", "Q", "A", "D", "B", "BH", "T", "S", "L", "U", "H", "N", "C"]
    )
    def test_from_freq_recreate_from_data(self, freq):
        """A one-period date_range survives a trip through DatetimeIndex."""
        # first without a timezone, then with US/Pacific
        for tz_kwargs in ({}, {"tz": "US/Pacific"}):
            org = date_range(
                start="2001/02/01 09:00", freq=freq, periods=1, **tz_kwargs
            )
            idx = DatetimeIndex(org, freq=freq, **tz_kwargs)
            tm.assert_index_equal(idx, org)

    def test_datetimeindex_constructor_misc(self):
        """Mixed input kinds and equivalent freq spellings give matching results."""
        unparseable = ["1/1/2005", "1/2/2005", "Jn 3, 2005", "2005-01-04"]
        msg = r"(\(')?Unknown string format(:', 'Jn 3, 2005'\))?"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(unparseable)

        # the same four dates supplied in several input forms
        rest = ["1/2/2005", "1/3/2005", "2005-01-04"]
        all_strings = ["1/1/2005"] + rest
        reference = DatetimeIndex(all_strings)
        others = [
            DatetimeIndex([datetime(2005, 1, 1)] + rest),
            DatetimeIndex([Timestamp(datetime(2005, 1, 1))] + rest),
            DatetimeIndex(np.array(all_strings, dtype="O")),
            DatetimeIndex(to_datetime(all_strings)),
            DatetimeIndex(
                to_datetime(["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"])
            ),
        ]

        day_first = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
        year_first = DatetimeIndex(
            ["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True
        )
        tm.assert_index_equal(day_first, year_first)

        for other in others:
            assert (reference.values == other.values).all()

        sdate = datetime(1999, 12, 25)
        edate = datetime(2000, 1, 1)

        from_start = date_range(start=sdate, freq="1B", periods=20)
        assert len(from_start) == 20
        assert from_start[0] == sdate + 0 * offsets.BDay()
        assert from_start.freq == "B"

        # freq given as a (unit, multiple) tuple
        to_end = date_range(end=edate, freq=("D", 5), periods=20)
        assert len(to_end) == 20
        assert to_end[-1] == edate
        assert to_end.freq == "5D"

        # a string alias and the offset object it is compared against
        alias_pairs = [
            ("W-SUN", offsets.Week(weekday=6)),
            ("QS", offsets.QuarterBegin(startingMonth=1)),
            ("BQ", offsets.BQuarterEnd(startingMonth=12)),
        ]
        for alias, offset in alias_pairs:
            by_alias = date_range(start=sdate, end=edate, freq=alias)
            by_offset = date_range(start=sdate, end=edate, freq=offset)
            assert len(by_alias) == len(by_offset)
            assert by_alias.freq == by_offset.freq
|
||||
@@ -0,0 +1,948 @@
|
||||
"""
|
||||
test date_range, bdate_range construction from the convenience range functions
|
||||
"""
|
||||
|
||||
from datetime import datetime, time, timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
from pytz import timezone
|
||||
|
||||
from pandas.errors import OutOfBoundsDatetime
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DatetimeIndex, Timestamp, bdate_range, date_range, offsets
|
||||
from pandas.tests.series.common import TestData
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from pandas.tseries.offsets import (
|
||||
BDay,
|
||||
CDay,
|
||||
DateOffset,
|
||||
MonthEnd,
|
||||
generate_range,
|
||||
prefix_mapping,
|
||||
)
|
||||
|
||||
# Module-level start and end datetimes, exactly one calendar year apart.
START = datetime(2009, 1, 1)
END = datetime(2010, 1, 1)
|
||||
|
||||
|
||||
class TestTimestampEquivDateRange:
    """Compare date_range elements with directly constructed Timestamps."""

    # Older tests in TestTimeSeries constructed their `stamp` objects
    # using `date_range` instead of the `Timestamp` constructor.
    # TestTimestampEquivDateRange checks that these are equivalent in the
    # pertinent cases.

    def test_date_range_timestamp_equiv(self):
        """tz given as the string "US/Eastern"."""
        first = date_range("20090415", "20090519", tz="US/Eastern")[0]

        direct = Timestamp("20090415", tz="US/Eastern", freq="D")
        assert direct == first

    def test_date_range_timestamp_equiv_dateutil(self):
        """tz given as the string "dateutil/US/Eastern"."""
        first = date_range("20090415", "20090519", tz="dateutil/US/Eastern")[0]

        direct = Timestamp("20090415", tz="dateutil/US/Eastern", freq="D")
        assert direct == first

    def test_date_range_timestamp_equiv_explicit_pytz(self):
        """tz given as a ``pytz.timezone`` object."""
        first = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern"))[0]

        direct = Timestamp("20090415", tz=pytz.timezone("US/Eastern"), freq="D")
        assert direct == first

    @td.skip_if_windows_python_3
    def test_date_range_timestamp_equiv_explicit_dateutil(self):
        """tz given as the object returned by ``dateutil_gettz``."""
        from pandas._libs.tslibs.timezones import dateutil_gettz as gettz

        first = date_range("20090415", "20090519", tz=gettz("US/Eastern"))[0]

        direct = Timestamp("20090415", tz=gettz("US/Eastern"), freq="D")
        assert direct == first

    def test_date_range_timestamp_equiv_from_datetime_instance(self):
        """Both sides are built from the same ``datetime`` instance."""
        moment = datetime(2014, 3, 4)
        # build a timestamp with a frequency, since then it supports
        # addition/subtraction of integers
        from_range = date_range(moment, periods=1, freq="D")[0]

        direct = Timestamp(moment, freq="D")
        assert direct == from_range

    def test_date_range_timestamp_equiv_preserve_frequency(self):
        """Both sides are built from the string "2014-03-05" with freq="D"."""
        from_range = date_range("2014-03-05", periods=1, freq="D")[0]
        direct = Timestamp("2014-03-05", freq="D")

        assert from_range == direct
|
||||
|
||||
|
||||
class TestDateRanges(TestData):
|
||||
def test_date_range_nat(self):
    """NaT as either endpoint of date_range raises ValueError."""
    # GH#11587
    msg = "Neither `start` nor `end` can be NaT"
    endpoint_pairs = (
        {"start": "2016-01-01", "end": pd.NaT},
        {"start": pd.NaT, "end": "2016-01-01"},
    )
    for endpoints in endpoint_pairs:
        with pytest.raises(ValueError, match=msg):
            date_range(freq="D", **endpoints)
|
||||
|
||||
def test_date_range_multiplication_overflow(self):
    """A long daily range builds without warnings; an oversized one raises."""
    # GH#24255
    # check that overflows in calculating `addend = periods * stride`
    # are caught
    n_periods = 213503
    with tm.assert_produces_warning(None):
        # we should _not_ be seeing a overflow RuntimeWarning
        long_range = date_range(start="1677-09-22", periods=n_periods, freq="D")

    assert long_range[0] == Timestamp("1677-09-22")
    assert len(long_range) == n_periods

    with pytest.raises(OutOfBoundsDatetime, match="Cannot generate range with"):
        date_range("1969-05-04", periods=200000000, freq="30000D")
|
||||
|
||||
def test_date_range_unsigned_overflow_handling(self):
    """Rebuilding a wide daily range from one endpoint plus periods matches it."""
    # GH#24255
    # case where `addend = periods * stride` overflows int64 bounds
    # but not uint64 bounds
    full_span = date_range(start="1677-09-22", end="2262-04-11", freq="D")
    n_days = len(full_span)

    from_start = date_range(start=full_span[0], periods=n_days, freq="D")
    assert from_start.equals(full_span)

    from_end = date_range(end=full_span[-1], periods=n_days, freq="D")
    assert from_end.equals(full_span)
|
||||
|
||||
def test_date_range_int64_overflow_non_recoverable(self):
    """Hourly ranges that run out of bounds raise OutOfBoundsDatetime."""
    # GH#24255
    msg = "Cannot generate range with"
    n_hours = 106752 * 24
    anchors = (
        # case with start later than 1970-01-01, overflow int64 but not uint64
        {"start": "1970-02-01"},
        # case with end before 1970-01-01, overflow int64 but not uint64
        {"end": "1969-11-14"},
    )
    for anchor in anchors:
        with pytest.raises(OutOfBoundsDatetime, match=msg):
            date_range(periods=n_hours, freq="H", **anchor)
|
||||
|
||||
def test_date_range_int64_overflow_stride_endpoint_different_signs(self):
    """Backward hourly ranges match whether given both ends or one end plus periods."""
    # cases where stride * periods overflow int64 and stride/endpoint
    # have different signs
    # (start, end, which endpoint is reused together with `periods`)
    cases = [
        (Timestamp("2262-02-23"), Timestamp("1969-11-14"), "end"),
        (Timestamp("1970-02-01"), Timestamp("1677-10-22"), "start"),
    ]
    for start, end, anchor in cases:
        expected = date_range(start=start, end=end, freq="-1H")
        assert expected[0] == start
        assert expected[-1] == end

        anchor_kwargs = {anchor: start if anchor == "start" else end}
        rebuilt = date_range(periods=len(expected), freq="-1H", **anchor_kwargs)
        tm.assert_index_equal(rebuilt, expected)
|
||||
|
||||
def test_date_range_out_of_bounds(self):
    """100000 daily periods from either anchor raise OutOfBoundsDatetime."""
    # GH#14187
    for anchor in ({"start": "2016-01-01"}, {"end": "1763-10-12"}):
        with pytest.raises(OutOfBoundsDatetime):
            date_range(periods=100000, freq="D", **anchor)
|
||||
|
||||
def test_date_range_gen_error(self):
    """An 18-minute span at 5-minute steps has four entries."""
    five_minute_steps = date_range("1/1/2000 00:00", "1/1/2000 00:18", freq="5min")
    n_entries = len(five_minute_steps)
    assert n_entries == 4
|
||||
|
||||
@pytest.mark.parametrize("freq", ["AS", "YS"])
def test_begin_year_alias(self, freq):
    """Both aliases give January 1st of 2013 through 2017."""
    # see gh-9313
    generated = date_range("1/1/2013", "7/1/2017", freq=freq)
    year_starts = ["{}-01-01".format(year) for year in range(2013, 2018)]
    wanted = pd.DatetimeIndex(year_starts, freq=freq)
    tm.assert_index_equal(generated, wanted)
|
||||
|
||||
@pytest.mark.parametrize("freq", ["A", "Y"])
def test_end_year_alias(self, freq):
    """Both aliases give December 31st of 2013 through 2016."""
    # see gh-9313
    generated = date_range("1/1/2013", "7/1/2017", freq=freq)
    year_ends = ["{}-12-31".format(year) for year in range(2013, 2017)]
    wanted = pd.DatetimeIndex(year_ends, freq=freq)
    tm.assert_index_equal(generated, wanted)
|
||||
|
||||
@pytest.mark.parametrize("freq", ["BA", "BY"])
def test_business_end_year_alias(self, freq):
    """Both aliases give the listed year-end dates for 2013 through 2016."""
    # see gh-9313
    generated = date_range("1/1/2013", "7/1/2017", freq=freq)
    # note the last entry is December 30th, not the 31st
    business_year_ends = ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"]
    wanted = pd.DatetimeIndex(business_year_ends, freq=freq)
    tm.assert_index_equal(generated, wanted)
|
||||
|
||||
def test_date_range_negative_freq(self):
    """Negative multiples step backwards and are kept as the index freq."""
    # GH 11018
    # (start, freq, the three expected dates)
    cases = [
        ("2011-12-31", "-2A", ["2011-12-31", "2009-12-31", "2007-12-31"]),
        ("2011-01-31", "-2M", ["2011-01-31", "2010-11-30", "2010-09-30"]),
    ]
    for start, freq, dates in cases:
        rng = date_range(start, freq=freq, periods=3)
        exp = pd.DatetimeIndex(dates, freq=freq)
        tm.assert_index_equal(rng, exp)
        assert rng.freq == freq
|
||||
|
||||
def test_date_range_bms_bug(self):
    """The first "BMS" entry from 1/1/2000 is 2000-01-03."""
    # #1645
    first_entry = date_range("1/1/2000", periods=10, freq="BMS")[0]
    assert first_entry == Timestamp("2000-01-03")
|
||||
|
||||
def test_date_range_normalize(self):
    """With normalize=False the time-of-day of the start is kept."""
    snap = datetime.today()
    n = 50

    every_other_day = date_range(snap, periods=n, normalize=False, freq="2D")
    step = timedelta(days=2)
    by_hand = DatetimeIndex([snap + i * step for i in range(n)])
    tm.assert_index_equal(every_other_day, by_hand)

    # every business-day entry keeps the 08:15 time of the start string
    business_days = date_range(
        "1/1/2000 08:15", periods=n, normalize=False, freq="B"
    )
    quarter_past_eight = time(8, 15)
    assert all(stamp.time() == quarter_past_eight for stamp in business_days)
|
||||
|
||||
def test_date_range_fy5252(self):
    """Two periods of an FY5253 offset land on the two expected dates."""
    fiscal_year = offsets.FY5253(startingMonth=1, weekday=3, variation="nearest")
    first, second = date_range(start="2013-01-01", periods=2, freq=fiscal_year)
    assert first == Timestamp("2013-01-31")
    assert second == Timestamp("2014-01-30")
|
||||
|
||||
def test_date_range_ambiguous_arguments(self):
    """Supplying start, end, periods and freq together raises ValueError."""
    # #2538
    all_four = {
        "start": datetime(2011, 1, 1, 5, 3, 40),
        "end": datetime(2011, 1, 1, 8, 9, 40),
        "periods": 10,
        "freq": "s",
    }
    msg = (
        "Of the four parameters: start, end, periods, and freq, "
        "exactly three must be specified"
    )
    with pytest.raises(ValueError, match=msg):
        date_range(**all_four)
|
||||
|
||||
def test_date_range_convenience_periods(self):
    """start/end/periods without freq gives evenly spaced timestamps."""
    # GH 20808
    evenly_spaced = date_range("2018-04-24", "2018-04-27", periods=3)
    wanted = DatetimeIndex(
        ["2018-04-24 00:00:00", "2018-04-25 12:00:00", "2018-04-27 00:00:00"],
        freq=None,
    )
    tm.assert_index_equal(evenly_spaced, wanted)

    # Test if spacing remains linear if tz changes to dst in range
    sydney = "Australia/Sydney"
    across_dst = date_range(
        "2018-04-01 01:00:00", "2018-04-01 04:00:00", tz=sydney, periods=3
    )
    wanted = DatetimeIndex(
        [
            Timestamp(text, tz=sydney)
            for text in (
                "2018-04-01 01:00:00+1100",
                "2018-04-01 02:00:00+1000",
                "2018-04-01 04:00:00+1000",
            )
        ]
    )
    tm.assert_index_equal(across_dst, wanted)
|
||||
|
||||
@pytest.mark.parametrize(
    "start,end,result_tz",
    [
        ("20180101", "20180103", "US/Eastern"),
        (datetime(2018, 1, 1), datetime(2018, 1, 3), "US/Eastern"),
        (Timestamp("20180101"), Timestamp("20180103"), "US/Eastern"),
        (
            Timestamp("20180101", tz="US/Eastern"),
            Timestamp("20180103", tz="US/Eastern"),
            "US/Eastern",
        ),
        (
            Timestamp("20180101", tz="US/Eastern"),
            Timestamp("20180103", tz="US/Eastern"),
            None,
        ),
    ],
)
def test_date_range_linspacing_tz(self, start, end, result_tz):
    """A periods-based range over these bounds equals a daily US/Eastern range."""
    # GH 20983
    daily_expected = date_range("20180101", periods=3, freq="D", tz="US/Eastern")
    linspaced = date_range(start, end, periods=3, tz=result_tz)
    tm.assert_index_equal(linspaced, daily_expected)
|
||||
|
||||
def test_date_range_businesshour(self):
    """freq="BH" ranges match explicitly listed 09:00-16:00 hourly stamps."""
    working_hours = range(9, 17)  # 09:00 through 16:00 inclusive

    # a single day
    one_day = ["2014-07-04 {:02d}:00".format(hour) for hour in working_hours]
    idx = DatetimeIndex(one_day, freq="BH")
    rng = date_range("2014-07-04 09:00", "2014-07-04 16:00", freq="BH")
    tm.assert_index_equal(idx, rng)

    # last stamp of 07-04 straight to the first stamp of 07-07
    idx = DatetimeIndex(["2014-07-04 16:00", "2014-07-07 09:00"], freq="BH")
    rng = date_range("2014-07-04 16:00", "2014-07-07 09:00", freq="BH")
    tm.assert_index_equal(idx, rng)

    # three days: 07-04, 07-07 and 07-08
    three_days = [
        "2014-07-{:02d} {:02d}:00".format(day, hour)
        for day in (4, 7, 8)
        for hour in working_hours
    ]
    idx = DatetimeIndex(three_days, freq="BH")
    rng = date_range("2014-07-04 09:00", "2014-07-08 16:00", freq="BH")
    tm.assert_index_equal(idx, rng)
|
||||
|
||||
def test_range_misspecified(self):
    """date_range rejects calls that give fewer than three of its four parameters."""
    # GH #1095
    expected_error = (
        "Of the four parameters: start, end, periods, and "
        "freq, exactly three must be specified"
    )

    # Each entry is an under-specified keyword set; checked in this order.
    incomplete_specs = [
        {"start": "1/1/2000"},
        {"end": "1/1/2000"},
        {"periods": 10},
        {"start": "1/1/2000", "freq": "H"},
        {"end": "1/1/2000", "freq": "H"},
        {"periods": 10, "freq": "H"},
        {},
    ]
    for kwargs in incomplete_specs:
        with pytest.raises(ValueError, match=expected_error):
            date_range(**kwargs)
|
||||
|
||||
def test_compat_replace(self):
    """A Timestamp built with a freq can seed date_range with that same freq."""
    # https://github.com/statsmodels/statsmodels/issues/3349
    # replace should take ints/longs for compat
    first_quarter = Timestamp("1960-04-01 00:00:00", freq="QS-JAN")
    quarters = date_range(first_quarter, periods=76, freq="QS-JAN")
    assert len(quarters) == 76
|
||||
|
||||
def test_catch_infinite_loop(self):
    """An offset that does not advance the date raises instead of looping."""
    stuck_offset = offsets.DateOffset(minute=5)
    range_start = datetime(2011, 11, 11)
    range_end = datetime(2011, 11, 12)
    # blow up, don't loop forever
    expected_error = "Offset <DateOffset: minute=5> did not increment date"
    with pytest.raises(ValueError, match=expected_error):
        date_range(range_start, range_end, freq=stuck_offset)
|
||||
|
||||
@pytest.mark.parametrize("periods", (1, 2))
def test_wom_len(self, periods):
    """A WOM-1MON range has exactly the requested number of periods."""
    # https://github.com/pandas-dev/pandas/issues/20517
    first_mondays = date_range(start="20110101", periods=periods, freq="WOM-1MON")
    assert len(first_mondays) == periods
|
||||
|
||||
def test_construct_over_dst(self):
    """An hourly US/Pacific range contains both readings of the ambiguous 01:00."""
    # GH 20854
    zone = "US/Pacific"
    # 01:00 localized first with ambiguous=True, then with ambiguous=False
    both_readings = [
        Timestamp("2010-11-07 01:00:00").tz_localize(zone, ambiguous=flag)
        for flag in (True, False)
    ]
    midnight = Timestamp("2010-11-07 00:00:00", tz=zone)
    expected = DatetimeIndex([midnight] + both_readings)

    result = date_range(start="2010-11-7", periods=3, freq="H", tz=zone)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_construct_with_different_start_end_string_format(self):
    """start and end may use different string formats ("-" vs "/" dates)."""
    # GH 12064
    hourly_stamps = [
        Timestamp("2013-01-01 {:02d}:00:00+09:00".format(hour)) for hour in range(3)
    ]
    expected = DatetimeIndex(hourly_stamps)

    result = date_range(
        "2013-01-01 00:00:00+09:00", "2013/01/01 02:00:00+09:00", freq="H"
    )
    tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_error_with_zero_monthends(self):
    """A MonthEnd(0) frequency is rejected because it does not increment the date."""
    zero_step = MonthEnd(0)
    expected_error = r"Offset <0 \* MonthEnds> did not increment date"
    with pytest.raises(ValueError, match=expected_error):
        date_range("1/1/2000", "1/1/2001", freq=zero_step)
|
||||
|
||||
def test_range_bug(self):
    """A DateOffset(months=3) range equals five manually stepped datetimes."""
    # GH #770
    quarter_step = DateOffset(months=3)
    result = date_range("2011-1-1", "2012-1-31", freq=quarter_step)

    origin = datetime(2011, 1, 1)
    stepped = [origin + multiple * quarter_step for multiple in range(5)]
    tm.assert_index_equal(result, DatetimeIndex(stepped))
|
||||
|
||||
def test_range_tz_pytz(self):
    """The tz of pytz-localized bounds is kept for each way of specifying the range."""
    # see gh-2906
    tz = timezone("US/Eastern")
    start = tz.localize(datetime(2011, 1, 1))
    end = tz.localize(datetime(2011, 1, 3))

    # start+periods, end+periods, then start+end
    specifications = (
        {"start": start, "periods": 3},
        {"end": end, "periods": 3},
        {"start": start, "end": end},
    )
    for kwargs in specifications:
        dr = date_range(**kwargs)
        assert dr.tz.zone == tz.zone
        assert dr[0] == start
        assert dr[2] == end
|
||||
|
||||
@pytest.mark.parametrize(
    "start, end",
    [
        (
            Timestamp(datetime(2014, 3, 6), tz="US/Eastern"),
            Timestamp(datetime(2014, 3, 12), tz="US/Eastern"),
        ),
        (
            Timestamp(datetime(2013, 11, 1), tz="US/Eastern"),
            Timestamp(datetime(2013, 11, 6), tz="US/Eastern"),
        ),
    ],
)
def test_range_tz_dst_straddle_pytz(self, start, end):
    """Daily US/Eastern ranges hit both bounds and stay at hour 0 throughout."""

    def check_bounds_and_midnight(daily):
        # Shared assertions for every way of building the range.
        assert daily[0] == start
        assert daily[-1] == end
        assert np.all(daily.hour == 0)

    # tz taken from the bounds
    check_bounds_and_midnight(date_range(start, end, freq="D"))

    # tz given both by the bounds and explicitly
    check_bounds_and_midnight(date_range(start, end, freq="D", tz="US/Eastern"))

    # naive bounds, tz given explicitly
    naive_start = start.replace(tzinfo=None)
    naive_end = end.replace(tzinfo=None)
    check_bounds_and_midnight(
        date_range(naive_start, naive_end, freq="D", tz="US/Eastern")
    )
|
||||
|
||||
def test_range_tz_dateutil(self):
|
||||
# see gh-2906
|
||||
|
||||
# Use maybe_get_tz to fix filename in tz under dateutil.
|
||||
from pandas._libs.tslibs.timezones import maybe_get_tz
|
||||
|
||||
tz = lambda x: maybe_get_tz("dateutil/" + x)
|
||||
|
||||
start = datetime(2011, 1, 1, tzinfo=tz("US/Eastern"))
|
||||
end = datetime(2011, 1, 3, tzinfo=tz("US/Eastern"))
|
||||
|
||||
dr = date_range(start=start, periods=3)
|
||||
assert dr.tz == tz("US/Eastern")
|
||||
assert dr[0] == start
|
||||
assert dr[2] == end
|
||||
|
||||
dr = date_range(end=end, periods=3)
|
||||
assert dr.tz == tz("US/Eastern")
|
||||
assert dr[0] == start
|
||||
assert dr[2] == end
|
||||
|
||||
dr = date_range(start=start, end=end)
|
||||
assert dr.tz == tz("US/Eastern")
|
||||
assert dr[0] == start
|
||||
assert dr[2] == end
|
||||
|
||||
@pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"])
def test_range_closed(self, freq):
    """closed="left"/"right" drop the end/start point when the full range has it."""
    begin = datetime(2011, 1, 1)
    end = datetime(2014, 1, 1)

    closed = date_range(begin, end, closed=None, freq=freq)
    left = date_range(begin, end, closed="left", freq=freq)
    right = date_range(begin, end, closed="right", freq=freq)

    # When a bound is not part of the fully closed range there is nothing to
    # trim, so the expectation falls back to the computed range itself.
    expected_left = closed[:-1] if end == closed[-1] else left
    expected_right = closed[1:] if begin == closed[0] else right

    tm.assert_index_equal(expected_left, left)
    tm.assert_index_equal(expected_right, right)
|
||||
|
||||
def test_range_closed_with_tz_aware_start_end(self):
    """closed="left"/"right" trimming also holds for US/Eastern ranges."""
    # GH12409, GH12684
    freqs = ["1D", "3D", "2M", "7W", "3H", "A"]

    def check(range_begin, range_end, first, last, **tz_kwargs):
        # ``first``/``last`` are the tz-aware bounds compared against the
        # endpoints of the fully closed range.
        for freq in freqs:
            closed = date_range(
                range_begin, range_end, closed=None, freq=freq, **tz_kwargs
            )
            left = date_range(
                range_begin, range_end, closed="left", freq=freq, **tz_kwargs
            )
            right = date_range(
                range_begin, range_end, closed="right", freq=freq, **tz_kwargs
            )

            expected_left = closed[:-1] if last == closed[-1] else left
            expected_right = closed[1:] if first == closed[0] else right

            tm.assert_index_equal(expected_left, left)
            tm.assert_index_equal(expected_right, right)

    aware_begin = Timestamp("2011/1/1", tz="US/Eastern")
    aware_end = Timestamp("2014/1/1", tz="US/Eastern")

    # tz-aware bounds, no tz argument
    check(aware_begin, aware_end, aware_begin, aware_end)

    # naive bounds, tz passed explicitly
    check(
        Timestamp("2011/1/1"),
        Timestamp("2014/1/1"),
        aware_begin,
        aware_end,
        tz="US/Eastern",
    )
|
||||
|
||||
@pytest.mark.parametrize("closed", ["right", "left", None])
def test_range_closed_boundary(self, closed):
    """Compare QS-MAR ranges whose bounds do or do not fall on a quarter start."""
    # GH#11804
    right_boundary = date_range(
        "2015-09-12", "2015-12-01", freq="QS-MAR", closed=closed
    )
    left_boundary = date_range(
        "2015-09-01", "2015-09-12", freq="QS-MAR", closed=closed
    )
    both_boundary = date_range(
        "2015-09-01", "2015-12-01", freq="QS-MAR", closed=closed
    )

    # Default expectation: identical to the range with both bounds.
    expected_both = both_boundary
    expected_left = both_boundary
    expected_right = both_boundary

    if closed == "right":
        expected_left = both_boundary[1:]
    elif closed == "left":
        expected_right = both_boundary[:-1]
    elif closed is None:
        expected_right = both_boundary[1:]
        expected_left = both_boundary[:-1]

    tm.assert_index_equal(right_boundary, expected_right)
    tm.assert_index_equal(left_boundary, expected_left)
    tm.assert_index_equal(both_boundary, expected_both)
|
||||
|
||||
def test_years_only(self):
    """Year-only strings work as bounds for a freq="M" range."""
    # GH 6961
    month_ends = date_range("2014", "2015", freq="M")
    first, last = month_ends[0], month_ends[-1]
    assert first == datetime(2014, 1, 31)
    assert last == datetime(2014, 12, 31)
|
||||
|
||||
def test_freq_divides_end_in_nanos(self):
    """A 345-minute freq over 10:00-16:00 gives 10:00 and 15:45 on both days."""
    # GH 10885
    result_1 = date_range("2005-01-12 10:00", "2005-01-12 16:00", freq="345min")
    result_2 = date_range("2005-01-13 10:00", "2005-01-13 16:00", freq="345min")

    # Keyword arguments shared by both expected indexes.
    index_kwargs = {"dtype": "datetime64[ns]", "freq": "345T", "tz": None}
    expected_1 = DatetimeIndex(
        ["2005-01-12 10:00:00", "2005-01-12 15:45:00"], **index_kwargs
    )
    expected_2 = DatetimeIndex(
        ["2005-01-13 10:00:00", "2005-01-13 15:45:00"], **index_kwargs
    )

    tm.assert_index_equal(result_1, expected_1)
    tm.assert_index_equal(result_2, expected_2)
|
||||
|
||||
def test_cached_range_bug(self):
|
||||
rng = date_range("2010-09-01 05:00:00", periods=50, freq=DateOffset(hours=6))
|
||||
assert len(rng) == 50
|
||||
assert rng[0] == datetime(2010, 9, 1, 5)
|
||||
|
||||
def test_timezone_comparaison_bug(self):
|
||||
# smoke test
|
||||
start = Timestamp("20130220 10:00", tz="US/Eastern")
|
||||
result = date_range(start, periods=2, tz="US/Eastern")
|
||||
assert len(result) == 2
|
||||
|
||||
def test_timezone_comparaison_assert(self):
    """A tz argument that differs from the start's tz raises AssertionError."""
    eastern_start = Timestamp("20130220 10:00", tz="US/Eastern")
    expected_error = "Inferred time zone not equal to passed time zone"
    with pytest.raises(AssertionError, match=expected_error):
        date_range(eastern_start, periods=2, tz="Europe/Berlin")
|
||||
|
||||
def test_negative_non_tick_frequency_descending_dates(self, tz_aware_fixture):
    """A "-1MS" range from June to January equals the reversed "1MS" range."""
    # GH 23270
    tz = tz_aware_fixture

    ascending = pd.date_range(
        end="2011-06-01", start="2011-01-01", freq="1MS", tz=tz
    )
    expected = ascending[::-1]

    result = pd.date_range(start="2011-06-01", end="2011-01-01", freq="-1MS", tz=tz)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
class TestGenRangeGeneration:
    """Tests for generate_range, plus two date_range checks grouped with them."""

    def test_generate(self):
        """A BDay() offset and the "B" alias generate the same dates."""
        from_object = list(generate_range(START, END, offset=BDay()))
        from_alias = list(generate_range(START, END, offset="B"))
        assert from_object == from_alias

    def test_generate_cday(self):
        """A CDay() offset and the "C" alias generate the same dates."""
        from_object = list(generate_range(START, END, offset=CDay()))
        from_alias = list(generate_range(START, END, offset="C"))
        assert from_object == from_alias

    def test_1(self):
        """start + periods=2 gives the start date and the following day."""
        expected = [datetime(2009, 3, 25), datetime(2009, 3, 26)]
        generated = list(generate_range(start=datetime(2009, 3, 25), periods=2))
        assert generated == expected

    def test_2(self):
        """start + end generates every day from start through end."""
        expected = [datetime(2008, 1, 1), datetime(2008, 1, 2), datetime(2008, 1, 3)]
        generated = list(
            generate_range(start=datetime(2008, 1, 1), end=datetime(2008, 1, 3))
        )
        assert generated == expected

    def test_3(self):
        """This start/end pair generates nothing."""
        generated = list(
            generate_range(start=datetime(2008, 1, 5), end=datetime(2008, 1, 6))
        )
        assert generated == []

    def test_precision_finer_than_offset(self):
        """The start's 00:00:03 time of day is kept with "Q" and "W" freqs."""
        # GH#9907
        start = "2015-04-15 00:00:03"
        result1 = pd.date_range(start=start, end="2016-04-22 00:00:00", freq="Q")
        result2 = pd.date_range(start=start, end="2015-06-22 00:00:04", freq="W")

        quarter_ends = [
            "2015-06-30 00:00:03",
            "2015-09-30 00:00:03",
            "2015-12-31 00:00:03",
            "2016-03-31 00:00:03",
        ]
        sundays = [
            "2015-04-19 00:00:03",
            "2015-04-26 00:00:03",
            "2015-05-03 00:00:03",
            "2015-05-10 00:00:03",
            "2015-05-17 00:00:03",
            "2015-05-24 00:00:03",
            "2015-05-31 00:00:03",
            "2015-06-07 00:00:03",
            "2015-06-14 00:00:03",
            "2015-06-21 00:00:03",
        ]
        expected1 = DatetimeIndex(
            quarter_ends, dtype="datetime64[ns]", freq="Q-DEC", tz=None
        )
        expected2 = DatetimeIndex(
            sundays, dtype="datetime64[ns]", freq="W-SUN", tz=None
        )

        tm.assert_index_equal(result1, expected1)
        tm.assert_index_equal(result2, expected2)

    # Class-level values consumed by the parametrization below.
    dt1 = "2017-01-01"
    dt2 = "2017-01-01"
    tz1 = "US/Eastern"
    tz2 = "Europe/London"

    @pytest.mark.parametrize(
        "start,end",
        [
            (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2)),
            (pd.Timestamp(dt1), pd.Timestamp(dt2, tz=tz2)),
            (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2, tz=tz2)),
            (pd.Timestamp(dt1, tz=tz2), pd.Timestamp(dt2, tz=tz1)),
        ],
    )
    def test_mismatching_tz_raises_err(self, start, end):
        """Bounds with mismatching time zones raise TypeError."""
        # issue 18488
        with pytest.raises(TypeError):
            pd.date_range(start, end)

        business_day = BDay()
        with pytest.raises(TypeError):
            pd.date_range(start, end, freq=business_day)
|
||||
|
||||
|
||||
class TestBusinessDateRange:
    """Tests for bdate_range and date_range with business-day frequencies."""

    def test_constructor(self):
        """Valid argument combinations build; a positional freq or freq=None raise."""
        business_day = BDay()
        bdate_range(START, END, freq=business_day)
        bdate_range(START, periods=20, freq=business_day)
        bdate_range(end=START, periods=20, freq=business_day)

        # The third positional argument is ``periods``, so "B" is rejected there.
        msg = "periods must be a number, got B"
        for factory in (date_range, bdate_range):
            with pytest.raises(TypeError, match=msg):
                factory("2011-1-1", "2012-1-1", "B")

        msg = "freq must be specified for bdate_range; use date_range instead"
        with pytest.raises(TypeError, match=msg):
            bdate_range(START, END, periods=10, freq=None)

    def test_naive_aware_conflicts(self):
        """Joining a tz-naive range with a tz-aware one raises in both directions."""
        naive = bdate_range(START, END, freq=BDay(), tz=None)
        aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong")

        msg = "tz-naive.*tz-aware"
        for left, right in ((naive, aware), (aware, naive)):
            with pytest.raises(TypeError, match=msg):
                left.join(right)

    def test_misc(self):
        """end + periods gives 20 business days finishing on ``end``."""
        end = datetime(2009, 5, 13)
        first_date = end - 19 * BDay()
        dr = bdate_range(end=end, periods=20)

        assert len(dr) == 20
        assert dr[0] == first_date
        assert dr[-1] == end

    def test_date_parse_failure(self):
        """An unparseable date string raises ValueError wherever it is passed."""
        badly_formed_date = "2007/100/1"

        with pytest.raises(ValueError):
            Timestamp(badly_formed_date)

        with pytest.raises(ValueError):
            bdate_range(start=badly_formed_date, periods=10)

        with pytest.raises(ValueError):
            bdate_range(end=badly_formed_date, periods=10)

        with pytest.raises(ValueError):
            bdate_range(badly_formed_date, badly_formed_date)

    def test_daterange_bug_456(self):
        """Union of two business-day ranges is still a DatetimeIndex."""
        # GH #456
        single_day = bdate_range("12/5/2011", "12/5/2011")
        two_days = bdate_range("12/2/2011", "12/5/2011")
        two_days.freq = BDay()

        combined = single_day.union(two_days)
        assert isinstance(combined, DatetimeIndex)

    @pytest.mark.parametrize("closed", ["left", "right"])
    def test_bdays_and_open_boundaries(self, closed):
        """Weekend bounds with an open side give the Mon-Fri days in between."""
        # GH 6673
        start = "2018-07-21"  # Saturday
        end = "2018-07-29"  # Sunday
        result = pd.date_range(start, end, freq="B", closed=closed)

        bday_start = "2018-07-23"  # Monday
        bday_end = "2018-07-27"  # Friday
        expected = pd.date_range(bday_start, bday_end, freq="D")
        tm.assert_index_equal(result, expected)

    def test_bday_near_overflow(self):
        """A one-period "B" range starting at the floored Timestamp.max builds."""
        # GH#24252 avoid doing unnecessary addition that _would_ overflow
        start = pd.Timestamp.max.floor("D").to_pydatetime()
        expected = pd.DatetimeIndex([start], freq="B")
        rng = pd.date_range(start, end=None, periods=1, freq="B")
        tm.assert_index_equal(rng, expected)

    def test_bday_overflow_error(self):
        """A second period past the floored Timestamp.max raises OutOfBoundsDatetime."""
        # GH#24252 check that we get OutOfBoundsDatetime and not OverflowError
        start = pd.Timestamp.max.floor("D").to_pydatetime()
        with pytest.raises(OutOfBoundsDatetime):
            pd.date_range(start, periods=2, freq="B")
|
||||
|
||||
|
||||
class TestCustomDateRange:
    """Tests for bdate_range with custom business-day ("C") frequencies."""

    def test_constructor(self):
        """Valid argument combinations build; a positional freq string raises."""
        custom_day = CDay()
        bdate_range(START, END, freq=custom_day)
        bdate_range(START, periods=20, freq=custom_day)
        bdate_range(end=START, periods=20, freq=custom_day)

        # The third positional argument is ``periods``, so "C" is rejected there.
        msg = "periods must be a number, got C"
        for factory in (date_range, bdate_range):
            with pytest.raises(TypeError, match=msg):
                factory("2011-1-1", "2012-1-1", "C")

    def test_misc(self):
        """end + periods gives 20 custom business days finishing on ``end``."""
        end = datetime(2009, 5, 13)
        first_date = end - 19 * CDay()
        dr = bdate_range(end=end, periods=20, freq="C")

        assert len(dr) == 20
        assert dr[0] == first_date
        assert dr[-1] == end

    def test_daterange_bug_456(self):
        """Union of two custom business-day ranges is still a DatetimeIndex."""
        # GH #456
        single_day = bdate_range("12/5/2011", "12/5/2011", freq="C")
        two_days = bdate_range("12/2/2011", "12/5/2011", freq="C")
        two_days.freq = CDay()

        combined = single_day.union(two_days)
        assert isinstance(combined, DatetimeIndex)

    def test_cdaterange(self):
        """Plain "C" frequency yields three consecutive days from 2013-05-01."""
        expected = DatetimeIndex(["2013-05-01", "2013-05-02", "2013-05-03"])
        result = bdate_range("2013-05-01", periods=3, freq="C")
        tm.assert_index_equal(result, expected)

    def test_cdaterange_weekmask(self):
        """A Sun-Thu weekmask changes the days produced and needs a custom freq."""
        sun_to_thu = "Sun Mon Tue Wed Thu"
        expected = DatetimeIndex(["2013-05-01", "2013-05-02", "2013-05-05"])
        result = bdate_range("2013-05-01", periods=3, freq="C", weekmask=sun_to_thu)
        tm.assert_index_equal(result, expected)

        # raise with non-custom freq
        msg = (
            "a custom frequency string is required when holidays or "
            "weekmask are passed, got frequency B"
        )
        with pytest.raises(ValueError, match=msg):
            bdate_range("2013-05-01", periods=3, weekmask=sun_to_thu)

    def test_cdaterange_holidays(self):
        """A holiday is skipped, and holidays need a custom freq."""
        may_first = ["2013-05-01"]
        expected = DatetimeIndex(["2013-05-02", "2013-05-03", "2013-05-06"])
        result = bdate_range("2013-05-01", periods=3, freq="C", holidays=may_first)
        tm.assert_index_equal(result, expected)

        # raise with non-custom freq
        msg = (
            "a custom frequency string is required when holidays or "
            "weekmask are passed, got frequency B"
        )
        with pytest.raises(ValueError, match=msg):
            bdate_range("2013-05-01", periods=3, holidays=["2013-05-01"])

    def test_cdaterange_weekmask_and_holidays(self):
        """Weekmask and holidays combine, and together need a custom freq."""
        calendar_kwargs = {
            "weekmask": "Sun Mon Tue Wed Thu",
            "holidays": ["2013-05-01"],
        }
        expected = DatetimeIndex(["2013-05-02", "2013-05-05", "2013-05-06"])
        result = bdate_range("2013-05-01", periods=3, freq="C", **calendar_kwargs)
        tm.assert_index_equal(result, expected)

        # raise with non-custom freq
        msg = (
            "a custom frequency string is required when holidays or "
            "weekmask are passed, got frequency B"
        )
        with pytest.raises(ValueError, match=msg):
            bdate_range(
                "2013-05-01",
                periods=3,
                weekmask="Sun Mon Tue Wed Thu",
                holidays=["2013-05-01"],
            )

    @pytest.mark.parametrize(
        "freq", [freq for freq in prefix_mapping if freq.startswith("C")]
    )
    def test_all_custom_freq(self, freq):
        """Each "C"-prefixed freq works with weekmask/holidays; a mangled one raises."""
        # should not raise
        bdate_range(
            START, END, freq=freq, weekmask="Mon Wed Fri", holidays=["2009-03-14"]
        )

        bad_freq = freq + "FOO"
        msg = "invalid custom frequency string: {freq}"
        expected_error = msg.format(freq=bad_freq)
        with pytest.raises(ValueError, match=expected_error):
            bdate_range(START, END, freq=bad_freq)

    @pytest.mark.parametrize(
        "start_end",
        [
            ("2018-01-01T00:00:01.000Z", "2018-01-03T00:00:01.000Z"),
            ("2018-01-01T00:00:00.010Z", "2018-01-03T00:00:00.010Z"),
            ("2001-01-01T00:00:00.010Z", "2001-01-03T00:00:00.010Z"),
        ],
    )
    def test_range_with_millisecond_resolution(self, start_end):
        """A two-period, left-closed range contains only the start stamp."""
        # https://github.com/pandas-dev/pandas/issues/24110
        start, end = start_end
        expected = DatetimeIndex([start])
        result = pd.date_range(start=start, end=end, periods=2, closed="left")
        tm.assert_index_equal(result, expected)
|
||||
@@ -0,0 +1,452 @@
|
||||
from datetime import date
|
||||
|
||||
import dateutil
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, DatetimeIndex, Index, Timestamp, date_range, offsets
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_almost_equal
|
||||
|
||||
randn = np.random.randn
|
||||
|
||||
|
||||
class TestDatetimeIndex:
|
||||
def test_roundtrip_pickle_with_tz(self):
|
||||
|
||||
# GH 8367
|
||||
# round-trip of timezone
|
||||
index = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
|
||||
unpickled = tm.round_trip_pickle(index)
|
||||
tm.assert_index_equal(index, unpickled)
|
||||
|
||||
def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
|
||||
# GH7774
|
||||
index = date_range("20130101", periods=3, tz="US/Eastern")
|
||||
assert str(index.reindex([])[0].tz) == "US/Eastern"
|
||||
assert str(index.reindex(np.array([]))[0].tz) == "US/Eastern"
|
||||
|
||||
def test_time_loc(self):  # GH8667
    """Lookup by a datetime.time key agrees with explicit positional lookup."""
    from datetime import time

    from pandas._libs.index import _SIZE_CUTOFF

    key = time(15, 11, 30)
    seconds_per_day = 24 * 3600
    first_hit = key.hour * 3600 + key.minute * 60 + key.second

    # Series lengths just below and just above _SIZE_CUTOFF
    lengths = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64)

    for n in lengths:
        idx = pd.date_range("2014-11-26", periods=n, freq="S")
        ts = pd.Series(np.random.randn(n), index=idx)
        # positions whose time of day equals ``key`` in a per-second index
        positions = np.arange(first_hit, n, seconds_per_day)

        tm.assert_numpy_array_equal(
            ts.index.get_loc(key), positions, check_dtype=False
        )
        tm.assert_series_equal(ts[key], ts.iloc[positions])

        # in-place updates through either access path must agree as well
        by_key = ts.copy()
        by_position = ts.copy()
        by_key[key] *= -10
        by_position.iloc[positions] *= -10
        tm.assert_series_equal(by_key, by_position)
|
||||
|
||||
def test_time_overflow_for_32bit_machines(self):
    """A NumPy-integer ``periods`` yields a per-second range of that length."""
    # GH8943. On some machines NumPy defaults to np.int32 (for example,
    # 32-bit Linux machines). In the function _generate_regular_range
    # found in tseries/index.py, `periods` gets multiplied by `strides`
    # (which has value 1e9) and since the max value for np.int32 is ~2e9,
    # and since those machines won't promote np.int32 to np.int64, we get
    # overflow.
    periods = np.int_(1000)

    # Anchor the range first at its start, then at its end.
    for anchor in ("start", "end"):
        idx = pd.date_range(periods=periods, freq="S", **{anchor: "2000"})
        assert len(idx) == periods
|
||||
|
||||
def test_nat(self):
|
||||
assert DatetimeIndex([np.nan])[0] is pd.NaT
|
||||
|
||||
def test_week_of_month_frequency(self):
    """Append/union of these dates work, and a WOM-1SAT range is correct."""
    # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
    d1 = date(2002, 9, 1)
    d2 = date(2013, 10, 27)
    d3 = date(2012, 9, 30)
    two_dates = DatetimeIndex([d1, d2])
    one_date = DatetimeIndex([d3])

    # append keeps insertion order
    appended = two_dates.append(one_date)
    tm.assert_index_equal(appended, DatetimeIndex([d1, d2, d3]))

    # union comes back in chronological order
    unioned = two_dates.union(one_date)
    tm.assert_index_equal(unioned, DatetimeIndex([d1, d3, d2]))

    # GH 5115
    first_saturdays = ["2013-01-05", "2013-02-02", "2013-03-02", "2013-04-06"]
    expected = DatetimeIndex(first_saturdays, freq="WOM-1SAT")
    result = date_range("2013-1-1", periods=4, freq="WOM-1SAT")
    tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_hash_error(self):
    """Hashing the index raises TypeError naming the index's type."""
    index = date_range("20010101", periods=10)
    expected_error = "unhashable type: {0.__name__!r}".format(type(index))
    with pytest.raises(TypeError, match=expected_error):
        hash(index)
|
||||
|
||||
def test_stringified_slice_with_tz(self):
    """Slicing a tz-aware frame with an offset-bearing string does not raise."""
    # GH#2658
    first_day = "2013-01-07"
    eastern_days = date_range(
        start=first_day, freq="1d", periods=10, tz="US/Eastern"
    )
    df = DataFrame(np.arange(10), index=eastern_days)
    df["2013-01-14 23:44:34.437768-05:00":]  # no exception here
|
||||
|
||||
def test_append_join_nondatetimeindex(self):
|
||||
rng = date_range("1/1/2000", periods=10)
|
||||
idx = Index(["a", "b", "c", "d"])
|
||||
|
||||
result = rng.append(idx)
|
||||
assert isinstance(result[0], Timestamp)
|
||||
|
||||
# it works
|
||||
rng.join(idx, how="outer")
|
||||
|
||||
def test_map(self):
    """Mapping a strftime formatter matches formatting each element by hand."""
    rng = date_range("1/1/2000", periods=10)

    def as_compact_date(stamp):
        return stamp.strftime("%Y%m%d")

    expected = Index([as_compact_date(stamp) for stamp in rng], dtype="<U8")
    mapped = rng.map(as_compact_date)
    tm.assert_index_equal(mapped, expected)
|
||||
|
||||
def test_map_fallthrough(self, capsys):
    """Mapping timestamps to Periods writes nothing to stderr."""
    # GH#22067, check we don't get warnings about silently ignored errors
    business_days = date_range("2017-01-01", "2018-01-01", freq="B")

    def to_month_period(stamp):
        return pd.Period(year=stamp.year, month=stamp.month, freq="M")

    business_days.map(to_month_period)

    stderr_text = capsys.readouterr().err
    assert stderr_text == ""
|
||||
|
||||
def test_iteration_preserves_tz(self):
    """Iterating a tz-aware index yields the same stamps as positional access."""
    # see gh-8890
    named_zone = date_range("2012-01-01", periods=3, freq="H", tz="US/Eastern")
    for position, stamp in enumerate(named_zone):
        assert stamp == named_zone[position]

    fixed_offset = date_range(
        "2012-01-01", periods=3, freq="H", tz=dateutil.tz.tzoffset(None, -28800)
    )
    for position, stamp in enumerate(fixed_offset):
        by_position = fixed_offset[position]
        assert stamp._repr_base == by_position._repr_base
        assert stamp == by_position

    # 9100
    parsed_offset = pd.DatetimeIndex(
        ["2014-12-01 03:32:39.987000-08:00", "2014-12-01 04:12:34.987000-08:00"]
    )
    for position, stamp in enumerate(parsed_offset):
        by_position = parsed_offset[position]
        assert stamp._repr_base == by_position._repr_base
        assert stamp == by_position
|
||||
|
||||
@pytest.mark.parametrize("periods", [0, 9999, 10000, 10001])
def test_iteration_over_chunksize(self, periods):
    """Iteration visits every element once, in positional order."""
    # GH21012

    index = date_range("2000-01-01 00:00:00", periods=periods, freq="min")

    visited = 0  # stays 0 when the index is empty
    for visited, stamp in enumerate(index, start=1):
        assert index[visited - 1] == stamp
    assert visited == len(index)
|
||||
|
||||
def test_misc_coverage(self):
|
||||
rng = date_range("1/1/2000", periods=5)
|
||||
result = rng.groupby(rng.day)
|
||||
assert isinstance(list(result.values())[0][0], Timestamp)
|
||||
|
||||
idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"])
|
||||
assert not idx.equals(list(idx))
|
||||
|
||||
non_datetime = Index(list("abc"))
|
||||
assert not idx.equals(list(non_datetime))
|
||||
|
||||
def test_string_index_series_name_converted(self):
|
||||
# #1644
|
||||
df = DataFrame(np.random.randn(10, 4), index=date_range("1/1/2000", periods=10))
|
||||
|
||||
result = df.loc["1/3/2000"]
|
||||
assert result.name == df.index[2]
|
||||
|
||||
result = df.T["1/3/2000"]
|
||||
assert result.name == df.index[2]
|
||||
|
||||
def test_get_duplicates(self):
    """get_duplicates warns with FutureWarning and returns the repeated dates."""
    repeated = ["2000-01-02", "2000-01-03"]
    idx = DatetimeIndex(
        [
            "2000-01-01",
            "2000-01-02",
            "2000-01-02",
            "2000-01-03",
            "2000-01-03",
            "2000-01-04",
        ]
    )

    with tm.assert_produces_warning(FutureWarning):
        # Deprecated - see GH20239
        result = idx.get_duplicates()

    tm.assert_index_equal(result, DatetimeIndex(repeated))
|
||||
|
||||
def test_argmin_argmax(self):
|
||||
idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
|
||||
assert idx.argmin() == 1
|
||||
assert idx.argmax() == 0
|
||||
|
||||
def test_sort_values(self):
    """sort_values orders both ways and can return the sorting indexer."""
    idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])

    ascending = idx.sort_values()
    assert ascending.is_monotonic

    descending = idx.sort_values(ascending=False)
    assert descending[::-1].is_monotonic

    ascending, indexer = idx.sort_values(return_indexer=True)
    assert ascending.is_monotonic
    tm.assert_numpy_array_equal(indexer, np.array([1, 2, 0], dtype=np.intp))

    descending, indexer = idx.sort_values(return_indexer=True, ascending=False)
    assert descending[::-1].is_monotonic
    tm.assert_numpy_array_equal(indexer, np.array([0, 2, 1], dtype=np.intp))
|
||||
|
||||
def test_map_bug_1677(self):
|
||||
index = DatetimeIndex(["2012-04-25 09:30:00.393000"])
|
||||
f = index.asof
|
||||
|
||||
result = index.map(f)
|
||||
expected = Index([f(index[0])])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_groupby_function_tuple_1677(self):
|
||||
df = DataFrame(np.random.rand(100), index=date_range("1/1/2000", periods=100))
|
||||
monthly_group = df.groupby(lambda x: (x.year, x.month))
|
||||
|
||||
result = monthly_group.mean()
|
||||
assert isinstance(result.index[0], tuple)
|
||||
|
||||
def test_append_numpy_bug_1681(self):
|
||||
# another datetime64 bug
|
||||
dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
|
||||
a = DataFrame()
|
||||
c = DataFrame({"A": "foo", "B": dr}, index=dr)
|
||||
|
||||
result = a.append(c)
|
||||
assert (result["B"] == dr).all()
|
||||
|
||||
def test_isin(self):
|
||||
index = tm.makeDateIndex(4)
|
||||
result = index.isin(index)
|
||||
assert result.all()
|
||||
|
||||
result = index.isin(list(index))
|
||||
assert result.all()
|
||||
|
||||
assert_almost_equal(
|
||||
index.isin([index[2], 5]), np.array([False, False, True, False])
|
||||
)
|
||||
|
||||
def test_does_not_convert_mixed_integer(self):
|
||||
df = tm.makeCustomDataframe(
|
||||
10,
|
||||
10,
|
||||
data_gen_f=lambda *args, **kwargs: randn(),
|
||||
r_idx_type="i",
|
||||
c_idx_type="dt",
|
||||
)
|
||||
cols = df.columns.join(df.index, how="outer")
|
||||
joined = cols.join(df.columns)
|
||||
assert cols.dtype == np.dtype("O")
|
||||
assert cols.dtype == joined.dtype
|
||||
tm.assert_numpy_array_equal(cols.values, joined.values)
|
||||
|
||||
def test_join_self(self, join_type):
|
||||
index = date_range("1/1/2000", periods=10)
|
||||
joined = index.join(index, how=join_type)
|
||||
assert index is joined
|
||||
|
||||
def assert_index_parameters(self, index):
    """Assert that ``index`` has both its set and inferred frequency equal to ``"40960N"``."""
    expected_freq = "40960N"
    assert index.freq == expected_freq
    assert index.inferred_freq == expected_freq
|
||||
|
||||
def test_ns_index(self):
|
||||
nsamples = 400
|
||||
ns = int(1e9 / 24414)
|
||||
dtstart = np.datetime64("2012-09-20T00:00:00")
|
||||
|
||||
dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, "ns")
|
||||
freq = ns * offsets.Nano()
|
||||
index = pd.DatetimeIndex(dt, freq=freq, name="time")
|
||||
self.assert_index_parameters(index)
|
||||
|
||||
new_index = pd.date_range(start=index[0], end=index[-1], freq=index.freq)
|
||||
self.assert_index_parameters(new_index)
|
||||
|
||||
def test_join_with_period_index(self, join_type):
|
||||
df = tm.makeCustomDataframe(
|
||||
10,
|
||||
10,
|
||||
data_gen_f=lambda *args: np.random.randint(2),
|
||||
c_idx_type="p",
|
||||
r_idx_type="dt",
|
||||
)
|
||||
s = df.iloc[:5, 0]
|
||||
|
||||
expected = df.columns.astype("O").join(s.index, how=join_type)
|
||||
result = df.columns.join(s.index, how=join_type)
|
||||
tm.assert_index_equal(expected, result)
|
||||
|
||||
def test_factorize(self):
|
||||
idx1 = DatetimeIndex(
|
||||
["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"]
|
||||
)
|
||||
|
||||
exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
|
||||
exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
|
||||
|
||||
arr, idx = idx1.factorize()
|
||||
tm.assert_numpy_array_equal(arr, exp_arr)
|
||||
tm.assert_index_equal(idx, exp_idx)
|
||||
|
||||
arr, idx = idx1.factorize(sort=True)
|
||||
tm.assert_numpy_array_equal(arr, exp_arr)
|
||||
tm.assert_index_equal(idx, exp_idx)
|
||||
|
||||
# tz must be preserved
|
||||
idx1 = idx1.tz_localize("Asia/Tokyo")
|
||||
exp_idx = exp_idx.tz_localize("Asia/Tokyo")
|
||||
|
||||
arr, idx = idx1.factorize()
|
||||
tm.assert_numpy_array_equal(arr, exp_arr)
|
||||
tm.assert_index_equal(idx, exp_idx)
|
||||
|
||||
idx2 = pd.DatetimeIndex(
|
||||
["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"]
|
||||
)
|
||||
|
||||
exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
|
||||
exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
|
||||
arr, idx = idx2.factorize(sort=True)
|
||||
tm.assert_numpy_array_equal(arr, exp_arr)
|
||||
tm.assert_index_equal(idx, exp_idx)
|
||||
|
||||
exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
|
||||
exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"])
|
||||
arr, idx = idx2.factorize()
|
||||
tm.assert_numpy_array_equal(arr, exp_arr)
|
||||
tm.assert_index_equal(idx, exp_idx)
|
||||
|
||||
# freq must be preserved
|
||||
idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo")
|
||||
exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
|
||||
arr, idx = idx3.factorize()
|
||||
tm.assert_numpy_array_equal(arr, exp_arr)
|
||||
tm.assert_index_equal(idx, idx3)
|
||||
|
||||
def test_factorize_tz(self, tz_naive_fixture):
|
||||
tz = tz_naive_fixture
|
||||
# GH#13750
|
||||
base = pd.date_range("2016-11-05", freq="H", periods=100, tz=tz)
|
||||
idx = base.repeat(5)
|
||||
|
||||
exp_arr = np.arange(100, dtype=np.intp).repeat(5)
|
||||
|
||||
for obj in [idx, pd.Series(idx)]:
|
||||
arr, res = obj.factorize()
|
||||
tm.assert_numpy_array_equal(arr, exp_arr)
|
||||
tm.assert_index_equal(res, base)
|
||||
|
||||
def test_factorize_dst(self):
|
||||
# GH 13750
|
||||
idx = pd.date_range("2016-11-06", freq="H", periods=12, tz="US/Eastern")
|
||||
|
||||
for obj in [idx, pd.Series(idx)]:
|
||||
arr, res = obj.factorize()
|
||||
tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
|
||||
tm.assert_index_equal(res, idx)
|
||||
|
||||
idx = pd.date_range("2016-06-13", freq="H", periods=12, tz="US/Eastern")
|
||||
|
||||
for obj in [idx, pd.Series(idx)]:
|
||||
arr, res = obj.factorize()
|
||||
tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
|
||||
tm.assert_index_equal(res, idx)
|
||||
|
||||
@pytest.mark.parametrize(
    "arr, expected",
    [
        (pd.DatetimeIndex(["2017", "2017"]), pd.DatetimeIndex(["2017"])),
        (
            pd.DatetimeIndex(["2017", "2017"], tz="US/Eastern"),
            pd.DatetimeIndex(["2017"], tz="US/Eastern"),
        ),
    ],
)
def test_unique(self, arr, expected):
    """``unique`` collapses duplicates for naive and tz-aware indexes."""
    deduplicated = arr.unique()
    tm.assert_index_equal(deduplicated, expected)

    # GH 21737
    # Ensure the underlying data is consistent
    first_result, first_expected = deduplicated[0], expected[0]
    assert first_result == first_expected
|
||||
|
||||
def test_asarray_tz_naive(self):
|
||||
# This shouldn't produce a warning.
|
||||
idx = pd.date_range("2000", periods=2)
|
||||
# M8[ns] by default
|
||||
with tm.assert_produces_warning(None):
|
||||
result = np.asarray(idx)
|
||||
|
||||
expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# optionally, object
|
||||
with tm.assert_produces_warning(None):
|
||||
result = np.asarray(idx, dtype=object)
|
||||
|
||||
expected = np.array([pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-02")])
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_asarray_tz_aware(self):
|
||||
tz = "US/Central"
|
||||
idx = pd.date_range("2000", periods=2, tz=tz)
|
||||
expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]")
|
||||
# We warn by default and return an ndarray[M8[ns]]
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = np.asarray(idx)
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# Old behavior with no warning
|
||||
with tm.assert_produces_warning(None):
|
||||
result = np.asarray(idx, dtype="M8[ns]")
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# Future behavior with no warning
|
||||
expected = np.array(
|
||||
[pd.Timestamp("2000-01-01", tz=tz), pd.Timestamp("2000-01-02", tz=tz)]
|
||||
)
|
||||
with tm.assert_produces_warning(None):
|
||||
result = np.asarray(idx, dtype=object)
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
@@ -0,0 +1,32 @@
|
||||
""" generic tests from the Datetimelike class """
|
||||
|
||||
from pandas import DatetimeIndex, date_range
|
||||
from pandas.util import testing as tm
|
||||
|
||||
from ..datetimelike import DatetimeLike
|
||||
|
||||
|
||||
class TestDatetimeIndex(DatetimeLike):
    """Run the generic ``DatetimeLike`` tests with ``DatetimeIndex`` as holder."""

    _holder = DatetimeIndex

    def setup_method(self, method):
        """Register an ascending and a descending daily index as fixtures."""
        self.indices = {
            "index": tm.makeDateIndex(10),
            "index_dec": date_range("20130110", periods=10, freq="-1D"),
        }
        self.setup_indices()

    def create_index(self):
        """Return a five-element daily ``date_range`` starting 2013-01-01."""
        return date_range("20130101", periods=5)

    def test_shift(self):
        """No-op override."""
        pass  # handled in test_ops

    def test_pickle_compat_construction(self):
        """No-op override (no reason is given in this file)."""
        pass

    def test_intersection(self):
        """No-op override."""
        pass  # handled in test_setops

    def test_union(self):
        """No-op override."""
        pass  # handled in test_setops
|
||||
@@ -0,0 +1,241 @@
|
||||
from datetime import datetime
|
||||
|
||||
import dateutil.tz
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DatetimeIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_to_native_types():
|
||||
index = pd.date_range(freq="1D", periods=3, start="2017-01-01")
|
||||
|
||||
# First, with no arguments.
|
||||
expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object)
|
||||
|
||||
result = index.to_native_types()
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# No NaN values, so na_rep has no effect
|
||||
result = index.to_native_types(na_rep="pandas")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# Make sure slicing works
|
||||
expected = np.array(["2017-01-01", "2017-01-03"], dtype=object)
|
||||
|
||||
result = index.to_native_types([0, 2])
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# Make sure date formatting works
|
||||
expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object)
|
||||
|
||||
result = index.to_native_types(date_format="%m-%Y-%d")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# NULL object handling should work
|
||||
index = DatetimeIndex(["2017-01-01", pd.NaT, "2017-01-03"])
|
||||
expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object)
|
||||
|
||||
result = index.to_native_types()
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object)
|
||||
|
||||
result = index.to_native_types(na_rep="pandas")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestDatetimeIndexRendering:
    """Rendering checks for ``DatetimeIndex``: repr/str, Series repr and ``_summary``."""

    def test_dti_repr_short(self):
        """``repr`` of one-, two- and three-element ranges does not raise."""
        for periods in (1, 2, 3):
            dr = pd.date_range(start="1/1/2012", periods=periods)
            repr(dr)

    @pytest.mark.parametrize("method", ["__repr__", "__str__"])
    def test_dti_representation(self, method):
        """``__repr__`` / ``__str__`` produce the expected text for several indexes."""
        indexes = [
            DatetimeIndex([], freq="D"),
            DatetimeIndex(["2011-01-01"], freq="D"),
            DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D"),
            DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D"),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                tz="Asia/Tokyo",
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="US/Eastern"
            ),
            DatetimeIndex(["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="UTC"),
        ]

        # One expected rendering per entry of ``indexes``, in the same order.
        renderings = [
            "DatetimeIndex([], dtype='datetime64[ns]', freq='D')",
            "DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D')",
            (
                "DatetimeIndex(['2011-01-01', '2011-01-02'], "
                "dtype='datetime64[ns]', freq='D')"
            ),
            (
                "DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
                "dtype='datetime64[ns]', freq='D')"
            ),
            (
                "DatetimeIndex(['2011-01-01 09:00:00+09:00', "
                "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']"
                ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')"
            ),
            (
                "DatetimeIndex(['2011-01-01 09:00:00-05:00', "
                "'2011-01-01 10:00:00-05:00', 'NaT'], "
                "dtype='datetime64[ns, US/Eastern]', freq=None)"
            ),
            (
                "DatetimeIndex(['2011-01-01 09:00:00+00:00', "
                "'2011-01-01 10:00:00+00:00', 'NaT'], "
                "dtype='datetime64[ns, UTC]', freq=None)"
            ),
        ]

        with pd.option_context("display.width", 300):
            for indx, expected in zip(indexes, renderings):
                rendered = getattr(indx, method)()
                assert rendered == expected

    def test_dti_representation_to_series(self):
        """``repr`` of a Series built from each index matches the expected text."""
        # NOTE(review): the expected strings below have single spaces between
        # the row label and the value; they may have been whitespace-collapsed
        # when this file was captured -- confirm against real Series output.
        indexes = [
            DatetimeIndex([], freq="D"),
            DatetimeIndex(["2011-01-01"], freq="D"),
            DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D"),
            DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D"),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                tz="Asia/Tokyo",
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="US/Eastern"
            ),
            DatetimeIndex(["2011-01-01 09:00", "2011-01-02 10:15"]),
        ]

        renderings = [
            """Series([], dtype: datetime64[ns])""",
            "0 2011-01-01\ndtype: datetime64[ns]",
            "0 2011-01-01\n1 2011-01-02\ndtype: datetime64[ns]",
            (
                "0 2011-01-01\n"
                "1 2011-01-02\n"
                "2 2011-01-03\n"
                "dtype: datetime64[ns]"
            ),
            (
                "0 2011-01-01 09:00:00+09:00\n"
                "1 2011-01-01 10:00:00+09:00\n"
                "2 2011-01-01 11:00:00+09:00\n"
                "dtype: datetime64[ns, Asia/Tokyo]"
            ),
            (
                "0 2011-01-01 09:00:00-05:00\n"
                "1 2011-01-01 10:00:00-05:00\n"
                "2 NaT\n"
                "dtype: datetime64[ns, US/Eastern]"
            ),
            (
                "0 2011-01-01 09:00:00\n"
                "1 2011-01-02 10:15:00\n"
                "dtype: datetime64[ns]"
            ),
        ]

        with pd.option_context("display.width", 300):
            for idx, expected in zip(indexes, renderings):
                rendered = repr(Series(idx))
                assert rendered == expected

    def test_dti_summary(self):
        """``_summary`` reports entry count, range and frequency."""
        # GH#9116
        indexes = [
            DatetimeIndex([], freq="D"),
            DatetimeIndex(["2011-01-01"], freq="D"),
            DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D"),
            DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D"),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                tz="Asia/Tokyo",
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="US/Eastern"
            ),
        ]

        summaries = [
            "DatetimeIndex: 0 entries\nFreq: D",
            "DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\nFreq: D",
            "DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\nFreq: D",
            "DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\nFreq: D",
            (
                "DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 "
                "to 2011-01-01 11:00:00+09:00\n"
                "Freq: H"
            ),
            """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT""",
        ]

        for idx, expected in zip(indexes, summaries):
            summary = idx._summary()
            assert summary == expected

    def test_dti_business_repr(self):
        """``repr`` of a business-day range does not raise."""
        # only really care that it works
        business_days = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1))
        repr(business_days)

    def test_dti_business_summary(self):
        """``_summary`` works on a business-day range and on an empty slice of it."""
        business_days = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1))
        business_days._summary()
        business_days[2:2]._summary()

    def test_dti_business_summary_pytz(self):
        """``_summary`` works on a business-day range with a pytz UTC zone."""
        business_days = pd.bdate_range("1/1/2005", "1/1/2009", tz=pytz.utc)
        business_days._summary()

    def test_dti_business_summary_dateutil(self):
        """``_summary`` works on a business-day range with a dateutil UTC zone."""
        business_days = pd.bdate_range("1/1/2005", "1/1/2009", tz=dateutil.tz.tzutc())
        business_days._summary()

    def test_dti_custom_business_repr(self):
        """``repr`` of a ``freq="C"`` business range does not raise."""
        # only really care that it works
        custom_days = pd.bdate_range(
            datetime(2009, 1, 1), datetime(2010, 1, 1), freq="C"
        )
        repr(custom_days)

    def test_dti_custom_business_summary(self):
        """``_summary`` works on a ``freq="C"`` range and on an empty slice of it."""
        custom_days = pd.bdate_range(
            datetime(2009, 1, 1), datetime(2010, 1, 1), freq="C"
        )
        custom_days._summary()
        custom_days[2:2]._summary()

    def test_dti_custom_business_summary_pytz(self):
        """``_summary`` works on a ``freq="C"`` range with a pytz UTC zone."""
        custom_days = pd.bdate_range("1/1/2005", "1/1/2009", freq="C", tz=pytz.utc)
        custom_days._summary()

    def test_dti_custom_business_summary_dateutil(self):
        """``_summary`` works on a ``freq="C"`` range with a dateutil UTC zone."""
        custom_days = pd.bdate_range(
            "1/1/2005", "1/1/2009", freq="C", tz=dateutil.tz.tzutc()
        )
        custom_days._summary()
|
||||
@@ -0,0 +1,719 @@
|
||||
from datetime import datetime, time, timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DatetimeIndex, Index, Timestamp, date_range, notna
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from pandas.tseries.offsets import BDay, CDay
|
||||
|
||||
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
|
||||
|
||||
|
||||
class TestGetItem:
|
||||
def test_ellipsis(self):
|
||||
# GH#21282
|
||||
idx = pd.date_range(
|
||||
"2011-01-01", "2011-01-31", freq="D", tz="Asia/Tokyo", name="idx"
|
||||
)
|
||||
|
||||
result = idx[...]
|
||||
assert result.equals(idx)
|
||||
assert result is not idx
|
||||
|
||||
def test_getitem(self):
|
||||
idx1 = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
|
||||
idx2 = pd.date_range(
|
||||
"2011-01-01", "2011-01-31", freq="D", tz="Asia/Tokyo", name="idx"
|
||||
)
|
||||
|
||||
for idx in [idx1, idx2]:
|
||||
result = idx[0]
|
||||
assert result == Timestamp("2011-01-01", tz=idx.tz)
|
||||
|
||||
result = idx[0:5]
|
||||
expected = pd.date_range(
|
||||
"2011-01-01", "2011-01-05", freq="D", tz=idx.tz, name="idx"
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.freq == expected.freq
|
||||
|
||||
result = idx[0:10:2]
|
||||
expected = pd.date_range(
|
||||
"2011-01-01", "2011-01-09", freq="2D", tz=idx.tz, name="idx"
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.freq == expected.freq
|
||||
|
||||
result = idx[-20:-5:3]
|
||||
expected = pd.date_range(
|
||||
"2011-01-12", "2011-01-24", freq="3D", tz=idx.tz, name="idx"
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.freq == expected.freq
|
||||
|
||||
result = idx[4::-1]
|
||||
expected = DatetimeIndex(
|
||||
["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"],
|
||||
freq="-1D",
|
||||
tz=idx.tz,
|
||||
name="idx",
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.freq == expected.freq
|
||||
|
||||
def test_dti_business_getitem(self):
|
||||
rng = pd.bdate_range(START, END)
|
||||
smaller = rng[:5]
|
||||
exp = DatetimeIndex(rng.view(np.ndarray)[:5])
|
||||
tm.assert_index_equal(smaller, exp)
|
||||
|
||||
assert smaller.freq == rng.freq
|
||||
|
||||
sliced = rng[::5]
|
||||
assert sliced.freq == BDay() * 5
|
||||
|
||||
fancy_indexed = rng[[4, 3, 2, 1, 0]]
|
||||
assert len(fancy_indexed) == 5
|
||||
assert isinstance(fancy_indexed, DatetimeIndex)
|
||||
assert fancy_indexed.freq is None
|
||||
|
||||
# 32-bit vs. 64-bit platforms
|
||||
assert rng[4] == rng[np.int_(4)]
|
||||
|
||||
def test_dti_business_getitem_matplotlib_hackaround(self):
|
||||
rng = pd.bdate_range(START, END)
|
||||
values = rng[:, None]
|
||||
expected = rng.values[:, None]
|
||||
tm.assert_numpy_array_equal(values, expected)
|
||||
|
||||
def test_dti_custom_getitem(self):
|
||||
rng = pd.bdate_range(START, END, freq="C")
|
||||
smaller = rng[:5]
|
||||
exp = DatetimeIndex(rng.view(np.ndarray)[:5])
|
||||
tm.assert_index_equal(smaller, exp)
|
||||
assert smaller.freq == rng.freq
|
||||
|
||||
sliced = rng[::5]
|
||||
assert sliced.freq == CDay() * 5
|
||||
|
||||
fancy_indexed = rng[[4, 3, 2, 1, 0]]
|
||||
assert len(fancy_indexed) == 5
|
||||
assert isinstance(fancy_indexed, DatetimeIndex)
|
||||
assert fancy_indexed.freq is None
|
||||
|
||||
# 32-bit vs. 64-bit platforms
|
||||
assert rng[4] == rng[np.int_(4)]
|
||||
|
||||
def test_dti_custom_getitem_matplotlib_hackaround(self):
|
||||
rng = pd.bdate_range(START, END, freq="C")
|
||||
values = rng[:, None]
|
||||
expected = rng.values[:, None]
|
||||
tm.assert_numpy_array_equal(values, expected)
|
||||
|
||||
|
||||
class TestWhere:
|
||||
def test_where_other(self):
|
||||
# other is ndarray or Index
|
||||
i = pd.date_range("20130101", periods=3, tz="US/Eastern")
|
||||
|
||||
for arr in [np.nan, pd.NaT]:
|
||||
result = i.where(notna(i), other=np.nan)
|
||||
expected = i
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
i2 = i.copy()
|
||||
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
|
||||
result = i.where(notna(i2), i2)
|
||||
tm.assert_index_equal(result, i2)
|
||||
|
||||
i2 = i.copy()
|
||||
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
|
||||
result = i.where(notna(i2), i2.values)
|
||||
tm.assert_index_equal(result, i2)
|
||||
|
||||
def test_where_tz(self):
|
||||
i = pd.date_range("20130101", periods=3, tz="US/Eastern")
|
||||
result = i.where(notna(i))
|
||||
expected = i
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
i2 = i.copy()
|
||||
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
|
||||
result = i.where(notna(i2))
|
||||
expected = i2
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
class TestTake:
    """``DatetimeIndex.take``: freq inference, invalid kwargs and fill values."""

    def test_take(self):
        """``take`` keeps a freq for evenly spaced positions and drops it otherwise."""
        # GH#10295
        naive = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        aware = pd.date_range(
            "2011-01-01", "2011-01-31", freq="D", tz="Asia/Tokyo", name="idx"
        )

        # (positions, expected start, expected end, expected freq)
        regular_cases = [
            ([0, 1, 2], "2011-01-01", "2011-01-03", "D"),
            ([0, 2, 4], "2011-01-01", "2011-01-05", "2D"),
            ([7, 4, 1], "2011-01-08", "2011-01-02", "-3D"),
        ]
        # (positions, expected values); spacing is irregular so freq is None
        irregular_cases = [
            ([3, 2, 5], ["2011-01-04", "2011-01-03", "2011-01-06"]),
            ([-3, 2, 5], ["2011-01-29", "2011-01-03", "2011-01-06"]),
        ]

        for idx in (naive, aware):
            result = idx.take([0])
            assert result == Timestamp("2011-01-01", tz=idx.tz)

            for positions, start, end, freq in regular_cases:
                result = idx.take(positions)
                expected = pd.date_range(start, end, freq=freq, tz=idx.tz, name="idx")
                tm.assert_index_equal(result, expected)
                assert result.freq == expected.freq

            for positions, stamps in irregular_cases:
                result = idx.take(positions)
                expected = DatetimeIndex(stamps, freq=None, tz=idx.tz, name="idx")
                tm.assert_index_equal(result, expected)
                assert result.freq is None

    def test_take_invalid_kwargs(self):
        """Unknown, ``out`` and ``mode`` keyword arguments are rejected."""
        idx = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        indices = [1, 6, 5, 9, 10, 13, 15, 3]

        unexpected_kwarg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=unexpected_kwarg):
            idx.take(indices, foo=2)

        out_unsupported = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=out_unsupported):
            idx.take(indices, out=indices)

        mode_unsupported = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=mode_unsupported):
            idx.take(indices, mode="clip")

    # TODO: This method came from test_datetime; de-dup with version above
    @pytest.mark.parametrize("tz", [None, "US/Eastern", "Asia/Tokyo"])
    def test_take2(self, tz):
        """``take`` and list indexing agree and keep name and tz but not freq."""
        picked_hours = [
            datetime(2010, 1, 1, 14),
            datetime(2010, 1, 1, 15),
            datetime(2010, 1, 1, 17),
            datetime(2010, 1, 1, 21),
        ]

        idx = pd.date_range(
            start="2010-01-01 09:00",
            end="2010-02-01 09:00",
            freq="H",
            tz=tz,
            name="idx",
        )
        expected = DatetimeIndex(picked_hours, freq=None, name="idx", tz=tz)

        positions = [5, 6, 8, 12]
        via_take = idx.take(positions)
        via_getitem = idx[positions]

        for taken in (via_take, via_getitem):
            tm.assert_index_equal(taken, expected)
            assert isinstance(taken, DatetimeIndex)
            assert taken.freq is None
            assert taken.tz == expected.tz
            assert taken.name == expected.name

    def test_take_fill_value(self):
        """``fill_value`` turns ``-1`` into NaT unless ``allow_fill=False``."""
        # GH#12631
        idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
        positions = np.array([1, 0, -1])

        # Without fill_value, -1 means "last element".
        result = idx.take(positions)
        expected = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
        )
        tm.assert_index_equal(result, expected)

        # fill_value
        result = idx.take(positions, fill_value=True)
        expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
        tm.assert_index_equal(result, expected)

        # allow_fill=False
        result = idx.take(positions, allow_fill=False, fill_value=True)
        expected = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
        )
        tm.assert_index_equal(result, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        for too_negative in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array([1, 0, too_negative]), fill_value=True)

        with pytest.raises(IndexError):
            idx.take(np.array([1, -5]))

    def test_take_fill_value_with_timezone(self):
        """Same as ``test_take_fill_value`` for a US/Eastern index."""
        idx = pd.DatetimeIndex(
            ["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )
        positions = np.array([1, 0, -1])

        # Without fill_value, -1 means "last element".
        result = idx.take(positions)
        expected = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )
        tm.assert_index_equal(result, expected)

        # fill_value
        result = idx.take(positions, fill_value=True)
        expected = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "NaT"], name="xxx", tz="US/Eastern"
        )
        tm.assert_index_equal(result, expected)

        # allow_fill=False
        result = idx.take(positions, allow_fill=False, fill_value=True)
        expected = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )
        tm.assert_index_equal(result, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        for too_negative in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array([1, 0, too_negative]), fill_value=True)

        with pytest.raises(IndexError):
            idx.take(np.array([1, -5]))
|
||||
|
||||
|
||||
class TestDatetimeIndex:
|
||||
@pytest.mark.parametrize("null", [None, np.nan, pd.NaT])
@pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"])
def test_insert_nat(self, tz, null):
    """Inserting any null-like value at the front stores NaT and keeps the tz."""
    # GH#16537, GH#18295 (test missing)
    idx = pd.DatetimeIndex(["2017-01-01"], tz=tz)
    expected = pd.DatetimeIndex(["NaT", "2017-01-01"], tz=tz)

    with_null = idx.insert(0, null)
    tm.assert_index_equal(with_null, expected)
|
||||
|
||||
def test_insert(self):
    """``insert`` keeps name and tz, and keeps freq only when it still holds.

    Also checks that a non-datetime value produces a non-``DatetimeIndex``
    result and that values with a missing or different timezone raise
    ``ValueError`` on a tz-aware index.
    """
    idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"], name="idx")

    result = idx.insert(2, datetime(2000, 1, 5))
    exp = DatetimeIndex(
        ["2000-01-04", "2000-01-01", "2000-01-05", "2000-01-02"], name="idx"
    )
    tm.assert_index_equal(result, exp)

    # insertion of non-datetime should coerce to object index
    result = idx.insert(1, "inserted")
    mixed_values = [
        datetime(2000, 1, 4),
        "inserted",
        datetime(2000, 1, 1),
        datetime(2000, 1, 2),
    ]
    expected = Index(mixed_values, name="idx")
    assert not isinstance(result, DatetimeIndex)
    tm.assert_index_equal(result, expected)
    assert result.name == expected.name

    idx = date_range("1/1/2000", periods=3, freq="M", name="idx")

    # preserve freq
    expected_0 = DatetimeIndex(
        ["1999-12-31", "2000-01-31", "2000-02-29", "2000-03-31"],
        name="idx",
        freq="M",
    )
    expected_3 = DatetimeIndex(
        ["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30"],
        name="idx",
        freq="M",
    )

    # reset freq to None
    expected_1_nofreq = DatetimeIndex(
        ["2000-01-31", "2000-01-31", "2000-02-29", "2000-03-31"],
        name="idx",
        freq=None,
    )
    expected_3_nofreq = DatetimeIndex(
        ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
        name="idx",
        freq=None,
    )

    # (position, value to insert, expected index)
    cases = [
        (0, datetime(1999, 12, 31), expected_0),
        (-3, datetime(1999, 12, 31), expected_0),
        (3, datetime(2000, 4, 30), expected_3),
        (1, datetime(2000, 1, 31), expected_1_nofreq),
        (3, datetime(2000, 1, 2), expected_3_nofreq),
    ]

    for position, value, expected in cases:
        result = idx.insert(position, value)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq == expected.freq

    # reset freq to None
    result = idx.insert(3, datetime(2000, 1, 2))
    expected = DatetimeIndex(
        ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
        name="idx",
        freq=None,
    )
    tm.assert_index_equal(result, expected)
    assert result.name == expected.name
    assert result.freq is None

    # see gh-7299
    idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")
    # Naive values and values in another timezone are all rejected.
    mismatched = [
        pd.Timestamp("2000-01-04"),
        datetime(2000, 1, 4),
        pd.Timestamp("2000-01-04", tz="US/Eastern"),
        datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern")),
    ]
    for value in mismatched:
        with pytest.raises(ValueError):
            idx.insert(3, value)

    for tz in ["US/Pacific", "Asia/Singapore"]:
        idx = date_range("1/1/2000 09:00", periods=6, freq="H", tz=tz, name="idx")

        # preserve freq
        expected = date_range(
            "1/1/2000 09:00", periods=7, freq="H", tz=tz, name="idx"
        )
        next_hour_values = [
            pd.Timestamp("2000-01-01 15:00", tz=tz),
            pytz.timezone(tz).localize(datetime(2000, 1, 1, 15)),
        ]
        for value in next_hour_values:
            result = idx.insert(6, value)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq
            assert result.tz == expected.tz

        expected = DatetimeIndex(
            [
                "2000-01-01 09:00",
                "2000-01-01 10:00",
                "2000-01-01 11:00",
                "2000-01-01 12:00",
                "2000-01-01 13:00",
                "2000-01-01 14:00",
                "2000-01-01 10:00",
            ],
            name="idx",
            tz=tz,
            freq=None,
        )
        # reset freq to None
        out_of_step_values = [
            pd.Timestamp("2000-01-01 10:00", tz=tz),
            pytz.timezone(tz).localize(datetime(2000, 1, 1, 10)),
        ]
        for value in out_of_step_values:
            result = idx.insert(6, value)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.tz == expected.tz
            assert result.freq is None
|
||||
|
||||
def test_delete(self):
    """Check ``delete`` with a single position, on naive and tz-aware indexes.

    The asserted contract: removing an end element keeps ``freq``; removing
    an interior element resets it to ``None``.
    """
    monthly = date_range(start="2000-01-01", periods=5, freq="M", name="idx")

    # preserve freq
    minus_first = date_range(start="2000-02-01", periods=4, freq="M", name="idx")
    minus_last = date_range(start="2000-01-01", periods=4, freq="M", name="idx")

    # reset freq to None
    minus_second = DatetimeIndex(
        ["2000-01-31", "2000-03-31", "2000-04-30", "2000-05-31"],
        freq=None,
        name="idx",
    )

    # (position to delete, index expected afterwards)
    scenarios = [
        (0, minus_first),
        (-5, minus_first),
        (-1, minus_last),
        (4, minus_last),
        (1, minus_second),
    ]
    for loc, want in scenarios:
        got = monthly.delete(loc)
        tm.assert_index_equal(got, want)
        assert got.name == want.name
        assert got.freq == want.freq

    # out-of-bounds position: either exception depending on numpy version
    with pytest.raises((IndexError, ValueError)):
        monthly.delete(5)

    for tz in [None, "Asia/Tokyo", "US/Pacific"]:
        hourly = date_range(
            start="2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz
        )

        # (position to delete, start of the remaining nine-hour range)
        for loc, start in [(0, "2000-01-01 10:00"), (-1, "2000-01-01 09:00")]:
            want = date_range(start=start, periods=9, freq="H", name="idx", tz=tz)
            got = hourly.delete(loc)
            tm.assert_index_equal(got, want)
            assert got.name == want.name
            assert got.freqstr == "H"
            assert got.tz == want.tz
|
||||
|
||||
def test_delete_slice(self):
    """Check ``delete`` with several positions and ``Series.drop`` by labels.

    Each run of positions is passed both as a tuple and as the equivalent
    slice; the expected ``freq`` is kept for end runs and ``None`` otherwise.
    """
    daily = date_range(start="2000-01-01", periods=10, freq="D", name="idx")

    # preserve freq
    minus_head = date_range(start="2000-01-04", periods=7, freq="D", name="idx")
    minus_tail = date_range(start="2000-01-01", periods=7, freq="D", name="idx")

    # reset freq to None
    minus_middle = DatetimeIndex(
        [
            "2000-01-01",
            "2000-01-02",
            "2000-01-03",
            "2000-01-07",
            "2000-01-08",
            "2000-01-09",
            "2000-01-10",
        ],
        freq=None,
        name="idx",
    )

    scenarios = [
        ((0, 1, 2), minus_head),
        ((7, 8, 9), minus_tail),
        ((3, 4, 5), minus_middle),
    ]
    for locs, want in scenarios:
        # same positions, first as a tuple and then as a slice
        for selector in (locs, slice(locs[0], locs[-1] + 1)):
            got = daily.delete(selector)
            tm.assert_index_equal(got, want)
            assert got.name == want.name
            assert got.freq == want.freq

    for tz in [None, "Asia/Tokyo", "US/Pacific"]:
        ts = pd.Series(
            1,
            index=pd.date_range(
                "2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz
            ),
        )

        # preserve freq
        got = ts.drop(ts.index[:5]).index
        want = pd.date_range(
            "2000-01-01 14:00", periods=5, freq="H", name="idx", tz=tz
        )
        tm.assert_index_equal(got, want)
        assert got.name == want.name
        assert got.freq == want.freq
        assert got.tz == want.tz

        # reset freq to None
        got = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index
        want = DatetimeIndex(
            [
                "2000-01-01 09:00",
                "2000-01-01 11:00",
                "2000-01-01 13:00",
                "2000-01-01 15:00",
                "2000-01-01 17:00",
            ],
            freq=None,
            name="idx",
            tz=tz,
        )
        tm.assert_index_equal(got, want)
        assert got.name == want.name
        assert got.freq == want.freq
        assert got.tz == want.tz
|
||||
|
||||
def test_get_loc(self):
    """Exercise ``get_loc`` with fill methods, tolerances, partial strings and times."""
    daily = pd.date_range("2000-01-01", periods=3)

    for method in [None, "pad", "backfill", "nearest"]:
        # the same label given as Timestamp, datetime and string
        for key in (daily[1], daily[1].to_pydatetime(), str(daily[1])):
            assert daily.get_loc(key, method) == 1

        if method is None:
            continue
        assert daily.get_loc(daily[1], method, tolerance=pd.Timedelta("0 days")) == 1

    assert daily.get_loc("2000-01-01", method="nearest") == 0
    assert daily.get_loc("2000-01-01T12", method="nearest") == 1

    # a one-day tolerance spelled four different ways
    for tol in ["1 day", pd.Timedelta("1D"), np.timedelta64(1, "D"), timedelta(1)]:
        assert daily.get_loc("2000-01-01T12", method="nearest", tolerance=tol) == 1

    with pytest.raises(ValueError, match="unit abbreviation w/o a number"):
        daily.get_loc("2000-01-01T12", method="nearest", tolerance="foo")
    with pytest.raises(KeyError, match="'2000-01-01T03'"):
        daily.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours")
    with pytest.raises(
        ValueError, match="tolerance size must match target index size"
    ):
        daily.get_loc(
            "2000-01-01",
            method="nearest",
            tolerance=[pd.Timedelta("1day").to_timedelta64() for _ in range(2)],
        )

    # partial-string keys
    for partial in ("2000", "2000-01"):
        assert daily.get_loc(partial, method="nearest") == slice(0, 3)

    assert daily.get_loc("1999", method="nearest") == 0
    assert daily.get_loc("2001", method="nearest") == 2

    with pytest.raises(KeyError, match="'1999'"):
        daily.get_loc("1999", method="pad")
    with pytest.raises(KeyError, match="'2001'"):
        daily.get_loc("2001", method="backfill")

    with pytest.raises(KeyError, match="'foobar'"):
        daily.get_loc("foobar")
    with pytest.raises(TypeError):
        daily.get_loc(slice(2))

    gapped = pd.to_datetime(["2000-01-01", "2000-01-04"])
    assert gapped.get_loc("2000-01-02", method="nearest") == 0
    assert gapped.get_loc("2000-01-03", method="nearest") == 1
    assert gapped.get_loc("2000-01", method="nearest") == slice(0, 2)

    # time indexing
    hourly = pd.date_range("2000-01-01", periods=24, freq="H")
    for key, positions in [(time(12), [12]), (time(12, 30), [])]:
        tm.assert_numpy_array_equal(
            hourly.get_loc(key), np.array(positions), check_dtype=False
        )
    with pytest.raises(NotImplementedError):
        hourly.get_loc(time(12, 30), method="pad")
|
||||
|
||||
def test_get_indexer(self):
    """Exercise ``get_indexer`` with each fill method and with tolerances."""
    daily = pd.date_range("2000-01-01", periods=3)
    tm.assert_numpy_array_equal(
        daily.get_indexer(daily), np.array([0, 1, 2], dtype=np.intp)
    )

    target = daily[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
    # (fill method, positions expected for ``target``)
    by_method = [
        ("pad", [-1, 0, 1]),
        ("backfill", [0, 1, 2]),
        ("nearest", [0, 1, 1]),
    ]
    for method, positions in by_method:
        tm.assert_numpy_array_equal(
            daily.get_indexer(target, method), np.array(positions, dtype=np.intp)
        )

    # scalar tolerance
    tm.assert_numpy_array_equal(
        daily.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")),
        np.array([0, -1, 1], dtype=np.intp),
    )

    # list-like tolerance, one entry per target element
    tol_raw = [
        pd.Timedelta("1 hour"),
        pd.Timedelta("1 hour"),
        pd.Timedelta("1 hour").to_timedelta64(),
    ]
    per_element = [np.timedelta64(x) for x in tol_raw]
    tm.assert_numpy_array_equal(
        daily.get_indexer(target, "nearest", tolerance=per_element),
        np.array([0, -1, 1], dtype=np.intp),
    )

    tol_bad = [
        pd.Timedelta("2 hour").to_timedelta64(),
        pd.Timedelta("1 hour").to_timedelta64(),
        "foo",
    ]
    with pytest.raises(ValueError, match="abbreviation w/o a number"):
        daily.get_indexer(target, "nearest", tolerance=tol_bad)
    with pytest.raises(ValueError):
        daily.get_indexer(daily[[0]], method="nearest", tolerance="foo")
|
||||
|
||||
def test_reasonable_key_error(self):
    """A missing date raises ``KeyError`` whose message matches "2000"."""
    # GH#1062
    single = DatetimeIndex(["1/3/2000"])
    missing = "1/1/2000"
    with pytest.raises(KeyError, match="2000"):
        single.get_loc(missing)
|
||||
|
||||
@pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)])
def test_timedelta_invalid_key(self, key):
    """Timedelta-like keys are rejected by ``get_loc`` with ``TypeError``."""
    # GH#20464
    daily = pd.date_range("1970-01-01", periods=10)
    with pytest.raises(TypeError):
        daily.get_loc(key)
|
||||
|
||||
def test_get_loc_nat(self):
|
||||
# GH#20464
|
||||
index = DatetimeIndex(["1/3/2000", "NaT"])
|
||||
assert index.get_loc(pd.NaT) == 1
|
||||
@@ -0,0 +1,379 @@
|
||||
import calendar
|
||||
import locale
|
||||
import unicodedata
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DatetimeIndex, Index, Timestamp, date_range, datetime, offsets
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestTimeSeries:
    """Construction checks for ``Index`` built from dates and for ``date_range``."""

    def test_pass_datetimeindex_to_index(self):
        """An object ``Index`` from a DatetimeIndex matches one built from datetimes."""
        # Bugs in #1396
        rng = date_range("1/1/2000", "3/1/2000")
        from_index = Index(rng, dtype=object)
        from_datetimes = Index(rng.to_pydatetime(), dtype=object)

        tm.assert_numpy_array_equal(from_index.values, from_datetimes.values)

    def test_range_edges(self):
        """``date_range`` includes both endpoints at every tested frequency."""
        # GH#13672
        # (start, end, freq, stamps expected in the resulting index)
        cases = [
            (
                "1970-01-01 00:00:00.000000001",
                "1970-01-01 00:00:00.000000004",
                "N",
                [
                    "1970-01-01 00:00:00.000000001",
                    "1970-01-01 00:00:00.000000002",
                    "1970-01-01 00:00:00.000000003",
                    "1970-01-01 00:00:00.000000004",
                ],
            ),
            # start after end
            (
                "1970-01-01 00:00:00.000000004",
                "1970-01-01 00:00:00.000000001",
                "N",
                [],
            ),
            # start equal to end
            (
                "1970-01-01 00:00:00.000000001",
                "1970-01-01 00:00:00.000000001",
                "N",
                ["1970-01-01 00:00:00.000000001"],
            ),
            (
                "1970-01-01 00:00:00.000001",
                "1970-01-01 00:00:00.000004",
                "U",
                [
                    "1970-01-01 00:00:00.000001",
                    "1970-01-01 00:00:00.000002",
                    "1970-01-01 00:00:00.000003",
                    "1970-01-01 00:00:00.000004",
                ],
            ),
            (
                "1970-01-01 00:00:00.001",
                "1970-01-01 00:00:00.004",
                "L",
                [
                    "1970-01-01 00:00:00.001",
                    "1970-01-01 00:00:00.002",
                    "1970-01-01 00:00:00.003",
                    "1970-01-01 00:00:00.004",
                ],
            ),
            (
                "1970-01-01 00:00:01",
                "1970-01-01 00:00:04",
                "S",
                [
                    "1970-01-01 00:00:01",
                    "1970-01-01 00:00:02",
                    "1970-01-01 00:00:03",
                    "1970-01-01 00:00:04",
                ],
            ),
            (
                "1970-01-01 00:01",
                "1970-01-01 00:04",
                "T",
                [
                    "1970-01-01 00:01",
                    "1970-01-01 00:02",
                    "1970-01-01 00:03",
                    "1970-01-01 00:04",
                ],
            ),
            (
                "1970-01-01 01:00",
                "1970-01-01 04:00",
                "H",
                [
                    "1970-01-01 01:00",
                    "1970-01-01 02:00",
                    "1970-01-01 03:00",
                    "1970-01-01 04:00",
                ],
            ),
            (
                "1970-01-01",
                "1970-01-04",
                "D",
                ["1970-01-01", "1970-01-02", "1970-01-03", "1970-01-04"],
            ),
        ]
        for first, last, freq, stamps in cases:
            idx = pd.date_range(start=Timestamp(first), end=Timestamp(last), freq=freq)
            exp = DatetimeIndex(stamps)
            tm.assert_index_equal(idx, exp)
|
||||
|
||||
|
||||
class TestDatetime64:
    """Field, boolean and name accessors of ``DatetimeIndex`` and ``Timestamp``."""

    def test_datetimeindex_accessors(self):
        """Check accessor values, lengths and return types on a 365-day index."""
        dti_naive = pd.date_range(freq="D", start=datetime(1998, 1, 1), periods=365)
        # GH#13303
        dti_tz = pd.date_range(
            freq="D", start=datetime(1998, 1, 1), periods=365, tz="US/Eastern"
        )
        for dti in [dti_naive, dti_tz]:

            assert dti.year[0] == 1998
            assert dti.month[0] == 1
            assert dti.day[0] == 1
            assert dti.hour[0] == 0
            assert dti.minute[0] == 0
            assert dti.second[0] == 0
            assert dti.microsecond[0] == 0
            assert dti.dayofweek[0] == 3

            assert dti.dayofyear[0] == 1
            assert dti.dayofyear[120] == 121

            assert dti.weekofyear[0] == 1
            assert dti.weekofyear[120] == 18

            assert dti.quarter[0] == 1
            assert dti.quarter[120] == 2

            assert dti.days_in_month[0] == 31
            assert dti.days_in_month[90] == 30

            assert dti.is_month_start[0]
            assert not dti.is_month_start[1]
            assert dti.is_month_start[31]
            assert dti.is_quarter_start[0]
            assert dti.is_quarter_start[90]
            assert dti.is_year_start[0]
            assert not dti.is_year_start[364]
            assert not dti.is_month_end[0]
            assert dti.is_month_end[30]
            assert not dti.is_month_end[31]
            assert dti.is_month_end[364]
            assert not dti.is_quarter_end[0]
            assert not dti.is_quarter_end[30]
            assert dti.is_quarter_end[89]
            assert dti.is_quarter_end[364]
            assert not dti.is_year_end[0]
            assert dti.is_year_end[364]

            # every accessor yields one value per element
            for attr in [
                "year",
                "month",
                "day",
                "hour",
                "minute",
                "second",
                "microsecond",
                "dayofweek",
                "dayofyear",
                "weekofyear",
                "quarter",
                "is_month_start",
                "is_month_end",
                "is_quarter_start",
                "is_quarter_end",
                "is_year_start",
                "is_year_end",
                "weekday_name",
            ]:
                assert len(getattr(dti, attr)) == 365

            dti.name = "name"

            # non boolean accessors -> return Index
            for accessor in DatetimeIndex._field_ops:
                res = getattr(dti, accessor)
                assert len(res) == 365
                assert isinstance(res, Index)
                assert res.name == "name"

            # boolean accessors -> return array
            for accessor in DatetimeIndex._bool_ops:
                res = getattr(dti, accessor)
                assert len(res) == 365
                assert isinstance(res, np.ndarray)

            # test boolean indexing
            res = dti[dti.is_quarter_start]
            exp = dti[[0, 90, 181, 273]]
            tm.assert_index_equal(res, exp)
            res = dti[dti.is_leap_year]
            exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name")
            tm.assert_index_equal(res, exp)

        dti = pd.date_range(freq="BQ-FEB", start=datetime(1998, 1, 1), periods=4)

        assert sum(dti.is_quarter_start) == 0
        assert sum(dti.is_quarter_end) == 4
        assert sum(dti.is_year_start) == 0
        assert sum(dti.is_year_end) == 1

        # Ensure is_start/end accessors throw ValueError for CustomBusinessDay
        bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu")
        dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
        msg = "Custom business days is not supported by is_month_start"
        with pytest.raises(ValueError, match=msg):
            dti.is_month_start

        dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])

        assert dti.is_month_start[0] == 1

        # (accessor value computed on a Timestamp, expected value)
        tests = [
            (Timestamp("2013-06-01", freq="M").is_month_start, 1),
            (Timestamp("2013-06-01", freq="BM").is_month_start, 0),
            (Timestamp("2013-06-03", freq="M").is_month_start, 0),
            (Timestamp("2013-06-03", freq="BM").is_month_start, 1),
            (Timestamp("2013-02-28", freq="Q-FEB").is_month_end, 1),
            (Timestamp("2013-02-28", freq="Q-FEB").is_quarter_end, 1),
            (Timestamp("2013-02-28", freq="Q-FEB").is_year_end, 1),
            (Timestamp("2013-03-01", freq="Q-FEB").is_month_start, 1),
            (Timestamp("2013-03-01", freq="Q-FEB").is_quarter_start, 1),
            (Timestamp("2013-03-01", freq="Q-FEB").is_year_start, 1),
            (Timestamp("2013-03-31", freq="QS-FEB").is_month_end, 1),
            (Timestamp("2013-03-31", freq="QS-FEB").is_quarter_end, 0),
            (Timestamp("2013-03-31", freq="QS-FEB").is_year_end, 0),
            (Timestamp("2013-02-01", freq="QS-FEB").is_month_start, 1),
            (Timestamp("2013-02-01", freq="QS-FEB").is_quarter_start, 1),
            (Timestamp("2013-02-01", freq="QS-FEB").is_year_start, 1),
            (Timestamp("2013-06-30", freq="BQ").is_month_end, 0),
            (Timestamp("2013-06-30", freq="BQ").is_quarter_end, 0),
            (Timestamp("2013-06-30", freq="BQ").is_year_end, 0),
            (Timestamp("2013-06-28", freq="BQ").is_month_end, 1),
            (Timestamp("2013-06-28", freq="BQ").is_quarter_end, 1),
            (Timestamp("2013-06-28", freq="BQ").is_year_end, 0),
            (Timestamp("2013-06-30", freq="BQS-APR").is_month_end, 0),
            (Timestamp("2013-06-30", freq="BQS-APR").is_quarter_end, 0),
            (Timestamp("2013-06-30", freq="BQS-APR").is_year_end, 0),
            (Timestamp("2013-06-28", freq="BQS-APR").is_month_end, 1),
            (Timestamp("2013-06-28", freq="BQS-APR").is_quarter_end, 1),
            (Timestamp("2013-03-29", freq="BQS-APR").is_year_end, 1),
            (Timestamp("2013-11-01", freq="AS-NOV").is_year_start, 1),
            (Timestamp("2013-10-31", freq="AS-NOV").is_year_end, 1),
            (Timestamp("2012-02-01").days_in_month, 29),
            (Timestamp("2013-02-01").days_in_month, 28),
        ]

        for ts, value in tests:
            assert ts == value

        # GH 6538: Check that DatetimeIndex and its TimeStamp elements
        # return the same weekofyear accessor close to new year w/ tz
        dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
        dates = DatetimeIndex(dates, tz="Europe/Brussels")
        expected = [52, 1, 1]
        assert dates.weekofyear.tolist() == expected
        assert [d.weekofyear for d in dates] == expected

    # GH 12806
    @pytest.mark.parametrize(
        "time_locale", [None] if tm.get_locales() is None else [None] + tm.get_locales()
    )
    def test_datetime_name_accessors(self, time_locale):
        """Check day and month names on an index and on Timestamps, per locale.

        Parameters
        ----------
        time_locale : str or None
            Locale passed to ``day_name`` / ``month_name``; ``None`` means the
            English names are expected.
        """
        english_days = [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ]

        # Test Monday -> Sunday and January -> December, in that sequence
        if time_locale is None:
            # If the time_locale is None, day-name and month_name should
            # return the english attributes
            expected_days = list(english_days)
            expected_months = [
                "January",
                "February",
                "March",
                "April",
                "May",
                "June",
                "July",
                "August",
                "September",
                "October",
                "November",
                "December",
            ]
        else:
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_days = calendar.day_name[:]
                expected_months = calendar.month_name[1:]

        # GH#11128
        dti = pd.date_range(freq="D", start=datetime(1998, 1, 1), periods=365)
        for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
            name = name.capitalize()
            assert dti.weekday_name[day] == eng_name
            assert dti.day_name(locale=time_locale)[day] == name
            ts = Timestamp(datetime(2016, 4, day))
            with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
                assert ts.weekday_name == eng_name
            assert ts.day_name(locale=time_locale) == name
        dti = dti.append(DatetimeIndex([pd.NaT]))
        assert np.isnan(dti.day_name(locale=time_locale)[-1])
        ts = Timestamp(pd.NaT)
        assert np.isnan(ts.day_name(locale=time_locale))

        # GH#12805
        dti = pd.date_range(freq="M", start="2012", end="2013")
        result = dti.month_name(locale=time_locale)
        expected = Index([month.capitalize() for month in expected_months])

        # work around different normalization schemes
        # https://github.com/pandas-dev/pandas/issues/22342
        result = result.str.normalize("NFD")
        expected = expected.str.normalize("NFD")

        tm.assert_index_equal(result, expected)

        for date, month in zip(dti, expected_months):
            result = unicodedata.normalize("NFD", date.month_name(locale=time_locale))
            # Normalize the expected name itself.  Deriving ``expected`` from
            # ``result`` would compare the value with itself and never fail.
            expected = unicodedata.normalize("NFD", month.capitalize())

            assert result == expected
        dti = dti.append(DatetimeIndex([pd.NaT]))
        assert np.isnan(dti.month_name(locale=time_locale)[-1])

    def test_nanosecond_field(self):
        """``nanosecond`` of an index built from 0..9 is 0..9 as int64."""
        dti = DatetimeIndex(np.arange(10))

        tm.assert_index_equal(dti.nanosecond, pd.Index(np.arange(10, dtype=np.int64)))
|
||||
@@ -0,0 +1,62 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestDatetimeIndex:
    """``fillna`` behaviour of ``DatetimeIndex``."""

    @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"])
    def test_fillna_datetime64(self, tz):
        """Fill ``NaT`` with matching-tz, mismatched-tz and non-datetime values.

        A fill value whose timezone matches the index keeps a DatetimeIndex;
        a mismatched timezone or a string is expected to give an object Index.
        """
        # GH 11343
        naive_fill = pd.Timestamp("2011-01-01 10:00")
        aware_fill = pd.Timestamp("2011-01-01 10:00", tz=tz)

        # --- tz-naive index ---
        naive = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"])

        want = pd.DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"]
        )
        tm.assert_index_equal(naive.fillna(naive_fill), want)

        # tz mismatch
        want = pd.Index(
            [
                pd.Timestamp("2011-01-01 09:00"),
                aware_fill,
                pd.Timestamp("2011-01-01 11:00"),
            ],
            dtype=object,
        )
        tm.assert_index_equal(naive.fillna(aware_fill), want)

        # object
        want = pd.Index(
            [pd.Timestamp("2011-01-01 09:00"), "x", pd.Timestamp("2011-01-01 11:00")],
            dtype=object,
        )
        tm.assert_index_equal(naive.fillna("x"), want)

        # --- tz-aware index ---
        aware = pd.DatetimeIndex(
            ["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], tz=tz
        )

        want = pd.DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], tz=tz
        )
        tm.assert_index_equal(aware.fillna(aware_fill), want)

        # tz mismatch
        want = pd.Index(
            [
                pd.Timestamp("2011-01-01 09:00", tz=tz),
                naive_fill,
                pd.Timestamp("2011-01-01 11:00", tz=tz),
            ],
            dtype=object,
        )
        tm.assert_index_equal(aware.fillna(naive_fill), want)

        # object
        want = pd.Index(
            [
                pd.Timestamp("2011-01-01 09:00", tz=tz),
                "x",
                pd.Timestamp("2011-01-01 11:00", tz=tz),
            ],
            dtype=object,
        )
        tm.assert_index_equal(aware.fillna("x"), want)
|
||||
@@ -0,0 +1,548 @@
|
||||
from datetime import datetime
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.generic import ABCDateOffset
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
bdate_range,
|
||||
date_range,
|
||||
)
|
||||
from pandas.tests.test_base import Ops
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from pandas.tseries.offsets import BDay, BMonthEnd, CDay, Day, Hour
|
||||
|
||||
# Module-level datetime bounds: START is 2009-01-01 and END is 2010-01-01.
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
|
||||
|
||||
|
||||
class TestDatetimeIndexOps(Ops):
|
||||
def setup_method(self, method):
    """Run the parent setup, then split ``self.objs`` by index type.

    Objects that are a ``DatetimeIndex`` or ``PeriodIndex`` go to
    ``self.is_valid_objs``; all others go to ``self.not_valid_objs``.
    """
    super().setup_method(method)
    accepted_types = (DatetimeIndex, PeriodIndex)
    valid, invalid = [], []
    for obj in self.objs:
        bucket = valid if isinstance(obj, accepted_types) else invalid
        bucket.append(obj)
    self.is_valid_objs = valid
    self.not_valid_objs = invalid
|
||||
|
||||
def test_ops_properties(self):
    """Run ``check_ops_properties`` over each DatetimeIndex accessor group."""

    def is_datetime_index(obj):
        return isinstance(obj, DatetimeIndex)

    accessor_groups = (
        DatetimeIndex._field_ops,
        DatetimeIndex._object_ops,
        DatetimeIndex._bool_ops,
    )
    for group in accessor_groups:
        self.check_ops_properties(group, is_datetime_index)
|
||||
|
||||
def test_ops_properties_basic(self):
    """Datetime fields are not Series attributes, but label access still works."""
    # sanity check that the behavior didn't change
    # GH#7206
    template = "'Series' object has no attribute '{}'"
    for field in ["year", "day", "second", "weekday"]:
        with pytest.raises(AttributeError, match=template.format(field)):
            getattr(self.dt_series, field)

    # attribute access should still work!
    labelled = Series(dict(year=2000, month=1, day=10))
    assert labelled.year == 2000
    assert labelled.month == 1
    assert labelled.day == 10

    # "weekday" is not one of the labels, so it must still fail
    msg = "'Series' object has no attribute 'weekday'"
    with pytest.raises(AttributeError, match=msg):
        labelled.weekday
|
||||
|
||||
def test_repeat_range(self, tz_naive_fixture):
    """``repeat`` and ``np.repeat`` duplicate elements and drop ``freq``."""
    tz = tz_naive_fixture

    yearly_span = date_range("1/1/2000", "1/1/2001")
    repeated = yearly_span.repeat(5)
    assert repeated.freq is None
    assert len(repeated) == 5 * len(yearly_span)

    # (source index, repeat count, expected result)
    scenarios = [
        (
            pd.date_range("2001-01-01", periods=2, freq="D", tz=tz),
            2,
            pd.DatetimeIndex(
                ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz
            ),
        ),
        (
            pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz),
            2,
            pd.DatetimeIndex(
                ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz
            ),
        ),
        (
            pd.DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz),
            3,
            pd.DatetimeIndex(
                [
                    "2001-01-01",
                    "2001-01-01",
                    "2001-01-01",
                    "NaT",
                    "NaT",
                    "NaT",
                    "2003-01-01",
                    "2003-01-01",
                    "2003-01-01",
                ],
                tz=tz,
            ),
        ),
    ]
    for index, reps, exp in scenarios:
        # the method and the numpy function must agree
        for res in [index.repeat(reps), np.repeat(index, reps)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None
|
||||
|
||||
def test_repeat(self, tz_naive_fixture):
    """``repeat`` duplicates each element; ``np.repeat`` rejects ``axis``."""
    tz = tz_naive_fixture
    reps = 2
    msg = "the 'axis' parameter is not supported"

    rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

    first = Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T")
    second = Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T")
    expected_rng = DatetimeIndex([first, first, second, second])

    via_method = rng.repeat(reps)
    tm.assert_index_equal(via_method, expected_rng)
    assert via_method.freq is None

    via_numpy = np.repeat(rng, reps)
    tm.assert_index_equal(via_numpy, expected_rng)
    with pytest.raises(ValueError, match=msg):
        np.repeat(rng, reps, axis=1)
|
||||
|
||||
def test_resolution(self, tz_naive_fixture):
    """``resolution`` reports the expected unit name for each frequency alias."""
    tz = tz_naive_fixture
    # (frequency alias, expected ``resolution`` string)
    freq_to_resolution = [
        ("A", "day"),
        ("Q", "day"),
        ("M", "day"),
        ("D", "day"),
        ("H", "hour"),
        ("T", "minute"),
        ("S", "second"),
        ("L", "millisecond"),
        ("U", "microsecond"),
    ]
    for freq, expected in freq_to_resolution:
        idx = pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
        assert idx.resolution == expected
|
||||
|
||||
def test_value_counts_unique(self, tz_naive_fixture):
    """``value_counts`` and ``unique`` on repeated values, with and without NaT."""
    tz = tz_naive_fixture
    # GH 7735
    hours = pd.date_range("2011-01-01 09:00", freq="H", periods=10)
    # create repeated values, 'n'th element is repeated by n+1 times
    repeat_counts = range(1, len(hours) + 1)
    idx = DatetimeIndex(np.repeat(hours.values, repeat_counts), tz=tz)

    # the latest hour occurs most often, hence the descending expectation
    exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
    expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

    for obj in [idx, Series(idx)]:
        tm.assert_series_equal(obj.value_counts(), expected)

    expected = pd.date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
    tm.assert_index_equal(idx.unique(), expected)

    nine, eight = "2013-01-01 09:00", "2013-01-01 08:00"
    idx = DatetimeIndex([nine, nine, nine, eight, eight, pd.NaT], tz=tz)

    # NaT is dropped by default
    exp_idx = DatetimeIndex([nine, eight], tz=tz)
    expected = Series([3, 2], index=exp_idx)

    for obj in [idx, Series(idx)]:
        tm.assert_series_equal(obj.value_counts(), expected)

    # and counted when dropna=False
    exp_idx = DatetimeIndex([nine, eight, pd.NaT], tz=tz)
    expected = Series([3, 2, 1], index=exp_idx)

    for obj in [idx, Series(idx)]:
        tm.assert_series_equal(obj.value_counts(dropna=False), expected)

    tm.assert_index_equal(idx.unique(), exp_idx)
|
||||
|
||||
def test_nonunique_contains(self):
|
||||
# GH 9512
|
||||
for idx in map(
|
||||
DatetimeIndex,
|
||||
(
|
||||
[0, 1, 0],
|
||||
[0, 0, -1],
|
||||
[0, -1, -1],
|
||||
["2015", "2015", "2016"],
|
||||
["2015", "2015", "2014"],
|
||||
),
|
||||
):
|
||||
assert idx[0] in idx
|
||||
|
||||
@pytest.mark.parametrize(
    "idx",
    [
        DatetimeIndex(
            ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
        ),
        DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
            freq="H",
            name="tzidx",
            tz="Asia/Tokyo",
        ),
    ],
)
def test_order_with_freq(self, idx):
    """Sorting an ordered index with a freq keeps it, or negates it when descending."""
    flipped = idx[::-1]

    # ascending: unchanged, same freq
    ascending = idx.sort_values()
    tm.assert_index_equal(ascending, idx)
    assert ascending.freq == idx.freq

    # descending: reversed, freq with n == -1
    descending = idx.sort_values(ascending=False)
    tm.assert_index_equal(descending, flipped)
    assert descending.freq == flipped.freq
    assert descending.freq.n == -1

    # the same two checks, additionally asking for the indexer
    ascending, positions = idx.sort_values(return_indexer=True)
    tm.assert_index_equal(ascending, idx)
    tm.assert_numpy_array_equal(positions, np.array([0, 1, 2]), check_dtype=False)
    assert ascending.freq == idx.freq

    descending, positions = idx.sort_values(return_indexer=True, ascending=False)
    tm.assert_index_equal(descending, flipped)
    tm.assert_numpy_array_equal(positions, np.array([2, 1, 0]), check_dtype=False)
    assert descending.freq == flipped.freq
    assert descending.freq.n == -1
|
||||
|
||||
@pytest.mark.parametrize(
    "index_dates,expected_dates",
    [
        (
            ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
            ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
        ),
        (
            ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
            ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
        ),
        (
            [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
            [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
        ),
    ],
)
def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture):
    """Sort an unordered index that has no freq, in both directions.

    Every sorted result must match the expected order (or its reverse),
    report ``freq is None``, and - when requested - return the expected
    positional indexer.
    """
    tz = tz_naive_fixture

    # without freq
    unsorted = DatetimeIndex(index_dates, tz=tz, name="idx")
    want_asc = DatetimeIndex(expected_dates, tz=tz, name="idx")

    got = unsorted.sort_values()
    tm.assert_index_equal(got, want_asc)
    assert got.freq is None

    got = unsorted.sort_values(ascending=False)
    tm.assert_index_equal(got, want_asc[::-1])
    assert got.freq is None

    got, positions = unsorted.sort_values(return_indexer=True)
    tm.assert_index_equal(got, want_asc)
    tm.assert_numpy_array_equal(
        positions, np.array([0, 4, 3, 1, 2]), check_dtype=False
    )
    assert got.freq is None

    got, positions = unsorted.sort_values(return_indexer=True, ascending=False)
    tm.assert_index_equal(got, want_asc[::-1])
    tm.assert_numpy_array_equal(
        positions, np.array([2, 1, 3, 4, 0]), check_dtype=False
    )
    assert got.freq is None
|
||||
|
||||
def test_drop_duplicates_metadata(self):
    """GH 10115: ``drop_duplicates`` keeps freq only when nothing was dropped."""
    daily = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")

    deduped = daily.drop_duplicates()
    tm.assert_index_equal(daily, deduped)
    assert daily.freq == deduped.freq

    doubled = daily.append(daily)
    assert doubled.freq is None  # freq is reset

    deduped = doubled.drop_duplicates()
    tm.assert_index_equal(daily, deduped)
    assert deduped.freq is None
|
||||
|
||||
def test_drop_duplicates(self):
    """Index and Series ``drop_duplicates`` agree for every ``keep`` option."""
    # to check Index/Series compat
    base = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
    with_dups = base.append(base[:5])

    # default: keep the first occurrence
    tm.assert_index_equal(with_dups.drop_duplicates(), base)
    tm.assert_series_equal(Series(with_dups).drop_duplicates(), Series(base))

    # keep="last": the leading five entries are the ones dropped
    tail_then_head = base[5:].append(base[:5])
    tm.assert_index_equal(with_dups.drop_duplicates(keep="last"), tail_then_head)
    tm.assert_series_equal(
        Series(with_dups).drop_duplicates(keep="last"),
        Series(tail_then_head, index=np.arange(5, 36)),
    )

    # keep=False: every duplicated entry is dropped
    tm.assert_index_equal(with_dups.drop_duplicates(keep=False), base[5:])
    tm.assert_series_equal(
        Series(with_dups).drop_duplicates(keep=False),
        Series(base[5:], index=np.arange(5, 31)),
    )
|
||||
|
||||
@pytest.mark.parametrize(
    "freq",
    [
        "A", "2A", "-2A",
        "Q", "-1Q",
        "M", "-1M",
        "D", "3D", "-3D",
        "W", "-1W",
        "H", "2H", "-2H",
        "T", "2T",
        "S", "-3S",
    ],
)
def test_infer_freq(self, freq):
    """GH 11018: ``freq="infer"`` on raw i8 values recovers the source freq."""
    source = pd.date_range("2011-01-01 09:00:00", freq=freq, periods=10)
    rebuilt = pd.DatetimeIndex(source.asi8, freq="infer")
    tm.assert_index_equal(source, rebuilt)
    assert rebuilt.freq == freq
|
||||
|
||||
def test_nat(self, tz_naive_fixture):
    """NaT bookkeeping: ``_na_value``, ``_isnan``, ``hasnans``, ``_nan_idxs``."""
    tz = tz_naive_fixture
    assert pd.DatetimeIndex._na_value is pd.NaT
    assert pd.DatetimeIndex([])._na_value is pd.NaT

    # (input values, expected _isnan mask, expected hasnans, expected NaT positions)
    scenarios = (
        (["2011-01-01", "2011-01-02"], [False, False], False, []),
        (["2011-01-01", "NaT"], [False, True], True, [1]),
    )
    for values, nan_mask, has_nans, nan_positions in scenarios:
        idx = pd.DatetimeIndex(values, tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array(nan_mask))
        assert idx.hasnans is has_nans
        tm.assert_numpy_array_equal(
            idx._nan_idxs, np.array(nan_positions, dtype=np.intp)
        )
|
||||
|
||||
def test_equals(self):
    """GH 13107: ``equals`` across copies, object dtype, containers and tz."""
    idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
    as_object = idx.astype(object)

    assert idx.equals(idx)
    assert idx.equals(idx.copy())
    assert idx.equals(as_object)
    assert as_object.equals(idx)
    assert as_object.equals(idx.astype(object))
    assert not idx.equals(list(idx))
    assert not idx.equals(pd.Series(idx))

    def assert_never_equal(other):
        # ``other`` must compare unequal to ``idx`` in every representation.
        assert not idx.equals(other)
        assert not idx.equals(other.copy())
        assert not idx.equals(other.astype(object))
        assert not idx.astype(object).equals(other)
        assert not idx.equals(list(other))
        assert not idx.equals(pd.Series(other))

    # same wall-clock strings, tz-aware
    idx2 = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
    assert_never_equal(idx2)

    # same internal, different tz
    idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz="US/Pacific")
    tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
    assert_never_equal(idx3)
|
||||
|
||||
@pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
@pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
def test_freq_setter(self, values, freq, tz):
    """GH 20678: ``freq`` accepts a string or offset and can be cleared."""
    index = DatetimeIndex(values, tz=tz)

    # can set to an offset, converting from string if necessary
    index.freq = freq
    stored = index.freq
    assert stored == freq
    assert isinstance(index.freq, ABCDateOffset)

    # can reset to None
    index.freq = None
    assert index.freq is None
|
||||
|
||||
def test_freq_setter_errors(self):
    """GH 20678: assigning an unusable ``freq`` raises ``ValueError``."""
    every_other_day = DatetimeIndex(["20180101", "20180103", "20180105"])

    # setting with an incompatible freq
    mismatch_msg = (
        "Inferred frequency 2D from passed values does not conform to "
        "passed frequency 5D"
    )
    with pytest.raises(ValueError, match=mismatch_msg):
        every_other_day.freq = "5D"

    # setting with non-freq string
    with pytest.raises(ValueError, match="Invalid frequency"):
        every_other_day.freq = "foo"
|
||||
|
||||
def test_offset_deprecated(self):
    """GH 20716: reading or writing ``.offset`` emits a ``FutureWarning``."""
    two_days = pd.DatetimeIndex(["20180101", "20180102"])

    # getter deprecated
    with tm.assert_produces_warning(FutureWarning):
        two_days.offset

    # setter deprecated
    with tm.assert_produces_warning(FutureWarning):
        two_days.offset = BDay()
|
||||
|
||||
|
||||
class TestBusinessDatetimeIndex:
    """Checks run against ``bdate_range(START, END)`` built in ``setup_method``."""

    def setup_method(self, method):
        """Create the shared business-day range used by every test."""
        self.rng = bdate_range(START, END)

    def test_comparison(self):
        """``rng > rng[10]`` is true after position 10 and false before it."""
        pivot = self.rng[10]
        later_than_pivot = self.rng > pivot

        assert later_than_pivot[11]
        assert not later_than_pivot[9]

    def test_pickle_unpickle(self):
        """A pickle round trip still has a freq."""
        restored = tm.round_trip_pickle(self.rng)
        assert restored.freq is not None

    def test_copy(self):
        """A copy can be repr'd and equals the source index."""
        duplicate = self.rng.copy()
        repr(duplicate)
        tm.assert_index_equal(duplicate, self.rng)

    def test_shift(self):
        """Shifting by whole periods moves elements and keeps the freq."""
        # (periods, position in shifted result, matching position in self.rng)
        for periods, moved_pos, base_pos in ((5, 0, 5), (-5, 5, 0), (0, 0, 0)):
            moved = self.rng.shift(periods)
            assert moved[moved_pos] == self.rng[base_pos]
            assert moved.freq == self.rng.freq

        # shifting a month-end range by an explicit business-day offset
        month_ends = date_range(START, END, freq=BMonthEnd())
        moved = month_ends.shift(1, freq=BDay())
        assert moved[0] == month_ends[0] + BDay()

    def test_equals(self):
        """The index never ``equals`` a plain list of its own elements."""
        assert not self.rng.equals(list(self.rng))

    def test_identical(self):
        """``identical`` is stricter than ``equals`` for both name and freq."""
        left = self.rng.copy()
        right = self.rng.copy()
        assert left.identical(right)

        # name
        left = left.rename("foo")
        assert left.equals(right)
        assert not left.identical(right)
        right = right.rename("foo")
        assert left.identical(right)

        # freq
        rebuilt_from_values = Index(right.values)
        assert left.equals(rebuilt_from_values)
        assert not left.identical(rebuilt_from_values)
|
||||
|
||||
|
||||
class TestCustomDatetimeIndex:
    """Checks run against ``bdate_range(START, END, freq="C")``."""

    def setup_method(self, method):
        """Create the shared custom-business-day range used by every test."""
        self.rng = bdate_range(START, END, freq="C")

    def test_comparison(self):
        """``rng > rng[10]`` is true after position 10 and false before it."""
        pivot = self.rng[10]
        later_than_pivot = self.rng > pivot

        assert later_than_pivot[11]
        assert not later_than_pivot[9]

    def test_copy(self):
        """A copy can be repr'd and equals the source index."""
        duplicate = self.rng.copy()
        repr(duplicate)
        tm.assert_index_equal(duplicate, self.rng)

    def test_shift(self):
        """Shifting by whole periods moves elements and keeps the freq."""
        # (periods, position in shifted result, matching position in self.rng)
        for periods, moved_pos, base_pos in ((5, 0, 5), (-5, 5, 0), (0, 0, 0)):
            moved = self.rng.shift(periods)
            assert moved[moved_pos] == self.rng[base_pos]
            assert moved.freq == self.rng.freq

        # PerformanceWarning is silenced for the explicit-offset shift below
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("ignore", pd.errors.PerformanceWarning)
            month_ends = date_range(START, END, freq=BMonthEnd())
            moved = month_ends.shift(1, freq=CDay())
            assert moved[0] == month_ends[0] + CDay()

    def test_shift_periods(self):
        """``shift`` takes ``periods``; the old ``n`` keyword warns."""
        # GH#22458 : argument 'n' was deprecated in favor of 'periods'
        three_points = pd.date_range(start=START, end=END, periods=3)
        tm.assert_index_equal(three_points.shift(periods=0), three_points)
        tm.assert_index_equal(three_points.shift(0), three_points)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=True):
            tm.assert_index_equal(three_points.shift(n=0), three_points)

    def test_pickle_unpickle(self):
        """A pickle round trip still has a freq."""
        restored = tm.round_trip_pickle(self.rng)
        assert restored.freq is not None

    def test_equals(self):
        """The index never ``equals`` a plain list of its own elements."""
        assert not self.rng.equals(list(self.rng))
|
||||
@@ -0,0 +1,470 @@
|
||||
""" test partial slicing on Series/Frame """
|
||||
|
||||
from datetime import datetime
|
||||
import operator as op
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
from pandas.core.indexing import IndexingError
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestSlicing:
    """Slicing and partial-string indexing on ``DatetimeIndex``-backed objects."""

    def test_dti_slicing(self):
        """Fancy-indexing a monthly range keeps the values but drops the freq."""
        monthly = date_range(start="1/1/2005", end="12/1/2005", freq="M")
        picked = monthly[[1, 3, 5]]

        first, second, third = picked[0], picked[1], picked[2]
        assert first == Timestamp("2/28/2005")
        assert second == Timestamp("4/30/2005")
        assert third == Timestamp("6/30/2005")

        # don't carry freq through irregular slicing
        assert picked.freq is None

    def test_slice_keeps_name(self):
        """GH4226: slicing a named, tz-aware range keeps the name."""
        st = pd.Timestamp("2013-07-01 00:00:00", tz="America/Los_Angeles")
        et = pd.Timestamp("2013-07-02 00:00:00", tz="America/Los_Angeles")
        hourly = pd.date_range(st, et, freq="H", name="timebucket")
        assert hourly[1:].name == hourly.name

    def test_slice_with_negative_step(self):
        """Label slices with step -1 select the same rows as positional ones."""
        ts = Series(np.arange(20), date_range("2014-01-01", periods=20, freq="MS"))
        SLC = pd.IndexSlice

        def assert_slices_equivalent(l_slc, i_slc):
            # Both ``[]`` and ``.loc`` must agree with the positional slice.
            tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc])
            tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])

        assert_slices_equivalent(SLC[Timestamp("2014-10-01") :: -1], SLC[9::-1])
        assert_slices_equivalent(SLC["2014-10-01"::-1], SLC[9::-1])

        assert_slices_equivalent(SLC[: Timestamp("2014-10-01") : -1], SLC[:8:-1])
        assert_slices_equivalent(SLC[:"2014-10-01":-1], SLC[:8:-1])

        assert_slices_equivalent(SLC["2015-02-01":"2014-10-01":-1], SLC[13:8:-1])
        assert_slices_equivalent(
            SLC[Timestamp("2015-02-01") : Timestamp("2014-10-01") : -1], SLC[13:8:-1]
        )
        assert_slices_equivalent(
            SLC["2015-02-01" : Timestamp("2014-10-01") : -1], SLC[13:8:-1]
        )
        assert_slices_equivalent(
            SLC[Timestamp("2015-02-01") : "2014-10-01" : -1], SLC[13:8:-1]
        )

        assert_slices_equivalent(SLC["2014-10-01":"2015-02-01":-1], SLC[:0])

    def test_slice_with_zero_step_raises(self):
        """A zero slice step raises ``ValueError`` for ``[]`` and ``.loc``."""
        ts = Series(np.arange(20), date_range("2014-01-01", periods=20, freq="MS"))
        with pytest.raises(ValueError, match="slice step cannot be zero"):
            ts[::0]
        with pytest.raises(ValueError, match="slice step cannot be zero"):
            ts.loc[::0]

    def test_slice_bounds_empty(self):
        """GH#14354: slice bounds are still cast on an empty index."""
        empty_idx = date_range(freq="1H", periods=0, end="2015")

        right = empty_idx._maybe_cast_slice_bound("2015-01-02", "right", "loc")
        assert right == Timestamp("2015-01-02 23:59:59.999999999")

        left = empty_idx._maybe_cast_slice_bound("2015-01-02", "left", "loc")
        assert left == Timestamp("2015-01-02 00:00:00")

    def test_slice_duplicate_monotonic(self):
        """Casting a left bound works on an index holding duplicates."""
        # https://github.com/pandas-dev/pandas/issues/16515
        idx = pd.DatetimeIndex(["2017", "2017"])
        bound = idx._maybe_cast_slice_bound("2017-01-01", "left", "loc")
        assert bound == Timestamp("2017-01-01")

    def test_monotone_DTI_indexing_bug(self):
        """GH 19362: partial-string lookup on a monotonic *descending* index."""
        # Testing accessing the first element in a monotonic descending
        # partial string indexing.

        df = pd.DataFrame(list(range(5)))
        date_list = [
            "2018-01-02",
            "2017-02-10",
            "2016-03-10",
            "2015-03-15",
            "2014-03-16",
        ]
        date_index = pd.to_datetime(date_list)
        df["date"] = date_index
        expected = pd.DataFrame({0: list(range(5)), "date": date_index})
        tm.assert_frame_equal(df, expected)

        df = pd.DataFrame(
            {"A": [1, 2, 3]}, index=pd.date_range("20170101", periods=3)[::-1]
        )
        expected = pd.DataFrame({"A": 1}, index=pd.date_range("20170103", periods=1))
        tm.assert_frame_equal(df.loc["2017-01-03"], expected)

    def test_slice_year(self):
        """A bare year string selects that year from Series, frame and index."""
        dti = date_range(freq="B", start=datetime(2005, 1, 1), periods=500)

        s = Series(np.arange(len(dti)), index=dti)
        tm.assert_series_equal(s["2005"], s[s.index.year == 2005])

        df = DataFrame(np.random.rand(len(dti), 5), index=dti)
        tm.assert_frame_equal(df.loc["2005"], df[df.index.year == 2005])

        rng = date_range("1/1/2000", "1/1/2010")
        assert rng.get_loc("2009") == slice(3288, 3653)

    def test_slice_quarter(self):
        """Quarter strings in either spelling select 90 daily rows."""
        dti = date_range(freq="D", start=datetime(2000, 6, 1), periods=500)

        s = Series(np.arange(len(dti)), index=dti)
        assert len(s["2001Q1"]) == 90

        df = DataFrame(np.random.rand(len(dti), 5), index=dti)
        assert len(df.loc["1Q01"]) == 90

    def test_slice_month(self):
        """A year-month string selects the 30 daily rows of that month."""
        dti = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
        s = Series(np.arange(len(dti)), index=dti)
        assert len(s["2005-11"]) == 30

        df = DataFrame(np.random.rand(len(dti), 5), index=dti)
        assert len(df.loc["2005-11"]) == 30

        tm.assert_series_equal(s["2005-11"], s["11-2005"])

    def test_partial_slice(self):
        """Month-resolution slice bounds expand to the whole month."""
        rng = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
        s = Series(np.arange(len(rng)), index=rng)

        tm.assert_series_equal(s["2005-05":"2006-02"], s["20050501":"20060228"])
        tm.assert_series_equal(s["2005-05":], s["20050501":])
        tm.assert_series_equal(s[:"2006-02"], s[:"20060228"])

        assert s["2005-1-1"] == s.iloc[0]

        with pytest.raises(KeyError, match=r"^'2004-12-31'$"):
            s["2004-12-31"]

    def test_partial_slice_daily(self):
        """A day string selects 24 rows of an hourly Series."""
        rng = date_range(freq="H", start=datetime(2005, 1, 31), periods=500)
        s = Series(np.arange(len(rng)), index=rng)

        tm.assert_series_equal(s["2005-1-31"], s.iloc[:24])

        with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"):
            s["2004-12-31 00"]

    def test_partial_slice_hourly(self):
        """Day and hour strings slice a minutely Series; a full minute is exact."""
        rng = date_range(freq="T", start=datetime(2005, 1, 1, 20, 0, 0), periods=500)
        s = Series(np.arange(len(rng)), index=rng)

        tm.assert_series_equal(s["2005-1-1"], s.iloc[: 60 * 4])
        tm.assert_series_equal(s["2005-1-1 20"], s.iloc[:60])

        assert s["2005-1-1 20:00"] == s.iloc[0]
        with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"):
            s["2004-12-31 00:15"]

    def test_partial_slice_minutely(self):
        """Minute and day strings slice a secondly Series; a Timestamp is exact."""
        rng = date_range(freq="S", start=datetime(2005, 1, 1, 23, 59, 0), periods=500)
        s = Series(np.arange(len(rng)), index=rng)

        tm.assert_series_equal(s["2005-1-1 23:59"], s.iloc[:60])
        tm.assert_series_equal(s["2005-1-1"], s.iloc[:60])

        assert s[Timestamp("2005-1-1 23:59:00")] == s.iloc[0]
        with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"):
            s["2004-12-31 00:00:00"]

    def test_partial_slice_second_precision(self):
        """Minute/second strings slice a microsecond-frequency Series."""
        rng = date_range(
            start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990),
            periods=20,
            freq="US",
        )
        s = Series(np.arange(20), rng)

        # the first ten stamps fall in 00:00:59, the rest in 00:01:00
        tm.assert_series_equal(s["2005-1-1 00:00"], s.iloc[:10])
        tm.assert_series_equal(s["2005-1-1 00:00:59"], s.iloc[:10])

        tm.assert_series_equal(s["2005-1-1 00:01"], s.iloc[10:])
        tm.assert_series_equal(s["2005-1-1 00:01:00"], s.iloc[10:])

        assert s[Timestamp("2005-1-1 00:00:59.999990")] == s.iloc[0]
        with pytest.raises(KeyError, match="2005-1-1 00:00:00"):
            s["2005-1-1 00:00:00"]

    def test_partial_slicing_dataframe(self):
        """GH14856: string resolution versus index resolution.

        - If string resolution is less precise than index resolution,
          the string is considered a slice.
        - If string resolution is equal to or more precise than index
          resolution, the string is considered an exact match.
        """
        formats = [
            "%Y",
            "%Y-%m",
            "%Y-%m-%d",
            "%Y-%m-%d %H",
            "%Y-%m-%d %H:%M",
            "%Y-%m-%d %H:%M:%S",
        ]
        resolutions = ["year", "month", "day", "hour", "minute", "second"]
        for rnum, resolution in enumerate(resolutions[2:], 2):
            # we check only 'day', 'hour', 'minute' and 'second'
            unit = Timedelta("1 " + resolution)
            middate = datetime(2012, 1, 1, 0, 0, 0)
            index = DatetimeIndex([middate - unit, middate, middate + unit])
            values = [1, 2, 3]
            df = DataFrame({"a": values}, index, dtype=np.int64)
            assert df.index.resolution == resolution

            # Timestamp with the same resolution as index
            # Should be exact match for Series (return scalar)
            # and raise KeyError for Frame
            for timestamp, expected in zip(index, values):
                ts_string = timestamp.strftime(formats[rnum])
                # make ts_string as precise as index
                result = df["a"][ts_string]
                assert isinstance(result, np.int64)
                assert result == expected
                msg = r"^'{}'$".format(ts_string)
                with pytest.raises(KeyError, match=msg):
                    df[ts_string]

            # Timestamp with resolution less precise than index
            for fmt in formats[:rnum]:
                for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]:
                    ts_string = index[element].strftime(fmt)

                    # Series should return slice
                    result = df["a"][ts_string]
                    expected = df["a"][theslice]
                    tm.assert_series_equal(result, expected)

                    # Frame should return slice as well
                    result = df[ts_string]
                    expected = df[theslice]
                    tm.assert_frame_equal(result, expected)

            # Timestamp with resolution more precise than index
            # Compatible with existing key
            # Should return scalar for Series
            # and raise KeyError for Frame
            for fmt in formats[rnum + 1 :]:
                ts_string = index[1].strftime(fmt)
                result = df["a"][ts_string]
                assert isinstance(result, np.int64)
                assert result == 2
                msg = r"^'{}'$".format(ts_string)
                with pytest.raises(KeyError, match=msg):
                    df[ts_string]

            # Not compatible with existing key
            # Should raise KeyError
            for fmt, res in list(zip(formats, resolutions))[rnum + 1 :]:
                ts = index[1] + Timedelta("1 " + res)
                ts_string = ts.strftime(fmt)
                msg = r"^'{}'$".format(ts_string)
                with pytest.raises(KeyError, match=msg):
                    df["a"][ts_string]
                with pytest.raises(KeyError, match=msg):
                    df[ts_string]

    def test_partial_slicing_with_multiindex(self):
        """Partial-string selection when the datetime level is in a MultiIndex."""
        # GH 4758
        # partial string indexing with a multi-index buggy
        df = DataFrame(
            {
                "ACCOUNT": ["ACCT1", "ACCT1", "ACCT1", "ACCT2"],
                "TICKER": ["ABC", "MNP", "XYZ", "XYZ"],
                "val": [1, 2, 3, 4],
            },
            index=date_range("2013-06-19 09:30:00", periods=4, freq="5T"),
        )
        df_multi = df.set_index(["ACCOUNT", "TICKER"], append=True)

        expected = DataFrame(
            [[1]], index=Index(["ABC"], name="TICKER"), columns=["val"]
        )
        result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1")]
        tm.assert_frame_equal(result, expected)

        expected = df_multi.loc[
            (pd.Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC")
        ]
        result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")]
        tm.assert_series_equal(result, expected)

        # this is an IndexingError as we don't do partial string selection on
        # multi-levels.
        msg = "Too many indexers"
        with pytest.raises(IndexingError, match=msg):
            df_multi.loc[("2013-06-19", "ACCT1", "ABC")]

        # GH 4294
        # partial slice on a series mi
        s = pd.DataFrame(
            np.random.rand(1000, 1000), index=pd.date_range("2000-1-1", periods=1000)
        ).stack()

        s2 = s[:-1].copy()
        expected = s2["2000-1-4"]
        result = s2[pd.Timestamp("2000-1-4")]
        tm.assert_series_equal(result, expected)

        result = s[pd.Timestamp("2000-1-4")]
        expected = s["2000-1-4"]
        tm.assert_series_equal(result, expected)

        df2 = pd.DataFrame(s)
        expected = df2.xs("2000-1-4")
        result = df2.loc[pd.Timestamp("2000-1-4")]
        tm.assert_frame_equal(result, expected)

    def test_partial_slice_doesnt_require_monotonicity(self):
        """String slice bounds work on a non-monotonic index; Timestamps raise."""
        # For historical reasons.
        s = pd.Series(np.arange(10), pd.date_range("2014-01-01", periods=10))

        nonmonotonic = s[[3, 5, 4]]
        expected = nonmonotonic.iloc[:0]
        timestamp = pd.Timestamp("2014-01-10")

        tm.assert_series_equal(nonmonotonic["2014-01-10":], expected)
        with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
            nonmonotonic[timestamp:]

        tm.assert_series_equal(nonmonotonic.loc["2014-01-10":], expected)
        with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
            nonmonotonic.loc[timestamp:]

    def test_loc_datetime_length_one(self):
        """GH16071: open-ended ``.loc`` slices return the whole frame."""
        df = pd.DataFrame(
            columns=["1"],
            index=pd.date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"),
        )
        result = df.loc[datetime(2016, 10, 1) :]
        tm.assert_frame_equal(result, df)

        result = df.loc["2016-10-01T00:00:00":]
        tm.assert_frame_equal(result, df)

    @pytest.mark.parametrize(
        "datetimelike",
        [
            Timestamp("20130101"),
            datetime(2013, 1, 1),
            np.datetime64("2013-01-01T00:00", "ns"),
        ],
    )
    @pytest.mark.parametrize(
        "op,expected",
        [
            (op.lt, [True, False, False, False]),
            (op.le, [True, True, False, False]),
            (op.eq, [False, True, False, False]),
            (op.gt, [False, False, False, True]),
        ],
    )
    def test_selection_by_datetimelike(self, datetimelike, op, expected):
        """Compare a datetime64[ns] column against several datetime-like scalars."""
        # GH issue #17965, test for ability to compare datetime64[ns] columns
        # to datetimelike
        df = DataFrame(
            {
                "A": [
                    pd.Timestamp("20120101"),
                    pd.Timestamp("20130101"),
                    np.nan,
                    pd.Timestamp("20130103"),
                ]
            }
        )
        result = op(df.A, datetimelike)
        expected = Series(expected, name="A")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "start",
        [
            "2018-12-02 21:50:00+00:00",
            pd.Timestamp("2018-12-02 21:50:00+00:00"),
            pd.Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(),
        ],
    )
    @pytest.mark.parametrize(
        "end",
        [
            "2018-12-02 21:52:00+00:00",
            pd.Timestamp("2018-12-02 21:52:00+00:00"),
            pd.Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(),
        ],
    )
    def test_getitem_with_datestring_with_UTC_offset(self, start, end):
        """Slice a tz-aware frame with bounds that carry a UTC offset."""
        # GH 24076
        # NOTE(review): start and end of this range are the same instant, so
        # the index holds one row and ``iloc[0:3]`` selects just that row -
        # confirm whether a three-minute range was intended.
        idx = pd.date_range(
            start="2018-12-02 14:50:00-07:00",
            end="2018-12-02 14:50:00-07:00",
            freq="1min",
        )
        df = pd.DataFrame(1, index=idx, columns=["A"])
        result = df[start:end]
        expected = df.iloc[0:3, :]
        tm.assert_frame_equal(result, expected)

        # GH 16785
        start = str(start)
        end = str(end)
        with pytest.raises(ValueError, match="Both dates must"):
            df[start : end[:-4] + "1:00"]

        # Localize outside the ``raises`` body so that only the slice itself
        # can satisfy the expected ValueError.
        df = df.tz_localize(None)
        with pytest.raises(ValueError, match="The index must be timezone"):
            df[start:end]
|
||||
@@ -0,0 +1,343 @@
|
||||
"""
|
||||
Tests for DatetimeIndex methods behaving like their Timestamp counterparts
|
||||
"""
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DatetimeIndex, Timestamp, date_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from pandas.tseries.frequencies import to_offset
|
||||
|
||||
|
||||
class TestDatetimeIndexOps:
|
||||
def test_dti_time(self):
|
||||
rng = date_range("1/1/2000", freq="12min", periods=10)
|
||||
result = pd.Index(rng).time
|
||||
expected = [t.time() for t in rng]
|
||||
assert (result == expected).all()
|
||||
|
||||
def test_dti_date(self):
|
||||
rng = date_range("1/1/2000", freq="12H", periods=10)
|
||||
result = pd.Index(rng).date
|
||||
expected = [t.date() for t in rng]
|
||||
assert (result == expected).all()
|
||||
|
||||
@pytest.mark.parametrize("data", [["1400-01-01"], [datetime(1400, 1, 1)]])
def test_dti_date_out_of_range(self, data):
    """GH#1475: constructing from the year 1400 raises ``OutOfBoundsDatetime``."""
    expected_message = "Out of bounds nanosecond timestamp: 1400-01-01 00:00:00"
    with pytest.raises(OutOfBoundsDatetime, match=expected_message):
        DatetimeIndex(data)
|
||||
|
||||
@pytest.mark.parametrize(
    "field",
    [
        "dayofweek",
        "dayofyear",
        "week",
        "weekofyear",
        "quarter",
        "days_in_month",
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
        "weekday_name",
    ],
)
def test_dti_timestamp_fields(self, field):
    """A field read off the index agrees with the same field on a Timestamp."""
    # extra fields from DatetimeIndex like quarter and week
    idx = tm.makeDateIndex(100)
    from_index = getattr(idx, field)[-1]

    if field != "weekday_name":
        from_scalar = getattr(Timestamp(idx[-1]), field)
    else:
        # the scalar ``weekday_name`` accessor is expected to warn
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            from_scalar = getattr(Timestamp(idx[-1]), field)

    assert from_scalar == from_index
|
||||
|
||||
def test_dti_timestamp_freq_fields(self):
    """A Timestamp built with the index freq reports the same freq/freqstr."""
    # extra fields from DatetimeIndex like quarter and week
    idx = tm.makeDateIndex(100)
    last = idx[-1]

    assert idx.freq == Timestamp(last, idx.freq).freq
    assert idx.freqstr == Timestamp(last, idx.freq).freqstr
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# DatetimeIndex.round
|
||||
|
||||
def test_round_daily(self):
    """Round to day for naive and tz-aware input; ``"s"`` is a no-op here."""
    naive = date_range("20130101 09:10:11", periods=5)
    tm.assert_index_equal(naive.round("D"), date_range("20130101", periods=5))

    eastern = naive.tz_localize("UTC").tz_convert("US/Eastern")
    midnight_eastern = date_range("20130101", periods=5).tz_localize("US/Eastern")
    tm.assert_index_equal(eastern.round("D"), midnight_eastern)

    # the values already sit on whole seconds
    tm.assert_index_equal(eastern.round("s"), eastern)
|
||||
|
||||
@pytest.mark.parametrize(
    "freq, error_msg",
    [
        ("Y", "<YearEnd: month=12> is a non-fixed frequency"),
        ("M", "<MonthEnd> is a non-fixed frequency"),
        ("foobar", "Invalid frequency: foobar"),
    ],
)
def test_round_invalid(self, freq, error_msg):
    """Rounding to a non-fixed or unknown frequency raises ``ValueError``."""
    eastern = (
        date_range("20130101 09:10:11", periods=5)
        .tz_localize("UTC")
        .tz_convert("US/Eastern")
    )
    with pytest.raises(ValueError, match=error_msg):
        eastern.round(freq)
|
||||
|
||||
def test_round(self, tz_naive_fixture):
    """Round an index and one of its elements across several frequencies."""
    tz = tz_naive_fixture
    rng = date_range(start="2016-01-01", periods=5, freq="30Min", tz=tz)
    elt = rng[1]

    hourly_stamps = (
        "2016-01-01 00:00:00",
        "2016-01-01 00:00:00",
        "2016-01-01 01:00:00",
        "2016-01-01 02:00:00",
        "2016-01-01 02:00:00",
    )
    expected_rng = DatetimeIndex(
        [Timestamp(stamp, tz=tz, freq="30T") for stamp in hourly_stamps]
    )
    expected_elt = expected_rng[1]

    tm.assert_index_equal(rng.round(freq="H"), expected_rng)
    assert elt.round(freq="H") == expected_elt

    # unusable frequencies must raise for the index and for the scalar
    rejected = (
        ("foo", pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG),
        ("M", "<MonthEnd> is a non-fixed frequency"),
    )
    for bad_freq, msg in rejected:
        for target in (rng, elt):
            with pytest.raises(ValueError, match=msg):
                target.round(freq=bad_freq)

    # GH#14440 & GH#15578
    index = DatetimeIndex(["2016-10-17 12:00:00.0015"], tz=tz)
    tm.assert_index_equal(
        index.round("ms"), DatetimeIndex(["2016-10-17 12:00:00.002000"], tz=tz)
    )

    for freq in ["us", "ns"]:
        tm.assert_index_equal(index, index.round(freq))

    index = DatetimeIndex(["2016-10-17 12:00:00.00149"], tz=tz)
    tm.assert_index_equal(
        index.round("ms"), DatetimeIndex(["2016-10-17 12:00:00.001000"], tz=tz)
    )

    index = DatetimeIndex(["2016-10-17 12:00:00.001501031"])
    tm.assert_index_equal(
        index.round("10ns"), DatetimeIndex(["2016-10-17 12:00:00.001501030"])
    )

    # this rounding must not emit any warning
    ts = "2016-10-17 12:00:00.001501031"
    with tm.assert_produces_warning(False):
        DatetimeIndex([ts]).round("1010ns")
|
||||
|
||||
def test_no_rounding_occurs(self, tz_naive_fixture):
|
||||
# GH 21262
|
||||
tz = tz_naive_fixture
|
||||
rng = date_range(start="2016-01-01", periods=5, freq="2Min", tz=tz)
|
||||
|
||||
expected_rng = DatetimeIndex(
|
||||
[
|
||||
Timestamp("2016-01-01 00:00:00", tz=tz, freq="2T"),
|
||||
Timestamp("2016-01-01 00:02:00", tz=tz, freq="2T"),
|
||||
Timestamp("2016-01-01 00:04:00", tz=tz, freq="2T"),
|
||||
Timestamp("2016-01-01 00:06:00", tz=tz, freq="2T"),
|
||||
Timestamp("2016-01-01 00:08:00", tz=tz, freq="2T"),
|
||||
]
|
||||
)
|
||||
|
||||
tm.assert_index_equal(rng.round(freq="2T"), expected_rng)
|
||||
|
||||
@pytest.mark.parametrize(
    "test_input, rounder, freq, expected",
    [
        # Input already sits on a multiple of the rounding step.
        (["2117-01-01 00:00:45"], "floor", "15s", ["2117-01-01 00:00:45"]),
        (["2117-01-01 00:00:45"], "ceil", "15s", ["2117-01-01 00:00:45"]),
        # Nanosecond inputs that are expected to move to the next multiple.
        (
            ["2117-01-01 00:00:45.000000012"],
            "floor",
            "10ns",
            ["2117-01-01 00:00:45.000000010"],
        ),
        (
            ["1823-01-01 00:00:01.000000012"],
            "ceil",
            "10ns",
            ["1823-01-01 00:00:01.000000020"],
        ),
        (["1823-01-01 00:00:01"], "floor", "1s", ["1823-01-01 00:00:01"]),
        (["1823-01-01 00:00:01"], "ceil", "1s", ["1823-01-01 00:00:01"]),
        (["2018-01-01 00:15:00"], "ceil", "15T", ["2018-01-01 00:15:00"]),
        (["2018-01-01 00:15:00"], "floor", "15T", ["2018-01-01 00:15:00"]),
        (["1823-01-01 03:00:00"], "ceil", "3H", ["1823-01-01 03:00:00"]),
        (["1823-01-01 03:00:00"], "floor", "3H", ["1823-01-01 03:00:00"]),
        # A leading NaT is expected to come back as NaT.
        (
            ("NaT", "1823-01-01 00:00:01"),
            "floor",
            "1s",
            ("NaT", "1823-01-01 00:00:01"),
        ),
        (
            ("NaT", "1823-01-01 00:00:01"),
            "ceil",
            "1s",
            ("NaT", "1823-01-01 00:00:01"),
        ),
    ],
)
def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
    """Apply ``floor``/``ceil`` to boundary inputs and compare with *expected*.

    Parameters
    ----------
    test_input : sequence of str
        Timestamps (or "NaT") used to build the index under test.
    rounder : str
        Name of the DatetimeIndex method to call ("floor" or "ceil").
    freq : str
        Frequency string handed to that method.
    expected : sequence of str
        Timestamps the rounded index must be equal to.
    """
    source = DatetimeIndex(list(test_input))
    rounded = getattr(source, rounder)(freq)
    wanted = DatetimeIndex(list(expected))
    assert wanted.equals(rounded)
|
||||
|
||||
@pytest.mark.parametrize(
    "start, index_freq, periods",
    [("2018-01-01", "12H", 25), ("2018-01-01 0:0:0.124999", "1ns", 1000)],
)
@pytest.mark.parametrize(
    "round_freq",
    [
        "2ns",
        "3ns",
        "4ns",
        "5ns",
        "6ns",
        "7ns",
        "250ns",
        "500ns",
        "750ns",
        "1us",
        "19us",
        "250us",
        "500us",
        "750us",
        "1s",
        "2s",
        "3s",
        "12H",
        "1D",
    ],
)
def test_round_int64(self, start, index_freq, periods, round_freq):
    """Check floor/ceil/round on the raw int64 nanosecond values.

    Parameters
    ----------
    start, index_freq, periods
        Arguments forwarded to ``date_range`` to build the index under test.
    round_freq : str
        Frequency the index is floored, ceiled and rounded to; its length
        in nanoseconds is the ``unit`` every result must be a multiple of.
    """
    dt = date_range(start=start, freq=index_freq, periods=periods)
    unit = to_offset(round_freq).nanos

    # test floor: result is a multiple of unit, at most unit - 1 below input
    result = dt.floor(round_freq)
    diff = dt.asi8 - result.asi8
    mod = result.asi8 % unit
    assert (mod == 0).all(), "floor not a {} multiple".format(round_freq)
    assert (0 <= diff).all() and (diff < unit).all(), "floor error"

    # test ceil: result is a multiple of unit, at most unit - 1 above input
    result = dt.ceil(round_freq)
    diff = result.asi8 - dt.asi8
    mod = result.asi8 % unit
    assert (mod == 0).all(), "ceil not a {} multiple".format(round_freq)
    assert (0 <= diff).all() and (diff < unit).all(), "ceil error"

    # test round: result is a multiple of unit, no further than half a unit
    result = dt.round(round_freq)
    diff = abs(result.asi8 - dt.asi8)
    mod = result.asi8 % unit
    assert (mod == 0).all(), "round not a {} multiple".format(round_freq)
    assert (diff <= unit // 2).all(), "round error"
    if unit % 2 == 0:
        # Exact ties only exist for an even unit.  Half-to-even is a
        # statement about WHICH multiple of unit was chosen, so test the
        # parity of result // unit.  Testing result % 2 (as before) is
        # vacuous: any multiple of an even unit is itself even.
        ties = diff == unit // 2
        assert (
            (result.asi8[ties] // unit) % 2 == 0
        ).all(), "round half to even error"
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# DatetimeIndex.normalize
|
||||
|
||||
def test_normalize(self):
|
||||
rng = date_range("1/1/2000 9:30", periods=10, freq="D")
|
||||
|
||||
result = rng.normalize()
|
||||
expected = date_range("1/1/2000", periods=10, freq="D")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
arr_ns = np.array([1380585623454345752, 1380585612343234312]).astype(
|
||||
"datetime64[ns]"
|
||||
)
|
||||
rng_ns = DatetimeIndex(arr_ns)
|
||||
rng_ns_normalized = rng_ns.normalize()
|
||||
|
||||
arr_ns = np.array([1380585600000000000, 1380585600000000000]).astype(
|
||||
"datetime64[ns]"
|
||||
)
|
||||
expected = DatetimeIndex(arr_ns)
|
||||
tm.assert_index_equal(rng_ns_normalized, expected)
|
||||
|
||||
assert result.is_normalized
|
||||
assert not rng.is_normalized
|
||||
|
||||
def test_normalize_nat(self):
|
||||
dti = DatetimeIndex([pd.NaT, Timestamp("2018-01-01 01:00:00")])
|
||||
result = dti.normalize()
|
||||
expected = DatetimeIndex([pd.NaT, Timestamp("2018-01-01")])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
class TestDateTimeIndexToJulianDate:
    """Compare ``DatetimeIndex.to_julian_date`` with the per-element result.

    Each test builds a five-element range at a different frequency and
    delegates to one shared check, so the expectation lives in one place.
    """

    @staticmethod
    def _check_against_elementwise(dr):
        """Assert converting *dr* as a whole equals converting each element.

        Parameters
        ----------
        dr : DatetimeIndex
            Range whose Julian dates are computed both ways.
        """
        r1 = pd.Index([x.to_julian_date() for x in dr])
        r2 = dr.to_julian_date()
        # Check for "a float64 index" through the dtype rather than through
        # the Float64Index class: the two are equivalent where that class
        # exists, and the class itself is gone from pandas >= 2.0.
        assert isinstance(r2, pd.Index)
        assert r2.dtype == "float64"
        tm.assert_index_equal(r1, r2)

    def test_1700(self):
        """Daily range starting in 1710."""
        dr = date_range(start=Timestamp("1710-10-01"), periods=5, freq="D")
        self._check_against_elementwise(dr)

    def test_2000(self):
        """Daily range starting 2000-02-27."""
        dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="D")
        self._check_against_elementwise(dr)

    def test_hour(self):
        """Hourly range, so the fractional-day part differs per element."""
        dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="H")
        self._check_against_elementwise(dr)

    def test_minute(self):
        """Minutely range."""
        dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="T")
        self._check_against_elementwise(dr)

    def test_second(self):
        """Secondly range."""
        dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="S")
        self._check_against_elementwise(dr)
|
||||
@@ -0,0 +1,581 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Int64Index,
|
||||
Series,
|
||||
bdate_range,
|
||||
date_range,
|
||||
to_datetime,
|
||||
)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from pandas.tseries.offsets import BMonthEnd, Minute, MonthEnd
|
||||
|
||||
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
|
||||
|
||||
|
||||
class TestDatetimeIndexSetOps:
    """Union / intersection / difference / join tests for DatetimeIndex."""

    # Time zones used by the tz-parametrised tests; None is the naive case.
    # The name must stay ``tz``: the decorators below read it at class
    # definition time.
    tz = [
        None,
        "UTC",
        "Asia/Tokyo",
        "US/Eastern",
        "dateutil/Asia/Singapore",
        "dateutil/US/Pacific",
    ]

    # TODO: moved from test_datetimelike; dedup with version below
    @pytest.mark.parametrize("sort", [None, False])
    def test_union2(self, sort):
        """Two adjacent halves of an index union back into the whole."""
        whole = tm.makeDateIndex(10)
        head, tail = whole[:5], whole[5:]
        tm.assert_index_equal(head.union(tail, sort=sort), whole)

    @pytest.mark.parametrize("box", [np.array, Series, list])
    @pytest.mark.parametrize("sort", [None, False])
    def test_union3(self, sort, box):
        """Union with the second half wrapped in a non-Index container."""
        whole = tm.makeDateIndex(10)
        head, tail = whole[:5], whole[5:]

        # GH 10149
        tail_as_object = pd.Index(tail.values, dtype="O")
        expected = head.astype("O").union(tail_as_object).astype("O")

        result = head.union(box(tail.values), sort=sort)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", tz)
    @pytest.mark.parametrize("sort", [None, False])
    def test_union(self, tz, sort):
        """Union in both directions for adjacent, overlapping and empty input."""

        def days(start, periods):
            # Daily range in the time zone under test.
            return pd.date_range(start, freq="D", periods=periods, tz=tz)

        # Each entry: (left, right, sorted expectation, expectation for
        # right.union(left) when no sorting is requested).
        adjacent_left = days("1/1/2000", 5)
        adjacent_right = days("1/6/2000", 5)
        overlap_left = days("1/1/2000", 5)
        overlap_right = days("1/4/2000", 5)
        lone = days("1/1/2000", 5)
        cases = [
            (
                adjacent_left,
                adjacent_right,
                days("1/1/2000", 10),
                pd.DatetimeIndex(list(adjacent_right) + list(adjacent_left)),
            ),
            (
                overlap_left,
                overlap_right,
                days("1/1/2000", 8),
                pd.DatetimeIndex(list(overlap_right) + list(overlap_left[:3])),
            ),
            (lone, pd.DatetimeIndex([], tz=tz), days("1/1/2000", 5), lone),
        ]

        for rng, other, exp, exp_notsorted in cases:
            tm.assert_index_equal(rng.union(other, sort=sort), exp)

            reversed_expected = exp if sort is None else exp_notsorted
            tm.assert_index_equal(other.union(rng, sort=sort), reversed_expected)

    @pytest.mark.parametrize("sort", [None, False])
    def test_union_coverage(self, sort):
        """Union of a sorted index with its shuffled / empty counterpart."""
        shuffled = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"])
        ordered = DatetimeIndex(shuffled.sort_values(), freq="infer")

        tm.assert_index_equal(ordered.union(shuffled, sort=sort), ordered)

        from_empty = ordered[:0].union(ordered, sort=sort)
        tm.assert_index_equal(from_empty, ordered)
        assert from_empty.freq == ordered.freq

    @pytest.mark.parametrize("sort", [None, False])
    def test_union_bug_1730(self, sort):
        """Union of a 3-hourly and a 4-hourly range with a common start."""
        every_3h = date_range("1/1/2012", periods=4, freq="3H")
        every_4h = date_range("1/1/2012", periods=4, freq="4H")

        result = every_3h.union(every_4h, sort=sort)

        # The first stamp of the second range is left out of the expectation.
        stamps = list(every_3h) + list(every_4h[1:])
        if sort is None:
            stamps = sorted(stamps)
        tm.assert_index_equal(result, DatetimeIndex(stamps))

    @pytest.mark.parametrize("sort", [None, False])
    def test_union_bug_1745(self, sort):
        """Union of a single stamp with an unsorted three-stamp index."""
        left_stamps = ["2012-05-11 15:19:49.695000"]
        right_stamps = [
            "2012-05-29 13:04:21.322000",
            "2012-05-11 15:27:24.873000",
            "2012-05-11 15:31:05.350000",
        ]
        left = DatetimeIndex(left_stamps)
        right = DatetimeIndex(right_stamps)

        result = left.union(right, sort=sort)

        exp = DatetimeIndex(left_stamps + right_stamps)
        if sort is None:
            exp = exp.sort_values()
        tm.assert_index_equal(result, exp)

    @pytest.mark.parametrize("sort", [None, False])
    def test_union_bug_4564(self, sort):
        """Union of a daily range with a copy shifted by 15 minutes."""
        from pandas import DateOffset

        left = date_range("2013-01-01", "2013-02-01")
        right = left + DateOffset(minutes=15)

        result = left.union(right, sort=sort)

        stamps = list(left) + list(right)
        if sort is None:
            stamps = sorted(stamps)
        tm.assert_index_equal(result, DatetimeIndex(stamps))

    @pytest.mark.parametrize("sort", [None, False])
    def test_union_freq_both_none(self, sort):
        """Self-union of a freq-less index stays freq-less (GH11086)."""
        no_freq = bdate_range("20150101", periods=10)
        no_freq.freq = None

        result = no_freq.union(no_freq, sort=sort)
        tm.assert_index_equal(result, no_freq)
        assert result.freq is None

    def test_union_dataframe_index(self):
        """A DataFrame built from two Series gets the combined month-start index."""
        early_months = date_range("1/1/1980", "12/1/2001", freq="MS")
        late_months = date_range("1/1/1999", "1/1/2012", freq="MS")

        s1 = Series(np.random.randn(len(late_months)), late_months)
        s2 = Series(np.random.randn(len(early_months)), early_months)
        frame = DataFrame({"s1": s1, "s2": s2})

        tm.assert_index_equal(
            frame.index, pd.date_range("1/1/1980", "1/1/2012", freq="MS")
        )

    @pytest.mark.parametrize("sort", [None, False])
    def test_union_with_DatetimeIndex(self, sort):
        """Smoke test: Int64Index/DatetimeIndex union runs in both directions.

        Nothing is asserted about the result; the test only passes if
        neither call raises.
        """
        ints = Int64Index(np.arange(0, 20, 2))
        dates = date_range(start="2012-01-03 00:00:00", periods=10, freq="D")

        ints.union(dates, sort=sort)
        # NOTE(review): the original comment recorded this direction as
        # failing with "AttributeError: can't set attribute" - presumably
        # the historical bug this test guards against; confirm.
        dates.union(ints, sort=sort)

    # TODO: moved from test_datetimelike; de-duplicate with version below
    def test_intersection2(self):
        """Intersection with a slice, with boxed values and with strings."""
        first = tm.makeDateIndex(10)
        second = first[5:]
        assert tm.equalContents(first.intersection(second), second)

        # GH 10149
        for boxer in (np.array, Series, list):
            boxed = boxer(second.values)
            assert tm.equalContents(first.intersection(boxed), second)

        # Nothing in common with a string index -> empty object index.
        strings = Index(["a", "b", "c"])
        tm.assert_index_equal(
            first.intersection(strings), pd.Index([], dtype=object)
        )

    @pytest.mark.parametrize(
        "tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"]
    )
    @pytest.mark.parametrize("sort", [None, False])
    def test_intersection(self, tz, sort):
        """Name, freq and tz of an intersection, monotonic and not."""
        # GH 4690 (with tz)
        # NOTE(review): this first group builds its ranges without ``tz``;
        # the parameter is only applied to the non-monotonic group below.
        base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx")
        monotonic_cases = [
            # if target has the same name, it is preserved
            (
                date_range("5/15/2000", "6/20/2000", freq="D", name="idx"),
                date_range("6/1/2000", "6/20/2000", freq="D", name="idx"),
            ),
            # if target name is different, it will be reset
            (
                date_range("5/15/2000", "6/20/2000", freq="D", name="other"),
                date_range("6/1/2000", "6/20/2000", freq="D", name=None),
            ),
            # no overlap at all
            (
                date_range("7/1/2000", "7/31/2000", freq="D", name="idx"),
                DatetimeIndex([], name="idx"),
            ),
        ]
        for other, expected in monotonic_cases:
            result = base.intersection(other)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq
            assert result.tz == expected.tz

        # non-monotonic
        partial_overlap = ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"]
        shared = ["2011-01-04", "2011-01-02"]
        base = DatetimeIndex(
            ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"],
            tz=tz,
            name="idx",
        )
        unordered_cases = [
            (
                DatetimeIndex(partial_overlap, tz=tz, name="idx"),
                DatetimeIndex(shared, tz=tz, name="idx"),
            ),
            (
                DatetimeIndex(partial_overlap, tz=tz, name="other"),
                DatetimeIndex(shared, tz=tz, name=None),
            ),
            # GH 7880
            (
                date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx"),
                DatetimeIndex([], tz=tz, name="idx"),
            ),
        ]
        for other, expected in unordered_cases:
            result = base.intersection(other, sort=sort)
            if sort is None:
                expected = expected.sort_values()
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq is None
            assert result.tz == expected.tz

    def test_intersection_empty(self):
        """Intersecting with an empty slice gives an empty result either way."""
        # empty same freq GH2129
        minutes = date_range("6/1/2000", "6/15/2000", freq="T")
        nothing = minutes[0:0]

        assert len(nothing.intersection(minutes)) == 0
        assert len(minutes.intersection(nothing)) == 0

    def test_intersection_bug_1708(self):
        """A 12-hourly range shares nothing with itself shifted by one hour."""
        from pandas import DateOffset

        index_1 = date_range("1/1/2012", periods=4, freq="12H")
        index_2 = index_1 + DateOffset(hours=1)

        assert len(index_1 & index_2) == 0

    @pytest.mark.parametrize("tz", tz)
    @pytest.mark.parametrize("sort", [None, False])
    def test_difference(self, tz, sort):
        """Difference of an unsorted index against three other indexes."""
        rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"]

        def unsorted(dates=rng_dates):
            # Fresh index per use, in the time zone under test.
            return pd.DatetimeIndex(dates, tz=tz)

        cases = [
            # nothing in common: everything survives
            (
                unsorted(),
                pd.date_range("1/6/2000", freq="D", periods=5, tz=tz),
                unsorted(),
            ),
            # partial overlap: only the first three entries survive
            (
                unsorted(),
                pd.date_range("1/4/2000", freq="D", periods=5, tz=tz),
                unsorted(rng_dates[:3]),
            ),
            # empty other: everything survives
            (unsorted(), pd.DatetimeIndex([], tz=tz), unsorted()),
        ]
        for rng, other, expected in cases:
            result_diff = rng.difference(other, sort)
            if sort is None:
                expected = expected.sort_values()
            tm.assert_index_equal(result_diff, expected)

    @pytest.mark.parametrize("sort", [None, False])
    def test_difference_freq(self, sort):
        """The result of ``difference`` carries no frequency (GH14323)."""
        index = date_range("20160920", "20160925", freq="D")

        # Removing the middle leaves the two end points.
        middle = date_range("20160921", "20160924", freq="D")
        ends = DatetimeIndex(["20160920", "20160925"], freq=None)
        idx_diff = index.difference(middle, sort)
        tm.assert_index_equal(idx_diff, ends)
        tm.assert_attr_equal("freq", idx_diff, ends)

        # Removing the tail leaves the first two days.
        tail = date_range("20160922", "20160925", freq="D")
        idx_diff = index.difference(tail, sort)
        head = DatetimeIndex(["20160920", "20160921"], freq=None)
        tm.assert_index_equal(idx_diff, head)
        tm.assert_attr_equal("freq", idx_diff, head)

    @pytest.mark.parametrize("sort", [None, False])
    def test_datetimeindex_diff(self, sort):
        """A 100-period range minus its 98-period prefix has two entries."""
        quarter_start = datetime(1997, 12, 31)
        longer = date_range(freq="Q-JAN", start=quarter_start, periods=100)
        shorter = date_range(freq="Q-JAN", start=quarter_start, periods=98)
        assert len(longer.difference(shorter, sort)) == 2

    @pytest.mark.parametrize("sort", [None, False])
    def test_datetimeindex_union_join_empty(self, sort):
        """Union and join of a daily range with an empty generic Index."""
        dti = date_range(start="1/1/2001", end="2/1/2001", freq="D")
        empty = Index([])

        # Union is expected to come back as an object-dtype index ...
        tm.assert_index_equal(dti.union(empty, sort=sort), dti.astype("O"))

        # ... while join is expected to keep the DatetimeIndex.
        joined = dti.join(empty)
        assert isinstance(joined, DatetimeIndex)
        tm.assert_index_equal(joined, dti)

    def test_join_nonunique(self):
        """Outer join of two indexes with duplicated stamps is monotonic."""
        later = to_datetime(
            ["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"]
        )
        earlier = to_datetime(
            ["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"]
        )
        assert later.join(earlier, how="outer").is_monotonic
|
||||
|
||||
|
||||
class TestBusinessDatetimeIndex:
    """Set-operation tests on a business-day range from START to END."""

    def setup_method(self, method):
        """Build the business-day range every test slices from."""
        self.rng = bdate_range(START, END)

    @staticmethod
    def _union_adjacent_month_ends(tz, sort):
        """Union two tz-aware month-end ranges; passes if nothing raises."""
        early_dr = date_range(
            start=datetime(2011, 1, 1),
            end=datetime(2011, 3, 1),
            tz=tz,
            freq=MonthEnd(),
        )
        late_dr = date_range(
            start=datetime(2011, 3, 1),
            end=datetime(2011, 5, 1),
            tz=tz,
            freq=MonthEnd(),
        )
        early_dr.union(late_dr, sort=sort)

    @pytest.mark.parametrize("sort", [None, False])
    def test_union(self, sort):
        """Result type of union for several relative positions of the inputs."""
        bdays = self.rng

        # overlapping
        overlapping = bdays[:10].union(bdays[5:10], sort=sort)
        assert isinstance(overlapping, DatetimeIndex)

        # non-overlapping, gap in middle
        gapped = bdays[:5].union(bdays[10:], sort=sort)
        assert isinstance(gapped, Index)

        # non-overlapping, no gap
        left, right = bdays[:5], bdays[5:10]
        adjacent = left.union(right, sort=sort)
        assert isinstance(adjacent, DatetimeIndex)

        # order does not matter when sorting; without sorting the operands
        # are expected back to back in call order
        if sort is None:
            swapped_expected = adjacent
        else:
            swapped_expected = pd.DatetimeIndex(list(right) + list(left))
        tm.assert_index_equal(right.union(left, sort=sort), swapped_expected)

        # overlapping, but different offset
        month_ends = date_range(START, END, freq=BMonthEnd())
        mixed = bdays.union(month_ends, sort=sort)
        assert isinstance(mixed, DatetimeIndex)

    def test_outer_join(self):
        """Outer join should just behave as union."""
        bdays = self.rng

        # overlapping
        joined = bdays[:10].join(bdays[5:10], how="outer")
        assert isinstance(joined, DatetimeIndex)

        # non-overlapping, gap in middle
        joined = bdays[:5].join(bdays[10:], how="outer")
        assert isinstance(joined, DatetimeIndex)
        assert joined.freq is None

        # non-overlapping, no gap
        joined = bdays[:5].join(bdays[5:10], how="outer")
        assert isinstance(joined, DatetimeIndex)

        # overlapping, but different offset
        month_ends = date_range(START, END, freq=BMonthEnd())
        joined = bdays.join(month_ends, how="outer")
        assert isinstance(joined, DatetimeIndex)
        assert joined.freq is None

    @pytest.mark.parametrize("sort", [None, False])
    def test_union_not_cacheable(self, sort):
        """Union of overlapping slices taken from one minutely range."""
        minutes = date_range("1/1/2000", periods=50, freq=Minute())

        # Later slice first, earlier overlapping slice second.
        covering = minutes[10:].union(minutes[:25], sort=sort)
        if sort is None:
            tm.assert_index_equal(covering, minutes)
        else:
            unsorted_expected = pd.DatetimeIndex(
                list(minutes[10:]) + list(minutes[:10])
            )
            tm.assert_index_equal(covering, unsorted_expected)

        # Second operand fully contained in the first.
        contained = minutes[10:].union(minutes[15:35], sort=sort)
        tm.assert_index_equal(contained, minutes[10:])

    def test_intersection(self):
        """Intersection of slices from one minutely range keeps its freq."""
        minutes = date_range("1/1/2000", periods=50, freq=Minute())
        late, early = minutes[10:], minutes[:25]
        shared = minutes[10:25]

        overlap = late.intersection(early)
        tm.assert_index_equal(overlap, shared)
        assert isinstance(overlap, DatetimeIndex)
        assert overlap.freq == minutes.freq

        # Same operand passed through ``view``.
        overlap = late.intersection(early.view(DatetimeIndex))
        tm.assert_index_equal(overlap, shared)

        # non-overlapping
        disjoint = minutes[:10].intersection(minutes[10:])
        tm.assert_index_equal(disjoint, DatetimeIndex([]))

    def test_intersection_bug(self):
        """Intersecting with a contained business range returns that range."""
        # GH #771
        outer = bdate_range("11/30/2011", "12/31/2011")
        inner = bdate_range("12/10/2011", "12/20/2011")
        tm.assert_index_equal(outer.intersection(inner), inner)

    @pytest.mark.parametrize("sort", [None, False])
    def test_month_range_union_tz_pytz(self, sort):
        """Month-end ranges in a pytz zone can be unioned without error."""
        from pytz import timezone

        self._union_adjacent_month_ends(timezone("US/Eastern"), sort)

    @td.skip_if_windows_python_3
    @pytest.mark.parametrize("sort", [None, False])
    def test_month_range_union_tz_dateutil(self, sort):
        """Month-end ranges in a dateutil zone can be unioned without error."""
        from pandas._libs.tslibs.timezones import dateutil_gettz

        self._union_adjacent_month_ends(dateutil_gettz("US/Eastern"), sort)
|
||||
|
||||
|
||||
class TestCustomDatetimeIndex:
    """Set-operation tests on a custom-business-day ("C") range."""

    def setup_method(self, method):
        """Build the freq="C" range every test slices from."""
        self.rng = bdate_range(START, END, freq="C")

    @pytest.mark.parametrize("sort", [None, False])
    def test_union(self, sort):
        """Result type of union for several relative positions of the inputs."""
        cdays = self.rng

        # overlapping
        overlapping = cdays[:10].union(cdays[5:10], sort=sort)
        assert isinstance(overlapping, DatetimeIndex)

        # non-overlapping, gap in middle
        gapped = cdays[:5].union(cdays[10:], sort=sort)
        assert isinstance(gapped, Index)

        # non-overlapping, no gap
        left, right = cdays[:5], cdays[5:10]
        adjacent = left.union(right, sort=sort)
        assert isinstance(adjacent, DatetimeIndex)

        # order does not matter (only checked when the result is sorted)
        if sort is None:
            tm.assert_index_equal(right.union(left, sort=sort), adjacent)

        # overlapping, but different offset
        month_ends = date_range(START, END, freq=BMonthEnd())
        mixed = cdays.union(month_ends, sort=sort)
        assert isinstance(mixed, DatetimeIndex)

    def test_outer_join(self):
        """Outer join should just behave as union."""
        cdays = self.rng

        # overlapping
        joined = cdays[:10].join(cdays[5:10], how="outer")
        assert isinstance(joined, DatetimeIndex)

        # non-overlapping, gap in middle
        joined = cdays[:5].join(cdays[10:], how="outer")
        assert isinstance(joined, DatetimeIndex)
        assert joined.freq is None

        # non-overlapping, no gap
        joined = cdays[:5].join(cdays[5:10], how="outer")
        assert isinstance(joined, DatetimeIndex)

        # overlapping, but different offset
        month_ends = date_range(START, END, freq=BMonthEnd())
        joined = cdays.join(month_ends, how="outer")
        assert isinstance(joined, DatetimeIndex)
        assert joined.freq is None

    def test_intersection_bug(self):
        """Intersecting with a contained freq="C" range returns that range."""
        # GH #771
        outer = bdate_range("11/30/2011", "12/31/2011", freq="C")
        inner = bdate_range("12/10/2011", "12/20/2011", freq="C")
        tm.assert_index_equal(outer.intersection(inner), inner)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,223 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype, IntervalDtype
|
||||
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
NaT,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
interval_range,
|
||||
)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class Base:
    """``astype`` tests shared by every IntervalIndex subtype.

    Each test takes an ``index`` argument; the subclasses in this module
    provide it as a pytest fixture.
    """

    def test_astype_idempotent(self, index):
        """Casting to "interval" or to the index's own dtype is a no-op."""
        for target in ("interval", index.dtype):
            tm.assert_index_equal(index.astype(target), index)

    def test_astype_object(self, index):
        """Casting to object gives an object Index that no longer ``equals``."""
        as_object = index.astype(object)
        tm.assert_index_equal(as_object, Index(index.values, dtype="object"))
        assert not as_object.equals(index)

    def test_astype_category(self, index):
        """Casting to category, with default and explicit categories."""
        default_expected = CategoricalIndex(index.values)
        tm.assert_index_equal(index.astype("category"), default_expected)
        tm.assert_index_equal(index.astype(CategoricalDtype()), default_expected)

        # non-default params: drop the last unique interval, make it ordered
        categories = index.dropna().unique().values[:-1]
        ordered_dtype = CategoricalDtype(categories=categories, ordered=True)
        ordered_expected = CategoricalIndex(
            index.values, categories=categories, ordered=True
        )
        tm.assert_index_equal(index.astype(ordered_dtype), ordered_expected)

    @pytest.mark.parametrize(
        "dtype",
        [
            "int64",
            "uint64",
            "float64",
            "complex128",
            "period[M]",
            "timedelta64",
            "timedelta64[ns]",
            "datetime64",
            "datetime64[ns]",
            "datetime64[ns, US/Eastern]",
        ],
    )
    def test_astype_cannot_cast(self, index, dtype):
        """Non-interval target dtypes are rejected with a TypeError."""
        with pytest.raises(TypeError, match="Cannot cast IntervalIndex to dtype"):
            index.astype(dtype)

    def test_astype_invalid_dtype(self, index):
        """An unknown dtype string is rejected with a TypeError."""
        with pytest.raises(
            TypeError, match="data type 'fake_dtype' not understood"
        ):
            index.astype("fake_dtype")
|
||||
|
||||
|
||||
class TestIntSubtype(Base):
    """``astype`` tests for IntervalIndex with an integer-like subtype."""

    # Indexes the ``index`` fixture cycles through: one signed, one unsigned.
    indexes = [
        IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")),
        IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Yield each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize(
        "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"]
    )
    def test_subtype_conversion(self, index, subtype):
        """Casting the subtype equals casting both endpoints separately."""
        converted = index.astype(IntervalDtype(subtype))
        endpointwise = IntervalIndex.from_arrays(
            index.left.astype(subtype),
            index.right.astype(subtype),
            closed=index.closed,
        )
        tm.assert_index_equal(converted, endpointwise)

    @pytest.mark.parametrize(
        "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")]
    )
    def test_subtype_integer(self, subtype_start, subtype_end):
        """Non-negative intervals convert between int64 and uint64."""
        source = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start))
        converted = source.astype(IntervalDtype(subtype_end))
        endpointwise = IntervalIndex.from_arrays(
            source.left.astype(subtype_end),
            source.right.astype(subtype_end),
            closed=source.closed,
        )
        tm.assert_index_equal(converted, endpointwise)

    @pytest.mark.xfail(reason="GH#15832")
    def test_subtype_integer_errors(self):
        """int64 -> uint64 should raise for negative values (expected to fail)."""
        negative_included = interval_range(-10, 10)
        unsigned = IntervalDtype("uint64")
        with pytest.raises(ValueError):
            negative_included.astype(unsigned)
|
||||
|
||||
|
||||
class TestFloatSubtype(Base):
    """``astype`` tests for IntervalIndex with a float subtype."""

    # Indexes the ``index`` fixture cycles through; the second one holds a
    # NaN interval and a repeated interval.
    indexes = [
        interval_range(-10.0, 10.0, closed="neither"),
        IntervalIndex.from_arrays(
            [-1.5, np.nan, 0.0, 0.0, 1.5],
            [-0.5, np.nan, 1.0, 1.0, 3.0],
            closed="both",
        ),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Yield each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, subtype):
        """Whole-valued floats convert to integers; a missing value raises."""
        source = interval_range(0.0, 10.0)
        target = IntervalDtype(subtype)

        endpointwise = IntervalIndex.from_arrays(
            source.left.astype(subtype),
            source.right.astype(subtype),
            closed=source.closed,
        )
        tm.assert_index_equal(source.astype(target), endpointwise)

        # raises with NA
        with pytest.raises(ValueError, match="Cannot convert NA to integer"):
            source.insert(0, np.nan).astype(target)

    @pytest.mark.xfail(reason="GH#15832")
    def test_subtype_integer_errors(self):
        """Lossy float -> integer casts should raise (expected to fail)."""
        # float64 -> uint64 fails with negative values
        negative_included = interval_range(-10.0, 10.0)
        with pytest.raises(ValueError):
            negative_included.astype(IntervalDtype("uint64"))

        # float64 -> integer-like fails with non-integer valued floats
        fractional = interval_range(0.0, 10.0, freq=0.25)
        with pytest.raises(ValueError):
            fractional.astype(IntervalDtype("int64"))

        with pytest.raises(ValueError):
            fractional.astype(IntervalDtype("uint64"))

    @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_subtype_datetimelike(self, index, subtype):
        """Float intervals cannot be cast to a datetime-like subtype."""
        target = IntervalDtype(subtype)
        with pytest.raises(
            TypeError, match="Cannot convert .* to .*; subtypes are incompatible"
        ):
            index.astype(target)
|
||||
|
||||
|
||||
class TestDatetimelikeSubtype(Base):
    """astype tests for IntervalIndex objects with a datetime-like subtype."""

    # Inputs for the ``index`` fixture: naive and tz-aware timestamps and
    # timedeltas, with and without an inserted NaT.
    indexes = [
        interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"),
        interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT),
        interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10),
        interval_range(Timedelta("0 days"), periods=10, closed="both"),
        interval_range(Timedelta("0 days"), periods=10).insert(2, NaT),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Provide each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, index, subtype):
        """Datetime-like intervals cast to integer subtypes side by side."""
        target = IntervalDtype(subtype)

        converted = index.astype(target)
        wanted = IntervalIndex.from_arrays(
            index.left.astype(subtype),
            index.right.astype(subtype),
            closed=index.closed,
        )
        tm.assert_index_equal(converted, wanted)

    def test_subtype_float(self, index):
        """Datetime-like intervals cannot be cast to a float64 subtype."""
        target = IntervalDtype("float64")
        with pytest.raises(
            TypeError, match="Cannot convert .* to .*; subtypes are incompatible"
        ):
            index.astype(target)

    def test_subtype_datetimelike(self):
        """Datetime and timedelta subtypes cannot be cast into each other."""
        msg = "Cannot convert .* to .*; subtypes are incompatible"

        # datetime -> timedelta raises
        to_timedelta = IntervalDtype("timedelta64[ns]")

        naive = interval_range(Timestamp("2018-01-01"), periods=10)
        with pytest.raises(TypeError, match=msg):
            naive.astype(to_timedelta)

        aware = interval_range(Timestamp("2018-01-01", tz="CET"), periods=10)
        with pytest.raises(TypeError, match=msg):
            aware.astype(to_timedelta)

        # timedelta -> datetime raises
        to_datetime = IntervalDtype("datetime64[ns]")
        deltas = interval_range(Timedelta("0 days"), periods=10)
        with pytest.raises(TypeError, match=msg):
            deltas.astype(to_datetime)
|
||||
@@ -0,0 +1,452 @@
|
||||
from functools import partial
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_categorical_dtype
|
||||
from pandas.core.dtypes.dtypes import IntervalDtype
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
Float64Index,
|
||||
Index,
|
||||
Int64Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
date_range,
|
||||
notna,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
from pandas.core.arrays import IntervalArray
|
||||
import pandas.core.common as com
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Index name used by the construction tests: ``None`` or ``"foo"``."""
    chosen = request.param
    return chosen
|
||||
|
||||
|
||||
class Base:
    """
    Construction tests shared by every IntervalIndex constructor variant.

    Each test starts from data in "breaks" form. Subclasses provide a
    ``constructor`` fixture and a ``get_kwargs_from_breaks`` method that turns
    the breaks into the keyword arguments that constructor expects.
    The ``closed`` fixture is not defined in this module (presumably it comes
    from a conftest -- confirm).
    """

    @pytest.mark.parametrize(
        "breaks",
        [
            [3, 14, 15, 92, 653],
            np.arange(10, dtype="int64"),
            Int64Index(range(-10, 11)),
            Float64Index(np.arange(20, 30, 0.5)),
            date_range("20180101", periods=10),
            date_range("20180101", periods=10, tz="US/Eastern"),
            timedelta_range("1 day", periods=10),
        ],
    )
    def test_constructor(self, constructor, breaks, closed, name):
        """Closed side, name, subtype and both bounds survive construction."""
        kwargs = self.get_kwargs_from_breaks(breaks, closed)
        built = constructor(closed=closed, name=name, **kwargs)

        assert built.closed == closed
        assert built.name == name
        # inputs without a ``dtype`` attribute (plain lists) compare to int64
        assert built.dtype.subtype == getattr(breaks, "dtype", "int64")

        lower, upper = breaks[:-1], breaks[1:]
        tm.assert_index_equal(built.left, Index(lower))
        tm.assert_index_equal(built.right, Index(upper))

    @pytest.mark.parametrize(
        "breaks, subtype",
        [
            (Int64Index([0, 1, 2, 3, 4]), "float64"),
            (Int64Index([0, 1, 2, 3, 4]), "datetime64[ns]"),
            (Int64Index([0, 1, 2, 3, 4]), "timedelta64[ns]"),
            (Float64Index([0, 1, 2, 3, 4]), "int64"),
            (date_range("2017-01-01", periods=5), "int64"),
            (timedelta_range("1 day", periods=5), "int64"),
        ],
    )
    def test_constructor_dtype(self, constructor, breaks, subtype):
        """Passing ``dtype`` matches converting the breaks beforehand."""
        # GH 19262: conversion via dtype parameter
        converted_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
        wanted = constructor(**converted_kwargs)

        raw_kwargs = self.get_kwargs_from_breaks(breaks)
        interval_dtype = IntervalDtype(subtype)
        # the dtype is accepted both as an object and as its string form
        for spec in (interval_dtype, str(interval_dtype)):
            built = constructor(dtype=spec, **raw_kwargs)
            tm.assert_index_equal(built, wanted)

    @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
    def test_constructor_nan(self, constructor, breaks, closed):
        """All-NaN breaks give a float64 subtype and NaN-only values."""
        # GH 18421
        kwargs = self.get_kwargs_from_breaks(breaks)
        built = constructor(closed=closed, **kwargs)

        wanted_values = np.array(breaks[:-1], dtype=object)

        assert built.closed == closed
        assert built.dtype.subtype == np.float64
        tm.assert_numpy_array_equal(built._ndarray_values, wanted_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            [],
            np.array([], dtype="int64"),
            np.array([], dtype="float64"),
            np.array([], dtype="datetime64[ns]"),
            np.array([], dtype="timedelta64[ns]"),
        ],
    )
    def test_constructor_empty(self, constructor, breaks, closed):
        """Empty breaks give an empty index that keeps the input subtype."""
        # GH 18421
        kwargs = self.get_kwargs_from_breaks(breaks)
        built = constructor(closed=closed, **kwargs)

        # an empty list has no dtype; int64 is the subtype compared against
        wanted_subtype = getattr(breaks, "dtype", np.int64)
        wanted_values = np.array([], dtype=object)

        assert built.empty
        assert built.closed == closed
        assert built.dtype.subtype == wanted_subtype
        tm.assert_numpy_array_equal(built._ndarray_values, wanted_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            tuple("0123456789"),
            list("abcdefghij"),
            np.array(list("abcdefghij"), dtype=object),
            np.array(list("abcdefghij"), dtype="<U1"),
        ],
    )
    def test_constructor_string(self, constructor, breaks):
        """String-like breaks are rejected with a TypeError."""
        # GH 19016
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            constructor(**self.get_kwargs_from_breaks(breaks))

    @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex])
    def test_constructor_categorical_valid(self, constructor, cat_constructor):
        """Categorical numeric breaks build the same index as plain breaks."""
        # GH 21243/21253
        wraps_index = isinstance(constructor, partial) and constructor.func is Index
        if wraps_index:
            # Index is defined to create CategoricalIndex from categorical data
            pytest.skip()

        breaks = np.arange(10, dtype="int64")
        wanted = IntervalIndex.from_breaks(breaks)

        kwargs = self.get_kwargs_from_breaks(cat_constructor(breaks))
        built = constructor(**kwargs)
        tm.assert_index_equal(built, wanted)

    def test_generic_errors(self, constructor):
        """Invalid ``closed``/``dtype`` values and invalid data all raise."""
        # filler input data to be used when supplying invalid kwargs
        filler = self.get_kwargs_from_breaks(range(10))

        # invalid closed
        with pytest.raises(ValueError, match="invalid option for 'closed': invalid"):
            constructor(closed="invalid", **filler)

        # unsupported dtype
        with pytest.raises(
            TypeError, match="dtype must be an IntervalDtype, got int64"
        ):
            constructor(dtype="int64", **filler)

        # invalid dtype
        with pytest.raises(TypeError, match="data type 'invalid' not understood"):
            constructor(dtype="invalid", **filler)

        # no point in nesting periods in an IntervalIndex
        period_kwargs = self.get_kwargs_from_breaks(
            period_range("2000-01-01", periods=10)
        )
        with pytest.raises(
            ValueError,
            match="Period dtypes are not supported, use a PeriodIndex instead",
        ):
            constructor(**period_kwargs)

        # decreasing values
        descending_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
        with pytest.raises(
            ValueError, match="left side of interval must be <= right side"
        ):
            constructor(**descending_kwargs)
|
||||
|
||||
|
||||
class TestFromArrays(Base):
    """Construction tests run against IntervalIndex.from_arrays."""

    @pytest.fixture
    def constructor(self):
        """Constructor under test."""
        return IntervalIndex.from_arrays

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert intervals in breaks format to the dictionary of kwargs
        expected by IntervalIndex.from_arrays. ``closed`` is accepted for
        signature parity with the other subclasses and is not used here.
        """
        left, right = breaks[:-1], breaks[1:]
        return {"left": left, "right": right}

    def test_constructor_errors(self):
        """Categorical input and unequal-length sides are both rejected."""
        # GH 19016: categorical data
        data = Categorical(list("01234abcde"), ordered=True)
        unsupported = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=unsupported):
            IntervalIndex.from_arrays(data[:-1], data[1:])

        # unequal length
        three_left, two_right = [0, 1, 2], [2, 3]
        with pytest.raises(
            ValueError, match="left and right must have the same length"
        ):
            IntervalIndex.from_arrays(three_left, two_right)

    @pytest.mark.parametrize(
        "left_subtype, right_subtype", [(np.int64, np.float64), (np.float64, np.int64)]
    )
    def test_mixed_float_int(self, left_subtype, right_subtype):
        """Mixing int and float sides yields float64 on both sides."""
        left = np.arange(9, dtype=left_subtype)
        right = np.arange(1, 10, dtype=right_subtype)

        built = IntervalIndex.from_arrays(left, right)

        tm.assert_index_equal(built.left, Float64Index(left))
        tm.assert_index_equal(built.right, Float64Index(right))
        assert built.dtype.subtype == np.float64
|
||||
|
||||
|
||||
class TestFromBreaks(Base):
    """Construction tests run against IntervalIndex.from_breaks."""

    @pytest.fixture
    def constructor(self):
        """Constructor under test."""
        return IntervalIndex.from_breaks

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert intervals in breaks format to the dictionary of kwargs
        expected by IntervalIndex.from_breaks. The breaks are passed through
        unchanged; ``closed`` is accepted but not used here.
        """
        kwargs = {"breaks": breaks}
        return kwargs

    def test_constructor_errors(self):
        """Categorical breaks are rejected with a TypeError."""
        # GH 19016: categorical data
        data = Categorical(list("01234abcde"), ordered=True)
        unsupported = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=unsupported):
            IntervalIndex.from_breaks(data)

    def test_length_one(self):
        """A single break point produces an empty IntervalIndex."""
        from_single = IntervalIndex.from_breaks([0])
        from_nothing = IntervalIndex.from_breaks([])
        tm.assert_index_equal(from_single, from_nothing)
|
||||
|
||||
|
||||
class TestFromTuples(Base):
    """Construction tests run against IntervalIndex.from_tuples."""

    @pytest.fixture
    def constructor(self):
        """Constructor under test."""
        return IntervalIndex.from_tuples

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert intervals in breaks format to the dictionary of kwargs
        expected by IntervalIndex.from_tuples. The container kind of the
        input (list/tuple, categorical, anything else) decides how the
        (left, right) pairs are wrapped; ``closed`` is not used here.
        """
        if len(breaks) == 0:
            # nothing to pair up: hand the empty input through untouched
            return {"data": breaks}

        pairs = list(zip(breaks[:-1], breaks[1:]))
        if isinstance(breaks, (list, tuple)):
            data = pairs
        elif is_categorical_dtype(breaks):
            data = breaks._constructor(pairs)
        else:
            data = com.asarray_tuplesafe(pairs)
        return {"data": data}

    def test_constructor_errors(self):
        """Non-tuple items and tuples of the wrong length are rejected."""
        # non-tuple
        tuples = [(0, 1), 2, (3, 4)]
        msg = "IntervalIndex.from_tuples received an invalid item, 2"
        # NOTE: ``msg`` has no ``{t}`` placeholder, so ``format`` leaves it as is
        with pytest.raises(TypeError, match=msg.format(t=tuples)):
            IntervalIndex.from_tuples(tuples)

        # too few/many items
        # NOTE: the formatted list is used as a regular expression, so its
        # square brackets are read as a character class rather than literally.
        msg = "IntervalIndex.from_tuples requires tuples of length 2, got {t}"
        for tuples in (
            [(0, 1), (2,), (3, 4)],
            [(0, 1), (2, 3, 4), (5, 6)],
        ):
            with pytest.raises(ValueError, match=msg.format(t=tuples)):
                IntervalIndex.from_tuples(tuples)

    def test_na_tuples(self):
        """A (NA, NA) tuple is treated the same as a bare NA element."""
        with_na_tuple = IntervalIndex.from_tuples(
            [(0, 1), (np.nan, np.nan), (2, 3)]
        )
        with_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        tm.assert_index_equal(with_na_tuple, with_na_element)
|
||||
|
||||
|
||||
class TestClassConstructors(Base):
    """Construction tests run against the IntervalIndex/Index constructors."""

    @pytest.fixture(
        params=[IntervalIndex, partial(Index, dtype="interval")],
        ids=["IntervalIndex", "Index"],
    )
    def constructor(self, request):
        """Constructor under test: IntervalIndex or Index(dtype="interval")."""
        return request.param

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert intervals in breaks format to the dictionary of kwargs
        expected by the IntervalIndex/Index constructors: a collection of
        Interval objects, with missing left bounds passed through as-is.
        """
        if len(breaks) == 0:
            return {"data": breaks}

        intervals = [
            Interval(lower, upper, closed) if notna(lower) else lower
            for lower, upper in zip(breaks[:-1], breaks[1:])
        ]

        if isinstance(breaks, list):
            data = intervals
        elif is_categorical_dtype(breaks):
            data = breaks._constructor(intervals)
        else:
            data = np.array(intervals, dtype=object)
        return {"data": data}

    def test_generic_errors(self, constructor):
        """
        Override the base class implementation since errors are handled
        differently; checks unnecessary since caught at the Interval level.
        """
        pass

    def test_constructor_string(self):
        """Override the base class test with a no-op."""
        # GH23013
        # When forming the interval from breaks,
        # the interval of strings is already forbidden.
        pass

    def test_constructor_errors(self, constructor):
        """Mixed closed sides, scalars and non-interval data are rejected."""
        # mismatched closed within intervals with no constructor override
        mixed = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
        with pytest.raises(
            ValueError, match="intervals must all be closed on the same side"
        ):
            constructor(mixed)

        # scalar
        scalar_msg = (
            r"IntervalIndex\(...\) must be called with a collection of "
            "some kind, 5 was passed"
        )
        with pytest.raises(TypeError, match=scalar_msg):
            constructor(5)

        # not an interval
        with pytest.raises(
            TypeError,
            match="type <class 'numpy.int64'> with value 0 is not an interval",
        ):
            constructor([0, 1])

    @pytest.mark.parametrize(
        "data, closed",
        [
            ([], "both"),
            ([np.nan, np.nan], "neither"),
            (
                [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")],
                "left",
            ),
            (
                [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")],
                "neither",
            ),
            (IntervalIndex.from_breaks(range(5), closed="both"), "right"),
        ],
    )
    def test_override_inferred_closed(self, constructor, data, closed):
        """An explicit ``closed`` wins over the side carried by the data."""
        # GH 19370
        if isinstance(data, IntervalIndex):
            bounds = data.to_tuples()
        else:
            bounds = [
                (item.left, item.right) if notna(item) else item for item in data
            ]
        wanted = IntervalIndex.from_tuples(bounds, closed=closed)

        built = constructor(data, closed=closed)
        tm.assert_index_equal(built, wanted)

    @pytest.mark.parametrize(
        "values_constructor", [list, np.array, IntervalIndex, IntervalArray]
    )
    def test_index_object_dtype(self, values_constructor):
        """Index(intervals, dtype=object) is an Index, not an IntervalIndex."""
        values = values_constructor([Interval(0, 1), Interval(1, 2), Interval(2, 3)])

        built = Index(values, dtype=object)

        # exact type check on purpose: a subclass must not be returned
        assert type(built) is Index
        tm.assert_numpy_array_equal(built.values, np.array(values))

    def test_index_mixed_closed(self):
        """Intervals with differing closed sides give an object-dtype Index."""
        # GH27172
        mixed = [
            Interval(0, 1, closed="left"),
            Interval(1, 2, closed="right"),
            Interval(2, 3, closed="neither"),
            Interval(3, 4, closed="both"),
        ]
        inferred = Index(mixed)
        explicit = Index(mixed, dtype=object)
        tm.assert_index_equal(inferred, explicit)
|
||||
|
||||
|
||||
class TestFromIntervals(TestClassConstructors):
    """
    Tests for IntervalIndex.from_intervals, which is deprecated in favor of
    the IntervalIndex constructor. Re-runs the IntervalIndex constructor
    tests and adds a deprecation test. Should only need to delete this class
    when from_intervals is removed.
    """

    @pytest.fixture
    def constructor(self):
        """Wrap from_intervals so every call also checks for a FutureWarning."""

        def from_intervals_ignore_warnings(*args, **kwargs):
            with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
                built = IntervalIndex.from_intervals(*args, **kwargs)
            return built

        return from_intervals_ignore_warnings

    def test_deprecated(self):
        """Calling from_intervals emits a FutureWarning."""
        intervals = [Interval(0, 1), Interval(1, 2)]
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            IntervalIndex.from_intervals(intervals)

    @pytest.mark.skip(reason="parent class test that is not applicable")
    def test_index_object_dtype(self):
        """Skipped override of the parent test."""
        pass

    @pytest.mark.skip(reason="parent class test that is not applicable")
    def test_index_mixed_closed(self):
        """Skipped override of the parent test."""
        pass
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,306 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Interval, IntervalIndex
|
||||
from pandas.core.indexes.base import InvalidIndexError
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestIntervalIndex:
    """Lookup tests for IntervalIndex.

    Covers get_loc, slice_locs, get_indexer, get_indexer_non_unique and
    ``in``. The ``closed`` fixture is not defined in this module (presumably
    it comes from a conftest -- confirm).
    """

    @pytest.mark.parametrize("side", ["right", "left", "both", "neither"])
    def test_get_loc_interval(self, closed, side):
        """get_loc with an Interval key only finds exact matches."""
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
        # bounds of the intervals stored in ``idx`` -> their position
        stored = {(0, 1): 0, (2, 3): 1}

        for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]:
            # if get_loc is supplied an interval, it should only search
            # for exact matches, not overlaps or covers, else KeyError.
            position = stored.get(tuple(bound)) if closed == side else None
            if position is not None:
                assert idx.get_loc(Interval(*bound, closed=side)) == position
                continue

            msg = re.escape(
                "Interval({bound[0]}, {bound[1]}, closed='{side}')".format(
                    bound=bound, side=side
                )
            )
            with pytest.raises(KeyError, match=msg):
                idx.get_loc(Interval(*bound, closed=side))

    @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
    def test_get_loc_scalar(self, closed, scalar):
        """get_loc with a scalar returns the containing interval's position."""
        # correct = {side: {query: answer}}.
        # If query is not in the dict, that query should raise a KeyError
        correct = {
            "right": {0.5: 0, 1: 0, 2.5: 1, 3: 1},
            "left": {0: 0, 0.5: 0, 2: 1, 2.5: 1},
            "both": {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1},
            "neither": {0.5: 0, 2.5: 1},
        }

        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        # if get_loc is supplied a scalar, it should return the index of
        # the interval which contains the scalar, or KeyError.
        answers = correct[closed]
        if scalar in answers:
            assert idx.get_loc(scalar) == answers[scalar]
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                idx.get_loc(scalar)

    def test_slice_locs_with_interval(self):
        """slice_locs with Interval bounds on several index orderings."""
        low, high = Interval(0, 2), Interval(2, 4)
        # the same five queries are issued against every index below
        queries = [
            {"start": low, "end": high},
            {"start": low},
            {"end": high},
            {"end": low},
            {"start": high, "end": low},
        ]

        def check(tuples, outcomes):
            index = IntervalIndex.from_tuples(tuples)
            for kwargs, outcome in zip(queries, outcomes):
                assert index.slice_locs(**kwargs) == outcome

        # increasing monotonically
        check(
            [(0, 2), (1, 3), (2, 4)],
            [(0, 3), (0, 3), (0, 3), (0, 1), (2, 1)],
        )

        # decreasing monotonically
        check(
            [(2, 4), (1, 3), (0, 2)],
            [(2, 1), (2, 3), (0, 1), (0, 3), (0, 3)],
        )

        # sorted duplicates
        check(
            [(0, 2), (0, 2), (2, 4)],
            [(0, 3), (0, 3), (0, 3), (0, 2), (2, 2)],
        )

        # unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])
        left_msg = re.escape(
            '"Cannot get left slice bound for non-unique label:'
            " Interval(0, 2, closed='right')\""
        )
        right_msg = re.escape(
            '"Cannot get right slice bound for non-unique label:'
            " Interval(0, 2, closed='right')\""
        )

        with pytest.raises(KeyError, match=left_msg):
            index.slice_locs(start=low, end=high)

        with pytest.raises(KeyError, match=left_msg):
            index.slice_locs(start=low)

        assert index.slice_locs(end=high) == (0, 2)

        with pytest.raises(KeyError, match=right_msg):
            index.slice_locs(end=low)

        with pytest.raises(KeyError, match=right_msg):
            index.slice_locs(start=high, end=low)

        # another unsorted duplicates
        check(
            [(0, 2), (0, 2), (2, 4), (1, 3)],
            [(0, 3), (0, 4), (0, 3), (0, 2), (2, 2)],
        )

    def test_slice_locs_with_ints_and_floats_succeeds(self):
        """slice_locs with scalar bounds on monotonic, non-overlapping data."""
        queries = [(0, 1), (0, 2), (0, 3), (3, 1), (3, 4), (0, 4)]

        # increasing non-overlapping
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
        outcomes = [(0, 1), (0, 2), (0, 2), (2, 1), (2, 3), (0, 3)]
        for (start, end), outcome in zip(queries, outcomes):
            assert index.slice_locs(start, end) == outcome

        # decreasing non-overlapping
        index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)])
        outcomes = [(3, 3), (3, 2), (3, 1), (1, 3), (1, 1), (3, 1)]
        for (start, end), outcome in zip(queries, outcomes):
            assert index.slice_locs(start, end) == outcome

    @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3], [0, 4]])
    @pytest.mark.parametrize(
        "tuples",
        [
            [(0, 2), (1, 3), (2, 4)],
            [(2, 4), (1, 3), (0, 2)],
            [(0, 2), (0, 2), (2, 4)],
            [(0, 2), (2, 4), (0, 2)],
            [(0, 2), (0, 2), (2, 4), (1, 3)],
        ],
    )
    def test_slice_locs_with_ints_and_floats_errors(self, tuples, query):
        """Scalar slice bounds raise KeyError on each of these indexes."""
        start, stop = query
        index = IntervalIndex.from_tuples(tuples)
        msg = (
            "'can only get slices from an IntervalIndex if bounds are"
            " non-overlapping and all monotonic increasing or decreasing'"
        )
        with pytest.raises(KeyError, match=msg):
            index.slice_locs(start, stop)

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([Interval(2, 4, closed="right")], [1]),
            ([Interval(2, 4, closed="left")], [-1]),
            ([Interval(2, 4, closed="both")], [-1]),
            ([Interval(2, 4, closed="neither")], [-1]),
            ([Interval(1, 4, closed="right")], [-1]),
            ([Interval(0, 4, closed="right")], [-1]),
            ([Interval(0.5, 1.5, closed="right")], [-1]),
            ([Interval(2, 4, closed="right"), Interval(0, 1, closed="right")], [1, -1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="right")], [1, 1]),
            ([Interval(5, 7, closed="right"), Interval(2, 4, closed="right")], [2, 1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="left")], [1, -1]),
        ],
    )
    def test_get_indexer_with_interval(self, query, expected):
        """get_indexer with Interval targets gives -1 for anything inexact."""
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (5, 7)], closed="right")

        positions = index.get_indexer(query)

        tm.assert_numpy_array_equal(positions, np.array(expected, dtype="intp"))

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], [-1]),
            ([0], [-1]),
            ([0.5], [0]),
            ([1], [0]),
            ([1.5], [1]),
            ([2], [1]),
            ([2.5], [-1]),
            ([3], [-1]),
            ([3.5], [2]),
            ([4], [2]),
            ([4.5], [-1]),
            ([1, 2], [0, 1]),
            ([1, 2, 3], [0, 1, -1]),
            ([1, 2, 3, 4], [0, 1, -1, 2]),
            ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1]),
        ],
    )
    def test_get_indexer_with_int_and_float(self, query, expected):
        """get_indexer with scalars maps each one to its containing interval."""
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="right")

        positions = index.get_indexer(query)

        tm.assert_numpy_array_equal(positions, np.array(expected, dtype="intp"))

    @pytest.mark.parametrize(
        "tuples, closed",
        [
            ([(0, 2), (1, 3), (3, 4)], "neither"),
            ([(0, 5), (1, 4), (6, 7)], "left"),
            ([(0, 1), (0, 1), (1, 2)], "right"),
            ([(0, 1), (2, 3), (3, 4)], "both"),
        ],
    )
    def test_get_indexer_errors(self, tuples, closed):
        """get_indexer raises InvalidIndexError for overlapping intervals."""
        # IntervalIndex needs non-overlapping for uniqueness when querying
        index = IntervalIndex.from_tuples(tuples, closed=closed)

        overlap_msg = (
            "cannot handle overlapping indices; use "
            "IntervalIndex.get_indexer_non_unique"
        )
        with pytest.raises(InvalidIndexError, match=overlap_msg):
            index.get_indexer([0, 2])

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], ([-1], [0])),
            ([0], ([0], [])),
            ([0.5], ([0], [])),
            ([1], ([0, 1], [])),
            ([1.5], ([0, 1], [])),
            ([2], ([0, 1, 2], [])),
            ([2.5], ([1, 2], [])),
            ([3], ([2], [])),
            ([3.5], ([2], [])),
            ([4], ([-1], [0])),
            ([4.5], ([-1], [0])),
            ([1, 2], ([0, 1, 0, 1, 2], [])),
            ([1, 2, 3], ([0, 1, 0, 1, 2, 2], [])),
            ([1, 2, 3, 4], ([0, 1, 0, 1, 2, 2, -1], [3])),
            ([1, 2, 3, 4, 2], ([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], [3])),
        ],
    )
    def test_get_indexer_non_unique_with_int_and_float(self, query, expected):
        """get_indexer_non_unique returns every match plus missing positions."""
        index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)], closed="left")
        wanted_indexer, wanted_missing = (
            np.array(part, dtype="intp") for part in expected
        )

        got_indexer, got_missing = index.get_indexer_non_unique(query)

        tm.assert_numpy_array_equal(got_indexer, wanted_indexer)
        tm.assert_numpy_array_equal(got_missing, wanted_missing)

        # TODO we may also want to test get_indexer for the case when
        # the intervals are duplicated, decreasing, non-monotonic, etc..

    def test_contains_dunder(self):
        """``in`` only matches an identical Interval, never a scalar."""
        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # __contains__ requires perfect matches to intervals.
        for scalar in (0, 1, 2):
            assert scalar not in index

        assert Interval(0, 1, closed="right") in index

        absent = [
            Interval(0, 2, closed="right"),
            Interval(0, 0.5, closed="right"),
            Interval(3, 5, closed="right"),
            Interval(-1, 0, closed="left"),
            Interval(0, 1, closed="left"),
            Interval(0, 1, closed="both"),
        ]
        for candidate in absent:
            assert candidate not in index
|
||||
@@ -0,0 +1,355 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_integer
|
||||
|
||||
from pandas import (
|
||||
DateOffset,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
interval_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from pandas.tseries.offsets import Day
|
||||
|
||||
|
||||
@pytest.fixture(scope="class", params=[None, "foo"])
def name(request):
    """Class-scoped index name for interval_range tests: ``None`` or ``"foo"``."""
    chosen = request.param
    return chosen
|
||||
|
||||
|
||||
class TestIntervalRange:
|
||||
@pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)])
def test_constructor_numeric(self, closed, name, freq, periods):
    """Any three of start/end/periods/freq build the same numeric range."""
    start, end = 0, 100
    wanted = IntervalIndex.from_breaks(
        np.arange(101, step=freq), name=name, closed=closed
    )

    # each entry fixes the range through a different trio of arguments
    specs = [
        # defined from start/end/freq
        {"start": start, "end": end, "freq": freq},
        # defined from start/periods/freq
        {"start": start, "periods": periods, "freq": freq},
        # defined from end/periods/freq
        {"end": end, "periods": periods, "freq": freq},
        # GH 20976: linspace behavior defined from start/end/periods
        {"start": start, "end": end, "periods": periods},
    ]
    for spec in specs:
        built = interval_range(name=name, closed=closed, **spec)
        tm.assert_index_equal(built, wanted)
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
@pytest.mark.parametrize(
    "freq, periods", [("D", 364), ("2D", 182), ("22D18H", 16), ("M", 11)]
)
def test_constructor_timestamp(self, closed, name, freq, periods, tz):
    """Build the same Timestamp interval_range from each argument trio."""
    start = Timestamp("20180101", tz=tz)
    end = Timestamp("20181231", tz=tz)
    breaks = date_range(start=start, end=end, freq=freq)
    expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

    argument_sets = [
        # defined from start/end/freq
        {"start": start, "end": end, "freq": freq},
        # defined from start/periods/freq
        {"start": start, "periods": periods, "freq": freq},
        # defined from end/periods/freq
        {"end": end, "periods": periods, "freq": freq},
    ]
    for kwargs in argument_sets:
        result = interval_range(name=name, closed=closed, **kwargs)
        tm.assert_index_equal(result, expected)

    # GH 20976: linspace behavior defined from start/end/periods
    # matches expected only for non-anchored offsets and tz naive
    # (anchored/DST transitions cause unequal spacing in expected)
    evenly_spaced = not breaks.freq.isAnchored() and tz is None
    if evenly_spaced:
        result = interval_range(
            start=start, end=end, periods=periods, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "freq, periods", [("D", 100), ("2D12H", 40), ("5D", 20), ("25D", 4)]
)
def test_constructor_timedelta(self, closed, name, freq, periods):
    """Build the same Timedelta interval_range from every argument trio."""
    start = Timedelta("0 days")
    end = Timedelta("100 days")
    expected = IntervalIndex.from_breaks(
        timedelta_range(start=start, end=end, freq=freq), name=name, closed=closed
    )

    argument_sets = [
        # defined from start/end/freq
        {"start": start, "end": end, "freq": freq},
        # defined from start/periods/freq
        {"start": start, "periods": periods, "freq": freq},
        # defined from end/periods/freq
        {"end": end, "periods": periods, "freq": freq},
        # GH 20976: linspace behavior defined from start/end/periods
        {"start": start, "end": end, "periods": periods},
    ]
    for kwargs in argument_sets:
        result = interval_range(name=name, closed=closed, **kwargs)
        tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "start, end, freq, expected_endpoint",
    [
        (0, 10, 3, 9),
        (0, 10, 1.5, 9),
        (0.5, 10, 3, 9.5),
        (Timedelta("0D"), Timedelta("10D"), "2D4H", Timedelta("8D16H")),
        (
            Timestamp("2018-01-01"),
            Timestamp("2018-02-09"),
            "MS",
            Timestamp("2018-02-01"),
        ),
        (
            Timestamp("2018-01-01", tz="US/Eastern"),
            Timestamp("2018-01-20", tz="US/Eastern"),
            "5D12H",
            Timestamp("2018-01-17 12:00:00", tz="US/Eastern"),
        ),
    ],
)
def test_early_truncation(self, start, end, freq, expected_endpoint):
    """The last right bound stops short when ``freq`` steps over ``end``."""
    # index truncates early if freq causes end to be skipped
    final_right = interval_range(start=start, end=end, freq=freq).right[-1]
    assert final_right == expected_endpoint
|
||||
|
||||
@pytest.mark.parametrize(
    "start, end, freq",
    [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)],
)
def test_no_invalid_float_truncation(self, start, end, freq):
    """Float endpoints must survive intact with ``periods=4`` (GH 21161)."""
    # GH 21161
    unit_step_breaks = [0.5, 1.5, 2.5, 3.5, 4.5]
    wide_step_breaks = [0.5, 2.0, 3.5, 5.0, 6.5]
    # Without an explicit freq the unit-spaced breaks are the expectation.
    breaks = unit_step_breaks if freq is None else wide_step_breaks
    expected = IntervalIndex.from_breaks(breaks)

    result = interval_range(start=start, end=end, periods=4, freq=freq)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "start, mid, end",
    [
        (
            Timestamp("2018-03-10", tz="US/Eastern"),
            Timestamp("2018-03-10 23:30:00", tz="US/Eastern"),
            Timestamp("2018-03-12", tz="US/Eastern"),
        ),
        (
            Timestamp("2018-11-03", tz="US/Eastern"),
            Timestamp("2018-11-04 00:30:00", tz="US/Eastern"),
            Timestamp("2018-11-05", tz="US/Eastern"),
        ),
    ],
)
def test_linspace_dst_transition(self, start, mid, end):
    """Two-period linspace range splits at ``mid`` across a DST change."""
    # GH 20976: linspace behavior defined from start/end/periods
    # accounts for the hour gained/lost during DST transition
    expected = IntervalIndex.from_breaks([start, mid, end])
    result = interval_range(start=start, end=end, periods=2)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("freq", [2, 2.0])
@pytest.mark.parametrize("end", [10, 10.0])
@pytest.mark.parametrize("start", [0, 0.0])
def test_float_subtype(self, start, end, freq):
    """Subtype is float64 as soon as any supplied argument is a float."""
    # Has float subtype if any of start/end/freq are float, even if all
    # resulting endpoints can safely be upcast to integers

    # Each case pairs the interval_range arguments with the sum of the
    # numeric inputs involved; that sum is an integer only when all are.
    cases = [
        # defined from start/end/freq
        ({"start": start, "end": end, "freq": freq}, start + end + freq),
        # defined from start/periods/freq
        ({"start": start, "periods": 5, "freq": freq}, start + freq),
        # defined from end/periods/freq
        ({"end": end, "periods": 5, "freq": freq}, end + freq),
        # GH 20976: linspace behavior defined from start/end/periods
        ({"start": start, "end": end, "periods": 5}, start + end),
    ]
    for kwargs, total in cases:
        result = interval_range(**kwargs).dtype.subtype
        expected = "int64" if is_integer(total) else "float64"
        assert result == expected
|
||||
|
||||
def test_constructor_coverage(self):
|
||||
# float value for periods
|
||||
expected = interval_range(start=0, periods=10)
|
||||
result = interval_range(start=0, periods=10.5)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# equivalent timestamp-like start/end
|
||||
start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15")
|
||||
expected = interval_range(start=start, end=end)
|
||||
|
||||
result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime())
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = interval_range(start=start.asm8, end=end.asm8)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# equivalent freq with timestamp
|
||||
equiv_freq = [
|
||||
"D",
|
||||
Day(),
|
||||
Timedelta(days=1),
|
||||
timedelta(days=1),
|
||||
DateOffset(days=1),
|
||||
]
|
||||
for freq in equiv_freq:
|
||||
result = interval_range(start=start, end=end, freq=freq)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# equivalent timedelta-like start/end
|
||||
start, end = Timedelta(days=1), Timedelta(days=10)
|
||||
expected = interval_range(start=start, end=end)
|
||||
|
||||
result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta())
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = interval_range(start=start.asm8, end=end.asm8)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# equivalent freq with timedelta
|
||||
equiv_freq = ["D", Day(), Timedelta(days=1), timedelta(days=1)]
|
||||
for freq in equiv_freq:
|
||||
result = interval_range(start=start, end=end, freq=freq)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_errors(self):
    """Check the exception type and message for each kind of bad input."""
    # not enough params, followed by too many params
    msg = (
        "Of the four parameters: start, end, periods, and freq, "
        "exactly three must be specified"
    )
    wrong_param_counts = [
        {"start": 0},
        {"end": 5},
        {"periods": 2},
        {},
        {"start": 0, "end": 5, "periods": 6, "freq": 1.5},
    ]
    for kwargs in wrong_param_counts:
        with pytest.raises(ValueError, match=msg):
            interval_range(**kwargs)

    # mixed units
    msg = "start, end, freq need to be type compatible"
    incompatible = [
        (0, Timestamp("20130101"), 2),
        (0, Timedelta("1 day"), 2),
        (0, 10, "D"),
        (Timestamp("20130101"), 10, "D"),
        (Timestamp("20130101"), Timedelta("1 day"), "D"),
        (Timestamp("20130101"), Timestamp("20130110"), 2),
        (Timedelta("1 day"), 10, "D"),
        (Timedelta("1 day"), Timestamp("20130110"), "D"),
        (Timedelta("1 day"), Timedelta("10 days"), 2),
    ]
    for start, end, freq in incompatible:
        with pytest.raises(TypeError, match=msg):
            interval_range(start=start, end=end, freq=freq)

    # invalid periods
    msg = "periods must be a number, got foo"
    with pytest.raises(TypeError, match=msg):
        interval_range(start=0, periods="foo")

    # invalid start
    msg = "start must be numeric or datetime-like, got foo"
    with pytest.raises(ValueError, match=msg):
        interval_range(start="foo", periods=10)

    # invalid end
    msg = r"end must be numeric or datetime-like, got \(0, 1\]"
    with pytest.raises(ValueError, match=msg):
        interval_range(end=Interval(0, 1), periods=10)

    # invalid freq for datetime-like
    msg = "freq must be numeric or convertible to DateOffset, got foo"
    bad_freq_calls = [
        {"start": 0, "end": 10, "freq": "foo"},
        {"start": Timestamp("20130101"), "periods": 10, "freq": "foo"},
        {"end": Timedelta("1 day"), "periods": 10, "freq": "foo"},
    ]
    for kwargs in bad_freq_calls:
        with pytest.raises(ValueError, match=msg):
            interval_range(**kwargs)

    # mixed tz
    eastern_start = Timestamp("2017-01-01", tz="US/Eastern")
    pacific_end = Timestamp("2017-01-07", tz="US/Pacific")
    msg = "Start and end cannot both be tz-aware with different timezones"
    with pytest.raises(TypeError, match=msg):
        interval_range(start=eastern_start, end=pacific_end)
|
||||
@@ -0,0 +1,197 @@
|
||||
from itertools import permutations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.interval import IntervalTree
|
||||
|
||||
from pandas import compat
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def skipif_32bit(param):
    """
    Wrap ``param`` so that a parametrize entry is skipped on 32bit systems.

    Used here for the leaf_size parameters affected by GH 23440.
    """
    skip_on_32bit = pytest.mark.skipif(
        compat.is_platform_32bit(), reason="GH 23440: int type mismatch on 32bit"
    )
    return pytest.param(param, marks=skip_on_32bit)
|
||||
|
||||
|
||||
@pytest.fixture(
    scope="class", params=["int32", "int64", "float32", "float64", "uint64"]
)
def dtype(request):
    """Class-scoped fixture yielding one numeric dtype name per run."""
    dtype_name = request.param
    return dtype_name
|
||||
|
||||
|
||||
@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10])
def leaf_size(request):
    """
    Supply the ``leaf_size`` argument for IntervalTree; consumed by the
    ``tree`` fixture. Sizes 1 and 2 are skipped on 32bit platforms.
    """
    size = request.param
    return size
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        np.arange(5, dtype="int64"),
        np.arange(5, dtype="int32"),
        np.arange(5, dtype="uint64"),
        np.arange(5, dtype="float64"),
        np.arange(5, dtype="float32"),
        np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"),
        np.array([0, 1, 2, 3, 4, np.nan], dtype="float32"),
    ]
)
def tree(request, leaf_size):
    """
    Build an IntervalTree whose left bounds are the parametrized array and
    whose right bounds are those values plus 2.
    """
    lower = request.param
    upper = lower + 2
    return IntervalTree(lower, upper, leaf_size=leaf_size)
|
||||
|
||||
|
||||
class TestIntervalTree:
|
||||
def test_get_loc(self, tree):
    """get_loc returns the positions of every interval holding the point."""
    single_hit = tree.get_loc(1)
    tm.assert_numpy_array_equal(single_hit, np.array([0], dtype="intp"))

    # Result order is not asserted, so sort before comparing.
    double_hit = np.sort(tree.get_loc(2))
    tm.assert_numpy_array_equal(double_hit, np.array([0, 1], dtype="intp"))

    with pytest.raises(KeyError, match="-1"):
        tree.get_loc(-1)
|
||||
|
||||
def test_get_indexer(self, tree):
    """get_indexer maps unique hits to positions and misses to -1."""
    targets = np.array([1.0, 5.5, 6.5])
    positions = tree.get_indexer(targets)
    tm.assert_numpy_array_equal(positions, np.array([0, 4, -1], dtype="intp"))

    # A target covered by more than one interval is rejected.
    ambiguous = np.array([3.0])
    with pytest.raises(
        KeyError, match="'indexer does not intersect a unique set of intervals'"
    ):
        tree.get_indexer(ambiguous)
|
||||
|
||||
def test_get_indexer_non_unique(self, tree):
|
||||
indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5]))
|
||||
|
||||
result = indexer[:1]
|
||||
expected = np.array([0], dtype="intp")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = np.sort(indexer[1:3])
|
||||
expected = np.array([0, 1], dtype="intp")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = np.sort(indexer[3:])
|
||||
expected = np.array([-1], dtype="intp")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = missing
|
||||
expected = np.array([2], dtype="intp")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_duplicates(self, dtype):
    """Three identical intervals must all be reported for a contained point."""
    lower = np.array([0, 0, 0], dtype=dtype)
    tree = IntervalTree(lower, lower + 1)
    all_positions = np.array([0, 1, 2], dtype="intp")

    tm.assert_numpy_array_equal(np.sort(tree.get_loc(0.5)), all_positions)

    # The unique-only indexer cannot resolve a point hit by duplicates.
    with pytest.raises(
        KeyError, match="'indexer does not intersect a unique set of intervals'"
    ):
        tree.get_indexer(np.array([0.5]))

    indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
    tm.assert_numpy_array_equal(np.sort(indexer), all_positions)
    tm.assert_numpy_array_equal(missing, np.array([], dtype="intp"))
|
||||
|
||||
def test_get_loc_closed(self, closed):
    """Endpoints are found only on the sides where the tree is closed."""
    tree = IntervalTree([0], [1], closed=closed)
    endpoint_checks = ((0, tree.open_left), (1, tree.open_right))
    for point, side_is_open in endpoint_checks:
        if not side_is_open:
            # Closed side: the endpoint belongs to the single interval.
            located = tree.get_loc(point)
            tm.assert_numpy_array_equal(located, np.array([0], dtype="intp"))
            continue
        # Open side: the endpoint is outside the interval.
        with pytest.raises(KeyError, match=str(point)):
            tree.get_loc(point)
|
||||
|
||||
@pytest.mark.parametrize(
    "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
)
def test_get_indexer_closed(self, closed, leaf_size):
    """Endpoint lookups succeed only on sides where the tree is closed."""
    lower = np.arange(1000, dtype="float64")
    found = lower.astype("intp")
    not_found = np.full(1000, -1, dtype="intp")

    tree = IntervalTree(lower, lower + 0.5, closed=closed, leaf_size=leaf_size)

    # Interior points are always located.
    tm.assert_numpy_array_equal(found, tree.get_indexer(lower + 0.25))

    # Left endpoints are located only when the left side is closed.
    at_left = found if tree.closed_left else not_found
    tm.assert_numpy_array_equal(at_left, tree.get_indexer(lower + 0.0))

    # Right endpoints are located only when the right side is closed.
    at_right = found if tree.closed_right else not_found
    tm.assert_numpy_array_equal(at_right, tree.get_indexer(lower + 0.5))
|
||||
|
||||
@pytest.mark.parametrize(
    "left, right, expected",
    [
        (np.array([0, 1, 4]), np.array([2, 3, 5]), True),
        (np.array([0, 1, 2]), np.array([5, 4, 3]), True),
        (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
        (np.array([0, 2, 4]), np.array([1, 3, 5]), False),
        (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False),
    ],
)
@pytest.mark.parametrize("order", [list(perm) for perm in permutations(range(3))])
def test_is_overlapping(self, closed, order, left, right, expected):
    """is_overlapping must give the expected flag for every input ordering."""
    # GH 23309
    shuffled_left, shuffled_right = left[order], right[order]
    tree = IntervalTree(shuffled_left, shuffled_right, closed=closed)
    assert tree.is_overlapping is expected
|
||||
|
||||
@pytest.mark.parametrize("order", [list(perm) for perm in permutations(range(3))])
def test_is_overlapping_endpoints(self, closed, order):
    """shared endpoints are marked as overlapping"""
    # GH 23309
    left = np.arange(3)
    right = np.arange(1, 4)
    tree = IntervalTree(left[order], right[order], closed=closed)

    # Adjacent intervals only share a point when both sides are closed.
    shares_endpoint = closed == "both"
    assert tree.is_overlapping is shares_endpoint
|
||||
|
||||
@pytest.mark.parametrize(
    "left, right",
    [
        (np.array([], dtype="int64"), np.array([], dtype="int64")),
        (np.array([0], dtype="int64"), np.array([1], dtype="int64")),
        (np.array([np.nan]), np.array([np.nan])),
        (np.array([np.nan] * 3), np.array([np.nan] * 3)),
    ],
)
def test_is_overlapping_trivial(self, closed, left, right):
    """Empty, single-interval and all-NaN trees never report an overlap."""
    # GH 23309
    overlapping = IntervalTree(left, right, closed=closed).is_overlapping
    assert overlapping is False
|
||||
|
||||
@pytest.mark.skipif(compat.is_platform_32bit(), reason="GH 23440")
def test_construction_overflow(self):
    """Root pivot is computed correctly with int64-max right bounds."""
    # GH 25485
    int64_max = np.iinfo(np.int64).max
    left = np.arange(101)
    right = [int64_max] * 101
    tree = IntervalTree(left, right)

    # pivot should be average of left/right medians
    expected_pivot = (50 + int64_max) / 2
    assert tree.root.pivot == expected_pivot
|
||||
@@ -0,0 +1,187 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Index, IntervalIndex, Timestamp, interval_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(scope="class", params=[None, "foo"])
def name(request):
    """Class-scoped fixture yielding the index name: ``None`` or ``"foo"``."""
    index_name = request.param
    return index_name
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, False])
def sort(request):
    """Fixture yielding the ``sort`` argument for set ops: None or False."""
    sort_option = request.param
    return sort_option
|
||||
|
||||
|
||||
def monotonic_index(start, end, dtype="int64", closed="right"):
    """Return an IntervalIndex whose breaks are ``np.arange(start, end)``."""
    breaks = np.arange(start, end, dtype=dtype)
    return IntervalIndex.from_breaks(breaks, closed=closed)
|
||||
|
||||
|
||||
def empty_index(dtype="int64", closed="right"):
    """Return a zero-length IntervalIndex built from an empty ``dtype`` array."""
    no_values = np.array([], dtype=dtype)
    return IntervalIndex(no_values, closed=closed)
|
||||
|
||||
|
||||
class TestIntervalIndex:
|
||||
def test_union(self, closed, sort):
    """Union of overlapping, identical and empty IntervalIndex operands."""
    lower = monotonic_index(0, 11, closed=closed)
    upper = monotonic_index(5, 13, closed=closed)
    combined = monotonic_index(0, 13, closed=closed)

    # Reverse the calling index so the input is not already ordered.
    for first, second in ((lower[::-1], upper), (upper[::-1], lower)):
        result = first.union(second, sort=sort)
        if sort is None:
            tm.assert_index_equal(result, combined)
        assert tm.equalContents(result, combined)

    # Union with itself or with a subset returns the original index.
    tm.assert_index_equal(lower.union(lower, sort=sort), lower)
    tm.assert_index_equal(lower.union(lower[:1], sort=sort), lower)

    # GH 19101: empty result, same dtype
    empty_int = empty_index(dtype="int64", closed=closed)
    result = empty_int.union(empty_int, sort=sort)
    tm.assert_index_equal(result, empty_int)

    # GH 19101: empty result, different dtypes
    empty_float = empty_index(dtype="float64", closed=closed)
    result = empty_int.union(empty_float, sort=sort)
    tm.assert_index_equal(result, empty_int)
|
||||
|
||||
def test_intersection(self, closed, sort):
    """Intersection for overlapping, disjoint, nested and duplicate inputs."""
    index = monotonic_index(0, 11, closed=closed)
    other = monotonic_index(5, 13, closed=closed)
    expected = monotonic_index(5, 11, closed=closed)

    # Reverse the calling index so the input is not already ordered.
    for first, second in ((index[::-1], other), (other[::-1], index)):
        result = first.intersection(second, sort=sort)
        if sort is None:
            tm.assert_index_equal(result, expected)
        assert tm.equalContents(result, expected)

    tm.assert_index_equal(index.intersection(index, sort=sort), index)

    # GH 19101: empty result, same dtype
    expected = empty_index(dtype="int64", closed=closed)
    other = monotonic_index(300, 314, closed=closed)
    result = index.intersection(other, sort=sort)
    tm.assert_index_equal(result, expected)

    # GH 19101: empty result, different dtypes
    other = monotonic_index(300, 314, dtype="float64", closed=closed)
    result = index.intersection(other, sort=sort)
    tm.assert_index_equal(result, expected)

    tuple_cases = [
        # GH 26225: nested intervals
        (
            [(1, 2), (1, 3), (1, 4), (0, 2)],
            [(1, 2), (1, 3)],
            [(1, 2), (1, 3)],
        ),
        # GH 26225: duplicate element
        (
            [(1, 2), (1, 2), (2, 3), (3, 4)],
            [(1, 2), (2, 3)],
            [(1, 2), (1, 2), (2, 3)],
        ),
        # GH 26225
        ([(0, 3), (0, 2)], [(0, 2), (1, 3)], [(0, 2)]),
    ]
    for index_tuples, other_tuples, expected_tuples in tuple_cases:
        index = IntervalIndex.from_tuples(index_tuples)
        other = IntervalIndex.from_tuples(other_tuples)
        expected = IntervalIndex.from_tuples(expected_tuples)
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

    # GH 26225: duplicate nan element
    index = IntervalIndex([np.nan, np.nan])
    other = IntervalIndex([np.nan])
    expected = IntervalIndex([np.nan])
    result = index.intersection(other)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_difference(self, closed, sort):
    """Difference against a subset, against itself and across dtypes."""
    index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed)

    result = index.difference(index[:1], sort=sort)
    remaining = index[1:]
    # With sort=None the result is compared against the sorted remainder.
    expected = remaining.sort_values() if sort is None else remaining
    tm.assert_index_equal(result, expected)

    # GH 19101: empty result, same dtype
    result = index.difference(index, sort=sort)
    expected = empty_index(dtype="int64", closed=closed)
    tm.assert_index_equal(result, expected)

    # GH 19101: empty result, different dtypes
    float_left = index.left.astype("float64")
    other = IntervalIndex.from_arrays(float_left, index.right, closed=closed)
    result = index.difference(other, sort=sort)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_symmetric_difference(self, closed, sort):
    """Symmetric difference for shifted, identical and mixed-dtype inputs."""

    def check(result, expected):
        # Exact equality is only asserted when sort=None.
        if sort is None:
            tm.assert_index_equal(result, expected)
        assert tm.equalContents(result, expected)

    index = monotonic_index(0, 11, closed=closed)

    # Dropping the first vs. the last element leaves just those two.
    result = index[1:].symmetric_difference(index[:-1], sort=sort)
    check(result, IntervalIndex([index[0], index[-1]]))

    # GH 19101: empty result, same dtype
    result = index.symmetric_difference(index, sort=sort)
    expected = empty_index(dtype="int64", closed=closed)
    check(result, expected)

    # GH 19101: empty result, different dtypes
    float_left = index.left.astype("float64")
    other = IntervalIndex.from_arrays(float_left, index.right, closed=closed)
    result = index.symmetric_difference(other, sort=sort)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "op_name", ["union", "intersection", "difference", "symmetric_difference"]
)
@pytest.mark.parametrize("sort", [None, False])
def test_set_incompatible_types(self, closed, op_name, sort):
    """Set ops with a plain Index, a mismatched closed side or dtype."""
    index = monotonic_index(0, 11, closed=closed)
    set_op = getattr(index, op_name)

    # TODO: standardize return type of non-union setops type(self vs other)
    # non-IntervalIndex
    expected = (
        index
        if op_name == "difference"
        else getattr(index.astype("O"), op_name)(Index([1, 2, 3]))
    )
    result = set_op(Index([1, 2, 3]), sort=sort)
    tm.assert_index_equal(result, expected)

    # mixed closed
    msg = (
        "can only do set operations between two IntervalIndex objects "
        "that are closed on the same side"
    )
    other_sides = {"right", "left", "both", "neither"} - {closed}
    for other_closed in other_sides:
        mismatched = monotonic_index(0, 11, closed=other_closed)
        with pytest.raises(ValueError, match=msg):
            set_op(mismatched, sort=sort)

    # GH 19016: incompatible dtypes
    timestamp_index = interval_range(Timestamp("20180101"), periods=9, closed=closed)
    msg = (
        "can only do {op} between two IntervalIndex objects that have "
        "compatible dtypes"
    ).format(op=op_name)
    with pytest.raises(TypeError, match=msg):
        set_op(timestamp_index, sort=sort)
|
||||
@@ -0,0 +1,85 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex
|
||||
|
||||
|
||||
@pytest.fixture
def idx():
    """Return a two-level MultiIndex used to test general functionality."""
    levels = [
        Index(["foo", "bar", "baz", "qux"]),
        Index(["one", "two"]),
    ]
    codes = [
        np.array([0, 0, 1, 2, 3, 3]),
        np.array([0, 1, 0, 1, 0, 1]),
    ]
    return MultiIndex(
        levels=levels,
        codes=codes,
        names=["first", "second"],
        verify_integrity=False,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def idx_dup():
    """Return a two-level MultiIndex whose code pairs contain repeats."""
    # compare tests/indexes/multi/conftest.py
    levels = [
        Index(["foo", "bar", "baz", "qux"]),
        Index(["one", "two"]),
    ]
    # The pairs (0, 1) and (1, 0) each occur twice.
    codes = [
        np.array([0, 0, 1, 0, 1, 1]),
        np.array([0, 1, 0, 1, 0, 1]),
    ]
    return MultiIndex(
        levels=levels,
        codes=codes,
        names=["first", "second"],
        verify_integrity=False,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def index_names():
    """Return the level names ``["first", "second"]`` used by ``idx``."""
    # Kept in step with the names given to the idx fixture so tests can
    # compare against them.
    names = ["first", "second"]
    return names
|
||||
|
||||
|
||||
@pytest.fixture
def holder():
    """Return the MultiIndex class itself (not an instance)."""
    # the MultiIndex constructor used to base compatibility with pickle
    constructor = MultiIndex
    return constructor
|
||||
|
||||
|
||||
@pytest.fixture
def compat_props():
    """Return the attribute names a MultiIndex is expected to expose."""
    required = ["shape", "ndim", "size"]
    return required
|
||||
|
||||
|
||||
@pytest.fixture
def narrow_multi_index():
    """
    Return a MultiIndex that is narrower than the display (<80 characters).
    """
    half = 1000
    # First half of the labels is "a", second half is "abc".
    categories = pd.CategoricalIndex(["a"] * half + ["abc"] * half)
    seconds = pd.date_range("2000-01-01", freq="s", periods=half * 2)
    arrays = [categories, categories.codes + 9, seconds]
    return pd.MultiIndex.from_arrays(arrays, names=["a", "b", "dti"])
|
||||
|
||||
|
||||
@pytest.fixture
def wide_multi_index():
    """
    Return a MultiIndex that is wider than the display (>80 characters).
    """
    half = 1000
    # First half of the labels is "a", second half is "abc".
    categories = pd.CategoricalIndex(["a"] * half + ["abc"] * half)
    seconds = pd.date_range("2000-01-01", freq="s", periods=half * 2)
    # The datetime level is repeated three times to widen the repr.
    arrays = [categories, categories.codes + 9, seconds, seconds, seconds]
    return pd.MultiIndex.from_arrays(
        arrays, names=["a", "b", "dti_1", "dti_2", "dti_3"]
    )
|
||||
@@ -0,0 +1,356 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat.numpy import _np_version_under1p17
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex, date_range, period_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_shift(idx):
    """shift raises NotImplementedError for a MultiIndex."""
    # GH8083 test the base class for shift
    msg = "Not supported for type MultiIndex"
    for shift_args in ((1,), (1, 2)):
        with pytest.raises(NotImplementedError, match=msg):
            idx.shift(*shift_args)
|
||||
|
||||
|
||||
def test_groupby(idx):
|
||||
groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
|
||||
labels = idx.tolist()
|
||||
exp = {1: labels[:3], 2: labels[3:]}
|
||||
tm.assert_dict_equal(groups, exp)
|
||||
|
||||
# GH5620
|
||||
groups = idx.groupby(idx)
|
||||
exp = {key: [key] for key in idx}
|
||||
tm.assert_dict_equal(groups, exp)
|
||||
|
||||
|
||||
def test_truncate():
    """Check MultiIndex.truncate with ``before``, ``after``, both, and a bad pair.

    The first level holds the integers 0-3, so membership checks on
    ``result.levels[0]`` use those integer labels.
    """
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )

    result = index.truncate(before=1)
    # Label 0 precedes ``before`` and must be gone. (A check against the
    # string "foo" would pass trivially, since the level only has integers.)
    assert 0 not in result.levels[0]
    assert 1 in result.levels[0]

    result = index.truncate(after=1)
    assert 2 not in result.levels[0]
    assert 1 in result.levels[0]

    result = index.truncate(before=1, after=2)
    assert len(result.levels[0]) == 2

    # before=3, after=1 is an inverted range.
    msg = "after < before"
    with pytest.raises(ValueError, match=msg):
        index.truncate(3, 1)
|
||||
|
||||
|
||||
def test_where():
    """where raises NotImplementedError for a MultiIndex."""
    multi = MultiIndex.from_tuples([("A", 1), ("A", 2)])

    msg = r"\.where is not supported for MultiIndex operations"
    with pytest.raises(NotImplementedError, match=msg):
        multi.where(True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
def test_where_array_like(klass):
    """where raises NotImplementedError for any array-like condition."""
    multi = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    condition = klass([False, True])

    msg = r"\.where is not supported for MultiIndex operations"
    with pytest.raises(NotImplementedError, match=msg):
        multi.where(condition)
|
||||
|
||||
|
||||
# TODO: reshape
|
||||
|
||||
|
||||
def test_reorder_levels(idx):
    """reorder_levels([2, 1, 0]) must fail with a 'Too many levels' error."""
    requested_order = [2, 1, 0]
    # this blows up
    with pytest.raises(IndexError, match="^Too many levels"):
        idx.reorder_levels(requested_order)
|
||||
|
||||
|
||||
def test_numpy_repeat():
    """np.repeat works on a MultiIndex but rejects the ``axis`` argument."""
    count = 2
    labels = np.array(["foo", "bar"])
    ints = [1, 2, 3]

    mi = MultiIndex.from_product([ints, labels], names=labels)
    repeated = MultiIndex.from_product([ints, labels.repeat(count)], names=labels)
    tm.assert_index_equal(np.repeat(mi, count), repeated)

    with pytest.raises(ValueError, match="the 'axis' parameter is not supported"):
        np.repeat(mi, count, axis=1)
|
||||
|
||||
|
||||
def test_append_mixed_dtypes():
    """Append to a six-level MultiIndex whose levels have different dtypes."""
    # GH 13660
    dti = date_range("2011-01-01", freq="M", periods=3)
    dti_tz = date_range("2011-01-01", freq="M", periods=3, tz="US/Eastern")
    pi = period_range("2011-01", freq="M", periods=3)

    ints = [1, 2, 3]
    floats = [1.1, np.nan, 3.3]
    letters = ["a", "b", "c"]

    mi = MultiIndex.from_arrays([ints, floats, letters, dti, dti_tz, pi])
    assert mi.nlevels == 6

    # appending the index to itself doubles every level's values
    doubled = mi.append(mi)
    doubled_expected = MultiIndex.from_arrays(
        [
            ints + ints,
            floats + floats,
            letters + letters,
            dti.append(dti),
            dti_tz.append(dti_tz),
            pi.append(pi),
        ]
    )
    tm.assert_index_equal(doubled, doubled_expected)

    # appending an all-string index of the same depth
    xyz = ["x", "y", "z"]
    other = MultiIndex.from_arrays([xyz, xyz, xyz, xyz, xyz, xyz])

    mixed = mi.append(other)
    mixed_expected = MultiIndex.from_arrays(
        [
            ints + xyz,
            floats + xyz,
            letters + xyz,
            dti.append(pd.Index(xyz)),
            dti_tz.append(pd.Index(xyz)),
            pi.append(pd.Index(xyz)),
        ]
    )
    tm.assert_index_equal(mixed, mixed_expected)
|
||||
|
||||
|
||||
def test_take(idx):
    """take() with a list of positions equals indexing with the same list."""
    positions = [4, 3, 0, 2]
    taken = idx.take(positions)
    via_getitem = idx[positions]
    assert taken.equals(via_getitem)

    # GH 10791: accessing ``freq`` on the index raises AttributeError
    freq_msg = "'MultiIndex' object has no attribute 'freq'"
    with pytest.raises(AttributeError, match=freq_msg):
        idx.freq
|
||||
|
||||
|
||||
def test_take_invalid_kwargs(idx):
    """take() rejects unknown keywords and the unsupported numpy arguments.

    An unknown keyword raises TypeError; the ``out`` and ``mode`` keywords
    raise ValueError.
    """
    indices = [1, 2]

    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, mode="clip")
|
||||
|
||||
|
||||
def test_take_fill_value():
    """Exercise take() with the fill_value and allow_fill keywords."""
    # GH 12631
    names = ["str", "dt"]
    level_values = [
        ["A", "B"],
        [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")],
    ]
    idx = pd.MultiIndex.from_product(level_values, names=names)

    head = [
        ("A", pd.Timestamp("2011-01-02")),
        ("A", pd.Timestamp("2011-01-01")),
    ]
    last_entry = ("B", pd.Timestamp("2011-01-02"))

    # no fill_value: -1 selects the last entry
    taken = idx.take(np.array([1, 0, -1]))
    wanted = pd.MultiIndex.from_tuples(head + [last_entry], names=names)
    tm.assert_index_equal(taken, wanted)

    # fill_value: -1 becomes a missing entry
    taken = idx.take(np.array([1, 0, -1]), fill_value=True)
    wanted = pd.MultiIndex.from_tuples(head + [(np.nan, pd.NaT)], names=names)
    tm.assert_index_equal(taken, wanted)

    # allow_fill=False: fill_value is ignored
    taken = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
    wanted = pd.MultiIndex.from_tuples(head + [last_entry], names=names)
    tm.assert_index_equal(taken, wanted)

    msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1"
    for bad_position in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, bad_position]), fill_value=True)

    msg = "index -5 is out of bounds for size 4"
    with pytest.raises(IndexError, match=msg):
        idx.take(np.array([1, -5]))
|
||||
|
||||
|
||||
def test_iter(idx):
|
||||
result = list(idx)
|
||||
expected = [
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
]
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_sub(idx):
    """The ``-`` operator involving a MultiIndex raises TypeError."""
    first = idx

    # - now raises (previously was set op difference)
    sub_msg = "cannot perform __sub__ with this index type: MultiIndex"
    rsub_msg = "cannot perform __rsub__ with this index type: MultiIndex"
    attempts = [
        (sub_msg, lambda: first - idx[-3:]),
        (sub_msg, lambda: idx[-3:] - first),
        (sub_msg, lambda: idx[-3:] - first.tolist()),
        (rsub_msg, lambda: first.tolist() - idx[-3:]),
    ]
    for expected_msg, subtract in attempts:
        with pytest.raises(TypeError, match=expected_msg):
            subtract()
|
||||
|
||||
|
||||
def test_map(idx):
    """Mapping the identity callable over the index returns an equal index."""
    # callable
    source = idx

    # we don't infer UInt64
    wanted = source.astype("int64") if isinstance(source, pd.UInt64Index) else source

    mapped = source.map(lambda x: x)
    tm.assert_index_equal(mapped, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "mapper",
    [
        lambda values, idx: {i: e for e, i in zip(values, idx)},
        lambda values, idx: pd.Series(values, idx),
    ],
)
def test_map_dictlike(idx, mapper):
    """Map the index through a dict or Series built by ``mapper``."""
    if isinstance(idx, (pd.CategoricalIndex, pd.IntervalIndex)):
        pytest.skip("skipping tests for {}".format(type(idx)))

    # a mapping from each entry to itself
    identity = mapper(idx.values, idx)

    # we don't infer to UInt64 for a dict
    needs_int64 = isinstance(idx, pd.UInt64Index) and isinstance(identity, dict)
    wanted = idx.astype("int64") if needs_int64 else idx

    mapped = idx.map(identity)
    tm.assert_index_equal(mapped, wanted)

    # empty mappable
    wanted = pd.Index([np.nan] * len(idx))
    mapped = idx.map(mapper(wanted, idx))
    tm.assert_index_equal(mapped, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [
        np.exp,
        np.exp2,
        np.expm1,
        np.log,
        np.log2,
        np.log10,
        np.log1p,
        np.sqrt,
        np.sin,
        np.cos,
        np.tan,
        np.arcsin,
        np.arccos,
        np.arctan,
        np.sinh,
        np.cosh,
        np.tanh,
        np.arcsinh,
        np.arccosh,
        np.arctanh,
        np.deg2rad,
        np.rad2deg,
    ],
    ids=lambda func: func.__name__,
)
def test_numpy_ufuncs(idx, func):
    """Applying a numpy math ufunc to the index raises.

    The expected exception type and message depend on the numpy version flag
    ``_np_version_under1p17``.
    """
    # test ufuncs of numpy. see:
    # http://docs.scipy.org/doc/numpy/reference/ufuncs.html
    ufunc_name = func.__name__

    # default to the newer-numpy expectation, override for older numpy
    expected_exception = TypeError
    msg = (
        "loop of ufunc does not support argument 0 of type tuple which"
        " has no callable {} method"
    ).format(ufunc_name)
    if _np_version_under1p17:
        expected_exception = AttributeError
        msg = "'tuple' object has no attribute '{}'".format(ufunc_name)

    with pytest.raises(expected_exception, match=msg):
        func(idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [np.isfinite, np.isinf, np.isnan, np.signbit],
    ids=lambda func: func.__name__,
)
def test_numpy_type_funcs(idx, func):
    """numpy type-checking ufuncs raise TypeError when given the index."""
    template = (
        "ufunc '{}' not supported for the input types, and the inputs"
        " could not be safely coerced to any supported types according to"
        " the casting rule ''safe''"
    )
    expected_msg = template.format(func.__name__)
    with pytest.raises(TypeError, match=expected_msg):
        func(idx)
|
||||
@@ -0,0 +1,30 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
from pandas.util.testing import assert_copy
|
||||
|
||||
|
||||
def test_astype(idx):
    """astype("O") copies levels and codes; astype(int) raises TypeError."""
    reference = idx.copy()
    as_object = idx.astype("O")

    assert_copy(as_object.levels, reference.levels)
    assert_copy(as_object.codes, reference.codes)

    level_names = [level.name for level in as_object.levels]
    assert level_names == list(reference.names)

    with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
        idx.astype(np.dtype(int))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [True, False])
def test_astype_category(idx, ordered):
    """Casting the index to a categorical dtype raises NotImplementedError."""
    # GH 18630
    expected_msg = "> 1 ndim Categorical are not supported at this time"
    target_dtype = CategoricalDtype(ordered=ordered)
    with pytest.raises(NotImplementedError, match=expected_msg):
        idx.astype(target_dtype)

    if ordered is False:
        # dtype='category' defaults to ordered=False, so only test once
        with pytest.raises(NotImplementedError, match=expected_msg):
            idx.astype("category")
|
||||
@@ -0,0 +1,123 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_numeric_compat(idx):
    """Multiplication and division with the index raise TypeError.

    Each operator, forward and reflected, must name its dunder method in the
    error message.
    """
    div_err = "cannot perform __truediv__"
    rdiv_err = div_err.replace(" __", " __r")
    cases = [
        ("cannot perform __mul__", lambda: idx * 1),
        ("cannot perform __rmul__", lambda: 1 * idx),
        (div_err, lambda: idx / 1),
        (rdiv_err, lambda: 1 / idx),
        ("cannot perform __floordiv__", lambda: idx // 1),
        ("cannot perform __rfloordiv__", lambda: 1 // idx),
    ]
    for expected_msg, operation in cases:
        with pytest.raises(TypeError, match=expected_msg):
            operation()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["all", "any"])
def test_logical_compat(idx, method):
    """Calling the ``all``/``any`` reductions on the index raises TypeError."""
    expected_msg = "cannot perform {method}".format(method=method)
    reduction = getattr(idx, method)

    with pytest.raises(TypeError, match=expected_msg):
        reduction()
|
||||
|
||||
|
||||
def test_boolean_context_compat(idx):
    """Using the index in a boolean context raises an 'ambiguous' ValueError."""
    # Match on the message so an unrelated ValueError cannot satisfy the test.
    msg = "The truth value of a MultiIndex is ambiguous"
    with pytest.raises(ValueError, match=msg):
        bool(idx)
|
||||
|
||||
|
||||
def test_boolean_context_compat2():
    """The result of an intersection is also ambiguous in a boolean context."""
    # boolean context compat
    # GH7897
    i1 = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    i2 = MultiIndex.from_tuples([("A", 1), ("A", 3)])
    common = i1.intersection(i2)

    # Match on the message so an unrelated ValueError cannot satisfy the test.
    msg = "The truth value of a MultiIndex is ambiguous"
    with pytest.raises(ValueError, match=msg):
        bool(common)
|
||||
|
||||
|
||||
def test_inplace_mutation_resets_values():
    """set_levels/set_codes refresh ``values`` only when applied in place.

    The statement order matters: ``.values`` is read first so that the
    ``_tuples`` attribute is populated before the setters run.
    """
    first_levels = [["a", "b", "c"], [4]]
    second_levels = [[1, 2, 3], ["a"]]
    shared_codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]

    mi1 = MultiIndex(levels=first_levels, codes=shared_codes)
    mi2 = MultiIndex(levels=second_levels, codes=shared_codes)
    snapshot1 = mi1.values.copy()
    snapshot2 = mi2.values.copy()

    assert mi1._tuples is not None

    # Make sure level setting works
    relabelled = mi1.set_levels(second_levels).values
    tm.assert_almost_equal(snapshot2, relabelled)

    # Non-inplace doesn't kill _tuples [implementation detail]
    tm.assert_almost_equal(mi1._tuples, snapshot1)

    # ...and values is still same too
    tm.assert_almost_equal(mi1.values, snapshot1)

    # Inplace should kill _tuples
    mi1.set_levels(second_levels, inplace=True)
    tm.assert_almost_equal(mi1.values, snapshot2)

    # Make sure label setting works too
    zero_codes = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
    wanted_values = np.empty((6,), dtype=object)
    wanted_values[:] = [(1, "a")] * 6

    # Must be 1d array of tuples
    assert wanted_values.shape == (6,)
    recoded = mi2.set_codes(zero_codes).values

    # Not inplace shouldn't change
    tm.assert_almost_equal(mi2._tuples, snapshot2)

    # Should have correct values
    tm.assert_almost_equal(wanted_values, recoded)

    # ...and again setting inplace should kill _tuples, etc
    mi2.set_codes(zero_codes, inplace=True)
    tm.assert_almost_equal(mi2.values, recoded)
|
||||
|
||||
|
||||
def test_ndarray_compat_properties(idx, compat_props):
|
||||
assert idx.T.equals(idx)
|
||||
assert idx.transpose().equals(idx)
|
||||
|
||||
values = idx.values
|
||||
for prop in compat_props:
|
||||
assert getattr(idx, prop) == getattr(values, prop)
|
||||
|
||||
# test for validity
|
||||
idx.nbytes
|
||||
idx.values.nbytes
|
||||
|
||||
|
||||
def test_compat(indices):
|
||||
assert indices.tolist() == list(indices)
|
||||
|
||||
|
||||
def test_pickle_compat_construction(holder):
    """Calling ``holder`` with no arguments raises TypeError."""
    # this is testing for pickle compat
    # need an object to create with
    expected_msg = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=expected_msg):
        holder()
|
||||
@@ -0,0 +1,672 @@
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslib import Timestamp
|
||||
|
||||
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex, date_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_constructor_single_level():
    """A one-level MultiIndex keeps its type, its level values and its name."""
    labels = ["foo", "bar", "baz", "qux"]
    built = MultiIndex(levels=[labels], codes=[[0, 1, 2, 3]], names=["first"])

    assert isinstance(built, MultiIndex)
    wanted_level = Index(["foo", "bar", "baz", "qux"], name="first")
    tm.assert_index_equal(built.levels[0], wanted_level)
    assert built.names == ["first"]
|
||||
|
||||
|
||||
def test_constructor_no_levels():
    """The constructor rejects empty and partially specified levels/codes."""
    with pytest.raises(ValueError, match="non-zero number of levels/codes"):
        MultiIndex(levels=[], codes=[])

    # passing only one of levels/codes is a TypeError
    both_required = "Must pass both levels and codes"
    for partial_kwargs in ({"levels": []}, {"codes": []}):
        with pytest.raises(TypeError, match=both_required):
            MultiIndex(**partial_kwargs)
|
||||
|
||||
|
||||
def test_constructor_nonhashable_names():
    """List-valued names are rejected by the constructor and both renamers."""
    # GH 20527
    expected_msg = r"MultiIndex\.name must be a hashable type"

    with pytest.raises(TypeError, match=expected_msg):
        MultiIndex(
            levels=[[1, 2], ["one", "two"]],
            codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
            names=(["foo"], ["bar"]),
        )

    mi = MultiIndex(
        levels=[[1, 2], ["one", "two"]],
        codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
        names=("foo", "bar"),
    )
    unhashable = [["foor"], ["barr"]]

    # With .rename(), then with .set_names()
    for renamer in (mi.rename, mi.set_names):
        with pytest.raises(TypeError, match=expected_msg):
            renamer(names=unhashable)
|
||||
|
||||
|
||||
def test_constructor_mismatched_codes_levels(idx):
    """Inconsistent levels/codes raise ValueError, in the constructor and setters."""
    with pytest.raises(ValueError, match="Length of levels and codes must be the same"):
        MultiIndex(levels=["a"], codes=[np.array([1]), np.array([2]), np.array([3])])

    too_large_code = (
        r"On level 0, code max \(3\) >= length of level \(1\)\."
        " NOTE: this index is in an inconsistent state"
    )
    unequal_codes = r"Unequal code lengths: \[4, 2\]"
    too_small_code = r"On level 0, code value \(-2\) < -1"

    single_levels = [["a"], ["b"]]
    ragged_codes = [[0, 0, 0, 0], [0, 0]]

    # important to check that it's looking at the right thing.
    with pytest.raises(ValueError, match=too_large_code):
        MultiIndex(levels=single_levels, codes=[[0, 1, 2, 3], [0, 3, 4, 1]])

    with pytest.raises(ValueError, match=unequal_codes):
        MultiIndex(levels=single_levels, codes=ragged_codes)

    # external API
    with pytest.raises(ValueError, match=too_large_code):
        idx.copy().set_levels(single_levels)

    with pytest.raises(ValueError, match=unequal_codes):
        idx.copy().set_codes(ragged_codes)

    # test set_codes with verify_integrity=False
    # the setting should not raise any value error
    idx.copy().set_codes(codes=ragged_codes, verify_integrity=False)

    # code value smaller than -1
    with pytest.raises(ValueError, match=too_small_code):
        MultiIndex(levels=single_levels, codes=[[0, -2], [0, 0]])
|
||||
|
||||
|
||||
def test_na_levels():
    """Codes that point at missing level values are reset to -1."""
    # GH26408
    # test if codes are re-assigned value -1 for levels
    # with missing values (NaN, NaT, None)
    raw_codes = [[0, -1, 1, 2, 3, 4]]

    na_first = [[np.nan, None, pd.NaT, 128, 2]]
    built = MultiIndex(levels=na_first, codes=raw_codes)
    wanted = MultiIndex(levels=na_first, codes=[[-1, -1, -1, -1, 3, 4]])
    tm.assert_index_equal(built, wanted)

    na_mixed = [[np.nan, "s", pd.NaT, 128, None]]
    built = MultiIndex(levels=na_mixed, codes=raw_codes)
    wanted = MultiIndex(levels=na_mixed, codes=[[-1, -1, 1, -1, 3, -1]])
    tm.assert_index_equal(built, wanted)

    # verify set_levels and set_codes
    via_set_levels = MultiIndex(levels=[[1, 2, 3, 4, 5]], codes=raw_codes).set_levels(
        na_mixed
    )
    tm.assert_index_equal(via_set_levels, wanted)

    via_set_codes = MultiIndex(levels=na_mixed, codes=[[1, 2, 2, 2, 2, 2]]).set_codes(
        raw_codes
    )
    tm.assert_index_equal(via_set_codes, wanted)
|
||||
|
||||
|
||||
def test_labels_deprecated(idx):
    """The ``labels`` keyword and attribute each emit a FutureWarning."""
    # GH23752
    constructor_kwargs = {
        "levels": [["foo", "bar", "baz", "qux"]],
        "labels": [[0, 1, 2, 3]],
        "names": ["first"],
    }
    with tm.assert_produces_warning(FutureWarning):
        MultiIndex(**constructor_kwargs)

    with tm.assert_produces_warning(FutureWarning):
        idx.labels
|
||||
|
||||
|
||||
def test_copy_in_constructor():
    """With copy=True, later edits to the input arrays don't reach the index."""
    level_values = np.array(["a", "b", "c"])
    code_values = np.array([1, 1, 2, 0, 0, 1, 1])

    original_code = code_values[0]
    mi = MultiIndex(
        levels=[level_values, level_values],
        codes=[code_values, code_values],
        copy=True,
    )
    assert mi.codes[0][0] == original_code

    # mutate the source codes; the index must keep the old value
    code_values[0] = 15
    assert mi.codes[0][0] == original_code

    # same check for the source levels
    original_level = level_values[0]
    level_values[0] = "PANDA"
    assert mi.levels[0][0] == original_level
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_arrays
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_arrays(idx):
|
||||
arrays = [
|
||||
np.asarray(lev).take(level_codes)
|
||||
for lev, level_codes in zip(idx.levels, idx.codes)
|
||||
]
|
||||
|
||||
# list of arrays as input
|
||||
result = MultiIndex.from_arrays(arrays, names=idx.names)
|
||||
tm.assert_index_equal(result, idx)
|
||||
|
||||
# infer correctly
|
||||
result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]])
|
||||
assert result.levels[0].equals(Index([Timestamp("20130101")]))
|
||||
assert result.levels[1].equals(Index(["a", "b"]))
|
||||
|
||||
|
||||
def test_from_arrays_iterator(idx):
    """from_arrays accepts an iterator of arrays and rejects a plain integer."""
    # GH 18434
    per_level = []
    for lev, level_codes in zip(idx.levels, idx.codes):
        per_level.append(np.asarray(lev).take(level_codes))

    # iterator as input
    rebuilt = MultiIndex.from_arrays(iter(per_level), names=idx.names)
    tm.assert_index_equal(rebuilt, idx)

    # invalid iterator input
    expected_msg = "Input must be a list / sequence of array-likes."
    with pytest.raises(TypeError, match=expected_msg):
        MultiIndex.from_arrays(0)
|
||||
|
||||
|
||||
def test_from_arrays_tuples(idx):
|
||||
arrays = tuple(
|
||||
tuple(np.asarray(lev).take(level_codes))
|
||||
for lev, level_codes in zip(idx.levels, idx.codes)
|
||||
)
|
||||
|
||||
# tuple of tuples as input
|
||||
result = MultiIndex.from_arrays(arrays, names=idx.names)
|
||||
tm.assert_index_equal(result, idx)
|
||||
|
||||
|
||||
def test_from_arrays_index_series_datetimetz():
    """tz-aware datetime levels round-trip from both Index and Series input."""
    idx1 = pd.date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern")
    idx2 = pd.date_range("2015-01-01 10:00", freq="H", periods=3, tz="Asia/Tokyo")

    built = []
    # first pass feeds the indexes as-is, second pass wraps them in Series
    for wrap in (lambda index: index, pd.Series):
        mi = pd.MultiIndex.from_arrays([wrap(idx1), wrap(idx2)])
        for level, wanted in enumerate((idx1, idx2)):
            tm.assert_index_equal(mi.get_level_values(level), wanted)
        built.append(mi)

    tm.assert_index_equal(built[0], built[1])
|
||||
|
||||
|
||||
def test_from_arrays_index_series_timedelta():
    """Timedelta levels round-trip from both Index and Series input."""
    idx1 = pd.timedelta_range("1 days", freq="D", periods=3)
    idx2 = pd.timedelta_range("2 hours", freq="H", periods=3)

    built = []
    # first pass feeds the indexes as-is, second pass wraps them in Series
    for wrap in (lambda index: index, pd.Series):
        mi = pd.MultiIndex.from_arrays([wrap(idx1), wrap(idx2)])
        for level, wanted in enumerate((idx1, idx2)):
            tm.assert_index_equal(mi.get_level_values(level), wanted)
        built.append(mi)

    tm.assert_index_equal(built[0], built[1])
|
||||
|
||||
|
||||
def test_from_arrays_index_series_period():
    """Period levels round-trip from both Index and Series input."""
    idx1 = pd.period_range("2011-01-01", freq="D", periods=3)
    idx2 = pd.period_range("2015-01-01", freq="H", periods=3)

    built = []
    # first pass feeds the indexes as-is, second pass wraps them in Series
    for wrap in (lambda index: index, pd.Series):
        mi = pd.MultiIndex.from_arrays([wrap(idx1), wrap(idx2)])
        for level, wanted in enumerate((idx1, idx2)):
            tm.assert_index_equal(mi.get_level_values(level), wanted)
        built.append(mi)

    tm.assert_index_equal(built[0], built[1])
|
||||
|
||||
|
||||
def test_from_arrays_index_datetimelike_mixed():
    """Four different datetime-like levels round-trip via Index and Series."""
    sources = [
        pd.date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
        pd.date_range("2015-01-01 10:00", freq="H", periods=3),
        pd.timedelta_range("1 days", freq="D", periods=3),
        pd.period_range("2011-01-01", freq="D", periods=3),
    ]

    built = []
    # first pass feeds the indexes as-is, second pass wraps them in Series
    for wrap in (lambda index: index, pd.Series):
        mi = pd.MultiIndex.from_arrays([wrap(source) for source in sources])
        for level, wanted in enumerate(sources):
            tm.assert_index_equal(mi.get_level_values(level), wanted)
        built.append(mi)

    tm.assert_index_equal(built[0], built[1])
|
||||
|
||||
|
||||
def test_from_arrays_index_series_categorical():
    """Categorical levels round-trip from Index, Series and ``.values`` input."""
    # GH13743
    idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False)
    idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True)

    # feed the indexes as-is, wrapped in Series, then as their raw values
    wrappers = (lambda index: index, pd.Series, lambda index: index.values)
    for wrap in wrappers:
        mi = pd.MultiIndex.from_arrays([wrap(idx1), wrap(idx2)])
        for level, wanted in enumerate((idx1, idx2)):
            tm.assert_index_equal(mi.get_level_values(level), wanted)
|
||||
|
||||
|
||||
def test_from_arrays_empty():
    """from_arrays with zero arrays raises; empty arrays give empty levels."""
    # 0 levels
    with pytest.raises(ValueError, match="Must pass non-zero number of levels/codes"):
        MultiIndex.from_arrays(arrays=[])

    # 1 level
    single = MultiIndex.from_arrays(arrays=[[]], names=["A"])
    assert isinstance(single, MultiIndex)
    tm.assert_index_equal(single.levels[0], Index([], name="A"))

    # N levels
    for N in [2, 3]:
        level_names = list("ABC")[:N]
        empties = [[]] * N
        built = MultiIndex.from_arrays(arrays=empties, names=level_names)
        wanted = MultiIndex(levels=[[]] * N, codes=[[]] * N, names=level_names)
        tm.assert_index_equal(built, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "invalid_sequence_of_arrays",
    [
        # scalars and lists that contain at least one scalar
        1,
        [1],
        [1, 2],
        [[1], 2],
        [1, [2]],
        "a",
        ["a"],
        ["a", "b"],
        [["a"], "b"],
        # tuple counterparts (the bare "a" is already covered above, so it is
        # not repeated here)
        (1,),
        (1, 2),
        ([1], 2),
        (1, [2]),
        ("a",),
        ("a", "b"),
        (["a"], "b"),
        # mixed list/tuple nesting
        [(1,), 2],
        [1, (2,)],
        [("a",), "b"],
        ((1,), 2),
        (1, (2,)),
        (("a",), "b"),
    ],
)
def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
    """from_arrays raises TypeError for each of the parametrized inputs."""
    msg = "Input must be a list / sequence of array-likes"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])]
)
def test_from_arrays_different_lengths(idx1, idx2):
    """from_arrays raises ValueError when the arrays differ in length."""
    # see gh-13599
    mismatched = [idx1, idx2]
    with pytest.raises(ValueError, match="^all arrays must be same length$"):
        MultiIndex.from_arrays(mismatched)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_tuples
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_tuples():
    """from_tuples rejects an unnamed empty list and builds from tuple input."""
    with pytest.raises(TypeError, match="Cannot infer number of levels from empty list"):
        MultiIndex.from_tuples([])

    level_names = ["a", "b"]
    wanted = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=level_names
    )

    # input tuples
    built = MultiIndex.from_tuples(((1, 2), (3, 4)), names=["a", "b"])
    tm.assert_index_equal(built, wanted)
|
||||
|
||||
|
||||
def test_from_tuples_iterator():
    """from_tuples accepts a zip iterator and rejects a plain integer."""
    # GH 18434
    # input iterator for tuples
    wanted = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
    )

    pairs = zip([1, 3], [2, 4])
    built = MultiIndex.from_tuples(pairs, names=["a", "b"])
    tm.assert_index_equal(built, wanted)

    # input non-iterables
    expected_msg = "Input must be a list / sequence of tuple-likes."
    with pytest.raises(TypeError, match=expected_msg):
        MultiIndex.from_tuples(0)
|
||||
|
||||
|
||||
def test_from_tuples_empty():
    """An empty tuple list with names equals from_arrays with empty arrays."""
    # GH 16777
    level_names = ["a", "b"]
    from_tuples = MultiIndex.from_tuples([], names=level_names)
    from_arrays = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
    tm.assert_index_equal(from_tuples, from_arrays)
|
||||
|
||||
|
||||
def test_from_tuples_index_values(idx):
|
||||
result = MultiIndex.from_tuples(idx)
|
||||
assert (result.values == idx.values).all()
|
||||
|
||||
|
||||
def test_tuples_with_name_string():
    """pd.Index over tuples raises ValueError when ``name`` is a plain string."""
    # GH 15110 and GH 14848
    triples = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    expected_msg = "Names should be list-like for a MultiIndex"
    for string_name in ("abc", "a"):
        with pytest.raises(ValueError, match=expected_msg):
            pd.Index(triples, name=string_name)
|
||||
|
||||
|
||||
def test_from_tuples_with_tuple_label():
    """A tuple used as a label inside from_tuples matches set_index output."""
    # GH 15457
    frame = pd.DataFrame([[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"])
    via_set_index = frame.set_index(["a", "b"])

    idx = pd.MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b"))
    via_from_tuples = pd.DataFrame([2, 3], columns=["c"], index=idx)

    tm.assert_frame_equal(via_set_index, via_from_tuples)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_product
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_product_empty_zero_levels():
    """from_product with an empty list of iterables raises ValueError."""
    # 0 levels
    expected_msg = "Must pass non-zero number of levels/codes"
    no_iterables = []
    with pytest.raises(ValueError, match=expected_msg):
        MultiIndex.from_product(no_iterables)
|
||||
|
||||
|
||||
def test_from_product_empty_one_level():
    """from_product over one empty iterable gives an empty, named level."""
    built = MultiIndex.from_product([[]], names=["A"])
    wanted_level = pd.Index([], name="A")
    tm.assert_index_equal(built.levels[0], wanted_level)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])]
)
def test_from_product_empty_two_levels(first, second):
    """A product with an empty factor keeps both levels but has no codes."""
    level_names = ["A", "B"]
    built = MultiIndex.from_product([first, second], names=level_names)
    wanted = MultiIndex(levels=[first, second], codes=[[], []], names=level_names)
    tm.assert_index_equal(built, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("N", list(range(4)))
def test_from_product_empty_three_levels(N):
    """A three-way product with empty outer factors has no codes."""
    # GH12258
    level_names = ["A", "B", "C"]
    middle = list(range(N))
    built = MultiIndex.from_product([[], middle, []], names=level_names)
    wanted = MultiIndex(
        levels=[[], middle, []], codes=[[], [], []], names=level_names
    )
    tm.assert_index_equal(built, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]]
)
def test_from_product_invalid_input(invalid_input):
    """from_product raises TypeError for each of the parametrized inputs."""
    # either of the two messages is accepted
    expected_msg = r"Input must be a list / sequence of iterables|Input must be list-like"
    with pytest.raises(TypeError, match=expected_msg):
        MultiIndex.from_product(iterables=invalid_input)
|
||||
|
||||
|
||||
def test_from_product_datetimeindex():
    """The product of ints and a DatetimeIndex yields (int, Timestamp) tuples."""
    dates = date_range("2000-01-01", periods=2)
    mi = pd.MultiIndex.from_product([[1, 2], dates])

    wanted_tuples = [
        (1, pd.Timestamp("2000-01-01")),
        (1, pd.Timestamp("2000-01-02")),
        (2, pd.Timestamp("2000-01-01")),
        (2, pd.Timestamp("2000-01-02")),
    ]
    wanted_values = construct_1d_object_array_from_listlike(wanted_tuples)
    tm.assert_numpy_array_equal(mi.values, wanted_values)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize("f", [lambda x: x, lambda x: pd.Series(x), lambda x: x.values])
def test_from_product_index_series_categorical(ordered, f):
    """A categorical factor keeps its categories and order in the product."""
    # GH13743
    outer = ["foo", "bar"]
    letters = list("abcaab")

    idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=ordered)
    wanted = pd.CategoricalIndex(
        letters + letters, categories=list("bac"), ordered=ordered
    )

    mi = pd.MultiIndex.from_product([outer, f(idx)])
    tm.assert_index_equal(mi.get_level_values(1), wanted)
|
||||
|
||||
|
||||
def test_from_product():
    """Check from_product against the same index built tuple by tuple."""
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]

    # Outer level varies slowest, exactly as in the cartesian product.
    pairs = [(outer, inner) for outer in first for inner in second]
    expected = MultiIndex.from_tuples(pairs, names=names)

    result = MultiIndex.from_product([first, second], names=names)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_from_product_iterator():
    """Check that from_product accepts an iterator of iterables (GH 18434).

    Also checks that a non-iterable argument raises TypeError.
    """
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]
    pairs = [(outer, inner) for outer in first for inner in second]
    expected = MultiIndex.from_tuples(pairs, names=names)

    # iterator as input
    level_iter = iter([first, second])
    result = MultiIndex.from_product(level_iter, names=names)
    tm.assert_index_equal(result, expected)

    # Invalid non-iterable input
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of iterables."
    ):
        MultiIndex.from_product(0)
|
||||
|
||||
|
||||
def test_create_index_existing_name(idx):
    """Check name handling when an Index is built from an existing one (GH11193).

    Without ``names`` the new index should inherit the names of the index it
    was built from; with ``names`` the given ones are used. The ``idx``
    fixture is renamed in place by this test.
    """
    tuples = [
        ("foo", "one"),
        ("foo", "two"),
        ("bar", "one"),
        ("baz", "two"),
        ("qux", "one"),
        ("qux", "two"),
    ]

    index = idx
    index.names = ["foo", "bar"]

    # no names given: inherited from the source index
    result = pd.Index(index)
    expected = Index(Index(tuples, dtype="object"), names=["foo", "bar"])
    tm.assert_index_equal(result, expected)

    # explicit names
    result = pd.Index(index, names=["A", "B"])
    expected = Index(Index(tuples, dtype="object"), names=["A", "B"])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_frame
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_frame():
    """Check from_frame on a two-column frame (GH 22420).

    Rows become the tuples of the index and column labels become its names.
    """
    rows = [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]]
    level_names = ["L1", "L2"]

    frame = pd.DataFrame(rows, columns=level_names)
    expected = pd.MultiIndex.from_tuples(
        [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=level_names
    )
    result = pd.MultiIndex.from_frame(frame)
    tm.assert_index_equal(expected, result)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "non_frame",
    [
        pd.Series([1, 2, 3, 4]),
        [1, 2, 3, 4],
        [[1, 2], [3, 4], [5, 6]],
        pd.Index([1, 2, 3, 4]),
        np.array([[1, 2], [3, 4], [5, 6]]),
        27,
    ],
)
def test_from_frame_error(non_frame):
    """Check that from_frame raises TypeError for non-DataFrame input (GH 22420)."""
    expected_msg = "Input must be a DataFrame"
    with pytest.raises(TypeError, match=expected_msg):
        pd.MultiIndex.from_frame(non_frame)
|
||||
|
||||
|
||||
def test_from_frame_dtype_fidelity():
    """Check that from_frame keeps each column's dtype on its level (GH 22420)."""
    columns = OrderedDict(
        [
            ("dates", pd.date_range("19910905", periods=6, tz="US/Eastern")),
            ("a", [1, 1, 1, 2, 2, 2]),
            ("b", pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True)),
            ("c", ["x", "x", "y", "z", "x", "y"]),
        ]
    )
    df = pd.DataFrame(columns)
    original_dtypes = df.dtypes.to_dict()

    # Expected index built from the same column data, one array per level.
    expected_mi = pd.MultiIndex.from_arrays(
        list(columns.values()), names=["dates", "a", "b", "c"]
    )

    mi = pd.MultiIndex.from_frame(df)
    mi_dtypes = {
        name: level.dtype for name, level in zip(mi.names, mi.levels)
    }

    tm.assert_index_equal(expected_mi, mi)
    assert original_dtypes == mi_dtypes
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])]
)
def test_from_frame_valid_names(names_in, names_out):
    """Check the names of an index built from a frame with MultiIndex columns (GH 22420).

    With ``names=None`` the column tuples become the names; otherwise the
    names passed in are used.
    """
    frame_columns = pd.MultiIndex.from_tuples([("L1", "x"), ("L2", "y")])
    frame = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=frame_columns
    )
    result = pd.MultiIndex.from_frame(frame, names=names_in)
    assert result.names == names_out
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "names,expected_error_msg",
    [
        ("bad_input", "Names should be list-like for a MultiIndex"),
        (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"),
    ],
)
def test_from_frame_invalid_names(names, expected_error_msg):
    """Check that from_frame raises ValueError for unusable ``names`` (GH 22420)."""
    frame_columns = pd.MultiIndex.from_tuples([("L1", "x"), ("L2", "y")])
    frame = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=frame_columns
    )
    with pytest.raises(ValueError, match=expected_error_msg):
        pd.MultiIndex.from_frame(frame, names=names)
|
||||
@@ -0,0 +1,100 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PYPY
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_contains_top_level():
    """Check that a first-level label is ``in`` the index but not in its engine."""
    product_index = MultiIndex.from_product([["A", "B"], [1, 2]])
    label = "A"
    assert label in product_index
    assert label not in product_index._engine
|
||||
|
||||
|
||||
def test_contains_with_nat():
    """Check membership on an index whose second level has a missing code.

    The first entry uses code ``-1`` for the datetime level; every element
    of ``.values``, including that one, must be found in the index.
    """
    dates = pd.date_range("2012-01-01", periods=5)
    mi = MultiIndex(
        levels=[["C"], dates],
        codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
        names=[None, "B"],
    )
    assert ("C", pd.Timestamp("2012-01-01")) in mi
    assert all(entry in mi for entry in mi.values)
|
||||
|
||||
|
||||
def test_contains(idx):
    """Check ``in`` on the ``idx`` fixture for a present key, an absent key and None."""
    present = ("foo", "two")
    absent = ("bar", "two")
    assert present in idx
    assert absent not in idx
    assert None not in idx
|
||||
|
||||
|
||||
@pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy")
def test_isin_nan_pypy():
    """On PyPy, a tuple containing NaN is expected to match the NaN entry."""
    idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])
    # Both spellings of NaN must give the same answer.
    for nan in (np.nan, float("nan")):
        tm.assert_numpy_array_equal(
            idx.isin([("bar", nan)]), np.array([False, True])
        )
|
||||
|
||||
|
||||
def test_isin():
    """Check isin with full tuples, including on an empty index."""
    values = [("foo", 2), ("bar", 3), ("quux", 4)]

    populated = MultiIndex.from_arrays(
        [["qux", "baz", "foo", "bar"], np.arange(4)]
    )
    tm.assert_numpy_array_equal(
        populated.isin(values), np.array([False, False, True, True])
    )

    # empty, return dtype bool
    empty = MultiIndex.from_arrays([[], []])
    result = empty.isin(values)
    assert len(result) == 0
    assert result.dtype == np.bool_
|
||||
|
||||
|
||||
@pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy")
def test_isin_nan_not_pypy():
    """Outside PyPy, a tuple containing NaN is expected to match nothing."""
    idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])
    # Both spellings of NaN must give the same answer.
    for nan in (np.nan, float("nan")):
        tm.assert_numpy_array_equal(
            idx.isin([("bar", nan)]), np.array([False, False])
        )
|
||||
|
||||
|
||||
def test_isin_level_kwarg():
    """Check isin with ``level=`` given by position and by name.

    Also checks the errors raised for out-of-range positions, non-integer
    level numbers and unknown level names.
    """
    idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])

    vals_0 = ["foo", "bar", "quux"]
    vals_1 = [2, 3, 10]
    expected = np.array([False, False, True, True])

    # positive and negative positions address the same level
    for level in (0, -2):
        tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=level))
    for level in (1, -1):
        tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=level))

    # out-of-range positions
    with pytest.raises(
        IndexError, match="Too many levels: Index has only 2 levels, not 6"
    ):
        idx.isin(vals_0, level=5)
    with pytest.raises(
        IndexError,
        match="Too many levels: Index has only 2 levels, -5 is not a valid level number",
    ):
        idx.isin(vals_0, level=-5)

    # floats and unknown names are looked up as names and not found
    with pytest.raises(KeyError, match=r"'Level 1\.0 not found'"):
        idx.isin(vals_0, level=1.0)
    with pytest.raises(KeyError, match=r"'Level -1\.0 not found'"):
        idx.isin(vals_1, level=-1.0)
    with pytest.raises(KeyError, match="'Level A not found'"):
        idx.isin(vals_1, level="A")

    # once the levels are named, the names work
    idx.names = ["A", "B"]
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level="A"))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level="B"))

    with pytest.raises(KeyError, match="'Level C not found'"):
        idx.isin(vals_1, level="C")
|
||||
@@ -0,0 +1,252 @@
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, MultiIndex, date_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_tolist(idx):
    """Check that ``tolist()`` equals ``list(idx.values)``."""
    expected = list(idx.values)
    assert idx.tolist() == expected
|
||||
|
||||
|
||||
def test_to_numpy(idx):
    """Check that ``to_numpy()`` equals ``idx.values``."""
    expected = idx.values
    tm.assert_numpy_array_equal(idx.to_numpy(), expected)
|
||||
|
||||
|
||||
def test_to_frame():
    """Check MultiIndex.to_frame with and without ``index`` and ``name``.

    Covers unnamed and named levels, explicit column names (GH-22580), the
    errors raised for a bad ``name`` argument, and a datetime level.
    """
    tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
    column_names = ["first", "second"]

    # unnamed levels: columns are the level positions
    index = MultiIndex.from_tuples(tuples)
    expected = DataFrame(tuples)
    tm.assert_frame_equal(index.to_frame(index=False), expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(), expected)

    # named levels: columns take the level names
    index = MultiIndex.from_tuples(tuples, names=column_names)
    expected = DataFrame(tuples)
    expected.columns = column_names
    tm.assert_frame_equal(index.to_frame(index=False), expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(), expected)

    # See GH-22580
    index = MultiIndex.from_tuples(tuples)
    expected = DataFrame(tuples)
    expected.columns = column_names
    tm.assert_frame_equal(
        index.to_frame(index=False, name=column_names), expected
    )

    expected.index = index
    expected.columns = column_names
    tm.assert_frame_equal(index.to_frame(name=column_names), expected)

    with pytest.raises(
        TypeError, match="'name' must be a list / sequence of column names."
    ):
        index.to_frame(name="first")

    with pytest.raises(
        ValueError,
        match="'name' should have same length as number of levels on index.",
    ):
        index.to_frame(name=["first"])

    # Tests for datetime index
    index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
    expected = DataFrame(
        {
            0: np.repeat(np.arange(5, dtype="int64"), 3),
            1: np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(index.to_frame(index=False), expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(), expected)

    # See GH-22580
    expected = DataFrame(
        {
            "first": np.repeat(np.arange(5, dtype="int64"), 3),
            "second": np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(
        index.to_frame(index=False, name=column_names), expected
    )

    expected.index = index
    tm.assert_frame_equal(index.to_frame(name=column_names), expected)
|
||||
|
||||
|
||||
def test_to_frame_dtype_fidelity():
    """Check that to_frame keeps each level's dtype on its column (GH 22420)."""
    level_names = ["dates", "a", "b", "c"]
    mi = pd.MultiIndex.from_arrays(
        [
            pd.date_range("19910905", periods=6, tz="US/Eastern"),
            [1, 1, 1, 2, 2, 2],
            pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            ["x", "x", "y", "z", "x", "y"],
        ],
        names=level_names,
    )
    original_dtypes = {
        name: level.dtype for name, level in zip(mi.names, mi.levels)
    }

    expected_df = pd.DataFrame(
        OrderedDict(
            [
                ("dates", pd.date_range("19910905", periods=6, tz="US/Eastern")),
                ("a", [1, 1, 1, 2, 2, 2]),
                ("b", pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True)),
                ("c", ["x", "x", "y", "z", "x", "y"]),
            ]
        )
    )

    df = mi.to_frame(index=False)
    tm.assert_frame_equal(df, expected_df)
    assert original_dtypes == df.dtypes.to_dict()
|
||||
|
||||
|
||||
def test_to_frame_resulting_column_order():
    """Check that to_frame keeps the level order in its columns (GH 22420).

    The level names are deliberately unsorted and of mixed type.
    """
    expected = ["z", 0, "a"]
    arrays = [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]]
    mi = pd.MultiIndex.from_arrays(arrays, names=expected)
    frame = mi.to_frame()
    assert frame.columns.tolist() == expected
|
||||
|
||||
|
||||
def test_to_hierarchical():
|
||||
index = MultiIndex.from_tuples([(1, "one"), (1, "two"), (2, "one"), (2, "two")])
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
result = index.to_hierarchical(3)
|
||||
expected = MultiIndex(
|
||||
levels=[[1, 2], ["one", "two"]],
|
||||
codes=[
|
||||
[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
|
||||
[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1],
|
||||
],
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.names == index.names
|
||||
|
||||
# K > 1
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
result = index.to_hierarchical(3, 2)
|
||||
expected = MultiIndex(
|
||||
levels=[[1, 2], ["one", "two"]],
|
||||
codes=[
|
||||
[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
|
||||
[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
|
||||
],
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.names == index.names
|
||||
|
||||
# non-sorted
|
||||
index = MultiIndex.from_tuples(
|
||||
[(2, "c"), (1, "b"), (2, "a"), (2, "b")], names=["N1", "N2"]
|
||||
)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
result = index.to_hierarchical(2)
|
||||
expected = MultiIndex.from_tuples(
|
||||
[
|
||||
(2, "c"),
|
||||
(2, "c"),
|
||||
(1, "b"),
|
||||
(1, "b"),
|
||||
(2, "a"),
|
||||
(2, "a"),
|
||||
(2, "b"),
|
||||
(2, "b"),
|
||||
],
|
||||
names=["N1", "N2"],
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.names == index.names
|
||||
|
||||
|
||||
def test_roundtrip_pickle_with_tz():
    """Check that a MultiIndex with a tz-aware level survives pickling (GH 8367).

    The original body started with an unconditional ``return``, so nothing
    below it ever ran and the test passed vacuously. The early return is
    removed so the round trip is actually exercised.
    """
    # round-trip of timezone
    index = MultiIndex.from_product(
        [[1, 2], ["a", "b"], date_range("20130101", periods=3, tz="US/Eastern")],
        names=["one", "two", "three"],
    )
    unpickled = tm.round_trip_pickle(index)
    assert index.equal_levels(unpickled)
|
||||
|
||||
|
||||
def test_pickle(indices):
    """Check that an index equals its pickled copy, before and after renaming.

    The original body started with an unconditional ``return``, so none of
    the checks ran. The early return is removed, and the fixture's name is
    now restored in a ``finally`` so a failing assertion cannot leave the
    renamed object behind.
    """
    unpickled = tm.round_trip_pickle(indices)
    assert indices.equals(unpickled)

    original_name, indices.name = indices.name, "foo"
    try:
        unpickled = tm.round_trip_pickle(indices)
        assert indices.equals(unpickled)
    finally:
        indices.name = original_name
|
||||
|
||||
|
||||
def test_to_series(idx):
    """Check that ``to_series()`` does not reuse the index's own objects.

    The Series values and index must be different objects from ``idx``'s,
    while the name is carried over.
    """
    series = idx.to_series()
    assert series.values is not idx.values
    assert series.index is not idx
    assert series.name == idx.name
|
||||
|
||||
|
||||
def test_to_series_with_arguments(idx):
    """Check the ``index`` and ``name`` keyword arguments of to_series (GH18699)."""
    # index kwarg: the object passed in is used as-is
    with_index = idx.to_series(index=idx)
    assert with_index.values is not idx.values
    assert with_index.index is idx
    assert with_index.name == idx.name

    # name kwarg: overrides the name, index is still a new object
    with_name = idx.to_series(name="__test")
    assert with_name.values is not idx.values
    assert with_name.index is not idx
    assert with_name.name != idx.name
|
||||
|
||||
|
||||
def test_to_flat_index(idx):
    """Check that ``to_flat_index()`` gives a flat Index of tuples."""
    flat_tuples = (
        ("foo", "one"),
        ("foo", "two"),
        ("bar", "one"),
        ("baz", "two"),
        ("qux", "one"),
        ("qux", "two"),
    )
    # tupleize_cols=False keeps the tuples as elements of a flat Index
    expected = pd.Index(flat_tuples, tupleize_cols=False)
    tm.assert_index_equal(idx.to_flat_index(), expected)
|
||||
@@ -0,0 +1,94 @@
|
||||
from copy import copy, deepcopy
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def assert_multiindex_copied(copy, original):
    """Assert that ``copy`` is an independent but equal copy of ``original``.

    Parameters
    ----------
    copy : MultiIndex
        The copied index. Note that this parameter shadows the ``copy``
        function imported at module level for the duration of the call.
    original : MultiIndex
        The index the copy was made from.

    The original compared the codes for equality twice; the redundant
    second comparison is dropped.
    """
    # Levels should be (at least) shallow copied
    tm.assert_copy(copy.levels, original.levels)

    # Codes: equal contents, whichever way they were copied, but a
    # different container object
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes

    # Names: equal contents, whichever way they were copied, but a
    # different container object
    assert copy.names == original.names
    assert copy.names is not original.names

    # Sort order should be copied
    assert copy.sortorder == original.sortorder
|
||||
|
||||
|
||||
def test_copy(idx):
    """Check that ``copy()`` passes the module's copy assertions."""
    assert_multiindex_copied(idx.copy(), idx)
|
||||
|
||||
|
||||
def test_shallow_copy(idx):
    """Check that ``_shallow_copy()`` passes the module's copy assertions."""
    assert_multiindex_copied(idx._shallow_copy(), idx)
|
||||
|
||||
|
||||
def test_labels_deprecated(idx):
    """Check that passing ``labels=`` to copy emits a FutureWarning (GH23752)."""
    codes = idx.codes
    with tm.assert_produces_warning(FutureWarning):
        idx.copy(labels=codes)
|
||||
|
||||
|
||||
def test_view(idx):
    """Check that ``view()`` passes the module's copy assertions."""
    assert_multiindex_copied(idx.view(), idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [copy, deepcopy])
def test_copy_and_deepcopy(func):
    """Check that ``copy.copy`` / ``copy.deepcopy`` give an equal, distinct index."""
    source = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    duplicate = func(source)
    assert duplicate is not source
    assert duplicate.equals(source)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("deep", [True, False])
def test_copy_method(deep):
    """Check that ``MultiIndex.copy(deep=...)`` gives an equal index."""
    source = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    assert source.copy(deep=deep).equals(source)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("deep", [True, False])
@pytest.mark.parametrize(
    "kwarg, value",
    [
        ("names", ["thrid", "fourth"]),
        ("levels", [["foo2", "bar2"], ["fizz2", "buzz2"]]),
        ("codes", [[1, 0, 0, 0], [1, 1, 0, 0]]),
    ],
)
def test_copy_method_kwargs(deep, kwarg, value):
    """Check that ``copy`` honors ``names``, ``levels`` and ``codes`` (gh-12309).

    The original returned right after building the index, so the copy was
    never made and no assertion ran. The early return is removed.
    """
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = idx.copy(**{kwarg: value, "deep": deep})
    if kwarg == "names":
        assert getattr(idx_copy, kwarg) == value
    else:
        # levels / codes are sequences of array-likes; compare as plain lists
        assert [list(i) for i in getattr(idx_copy, kwarg)] == value
|
||||
@@ -0,0 +1,141 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_drop(idx):
    """Check MultiIndex.drop with full keys, partial keys and ``errors='ignore'``.

    Expected results are written as positional selections of ``idx``.
    """
    # full keys, as a list of tuples and as a MultiIndex
    both_present = [("foo", "two"), ("qux", "one")]
    expected = idx[[0, 2, 3, 5]]
    dropped = idx.drop(both_present)
    index = MultiIndex.from_tuples(both_present)
    dropped2 = idx.drop(index)
    tm.assert_index_equal(dropped, expected)
    tm.assert_index_equal(dropped2, expected)

    # partial (first-level) keys, as a list and as a scalar
    tm.assert_index_equal(idx.drop(["bar"]), idx[[0, 1, 3, 4, 5]])
    tm.assert_index_equal(idx.drop("foo"), idx[[2, 3, 4, 5]])

    # keys that are not in the index
    index = MultiIndex.from_tuples([("bar", "two")])
    with pytest.raises(KeyError, match=r"^10$"):
        idx.drop([("bar", "two")])
    with pytest.raises(KeyError, match=r"^10$"):
        idx.drop(index)
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(["foo", "two"])

    # partially correct argument
    mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
    with pytest.raises(KeyError, match=r"^10$"):
        idx.drop(mixed_index)

    # error='ignore'
    tm.assert_index_equal(
        idx.drop(index, errors="ignore"), idx[[0, 1, 2, 3, 4, 5]]
    )
    tm.assert_index_equal(
        idx.drop(mixed_index, errors="ignore"), idx[[0, 1, 2, 3, 5]]
    )
    tm.assert_index_equal(
        idx.drop(["foo", "two"], errors="ignore"), idx[[2, 3, 4, 5]]
    )

    # mixed partial / full drop
    tm.assert_index_equal(idx.drop(["foo", ("qux", "one")]), idx[[2, 3, 5]])

    # mixed partial / full drop / error='ignore'
    mixed_index = ["foo", ("qux", "one"), "two"]
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(mixed_index)
    tm.assert_index_equal(
        idx.drop(mixed_index, errors="ignore"), idx[[2, 3, 5]]
    )
|
||||
|
||||
|
||||
def test_droplevel_with_names(idx):
    """Check that droplevel keeps the remaining names and accepts a level name."""
    # two levels -> one: the surviving name becomes ``.name``
    foo_rows = idx[idx.get_loc("foo")]
    assert foo_rows.droplevel(0).name == "second"

    three_level = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )
    assert three_level.droplevel(0).names == ("two", "three")

    # dropping by name equals dropping by position
    by_name = three_level.droplevel("two")
    by_position = three_level.droplevel(1)
    assert by_name.equals(by_position)
|
||||
|
||||
|
||||
def test_droplevel_list():
    """Check droplevel with a list of level names.

    Covers several names, an empty list, dropping every level (ValueError)
    and an unknown name (KeyError).
    """
    index = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )

    # a list of names equals dropping the positions one at a time
    by_names = index[:2].droplevel(["three", "one"])
    by_positions = index[:2].droplevel(2).droplevel(0)
    assert by_names.equals(by_positions)

    # an empty list drops nothing
    assert index[:2].droplevel([]).equals(index[:2])

    msg = (
        "Cannot remove 3 levels from an index with 3 levels: at least one"
        " level must be left"
    )
    with pytest.raises(ValueError, match=msg):
        index[:2].droplevel(["one", "two", "three"])

    with pytest.raises(KeyError, match="'Level four not found'"):
        index[:2].droplevel(["one", "four"])
|
||||
|
||||
|
||||
def test_drop_not_lexsorted():
|
||||
# GH 12078
|
||||
|
||||
# define the lexsorted version of the multi-index
|
||||
tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")]
|
||||
lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"])
|
||||
assert lexsorted_mi.is_lexsorted()
|
||||
|
||||
# and the not-lexsorted version
|
||||
df = pd.DataFrame(
|
||||
columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
|
||||
)
|
||||
df = df.pivot_table(index="a", columns=["b", "c"], values="d")
|
||||
df = df.reset_index()
|
||||
not_lexsorted_mi = df.columns
|
||||
assert not not_lexsorted_mi.is_lexsorted()
|
||||
|
||||
# compare the results
|
||||
tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
|
||||
with tm.assert_produces_warning(PerformanceWarning):
|
||||
tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a"))
|
||||
@@ -0,0 +1,283 @@
|
||||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import hashtable
|
||||
|
||||
from pandas import DatetimeIndex, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("names", [None, ["first", "second"]])
def test_unique(names):
    """Check MultiIndex.unique on integer, string, constant and empty data.

    The expected index keeps first occurrences in order and the names of
    the source index.
    """
    mi = MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], names=names)
    expected = MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names)
    tm.assert_index_equal(mi.unique(), expected)

    mi = MultiIndex.from_arrays([list("aaaa"), list("abab")], names=names)
    expected = MultiIndex.from_arrays([list("aa"), list("ab")], names=mi.names)
    tm.assert_index_equal(mi.unique(), expected)

    mi = MultiIndex.from_arrays([list("aaaa"), list("aaaa")], names=names)
    expected = MultiIndex.from_arrays([["a"], ["a"]], names=mi.names)
    tm.assert_index_equal(mi.unique(), expected)

    # GH #20568 - empty MI
    mi = MultiIndex.from_arrays([[], []], names=names)
    tm.assert_index_equal(mi, mi.unique())
|
||||
|
||||
|
||||
def test_unique_datetimelike():
    """Check unique on two datetime levels (one tz-aware) that contain NaT."""
    naive = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-01", "2015-01-01", "NaT", "NaT"]
    )
    tokyo = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "NaT", "2015-01-01"],
        tz="Asia/Tokyo",
    )
    result = MultiIndex.from_arrays([naive, tokyo]).unique()

    expected_naive = DatetimeIndex(["2015-01-01", "2015-01-01", "NaT", "NaT"])
    expected_tokyo = DatetimeIndex(
        ["2015-01-01", "2015-01-02", "NaT", "2015-01-01"], tz="Asia/Tokyo"
    )
    expected = MultiIndex.from_arrays([expected_naive, expected_tokyo])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level", [0, "first", 1, "second"])
def test_unique_level(idx, level):
    """Check ``unique(level=...)`` by position and by name (GH #17896).

    Covers the ``idx`` fixture, an index whose levels are already unique,
    and an empty index. The empty case originally computed ``result`` and
    ``expected`` without comparing them; the missing assertion is added.
    """
    # GH #17896 - with level= argument
    result = idx.unique(level=level)
    expected = idx.get_level_values(level).unique()
    tm.assert_index_equal(result, expected)

    # With already unique level
    mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], names=["first", "second"])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)

    # With empty MI
    mi = MultiIndex.from_arrays([[], []], names=["first", "second"])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dropna", [True, False])
def test_get_unique_index(idx, dropna):
    """Check ``_get_unique_index`` on an index with repeated entries.

    The original asserted ``result.unique``, a bound method that is always
    truthy, so the check could never fail. It now asserts the
    ``is_unique`` property instead.
    """
    mi = idx[[0, 1, 0, 1, 1, 0, 0]]
    expected = mi._shallow_copy(mi[[0, 1]])

    result = mi._get_unique_index(dropna=dropna)
    assert result.is_unique
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_duplicate_multiindex_codes():
    """Check that duplicate values within a level raise ValueError (GH 17464).

    Both the constructor and ``set_levels`` are exercised.
    """
    # Make sure that a MultiIndex with duplicate levels throws a ValueError
    duplicated_level = ["A"] * 10
    with pytest.raises(ValueError):
        MultiIndex([duplicated_level, range(10)], [[0] * 10, range(10)])

    # And that using set_levels with duplicate levels fails
    mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    new_levels = [["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]]
    with pytest.raises(ValueError):
        mi.set_levels(new_levels, inplace=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]])
def test_duplicate_level_names(names):
    """Check that duplicate level names are accepted (GH18872, GH19029).

    Names are set through the constructor, ``rename`` and
    ``rename(..., level=...)``.
    """
    levels = [[0, 1]] * 3

    mi = MultiIndex.from_product(levels, names=names)
    assert mi.names == names

    # With .rename()
    mi = MultiIndex.from_product(levels).rename(names)
    assert mi.names == names

    # With .rename(., level=)
    mi.rename(names[1], level=1, inplace=True)
    mi = mi.rename([names[0], names[2]], level=[0, 2])
    assert mi.names == names
|
||||
|
||||
|
||||
def test_duplicate_meta_data():
    """Check that drop_duplicates keeps the level names (GH 10115).

    The same duplicated index is tried unnamed, partly named and fully named.
    """
    mi = MultiIndex(
        levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
    )
    name_sets = ([None, None], [None, "Num"], ["Upper", "Num"])
    variants = [mi] + [mi.set_names(names) for names in name_sets]

    for variant in variants:
        assert variant.has_duplicates
        assert variant.drop_duplicates().names == variant.names
|
||||
|
||||
|
||||
def test_has_duplicates(idx, idx_dup):
    """Check ``is_unique`` / ``has_duplicates`` on several indexes.

    Covers the two fixtures, an index with a repeated code pair, and
    indexes with one or several all-missing (``-1``) entries.
    """
    # see fixtures
    assert idx.is_unique is True
    assert idx.has_duplicates is False
    assert idx_dup.is_unique is False
    assert idx_dup.has_duplicates is True

    # the code pair (0, 0) appears twice
    repeated = MultiIndex(
        levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
    )
    assert repeated.is_unique is False
    assert repeated.has_duplicates is True

    # single instance of NaN
    one_nan = MultiIndex(
        levels=[["a", "b"], [0, 1]], codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]]
    )
    assert one_nan.is_unique is True
    assert one_nan.has_duplicates is False

    # multiple instances of NaN
    two_nans = MultiIndex(
        levels=[["a", "b"], [0, 1]], codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]]
    )
    assert two_nans.is_unique is False
    assert two_nans.has_duplicates is True
|
||||
|
||||
|
||||
def test_has_duplicates_from_tuples():
    """Check that distinct 8-element tuples are not reported as duplicates (GH 9075).

    The tuples differ only in their fourth and eighth elements.
    """
    varying = [
        (5, 169),
        (7, 119),
        (9, 135),
        (13, 145),
        (14, 158),
        (16, 122),
        (17, 160),
        (18, 180),
        (20, 143),
        (21, 128),
        (22, 129),
        (25, 111),
        (28, 114),
        (29, 121),
        (31, 126),
        (32, 155),
        (33, 123),
        (12, 144),
    ]
    tuples = [
        ("x", "out", "z", fourth, "y", "in", "z", eighth)
        for fourth, eighth in varying
    ]

    mi = MultiIndex.from_tuples(tuples)
    assert not mi.has_duplicates
|
||||
|
||||
|
||||
def test_has_duplicates_overflow():
    """Check has_duplicates on wide indexes, with and without missing codes.

    Per the original comments, 4 levels is the "no overflow" case and
    8 levels the "overflow possible" case (int64 overflow).
    """

    def check(nlevels, with_nulls):
        # 1000 rows: nlevels copies of the same 0..499 pattern repeated
        # twice, plus a final two-valued level that separates the halves.
        base_codes = np.tile(np.arange(500), 2)
        level = np.arange(500)

        if with_nulls:  # inject some null values
            base_codes[500] = -1  # common nan value
            codes = [base_codes.copy() for _ in range(nlevels)]
            for pos in range(nlevels):
                codes[pos][500 + pos - nlevels // 2] = -1

            codes += [np.array([-1, 1]).repeat(500)]
        else:
            codes = [base_codes] * nlevels + [np.arange(2).repeat(500)]

        levels = [level] * nlevels + [[0, 1]]

        # no dups
        mi = MultiIndex(levels=levels, codes=codes)
        assert not mi.has_duplicates

        # with a dup: append a copy of the first row
        if with_nulls:
            codes = [np.insert(arr, 1000, arr[0]) for arr in codes]
            mi = MultiIndex(levels=levels, codes=codes)
        else:
            rows = mi.values.tolist()
            mi = MultiIndex.from_tuples(rows + [rows[0]])

        assert mi.has_duplicates

    # 4 levels: no overflow; 8 levels: overflow possible
    for nlevels in (4, 8):
        for with_nulls in (False, True):
            check(nlevels, with_nulls)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", np.array([False, False, False, True, True, False])),
        ("last", np.array([False, True, True, False, False, False])),
        (False, np.array([False, True, True, True, True, False])),
    ],
)
def test_duplicated(idx_dup, keep, expected):
    """Compare ``idx_dup.duplicated`` with the expected mask for each ``keep``."""
    mask = idx_dup.duplicated(keep=keep)
    tm.assert_numpy_array_equal(mask, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("keep", ["first", "last", False])
def test_duplicated_large(keep):
    """Cross-check ``MultiIndex.duplicated`` against the object hashtable."""
    # GH 9125
    n_unique, repeats = 200, 5000
    levels = [
        np.arange(n_unique),
        tm.makeStringIndex(n_unique),
        1000 + np.arange(n_unique),
    ]
    # one independent random code array per level
    codes = [np.random.choice(n_unique, repeats * n_unique) for _ in levels]
    index = MultiIndex(levels=levels, codes=codes)

    result = index.duplicated(keep=keep)
    reference = hashtable.duplicated_object(index.values, keep=keep)
    tm.assert_numpy_array_equal(result, reference)
|
||||
|
||||
|
||||
def test_get_duplicates():
    """Check duplicate-free indexes (including NaN entries) report no duplicates."""

    def assert_duplicate_free(index):
        assert not index.has_duplicates

        with tm.assert_produces_warning(FutureWarning):
            # Deprecated - see GH20239
            assert index.get_duplicates().equals(MultiIndex.from_arrays([[], []]))

        tm.assert_numpy_array_equal(
            index.duplicated(), np.zeros(len(index), dtype="bool")
        )

    # GH5873
    for second in [101, 102]:
        assert_duplicate_free(MultiIndex.from_arrays([[101, second], [3.5, np.nan]]))

    for n in range(1, 6):  # 1st level shape
        for m in range(1, 5):  # 2nd level shape
            # all possible unique combinations, including nan
            combos = list(product(range(-1, n), range(-1, m)))
            index = MultiIndex(
                levels=[list("abcde")[:n], list("WXYZ")[:m]],
                codes=np.random.permutation(combos).T,
            )
            assert len(index) == (n + 1) * (m + 1)
            assert_duplicate_free(index)
|
||||
@@ -0,0 +1,223 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_equals(idx):
|
||||
assert idx.equals(idx)
|
||||
assert idx.equals(idx.copy())
|
||||
assert idx.equals(idx.astype(object))
|
||||
|
||||
assert not idx.equals(list(idx))
|
||||
assert not idx.equals(np.array(idx))
|
||||
|
||||
same_values = Index(idx, dtype=object)
|
||||
assert idx.equals(same_values)
|
||||
assert same_values.equals(idx)
|
||||
|
||||
if idx.nlevels == 1:
|
||||
# do not test MultiIndex
|
||||
assert not idx.equals(pd.Series(idx))
|
||||
|
||||
|
||||
def test_equals_op(idx):
    """Exercise ``==`` between an index and Index/ndarray/Series operands.

    Operands of a different length must raise ``ValueError``; same-length
    operands yield an element-wise boolean array.
    """
    # GH9947, GH10637
    index_a = idx
    n = len(index_a)
    length_msg = "Lengths must match"

    index_b = index_a[0:-1]
    index_c = index_a[0:-1].append(index_a[-2:-1])
    index_d = index_a[0:1]

    array_a = np.array(index_a)
    array_b = np.array(index_a[0:-1])
    array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
    array_d = np.array(index_a[0:1])

    series_a = Series(array_a)
    series_b = Series(array_b)
    series_c = Series(array_c)
    series_d = Series(array_d)

    expected1 = np.array([True] * n)
    expected2 = np.array([True] * (n - 1) + [False])

    # Index, numpy array and Series operands, in that order
    operand_sets = [
        (index_a, index_b, index_c),
        (array_a, array_b, array_c),
        (series_a, series_b, series_c),
    ]
    for same, shorter, last_swapped in operand_sets:
        with pytest.raises(ValueError, match=length_msg):
            index_a == shorter
        tm.assert_numpy_array_equal(index_a == same, expected1)
        tm.assert_numpy_array_equal(index_a == last_swapped, expected2)

    # cases where length is 1 for one of them
    for single in (index_d, series_d, array_d):
        with pytest.raises(ValueError, match=length_msg):
            index_a == single
    msg = "Can only compare identically-labeled Series objects"
    with pytest.raises(ValueError, match=msg):
        series_a == series_d
    with pytest.raises(ValueError, match=length_msg):
        series_a == array_d

    # comparing with a scalar should broadcast; note that we are excluding
    # MultiIndex because in this case each item in the index is a tuple of
    # length 2, and therefore is considered an array of length 2 in the
    # comparison instead of a scalar
    if isinstance(index_a, MultiIndex):
        return
    expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
    # assuming the 2nd to last item is unique in the data
    item = index_a[-2]
    tm.assert_numpy_array_equal(index_a == item, expected3)
    tm.assert_series_equal(series_a == item, Series(expected3))
|
||||
|
||||
|
||||
def test_equals_multi(idx):
|
||||
assert idx.equals(idx)
|
||||
assert not idx.equals(idx.values)
|
||||
assert idx.equals(Index(idx.values))
|
||||
|
||||
assert idx.equal_levels(idx)
|
||||
assert not idx.equals(idx[:-1])
|
||||
assert not idx.equals(idx[-1])
|
||||
|
||||
# different number of levels
|
||||
index = MultiIndex(
|
||||
levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))],
|
||||
codes=[
|
||||
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
|
||||
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
|
||||
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
|
||||
],
|
||||
)
|
||||
|
||||
index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
|
||||
assert not index.equals(index2)
|
||||
assert not index.equal_levels(index2)
|
||||
|
||||
# levels are different
|
||||
major_axis = Index(list(range(4)))
|
||||
minor_axis = Index(list(range(2)))
|
||||
|
||||
major_codes = np.array([0, 0, 1, 2, 2, 3])
|
||||
minor_codes = np.array([0, 1, 0, 0, 1, 0])
|
||||
|
||||
index = MultiIndex(
|
||||
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
|
||||
)
|
||||
assert not idx.equals(index)
|
||||
assert not idx.equal_levels(index)
|
||||
|
||||
# some of the labels are different
|
||||
major_axis = Index(["foo", "bar", "baz", "qux"])
|
||||
minor_axis = Index(["one", "two"])
|
||||
|
||||
major_codes = np.array([0, 0, 2, 2, 3, 3])
|
||||
minor_codes = np.array([0, 1, 0, 1, 0, 1])
|
||||
|
||||
index = MultiIndex(
|
||||
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
|
||||
)
|
||||
assert not idx.equals(index)
|
||||
|
||||
|
||||
def test_identical(idx):
    """Check that ``identical`` is sensitive to names while ``equals`` is not."""
    left = idx.copy()
    right = idx.copy()
    assert left.identical(right)

    # renaming one side keeps equality but breaks identity
    left = left.set_names(["new1", "new2"])
    assert left.equals(right)
    assert not left.identical(right)

    # renaming the other side the same way restores identity
    right = right.set_names(["new1", "new2"])
    assert left.identical(right)

    tupleized = Index(left.tolist(), names=left.names)
    flat = Index(left.tolist(), names=left.names, tupleize_cols=False)
    assert left.identical(tupleized)
    assert not left.identical(flat)
    assert left.equals(flat)
|
||||
|
||||
|
||||
def test_equals_operator(idx):
|
||||
# GH9785
|
||||
assert (idx == idx).all()
|
||||
|
||||
|
||||
def test_equals_missing_values():
    """Check a one-row slice never ``equals`` the bare tuple at that position."""
    # make sure take is not using -1
    index = pd.MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])
    for pos in (0, 1):
        one_row = index[pos : pos + 1]
        assert not one_row.equals(index[pos])
|
||||
|
||||
|
||||
def test_is_():
    """Check which operations keep or break ``is_`` identity between views."""
    base = MultiIndex.from_tuples(zip(range(10), range(10)))
    assert base.is_(base)
    assert base.is_(base.view())
    assert base.is_(base.view().view().view().view())

    renamed_view = base.view()
    # names are metadata, they don't change id
    renamed_view.names = ["A", "B"]
    assert renamed_view.is_(base)
    assert base.is_(renamed_view)

    assert not base.is_(base.set_names(["C", "D"]))
    renamed_view = base.view()
    renamed_view.set_names(["E", "F"], inplace=True)
    assert base.is_(renamed_view)

    # levels are inherent properties, they change identity
    ten = list(range(10))
    relevelled = renamed_view.set_levels([ten, list(range(10))])
    assert not relevelled.is_(renamed_view)
    # shouldn't change
    assert renamed_view.is_(base)

    relevelled_view = relevelled.view()
    # GH 17464 - Remove duplicate MultiIndex levels
    relevelled_view.set_levels([list(range(10)), list(range(10))], inplace=True)
    assert not relevelled_view.is_(relevelled)

    same_levels_view = base.view()
    same_levels_view.set_levels(same_levels_view.levels, inplace=True)
    assert not same_levels_view.is_(base)
|
||||
|
||||
|
||||
def test_is_all_dates(idx):
    """Check that the ``idx`` fixture does not report itself as all dates."""
    all_dates = idx.is_all_dates
    assert not all_dates
|
||||
|
||||
|
||||
def test_is_numeric(idx):
|
||||
# MultiIndex is never numeric
|
||||
assert not idx.is_numeric()
|
||||
|
||||
|
||||
def test_multiindex_compare():
    """Check ``==`` and ``>`` of a single-level MultiIndex against itself."""
    # GH 21149
    # Ensure comparison operations for MultiIndex with nlevels == 1
    # behave consistently with those for MultiIndex with nlevels > 1
    single_level = pd.MultiIndex.from_product([[0, 1]])

    # Equality self-test: MultiIndex object vs self
    tm.assert_series_equal(
        pd.Series(single_level == single_level), pd.Series([True, True])
    )

    # Greater than comparison: MultiIndex object vs self
    tm.assert_series_equal(
        pd.Series(single_level > single_level), pd.Series([False, False])
    )
|
||||
@@ -0,0 +1,204 @@
|
||||
import warnings
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_dtype_str(indices):
    """Check ``dtype_str`` warns and equals ``str(indices.dtype)``."""
    with tm.assert_produces_warning(FutureWarning):
        dtype_name = indices.dtype_str
        assert isinstance(dtype_name, str)
        assert dtype_name == str(indices.dtype)
|
||||
|
||||
|
||||
def test_format(idx):
|
||||
idx.format()
|
||||
idx[:0].format()
|
||||
|
||||
|
||||
def test_format_integer_names():
    """Smoke test: ``format(names=True)`` runs when level names are integers."""
    level_values = [[0, 1], [0, 1]]
    level_codes = [[0, 0, 1, 1], [0, 1, 0, 1]]
    index = MultiIndex(levels=level_values, codes=level_codes, names=[0, 1])
    index.format(names=True)
|
||||
|
||||
|
||||
def test_format_sparse_config(idx):
|
||||
warn_filters = warnings.filters
|
||||
warnings.filterwarnings("ignore", category=FutureWarning, module=".*format")
|
||||
# GH1538
|
||||
pd.set_option("display.multi_sparse", False)
|
||||
|
||||
result = idx.format()
|
||||
assert result[1] == "foo two"
|
||||
|
||||
tm.reset_display_options()
|
||||
|
||||
warnings.filters = warn_filters
|
||||
|
||||
|
||||
def test_format_sparse_display():
    """Check the formatted fourth row of a four-level index.

    The expected string is whitespace sensitive: ``format()`` separates the
    level columns with two spaces.
    """
    index = MultiIndex(
        levels=[[0, 1], [0, 1], [0, 1], [0]],
        codes=[
            [0, 0, 0, 1, 1, 1],
            [0, 0, 1, 0, 0, 1],
            [0, 1, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0],
        ],
    )

    result = index.format()
    assert result[3] == "1  0  0  0"
|
||||
|
||||
|
||||
def test_repr_with_unicode_data():
    """Check the repr of an index holding a non-ASCII label has no backslash."""
    with pd.option_context("display.encoding", "UTF-8"):
        data = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
        frame = pd.DataFrame(data)
        index = frame.set_index(["a", "b"]).index
        assert "\\" not in repr(index)  # we don't want unicode-escaped
|
||||
|
||||
|
||||
def test_repr_roundtrip_raises():
    """Check that evaluating a MultiIndex repr raises ``TypeError``."""
    level_values = [list("ab"), range(3)]
    index = MultiIndex.from_product(level_values, names=["first", "second"])
    with pytest.raises(TypeError):
        eval(repr(index))
|
||||
|
||||
|
||||
def test_unicode_string_with_unicode():
    """Smoke test: ``str()`` works on an index containing a non-ASCII label."""
    data = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    frame = pd.DataFrame(data)
    index = frame.set_index(["a", "b"]).index
    str(index)
|
||||
|
||||
|
||||
def test_repr_max_seq_item_setting(idx):
|
||||
# GH10182
|
||||
idx = idx.repeat(50)
|
||||
with pd.option_context("display.max_seq_items", None):
|
||||
repr(idx)
|
||||
assert "..." not in str(idx)
|
||||
|
||||
|
||||
class TestRepr:
|
||||
def test_repr(self, idx):
|
||||
result = idx[:1].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one')],
|
||||
names=['first', 'second'])"""
|
||||
assert result == expected
|
||||
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one'),
|
||||
('foo', 'two'),
|
||||
('bar', 'one'),
|
||||
('baz', 'two'),
|
||||
('qux', 'one'),
|
||||
('qux', 'two')],
|
||||
names=['first', 'second'])"""
|
||||
assert result == expected
|
||||
|
||||
with pd.option_context("display.max_seq_items", 5):
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one'),
|
||||
('foo', 'two'),
|
||||
...
|
||||
('qux', 'one'),
|
||||
('qux', 'two')],
|
||||
names=['first', 'second'], length=6)"""
|
||||
assert result == expected
|
||||
|
||||
def test_rjust(self, narrow_multi_index):
|
||||
mi = narrow_multi_index
|
||||
result = mi[:1].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
|
||||
names=['a', 'b', 'dti'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi[::500].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
|
||||
( 'a', 9, '2000-01-01 00:08:20'),
|
||||
('abc', 10, '2000-01-01 00:16:40'),
|
||||
('abc', 10, '2000-01-01 00:25:00')],
|
||||
names=['a', 'b', 'dti'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
|
||||
( 'a', 9, '2000-01-01 00:00:01'),
|
||||
( 'a', 9, '2000-01-01 00:00:02'),
|
||||
( 'a', 9, '2000-01-01 00:00:03'),
|
||||
( 'a', 9, '2000-01-01 00:00:04'),
|
||||
( 'a', 9, '2000-01-01 00:00:05'),
|
||||
( 'a', 9, '2000-01-01 00:00:06'),
|
||||
( 'a', 9, '2000-01-01 00:00:07'),
|
||||
( 'a', 9, '2000-01-01 00:00:08'),
|
||||
( 'a', 9, '2000-01-01 00:00:09'),
|
||||
...
|
||||
('abc', 10, '2000-01-01 00:33:10'),
|
||||
('abc', 10, '2000-01-01 00:33:11'),
|
||||
('abc', 10, '2000-01-01 00:33:12'),
|
||||
('abc', 10, '2000-01-01 00:33:13'),
|
||||
('abc', 10, '2000-01-01 00:33:14'),
|
||||
('abc', 10, '2000-01-01 00:33:15'),
|
||||
('abc', 10, '2000-01-01 00:33:16'),
|
||||
('abc', 10, '2000-01-01 00:33:17'),
|
||||
('abc', 10, '2000-01-01 00:33:18'),
|
||||
('abc', 10, '2000-01-01 00:33:19')],
|
||||
names=['a', 'b', 'dti'], length=2000)"""
|
||||
assert result == expected
|
||||
|
||||
def test_tuple_width(self, wide_multi_index):
|
||||
mi = wide_multi_index
|
||||
result = mi[:1].__repr__()
|
||||
expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
|
||||
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi[:10].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
|
||||
('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
|
||||
('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
|
||||
('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
|
||||
('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
|
||||
('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
|
||||
('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
|
||||
('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
|
||||
('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
|
||||
('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
|
||||
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
|
||||
...
|
||||
('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
|
||||
('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
|
||||
('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
|
||||
('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
|
||||
('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
|
||||
('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
|
||||
('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
|
||||
('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
|
||||
('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
|
||||
('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
|
||||
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" # noqa
|
||||
assert result == expected
|
||||
@@ -0,0 +1,438 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import CategoricalIndex, Index, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def assert_matching(actual, expected, check_dtype=False):
    """Assert two sequences of array-likes match pairwise.

    Both sequences must have the same length; each pair is converted with
    ``np.asarray`` and compared with ``tm.assert_numpy_array_equal``, passing
    ``check_dtype`` through.
    """
    # avoid specifying internal representation
    # as much as possible
    assert len(actual) == len(expected)
    pairs = ((np.asarray(got), np.asarray(want)) for got, want in zip(actual, expected))
    for got, want in pairs:
        tm.assert_numpy_array_equal(got, want, check_dtype=check_dtype)
|
||||
|
||||
|
||||
def test_get_level_number_integer(idx):
    """Check integer level names are resolved by name before position."""
    idx.names = [1, 0]
    # name 1 labels position 0 and name 0 labels position 1
    for name, position in ((1, 0), (0, 1)):
        assert idx._get_level_number(name) == position

    msg = "Too many levels: Index has only 2 levels, not 3"
    with pytest.raises(IndexError, match=msg):
        idx._get_level_number(2)
    with pytest.raises(KeyError, match="Level fourth not found"):
        idx._get_level_number("fourth")
|
||||
|
||||
|
||||
def test_get_level_values(idx):
|
||||
result = idx.get_level_values(0)
|
||||
expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first")
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.name == "first"
|
||||
|
||||
result = idx.get_level_values("first")
|
||||
expected = idx.get_level_values(0)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# GH 10460
|
||||
index = MultiIndex(
|
||||
levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])],
|
||||
codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])],
|
||||
)
|
||||
|
||||
exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"])
|
||||
tm.assert_index_equal(index.get_level_values(0), exp)
|
||||
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
|
||||
tm.assert_index_equal(index.get_level_values(1), exp)
|
||||
|
||||
|
||||
def test_get_value_duplicates():
    """Check ``get_loc`` returns a slice and the engine's ``get_value`` raises."""
    tags = ["D", "B", "C"]
    days = [0, 26, 27, 37, 57, 67, 75, 82]
    index = MultiIndex(
        levels=[tags, days],
        codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
        names=["tag", "day"],
    )

    assert index.get_loc("D") == slice(0, 3)
    with pytest.raises(KeyError, match=r"^'D'$"):
        index._engine.get_value(np.array([]), "D")
|
||||
|
||||
|
||||
def test_get_level_values_all_na():
    """Check ``get_level_values`` when one level is made up only of NaN."""
    # GH 17924 when level entirely consists of nan
    index = pd.MultiIndex.from_arrays(
        [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
    )

    all_nan = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64)
    tm.assert_index_equal(index.get_level_values(0), all_nan)

    mixed = pd.Index(["a", np.nan, 1], dtype=object)
    tm.assert_index_equal(index.get_level_values(1), mixed)
|
||||
|
||||
|
||||
def test_get_level_values_int_with_na():
    """Check ``get_level_values`` for an integer level that contains NaN."""
    # GH 17924
    for second_level in ([1, np.nan, 2], [np.nan, np.nan, 2]):
        index = pd.MultiIndex.from_arrays([["a", "b", "b"], second_level])
        tm.assert_index_equal(index.get_level_values(1), Index(second_level))
|
||||
|
||||
|
||||
def test_get_level_values_na():
    """Check ``get_level_values`` with NaN, NaT and empty input arrays."""
    index = pd.MultiIndex.from_arrays(
        [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
    )
    tm.assert_index_equal(
        index.get_level_values(0), pd.Index([np.nan, np.nan, np.nan])
    )
    tm.assert_index_equal(index.get_level_values(1), pd.Index(["a", np.nan, 1]))

    # datetime level with a missing value
    index = pd.MultiIndex.from_arrays(
        [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])]
    )
    tm.assert_index_equal(
        index.get_level_values(1), pd.DatetimeIndex([0, 1, pd.NaT])
    )

    # two empty arrays
    index = pd.MultiIndex.from_arrays([[], []])
    tm.assert_index_equal(index.get_level_values(0), pd.Index([], dtype=object))
|
||||
|
||||
|
||||
def test_set_name_methods(idx, index_names):
    """Check ``set_names`` for whole-index, single-level and multi-level renames.

    Non-inplace calls must leave ``idx`` untouched; inplace calls must return
    ``None`` and rename the receiver.
    """
    # so long as these are synonyms, we don't need to test set_names
    assert idx.rename == idx.set_names

    suffixed = [name + "SUFFIX" for name in index_names]
    twice_suffixed = [name + "SUFFIX2" for name in suffixed]

    renamed = idx.set_names(suffixed)
    assert idx.names == index_names
    assert renamed.names == suffixed
    msg = "Length of names must match number of levels in MultiIndex"
    with pytest.raises(ValueError, match=msg):
        renamed.set_names(suffixed + suffixed)
    res = renamed.set_names(twice_suffixed, inplace=True)
    assert res is None
    assert renamed.names == twice_suffixed

    # set names for specific level (# GH7792)
    renamed = idx.set_names(suffixed[0], level=0)
    assert idx.names == index_names
    assert renamed.names == [suffixed[0], index_names[1]]

    res = renamed.set_names(twice_suffixed[0], level=0, inplace=True)
    assert res is None
    assert renamed.names == [twice_suffixed[0], index_names[1]]

    # set names for multiple levels
    renamed = idx.set_names(suffixed, level=[0, 1])
    assert idx.names == index_names
    assert renamed.names == suffixed

    res = renamed.set_names(twice_suffixed, level=[0, 1], inplace=True)
    assert res is None
    assert renamed.names == twice_suffixed
|
||||
|
||||
|
||||
def test_set_levels_codes_directly(idx):
    """Check that assigning to ``levels`` or ``codes`` raises ``AttributeError``."""
    # setting levels/codes directly raises AttributeError
    new_levels = [[label + "a" for label in level] for level in idx.levels]

    first_codes, second_codes = idx.codes
    new_codes = [
        [(code + 1) % 3 for code in first_codes],
        # NOTE: "% 1" maps every code to 0
        [(code + 1) % 1 for code in second_codes],
    ]

    msg = "can't set attribute"
    with pytest.raises(AttributeError, match=msg):
        idx.levels = new_levels
    with pytest.raises(AttributeError, match=msg):
        idx.codes = new_codes
|
||||
|
||||
|
||||
def test_set_levels(idx):
    """Check ``set_levels`` with and without ``inplace`` and for invalid input.

    Valid replacements must produce the new levels and leave ``idx`` intact;
    invalid calls must raise and leave levels and codes unchanged.
    """
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    levels = idx.levels
    new_levels = [[label + "a" for label in level] for level in levels]

    # level changing [w/o mutation]
    changed = idx.set_levels(new_levels)
    assert_matching(changed.levels, new_levels)
    assert_matching(idx.levels, levels)

    # level changing [w/ mutation]
    changed = idx.copy()
    inplace_return = changed.set_levels(new_levels, inplace=True)
    assert inplace_return is None
    assert_matching(changed.levels, new_levels)

    # (replacement values, target level(s), levels expected afterwards)
    level_cases = [
        (new_levels[0], 0, [new_levels[0], levels[1]]),
        (new_levels[1], 1, [levels[0], new_levels[1]]),
        (new_levels, [0, 1], new_levels),
    ]

    # level changing specific / multiple levels [w/o mutation]
    for values, level, wanted in level_cases:
        changed = idx.set_levels(values, level=level)
        assert_matching(changed.levels, wanted)
        assert_matching(idx.levels, levels)

    # level changing specific / multiple levels [w/ mutation]
    for values, level, wanted in level_cases:
        changed = idx.copy()
        inplace_return = changed.set_levels(values, level=level, inplace=True)
        assert inplace_return is None
        assert_matching(changed.levels, wanted)
        assert_matching(idx.levels, levels)

    # illegal level changing should not change levels
    # GH 13754
    pristine = idx.copy()
    invalid_calls = [
        (ValueError, "^On", "set_levels", ["c"], "levels"),
        (ValueError, "^On", "set_codes", [0, 1, 2, 3, 4, 5], "codes"),
        (TypeError, "^Levels", "set_levels", "c", "levels"),
        (TypeError, "^Codes", "set_codes", 1, "codes"),
    ]
    for inplace in [True, False]:
        for error, pattern, method, bad_value, attr in invalid_calls:
            with pytest.raises(error, match=pattern):
                getattr(idx, method)(bad_value, level=0, inplace=inplace)
            assert_matching(
                getattr(idx, attr), getattr(pristine, attr), check_dtype=True
            )
|
||||
|
||||
|
||||
def test_set_codes(idx):
    """Check ``set_codes`` with and without ``inplace`` and the ``labels`` alias.

    Replacements must produce the new codes and leave ``idx`` intact; the
    ``labels=`` keyword is expected to emit a ``FutureWarning``.
    """
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    codes = idx.codes
    first_codes, second_codes = codes
    new_codes = [
        [(code + 1) % 3 for code in first_codes],
        # NOTE: "% 1" maps every code to 0
        [(code + 1) % 1 for code in second_codes],
    ]

    # changing codes w/o mutation
    changed = idx.set_codes(new_codes)
    assert_matching(changed.codes, new_codes)
    assert_matching(idx.codes, codes)

    # changing label w/ mutation
    changed = idx.copy()
    inplace_return = changed.set_codes(new_codes, inplace=True)
    assert inplace_return is None
    assert_matching(changed.codes, new_codes)

    # (replacement codes, target level(s), codes expected afterwards)
    level_cases = [
        (new_codes[0], 0, [new_codes[0], codes[1]]),
        (new_codes[1], 1, [codes[0], new_codes[1]]),
        (new_codes, [0, 1], new_codes),
    ]

    # codes changing specific / multiple levels w/o mutation
    for values, level, wanted in level_cases:
        changed = idx.set_codes(values, level=level)
        assert_matching(changed.codes, wanted)
        assert_matching(idx.codes, codes)

    # codes changing specific / multiple levels w/ mutation
    for values, level, wanted in level_cases:
        changed = idx.copy()
        inplace_return = changed.set_codes(values, level=level, inplace=True)
        assert inplace_return is None
        assert_matching(changed.codes, wanted)
        assert_matching(idx.codes, codes)

    # label changing for levels of different magnitude of categories
    ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)])
    reversed_codes = range(129, -1, -1)
    expected = pd.MultiIndex.from_tuples([(0, i) for i in reversed_codes])

    # [w/o mutation]
    result = ind.set_codes(codes=reversed_codes, level=1)
    assert result.equals(expected)

    # [w/ mutation]
    result = ind.copy()
    result.set_codes(codes=reversed_codes, level=1, inplace=True)
    assert result.equals(expected)

    with tm.assert_produces_warning(FutureWarning):
        ind.set_codes(labels=reversed_codes, level=1)
|
||||
|
||||
|
||||
def test_set_labels_deprecated():
    """Check ``set_labels`` still works while emitting a ``FutureWarning``."""
    # GH23752
    ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)])
    reversed_labels = range(129, -1, -1)
    expected = pd.MultiIndex.from_tuples([(0, i) for i in reversed_labels])

    # [w/o mutation]
    with tm.assert_produces_warning(FutureWarning):
        relabelled = ind.set_labels(labels=reversed_labels, level=1)
    assert relabelled.equals(expected)

    # [w/ mutation]
    relabelled = ind.copy()
    with tm.assert_produces_warning(FutureWarning):
        relabelled.set_labels(labels=reversed_labels, level=1, inplace=True)
    assert relabelled.equals(expected)
|
||||
|
||||
|
||||
def test_set_levels_codes_names_bad_input(idx):
    """Check ``set_levels`` / ``set_codes`` / ``set_names`` reject malformed input."""
    levels, codes = idx.levels, idx.codes
    names = idx.names

    # (expected exception, message pattern, offending call), in original order
    bad_calls = [
        (ValueError, "Length of levels", lambda: idx.set_levels([levels[0]])),
        (ValueError, "Length of codes", lambda: idx.set_codes([codes[0]])),
        (ValueError, "Length of names", lambda: idx.set_names([names[0]])),
        # shouldn't scalar data error, instead should demand list-like
        (TypeError, "list of lists-like", lambda: idx.set_levels(levels[0])),
        (TypeError, "list of lists-like", lambda: idx.set_codes(codes[0])),
        (TypeError, "list-like", lambda: idx.set_names(names[0])),
        # should have equal lengths
        (
            TypeError,
            "list of lists-like",
            lambda: idx.set_levels(levels[0], level=[0, 1]),
        ),
        (TypeError, "list-like", lambda: idx.set_levels(levels, level=0)),
        # should have equal lengths
        (
            TypeError,
            "list of lists-like",
            lambda: idx.set_codes(codes[0], level=[0, 1]),
        ),
        (TypeError, "list-like", lambda: idx.set_codes(codes, level=0)),
        # should have equal lengths
        (
            ValueError,
            "Length of names",
            lambda: idx.set_names(names[0], level=[0, 1]),
        ),
        (TypeError, "Names must be a", lambda: idx.set_names(names, level=0)),
    ]
    for error, pattern, call in bad_calls:
        with pytest.raises(error, match=pattern):
            call()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("inplace", [True, False])
def test_set_names_with_nlevel_1(inplace):
    """set_names works on a one-level MultiIndex, in place or not (GH 21149)."""
    expected = pd.MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"])
    single_level = pd.MultiIndex.from_product([[0, 1]])
    returned = single_level.set_names("first", level=0, inplace=inplace)

    # With inplace=True the renamed object is the index itself.
    actual = single_level if inplace else returned

    tm.assert_index_equal(actual, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [True, False])
def test_set_levels_categorical(ordered):
    """A CategoricalIndex can be installed as a level via set_levels (GH13854)."""
    base = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
    cat_level = CategoricalIndex(list("bac"), ordered=ordered)

    replaced = base.set_levels(cat_level, 0)
    tm.assert_index_equal(
        replaced, MultiIndex(levels=[cat_level, [0, 1, 2, 3]], codes=base.codes)
    )

    # The level values keep the categories and ordered flag of the new level.
    expected_values = CategoricalIndex(
        list("bacb"), categories=cat_level.categories, ordered=cat_level.ordered
    )
    tm.assert_index_equal(replaced.get_level_values(0), expected_values)
|
||||
|
||||
|
||||
def test_set_value_keeps_names():
    """Setting a value under a new row key via ``.at`` keeps the index names.

    Motivating example from #3742.
    """
    expected_names = ("Name", "Number")
    people = ["hans"] * 3 + ["grethe"] * 3
    numbers = ["1", "2", "3"] * 2
    row_index = pd.MultiIndex.from_arrays([people, numbers], names=["Name", "Number"])
    frame = pd.DataFrame(
        np.random.randn(6, 4), columns=["one", "two", "three", "four"], index=row_index
    ).sort_index()

    assert frame._is_copy is None
    assert frame.index.names == expected_names

    # ("grethe", "4") is not among the keys built above.
    frame.at[("grethe", "4"), "one"] = 99.34

    assert frame._is_copy is None
    assert frame.index.names == expected_names
|
||||
|
||||
|
||||
def test_set_levels_with_iterable():
    """set_levels accepts a one-shot iterable (a ``map`` object) — GH23273."""
    names = ["size", "color"]
    colors = ["black"] * 3
    index = pd.MultiIndex.from_arrays([[1, 2, 3], colors], names=names)

    result = index.set_levels(map(int, ["3", "2", "1"]), level="size")

    expected = pd.MultiIndex.from_arrays([[3, 2, 1], colors], names=names)
    tm.assert_index_equal(result, expected)
|
||||
@@ -0,0 +1,441 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
date_range,
|
||||
)
|
||||
from pandas.core.indexes.base import InvalidIndexError
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_almost_equal
|
||||
|
||||
|
||||
def test_slice_locs_partial(idx):
    """slice_locs on a level-0-sorted index with tuple, scalar and open bounds."""
    ordered = idx.sortlevel(0)[0]

    # ((start, end), expected (left, right) positions)
    cases = [
        ((("foo", "two"), ("qux", "one")), (1, 5)),
        ((None, ("qux", "one")), (0, 5)),
        ((("foo", "two"), None), (1, len(ordered))),
        (("bar", "baz"), (2, 4)),
    ]
    for (start, end), expected in cases:
        result = ordered.slice_locs(start, end)
        assert result == expected
|
||||
|
||||
|
||||
def test_slice_locs():
    """slice_locs bounds on a stacked time frame select the expected rows."""
    frame = tm.makeTimeDataFrame()
    stacked = frame.stack()
    mi = stacked.index
    stamps = frame.index

    # bounds that are exact labels of the first level
    start, stop = mi.slice_locs(stamps[5], stamps[15])
    tm.assert_almost_equal(stacked[start:stop].values, frame[5:16].stack().values)

    # bounds shifted 30 seconds inward from those labels
    half_minute = timedelta(seconds=30)
    start, stop = mi.slice_locs(stamps[5] + half_minute, stamps[15] - half_minute)
    tm.assert_almost_equal(stacked[start:stop].values, frame[6:15].stack().values)
|
||||
|
||||
|
||||
def test_slice_locs_with_type_mismatch():
    """slice_locs raises TypeError when a bound's type does not fit the level."""
    msg = "^Level type mismatch"
    half_minute = timedelta(seconds=30)

    time_frame = tm.makeTimeDataFrame()
    time_mi = time_frame.stack().index
    with pytest.raises(TypeError, match=msg):
        time_mi.slice_locs((1, 3))
    with pytest.raises(TypeError, match=msg):
        time_mi.slice_locs(time_frame.index[5] + half_minute, (5, 2))

    custom_frame = tm.makeCustomDataframe(5, 5)
    custom_mi = custom_frame.stack().index
    with pytest.raises(TypeError, match=msg):
        custom_mi.slice_locs(half_minute)
    # TODO: Try creating a UnicodeDecodeError in exception message
    with pytest.raises(TypeError, match=msg):
        custom_mi.slice_locs(custom_frame.index[1], (16, "a"))
|
||||
|
||||
|
||||
def test_slice_locs_not_sorted():
    """slice_locs with full-depth keys raises until the index is sorted."""
    index = MultiIndex(
        levels=[Index(np.arange(4)) for _ in range(3)],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
    )
    lower, upper = (1, 0, 1), (2, 1, 0)

    msg = "[Kk]ey length.*greater than MultiIndex lexsort depth"
    with pytest.raises(KeyError, match=msg):
        index.slice_locs(lower, upper)

    # After sortlevel(0) the same call goes through. Its return value is not
    # asserted (the original carried an open question about adding a check).
    by_first_level = index.sortlevel(0)[0]
    by_first_level.slice_locs(lower, upper)
|
||||
|
||||
|
||||
def test_slice_locs_not_contained():
|
||||
# some searchsorted action
|
||||
|
||||
index = MultiIndex(
|
||||
levels=[[0, 2, 4, 6], [0, 2, 4]],
|
||||
codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3], [0, 1, 2, 1, 2, 2, 0, 1, 2]],
|
||||
sortorder=0,
|
||||
)
|
||||
|
||||
result = index.slice_locs((1, 0), (5, 2))
|
||||
assert result == (3, 6)
|
||||
|
||||
result = index.slice_locs(1, 5)
|
||||
assert result == (3, 6)
|
||||
|
||||
result = index.slice_locs((2, 2), (5, 2))
|
||||
assert result == (3, 6)
|
||||
|
||||
result = index.slice_locs(2, 5)
|
||||
assert result == (3, 6)
|
||||
|
||||
result = index.slice_locs((1, 0), (6, 3))
|
||||
assert result == (3, 8)
|
||||
|
||||
result = index.slice_locs(-1, 10)
|
||||
assert result == (0, len(index))
|
||||
|
||||
|
||||
def test_putmask_with_wrong_mask(idx):
    """putmask rejects a mask whose size differs from the index (GH18368).

    The masks are built with the builtin ``bool`` dtype; the ``np.bool``
    alias used previously is deprecated and no longer exists in recent
    NumPy releases.
    """
    msg = "putmask: mask and data must be the same size"

    # one element too long
    with pytest.raises(ValueError, match=msg):
        idx.putmask(np.ones(len(idx) + 1, bool), 1)

    # one element too short
    with pytest.raises(ValueError, match=msg):
        idx.putmask(np.ones(len(idx) - 1, bool), 1)

    # a string in place of a mask
    with pytest.raises(ValueError, match=msg):
        idx.putmask("foo", 1)
|
||||
|
||||
|
||||
def test_get_indexer():
    """get_indexer on MultiIndex: exact, pad/ffill, backfill/bfill, duplicates."""
    index = MultiIndex(
        levels=[Index(np.arange(4)), Index(np.arange(2))],
        codes=[
            np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp),
            np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp),
        ],
    )
    head = index[:5]
    picked = index[[1, 3, 5]]

    exact = head.get_indexer(picked)
    assert_almost_equal(exact, np.array([1, 3, -1], dtype=np.intp))

    # (method, alias spelling of the same method, expected positions)
    fill_cases = [
        ("pad", "ffill", np.array([-1, 0, 0, 1, 1], dtype=np.intp)),
        ("backfill", "bfill", np.array([0, 0, 1, 1, 2], dtype=np.intp)),
    ]
    for method, alias, expected in fill_cases:
        forward = picked.get_indexer(head, method=method)
        assert_almost_equal(forward, expected)

        reverse = picked.get_indexer(head[::-1], method=method)
        assert_almost_equal(reverse, expected[::-1])

        via_alias = picked.get_indexer(head, method=alias)
        assert_almost_equal(forward, via_alias)

    # pass non-MultiIndex
    from_values = head.get_indexer(picked.values)
    from_index = head.get_indexer(picked)
    assert_almost_equal(from_values, from_index)

    scalars = head.get_indexer([1, 2, 3])
    assert (scalars == [-1, -1, -1]).all()

    # create index with duplicates
    dupes = Index(list(range(10)) * 2)
    target = Index(list(range(20)))

    msg = "Reindexing only valid with uniquely valued Index objects"
    with pytest.raises(InvalidIndexError, match=msg):
        dupes.get_indexer(target)
|
||||
|
||||
|
||||
def test_get_indexer_nearest():
    """method='nearest' and ``tolerance`` raise NotImplementedError."""
    midx = MultiIndex.from_tuples([("a", 1), ("b", 2)])

    nearest_msg = (
        "method='nearest' not implemented yet for MultiIndex; see GitHub issue 9365"
    )
    with pytest.raises(NotImplementedError, match=nearest_msg):
        midx.get_indexer(["a"], method="nearest")

    tolerance_msg = "tolerance not implemented yet for MultiIndex"
    with pytest.raises(NotImplementedError, match=tolerance_msg):
        midx.get_indexer(["a"], method="pad", tolerance=2)
|
||||
|
||||
|
||||
def test_getitem(idx):
    """__getitem__ with a scalar, a slice and boolean masks."""
    # scalar
    assert idx[2] == ("bar", "one")

    # slice
    assert idx[2:5].equals(idx[[2, 3, 4]])

    # boolean, once as a list and once as an ndarray
    mask = [True, False, True, False, True, True]
    from_list = idx[mask]
    from_array = idx[np.array(mask)]
    kept = idx[[0, 2, 4, 5]]
    assert from_list.equals(kept)
    assert from_array.equals(kept)
|
||||
|
||||
|
||||
def test_getitem_group_select(idx):
    """get_loc with a first-level label returns a slice on the sorted index."""
    ordered = idx.sortlevel(0)[0]
    for label, expected in (("baz", slice(3, 4)), ("foo", slice(0, 2))):
        assert ordered.get_loc(label) == expected
|
||||
|
||||
|
||||
def test_get_indexer_consistency(idx):
    """get_indexer and get_indexer_non_unique return intp ndarrays (GH 16819).

    A unique index (or a CategoricalIndex) must return an indexer from
    ``get_indexer``; any other index must raise ``InvalidIndexError``.
    ``get_indexer_non_unique`` is checked in both cases.

    The former ``if isinstance(idx, IntervalIndex): pass`` branch did
    nothing and has been dropped.
    """
    target = idx[0:2]

    if idx.is_unique or isinstance(idx, CategoricalIndex):
        indexer = idx.get_indexer(target)
        assert isinstance(indexer, np.ndarray)
        assert indexer.dtype == np.intp
    else:
        e = "Reindexing only valid with uniquely valued Index objects"
        with pytest.raises(InvalidIndexError, match=e):
            idx.get_indexer(target)

    indexer, _ = idx.get_indexer_non_unique(target)
    assert isinstance(indexer, np.ndarray)
    assert indexer.dtype == np.intp
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ind1", [[True] * 5, pd.Index([True] * 5)])
@pytest.mark.parametrize(
    "ind2",
    [[True, False, True, False, False], pd.Index([True, False, True, False, False])],
)
def test_getitem_bool_index_all(ind1, ind2):
    """Boolean list / Index keys select rows of a MultiIndex (GH#22533)."""
    rows = [(10, 1), (20, 2), (30, 3), (40, 4), (50, 5)]
    idx = MultiIndex.from_tuples(rows)

    # all-True key keeps every row
    tm.assert_index_equal(idx[ind1], idx)

    # True at positions 0 and 2 only
    tm.assert_index_equal(idx[ind2], MultiIndex.from_tuples([rows[0], rows[2]]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ind1", [[True], pd.Index([True])])
@pytest.mark.parametrize("ind2", [[False], pd.Index([False])])
def test_getitem_bool_index_single(ind1, ind2):
    """Boolean keys on a one-row MultiIndex (GH#22533)."""
    idx = MultiIndex.from_tuples([(10, 1)])
    tm.assert_index_equal(idx[ind1], idx)

    # [False] leaves an empty index with empty int64 levels
    no_values = np.array([], dtype=np.int64)
    empty = pd.MultiIndex(
        levels=[no_values, np.array([], dtype=np.int64)],
        codes=[[], []],
    )
    tm.assert_index_equal(idx[ind2], empty)
|
||||
|
||||
|
||||
def test_get_loc(idx):
    """get_loc: present keys, absent keys, unsupported ``method``, 3 levels."""
    for key, position in ((("foo", "two"), 1), (("baz", "two"), 3)):
        assert idx.get_loc(key) == position

    with pytest.raises(KeyError, match=r"^10$"):
        idx.get_loc(("bar", "two"))
    with pytest.raises(KeyError, match=r"^'quux'$"):
        idx.get_loc("quux")

    msg = "only the default get_loc method is currently supported for MultiIndex"
    with pytest.raises(NotImplementedError, match=msg):
        idx.get_loc("foo", method="nearest")

    # 3 levels
    three_level = MultiIndex(
        levels=[Index(np.arange(4)) for _ in range(3)],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
    )
    with pytest.raises(KeyError, match=r"^\(1, 1\)$"):
        three_level.get_loc((1, 1))
    assert three_level.get_loc((2, 0)) == slice(3, 5)
|
||||
|
||||
|
||||
def test_get_loc_duplicates():
    """Index.get_loc on indexes that contain repeated labels."""
    # every entry is the same label: the whole range comes back as a slice
    all_same = Index([2, 2, 2, 2])
    assert all_same.get_loc(2) == slice(0, 4)

    # "c" appears once, next to repeated "a" and "b"
    mixed = Index(["c", "a", "a", "b", "b"])
    assert mixed.get_loc("c") == 0
|
||||
|
||||
|
||||
def test_get_loc_level():
    """get_loc_level with partial, full, missing and slice-bearing keys."""
    index = MultiIndex(
        levels=[Index(np.arange(4)) for _ in range(3)],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
    )

    # two of three levels given: a slice plus the remaining level
    loc, remainder = index.get_loc_level((0, 1))
    want_loc = slice(1, 2)
    want_remainder = index[want_loc].droplevel(0).droplevel(0)
    assert loc == want_loc
    assert remainder.equals(want_remainder)

    # all three levels given: an integer position and no remaining index
    loc, remainder = index.get_loc_level((0, 1, 0))
    assert loc == 1
    assert remainder is None

    with pytest.raises(KeyError, match=r"^\(2, 2\)$"):
        index.get_loc_level((2, 2))
    # GH 22221: unused label
    with pytest.raises(KeyError, match=r"^2$"):
        index.drop(2).get_loc_level(2)
    # Unused label on unsorted level:
    with pytest.raises(KeyError, match=r"^2$"):
        index.drop(1, level=2).get_loc_level(2, level=2)

    single_year = MultiIndex(
        levels=[[2000], list(range(4))],
        codes=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])],
    )
    loc, remainder = single_year.get_loc_level((2000, slice(None, None)))
    assert loc == slice(None, None)
    assert remainder.equals(single_year.droplevel(0))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype1", [int, float, bool, str])
@pytest.mark.parametrize("dtype2", [int, float, bool, str])
def test_get_loc_multiple_dtypes(dtype1, dtype2):
    """get_loc finds an element for every pairing of level dtypes (GH 18520)."""
    base = np.array([0, 1])
    idx = pd.MultiIndex.from_product([base.astype(dtype1), base.astype(dtype2)])
    assert idx.get_loc(idx[2]) == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level", [0, 1])
@pytest.mark.parametrize("dtypes", [[int, float], [float, int]])
def test_get_loc_implicit_cast(level, dtypes):
    """int keys match float levels and vice versa (GH 18818, GH 15994)."""
    lev_dtype, key_dtype = dtypes

    # Start from string levels / key, then swap the numeric level under test.
    level_values = [["a", "b"], ["c", "d"]]
    lookup = ["b", "d"]
    level_values[level] = np.array([0, 1], dtype=lev_dtype)
    lookup[level] = key_dtype(1)

    idx = MultiIndex.from_product(level_values)
    assert idx.get_loc(tuple(lookup)) == 3
|
||||
|
||||
|
||||
def test_get_loc_cast_bool():
    """int keys are cast to bool for lookup, but not the reverse (GH 19086)."""
    idx = MultiIndex.from_product([[False, True], np.arange(2, dtype="int64")])

    for key, position in (((0, 1), 1), ((1, 0), 2)):
        assert idx.get_loc(key) == position

    with pytest.raises(KeyError, match=r"^\(False, True\)$"):
        idx.get_loc((False, True))
    with pytest.raises(KeyError, match=r"^\(True, False\)$"):
        idx.get_loc((True, False))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level", [0, 1])
def test_get_loc_nan(level, nulls_fixture):
    """A null value inside a level can be looked up (GH 18485)."""
    level_values = [["a", "b"], ["c", "d"]]
    lookup = ["b", "d"]

    # Put [0, <null>] into the level under test and search for the null.
    level_values[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture))
    lookup[level] = nulls_fixture

    idx = MultiIndex.from_product(level_values)
    assert idx.get_loc(tuple(lookup)) == 3
|
||||
|
||||
|
||||
def test_get_loc_missing_nan():
    """Absent keys, NaN included, raise KeyError on a float MultiIndex (GH 8569)."""
    idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]])

    located = idx.get_loc(1)
    assert isinstance(located, slice)

    with pytest.raises(KeyError, match=r"^3\.0$"):
        idx.get_loc(3)
    with pytest.raises(KeyError, match=r"^nan$"):
        idx.get_loc(np.nan)
    with pytest.raises(KeyError, match=r"^\[nan\]$"):
        idx.get_loc([np.nan])
|
||||
|
||||
|
||||
def test_get_indexer_categorical_time():
    """get_indexer of an index against itself, with categorical levels.

    https://github.com/pandas-dev/pandas/issues/21390
    """
    letters = Categorical(["a", "b", "c"])
    hours = Categorical(date_range("2012-01-01", periods=3, freq="H"))
    midx = MultiIndex.from_product([letters, hours])

    positions = midx.get_indexer(midx)
    tm.assert_numpy_array_equal(positions, np.arange(9, dtype=np.intp))
|
||||
|
||||
|
||||
def test_timestamp_multiindex_indexer():
    """Partial-string slice on the datetime level of a three-level index.

    https://github.com/pandas-dev/pandas/issues/26944
    """
    all_hours = pd.date_range("2019-01-01T00:15:33", periods=100, freq="H", name="date")
    idx = pd.MultiIndex.from_product([all_hours, ["x"], [3]])
    df = pd.DataFrame({"foo": np.arange(len(idx))}, idx)

    result = df.loc[pd.IndexSlice["2019-1-2":, "x", :], "foo"]

    kept_hours = pd.date_range(
        start="2019-01-02T00:15:33",
        end="2019-01-05T02:15:33",
        freq="H",
        name="date",
    )
    qidx = pd.MultiIndex.from_product([kept_hours, ["x"], [3]])
    # expected values are consecutive integers starting at 24
    should_be = pd.Series(data=np.arange(24, len(qidx) + 24), index=qidx, name="foo")
    tm.assert_series_equal(result, should_be)
|
||||
@@ -0,0 +1,297 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||||
|
||||
import pandas as pd
|
||||
from pandas import IntervalIndex, MultiIndex, RangeIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_labels_dtypes():
    """Codes use an integer dtype that widens with the level size (GH 8456)."""
    pair = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    assert pair.codes[0].dtype == "int8"
    assert pair.codes[1].dtype == "int8"

    # (size of second level, expected dtype of its codes)
    for size, dtype_name in ((40, "int8"), (400, "int16"), (40000, "int32")):
        sized = MultiIndex.from_product([["a"], range(size)])
        assert sized.codes[1].dtype == dtype_name

    thousand = pd.MultiIndex.from_product([["a"], range(1000)])
    assert (thousand.codes[0] >= 0).all()
    assert (thousand.codes[1] >= 0).all()
|
||||
|
||||
|
||||
def test_values_boxed():
    """MultiIndex.values for (int, Timestamp/NaT) tuples is an object array."""
    stamp = pd.Timestamp
    tuples = [
        (1, stamp("2000-01-01")),
        (2, pd.NaT),
        (3, stamp("2000-01-03")),
        (1, stamp("2000-01-04")),
        (2, stamp("2000-01-02")),
        (3, stamp("2000-01-03")),
    ]
    mi = pd.MultiIndex.from_tuples(tuples)

    tm.assert_numpy_array_equal(
        mi.values, construct_1d_object_array_from_listlike(tuples)
    )
    # Check that code branches for boxed values produce identical results
    tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values)
|
||||
|
||||
|
||||
def test_values_multiindex_datetimeindex():
    """.values round-trips naive and tz-aware datetime levels.

    Test to ensure we hit the boxing / nobox part of MI.values.
    """
    ints = np.arange(10 ** 18, 10 ** 18 + 5)
    naive = pd.DatetimeIndex(ints)
    # TODO(GH-24559): Remove the FutureWarning
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        aware = pd.DatetimeIndex(ints, tz="US/Central")

    idx = pd.MultiIndex.from_arrays([naive, aware])

    # full index first, then the n_lev > n_lab case (two-row slice)
    for sub, want_outer, want_inner in (
        (idx, naive, aware),
        (idx[:2], naive[:2], aware[:2]),
    ):
        pairs = sub.values

        outer = pd.DatetimeIndex([pair[0] for pair in pairs])
        tm.assert_index_equal(outer, want_outer)

        inner = pd.DatetimeIndex([pair[1] for pair in pairs])
        tm.assert_index_equal(inner, want_inner)
|
||||
|
||||
|
||||
def test_values_multiindex_periodindex():
    """.values round-trips an integer level and a PeriodIndex level.

    Test to ensure we hit the boxing / nobox part of MI.values.
    """
    ints = np.arange(2007, 2012)
    pidx = pd.PeriodIndex(ints, freq="D")
    idx = pd.MultiIndex.from_arrays([ints, pidx])

    # full index first, then the n_lev > n_lab case (two-row slice)
    for sub, want_ints, want_periods in (
        (idx, ints, pidx),
        (idx[:2], ints[:2], pidx[:2]),
    ):
        pairs = sub.values

        outer = pd.Int64Index([pair[0] for pair in pairs])
        tm.assert_index_equal(outer, pd.Int64Index(want_ints))

        inner = pd.PeriodIndex([pair[1] for pair in pairs])
        tm.assert_index_equal(inner, want_periods)
|
||||
|
||||
|
||||
def test_consistency():
    """Build a 70000-row index, then one with repeated rows, on large levels."""
    # need to construct an overflow
    big_level = list(range(70000))
    small_level = list(range(10))
    level_values = [big_level, small_level]

    # the fact that is works means it's consistent
    index = MultiIndex(
        levels=level_values,
        codes=[np.arange(70000), np.repeat(range(10), 7000)],
    )

    # inconsistent: the code pair (1, 1) occurs twice
    index = MultiIndex(
        levels=level_values,
        codes=[
            np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]),
            np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]),
        ],
    )

    assert index.is_unique is False
|
||||
|
||||
|
||||
def test_hash_collisions():
    """Non-smoke test that we don't get hash collisions on a 1000x1000 product."""
    side = np.arange(1000)
    index = MultiIndex.from_product([side, np.arange(1000)], names=["one", "two"])
    size = len(index)

    positions = index.get_indexer(index.values)
    tm.assert_numpy_array_equal(positions, np.arange(size, dtype="intp"))

    # first two and last two rows
    for pos in (0, 1, size - 2, size - 1):
        assert index.get_loc(index[pos]) == pos
|
||||
|
||||
|
||||
def test_dims():
    """Placeholder: there is nothing to check here yet."""
|
||||
|
||||
|
||||
def test_take_invalid_kwargs():
    """MultiIndex.take rejects unknown and unsupported keyword arguments.

    The function used to be named ``take_invalid_kwargs``. Without the
    ``test_`` prefix pytest's default discovery never collected it, so the
    checks below never ran.
    """
    vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]]
    idx = pd.MultiIndex.from_product(vals, names=["str", "dt"])
    indices = [1, 2]

    # unknown keyword
    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)

    # keywords that are recognised but rejected
    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, mode="clip")


# Keep the old name importable. It lacks the ``test_`` prefix, so pytest
# does not collect the check a second time.
take_invalid_kwargs = test_take_invalid_kwargs
|
||||
|
||||
|
||||
def test_isna_behavior(idx):
    """pd.isna on a MultiIndex raises NotImplementedError.

    should not segfault GH5123
    NOTE: if MI representation changes, may make sense to allow isna(MI)
    """
    expected_message = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=expected_message):
        pd.isna(idx)
|
||||
|
||||
|
||||
def test_large_multiindex_error():
    """Missing keys raise KeyError just below and just above 1,000,000 rows.

    GH12527
    """
    # 2 * 499999 rows first, then 2 * 500001 rows
    for inner_size in (499999, 500001):
        frame = pd.DataFrame(
            1,
            index=pd.MultiIndex.from_product([[1, 2], range(inner_size)]),
            columns=["dest"],
        )
        with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
            frame.loc[(-1, 0), "dest"]
        with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
            frame.loc[(3, 0), "dest"]
|
||||
|
||||
|
||||
def test_million_record_attribute_error():
    """Unknown attribute on a column of a million-row frame -> AttributeError.

    GH 18165
    """
    numbers = list(range(1000000))
    row_index = pd.MultiIndex.from_tuples([(x, x) for x in numbers])
    df = pd.DataFrame({"a": numbers, "b": numbers}, index=row_index)

    msg = "'Series' object has no attribute 'foo'"
    with pytest.raises(AttributeError, match=msg):
        df["a"].foo()
|
||||
|
||||
|
||||
def test_can_hold_identifiers(idx):
    """The index reports that it can hold, and does hold, its first element."""
    first = idx[0]
    outcome = idx._can_hold_identifiers_and_holds_name(first)
    assert outcome is True
|
||||
|
||||
|
||||
def test_metadata_immutable(idx):
    """Item assignment on levels, codes and names raises TypeError."""
    mutable_regex = re.compile("does not support mutable operations")
    levels, codes = idx.levels, idx.codes

    # shouldn't be able to set at either the top level or base level
    with pytest.raises(TypeError, match=mutable_regex):
        levels[0] = levels[0]
    with pytest.raises(TypeError, match=mutable_regex):
        levels[0][0] = levels[0][0]

    # ditto for codes
    with pytest.raises(TypeError, match=mutable_regex):
        codes[0] = codes[0]
    with pytest.raises(TypeError, match=mutable_regex):
        codes[0][0] = codes[0][0]

    # and for names
    names = idx.names
    with pytest.raises(TypeError, match=mutable_regex):
        names[0] = names[0]
|
||||
|
||||
|
||||
def test_level_setting_resets_attributes():
|
||||
ind = pd.MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
|
||||
assert ind.is_monotonic
|
||||
ind.set_levels([["A", "B"], [1, 3, 2]], inplace=True)
|
||||
# if this fails, probably didn't reset the cache correctly.
|
||||
assert not ind.is_monotonic
|
||||
|
||||
|
||||
def test_rangeindex_fallback_coercion_bug():
    """Level values of a stacked, concatenated frame are Int64Index (GH 12893)."""
    foo = pd.DataFrame(np.arange(100).reshape((10, 10)))
    bar = pd.DataFrame(np.arange(100).reshape((10, 10)))
    df = pd.concat({"foo": foo.stack(), "bar": bar.stack()}, axis=1)
    df.index.names = ["fizz", "buzz"]

    # render the frame; the result is discarded
    str(df)

    expected_frame = pd.DataFrame(
        {"bar": np.arange(100), "foo": np.arange(100)},
        index=pd.MultiIndex.from_product(
            [range(10), range(10)], names=["fizz", "buzz"]
        ),
    )
    tm.assert_frame_equal(df, expected_frame, check_like=True)

    fizz = df.index.get_level_values("fizz")
    tm.assert_index_equal(fizz, pd.Int64Index(np.arange(10), name="fizz").repeat(10))

    buzz = df.index.get_level_values("buzz")
    tm.assert_index_equal(
        buzz, pd.Int64Index(np.tile(np.arange(10), 10), name="buzz")
    )
|
||||
|
||||
|
||||
def test_hash_error(indices):
    """hash() on an index raises TypeError that names the index class."""
    expected_message = "unhashable type: {0.__name__!r}".format(type(indices))
    with pytest.raises(TypeError, match=expected_message):
        hash(indices)
|
||||
|
||||
|
||||
def test_mutability(indices):
    """Item assignment on a non-empty index raises TypeError."""
    if len(indices) == 0:
        # no element to assign to
        return

    msg = "Index does not support mutable operations"
    with pytest.raises(TypeError, match=msg):
        indices[0] = indices[0]
|
||||
|
||||
|
||||
def test_wrong_number_names(indices):
    """Assigning three names to the index raises a 'Length...' ValueError."""
    three_names = ["apple", "banana", "carrot"]
    with pytest.raises(ValueError, match="^Length"):
        indices.names = three_names
|
||||
|
||||
|
||||
def test_memory_usage(idx):
    """memory_usage grows after a lookup; it is 0 for an empty index."""
    before = idx.memory_usage()

    if not len(idx):
        # we report 0 for no-length
        assert before == 0
        return

    # A lookup in between; the shallow figure is expected to rise afterwards.
    idx.get_loc(idx[0])
    after = idx.memory_usage()
    after_deep = idx.memory_usage(deep=True)

    # RangeIndex, IntervalIndex
    # don't have engines
    if not isinstance(idx, (RangeIndex, IntervalIndex)):
        assert after > before

    if idx.inferred_type == "object":
        assert after_deep > after
|
||||
|
||||
|
||||
def test_nlevels(idx):
    """The fixture index reports two levels."""
    level_count = idx.nlevels
    assert level_count == 2
|
||||
@@ -0,0 +1,89 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])]
)
def test_join_level(idx, other, join_type):
    """Join a flat Index with a MultiIndex on the level named "second"."""
    join_kwargs = dict(how=join_type, level="second", return_indexers=True)
    join_index, lidx, ridx = other.join(idx, **join_kwargs)

    exp_level = other.join(idx.levels[1], how=join_type)
    assert join_index.levels[0].equals(idx.levels[0])
    assert join_index.levels[1].equals(exp_level)

    # pare down levels
    keep = np.array([entry[1] in exp_level for entry in idx], dtype=bool)
    exp_values = idx.values[keep]
    tm.assert_numpy_array_equal(join_index.values, exp_values)

    if join_type not in ("outer", "inner"):
        return

    # Same join called from the MultiIndex side; indexers come back swapped.
    join_index2, ridx2, lidx2 = idx.join(other, **join_kwargs)

    assert join_index.equals(join_index2)
    tm.assert_numpy_array_equal(lidx, lidx2)
    tm.assert_numpy_array_equal(ridx, ridx2)
    tm.assert_numpy_array_equal(join_index2.values, exp_values)
|
||||
|
||||
|
||||
def test_join_level_corner_case(idx):
    """Corner cases of level joins: result type and the ambiguous case."""
    flat = Index(["three", "one", "two"])
    joined = flat.join(idx, level="second")
    assert isinstance(joined, MultiIndex)

    # MultiIndex joined with itself on a level
    with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"):
        idx.join(idx, level=1)
|
||||
|
||||
|
||||
def test_join_self(idx, join_type):
    """Joining an index with itself returns the very same object."""
    result = idx.join(idx, how=join_type)
    assert idx is result
|
||||
|
||||
|
||||
def test_join_multi():
    """Join a MultiIndex with a flat Index that shares the name "b" (GH 10665)."""
    names = ["a", "b"]
    midx = pd.MultiIndex.from_product([np.arange(4), np.arange(4)], names=names)
    idx = pd.Index([1, 2, 5], name="b")

    # inner
    exp_idx = pd.MultiIndex.from_product([np.arange(4), [1, 2]], names=names)
    exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
    exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)

    jidx, lidx, ridx = midx.join(idx, how="inner", return_indexers=True)
    tm.assert_index_equal(jidx, exp_idx)
    tm.assert_numpy_array_equal(lidx, exp_lidx)
    tm.assert_numpy_array_equal(ridx, exp_ridx)

    # flip: called from the flat side, so the indexers are unpacked swapped
    jidx, ridx, lidx = idx.join(midx, how="inner", return_indexers=True)
    tm.assert_index_equal(jidx, exp_idx)
    tm.assert_numpy_array_equal(lidx, exp_lidx)
    tm.assert_numpy_array_equal(ridx, exp_ridx)

    # keep MultiIndex
    exp_ridx = np.array(
        [-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp
    )

    jidx, lidx, ridx = midx.join(idx, how="left", return_indexers=True)
    tm.assert_index_equal(jidx, midx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, exp_ridx)

    # flip
    jidx, ridx, lidx = idx.join(midx, how="right", return_indexers=True)
    tm.assert_index_equal(jidx, midx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, exp_ridx)
|
||||
|
||||
|
||||
def test_join_self_unique(idx, join_type):
    """A unique index joined with itself compares equal element-wise."""
    if not idx.is_unique:
        # nothing is checked for non-unique indexes
        return

    joined = idx.join(idx, how=join_type)
    assert (idx == joined).all()
|
||||
@@ -0,0 +1,143 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslib import iNaT
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index
|
||||
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_fillna(idx):
    """fillna raises on a MultiIndex; other index types fill and flag NaNs.

    GH 11343
    """
    # TODO: Remove or Refactor.  Not Implemented for MultiIndex
    for _label, index in [("idx", idx)]:
        if len(index) == 0:
            continue

        if isinstance(index, MultiIndex):
            work = index.copy()
            msg = "isna is not defined for MultiIndex"
            with pytest.raises(NotImplementedError, match=msg):
                work.fillna(work[0])
            continue

        # Remaining index types: filling returns an equal but distinct object.
        work = index.copy()
        result = work.fillna(work[0])
        tm.assert_index_equal(result, work)
        assert result is not work

        msg = "'value' must be a scalar, passed: "
        with pytest.raises(TypeError, match=msg):
            work.fillna([work[0]])

        # Plant a missing value at position 1 and rebuild the index from it.
        work = index.copy()
        values = work.values

        if isinstance(index, DatetimeIndexOpsMixin):
            values[1] = iNaT
        elif isinstance(index, (Int64Index, UInt64Index)):
            continue
        else:
            values[1] = np.nan

        if isinstance(index, PeriodIndex):
            work = index.__class__(values, freq=index.freq)
        else:
            work = index.__class__(values)

        expected = np.array([False] * len(work), dtype=bool)
        expected[1] = True
        tm.assert_numpy_array_equal(work._isnan, expected)
        assert work.hasnans is True
|
||||
|
||||
|
||||
def test_dropna():
    """Check ``MultiIndex.dropna`` for each ``how`` option (GH 6194, GH 26408)."""
    mi = pd.MultiIndex.from_arrays(
        [
            [1, np.nan, 3, np.nan, 5],
            [1, 2, np.nan, np.nan, 5],
            ["a", "b", "c", np.nan, "e"],
        ]
    )

    # The default call and how="any" are checked against the same result.
    any_dropped = pd.MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]])
    tm.assert_index_equal(mi.dropna(), any_dropped)
    tm.assert_index_equal(mi.dropna(how="any"), any_dropped)

    all_dropped = pd.MultiIndex.from_arrays(
        [[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]]
    )
    tm.assert_index_equal(mi.dropna(how="all"), all_dropped)

    with pytest.raises(ValueError, match="invalid how option: xxx"):
        mi.dropna(how="xxx")

    # GH26408
    # test if missing values are dropped for multiindex constructed
    # from codes and values
    coded = MultiIndex(
        levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]],
        codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]],
    )
    any_dropped = MultiIndex.from_arrays([["128", 2], ["128", 2]])
    tm.assert_index_equal(coded.dropna(), any_dropped)
    tm.assert_index_equal(coded.dropna(how="any"), any_dropped)

    all_dropped = MultiIndex.from_arrays(
        [[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]]
    )
    tm.assert_index_equal(coded.dropna(how="all"), all_dropped)
|
||||
|
||||
|
||||
def test_nulls(idx):
    """Smoke-test that ``isna`` is rejected on the fixture index.

    This is only a smoke test; the methods are tested for function elsewhere.
    """
    expected_message = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=expected_message):
        idx.isna()
|
||||
|
||||
|
||||
@pytest.mark.xfail
def test_hasnans_isnans(idx):
    """Check ``_isnan`` / ``hasnans`` without and with a missing value (GH 11343)."""
    # cases in indices doesn't include NaN
    clean = idx.copy()
    no_missing = np.array([False] * len(clean), dtype=bool)
    tm.assert_numpy_array_equal(clean._isnan, no_missing)
    assert clean.hasnans is False

    # Put NaN at position 1 and rebuild an index of the same class.
    values = idx.copy().values
    values[1] = np.nan
    with_missing = idx.__class__(values)

    one_missing = np.array([False] * len(with_missing), dtype=bool)
    one_missing[1] = True
    tm.assert_numpy_array_equal(with_missing._isnan, one_missing)
    assert with_missing.hasnans is True
|
||||
|
||||
|
||||
def test_nan_stays_float():
    """Missing second-level values stay missing after a join or frame arithmetic (GH 7031)."""
    left_index = pd.MultiIndex(
        levels=[["A", "B"], []], codes=[[1, 0], [-1, -1]], names=[0, 1]
    )
    right_index = pd.MultiIndex(levels=[["C"], ["D"]], codes=[[0], [0]], names=[0, 1])

    joined = left_index.join(right_index, how="outer")
    assert pd.isna(left_index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(joined.get_level_values(1)[:-1]).all()

    left_frame = pd.DataFrame([[1, 2]], index=left_index)
    right_frame = pd.DataFrame([[3, 4]], index=right_index)
    difference = left_frame - right_frame
    assert pd.isna(left_frame.index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(difference.index.get_level_values(1)[:-1]).all()
|
||||
@@ -0,0 +1,230 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, IntervalIndex, MultiIndex
|
||||
from pandas.api.types import is_scalar
|
||||
|
||||
|
||||
def _assert_increasing(index, expected, check_values=True):
    """Assert the increasing and strictly-increasing flags of *index*.

    Both ``is_monotonic_increasing`` and ``_is_strictly_monotonic_increasing``
    must be the bool *expected*.  When *check_values* is true, the same two
    flags are also checked on ``Index(index.values)``.
    """
    assert index.is_monotonic_increasing is expected
    assert index._is_strictly_monotonic_increasing is expected
    if check_values:
        rebuilt = Index(index.values)
        assert rebuilt.is_monotonic_increasing is expected
        assert rebuilt._is_strictly_monotonic_increasing is expected


def test_is_monotonic_increasing():
    """Check increasing monotonicity flags of ``MultiIndex`` for several layouts."""
    # Both levels ascending.
    _assert_increasing(
        MultiIndex.from_product([np.arange(10), np.arange(10)], names=["one", "two"]),
        True,
    )

    # First level descending.
    _assert_increasing(
        MultiIndex.from_product(
            [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
        ),
        False,
    )

    # Second level descending.
    _assert_increasing(
        MultiIndex.from_product(
            [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
        ),
        False,
    )

    # NaN inside the first level.
    _assert_increasing(
        MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]]), False
    )

    # string ordering
    codes = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
    _assert_increasing(
        MultiIndex(
            levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
            codes=codes,
            names=["first", "second"],
        ),
        False,
    )
    _assert_increasing(
        MultiIndex(
            levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]],
            codes=codes,
            names=["first", "second"],
        ),
        True,
    )

    # mixed levels, hits the TypeError
    # Only the MultiIndex itself is checked here, not Index(values).
    mixed = MultiIndex(
        levels=[
            [1, 2, 3, 4],
            [
                "gb00b03mlx29",
                "lu0197800237",
                "nl0000289783",
                "nl0000289965",
                "nl0000301109",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )
    _assert_increasing(mixed, False, check_values=False)

    # empty
    _assert_increasing(MultiIndex.from_arrays([[], []]), True)
|
||||
|
||||
|
||||
def _assert_decreasing(index, expected, check_values=True):
    """Assert the decreasing and strictly-decreasing flags of *index*.

    Both ``is_monotonic_decreasing`` and ``_is_strictly_monotonic_decreasing``
    must be the bool *expected*.  When *check_values* is true, the same two
    flags are also checked on ``Index(index.values)``.
    """
    assert index.is_monotonic_decreasing is expected
    assert index._is_strictly_monotonic_decreasing is expected
    if check_values:
        rebuilt = Index(index.values)
        assert rebuilt.is_monotonic_decreasing is expected
        assert rebuilt._is_strictly_monotonic_decreasing is expected


def test_is_monotonic_decreasing():
    """Check decreasing monotonicity flags of ``MultiIndex`` for several layouts."""
    # Both levels descending.
    _assert_decreasing(
        MultiIndex.from_product(
            [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"]
        ),
        True,
    )

    # First level ascending.
    _assert_decreasing(
        MultiIndex.from_product(
            [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
        ),
        False,
    )

    # Second level ascending.
    _assert_decreasing(
        MultiIndex.from_product(
            [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
        ),
        False,
    )

    # NaN inside the first level.
    _assert_decreasing(
        MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]]), False
    )

    # string ordering
    codes = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
    _assert_decreasing(
        MultiIndex(
            levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]],
            codes=codes,
            names=["first", "second"],
        ),
        False,
    )
    _assert_decreasing(
        MultiIndex(
            levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]],
            codes=codes,
            names=["first", "second"],
        ),
        True,
    )

    # mixed levels, hits the TypeError
    # Only the MultiIndex itself is checked here, not Index(values).
    mixed = MultiIndex(
        levels=[
            [4, 3, 2, 1],
            [
                "nl0000301109",
                "nl0000289965",
                "nl0000289783",
                "lu0197800237",
                "gb00b03mlx29",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )
    _assert_decreasing(mixed, False, check_values=False)

    # empty
    _assert_decreasing(MultiIndex.from_arrays([[], []]), True)
|
||||
|
||||
|
||||
def test_is_strictly_monotonic_increasing():
    """A repeated entry keeps the index increasing but not strictly increasing."""
    level_values = [["bar", "baz"], ["mom", "next"]]
    # The first two rows are both ("bar", "mom").
    with_duplicate = pd.MultiIndex(
        levels=level_values, codes=[[0, 0, 1, 1], [0, 0, 0, 1]]
    )
    assert with_duplicate.is_monotonic_increasing is True
    assert with_duplicate._is_strictly_monotonic_increasing is False
|
||||
|
||||
|
||||
def test_is_strictly_monotonic_decreasing():
    """A repeated entry keeps the index decreasing but not strictly decreasing."""
    level_values = [["baz", "bar"], ["next", "mom"]]
    # The first two rows are both ("baz", "next").
    with_duplicate = pd.MultiIndex(
        levels=level_values, codes=[[0, 0, 1, 1], [0, 0, 0, 1]]
    )
    assert with_duplicate.is_monotonic_decreasing is True
    assert with_duplicate._is_strictly_monotonic_decreasing is False
|
||||
|
||||
|
||||
def test_searchsorted_monotonic(indices):
    """Check ``_searchsorted_monotonic`` / ``searchsorted`` for the first element (GH17271)."""
    # not implemented for tuple searches in MultiIndex
    # or Intervals searches in IntervalIndex
    if isinstance(indices, (MultiIndex, IntervalIndex)):
        return

    # nothing to test if the index is empty
    if indices.empty:
        return

    value = indices[0]

    # determine the expected results (handle dupes for 'right')
    first_mismatch = (indices == value).argmin()
    # all values are the same, expected right position should be length
    right_position = len(indices) if first_mismatch == 0 else first_mismatch
    expected_by_side = (("left", 0), ("right", right_position))

    increasing = indices.is_monotonic_increasing
    if not increasing and not indices.is_monotonic_decreasing:
        # non-monotonic should raise.
        with pytest.raises(ValueError):
            indices._searchsorted_monotonic(value, side="left")
        return

    # test _searchsorted_monotonic in all cases
    for side, wanted in expected_by_side:
        found = indices._searchsorted_monotonic(value, side=side)
        assert is_scalar(found)
        assert wanted == found

    # test searchsorted only for increasing
    if increasing:
        for side, wanted in expected_by_side:
            found = indices.searchsorted(value, side=side)
            assert is_scalar(found)
            assert wanted == found
|
||||
@@ -0,0 +1,123 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def check_level_names(index, names):
    """Assert that the ``name`` of each level of *index* equals *names*, in order."""
    actual = [lvl.name for lvl in index.levels]
    assert actual == list(names)
|
||||
|
||||
|
||||
def test_slice_keep_name():
    """Slicing a ``MultiIndex`` keeps its names."""
    tuples = [("a", "b"), (1, 2), ("c", "d")]
    named = MultiIndex.from_tuples(tuples, names=["x", "y"])
    tail = named[1:]
    assert tail.names == named.names
|
||||
|
||||
|
||||
def test_index_name_retained():
    """Adding a row through ``.loc`` gives the same frame as building it directly (GH9857)."""
    result = pd.DataFrame(
        {"x": [1, 2, 6], "y": [2, 2, 8], "z": [-5, 0, 5]}
    ).set_index("z")
    # Enlarge the frame with a new index label.
    result.loc[10] = [9, 10]

    df_expected = pd.DataFrame(
        {"x": [1, 2, 6, 9], "y": [2, 2, 8, 10], "z": [-5, 0, 5, 10]}
    ).set_index("z")
    tm.assert_frame_equal(result, df_expected)
|
||||
|
||||
|
||||
def test_changing_names(idx):
    """Renaming an index updates its own level names but not those of its copies."""
    # names should be applied to levels
    original_level_names = [level.name for level in idx.levels]
    check_level_names(idx, idx.names)

    # Take the copies before the original is renamed.
    view = idx.view()
    copy = idx.copy()
    shallow_copy = idx._shallow_copy()

    # changing names should change level names on object
    new_names = [name + "a" for name in idx.names]
    idx.names = new_names
    check_level_names(idx, new_names)

    # but not on copies
    for duplicate in (view, copy, shallow_copy):
        check_level_names(duplicate, original_level_names)

    # and copies shouldn't change original
    shallow_copy.names = [name + "c" for name in shallow_copy.names]
    check_level_names(idx, new_names)
|
||||
|
||||
|
||||
def test_take_preserve_name(idx):
    """``take`` keeps the names of the index."""
    positions = [3, 0, 1]
    assert idx.take(positions).names == idx.names
|
||||
|
||||
|
||||
def test_copy_names():
|
||||
# Check that adding a "names" parameter to the copy is honored
|
||||
# GH14302
|
||||
multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"])
|
||||
multi_idx1 = multi_idx.copy()
|
||||
|
||||
assert multi_idx.equals(multi_idx1)
|
||||
assert multi_idx.names == ["MyName1", "MyName2"]
|
||||
assert multi_idx1.names == ["MyName1", "MyName2"]
|
||||
|
||||
multi_idx2 = multi_idx.copy(names=["NewName1", "NewName2"])
|
||||
|
||||
assert multi_idx.equals(multi_idx2)
|
||||
assert multi_idx.names == ["MyName1", "MyName2"]
|
||||
assert multi_idx2.names == ["NewName1", "NewName2"]
|
||||
|
||||
multi_idx3 = multi_idx.copy(name=["NewName1", "NewName2"])
|
||||
|
||||
assert multi_idx.equals(multi_idx3)
|
||||
assert multi_idx.names == ["MyName1", "MyName2"]
|
||||
assert multi_idx3.names == ["NewName1", "NewName2"]
|
||||
|
||||
|
||||
def test_names(idx, index_names):
    """Check level names and the length validation applied to ``names``."""
    # names are assigned in setup
    assert index_names == [level.name for level in idx.levels]

    # setting bad names on existing
    for bad_names in (list(idx.names) + ["third"], []):
        with pytest.raises(ValueError, match="^Length of names"):
            idx.names = bad_names

    # initializing with bad names (should always be equivalent)
    major_axis, minor_axis = idx.levels
    major_codes, minor_codes = idx.codes
    for bad_names in (["first"], ["first", "second", "third"]):
        with pytest.raises(ValueError, match="^Length of names"):
            MultiIndex(
                levels=[major_axis, minor_axis],
                codes=[major_codes, minor_codes],
                names=bad_names,
            )

    # names are assigned
    idx.names = ["a", "b"]
    assert list(idx.names) == [level.name for level in idx.levels]
|
||||
|
||||
|
||||
def test_duplicate_level_names_access_raises(idx):
    """Looking up a level by a name shared by two levels raises (GH19029)."""
    shared_name = "foo"
    idx.names = [shared_name, shared_name]
    with pytest.raises(ValueError, match="name foo occurs multiple times"):
        idx._get_level_number(shared_name)
|
||||
@@ -0,0 +1,96 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, MultiIndex, date_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_partial_string_timestamp_multiindex():
    """Check partial-string datetime selection on a ``MultiIndex`` (GH10331, GH12685)."""
    dr = pd.date_range("2016-01-01", "2016-01-03", freq="12H")
    abc = ["a", "b", "c"]
    df = pd.DataFrame({"c1": range(0, 15)}, index=pd.MultiIndex.from_product([dr, abc]))
    idx = pd.IndexSlice

    #                        c1
    # 2016-01-01 00:00:00 a   0
    #                     b   1
    #                     c   2
    # 2016-01-01 12:00:00 a   3
    #                     b   4
    #                     c   5
    # 2016-01-02 00:00:00 a   6
    #                     b   7
    #                     c   8
    # 2016-01-02 12:00:00 a   9
    #                     b  10
    #                     c  11
    # 2016-01-03 00:00:00 a  12
    #                     b  13
    #                     c  14

    # partial string matching on a single index
    swapped_frames = [
        swapped.sort_index()
        for swapped in (df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1))
    ]
    for swapped in swapped_frames:
        just_a = swapped.loc["a"]
        result = just_a.loc["2016-01-01"]
        expected = df.loc[idx[:, "a"], :].iloc[0:2]
        expected.index = expected.index.droplevel(1)
        tm.assert_frame_equal(result, expected)

    # The swapped-layout checks below use the sorted swaplevel(0, 1) frame.
    df_swap = swapped_frames[-1]

    # indexing with IndexSlice
    result = df.loc[idx["2016-01-01":"2016-02-01", :], :]
    tm.assert_frame_equal(result, df)

    # match on secondary index
    result = df_swap.loc[idx[:, "2016-01-01":"2016-01-01"], :]
    tm.assert_frame_equal(result, df_swap.iloc[[0, 1, 5, 6, 10, 11]])

    # Even though this syntax works on a single index, this is somewhat
    # ambiguous and we don't want to extend this behavior forward to work
    # in multi-indexes. This would amount to selecting a scalar from a
    # column.
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df["2016-01-01"]

    # partial string match on year only
    tm.assert_frame_equal(df.loc["2016"], df)

    # partial string match on date
    tm.assert_frame_equal(df.loc["2016-01-01"], df.iloc[0:6])

    # partial string match on date and hour, from middle
    tm.assert_frame_equal(df.loc["2016-01-02 12"], df.iloc[9:12])

    # partial string match on secondary index
    result = df_swap.loc[idx[:, "2016-01-02"], :]
    tm.assert_frame_equal(result, df_swap.iloc[[2, 3, 7, 8, 12, 13]])

    # tuple selector with partial string match on date
    result = df.loc[("2016-01-01", "a"), :]
    tm.assert_frame_equal(result, df.iloc[[0, 3]])

    # Slicing date on first level should break (of course)
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df_swap.loc["2016-01-01"]

    # GH12685 (partial string with daily resolution or below)
    daily = date_range("2013-01-01", periods=100, freq="D")
    daily_frame = DataFrame(
        np.random.randn(200, 1),
        columns=["A"],
        index=MultiIndex.from_product([daily, ["a", "b"]]),
    )

    result = daily_frame.loc[idx["2013-03":"2013-03", :], :]
    tm.assert_frame_equal(result, daily_frame.iloc[118:180])
|
||||
@@ -0,0 +1,105 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def check_level_names(index, names):
    """Assert that the ``name`` of each level of *index* equals *names*, in order."""
    actual = [lvl.name for lvl in index.levels]
    assert actual == list(names)
|
||||
|
||||
|
||||
def test_reindex(idx):
    """Reindexing against a list of tuples returns a ``MultiIndex`` with level names kept."""
    head = idx[:4]
    reindexed, _ = idx.reindex(list(head))
    assert isinstance(reindexed, MultiIndex)
    check_level_names(reindexed, head.names)

    # Reindexing against all of its own entries yields no indexer.
    reindexed, indexer = idx.reindex(list(idx))
    assert isinstance(reindexed, MultiIndex)
    assert indexer is None
    check_level_names(reindexed, idx.names)
|
||||
|
||||
|
||||
def test_reindex_level(idx):
    """Level-wise ``reindex`` agrees with the corresponding level-wise ``join``."""
    flat = Index(["one"])

    multi_target, multi_indexer = idx.reindex(flat, level="second")
    flat_target, flat_indexer = flat.reindex(idx, level="second")

    right_joined = idx.join(flat, level="second", how="right")
    left_joined = idx.join(flat, level="second", how="left")

    assert multi_target.equals(right_joined)
    tm.assert_numpy_array_equal(
        multi_indexer, np.array([0, 2, 4]), check_dtype=False
    )

    assert flat_target.equals(left_joined)
    tm.assert_numpy_array_equal(
        flat_indexer, np.array([0, -1, 0, -1, 0, -1]), check_dtype=False
    )

    # A fill method cannot be combined with ``level``.
    fill_error = "Fill method not supported"
    with pytest.raises(TypeError, match=fill_error):
        idx.reindex(idx, method="pad", level="second")

    with pytest.raises(TypeError, match=fill_error):
        flat.reindex(flat, method="bfill", level="first")
|
||||
|
||||
|
||||
def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
    """Reindexing against a list or ndarray keeps the names of the index (GH6552)."""
    idx = idx.copy()
    target = idx.copy()
    idx.names = target.names = [None, None]

    other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]])

    def assert_names_kept(expected_names):
        # list & ndarray cases
        new_labels = (
            [],
            np.array([]),
            target.tolist(),
            target.values,
            other_dtype.tolist(),
            other_dtype.values,
        )
        for labels in new_labels:
            assert idx.reindex(labels)[0].names == expected_names

    assert_names_kept([None, None])

    idx.names = ["foo", "bar"]
    assert_names_kept(["foo", "bar"])
|
||||
|
||||
|
||||
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
    """Level-wise reindexing against an empty list keeps the names (GH7774)."""
    named = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"])
    for level in (0, 1):
        reindexed, _ = named.reindex([], level=level)
        assert reindexed.names == ["foo", "bar"]
|
||||
|
||||
|
||||
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array():
    """Level-wise reindexing against an empty list keeps each level's dtype (GH7774)."""
    mixed = pd.MultiIndex.from_product([[0, 1], ["a", "b"]])
    for level, expected_type in ((0, np.int64), (1, np.object_)):
        reindexed, _ = mixed.reindex([], level=level)
        assert reindexed.levels[level].dtype.type == expected_type
|
||||
|
||||
|
||||
def test_reindex_base(idx):
    """``get_indexer`` against itself gives 0..n-1 and rejects an unknown fill method."""
    expected = np.arange(idx.size, dtype=np.intp)

    actual = idx.get_indexer(idx)
    tm.assert_numpy_array_equal(expected, actual)

    with pytest.raises(ValueError, match="Invalid fill method"):
        idx.get_indexer(idx, method="invalid")
|
||||
|
||||
|
||||
def test_reindex_non_unique():
    """Reindexing a Series whose ``MultiIndex`` has duplicate entries raises."""
    # (1, 1) appears twice.
    duplicated = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
    series = pd.Series(np.arange(4), index=duplicated)
    unique_target = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])

    with pytest.raises(ValueError, match="cannot handle a non-unique multi-index!"):
        series.reindex(unique_target)
|
||||
@@ -0,0 +1,129 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_insert(idx):
    """Check ``MultiIndex.insert`` and ``.loc`` assignment of new MultiIndex keys."""
    # key contained in all levels
    with_known_key = idx.insert(0, ("bar", "two"))
    assert with_known_key.equal_levels(idx)
    assert with_known_key[0] == ("bar", "two")

    # key not contained in all levels
    with_new_key = idx.insert(0, ("abc", "three"))
    appended = (("abc", "first"), ("three", "second"))
    for position, (label, level_name) in enumerate(appended):
        expected_level = Index(list(idx.levels[position]) + [label], name=level_name)
        tm.assert_index_equal(with_new_key.levels[position], expected_level)
    assert with_new_key[0] == ("abc", "three")

    # key wrong length
    msg = "Item must have length equal to number of levels"
    with pytest.raises(ValueError, match=msg):
        idx.insert(0, ("foo2",))

    columns = ["1st", "2nd", "3rd"]
    key_columns = ["1st", "2nd"]
    initial_rows = [["a", "b", 0], ["b", "d", 1]]
    # (new key, value) pairs, assigned in this order.
    additions = [
        (("b", "x"), 2),
        (("b", "a"), -1),
        (("b", "b"), 3),
        (("a", "x"), 4),
        (("a", "w"), 5),
        (("a", "a"), 6),
    ]

    left = pd.DataFrame(initial_rows, columns=columns)
    left.set_index(key_columns, inplace=True)
    ts = left["3rd"].copy(deep=True)

    for key, value in additions:
        left.loc[key, "3rd"] = value

    for key, value in additions:
        ts.loc[key] = value

    right = pd.DataFrame(
        initial_rows + [[first, second, value] for (first, second), value in additions],
        columns=columns,
    )
    right.set_index(key_columns, inplace=True)
    # FIXME data types changes to float because
    # of intermediate nan insertion;
    tm.assert_frame_equal(left, right, check_dtype=False)
    tm.assert_series_equal(ts, right["3rd"])

    # GH9250
    tuples = (
        [("test1", i) for i in range(5)]
        + [("test2", i) for i in range(6)]
        + [("test", 17), ("test", 18)]
    )

    left = pd.Series(np.linspace(0, 10, 11), pd.MultiIndex.from_tuples(tuples[:-2]))
    left.loc[("test", 17)] = 11
    left.loc[("test", 18)] = 12

    right = pd.Series(np.linspace(0, 12, 13), pd.MultiIndex.from_tuples(tuples))
    tm.assert_series_equal(left, right)
|
||||
|
||||
|
||||
def test_append(idx):
    """Appending the pieces of a split index reproduces the original."""
    assert idx[:3].append(idx[3:]).equals(idx)

    # A list of several pieces can be appended in one call.
    head, *rest = [idx[:1], idx[1:3], idx[3:]]
    assert head.append(rest).equals(idx)

    # empty
    assert idx.append([]).equals(idx)
|
||||
|
||||
|
||||
def test_repeat():
    """``MultiIndex.repeat`` matches a product built from a repeated second level."""
    repeats = 2
    first_level = [1, 2, 3]
    # Used both as the second level's values and as the index names.
    labels = np.array(["foo", "bar"])

    original = MultiIndex.from_product([first_level, labels], names=labels)
    expected = MultiIndex.from_product(
        [first_level, labels.repeat(repeats)], names=labels
    )
    tm.assert_index_equal(original.repeat(repeats), expected)
|
||||
|
||||
|
||||
def test_insert_base(idx):
    """Inserting the first element in front of ``idx[1:4]`` gives ``idx[0:4]``."""
    without_first = idx[1:4]

    # test 0th element
    restored = without_first.insert(0, idx[0])
    assert idx[0:4].equals(restored)
|
||||
|
||||
|
||||
def test_delete_base(idx):
    """Deleting the first or last position matches the slice; one past the end raises."""
    # (position to delete, expected remainder)
    scenarios = ((0, idx[1:]), (-1, idx[:-1]))
    for position, expected in scenarios:
        result = idx.delete(position)
        assert result.equals(expected)
        assert result.name == expected.name

    with pytest.raises((IndexError, ValueError)):
        # Exception raised depends on NumPy version.
        idx.delete(len(idx))
|
||||
@@ -0,0 +1,363 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", [0.5, "xxx"])
@pytest.mark.parametrize("sort", [None, False])
@pytest.mark.parametrize(
    "method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_set_ops_error_cases(idx, case, sort, method):
    """Each set operation rejects the parametrized ``case`` inputs with ``TypeError``."""
    # non-iterable input
    set_operation = getattr(idx, method)
    with pytest.raises(TypeError, match="Input must be Index or array-like"):
        set_operation(case, sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [None, False])
def test_intersection_base(idx, sort):
    """Check ``intersection`` against index, ndarray, Series and list inputs."""
    first = idx[:5]
    second = idx[:3]

    # GH 10149
    # The index itself first, then the same values in other containers.
    others = [second] + [klass(second.values) for klass in (np.array, Series, list)]
    for other in others:
        result = first.intersection(other, sort=sort)
        if sort is None:
            tm.assert_index_equal(result, second.sort_values())
        assert tm.equalContents(result, second)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.intersection([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [None, False])
def test_union_base(idx, sort):
    """Check ``union`` against index, ndarray, Series and list inputs."""
    first = idx[3:]
    second = idx[:5]
    everything = idx

    # GH 10149
    # The index itself first, then the same values in other containers.
    others = [second] + [klass(second.values) for klass in (np.array, Series, list)]
    for other in others:
        result = first.union(other, sort=sort)
        if sort is None:
            tm.assert_index_equal(result, everything.sort_values())
        assert tm.equalContents(result, everything)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.union([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [None, False])
def test_difference_base(idx, sort):
    """Check ``difference`` against index, ndarray, Series and list inputs."""
    second = idx[4:]
    answer = idx[:4].sort_values() if sort is None else idx[:4]

    result = idx.difference(second, sort=sort)
    assert result.equals(answer)
    tm.assert_index_equal(result, answer)

    # GH 10149
    for klass in (np.array, Series, list):
        result = idx.difference(klass(second.values), sort=sort)
        tm.assert_index_equal(result, answer)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        idx.difference([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [None, False])
def test_symmetric_difference(idx, sort):
    """Check ``symmetric_difference`` against index, ndarray, Series and list inputs."""
    first = idx[1:]
    second = idx[:-1]
    # Only the last and the first entry are not shared by both slices.
    answer = idx[[-1, 0]]
    if sort is None:
        answer = answer.sort_values()

    # GH 10149
    # The index itself first, then the same values in other containers.
    others = [second] + [klass(second.values) for klass in (np.array, Series, list)]
    for other in others:
        result = first.symmetric_difference(other, sort=sort)
        tm.assert_index_equal(result, answer)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.symmetric_difference([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
def test_empty(idx):
    """``empty`` is false for the fixture index and true for a zero-length slice (GH 15270)."""
    zero_length = idx[:0]
    assert not idx.empty
    assert zero_length.empty
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [None, False])
def test_difference(idx, sort):
    """Exercise ``MultiIndex.difference`` over regular, empty and name-handling cases.

    ``idx`` is the shared MultiIndex fixture; ``sort`` is forwarded to every
    ``difference`` call.
    """
    first = idx
    result = first.difference(idx[-3:], sort=sort)
    vals = idx[:-3].values

    if sort is None:
        # expectation is sorted only for the sort=None case
        vals = sorted(vals)

    expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names)

    assert isinstance(result, MultiIndex)
    assert result.equals(expected)
    assert result.names == idx.names
    tm.assert_index_equal(result, expected)

    # empty difference: reflexive
    result = idx.difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: superset
    result = idx[-3:].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: degenerate
    result = idx[:0].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # names not the same
    chunklet = idx[-3:]
    chunklet.names = ["foo", "baz"]
    result = first.difference(chunklet, sort=sort)
    assert result.names == (None, None)

    # empty, but non-equal
    result = idx.difference(idx.sortlevel(1)[0], sort=sort)
    assert len(result) == 0

    # an array of tuples (non-MultiIndex) is accepted and gives an empty result
    result = first.difference(first.values, sort=sort)
    assert result.equals(first[:0])

    # name from empty array
    result = first.difference([], sort=sort)
    assert first.equals(result)
    assert first.names == result.names

    # name from non-empty array
    result = first.difference([("foo", "one")], sort=sort)
    expected = pd.MultiIndex.from_tuples(
        [("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")]
    )
    expected.names = first.names
    assert first.names == result.names
    # ``expected`` was previously built but never checked; compare contents
    # order-insensitively because sort=False keeps the fixture's own order.
    # NOTE(review): assumes the ``idx`` fixture holds exactly these tuples
    # plus ("foo", "one") -- confirm against conftest.
    assert tm.equalContents(result, expected)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.difference([1, 2, 3, 4, 5], sort=sort)
|
||||
|
||||
|
||||
def test_difference_sort_special():
    """Subtracting an empty list with the default ``sort`` returns an equal index (GH-24959)."""
    unsorted = pd.MultiIndex.from_product([[1, 0], ["a", "b"]])
    # sort=None, the default
    tm.assert_index_equal(unsorted.difference([]), unsorted)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="Not implemented.")
def test_difference_sort_special_true():
    """Expected-failure placeholder: ``sort=True`` should give a sorted difference."""
    # TODO decide on True behaviour
    unsorted = pd.MultiIndex.from_product([[1, 0], ["a", "b"]])
    in_order = pd.MultiIndex.from_product([[0, 1], ["a", "b"]])
    tm.assert_index_equal(unsorted.difference([], sort=True), in_order)
|
||||
|
||||
|
||||
def test_difference_sort_incomparable():
    """Difference of indexes with mixed int/Timestamp levels (GH-24959).

    The default ``sort`` is expected to raise ``TypeError``; ``sort=False``
    is expected to return an index equal to the left operand.
    """
    stamp = pd.Timestamp("2000")
    left = pd.MultiIndex.from_product([[1, stamp, 2], ["a", "b"]])
    right = pd.MultiIndex.from_product([[3, stamp, 4], ["c", "d"]])

    # sort=None, the default
    # MultiIndex.difference deviates here from other difference
    # implementations in not catching the TypeError
    with pytest.raises(TypeError):
        left.difference(right)

    # sort=False
    unsorted_diff = left.difference(right, sort=False)
    tm.assert_index_equal(unsorted_diff, left)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="Not implemented.")
def test_difference_sort_incomparable_true():
    """Expected-failure placeholder: ``sort=True`` on incomparable levels should raise."""
    # TODO decide on True behaviour
    # # sort=True, raises
    stamp = pd.Timestamp("2000")
    left = pd.MultiIndex.from_product([[1, stamp, 2], ["a", "b"]])
    right = pd.MultiIndex.from_product([[3, stamp, 4], ["c", "d"]])

    with pytest.raises(TypeError):
        left.difference(right, sort=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [None, False])
def test_union(idx, sort):
    """Union of two overlapping slices holds all of ``idx``; trivial unions return ``idx`` itself."""
    reversed_head = idx[:5][::-1]
    tail = idx[3:]

    combined = reversed_head.union(tail, sort=sort)
    if sort is None:
        tm.assert_index_equal(combined, idx.sort_values())
    assert tm.equalContents(combined, idx)

    # corner case, pass self or empty thing:
    for other in (idx, idx[:0]):
        combined = idx.union(other, sort=sort)
        assert combined is idx
|
||||
|
||||
# won't work in python 3
|
||||
# tuples = _index.values
|
||||
# result = _index[:4] | tuples[4:]
|
||||
# assert result.equals(tuples)
|
||||
|
||||
# not valid for python 3
|
||||
# def test_union_with_regular_index(self):
|
||||
# other = Index(['A', 'B', 'C'])
|
||||
|
||||
# result = other.union(idx)
|
||||
# assert ('foo', 'one') in result
|
||||
# assert 'B' in result
|
||||
|
||||
# result2 = _index.union(other)
|
||||
# assert result.equals(result2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [None, False])
def test_intersection(idx, sort):
    """Intersection of overlapping slices, of ``idx`` with itself, and of disjoint slices."""
    reversed_head = idx[:5][::-1]
    tail = idx[3:]
    overlap = idx[3:5]

    common = reversed_head.intersection(tail, sort=sort)
    if sort is None:
        tm.assert_index_equal(common, overlap)
    assert tm.equalContents(common, overlap)

    # corner case, pass self
    common = idx.intersection(idx, sort=sort)
    assert common is idx

    # empty intersection: disjoint
    nothing = idx[:2].intersection(idx[2:], sort=sort)
    assert nothing.equals(idx[:0])
|
||||
|
||||
# can't do in python 3
|
||||
# tuples = _index.values
|
||||
# result = _index & tuples
|
||||
# assert result.equals(tuples)
|
||||
|
||||
|
||||
def test_intersect_equal_sort():
    """Self-intersection equals the index for both ``sort=False`` and ``sort=None`` (GH-24959)."""
    unsorted = pd.MultiIndex.from_product([[1, 0], ["a", "b"]])
    for sort in (False, None):
        tm.assert_index_equal(unsorted.intersection(unsorted, sort=sort), unsorted)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="Not implemented.")
def test_intersect_equal_sort_true():
    """Expected-failure placeholder: ``sort=True`` self-intersection should be sorted."""
    # TODO decide on True behaviour
    unsorted = pd.MultiIndex.from_product([[1, 0], ["a", "b"]])
    in_order = pd.MultiIndex.from_product([[0, 1], ["a", "b"]])
    result = unsorted.intersection(unsorted, sort=True)
    tm.assert_index_equal(result, in_order)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("slice_", [slice(None), slice(0)])
def test_union_sort_other_empty(slice_):
    """Union with ``idx[slice_]`` equals ``idx`` for the default sort and for ``sort=False``."""
    # https://github.com/pandas-dev/pandas/issues/24959
    unsorted = pd.MultiIndex.from_product([[1, 0], ["a", "b"]])
    other = unsorted[slice_]

    # first the default (sort=None), then sort=False
    # MultiIndex does not special case empty.union(idx)
    # tm.assert_index_equal(other.union(idx), idx)
    for options in ({}, {"sort": False}):
        tm.assert_index_equal(unsorted.union(other, **options), unsorted)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="Not implemented.")
@pytest.mark.parametrize("slice_", [slice(None), slice(0)])
def test_union_sort_other_empty_sort(slice_):
    """Expected-failure placeholder: ``sort=True`` union should return a sorted index.

    ``slice_`` selects the right-hand operand from the index; it is
    parametrized with the same values as ``test_union_sort_other_empty``.
    Without the parametrization ``slice_`` could not be resolved and the test
    failed during setup (hidden by the xfail mark) without running its body.
    """
    # TODO decide on True behaviour
    # # sort=True
    idx = pd.MultiIndex.from_product([[1, 0], ["a", "b"]])
    # slice(0) reproduces the previously hard-coded ``idx[:0]`` operand
    other = idx[slice_]
    result = idx.union(other, sort=True)
    expected = pd.MultiIndex.from_product([[0, 1], ["a", "b"]])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_union_sort_other_incomparable():
    """Union with a one-row slice of a mixed int/Timestamp index equals the index."""
    # https://github.com/pandas-dev/pandas/issues/24959
    mixed = pd.MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])

    # first the default (sort=None), then sort=False
    for options in ({}, {"sort": False}):
        merged = mixed.union(mixed[:1], **options)
        tm.assert_index_equal(merged, mixed)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="Not implemented.")
def test_union_sort_other_incomparable_sort():
    """Expected-failure placeholder: ``sort=True`` union on incomparable levels should raise."""
    # TODO decide on True behaviour
    # # sort=True
    mixed = pd.MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
    first_row = mixed[:1]
    with pytest.raises(TypeError, match="Cannot compare"):
        mixed.union(first_row, sort=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method", ["union", "intersection", "difference", "symmetric_difference"]
)
def test_setops_disallow_true(method):
    """Each set operation named by ``method`` must raise ``ValueError`` for ``sort=True``."""
    left = pd.MultiIndex.from_product([["a", "b"], [1, 2]])
    right = pd.MultiIndex.from_product([["b", "c"], [1, 2]])
    set_op = getattr(left, method)

    with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
        set_op(right, sort=True)
|
||||
@@ -0,0 +1,276 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning, UnsortedIndexError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, RangeIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_sortlevel(idx):
|
||||
import random
|
||||
|
||||
tuples = list(idx)
|
||||
random.shuffle(tuples)
|
||||
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
|
||||
sorted_idx, _ = index.sortlevel(0)
|
||||
expected = MultiIndex.from_tuples(sorted(tuples))
|
||||
assert sorted_idx.equals(expected)
|
||||
|
||||
sorted_idx, _ = index.sortlevel(0, ascending=False)
|
||||
assert sorted_idx.equals(expected[::-1])
|
||||
|
||||
sorted_idx, _ = index.sortlevel(1)
|
||||
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
|
||||
expected = MultiIndex.from_tuples(by1)
|
||||
assert sorted_idx.equals(expected)
|
||||
|
||||
sorted_idx, _ = index.sortlevel(1, ascending=False)
|
||||
assert sorted_idx.equals(expected[::-1])
|
||||
|
||||
|
||||
def test_sortlevel_not_sort_remaining():
    """Sorting level "A" with ``sort_remaining=False`` returns an index equal to the input."""
    rows = [[1, 1, 3], [1, 1, 1]]
    mi = MultiIndex.from_tuples(rows, names=list("ABC"))
    by_a = mi.sortlevel("A", sort_remaining=False)[0]
    assert by_a.equals(mi)
|
||||
|
||||
|
||||
def test_sortlevel_deterministic():
    """Sort a fixed, unordered list of tuples by each level, ascending and descending."""
    tuples = [
        ("bar", "one"),
        ("foo", "two"),
        ("qux", "two"),
        ("foo", "one"),
        ("baz", "two"),
        ("qux", "one"),
    ]
    index = MultiIndex.from_tuples(tuples)

    # (level to sort on, key giving the expected ordering of the tuples)
    orderings = [(0, None), (1, lambda pair: (pair[1], pair[0]))]
    for level, key in orderings:
        expected = MultiIndex.from_tuples(sorted(tuples, key=key))

        ascending, _ = index.sortlevel(level)
        assert ascending.equals(expected)

        descending, _ = index.sortlevel(level, ascending=False)
        assert descending.equals(expected[::-1])
|
||||
|
||||
|
||||
def test_sort(indices):
    """Calling ``sort()`` on the ``indices`` fixture must raise ``TypeError``."""
    with pytest.raises(TypeError):
        indices.sort()
|
||||
|
||||
|
||||
def test_numpy_argsort(idx):
|
||||
result = np.argsort(idx)
|
||||
expected = idx.argsort()
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# these are the only two types that perform
|
||||
# pandas compatibility input validation - the
|
||||
# rest already perform separate (or no) such
|
||||
# validation via their 'values' attribute as
|
||||
# defined in pandas.core.indexes/base.py - they
|
||||
# cannot be changed at the moment due to
|
||||
# backwards compatibility concerns
|
||||
if isinstance(type(idx), (CategoricalIndex, RangeIndex)):
|
||||
msg = "the 'axis' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(idx, axis=1)
|
||||
|
||||
msg = "the 'kind' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(idx, kind="mergesort")
|
||||
|
||||
msg = "the 'order' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(idx, order=("a", "b"))
|
||||
|
||||
|
||||
def test_unsortedindex():
    """Label-based access on an unsorted MultiIndex frame, before and after sorting (GH 11897)."""
    keys = [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")]
    mi = pd.MultiIndex.from_tuples(keys, names=["one", "two"])
    rows = [[n, 10 * n] for n in range(6)]
    df = pd.DataFrame(rows, index=mi, columns=["one", "two"])

    # GH 16734: not sorted, but no real slicing
    tm.assert_series_equal(df.loc(axis=0)["z", "a"], df.iloc[0])

    # slicing on the unsorted index is expected to raise
    with pytest.raises(UnsortedIndexError):
        df.loc(axis=0)["z", slice("a")]

    df.sort_index(inplace=True)
    z_rows = df.loc(axis=0)["z", :]
    assert len(z_rows) == 2

    with pytest.raises(KeyError, match="'q'"):
        df.loc(axis=0)["q", :]
|
||||
|
||||
|
||||
def test_unsortedindex_doc_examples():
    """Replay the "sorting a MultiIndex" documentation example."""
    # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa
    columns = {
        "jim": [0, 0, 1, 1],
        "joe": ["x", "x", "z", "y"],
        "jolie": np.random.rand(4),
    }
    dfm = DataFrame(columns).set_index(["jim", "joe"])

    # lookup on the unsorted index is expected to warn
    with tm.assert_produces_warning(PerformanceWarning):
        dfm.loc[(1, "z")]

    # slicing on the unsorted index is expected to raise
    with pytest.raises(UnsortedIndexError):
        dfm.loc[(0, "y"):(1, "z")]

    unsorted_index = dfm.index
    assert not unsorted_index.is_lexsorted()
    assert unsorted_index.lexsort_depth == 1

    # sort it
    dfm = dfm.sort_index()
    dfm.loc[(1, "z")]
    dfm.loc[(0, "y"):(1, "z")]

    sorted_index = dfm.index
    assert sorted_index.is_lexsorted()
    assert sorted_index.lexsort_depth == 2
|
||||
|
||||
|
||||
def test_reconstruct_sort():
|
||||
|
||||
# starts off lexsorted & monotonic
|
||||
mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
|
||||
assert mi.is_lexsorted()
|
||||
assert mi.is_monotonic
|
||||
|
||||
recons = mi._sort_levels_monotonic()
|
||||
assert recons.is_lexsorted()
|
||||
assert recons.is_monotonic
|
||||
assert mi is recons
|
||||
|
||||
assert mi.equals(recons)
|
||||
assert Index(mi.values).equals(Index(recons.values))
|
||||
|
||||
# cannot convert to lexsorted
|
||||
mi = pd.MultiIndex.from_tuples(
|
||||
[("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")],
|
||||
names=["one", "two"],
|
||||
)
|
||||
assert not mi.is_lexsorted()
|
||||
assert not mi.is_monotonic
|
||||
|
||||
recons = mi._sort_levels_monotonic()
|
||||
assert not recons.is_lexsorted()
|
||||
assert not recons.is_monotonic
|
||||
|
||||
assert mi.equals(recons)
|
||||
assert Index(mi.values).equals(Index(recons.values))
|
||||
|
||||
# cannot convert to lexsorted
|
||||
mi = MultiIndex(
|
||||
levels=[["b", "d", "a"], [1, 2, 3]],
|
||||
codes=[[0, 1, 0, 2], [2, 0, 0, 1]],
|
||||
names=["col1", "col2"],
|
||||
)
|
||||
assert not mi.is_lexsorted()
|
||||
assert not mi.is_monotonic
|
||||
|
||||
recons = mi._sort_levels_monotonic()
|
||||
assert not recons.is_lexsorted()
|
||||
assert not recons.is_monotonic
|
||||
|
||||
assert mi.equals(recons)
|
||||
assert Index(mi.values).equals(Index(recons.values))
|
||||
|
||||
|
||||
def test_reconstruct_remove_unused():
    """Filtering rows keeps unused level values until ``remove_unused_levels`` is called."""
    # xref to GH 2770
    records = [["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]]
    frame = DataFrame(records, columns=["first", "second", "third"])
    indexed = frame.set_index(["first", "second"], drop=False)
    kept = indexed[indexed["first"] != "deleteMe"]
    names = ["first", "second"]

    # removed levels are there
    with_unused = MultiIndex(
        levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]],
        codes=[[1, 2], [1, 2]],
        names=names,
    )
    tm.assert_index_equal(kept.index, with_unused)

    trimmed_expected = MultiIndex(
        levels=[["keepMe", "keepMeToo"], [2, 3]],
        codes=[[0, 1], [0, 1]],
        names=names,
    )
    trimmed = kept.index.remove_unused_levels()
    tm.assert_index_equal(trimmed, trimmed_expected)

    # idempotent
    trimmed_again = trimmed.remove_unused_levels()
    tm.assert_index_equal(trimmed_again, trimmed_expected)
    assert trimmed_again.is_(trimmed)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")]
)
def test_remove_unused_levels_large(first_type, second_type):
    """``remove_unused_levels`` on a large filtered groupby index (GH16556)."""
    # because tests should be deterministic (and this test in particular
    # checks that levels are removed, which is not the case for every
    # random input):
    rng = np.random.RandomState(4)  # seed is arbitrary value that works

    size = 1 << 16
    # draw the columns in this exact order so the seeded stream is unchanged
    first = rng.randint(0, 1 << 13, size).astype(first_type)
    second = rng.randint(0, 1 << 10, size).astype(second_type)
    third = rng.rand(size)
    frame = DataFrame(dict(first=first, second=second, third=third))

    grouped = frame.groupby(["first", "second"]).sum()
    filtered = grouped[grouped.third < 0.1]
    full_index = filtered.index

    result = full_index.remove_unused_levels()
    for level in (0, 1):
        assert len(result.levels[level]) < len(full_index.levels[level])
    assert result.equals(full_index)

    expected = filtered.reset_index().set_index(["first", "second"]).index
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]])
@pytest.mark.parametrize(
    "level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]]
)
def test_remove_unused_nan(level0, level1):
    """``remove_unused_levels`` with -1 codes present drops the "unused" label (GH 18417)."""
    codes = [[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]
    mi = pd.MultiIndex(levels=[level0, level1], codes=codes)

    trimmed = mi.remove_unused_levels()
    tm.assert_index_equal(trimmed, mi)
    for level_values in (trimmed.levels[0], trimmed.levels[1]):
        assert "unused" not in level_values
|
||||
|
||||
|
||||
def test_argsort(idx):
|
||||
result = idx.argsort()
|
||||
expected = idx.values.argsort()
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
@@ -0,0 +1,121 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import PeriodIndex, period_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestPeriodIndexArithmetic:
    """Tests for ``PeriodIndex.shift``."""

    # ---------------------------------------------------------------
    # PeriodIndex.shift is used by __add__ and __sub__

    def test_pi_shift_ndarray(self):
        """Shift by an integer ndarray, one offset per element."""
        idx = PeriodIndex(
            ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx"
        )
        # (per-element offsets, expected period strings)
        scenarios = [
            ([1, 2, 3, 4], ["2011-02", "2011-04", "NaT", "2011-08"]),
            ([1, -2, 3, -4], ["2011-02", "2010-12", "NaT", "2010-12"]),
        ]
        for offsets, stamps in scenarios:
            shifted = idx.shift(np.array(offsets))
            wanted = PeriodIndex(stamps, freq="M", name="idx")
            tm.assert_index_equal(shifted, wanted)

    def test_shift(self):
        """Shift annual, monthly and daily ranges by zero, +1 and -1."""
        base_span = {"start": "1/1/2001", "end": "12/1/2009"}

        yearly = period_range(freq="A", **base_span)
        tm.assert_index_equal(yearly.shift(0), yearly)

        # (freq, shift amount, start of expected range, end of expected range)
        scenarios = [
            ("A", 1, "1/1/2002", "12/1/2010"),
            ("A", -1, "1/1/2000", "12/1/2008"),
            ("M", 1, "2/1/2001", "1/1/2010"),
            ("M", -1, "12/1/2000", "11/1/2009"),
            ("D", 1, "1/2/2001", "12/2/2009"),
            ("D", -1, "12/31/2000", "11/30/2009"),
        ]
        for freq, periods, start, end in scenarios:
            source = period_range(freq=freq, **base_span)
            wanted = period_range(freq=freq, start=start, end=end)
            assert len(source) == len(wanted)
            tm.assert_index_equal(source.shift(periods), wanted)

    def test_shift_corner_cases(self):
        """Empty index, rejected ``freq`` argument, and hourly shifts (GH#9903)."""

        def hourly(hours):
            # hourly PeriodIndex on 2011-01-01 for the given hours
            stamps = ["2011-01-01 {:02d}:00".format(hour) for hour in hours]
            return pd.PeriodIndex(stamps, name="xxx", freq="H")

        empty = pd.PeriodIndex([], name="xxx", freq="H")

        with pytest.raises(TypeError):
            # period shift doesn't accept freq
            empty.shift(1, freq="H")

        for periods in (0, 3):
            tm.assert_index_equal(empty.shift(periods), empty)

        idx = hourly([10, 11, 12])
        tm.assert_index_equal(idx.shift(0), idx)
        tm.assert_index_equal(idx.shift(3), hourly([13, 14, 15]))
        tm.assert_index_equal(idx.shift(-3), hourly([7, 8, 9]))

    def test_shift_nat(self):
        """Shifting by one moves real periods and leaves NaT in place."""
        before = ["2011-01", "2011-02", "NaT", "2011-04"]
        after = ["2011-02", "2011-03", "NaT", "2011-05"]

        shifted = PeriodIndex(before, freq="M", name="idx").shift(1)
        wanted = PeriodIndex(after, freq="M", name="idx")
        tm.assert_index_equal(shifted, wanted)
        assert shifted.name == wanted.name

    def test_shift_gh8083(self):
        """Shift a five-day daily range forward by one."""
        # test shift for PeriodIndex
        # GH#8083
        days = pd.period_range("20130101", periods=5, freq="D")
        wanted = PeriodIndex(
            ["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"],
            freq="D",
        )
        tm.assert_index_equal(days.shift(1), wanted)

    def test_shift_periods(self):
        """``periods`` keyword and positional form work; ``n=`` is checked for a FutureWarning."""
        # GH #22458 : argument 'n' was deprecated in favor of 'periods'
        idx = period_range(freq="A", start="1/1/2001", end="12/1/2009")
        tm.assert_index_equal(idx.shift(periods=0), idx)
        tm.assert_index_equal(idx.shift(0), idx)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=True):
            tm.assert_index_equal(idx.shift(n=0), idx)
|
||||
@@ -0,0 +1,149 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, PeriodIndex, Series, period_range
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestPeriodIndex:
    """Tests for ``asfreq`` on PeriodIndex and on period-indexed Series/DataFrame."""

    def test_asfreq(self):
        """Convert single-period ranges between all frequency pairs using the start anchor."""
        # (frequency, end argument) for seven ranges that all start on 1/1/2001
        specs = [
            ("A", "1/1/2001"),
            ("Q", "1/1/2001"),
            ("M", "1/1/2001"),
            ("D", "1/1/2001"),
            ("H", "1/1/2001 00:00"),
            ("Min", "1/1/2001 00:00"),
            ("S", "1/1/2001 00:00:00"),
        ]
        freqs = [freq for freq, _ in specs]
        by_freq = {
            freq: period_range(freq=freq, start="1/1/2001", end=end)
            for freq, end in specs
        }
        annual = by_freq["A"]

        # the annual range is checked with several spellings of ``how``
        annual_cases = [
            ("Q", "S"),
            ("Q", "s"),
            ("M", "start"),
            ("D", "StarT"),
            ("H", "beGIN"),
            ("Min", "S"),
            ("S", "S"),
        ]
        for target, how in annual_cases:
            assert annual.asfreq(target, how) == by_freq[target]

        # every other range converts to each remaining frequency with "S"
        for source in freqs[1:]:
            for target in freqs:
                if target == source:
                    continue
                assert by_freq[source].asfreq(target, "S") == by_freq[target]

        with pytest.raises(ValueError, match="How must be one of S or E"):
            by_freq["S"].asfreq("T", "foo")

        as_3m = annual.asfreq("3M")
        as_m = annual.asfreq("M")
        expected = period_range(freq="M", start="2001-12", end="2001-12")
        tm.assert_numpy_array_equal(as_3m.asi8, expected.asi8)
        assert as_3m.freqstr == "3M"
        tm.assert_numpy_array_equal(as_m.asi8, expected.asi8)
        assert as_m.freqstr == "M"

    def test_asfreq_nat(self):
        """Monthly to quarterly conversion keeps NaT entries."""
        monthly = PeriodIndex(["2011-01", "2011-02", "NaT", "2011-04"], freq="M")
        quarterly = PeriodIndex(["2011Q1", "2011Q1", "NaT", "2011Q2"], freq="Q")
        tm.assert_index_equal(monthly.asfreq(freq="Q"), quarterly)

    @pytest.mark.parametrize("freq", ["D", "3D"])
    def test_asfreq_mult_pi(self, freq):
        """Convert a "2M" index to ``freq`` with the default anchor and with ``how="S"``."""
        pi = PeriodIndex(["2001-01", "2001-02", "NaT", "2001-03"], freq="2M")

        # (extra asfreq kwargs, expected period strings)
        scenarios = [
            ({}, ["2001-02-28", "2001-03-31", "NaT", "2001-04-30"]),
            ({"how": "S"}, ["2001-01-01", "2001-02-01", "NaT", "2001-03-01"]),
        ]
        for options, stamps in scenarios:
            converted = pi.asfreq(freq, **options)
            wanted = PeriodIndex(stamps, freq=freq)
            tm.assert_index_equal(converted, wanted)
            assert converted.freq == wanted.freq

    def test_asfreq_combined_pi(self):
        """Convert between hourly and combined "1D1H"/"1H1D" frequencies."""
        stamps = ["2001-01-01 00:00", "2001-01-02 02:00", "NaT"]

        hourly = pd.PeriodIndex(stamps, freq="H")
        wanted = PeriodIndex(stamps, freq="25H")
        for freq, how in (("1D1H", "S"), ("1H1D", "E")):
            converted = hourly.asfreq(freq, how=how)
            tm.assert_index_equal(converted, wanted)
            assert converted.freq == wanted.freq

        # (extra asfreq kwargs, expected hourly period strings)
        scenarios = [
            ({}, ["2001-01-02 00:00", "2001-01-03 02:00", "NaT"]),
            ({"how": "S"}, ["2001-01-01 00:00", "2001-01-02 02:00", "NaT"]),
        ]
        for freq in ["1D1H", "1H1D"]:
            for options, expected_stamps in scenarios:
                combined = pd.PeriodIndex(stamps, freq=freq)
                converted = combined.asfreq("H", **options)
                wanted = PeriodIndex(expected_stamps, freq="H")
                tm.assert_index_equal(converted, wanted)
                assert converted.freq == wanted.freq

    def test_asfreq_ts(self):
        """``Series.asfreq`` / ``DataFrame.asfreq`` produce the same index as ``PeriodIndex.asfreq``."""
        index = period_range(freq="A", start="1/1/2001", end="12/31/2010")
        count = len(index)
        ts = Series(np.random.randn(count), index=index)
        df = DataFrame(np.random.randn(count, 3), index=index)

        ts_end = ts.asfreq("D", how="end")
        df_end = df.asfreq("D", how="end")
        end_index = index.asfreq("D", how="end")
        assert len(ts_end) == len(ts)
        tm.assert_index_equal(ts_end.index, end_index)
        tm.assert_index_equal(df_end.index, end_index)

        ts_start = ts.asfreq("D", how="start")
        assert len(ts_start) == len(ts)
        tm.assert_index_equal(ts_start.index, index.asfreq("D", how="start"))

    def test_astype_asfreq(self):
        """``asfreq`` and ``astype("period[...]")`` agree for "M" and "3M"."""
        daily = PeriodIndex(["2011-01-01", "2011-02-01", "2011-03-01"], freq="D")
        months = ["2011-01", "2011-02", "2011-03"]

        for freq in ("M", "3M"):
            wanted = PeriodIndex(months, freq=freq)
            tm.assert_index_equal(daily.asfreq(freq), wanted)
            tm.assert_index_equal(daily.astype("period[" + freq + "]"), wanted)
|
||||
@@ -0,0 +1,128 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, Int64Index, NaT, Period, PeriodIndex, period_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestPeriodIndexAsType:
|
||||
@pytest.mark.parametrize("dtype", [float, "timedelta64", "timedelta64[ns]"])
def test_astype_raises(self, dtype):
    """Casting a PeriodIndex to ``dtype`` must raise ``TypeError`` (GH#13149, GH#13209)."""
    values = ["2016-05-16", "NaT", NaT, np.NaN]
    idx = PeriodIndex(values, freq="D")
    with pytest.raises(TypeError, match="Cannot cast PeriodArray to dtype"):
        idx.astype(dtype)
|
||||
|
||||
def test_astype_conversion(self):
    """Cast a PeriodIndex containing missing values to object, int64, str and "i8"."""
    # GH#13149, GH#13209
    idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq="D")

    # object: one real Period followed by three missing entries
    real = Period("2016-05-16", freq="D")
    as_object = Index(
        [real] + [Period(NaT, freq="D") for _ in range(3)], dtype="object"
    )
    tm.assert_index_equal(idx.astype(object), as_object)

    # int64: the three missing entries map to the same large negative value
    as_int = Int64Index(
        [16937, -9223372036854775808, -9223372036854775808, -9223372036854775808],
        dtype=np.int64,
    )
    tm.assert_index_equal(idx.astype(np.int64), as_int)

    # str: element-wise str()
    as_str = Index(str(period) for period in idx)
    tm.assert_index_equal(idx.astype(str), as_str)

    # "i8" on an annual range matches the asi8 ordinals
    annual = period_range("1990", "2009", freq="A")
    ordinals = annual.astype("i8")
    tm.assert_index_equal(ordinals, Index(annual.asi8))
    tm.assert_numpy_array_equal(ordinals.values, annual.asi8)
|
||||
|
||||
def test_astype_uint(self):
    """Casting to "uint64" and to "uint32" are both compared against a UInt64Index."""
    periods = period_range("2000", periods=2)
    wanted = pd.UInt64Index(np.array([10957, 10958], dtype="uint64"))
    for dtype in ("uint64", "uint32"):
        tm.assert_index_equal(periods.astype(dtype), wanted)
|
||||
|
||||
def test_astype_object(self):
    """``astype(object).values`` and ``_mpl_repr()`` give the same object array."""
    # (index under test, expected object array)
    scenarios = [
        (
            pd.PeriodIndex([], freq="M"),
            np.array([], dtype=object),
        ),
        (
            pd.PeriodIndex(["2011-01", pd.NaT], freq="M"),
            np.array([pd.Period("2011-01", freq="M"), pd.NaT], dtype=object),
        ),
        (
            pd.PeriodIndex(["2011-01-01", pd.NaT], freq="D"),
            np.array([pd.Period("2011-01-01", freq="D"), pd.NaT], dtype=object),
        ),
    ]
    for idx, wanted in scenarios:
        tm.assert_numpy_array_equal(idx.astype(object).values, wanted)
        tm.assert_numpy_array_equal(idx._mpl_repr(), wanted)
|
||||
|
||||
# TODO: de-duplicate this version (from test_ops) with the one above
|
||||
# (from test_period)
|
||||
def test_astype_object2(self):
|
||||
idx = pd.period_range(start="2013-01-01", periods=4, freq="M", name="idx")
|
||||
expected_list = [
|
||||
pd.Period("2013-01-31", freq="M"),
|
||||
pd.Period("2013-02-28", freq="M"),
|
||||
pd.Period("2013-03-31", freq="M"),
|
||||
pd.Period("2013-04-30", freq="M"),
|
||||
]
|
||||
expected = pd.Index(expected_list, dtype=object, name="idx")
|
||||
result = idx.astype(object)
|
||||
assert isinstance(result, Index)
|
||||
assert result.dtype == object
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.name == expected.name
|
||||
assert idx.tolist() == expected_list
|
||||
|
||||
idx = PeriodIndex(
|
||||
["2013-01-01", "2013-01-02", "NaT", "2013-01-04"], freq="D", name="idx"
|
||||
)
|
||||
expected_list = [
|
||||
pd.Period("2013-01-01", freq="D"),
|
||||
pd.Period("2013-01-02", freq="D"),
|
||||
pd.Period("NaT", freq="D"),
|
||||
pd.Period("2013-01-04", freq="D"),
|
||||
]
|
||||
expected = pd.Index(expected_list, dtype=object, name="idx")
|
||||
result = idx.astype(object)
|
||||
assert isinstance(result, Index)
|
||||
assert result.dtype == object
|
||||
tm.assert_index_equal(result, expected)
|
||||
for i in [0, 1, 3]:
|
||||
assert result[i] == expected[i]
|
||||
assert result[2] is pd.NaT
|
||||
assert result.name == expected.name
|
||||
|
||||
result_list = idx.tolist()
|
||||
for i in [0, 1, 3]:
|
||||
assert result_list[i] == expected_list[i]
|
||||
assert result_list[2] is pd.NaT
|
||||
|
||||
def test_astype_category(self):
|
||||
obj = pd.period_range("2000", periods=2)
|
||||
result = obj.astype("category")
|
||||
expected = pd.CategoricalIndex(
|
||||
[pd.Period("2000-01-01", freq="D"), pd.Period("2000-01-02", freq="D")]
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = obj._data.astype("category")
|
||||
expected = expected.values
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_astype_array_fallback(self):
    """astype(bool) is expected to give all-True for a two-element range."""
    obj = pd.period_range("2000", periods=2)
    result = obj.astype(bool)
    expected = pd.Index(np.array([True, True]))
    tm.assert_index_equal(result, expected)

    # Same cast on the backing array returns a plain ndarray.
    result = obj._data.astype(bool)
    expected = np.array([True, True])
    tm.assert_numpy_array_equal(result, expected)
|
||||
@@ -0,0 +1,562 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs.period import IncompatibleFrequency
|
||||
|
||||
from pandas.core.dtypes.dtypes import PeriodDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, Period, PeriodIndex, Series, date_range, offsets, period_range
|
||||
import pandas.core.indexes.period as period
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestPeriodIndex:
|
||||
def setup_method(self, method):
    """Per-test setup hook; intentionally does nothing."""
    pass
|
||||
|
||||
def test_construction_base_constructor(self):
|
||||
# GH 13664
|
||||
arr = [pd.Period("2011-01", freq="M"), pd.NaT, pd.Period("2011-03", freq="M")]
|
||||
tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr))
|
||||
tm.assert_index_equal(pd.Index(np.array(arr)), pd.PeriodIndex(np.array(arr)))
|
||||
|
||||
arr = [np.nan, pd.NaT, pd.Period("2011-03", freq="M")]
|
||||
tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr))
|
||||
tm.assert_index_equal(pd.Index(np.array(arr)), pd.PeriodIndex(np.array(arr)))
|
||||
|
||||
arr = [pd.Period("2011-01", freq="M"), pd.NaT, pd.Period("2011-03", freq="D")]
|
||||
tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object))
|
||||
|
||||
tm.assert_index_equal(
|
||||
pd.Index(np.array(arr)), pd.Index(np.array(arr), dtype=object)
|
||||
)
|
||||
|
||||
def test_constructor_use_start_freq(self):
    """The freq of a Period passed as ``start`` is used for the range."""
    # GH #1118
    p = Period("4/2/2012", freq="B")
    # The range-style PeriodIndex constructor is expected to warn.
    with tm.assert_produces_warning(FutureWarning):
        index = PeriodIndex(start=p, periods=10)
    expected = period_range(start="4/2/2012", periods=10, freq="B")
    tm.assert_index_equal(index, expected)

    index = period_range(start=p, periods=10)
    tm.assert_index_equal(index, expected)
|
||||
|
||||
def test_constructor_field_arrays(self):
    """Build a PeriodIndex from year/quarter and year/month field arrays."""
    # GH #1264

    years = np.arange(1990, 2010).repeat(4)[2:-2]
    quarters = np.tile(np.arange(1, 5), 20)[2:-2]

    index = PeriodIndex(year=years, quarter=quarters, freq="Q-DEC")
    expected = period_range("1990Q3", "2009Q2", freq="Q-DEC")
    tm.assert_index_equal(index, expected)

    # A multiplied freq is expected to give the same ordinals.
    index2 = PeriodIndex(year=years, quarter=quarters, freq="2Q-DEC")
    tm.assert_numpy_array_equal(index.asi8, index2.asi8)

    index = PeriodIndex(year=years, quarter=quarters)
    tm.assert_index_equal(index, expected)

    years = [2007, 2007, 2007]
    months = [1, 2]

    msg = "Mismatched Period array lengths"
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(year=years, month=months, freq="M")
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(year=years, month=months, freq="2M")

    msg = "Can either instantiate from fields or endpoints, but not both"
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(
            year=years, month=months, freq="M", start=Period("2007-01", freq="M")
        )

    years = [2007, 2007, 2007]
    months = [1, 2, 3]
    idx = PeriodIndex(year=years, month=months, freq="M")
    exp = period_range("2007-01", periods=3, freq="M")
    tm.assert_index_equal(idx, exp)
|
||||
|
||||
def test_constructor_U(self):
    """An unknown frequency string ("X") is rejected by period_range."""
    # Historical note from the original test: "U" was once used as the
    # undefined period; the frequency exercised here is "X".
    with pytest.raises(ValueError, match="Invalid frequency: X"):
        period_range("2007-1-1", periods=500, freq="X")
|
||||
|
||||
def test_constructor_nano(self):
    """period_range over nanosecond ordinals 1..4 matches an explicit index."""
    idx = period_range(
        start=Period(ordinal=1, freq="N"), end=Period(ordinal=4, freq="N"), freq="N"
    )
    exp = PeriodIndex(
        [
            Period(ordinal=1, freq="N"),
            Period(ordinal=2, freq="N"),
            Period(ordinal=3, freq="N"),
            Period(ordinal=4, freq="N"),
        ],
        freq="N",
    )
    tm.assert_index_equal(idx, exp)
|
||||
|
||||
def test_constructor_arrays_negative_year(self):
    """year/quarter accessors round-trip the int64 field arrays passed in."""
    years = np.arange(1960, 2000, dtype=np.int64).repeat(4)
    quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40)

    pindex = PeriodIndex(year=years, quarter=quarters)

    tm.assert_index_equal(pindex.year, pd.Index(years))
    tm.assert_index_equal(pindex.quarter, pd.Index(quarters))
|
||||
|
||||
def test_constructor_invalid_quarters(self):
    """Quarter values outside 1..4 (here 0..3) raise ValueError."""
    msg = "Quarter must be 1 <= q <= 4"
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(year=range(2000, 2004), quarter=list(range(4)), freq="Q-DEC")
|
||||
|
||||
def test_constructor_corner(self):
    """Corner cases: missing endpoints, mismatched freqs, float periods."""
    msg = "Not enough parameters to construct Period range"
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(periods=10, freq="A")

    start = Period("2007", freq="A-JUN")
    end = Period("2010", freq="A-DEC")

    msg = "start and end must have same freq"
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(start=start, end=end)

    msg = (
        "Of the three parameters: start, end, and periods, exactly two"
        " must be specified"
    )
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(start=start)
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(end=end)

    # A fractional ``periods`` is expected to behave like its integer part.
    result = period_range("2007-01", periods=10.5, freq="M")
    exp = period_range("2007-01", periods=10, freq="M")
    tm.assert_index_equal(result, exp)
|
||||
|
||||
def test_constructor_fromarraylike(self):
    """Construct from arrays, lists, iterators and other PeriodIndex objects."""
    idx = period_range("2007-01", periods=20, freq="M")

    # values is an array of Period, thus can retrieve freq
    tm.assert_index_equal(PeriodIndex(idx.values), idx)
    tm.assert_index_equal(PeriodIndex(list(idx.values)), idx)

    # Raw ordinals carry no frequency, so one must be supplied.
    msg = "freq not specified and cannot be inferred"
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(idx._ndarray_values)
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(list(idx._ndarray_values))

    msg = "'Period' object is not iterable"
    with pytest.raises(TypeError, match=msg):
        PeriodIndex(data=Period("2007", freq="A"))

    result = PeriodIndex(iter(idx))
    tm.assert_index_equal(result, idx)

    result = PeriodIndex(idx)
    tm.assert_index_equal(result, idx)

    result = PeriodIndex(idx, freq="M")
    tm.assert_index_equal(result, idx)

    result = PeriodIndex(idx, freq=offsets.MonthEnd())
    tm.assert_index_equal(result, idx)
    assert result.freq == "M"

    # Passing a different freq is compared against an explicit asfreq.
    result = PeriodIndex(idx, freq="2M")
    tm.assert_index_equal(result, idx.asfreq("2M"))
    assert result.freq == "2M"

    result = PeriodIndex(idx, freq=offsets.MonthEnd(2))
    tm.assert_index_equal(result, idx.asfreq("2M"))
    assert result.freq == "2M"

    result = PeriodIndex(idx, freq="D")
    exp = idx.asfreq("D", "e")
    tm.assert_index_equal(result, exp)
|
||||
|
||||
def test_constructor_datetime64arr(self):
    """A datetime64[us] ndarray is rejected with a "Wrong dtype" error."""
    vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64)
    vals = vals.view(np.dtype("M8[us]"))

    msg = r"Wrong dtype: datetime64\[us\]"
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(vals, freq="D")
|
||||
|
||||
@pytest.mark.parametrize("box", [None, "series", "index"])
def test_constructor_datetime64arr_ok(self, box):
    """Datetime data (array, Series or index) converts to daily periods."""
    # https://github.com/pandas-dev/pandas/issues/23438
    data = pd.date_range("2017", periods=4, freq="M")
    # "index" leaves the DatetimeIndex as-is; the other boxes unwrap/rewrap it.
    if box is None:
        data = data._values
    elif box == "series":
        data = pd.Series(data)

    result = PeriodIndex(data, freq="D")
    expected = PeriodIndex(
        ["2017-01-31", "2017-02-28", "2017-03-31", "2017-04-30"], freq="D"
    )
    tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_constructor_dtype(self):
    """A ``period[...]`` dtype string sets the freq of the constructed index."""
    # passing a period dtype is compared against passing the equivalent freq
    idx = PeriodIndex(["2013-01", "2013-03"], dtype="period[M]")
    exp = PeriodIndex(["2013-01", "2013-03"], freq="M")
    tm.assert_index_equal(idx, exp)
    assert idx.dtype == "period[M]"

    idx = PeriodIndex(["2013-01-05", "2013-03-05"], dtype="period[3D]")
    exp = PeriodIndex(["2013-01-05", "2013-03-05"], freq="3D")
    tm.assert_index_equal(idx, exp)
    assert idx.dtype == "period[3D]"

    # if we already have a freq and its not the same, then asfreq
    # (not changed)
    idx = PeriodIndex(["2013-01-01", "2013-01-02"], freq="D")

    res = PeriodIndex(idx, dtype="period[M]")
    exp = PeriodIndex(["2013-01", "2013-01"], freq="M")
    tm.assert_index_equal(res, exp)
    assert res.dtype == "period[M]"

    res = PeriodIndex(idx, freq="M")
    tm.assert_index_equal(res, exp)
    assert res.dtype == "period[M]"

    # Conflicting freq and dtype must be rejected.
    msg = "specified freq and dtype are different"
    with pytest.raises(period.IncompatibleFrequency, match=msg):
        PeriodIndex(["2011-01"], freq="M", dtype="period[D]")
|
||||
|
||||
def test_constructor_empty(self):
    """An empty PeriodIndex needs an explicit freq."""
    idx = pd.PeriodIndex([], freq="M")
    assert isinstance(idx, PeriodIndex)
    assert len(idx) == 0
    assert idx.freq == "M"

    with pytest.raises(ValueError, match="freq not specified"):
        pd.PeriodIndex([])
|
||||
|
||||
def test_constructor_pi_nat(self):
    """NaT entries are accepted; an all-NaT input without freq raises."""
    idx = PeriodIndex(
        [Period("2011-01", freq="M"), pd.NaT, Period("2011-01", freq="M")]
    )
    exp = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M")
    tm.assert_index_equal(idx, exp)

    idx = PeriodIndex(
        np.array([Period("2011-01", freq="M"), pd.NaT, Period("2011-01", freq="M")])
    )
    tm.assert_index_equal(idx, exp)

    # Leading NaT values.
    idx = PeriodIndex(
        [pd.NaT, pd.NaT, Period("2011-01", freq="M"), Period("2011-01", freq="M")]
    )
    exp = PeriodIndex(["NaT", "NaT", "2011-01", "2011-01"], freq="M")
    tm.assert_index_equal(idx, exp)

    idx = PeriodIndex(
        np.array(
            [
                pd.NaT,
                pd.NaT,
                Period("2011-01", freq="M"),
                Period("2011-01", freq="M"),
            ]
        )
    )
    tm.assert_index_equal(idx, exp)

    idx = PeriodIndex([pd.NaT, pd.NaT, "2011-01", "2011-01"], freq="M")
    tm.assert_index_equal(idx, exp)

    # Nothing to infer a freq from when every element is missing.
    with pytest.raises(ValueError, match="freq not specified"):
        PeriodIndex([pd.NaT, pd.NaT])

    with pytest.raises(ValueError, match="freq not specified"):
        PeriodIndex(np.array([pd.NaT, pd.NaT]))

    with pytest.raises(ValueError, match="freq not specified"):
        PeriodIndex(["NaT", "NaT"])

    with pytest.raises(ValueError, match="freq not specified"):
        PeriodIndex(np.array(["NaT", "NaT"]))
|
||||
|
||||
def test_constructor_incompat_freq(self):
    """Mixing "M" and "D" Periods raises IncompatibleFrequency."""
    msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)"

    with pytest.raises(period.IncompatibleFrequency, match=msg):
        PeriodIndex(
            [Period("2011-01", freq="M"), pd.NaT, Period("2011-01", freq="D")]
        )

    with pytest.raises(period.IncompatibleFrequency, match=msg):
        PeriodIndex(
            np.array(
                [Period("2011-01", freq="M"), pd.NaT, Period("2011-01", freq="D")]
            )
        )

    # first element is pd.NaT
    with pytest.raises(period.IncompatibleFrequency, match=msg):
        PeriodIndex(
            [pd.NaT, Period("2011-01", freq="M"), Period("2011-01", freq="D")]
        )

    with pytest.raises(period.IncompatibleFrequency, match=msg):
        PeriodIndex(
            np.array(
                [pd.NaT, Period("2011-01", freq="M"), Period("2011-01", freq="D")]
            )
        )
|
||||
|
||||
def test_constructor_mixed(self):
|
||||
idx = PeriodIndex(["2011-01", pd.NaT, Period("2011-01", freq="M")])
|
||||
exp = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M")
|
||||
tm.assert_index_equal(idx, exp)
|
||||
|
||||
idx = PeriodIndex(["NaT", pd.NaT, Period("2011-01", freq="M")])
|
||||
exp = PeriodIndex(["NaT", "NaT", "2011-01"], freq="M")
|
||||
tm.assert_index_equal(idx, exp)
|
||||
|
||||
idx = PeriodIndex([Period("2011-01-01", freq="D"), pd.NaT, "2012-01-01"])
|
||||
exp = PeriodIndex(["2011-01-01", "NaT", "2012-01-01"], freq="D")
|
||||
tm.assert_index_equal(idx, exp)
|
||||
|
||||
def test_constructor_simple_new(self):
    """_simple_new rebuilds an equal index from itself or from its ordinals."""
    idx = period_range("2007-01", name="p", periods=2, freq="M")
    result = idx._simple_new(idx, name="p", freq=idx.freq)
    tm.assert_index_equal(result, idx)

    result = idx._simple_new(idx.astype("i8"), name="p", freq=idx.freq)
    tm.assert_index_equal(result, idx)
|
||||
|
||||
def test_constructor_simple_new_empty(self):
    """_simple_new also works for an empty index."""
    # GH13079
    idx = PeriodIndex([], freq="M", name="p")
    result = idx._simple_new(idx, name="p", freq="M")
    tm.assert_index_equal(result, idx)
|
||||
|
||||
@pytest.mark.parametrize("floats", [[1.1, 2.1], np.array([1.1, 2.1])])
def test_constructor_floats(self, floats):
    """Float data is rejected by both _simple_new and the public constructor."""
    msg = r"PeriodIndex\._simple_new does not accept floats"
    with pytest.raises(TypeError, match=msg):
        pd.PeriodIndex._simple_new(floats, freq="M")

    msg = "PeriodIndex does not allow floating point in construction"
    with pytest.raises(TypeError, match=msg):
        pd.PeriodIndex(floats, freq="M")
|
||||
|
||||
def test_constructor_nat(self):
    """period_range rejects NaT as either endpoint."""
    msg = "start and end must not be NaT"
    with pytest.raises(ValueError, match=msg):
        period_range(start="NaT", end="2011-01-01", freq="M")
    with pytest.raises(ValueError, match=msg):
        period_range(start="2011-01-01", end="NaT", freq="M")
|
||||
|
||||
def test_constructor_year_and_quarter(self):
    """year/quarter Series input matches Periods parsed from "<q>Q<yyyy>"."""
    year = pd.Series([2001, 2002, 2003])
    quarter = year - 2000
    idx = PeriodIndex(year=year, quarter=quarter)
    # Each tuple is (quarter, year), rendered e.g. as "1Q2001".
    strs = ["{t[0]:d}Q{t[1]:d}".format(t=t) for t in zip(quarter, year)]
    lops = list(map(Period, strs))
    p = PeriodIndex(lops)
    tm.assert_index_equal(p, idx)
|
||||
|
||||
@pytest.mark.parametrize(
    "func, warning", [(PeriodIndex, FutureWarning), (period_range, None)]
)
def test_constructor_freq_mult(self, func, warning):
    """Multiplied freqs ("2M", "3D", "4H") step correctly; non-positive raise."""
    # GH #7811
    with tm.assert_produces_warning(warning):
        # must be the same, but for sure...
        pidx = func(start="2014-01", freq="2M", periods=4)
    expected = PeriodIndex(["2014-01", "2014-03", "2014-05", "2014-07"], freq="2M")
    tm.assert_index_equal(pidx, expected)

    with tm.assert_produces_warning(warning):
        pidx = func(start="2014-01-02", end="2014-01-15", freq="3D")
    expected = PeriodIndex(
        ["2014-01-02", "2014-01-05", "2014-01-08", "2014-01-11", "2014-01-14"],
        freq="3D",
    )
    tm.assert_index_equal(pidx, expected)

    with tm.assert_produces_warning(warning):
        pidx = func(end="2014-01-01 17:00", freq="4H", periods=3)
    expected = PeriodIndex(
        ["2014-01-01 09:00", "2014-01-01 13:00", "2014-01-01 17:00"], freq="4H"
    )
    tm.assert_index_equal(pidx, expected)

    # Negative and zero multiples are invalid because a freq represents a span.
    msg = "Frequency must be positive, because it represents span: -1M"
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(["2011-01"], freq="-1M")

    msg = "Frequency must be positive, because it represents span: 0M"
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(["2011-01"], freq="0M")

    msg = "Frequency must be positive, because it represents span: 0M"
    with pytest.raises(ValueError, match=msg):
        period_range("2011-01", periods=3, freq="0M")
|
||||
|
||||
@pytest.mark.parametrize("freq", ["A", "M", "D", "T", "S"])
@pytest.mark.parametrize("mult", [1, 2, 3, 4, 5])
def test_constructor_freq_mult_dti_compat(self, mult, freq):
    """period_range agrees with date_range(...).to_period for multiplied freqs."""
    freqstr = str(mult) + freq
    pidx = period_range(start="2014-04-01", freq=freqstr, periods=10)
    expected = date_range(start="2014-04-01", freq=freqstr, periods=10).to_period(
        freqstr
    )
    tm.assert_index_equal(pidx, expected)
|
||||
|
||||
def test_constructor_freq_combined(self):
|
||||
for freq in ["1D1H", "1H1D"]:
|
||||
pidx = PeriodIndex(["2016-01-01", "2016-01-02"], freq=freq)
|
||||
expected = PeriodIndex(["2016-01-01 00:00", "2016-01-02 00:00"], freq="25H")
|
||||
for freq in ["1D1H", "1H1D"]:
|
||||
pidx = period_range(start="2016-01-01", periods=2, freq=freq)
|
||||
expected = PeriodIndex(["2016-01-01 00:00", "2016-01-02 01:00"], freq="25H")
|
||||
tm.assert_index_equal(pidx, expected)
|
||||
|
||||
def test_constructor_range_based_deprecated(self):
    """Range-style PeriodIndex construction warns but still builds the index."""
    with tm.assert_produces_warning(FutureWarning):
        pi = PeriodIndex(freq="A", start="1/1/2001", end="12/1/2009")
    assert len(pi) == 9
|
||||
|
||||
def test_constructor_range_based_deprecated_different_freq(self):
    """The deprecation message mentions the inferred ``freq="A-DEC"``."""
    with tm.assert_produces_warning(FutureWarning) as m:
        PeriodIndex(start="2000", periods=2)

    # Exactly one warning is expected; unpacking fails otherwise.
    warning, = m
    assert 'freq="A-DEC"' in str(warning.message)
|
||||
|
||||
def test_constructor(self):
    """Range lengths per frequency, endpoint handling and freq inference."""
    # Length of 2001..2009 ranges at various frequencies.
    pi = period_range(freq="A", start="1/1/2001", end="12/1/2009")
    assert len(pi) == 9

    pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009")
    assert len(pi) == 4 * 9

    pi = period_range(freq="M", start="1/1/2001", end="12/1/2009")
    assert len(pi) == 12 * 9

    pi = period_range(freq="D", start="1/1/2001", end="12/31/2009")
    assert len(pi) == 365 * 9 + 2

    pi = period_range(freq="B", start="1/1/2001", end="12/31/2009")
    assert len(pi) == 261 * 9

    pi = period_range(freq="H", start="1/1/2001", end="12/31/2001 23:00")
    assert len(pi) == 365 * 24

    pi = period_range(freq="Min", start="1/1/2001", end="1/1/2001 23:59")
    assert len(pi) == 24 * 60

    pi = period_range(freq="S", start="1/1/2001", end="1/1/2001 23:59:59")
    assert len(pi) == 24 * 60 * 60

    # freq is taken from a Period start.
    start = Period("02-Apr-2005", "B")
    i1 = period_range(start=start, periods=20)
    assert len(i1) == 20
    assert i1.freq == start.freq
    assert i1[0] == start

    # freq is taken from a Period end.
    end_intv = Period("2006-12-31", "W")
    i1 = period_range(end=end_intv, periods=10)
    assert len(i1) == 10
    assert i1.freq == end_intv.freq
    assert i1[-1] == end_intv

    # Alternative spellings of the weekly freq give the same range.
    end_intv = Period("2006-12-31", "1w")
    i2 = period_range(end=end_intv, periods=10)
    assert len(i1) == len(i2)
    assert (i1 == i2).all()
    assert i1.freq == i2.freq

    end_intv = Period("2006-12-31", ("w", 1))
    i2 = period_range(end=end_intv, periods=10)
    assert len(i1) == len(i2)
    assert (i1 == i2).all()
    assert i1.freq == i2.freq

    end_intv = Period("2005-05-01", "B")
    i1 = period_range(start=start, end=end_intv)

    # infer freq from first element
    i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")])
    assert len(i2) == 2
    assert i2[0] == end_intv

    i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")]))
    assert len(i2) == 2
    assert i2[0] == end_intv

    # Mixed freq should fail
    vals = [end_intv, Period("2006-12-31", "w")]
    msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)"
    with pytest.raises(IncompatibleFrequency, match=msg):
        PeriodIndex(vals)
    vals = np.array(vals)
    with pytest.raises(IncompatibleFrequency, match=msg):
        PeriodIndex(vals)
|
||||
|
||||
def test_constructor_error(self):
    """Mismatched endpoint freqs and under-specified ranges raise ValueError."""
    start = Period("02-Apr-2005", "B")
    end_intv = Period("2006-12-31", ("w", 1))

    msg = "start and end must have same freq"
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(start=start, end=end_intv)

    msg = (
        "Of the three parameters: start, end, and periods, "
        "exactly two must be specified"
    )
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(start=start)
|
||||
|
||||
@pytest.mark.parametrize(
    "freq", ["M", "Q", "A", "D", "B", "T", "S", "L", "U", "N", "H"]
)
def test_recreate_from_data(self, freq):
    """An index rebuilt from ``.values`` plus its freq equals the original."""
    org = period_range(start="2001/04/01", freq=freq, periods=1)
    idx = PeriodIndex(org.values, freq=freq)
    tm.assert_index_equal(idx, org)
|
||||
|
||||
def test_map_with_string_constructor(self):
|
||||
raw = [2005, 2007, 2009]
|
||||
index = PeriodIndex(raw, freq="A")
|
||||
|
||||
expected = Index([str(num) for num in raw])
|
||||
res = index.map(str)
|
||||
|
||||
# should return an Index
|
||||
assert isinstance(res, Index)
|
||||
|
||||
# preserve element types
|
||||
assert all(isinstance(resi, str) for resi in res)
|
||||
|
||||
# lastly, values should compare equal
|
||||
tm.assert_index_equal(res, expected)
|
||||
|
||||
|
||||
class TestSeriesPeriod:
|
||||
def setup_method(self, method):
    """Store a 10-element daily period Series on ``self.series``."""
    self.series = Series(period_range("2000-01-01", periods=10, freq="D"))
|
||||
|
||||
def test_constructor_cant_cast_period(self):
    """Building a float Series from period data raises TypeError."""
    msg = "Cannot cast PeriodArray to dtype float64"
    with pytest.raises(TypeError, match=msg):
        Series(period_range("2000-01-01", periods=10, freq="D"), dtype=float)
|
||||
|
||||
def test_constructor_cast_object(self):
|
||||
s = Series(period_range("1/1/2000", periods=10), dtype=PeriodDtype("D"))
|
||||
exp = Series(period_range("1/1/2000", periods=10))
|
||||
tm.assert_series_equal(s, exp)
|
||||
@@ -0,0 +1,211 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import PeriodIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_to_native_types():
    """to_native_types renders periods as strings, honouring its options."""
    index = PeriodIndex(["2017-01-01", "2017-01-02", "2017-01-03"], freq="D")

    # First, with no arguments.
    expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype="=U10")

    result = index.to_native_types()
    tm.assert_numpy_array_equal(result, expected)

    # No NaN values, so na_rep has no effect
    result = index.to_native_types(na_rep="pandas")
    tm.assert_numpy_array_equal(result, expected)

    # Make sure slicing works
    expected = np.array(["2017-01-01", "2017-01-03"], dtype="=U10")

    result = index.to_native_types([0, 2])
    tm.assert_numpy_array_equal(result, expected)

    # Make sure date formatting works
    expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype="=U10")

    result = index.to_native_types(date_format="%m-%Y-%d")
    tm.assert_numpy_array_equal(result, expected)

    # NULL object handling should work
    index = PeriodIndex(["2017-01-01", pd.NaT, "2017-01-03"], freq="D")
    expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object)

    result = index.to_native_types()
    tm.assert_numpy_array_equal(result, expected)

    expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object)

    result = index.to_native_types(na_rep="pandas")
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestPeriodIndexRendering:
|
||||
def test_frame_repr(self):
|
||||
df = pd.DataFrame({"A": [1, 2, 3]}, index=pd.date_range("2000", periods=3))
|
||||
result = repr(df)
|
||||
expected = " A\n2000-01-01 1\n2000-01-02 2\n2000-01-03 3"
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize("method", ["__repr__", "__str__"])
def test_representation(self, method):
    """__repr__/__str__ of PeriodIndex show values, dtype and freq."""
    # GH#7601
    idx1 = PeriodIndex([], freq="D")
    idx2 = PeriodIndex(["2011-01-01"], freq="D")
    idx3 = PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
    idx4 = PeriodIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
    idx5 = PeriodIndex(["2011", "2012", "2013"], freq="A")
    idx6 = PeriodIndex(["2011-01-01 09:00", "2012-02-01 10:00", "NaT"], freq="H")
    idx7 = pd.period_range("2013Q1", periods=1, freq="Q")
    idx8 = pd.period_range("2013Q1", periods=2, freq="Q")
    idx9 = pd.period_range("2013Q1", periods=3, freq="Q")
    idx10 = PeriodIndex(["2011-01-01", "2011-02-01"], freq="3D")

    exp1 = "PeriodIndex([], dtype='period[D]', freq='D')"

    exp2 = "PeriodIndex(['2011-01-01'], dtype='period[D]', freq='D')"

    exp3 = "PeriodIndex(['2011-01-01', '2011-01-02'], dtype='period[D]', freq='D')"

    exp4 = (
        "PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
        "dtype='period[D]', freq='D')"
    )

    exp5 = (
        "PeriodIndex(['2011', '2012', '2013'], dtype='period[A-DEC]', "
        "freq='A-DEC')"
    )

    exp6 = (
        "PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], "
        "dtype='period[H]', freq='H')"
    )

    exp7 = "PeriodIndex(['2013Q1'], dtype='period[Q-DEC]', freq='Q-DEC')"

    exp8 = "PeriodIndex(['2013Q1', '2013Q2'], dtype='period[Q-DEC]', freq='Q-DEC')"

    exp9 = (
        "PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], "
        "dtype='period[Q-DEC]', freq='Q-DEC')"
    )

    exp10 = (
        "PeriodIndex(['2011-01-01', '2011-02-01'], "
        "dtype='period[3D]', freq='3D')"
    )

    # Pair every index with its expected text and compare one by one.
    for idx, expected in zip(
        [idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9, idx10],
        [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9, exp10],
    ):
        result = getattr(idx, method)()
        assert result == expected
|
||||
|
||||
def test_representation_to_series(self):
    """repr of a Series built from each PeriodIndex matches the expected text."""
    # GH#10971
    idx1 = PeriodIndex([], freq="D")
    idx2 = PeriodIndex(["2011-01-01"], freq="D")
    idx3 = PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
    idx4 = PeriodIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
    idx5 = PeriodIndex(["2011", "2012", "2013"], freq="A")
    idx6 = PeriodIndex(["2011-01-01 09:00", "2012-02-01 10:00", "NaT"], freq="H")

    idx7 = pd.period_range("2013Q1", periods=1, freq="Q")
    idx8 = pd.period_range("2013Q1", periods=2, freq="Q")
    idx9 = pd.period_range("2013Q1", periods=3, freq="Q")

    # NOTE(review): column padding below follows the usual Series repr layout
    # (four spaces between index and value, values right-aligned) -- confirm
    # against the pandas version in use.
    exp1 = """Series([], dtype: period[D])"""

    exp2 = """0    2011-01-01
dtype: period[D]"""

    exp3 = """0    2011-01-01
1    2011-01-02
dtype: period[D]"""

    exp4 = """0    2011-01-01
1    2011-01-02
2    2011-01-03
dtype: period[D]"""

    exp5 = """0    2011
1    2012
2    2013
dtype: period[A-DEC]"""

    exp6 = """0    2011-01-01 09:00
1    2012-02-01 10:00
2                 NaT
dtype: period[H]"""

    exp7 = """0    2013Q1
dtype: period[Q-DEC]"""

    exp8 = """0    2013Q1
1    2013Q2
dtype: period[Q-DEC]"""

    exp9 = """0    2013Q1
1    2013Q2
2    2013Q3
dtype: period[Q-DEC]"""

    for idx, expected in zip(
        [idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9],
        [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9],
    ):
        result = repr(pd.Series(idx))
        assert result == expected
|
||||
|
||||
def test_summary(self):
    """_summary() reports entry count, first/last value and the freq."""
    # GH#9116
    idx1 = PeriodIndex([], freq="D")
    idx2 = PeriodIndex(["2011-01-01"], freq="D")
    idx3 = PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
    idx4 = PeriodIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
    idx5 = PeriodIndex(["2011", "2012", "2013"], freq="A")
    idx6 = PeriodIndex(["2011-01-01 09:00", "2012-02-01 10:00", "NaT"], freq="H")

    idx7 = pd.period_range("2013Q1", periods=1, freq="Q")
    idx8 = pd.period_range("2013Q1", periods=2, freq="Q")
    idx9 = pd.period_range("2013Q1", periods=3, freq="Q")

    exp1 = """PeriodIndex: 0 entries
Freq: D"""

    exp2 = """PeriodIndex: 1 entries, 2011-01-01 to 2011-01-01
Freq: D"""

    exp3 = """PeriodIndex: 2 entries, 2011-01-01 to 2011-01-02
Freq: D"""

    exp4 = """PeriodIndex: 3 entries, 2011-01-01 to 2011-01-03
Freq: D"""

    exp5 = """PeriodIndex: 3 entries, 2011 to 2013
Freq: A-DEC"""

    exp6 = """PeriodIndex: 3 entries, 2011-01-01 09:00 to NaT
Freq: H"""

    exp7 = """PeriodIndex: 1 entries, 2013Q1 to 2013Q1
Freq: Q-DEC"""

    exp8 = """PeriodIndex: 2 entries, 2013Q1 to 2013Q2
Freq: Q-DEC"""

    exp9 = """PeriodIndex: 3 entries, 2013Q1 to 2013Q3
Freq: Q-DEC"""

    for idx, expected in zip(
        [idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9],
        [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9],
    ):
        result = idx._summary()
        assert result == expected
|
||||
@@ -0,0 +1,687 @@
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import period as libperiod
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DatetimeIndex, Period, PeriodIndex, Series, notna, period_range
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestGetItem:
    """Tests for ``__getitem__`` on PeriodIndex and on Series indexed by one."""

    def test_ellipsis(self):
        """``idx[...]`` yields an equal index that is a distinct object."""
        # GH#21282
        idx = period_range("2011-01-01", "2011-01-31", freq="D", name="idx")

        result = idx[...]
        assert result.equals(idx)
        assert result is not idx

    def test_getitem(self):
        """Scalar lookups return Periods; slices keep values, name and freq."""
        idx = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")

        result = idx[0]
        assert result == pd.Period("2011-01-01", freq="D")

        result = idx[-1]
        assert result == pd.Period("2011-01-31", freq="D")

        result = idx[0:5]
        expected = pd.period_range("2011-01-01", "2011-01-05", freq="D", name="idx")
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
        assert result.freq == "D"

        result = idx[0:10:2]
        expected = pd.PeriodIndex(
            ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-07", "2011-01-09"],
            freq="D",
            name="idx",
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
        assert result.freq == "D"

        result = idx[-20:-5:3]
        expected = pd.PeriodIndex(
            ["2011-01-12", "2011-01-15", "2011-01-18", "2011-01-21", "2011-01-24"],
            freq="D",
            name="idx",
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
        assert result.freq == "D"

        result = idx[4::-1]
        expected = PeriodIndex(
            ["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"],
            freq="D",
            name="idx",
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
        assert result.freq == "D"

    def test_getitem_index(self):
        """Integer-list and boolean-list indexers return the selected periods."""
        idx = period_range("2007-01", periods=10, freq="M", name="x")

        result = idx[[1, 3, 5]]
        exp = pd.PeriodIndex(["2007-02", "2007-04", "2007-06"], freq="M", name="x")
        tm.assert_index_equal(result, exp)

        result = idx[[True, True, False, False, False, True, True, False, False, False]]
        exp = pd.PeriodIndex(
            ["2007-01", "2007-02", "2007-06", "2007-07"], freq="M", name="x"
        )
        tm.assert_index_equal(result, exp)

    def test_getitem_partial(self):
        """Partial-string keys and slices select from a monthly Series."""
        rng = period_range("2007-01", periods=50, freq="M")
        ts = Series(np.random.randn(len(rng)), rng)

        # A year before the start of the range is a missing key.
        with pytest.raises(KeyError, match=r"^'2006'$"):
            ts["2006"]

        result = ts["2008"]
        assert (result.index.year == 2008).all()

        result = ts["2008":"2009"]
        assert len(result) == 24

        result = ts["2008-1":"2009-12"]
        assert len(result) == 24

        result = ts["2008Q1":"2009Q4"]
        assert len(result) == 24

        result = ts[:"2009"]
        assert len(result) == 36

        result = ts["2009":]
        assert len(result) == 50 - 24

        # The label slice from 2009 onward must match the positional slice.
        exp = result
        result = ts[24:]
        tm.assert_series_equal(exp, result)

        # Duplicating the tail makes the index non-unique, so label slicing fails.
        ts = ts[10:].append(ts[10:])
        msg = "left slice bound for non-unique label: '2008'"
        with pytest.raises(KeyError, match=msg):
            ts[slice("2008", "2009")]

    def test_getitem_datetime(self):
        """Slicing with datetimes that bracket the whole index returns everything."""
        rng = period_range(start="2012-01-01", periods=10, freq="W-MON")
        ts = Series(range(len(rng)), index=rng)

        dt1 = datetime(2011, 10, 2)
        dt4 = datetime(2012, 4, 20)

        rs = ts[dt1:dt4]
        tm.assert_series_equal(rs, ts)

    def test_getitem_nat(self):
        """NaT entries can be read back from the index and used as Series keys."""
        idx = pd.PeriodIndex(["2011-01", "NaT", "2011-02"], freq="M")
        assert idx[0] == pd.Period("2011-01", freq="M")
        assert idx[1] is pd.NaT

        s = pd.Series([0, 1, 2], index=idx)
        assert s[pd.NaT] == 1

        s = pd.Series(idx, index=idx)
        assert s[pd.Period("2011-01", freq="M")] == pd.Period("2011-01", freq="M")
        assert s[pd.NaT] is pd.NaT

    def test_getitem_list_periods(self):
        """A list holding one Period selects the matching single-row Series."""
        # GH 7710
        rng = period_range(start="2012-01-01", periods=10, freq="D")
        ts = Series(range(len(rng)), index=rng)
        exp = ts.iloc[[1]]
        tm.assert_series_equal(ts[[Period("2012-01-02", freq="D")]], exp)

    def test_getitem_seconds(self):
        """Partial-string lookups on second-frequency Series (datetime and period)."""
        # GH#6716
        didx = pd.date_range(start="2013/01/01 09:00:00", freq="S", periods=4000)
        pidx = period_range(start="2013/01/01 09:00:00", freq="S", periods=4000)

        for idx in [didx, pidx]:
            # NOTE: string lookups on the bare index (GH7116) are not asserted;
            # only Series-level access is checked below.
            s = Series(np.random.rand(len(idx)), index=idx)
            tm.assert_series_equal(s["2013/01/01 10:00"], s[3600:3660])
            tm.assert_series_equal(s["2013/01/01 9H"], s[:3600])
            for d in ["2013/01/01", "2013/01", "2013"]:
                tm.assert_series_equal(s[d], s)

    def test_getitem_day(self):
        """Partial-string lookups on daily Series (datetime and period)."""
        # GH#6716
        # Confirm DatetimeIndex and PeriodIndex works identically
        didx = pd.date_range(start="2013/01/01", freq="D", periods=400)
        pidx = period_range(start="2013/01/01", freq="D", periods=400)

        for idx in [didx, pidx]:
            # NOTE: string lookups on the bare index (GH7116) are not asserted;
            # only Series-level access is checked below.
            s = Series(np.random.rand(len(idx)), index=idx)
            tm.assert_series_equal(s["2013/01"], s[0:31])
            tm.assert_series_equal(s["2013/02"], s[31:59])
            tm.assert_series_equal(s["2014"], s[365:])

            # Keys finer than the daily frequency are rejected.
            invalid = ["2013/02/01 9H", "2013/02/01 09:00"]
            for v in invalid:
                with pytest.raises(KeyError, match=v):
                    s[v]
|
||||
|
||||
|
||||
class TestWhere:
    """Tests for ``PeriodIndex.where``."""

    @pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
    def test_where(self, klass):
        """``where`` accepts the condition as any of several container types."""
        i = period_range("20130101", periods=5, freq="D")
        cond = [True] * len(i)
        expected = i
        result = i.where(klass(cond))
        tm.assert_index_equal(result, expected)

        # A False in the condition is replaced by NaT when no `other` is given.
        cond = [False] + [True] * (len(i) - 1)
        expected = PeriodIndex([pd.NaT] + i[1:].tolist(), freq="D")
        result = i.where(klass(cond))
        tm.assert_index_equal(result, expected)

    def test_where_other(self):
        """``where`` with an explicit ``other``: NA scalars, an index, an array."""
        i = period_range("20130101", periods=5, freq="D")
        # The condition is all-True here, so either NA scalar leaves `i` unchanged.
        for arr in [np.nan, pd.NaT]:
            result = i.where(notna(i), other=arr)
            expected = i
            tm.assert_index_equal(result, expected)

        i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), freq="D")
        result = i.where(notna(i2), i2)
        tm.assert_index_equal(result, i2)

        # Same check, passing the replacement as a raw values array.
        result = i.where(notna(i2), i2.values)
        tm.assert_index_equal(result, i2)
|
||||
|
||||
|
||||
class TestTake:
    """Tests for ``PeriodIndex.take``."""

    def test_take(self):
        """``take`` returns a PeriodIndex with the requested rows, name and freq."""
        # GH#10295
        idx = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")

        # take() with a one-element list still returns an index, so compare
        # against a one-element index rather than against a scalar Period.
        result = idx.take([0])
        expected = pd.PeriodIndex(["2011-01-01"], freq="D", name="idx")
        tm.assert_index_equal(result, expected)

        result = idx.take([5])
        expected = pd.PeriodIndex(["2011-01-06"], freq="D", name="idx")
        tm.assert_index_equal(result, expected)

        result = idx.take([0, 1, 2])
        expected = pd.period_range("2011-01-01", "2011-01-03", freq="D", name="idx")
        tm.assert_index_equal(result, expected)
        assert result.freq == "D"
        assert result.freq == expected.freq

        result = idx.take([0, 2, 4])
        expected = pd.PeriodIndex(
            ["2011-01-01", "2011-01-03", "2011-01-05"], freq="D", name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
        assert result.freq == "D"

        result = idx.take([7, 4, 1])
        expected = pd.PeriodIndex(
            ["2011-01-08", "2011-01-05", "2011-01-02"], freq="D", name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
        assert result.freq == "D"

        result = idx.take([3, 2, 5])
        expected = PeriodIndex(
            ["2011-01-04", "2011-01-03", "2011-01-06"], freq="D", name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
        assert result.freq == "D"

        # Negative positions count from the end.
        result = idx.take([-3, 2, 5])
        expected = PeriodIndex(
            ["2011-01-29", "2011-01-03", "2011-01-06"], freq="D", name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
        assert result.freq == "D"

    def test_take_misc(self):
        """``take`` and list indexing agree on values, type, freq and name."""
        index = period_range(start="1/1/10", end="12/31/12", freq="D", name="idx")
        expected = PeriodIndex(
            [
                datetime(2010, 1, 6),
                datetime(2010, 1, 7),
                datetime(2010, 1, 9),
                datetime(2010, 1, 13),
            ],
            freq="D",
            name="idx",
        )

        taken1 = index.take([5, 6, 8, 12])
        taken2 = index[[5, 6, 8, 12]]

        for taken in [taken1, taken2]:
            tm.assert_index_equal(taken, expected)
            assert isinstance(taken, PeriodIndex)
            assert taken.freq == index.freq
            assert taken.name == expected.name

    def test_take_fill_value(self):
        """``fill_value`` turns -1 into NaT; other negatives are rejected."""
        # GH#12631
        idx = pd.PeriodIndex(
            ["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", freq="D"
        )
        # Without fill_value, -1 is an ordinary "last element" position.
        result = idx.take(np.array([1, 0, -1]))
        expected = pd.PeriodIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", freq="D"
        )
        tm.assert_index_equal(result, expected)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = pd.PeriodIndex(
            ["2011-02-01", "2011-01-01", "NaT"], name="xxx", freq="D"
        )
        tm.assert_index_equal(result, expected)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = pd.PeriodIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", freq="D"
        )
        tm.assert_index_equal(result, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "index -5 is out of bounds for size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))
|
||||
|
||||
|
||||
class TestIndexing:
    """Tests for label lookup on PeriodIndex (get_loc, get_indexer, etc.)."""

    def test_get_loc_msg(self):
        """A missing Period raises KeyError carrying that Period as its argument."""
        idx = period_range("2000-1-1", freq="A", periods=10)
        bad_period = Period("2012", "A")
        msg = r"^Period\('2012', 'A-DEC'\)$"
        # Capture the exception so the args check cannot be skipped silently.
        with pytest.raises(KeyError, match=msg) as excinfo:
            idx.get_loc(bad_period)
        assert excinfo.value.args[0] == bad_period

    def test_get_loc_nat(self):
        """Every NA-like key locates the NaT slot, as for DatetimeIndex."""
        didx = DatetimeIndex(["2011-01-01", "NaT", "2011-01-03"])
        pidx = PeriodIndex(["2011-01-01", "NaT", "2011-01-03"], freq="M")

        # check DatetimeIndex compat
        for idx in [didx, pidx]:
            assert idx.get_loc(pd.NaT) == 1
            assert idx.get_loc(None) == 1
            assert idx.get_loc(float("nan")) == 1
            assert idx.get_loc(np.nan) == 1

    def test_get_loc(self):
        """``get_loc`` on unique, duplicated-monotonic and non-monotonic indexes."""
        # GH 17717
        p0 = pd.Period("2017-09-01")
        p1 = pd.Period("2017-09-02")
        p2 = pd.Period("2017-09-03")

        # get the location of p1/p2 from
        # monotonic increasing PeriodIndex with non-duplicate
        idx0 = pd.PeriodIndex([p0, p1, p2])
        expected_idx0_p1 = 1
        expected_idx0_p2 = 2

        assert idx0.get_loc(p1) == expected_idx0_p1
        assert idx0.get_loc(str(p1)) == expected_idx0_p1
        assert idx0.get_loc(p2) == expected_idx0_p2
        assert idx0.get_loc(str(p2)) == expected_idx0_p2

        msg = "Cannot interpret 'foo' as period"
        with pytest.raises(KeyError, match=msg):
            idx0.get_loc("foo")
        with pytest.raises(KeyError, match=r"^1\.1$"):
            idx0.get_loc(1.1)

        msg = (
            r"'PeriodIndex\(\['2017-09-01', '2017-09-02', '2017-09-03'\],"
            r" dtype='period\[D\]', freq='D'\)' is an invalid key"
        )
        with pytest.raises(TypeError, match=msg):
            idx0.get_loc(idx0)

        # get the location of p1/p2 from
        # monotonic increasing PeriodIndex with duplicate
        idx1 = pd.PeriodIndex([p1, p1, p2])
        expected_idx1_p1 = slice(0, 2)
        expected_idx1_p2 = 2

        assert idx1.get_loc(p1) == expected_idx1_p1
        assert idx1.get_loc(str(p1)) == expected_idx1_p1
        assert idx1.get_loc(p2) == expected_idx1_p2
        assert idx1.get_loc(str(p2)) == expected_idx1_p2

        msg = "Cannot interpret 'foo' as period"
        with pytest.raises(KeyError, match=msg):
            idx1.get_loc("foo")

        with pytest.raises(KeyError, match=r"^1\.1$"):
            idx1.get_loc(1.1)

        msg = (
            r"'PeriodIndex\(\['2017-09-02', '2017-09-02', '2017-09-03'\],"
            r" dtype='period\[D\]', freq='D'\)' is an invalid key"
        )
        with pytest.raises(TypeError, match=msg):
            idx1.get_loc(idx1)

        # get the location of p1/p2 from
        # non-monotonic increasing/decreasing PeriodIndex with duplicate
        idx2 = pd.PeriodIndex([p2, p1, p2])
        expected_idx2_p1 = 1
        expected_idx2_p2 = np.array([True, False, True])

        assert idx2.get_loc(p1) == expected_idx2_p1
        assert idx2.get_loc(str(p1)) == expected_idx2_p1
        tm.assert_numpy_array_equal(idx2.get_loc(p2), expected_idx2_p2)
        tm.assert_numpy_array_equal(idx2.get_loc(str(p2)), expected_idx2_p2)

    def test_is_monotonic_increasing(self):
        """``is_monotonic_increasing`` for rising, falling and mixed orderings."""
        # GH 17717
        p0 = pd.Period("2017-09-01")
        p1 = pd.Period("2017-09-02")
        p2 = pd.Period("2017-09-03")

        idx_inc0 = pd.PeriodIndex([p0, p1, p2])
        idx_inc1 = pd.PeriodIndex([p0, p1, p1])
        idx_dec0 = pd.PeriodIndex([p2, p1, p0])
        idx_dec1 = pd.PeriodIndex([p2, p1, p1])
        idx = pd.PeriodIndex([p1, p2, p0])

        assert idx_inc0.is_monotonic_increasing is True
        assert idx_inc1.is_monotonic_increasing is True
        assert idx_dec0.is_monotonic_increasing is False
        assert idx_dec1.is_monotonic_increasing is False
        assert idx.is_monotonic_increasing is False

    def test_is_monotonic_decreasing(self):
        """``is_monotonic_decreasing`` for rising, falling and mixed orderings."""
        # GH 17717
        p0 = pd.Period("2017-09-01")
        p1 = pd.Period("2017-09-02")
        p2 = pd.Period("2017-09-03")

        idx_inc0 = pd.PeriodIndex([p0, p1, p2])
        idx_inc1 = pd.PeriodIndex([p0, p1, p1])
        idx_dec0 = pd.PeriodIndex([p2, p1, p0])
        idx_dec1 = pd.PeriodIndex([p2, p1, p1])
        idx = pd.PeriodIndex([p1, p2, p0])

        assert idx_inc0.is_monotonic_decreasing is False
        assert idx_inc1.is_monotonic_decreasing is False
        assert idx_dec0.is_monotonic_decreasing is True
        assert idx_dec1.is_monotonic_decreasing is True
        assert idx.is_monotonic_decreasing is False

    def test_contains(self):
        """``in`` accepts Periods and strings that resolve to a contained period."""
        # GH 17717
        p0 = pd.Period("2017-09-01")
        p1 = pd.Period("2017-09-02")
        p2 = pd.Period("2017-09-03")
        p3 = pd.Period("2017-09-04")

        ps0 = [p0, p1, p2]
        idx0 = pd.PeriodIndex(ps0)

        for p in ps0:
            assert p in idx0
            assert str(p) in idx0

        assert "2017-09-01 00:00:01" in idx0

        assert "2017-09" in idx0

        assert p3 not in idx0

    def test_get_value(self):
        """``get_value`` returns a scalar for unique keys, an array for duplicates."""
        # GH 17717
        p0 = pd.Period("2017-09-01")
        p1 = pd.Period("2017-09-02")
        p2 = pd.Period("2017-09-03")

        idx0 = pd.PeriodIndex([p0, p1, p2])
        input0 = np.array([1, 2, 3])
        expected0 = 2

        result0 = idx0.get_value(input0, p1)
        assert result0 == expected0

        idx1 = pd.PeriodIndex([p1, p1, p2])
        input1 = np.array([1, 2, 3])
        expected1 = np.array([1, 2])

        result1 = idx1.get_value(input1, p1)
        tm.assert_numpy_array_equal(result1, expected1)

        idx2 = pd.PeriodIndex([p1, p2, p1])
        input2 = np.array([1, 2, 3])
        expected2 = np.array([1, 3])

        result2 = idx2.get_value(input2, p1)
        tm.assert_numpy_array_equal(result2, expected2)

    def test_get_indexer(self):
        """``get_indexer`` with pad, backfill, nearest and a tolerance."""
        # GH 17717
        p1 = pd.Period("2017-09-01")
        p2 = pd.Period("2017-09-04")
        p3 = pd.Period("2017-09-07")

        tp0 = pd.Period("2017-08-31")
        tp1 = pd.Period("2017-09-02")
        tp2 = pd.Period("2017-09-05")
        tp3 = pd.Period("2017-09-09")

        idx = pd.PeriodIndex([p1, p2, p3])

        tm.assert_numpy_array_equal(
            idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp)
        )

        target = pd.PeriodIndex([tp0, tp1, tp2, tp3])
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "pad"), np.array([-1, 0, 1, 2], dtype=np.intp)
        )
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "backfill"), np.array([0, 1, 2, -1], dtype=np.intp)
        )
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "nearest"), np.array([0, 0, 1, 2], dtype=np.intp)
        )

        res = idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 day"))
        tm.assert_numpy_array_equal(res, np.array([0, 0, 1, -1], dtype=np.intp))

    def test_get_indexer_non_unique(self):
        """``get_indexer_non_unique`` returns the indexer and the missing positions."""
        # GH 17717
        p1 = pd.Period("2017-09-02")
        p2 = pd.Period("2017-09-03")
        p3 = pd.Period("2017-09-04")
        p4 = pd.Period("2017-09-05")

        idx1 = pd.PeriodIndex([p1, p2, p1])
        idx2 = pd.PeriodIndex([p2, p1, p3, p4])

        result = idx1.get_indexer_non_unique(idx2)
        expected_indexer = np.array([1, 0, 2, -1, -1], dtype=np.intp)
        expected_missing = np.array([2, 3], dtype=np.int64)

        tm.assert_numpy_array_equal(result[0], expected_indexer)
        tm.assert_numpy_array_equal(result[1], expected_missing)

    # TODO: This method came from test_period; de-dup with version above
    def test_get_loc2(self):
        """``get_loc`` with fill methods, equivalent key types and tolerances."""
        idx = pd.period_range("2000-01-01", periods=3)

        for method in [None, "pad", "backfill", "nearest"]:
            assert idx.get_loc(idx[1], method) == 1
            assert idx.get_loc(idx[1].asfreq("H", how="start"), method) == 1
            assert idx.get_loc(idx[1].to_timestamp(), method) == 1
            assert idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method) == 1
            assert idx.get_loc(str(idx[1]), method) == 1

        # Every other day, so "2000-01-02T12" has no exact match.
        idx = pd.period_range("2000-01-01", periods=5)[::2]
        assert idx.get_loc("2000-01-02T12", method="nearest", tolerance="1 day") == 1
        assert (
            idx.get_loc("2000-01-02T12", method="nearest", tolerance=pd.Timedelta("1D"))
            == 1
        )
        assert (
            idx.get_loc(
                "2000-01-02T12", method="nearest", tolerance=np.timedelta64(1, "D")
            )
            == 1
        )
        assert (
            idx.get_loc("2000-01-02T12", method="nearest", tolerance=timedelta(1)) == 1
        )

        msg = "unit abbreviation w/o a number"
        with pytest.raises(ValueError, match=msg):
            idx.get_loc("2000-01-10", method="nearest", tolerance="foo")

        msg = "Input has different freq=None from PeriodArray\\(freq=D\\)"
        with pytest.raises(ValueError, match=msg):
            idx.get_loc("2000-01-10", method="nearest", tolerance="1 hour")
        with pytest.raises(KeyError, match=r"^Period\('2000-01-10', 'D'\)$"):
            idx.get_loc("2000-01-10", method="nearest", tolerance="1 day")
        with pytest.raises(
            ValueError, match="list-like tolerance size must match target index size"
        ):
            idx.get_loc(
                "2000-01-10",
                method="nearest",
                tolerance=[
                    pd.Timedelta("1 day").to_timedelta64(),
                    pd.Timedelta("1 day").to_timedelta64(),
                ],
            )

    # TODO: This method came from test_period; de-dup with version above
    def test_get_indexer2(self):
        """``get_indexer`` on an hourly index with scalar and list tolerances."""
        idx = pd.period_range("2000-01-01", periods=3).asfreq("H", how="start")
        tm.assert_numpy_array_equal(
            idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp)
        )

        target = pd.PeriodIndex(
            ["1999-12-31T23", "2000-01-01T12", "2000-01-02T01"], freq="H"
        )
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
        )
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
        )
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
        )
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "nearest", tolerance="1 hour"),
            np.array([0, -1, 1], dtype=np.intp),
        )

        msg = "Input has different freq=None from PeriodArray\\(freq=H\\)"
        with pytest.raises(ValueError, match=msg):
            idx.get_indexer(target, "nearest", tolerance="1 minute")

        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "nearest", tolerance="1 day"),
            np.array([0, 1, 1], dtype=np.intp),
        )
        tol_raw = [
            pd.Timedelta("1 hour"),
            pd.Timedelta("1 hour"),
            np.timedelta64(1, "D"),
        ]
        tm.assert_numpy_array_equal(
            idx.get_indexer(
                target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw]
            ),
            np.array([0, -1, 1], dtype=np.intp),
        )
        tol_bad = [
            pd.Timedelta("2 hour").to_timedelta64(),
            pd.Timedelta("1 hour").to_timedelta64(),
            np.timedelta64(1, "M"),
        ]
        with pytest.raises(
            libperiod.IncompatibleFrequency, match="Input has different freq=None from"
        ):
            idx.get_indexer(target, "nearest", tolerance=tol_bad)

    def test_indexing(self):
        """Label lookup of the first period agrees with ``iat[0]``."""
        # GH 4390, iat incorrectly indexing
        index = period_range("1/1/2001", periods=10)
        s = Series(np.random.randn(10), index=index)
        expected = s[index[0]]
        result = s.iat[0]
        assert expected == result

    def test_period_index_indexer(self):
        """``.loc`` accepts a PeriodIndex, a list of Periods and an index slice."""
        # GH4125
        idx = pd.period_range("2002-01", "2003-12", freq="M")
        # Use numpy directly; the ``pd.np`` alias is deprecated.
        df = pd.DataFrame(np.random.randn(24, 10), index=idx)
        tm.assert_frame_equal(df, df.loc[idx])
        tm.assert_frame_equal(df, df.loc[list(idx)])
        tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]])
|
||||
@@ -0,0 +1,347 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DatetimeIndex, Index, NaT, PeriodIndex, Series
|
||||
from pandas.core.arrays import PeriodArray
|
||||
from pandas.tests.test_base import Ops
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestPeriodIndexOps(Ops):
|
||||
def setup_method(self, method):
    """Run the base setup, then split ``self.objs`` by datetime-like index type."""
    super().setup_method(method)

    def _is_datetimelike_index(obj):
        # True for the two index types this class treats as valid.
        return isinstance(obj, (DatetimeIndex, PeriodIndex))

    valid, invalid = [], []
    for obj in self.objs:
        bucket = valid if _is_datetimelike_index(obj) else invalid
        bucket.append(obj)

    self.is_valid_objs = valid
    self.not_valid_objs = invalid
|
||||
|
||||
def test_ops_properties(self):
    """Check each PeriodArray ops list via ``check_ops_properties``."""

    def _is_period_index(obj):
        return isinstance(obj, PeriodIndex)

    # Attributes are resolved one at a time, immediately before each check.
    for ops_attr in ("_field_ops", "_object_ops", "_bool_ops"):
        self.check_ops_properties(getattr(PeriodArray, ops_attr), _is_period_index)
|
||||
|
||||
def test_resolution(self):
|
||||
for freq, expected in zip(
|
||||
["A", "Q", "M", "D", "H", "T", "S", "L", "U"],
|
||||
[
|
||||
"day",
|
||||
"day",
|
||||
"day",
|
||||
"day",
|
||||
"hour",
|
||||
"minute",
|
||||
"second",
|
||||
"millisecond",
|
||||
"microsecond",
|
||||
],
|
||||
):
|
||||
|
||||
idx = pd.period_range(start="2013-04-01", periods=30, freq=freq)
|
||||
assert idx.resolution == expected
|
||||
|
||||
def test_value_counts_unique(self):
|
||||
# GH 7735
|
||||
idx = pd.period_range("2011-01-01 09:00", freq="H", periods=10)
|
||||
# create repeated values, 'n'th element is repeated by n+1 times
|
||||
idx = PeriodIndex(np.repeat(idx._values, range(1, len(idx) + 1)), freq="H")
|
||||
|
||||
exp_idx = PeriodIndex(
|
||||
[
|
||||
"2011-01-01 18:00",
|
||||
"2011-01-01 17:00",
|
||||
"2011-01-01 16:00",
|
||||
"2011-01-01 15:00",
|
||||
"2011-01-01 14:00",
|
||||
"2011-01-01 13:00",
|
||||
"2011-01-01 12:00",
|
||||
"2011-01-01 11:00",
|
||||
"2011-01-01 10:00",
|
||||
"2011-01-01 09:00",
|
||||
],
|
||||
freq="H",
|
||||
)
|
||||
expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
|
||||
|
||||
for obj in [idx, Series(idx)]:
|
||||
tm.assert_series_equal(obj.value_counts(), expected)
|
||||
|
||||
expected = pd.period_range("2011-01-01 09:00", freq="H", periods=10)
|
||||
tm.assert_index_equal(idx.unique(), expected)
|
||||
|
||||
idx = PeriodIndex(
|
||||
[
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 08:00",
|
||||
"2013-01-01 08:00",
|
||||
NaT,
|
||||
],
|
||||
freq="H",
|
||||
)
|
||||
|
||||
exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00"], freq="H")
|
||||
expected = Series([3, 2], index=exp_idx)
|
||||
|
||||
for obj in [idx, Series(idx)]:
|
||||
tm.assert_series_equal(obj.value_counts(), expected)
|
||||
|
||||
exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00", NaT], freq="H")
|
||||
expected = Series([3, 2, 1], index=exp_idx)
|
||||
|
||||
for obj in [idx, Series(idx)]:
|
||||
tm.assert_series_equal(obj.value_counts(dropna=False), expected)
|
||||
|
||||
tm.assert_index_equal(idx.unique(), exp_idx)
|
||||
|
||||
def test_drop_duplicates_metadata(self):
|
||||
# GH 10115
|
||||
idx = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")
|
||||
result = idx.drop_duplicates()
|
||||
tm.assert_index_equal(idx, result)
|
||||
assert idx.freq == result.freq
|
||||
|
||||
idx_dup = idx.append(idx) # freq will not be reset
|
||||
result = idx_dup.drop_duplicates()
|
||||
tm.assert_index_equal(idx, result)
|
||||
assert idx.freq == result.freq
|
||||
|
||||
def test_drop_duplicates(self):
|
||||
# to check Index/Series compat
|
||||
base = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")
|
||||
idx = base.append(base[:5])
|
||||
|
||||
res = idx.drop_duplicates()
|
||||
tm.assert_index_equal(res, base)
|
||||
res = Series(idx).drop_duplicates()
|
||||
tm.assert_series_equal(res, Series(base))
|
||||
|
||||
res = idx.drop_duplicates(keep="last")
|
||||
exp = base[5:].append(base[:5])
|
||||
tm.assert_index_equal(res, exp)
|
||||
res = Series(idx).drop_duplicates(keep="last")
|
||||
tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
|
||||
|
||||
res = idx.drop_duplicates(keep=False)
|
||||
tm.assert_index_equal(res, base[5:])
|
||||
res = Series(idx).drop_duplicates(keep=False)
|
||||
tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
|
||||
|
||||
def test_order_compat(self):
|
||||
def _check_freq(index, expected_index):
|
||||
if isinstance(index, PeriodIndex):
|
||||
assert index.freq == expected_index.freq
|
||||
|
||||
pidx = PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A")
|
||||
# for compatibility check
|
||||
iidx = Index([2011, 2012, 2013], name="idx")
|
||||
for idx in [pidx, iidx]:
|
||||
ordered = idx.sort_values()
|
||||
tm.assert_index_equal(ordered, idx)
|
||||
_check_freq(ordered, idx)
|
||||
|
||||
ordered = idx.sort_values(ascending=False)
|
||||
tm.assert_index_equal(ordered, idx[::-1])
|
||||
_check_freq(ordered, idx[::-1])
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True)
|
||||
tm.assert_index_equal(ordered, idx)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
|
||||
_check_freq(ordered, idx)
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
|
||||
tm.assert_index_equal(ordered, idx[::-1])
|
||||
tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
|
||||
_check_freq(ordered, idx[::-1])
|
||||
|
||||
pidx = PeriodIndex(
|
||||
["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A"
|
||||
)
|
||||
pexpected = PeriodIndex(
|
||||
["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A"
|
||||
)
|
||||
# for compatibility check
|
||||
iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx")
|
||||
iexpected = Index([2011, 2011, 2012, 2013, 2015], name="idx")
|
||||
for idx, expected in [(pidx, pexpected), (iidx, iexpected)]:
|
||||
ordered = idx.sort_values()
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
_check_freq(ordered, idx)
|
||||
|
||||
ordered = idx.sort_values(ascending=False)
|
||||
tm.assert_index_equal(ordered, expected[::-1])
|
||||
_check_freq(ordered, idx)
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True)
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
|
||||
exp = np.array([0, 4, 3, 1, 2])
|
||||
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
|
||||
_check_freq(ordered, idx)
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
|
||||
tm.assert_index_equal(ordered, expected[::-1])
|
||||
|
||||
exp = np.array([2, 1, 3, 4, 0])
|
||||
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
|
||||
_check_freq(ordered, idx)
|
||||
|
||||
pidx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
|
||||
|
||||
result = pidx.sort_values()
|
||||
expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.freq == "D"
|
||||
|
||||
result = pidx.sort_values(ascending=False)
|
||||
expected = PeriodIndex(["2013", "2011", "2011", "NaT"], name="pidx", freq="D")
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.freq == "D"
|
||||
|
||||
def test_order(self):
|
||||
for freq in ["D", "2D", "4D"]:
|
||||
idx = PeriodIndex(
|
||||
["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx"
|
||||
)
|
||||
|
||||
ordered = idx.sort_values()
|
||||
tm.assert_index_equal(ordered, idx)
|
||||
assert ordered.freq == idx.freq
|
||||
|
||||
ordered = idx.sort_values(ascending=False)
|
||||
expected = idx[::-1]
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
assert ordered.freq == expected.freq
|
||||
assert ordered.freq == freq
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True)
|
||||
tm.assert_index_equal(ordered, idx)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
|
||||
assert ordered.freq == idx.freq
|
||||
assert ordered.freq == freq
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
|
||||
expected = idx[::-1]
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
|
||||
assert ordered.freq == expected.freq
|
||||
assert ordered.freq == freq
|
||||
|
||||
idx1 = PeriodIndex(
|
||||
["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
|
||||
freq="D",
|
||||
name="idx1",
|
||||
)
|
||||
exp1 = PeriodIndex(
|
||||
["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
|
||||
freq="D",
|
||||
name="idx1",
|
||||
)
|
||||
|
||||
idx2 = PeriodIndex(
|
||||
["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
|
||||
freq="D",
|
||||
name="idx2",
|
||||
)
|
||||
exp2 = PeriodIndex(
|
||||
["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
|
||||
freq="D",
|
||||
name="idx2",
|
||||
)
|
||||
|
||||
idx3 = PeriodIndex(
|
||||
[NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], freq="D", name="idx3"
|
||||
)
|
||||
exp3 = PeriodIndex(
|
||||
[NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], freq="D", name="idx3"
|
||||
)
|
||||
|
||||
for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]:
|
||||
ordered = idx.sort_values()
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
assert ordered.freq == "D"
|
||||
|
||||
ordered = idx.sort_values(ascending=False)
|
||||
tm.assert_index_equal(ordered, expected[::-1])
|
||||
assert ordered.freq == "D"
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True)
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
|
||||
exp = np.array([0, 4, 3, 1, 2])
|
||||
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
|
||||
assert ordered.freq == "D"
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
|
||||
tm.assert_index_equal(ordered, expected[::-1])
|
||||
|
||||
exp = np.array([2, 1, 3, 4, 0])
|
||||
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
|
||||
assert ordered.freq == "D"
|
||||
|
||||
def test_shift(self):
    """Deliberately empty: shift is tested in test_arithmetic."""
|
||||
|
||||
def test_nat(self):
    """Check the NA sentinel and the NaN bookkeeping attributes of PeriodIndex."""
    # Both the class and an empty instance report NaT as their NA value.
    assert pd.PeriodIndex._na_value is NaT
    assert pd.PeriodIndex([], freq="M")._na_value is NaT

    # (input values, expected _isnan mask, expected hasnans, expected _nan_idxs)
    cases = [
        (["2011-01-01", "2011-01-02"], [False, False], False, []),
        (["2011-01-01", "NaT"], [False, True], True, [1]),
    ]
    for values, mask, has_nans, nan_positions in cases:
        idx = pd.PeriodIndex(values, freq="D")
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array(mask))
        assert idx.hasnans is has_nans
        tm.assert_numpy_array_equal(
            idx._nan_idxs, np.array(nan_positions, dtype=np.intp)
        )
|
||||
|
||||
@pytest.mark.parametrize("freq", ["D", "M"])
def test_equals(self, freq):
    """PeriodIndex.equals is True only for an Index holding the same periods.

    GH#13107
    """
    idx = pd.PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq)

    def assert_differs_from(other):
        # `other` must compare unequal in every representation tried here.
        assert not idx.equals(other)
        assert not idx.equals(other.copy())
        assert not idx.equals(other.astype(object))
        assert not idx.astype(object).equals(other)
        assert not idx.equals(list(other))
        assert not idx.equals(pd.Series(other))

    # Equal to itself, a copy, and its object-dtype form (in both directions).
    assert idx.equals(idx)
    assert idx.equals(idx.copy())
    assert idx.equals(idx.astype(object))
    assert idx.astype(object).equals(idx)
    assert idx.astype(object).equals(idx.astype(object))
    # A list or a Series of the same values is not "equal".
    assert not idx.equals(list(idx))
    assert not idx.equals(pd.Series(idx))

    # Same labels parsed at a different frequency.
    hourly = pd.PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="H")
    assert_differs_from(hourly)

    # Same underlying i8 values, but tagged with a different freq.
    same_ordinals = pd.PeriodIndex._simple_new(
        idx._values._simple_new(idx._values.asi8, freq="H")
    )
    tm.assert_numpy_array_equal(idx.asi8, same_ordinals.asi8)
    assert_differs_from(same_ordinals)
|
||||
|
||||
def test_freq_setter_deprecated(self):
    """Reading ``.freq`` is silent; assigning to it emits FutureWarning.

    GH 20678
    """
    quarters = pd.period_range("2018Q1", periods=4, freq="Q")

    # Getter: no warning of any kind is expected.
    with tm.assert_produces_warning(None):
        quarters.freq

    # Setter: expected to warn.
    with tm.assert_produces_warning(FutureWarning):
        quarters.freq = pd.offsets.Day()
|
||||
@@ -0,0 +1,135 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Period, Series, period_range
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestPeriodIndex:
|
||||
def setup_method(self, method):
    """No per-test setup is performed."""
|
||||
|
||||
def test_slice_with_negative_step(self):
    """Label slices with step -1 select the same rows as positional slices.

    Uses a 20-element monthly Series starting at 2014-01 and compares
    ``ts[label_slice]`` and ``ts.loc[label_slice]`` against ``ts.iloc``.
    """
    ts = Series(np.arange(20), period_range("2014-01", periods=20, freq="M"))
    SLC = pd.IndexSlice

    def assert_slices_equivalent(l_slc, i_slc):
        # The original repeated the .loc assertion twice verbatim; one
        # check per accessor is sufficient.
        expected = ts.iloc[i_slc]
        tm.assert_series_equal(ts[l_slc], expected)
        tm.assert_series_equal(ts.loc[l_slc], expected)

    # Open-ended stop: start at 2014-10 and walk backwards.
    assert_slices_equivalent(SLC[Period("2014-10") :: -1], SLC[9::-1])
    assert_slices_equivalent(SLC["2014-10"::-1], SLC[9::-1])

    # Open-ended start: walk backwards down to 2014-10.
    assert_slices_equivalent(SLC[: Period("2014-10") : -1], SLC[:8:-1])
    assert_slices_equivalent(SLC[:"2014-10":-1], SLC[:8:-1])

    # Both bounds given, as strings, Periods, or a mix of the two.
    assert_slices_equivalent(SLC["2015-02":"2014-10":-1], SLC[13:8:-1])
    assert_slices_equivalent(
        SLC[Period("2015-02") : Period("2014-10") : -1], SLC[13:8:-1]
    )
    assert_slices_equivalent(SLC["2015-02" : Period("2014-10") : -1], SLC[13:8:-1])
    assert_slices_equivalent(SLC[Period("2015-02") : "2014-10" : -1], SLC[13:8:-1])

    # Ascending bounds with a negative step select nothing.
    assert_slices_equivalent(SLC["2014-10":"2015-02":-1], SLC[:0])
|
||||
|
||||
def test_slice_with_zero_step_raises(self):
    """A slice step of zero raises ValueError via both ``[]`` and ``.loc``."""
    ts = Series(np.arange(20), period_range("2014-01", periods=20, freq="M"))
    msg = "slice step cannot be zero"

    with pytest.raises(ValueError, match=msg):
        ts[::0]
    # The original repeated this .loc check twice verbatim; once is enough.
    with pytest.raises(ValueError, match=msg):
        ts.loc[::0]
|
||||
|
||||
def test_slice_keep_name(self):
    """Slicing a named PeriodIndex yields an index with the same name."""
    named = period_range("20010101", periods=10, freq="D", name="bob")
    tail = named[1:]
    assert named.name == tail.name
|
||||
|
||||
def test_pindex_slice_index(self):
    """Indexing a monthly Series by a year string returns that year's 12 rows."""
    pi = period_range(start="1/1/10", end="12/31/12", freq="M")
    s = Series(np.random.rand(len(pi)), index=pi)

    # (year label, positional rows expected for that year)
    for year, rows in [("2010", slice(0, 12)), ("2011", slice(12, 24))]:
        res = s[year]
        exp = s[rows]
        tm.assert_series_equal(res, exp)
|
||||
|
||||
def test_range_slice_day(self):
    """Daily indexes reject string slice bounds; a Series over them accepts them.

    GH#6716. Runs the same checks for a DatetimeIndex and a PeriodIndex.
    """
    n_days = 400
    didx = pd.date_range(start="2013/01/01", freq="D", periods=n_days)
    pidx = period_range(start="2013/01/01", freq="D", periods=n_days)

    # Labels that must be refused when slicing the index object itself.
    values = [
        "2014",
        "2013/02",
        "2013/01/02",
        "2013/02/01 9H",
        "2013/02/01 09:00",
    ]
    invalid = ["2013/02/01 9H", "2013/02/01 09:00"]

    for idx in [didx, pidx]:
        # Slicing the index directly with a string bound raises TypeError.
        for label in values:
            with pytest.raises(TypeError):
                idx[label:]

        s = Series(np.random.rand(len(idx)), index=idx)

        # On the Series the same kind of label works as a slice bound.
        tm.assert_series_equal(s["2013/01/02":], s[1:])
        tm.assert_series_equal(s["2013/01/02":"2013/01/05"], s[1:5])
        tm.assert_series_equal(s["2013/02":], s[31:])
        tm.assert_series_equal(s["2014":], s[365:])

        # Sub-daily labels are checked against the index once more.
        for label in invalid:
            with pytest.raises(TypeError):
                idx[label:]
|
||||
|
||||
def test_range_slice_seconds(self):
    """Second-resolution indexes: string slicing on the index vs. on a Series.

    GH#6716. Runs the same checks for a DatetimeIndex and a PeriodIndex.
    """
    n_seconds = 4000
    didx = pd.date_range(start="2013/01/01 09:00:00", freq="S", periods=n_seconds)
    pidx = period_range(start="2013/01/01 09:00:00", freq="S", periods=n_seconds)

    # Labels that must be refused when slicing the index object itself.
    values = [
        "2014",
        "2013/02",
        "2013/01/02",
        "2013/02/01 9H",
        "2013/02/01 09:00",
    ]

    for idx in [didx, pidx]:
        # Slicing the index directly with a string bound raises TypeError.
        for label in values:
            with pytest.raises(TypeError):
                idx[label:]

        s = Series(np.random.rand(len(idx)), index=idx)

        # Minute- and hour-resolution bounds select whole minutes/hours.
        tm.assert_series_equal(s["2013/01/01 09:05":"2013/01/01 09:10"], s[300:660])
        tm.assert_series_equal(
            s["2013/01/01 10:00":"2013/01/01 10:05"], s[3600:3960]
        )
        tm.assert_series_equal(s["2013/01/01 10H":], s[3600:])
        tm.assert_series_equal(s[:"2013/01/01 09:30"], s[:1860])
        # A coarser start label that contains the first element selects everything.
        for coarse in ["2013/01/01", "2013/01", "2013"]:
            tm.assert_series_equal(s[coarse:], s)
|
||||
|
||||
def test_range_slice_outofbounds(self):
    """Label slices reaching outside the index return empty or clipped frames.

    GH#5407. Runs the same checks for a DatetimeIndex and a PeriodIndex.
    """
    didx = pd.date_range(start="2013/10/01", freq="D", periods=10)
    pidx = period_range(start="2013/10/01", freq="D", periods=10)

    for idx in [didx, pidx]:
        df = DataFrame({"units": list(range(100, 110))}, index=idx)
        # Empty frame with the same index class and an int64 "units" column.
        empty = DataFrame(index=idx.__class__([], freq="D"), columns=["units"])
        empty["units"] = empty["units"].astype("int64")
        first_two = df.iloc[:2]

        # (slice start, slice stop, expected frame)
        cases = [
            ("2013/09/01", "2013/09/30", empty),
            ("2013/09/30", "2013/10/02", first_two),
            ("2013/10/01", "2013/10/02", first_two),
            ("2013/10/02", "2013/09/30", empty),
            ("2013/10/15", "2013/10/17", empty),
            ("2013-06", "2013-09", empty),
            ("2013-11", "2013-12", empty),
        ]
        for lower, upper, expected in cases:
            tm.assert_frame_equal(df[lower:upper], expected)
|
||||
@@ -0,0 +1,630 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs.period import IncompatibleFrequency
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
NaT,
|
||||
Period,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
date_range,
|
||||
offsets,
|
||||
period_range,
|
||||
)
|
||||
from pandas.util import testing as tm
|
||||
|
||||
from ..datetimelike import DatetimeLike
|
||||
|
||||
|
||||
class TestPeriodIndex(DatetimeLike):
|
||||
_holder = PeriodIndex
|
||||
|
||||
def setup_method(self, method):
    """Populate ``self.indices`` and hand them to ``setup_indices``."""
    descending = period_range("20130101", periods=10, freq="D")[::-1]
    self.indices = {
        "index": tm.makePeriodIndex(10),
        "index_dec": descending,
    }
    self.setup_indices()
|
||||
|
||||
def create_index(self):
    """Return a five-element daily PeriodIndex starting at 2013-01-01."""
    daily = period_range("20130101", periods=5, freq="D")
    return daily
|
||||
|
||||
def test_pickle_compat_construction(self):
    """No-op; presumably overrides an inherited test of the same name."""
|
||||
|
||||
@pytest.mark.parametrize("freq", ["D", "M", "A"])
def test_pickle_round_trip(self, freq):
    """A PeriodIndex containing several NA spellings survives a pickle round trip."""
    # np.nan rather than the np.NaN alias: the alias was removed in NumPy 2.0,
    # and np.nan is what the other tests in this module already use.
    idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.nan], freq=freq)
    result = tm.round_trip_pickle(idx)
    tm.assert_index_equal(result, idx)
|
||||
|
||||
def test_where(self):
    """Deliberately empty: where is handled in test_indexing."""
|
||||
|
||||
@pytest.mark.parametrize("use_numpy", [True, False])
@pytest.mark.parametrize(
    "index",
    [
        pd.period_range("2000-01-01", periods=3, freq="D"),
        pd.period_range("2001-01-01", periods=3, freq="2D"),
        pd.PeriodIndex(["2001-01", "NaT", "2003-01"], freq="M"),
    ],
)
def test_repeat_freqstr(self, index, use_numpy):
    """Repeating each element three times keeps values and freqstr (GH10183)."""
    # Expected: every element three times in a row, in the original order.
    tripled = []
    for period in index:
        tripled.extend([period] * 3)
    expected = PeriodIndex(tripled)

    if use_numpy:
        result = np.repeat(index, 3)
    else:
        result = index.repeat(3)

    tm.assert_index_equal(result, expected)
    assert result.freqstr == index.freqstr
|
||||
|
||||
def test_fillna_period(self):
    """fillna on an hourly PeriodIndex with matching and non-matching values.

    GH 11343
    """
    idx = pd.PeriodIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], freq="H")

    # Hourly Period as fill value: the expected result is an hourly PeriodIndex.
    filler = pd.Period("2011-01-01 10:00", freq="H")
    exp = pd.PeriodIndex(
        ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], freq="H"
    )
    tm.assert_index_equal(idx.fillna(filler), exp)

    # A string, or a Period with another freq: expected result is an
    # object-dtype Index holding the fill value as-is.
    for filler in ("x", pd.Period("2011-01-01", freq="D")):
        exp = pd.Index(
            [
                pd.Period("2011-01-01 09:00", freq="H"),
                filler,
                pd.Period("2011-01-01 11:00", freq="H"),
            ],
            dtype=object,
        )
        tm.assert_index_equal(idx.fillna(filler), exp)
|
||||
|
||||
def test_no_millisecond_field(self):
    """Neither the DatetimeIndex class nor an instance has ``millisecond``."""
    on_class = "type object 'DatetimeIndex' has no attribute 'millisecond'"
    with pytest.raises(AttributeError, match=on_class):
        DatetimeIndex.millisecond

    on_instance = "'DatetimeIndex' object has no attribute 'millisecond'"
    with pytest.raises(AttributeError, match=on_instance):
        DatetimeIndex([]).millisecond
|
||||
|
||||
@pytest.mark.parametrize("sort", [None, False])
def test_difference_freq(self, sort):
    """difference() of two daily PeriodIndexes keeps the frequency.

    GH14323: difference of Period MUST preserve frequency, but the ability
    to union results must be preserved.
    """
    index = period_range("20160920", "20160925", freq="D")

    # (index to subtract, labels expected to remain)
    cases = [
        (period_range("20160921", "20160924", freq="D"), ["20160920", "20160925"]),
        (period_range("20160922", "20160925", freq="D"), ["20160920", "20160921"]),
    ]
    for other, remaining in cases:
        expected = PeriodIndex(remaining, freq="D")
        idx_diff = index.difference(other, sort)
        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal("freq", idx_diff, expected)
|
||||
|
||||
def test_hash_error(self):
    """hash() on a PeriodIndex raises TypeError naming the index class."""
    index = period_range("20010101", periods=10)
    class_name = type(index).__name__
    msg = "unhashable type: '{}'".format(class_name)
    with pytest.raises(TypeError, match=msg):
        hash(index)
|
||||
|
||||
def test_make_time_series(self):
    """A Series can be built with an annual PeriodIndex as its index."""
    annual = period_range(freq="A", start="1/1/2001", end="12/1/2009")
    ones = Series(1, index=annual)
    assert isinstance(ones, Series)
|
||||
|
||||
def test_shallow_copy_empty(self):
    """_shallow_copy() of an empty PeriodIndex equals the original (GH13067)."""
    empty = PeriodIndex([], freq="M")
    copied = empty._shallow_copy()
    tm.assert_index_equal(copied, empty)
|
||||
|
||||
def test_shallow_copy_i8(self):
    """_shallow_copy from asi8 values plus freq reproduces the index (GH-24391)."""
    original = period_range("2018-01-01", periods=3, freq="2D")
    rebuilt = original._shallow_copy(original.asi8, freq=original.freq)
    tm.assert_index_equal(rebuilt, original)
|
||||
|
||||
def test_shallow_copy_changing_freq_raises(self):
    """_shallow_copy with a freq differing from the data raises IncompatibleFrequency."""
    two_daily = period_range("2018-01-01", periods=3, freq="2D")
    expected_message = "specified freq and dtype are different"
    with pytest.raises(IncompatibleFrequency, match=expected_message):
        two_daily._shallow_copy(two_daily, freq="H")
|
||||
|
||||
def test_dtype_str(self):
    """dtype_str warns with FutureWarning and matches str(dtype)."""
    monthly = pd.PeriodIndex([], freq="M")
    with tm.assert_produces_warning(FutureWarning):
        assert monthly.dtype_str == "period[M]"
        assert monthly.dtype_str == str(monthly.dtype)

    # Same check for a multiple-of-month freq; as in the first case, the
    # accesses happen inside the warning context.
    with tm.assert_produces_warning(FutureWarning):
        three_monthly = pd.PeriodIndex([], freq="3M")
        assert three_monthly.dtype_str == "period[3M]"
        assert three_monthly.dtype_str == str(three_monthly.dtype)
|
||||
|
||||
def test_view_asi8(self):
    """``view("i8")`` and ``asi8`` both return the expected int64 array."""
    # (input values, freq, expected int64 values)
    cases = [
        ([], "M", []),
        (["2011-01", pd.NaT], "M", [492, -9223372036854775808]),
        (["2011-01-01", pd.NaT], "D", [14975, -9223372036854775808]),
    ]
    for values, freq, ordinals in cases:
        idx = pd.PeriodIndex(values, freq=freq)
        exp = np.array(ordinals, dtype=np.int64)
        tm.assert_numpy_array_equal(idx.view("i8"), exp)
        tm.assert_numpy_array_equal(idx.asi8, exp)
|
||||
|
||||
def test_values(self):
    """Check the value accessors of PeriodIndex for empty and NaT-bearing input.

    ``values`` and ``to_numpy()`` are compared against object arrays of
    Period/NaT; ``_ndarray_values`` against int64 arrays. ``get_values()``
    is additionally expected to emit a FutureWarning.
    """
    idx = pd.PeriodIndex([], freq="M")

    # Builtin `object` rather than the np.object alias: the alias was removed
    # in NumPy 1.24, and `dtype=object` is what the cases below already use.
    exp = np.array([], dtype=object)
    tm.assert_numpy_array_equal(idx.values, exp)
    tm.assert_numpy_array_equal(idx.to_numpy(), exp)
    with tm.assert_produces_warning(FutureWarning):
        tm.assert_numpy_array_equal(idx.get_values(), exp)
    exp = np.array([], dtype=np.int64)
    tm.assert_numpy_array_equal(idx._ndarray_values, exp)

    # Monthly index with a missing value.
    idx = pd.PeriodIndex(["2011-01", pd.NaT], freq="M")

    exp = np.array([pd.Period("2011-01", freq="M"), pd.NaT], dtype=object)
    tm.assert_numpy_array_equal(idx.values, exp)
    tm.assert_numpy_array_equal(idx.to_numpy(), exp)
    exp = np.array([492, -9223372036854775808], dtype=np.int64)
    tm.assert_numpy_array_equal(idx._ndarray_values, exp)

    # Daily index with a missing value.
    idx = pd.PeriodIndex(["2011-01-01", pd.NaT], freq="D")

    exp = np.array([pd.Period("2011-01-01", freq="D"), pd.NaT], dtype=object)
    tm.assert_numpy_array_equal(idx.values, exp)
    tm.assert_numpy_array_equal(idx.to_numpy(), exp)
    exp = np.array([14975, -9223372036854775808], dtype=np.int64)
    tm.assert_numpy_array_equal(idx._ndarray_values, exp)
|
||||
|
||||
def test_period_index_length(self):
    """Lengths, freq and end points of period_range, plus PeriodIndex freq inference."""
    # 2001 through 2009 at annual, quarterly and monthly resolution.
    for freq, expected_len in [("A", 9), ("Q", 4 * 9), ("M", 12 * 9)]:
        pi = period_range(freq=freq, start="1/1/2001", end="12/1/2009")
        assert len(pi) == expected_len

    # Anchored at a start Period.
    start = Period("02-Apr-2005", "B")
    i1 = period_range(start=start, periods=20)
    assert len(i1) == 20
    assert i1.freq == start.freq
    assert i1[0] == start

    # Anchored at an end Period.
    end_intv = Period("2006-12-31", "W")
    i1 = period_range(end=end_intv, periods=10)
    assert len(i1) == 10
    assert i1.freq == end_intv.freq
    assert i1[-1] == end_intv

    # Other spellings of the weekly freq must give the same range as i1.
    for weekly in ("1w", ("w", 1)):
        end_intv = Period("2006-12-31", weekly)
        i2 = period_range(end=end_intv, periods=10)
        assert len(i1) == len(i2)
        assert (i1 == i2).all()
        assert i1.freq == i2.freq

    # `start` is business-daily while `end_intv` is still weekly here.
    msg = "start and end must have same freq"
    with pytest.raises(ValueError, match=msg):
        period_range(start=start, end=end_intv)

    # With matching freqs the same call is only required not to raise.
    end_intv = Period("2005-05-01", "B")
    period_range(start=start, end=end_intv)

    msg = (
        "Of the three parameters: start, end, and periods, exactly two"
        " must be specified"
    )
    with pytest.raises(ValueError, match=msg):
        period_range(start=start)

    # infer freq from first element, for a list and for an ndarray
    pair = [end_intv, Period("2005-05-05", "B")]
    for data in (pair, np.array(pair)):
        i2 = PeriodIndex(data)
        assert len(i2) == 2
        assert i2[0] == end_intv

    # Mixed freq should fail
    vals = [end_intv, Period("2006-12-31", "w")]
    msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)"
    with pytest.raises(IncompatibleFrequency, match=msg):
        PeriodIndex(vals)
    vals = np.array(vals)
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(vals)
|
||||
|
||||
def test_fields(self):
    """Run ``_check_all_fields`` over period ranges of many frequencies."""
    # NOTE(review): for "D" and "B" the start is later than the end, which
    # presumably yields an empty index -- confirm this is intended.
    range_kwargs = [
        dict(freq="A", start="1/1/2001", end="12/1/2005"),
        dict(freq="Q", start="1/1/2001", end="12/1/2002"),
        dict(freq="M", start="1/1/2001", end="1/1/2002"),
        dict(freq="D", start="12/1/2001", end="6/1/2001"),
        dict(freq="B", start="12/1/2001", end="6/1/2001"),
        dict(freq="H", start="12/31/2001", end="1/1/2002 23:00"),
        dict(freq="Min", start="12/31/2001", end="1/1/2002 00:20"),
        dict(freq="S", start="12/31/2001 00:00:00", end="12/31/2001 00:05:00"),
    ]
    for kwargs in range_kwargs:
        self._check_all_fields(period_range(**kwargs))

    # Weekly range anchored at its end.
    end_intv = Period("2006-12-31", "W")
    self._check_all_fields(period_range(end=end_intv, periods=10))
|
||||
|
||||
def _check_all_fields(self, periodindex):
    """Assert that each field accessor agrees with the element-wise Period values.

    For every field name the index-level attribute, and (when the index is
    non-empty) the ``Series.dt`` attribute, is compared element by element
    against the same attribute on the individual Period objects.
    """
    field_names = (
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekofyear",
        "week",
        "dayofweek",
        "dayofyear",
        "quarter",
        "qyear",
        "days_in_month",
    )

    elements = list(periodindex)
    as_series = pd.Series(periodindex)

    for name in field_names:
        from_index = getattr(periodindex, name)
        assert len(periodindex) == len(from_index)
        for period, value in zip(elements, from_index):
            assert getattr(period, name) == value

        # The .dt comparison is only made for a non-empty Series.
        if len(as_series) != 0:
            from_series = getattr(as_series.dt, name)
            assert len(periodindex) == len(from_series)
            for period, value in zip(elements, from_series):
                assert getattr(period, name) == value
|
||||
|
||||
def test_period_set_index_reindex(self):
    """set_index with a PeriodIndex installs exactly that index (GH 6631)."""
    df = DataFrame(np.random.random(6))
    monthly = period_range("2011/01/01", periods=6, freq="M")
    annual = period_range("2013", periods=6, freq="A")

    # Set one index, then replace it with another of a different freq.
    for new_index in (monthly, annual):
        df = df.set_index(new_index)
        tm.assert_index_equal(df.index, new_index)
|
||||
|
||||
def test_factorize(self):
    """factorize() codes and uniques, with and without sort=True."""

    def check(index, codes, uniques, **kwargs):
        # Compare factorize(**kwargs) against intp codes and monthly uniques.
        arr, idx = index.factorize(**kwargs)
        tm.assert_numpy_array_equal(arr, np.array(codes, dtype=np.intp))
        tm.assert_index_equal(idx, PeriodIndex(uniques, freq="M"))

    ascending = ["2014-01", "2014-02", "2014-03"]

    # Already-sorted input: sort=True makes no difference.
    idx1 = PeriodIndex(
        ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M"
    )
    check(idx1, [0, 0, 1, 1, 2, 2], ascending)
    check(idx1, [0, 0, 1, 1, 2, 2], ascending, sort=True)

    # Unsorted input: sort=True orders the uniques, the default keeps
    # first-appearance order.
    idx2 = pd.PeriodIndex(
        ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M"
    )
    check(idx2, [2, 2, 1, 0, 2, 0], ascending, sort=True)
    check(idx2, [0, 0, 1, 2, 0, 2], ["2014-03", "2014-02", "2014-01"])
|
||||
|
||||
def test_is_(self):
    """``is_`` is True for the index and its views, False for derived indexes."""

    def create_index():
        return period_range(freq="A", start="1/1/2001", end="12/1/2009")

    index = create_index()

    # Identity holds for the object itself and through chains of views.
    assert index.is_(index)
    assert not index.is_(create_index())
    assert index.is_(index.view())
    assert index.is_(index.view().view().view().view().view())
    assert index.view().is_(index)

    # Renaming the original does not break identity with an earlier view.
    ind2 = index.view()
    index.name = "Apple"
    assert ind2.is_(index)

    # Slices, freq conversions and arithmetic results are not identical.
    derived = (
        index[:],
        index.asfreq("M"),
        index.asfreq("A"),
        index - 2,
        index - 0,
    )
    for other in derived:
        assert not index.is_(other)
|
||||
|
||||
def test_contains(self):
    """Membership requires a Period with the same freq as the index."""
    rng = period_range("2007-01", freq="M", periods=10)

    assert Period("2007-01", freq="M") in rng
    # `x not in y` instead of `not x in y`: same result, idiomatic spelling.
    assert Period("2007-01", freq="D") not in rng
    assert Period("2007-01", freq="2M") not in rng
|
||||
|
||||
def test_contains_nat(self):
    """NA-like scalars are members only when the index holds a NaT (gh-13582)."""
    without_nat = period_range("2007-01", freq="M", periods=10)
    for missing in (pd.NaT, None, float("nan"), np.nan):
        assert missing not in without_nat

    with_nat = pd.PeriodIndex(["2011-01", "NaT", "2011-02"], freq="M")
    for missing in (pd.NaT, None, float("nan"), np.nan):
        assert missing in with_nat
|
||||
|
||||
def test_periods_number_check(self):
    """Passing three positional arguments to period_range raises ValueError."""
    expected_message = (
        "Of the three parameters: start, end, and periods, exactly two"
        " must be specified"
    )
    with pytest.raises(ValueError, match=expected_message):
        period_range("2011-1-1", "2012-1-1", "B")
|
||||
|
||||
def test_start_time(self):
    """start_time of a monthly PeriodIndex equals the month-start dates (GH 17157)."""
    monthly = period_range(freq="M", start="2016-01-01", end="2016-05-31")
    month_starts = date_range("2016-01-01", end="2016-05-31", freq="MS")
    tm.assert_index_equal(monthly.start_time, month_starts)
|
||||
|
||||
def test_end_time(self):
    """end_time is one nanosecond before the day after each month end (GH 17157)."""
    monthly = period_range(freq="M", start="2016-01-01", end="2016-05-31")
    month_ends = date_range("2016-01-01", end="2016-05-31", freq="M")
    # Shift forward one day, then back one nanosecond.
    next_day = month_ends.shift(1, freq="D")
    expected_index = next_day.shift(-1, freq="ns")
    tm.assert_index_equal(monthly.end_time, expected_index)
|
||||
|
||||
def test_index_duplicate_periods(self):
    """Integer-year lookup on a Series whose PeriodIndex has duplicates."""
    # monotonic
    idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN")
    ts = Series(np.random.randn(len(idx)), index=idx)

    selected = ts[2007]
    tm.assert_series_equal(selected, ts[1:3])
    # Writing through the selection is expected to show up in `ts`.
    selected[:] = 1
    assert (ts[1:3] == 1).all()

    # not monotonic
    idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN")
    ts = Series(np.random.randn(len(idx)), index=idx)

    selected = ts[2007]
    by_mask = ts[idx == 2007]
    tm.assert_series_equal(selected, by_mask)
|
||||
|
||||
def test_index_unique(self):
    """unique() drops repeated periods and nunique() counts the distinct ones."""
    with_dupes = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN")
    distinct = PeriodIndex([2000, 2007, 2009], freq="A-JUN")
    tm.assert_index_equal(with_dupes.unique(), distinct)
    assert with_dupes.nunique() == 3

    # Same check with a `tz` keyword passed to the constructor.
    with_dupes = PeriodIndex(
        [2000, 2007, 2007, 2009, 2007], freq="A-JUN", tz="US/Eastern"
    )
    distinct = PeriodIndex([2000, 2007, 2009], freq="A-JUN", tz="US/Eastern")
    tm.assert_index_equal(with_dupes.unique(), distinct)
    assert with_dupes.nunique() == 3
|
||||
|
||||
def test_shift(self):
    """Deliberately empty: shift is tested in test_arithmetic."""
|
||||
|
||||
@td.skip_if_32bit
def test_ndarray_compat_properties(self):
    """Delegate to the parent class's test; decorated with ``td.skip_if_32bit``."""
    super().test_ndarray_compat_properties()
|
||||
|
||||
def test_negative_ordinals(self):
    """Negative and zero ordinals are accepted by Period and PeriodIndex."""
    # Construction alone is the check here: these must not raise.
    for ordinal in (-1000, 0):
        Period(ordinal=ordinal, freq="A")

    ordinals = [-1, 0, 1]
    from_list = PeriodIndex(ordinal=ordinals, freq="A")
    from_array = PeriodIndex(ordinal=np.array(ordinals), freq="A")
    tm.assert_index_equal(from_list, from_array)
|
||||
|
||||
def test_pindex_fieldaccessor_nat(self):
    """Field accessors yield -1 at NaT positions and keep the index name."""
    idx = PeriodIndex(
        ["2011-01", "2011-02", "NaT", "2012-03", "2012-04"], freq="D", name="name"
    )

    # (field name, expected values)
    expectations = [
        ("year", [2011, 2011, -1, 2012, 2012]),
        ("month", [1, 2, -1, 3, 4]),
    ]
    for field, values in expectations:
        exp = Index(values, dtype=np.int64, name="name")
        tm.assert_index_equal(getattr(idx, field), exp)
|
||||
|
||||
def test_pindex_qaccess(self):
    """A quarter written as "05Q4" looks up the matching quarterly element."""
    quarters = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q")
    running_total = Series(np.random.rand(len(quarters)), index=quarters).cumsum()
    # Todo: fix these accessors!
    assert running_total["05Q4"] == running_total[2]
|
||||
|
||||
def test_pindex_multiples(self):
    """A "2M" frequency gives every second month, however the index is built."""
    expected = PeriodIndex(
        ["2011-01", "2011-03", "2011-05", "2011-07", "2011-09", "2011-11"],
        freq="2M",
    )

    def check(pi):
        # Values, offset object and freq string must all reflect "2M".
        tm.assert_index_equal(pi, expected)
        assert pi.freq == offsets.MonthEnd(2)
        assert pi.freqstr == "2M"

    # Building through the PeriodIndex constructor with start/end warns.
    with tm.assert_produces_warning(FutureWarning):
        pi = PeriodIndex(start="1/1/11", end="12/31/11", freq="2M")
    check(pi)

    check(period_range(start="1/1/11", end="12/31/11", freq="2M"))
    check(period_range(start="1/1/11", periods=6, freq="2M"))
|
||||
|
||||
def test_iteration(self):
    """Iterating a PeriodIndex yields Period objects carrying the index freq."""
    index = period_range(start="1/1/10", periods=4, freq="B")

    first = list(index)[0]
    assert isinstance(first, Period)
    assert first.freq == index.freq
|
||||
|
||||
def test_is_full(self):
    """is_full for gapped, contiguous, duplicated and non-monotonic indexes."""
    # (years, expected truthiness of is_full)
    cases = [
        ([2005, 2007, 2009], False),
        ([2005, 2006, 2007], True),
        ([2005, 2005, 2007], False),
        ([2005, 2005, 2006], True),
    ]
    for years, expected in cases:
        index = PeriodIndex(years, freq="A")
        assert bool(index.is_full) == expected

    # A non-monotonic index cannot be evaluated.
    index = PeriodIndex([2006, 2005, 2005], freq="A")
    with pytest.raises(ValueError, match="Index is not monotonic"):
        index.is_full

    # An empty slice counts as full.
    assert index[:0].is_full
|
||||
|
||||
def test_with_multi_index(self):
    """A PeriodIndex used as a MultiIndex level keeps its period type (#1705)."""
    stamps = date_range("1/1/2012", periods=4, freq="12H")
    level_arrays = [stamps.to_period(freq="D"), stamps.hour]

    s = Series([0, 1, 2, 3], level_arrays)

    first_level = s.index.levels[0]
    assert isinstance(first_level, PeriodIndex)

    first_key = s.index.values[0]
    assert isinstance(first_key[0], Period)
|
||||
|
||||
def test_convert_array_of_periods(self):
    """pd.Index built from a list of Period objects is a PeriodIndex."""
    as_list = list(period_range("1/1/2000", periods=20, freq="D"))

    converted = pd.Index(as_list)
    assert isinstance(converted, PeriodIndex)
|
||||
|
||||
def test_append_concat(self):
    """concat of two period-indexed Series keeps a PeriodIndex (#1815)."""
    date_indexes = [
        date_range("12/31/1990", "12/31/1999", freq="A-DEC"),
        date_range("12/31/2000", "12/31/2009", freq="A-DEC"),
    ]
    # Build each Series on the datetime index first, then convert to periods.
    stamped = [Series(np.random.randn(10), dates) for dates in date_indexes]
    s1, s2 = [series.to_period() for series in stamped]

    # drops index
    result = pd.concat([s1, s2])
    assert isinstance(result.index, PeriodIndex)
    assert result.index[0] == s1.index[0]
|
||||
|
||||
def test_pickle_freq(self):
    """freq and freqstr survive a pickle round trip (GH2891)."""
    monthly = period_range("1/1/2011", "1/1/2012", freq="M")
    restored = tm.round_trip_pickle(monthly)
    assert restored.freq == offsets.MonthEnd()
    assert restored.freqstr == "M"
|
||||
|
||||
def test_map(self):
    """map() with a callable returns an Index of the mapped values."""
    # test_map_dictlike generally tests

    index = PeriodIndex([2005, 2007, 2009], freq="A")

    def to_ordinal(period):
        return period.ordinal

    result = index.map(to_ordinal)
    exp = Index([period.ordinal for period in index])
    tm.assert_index_equal(result, exp)
|
||||
|
||||
def test_join_self(self, join_type):
    """Joining an index with itself returns the very same object."""
    index = period_range("1/1/2000", periods=10)
    result = index.join(index, how=join_type)
    assert index is result
|
||||
|
||||
def test_insert(self):
    """Inserting any NA-like value stores NaT at that position.

    GH 18295 (test missing)
    """
    expected = PeriodIndex(
        ["2017Q1", pd.NaT, "2017Q2", "2017Q3", "2017Q4"], freq="Q"
    )
    na_values = (np.nan, pd.NaT, None)
    for na_value in na_values:
        # A fresh range per iteration, as insert is called on a new index.
        quarters = period_range("2017Q1", periods=4, freq="Q")
        tm.assert_index_equal(quarters.insert(1, na_value), expected)
|
||||
|
||||
|
||||
def test_maybe_convert_timedelta():
    """_maybe_convert_timedelta returns a count, or raises on a mismatched offset."""
    daily = PeriodIndex(["2000", "2001"], freq="D")

    # A two-day offset and the plain integer 2 both come back as 2.
    assert daily._maybe_convert_timedelta(offsets.Day(2)) == 2
    assert daily._maybe_convert_timedelta(2) == 2

    # A business-day offset does not match the daily index.
    mismatched = offsets.BusinessDay()
    msg = r"Input has different freq=B from PeriodIndex\(freq=D\)"
    with pytest.raises(ValueError, match=msg):
        daily._maybe_convert_timedelta(mismatched)
|
||||
@@ -0,0 +1,99 @@
|
||||
import pytest
|
||||
|
||||
from pandas import NaT, Period, PeriodIndex, date_range, period_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestPeriodRange:
|
||||
@pytest.mark.parametrize("freq", ["D", "W", "M", "Q", "A"])
def test_construction_from_string(self, freq):
    """period_range from string bounds, for any two of start/end/periods."""
    # non-empty
    expected = date_range(
        start="2017-01-01", periods=5, freq=freq, name="foo"
    ).to_period()
    start, end = str(expected[0]), str(expected[-1])

    non_empty_specs = [
        dict(start=start, end=end),
        dict(start=start, periods=5),
        dict(end=end, periods=5),
    ]
    for spec in non_empty_specs:
        result = period_range(freq=freq, name="foo", **spec)
        tm.assert_index_equal(result, expected)

    # empty: zero periods, or bounds given in reverse order
    expected = PeriodIndex([], freq=freq, name="foo")

    empty_specs = [
        dict(start=start, periods=0),
        dict(end=end, periods=0),
        dict(start=end, end=start),
    ]
    for spec in empty_specs:
        result = period_range(freq=freq, name="foo", **spec)
        tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_construction_from_period(self):
|
||||
# upsampling
|
||||
start, end = Period("2017Q1", freq="Q"), Period("2018Q1", freq="Q")
|
||||
expected = date_range(
|
||||
start="2017-03-31", end="2018-03-31", freq="M", name="foo"
|
||||
).to_period()
|
||||
result = period_range(start=start, end=end, freq="M", name="foo")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# downsampling
|
||||
start, end = Period("2017-1", freq="M"), Period("2019-12", freq="M")
|
||||
expected = date_range(
|
||||
start="2017-01-31", end="2019-12-31", freq="Q", name="foo"
|
||||
).to_period()
|
||||
result = period_range(start=start, end=end, freq="Q", name="foo")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# empty
|
||||
expected = PeriodIndex([], freq="W", name="foo")
|
||||
|
||||
result = period_range(start=start, periods=0, freq="W", name="foo")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = period_range(end=end, periods=0, freq="W", name="foo")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = period_range(start=end, end=start, freq="W", name="foo")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_errors(self):
|
||||
# not enough params
|
||||
msg = (
|
||||
"Of the three parameters: start, end, and periods, "
|
||||
"exactly two must be specified"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
period_range(start="2017Q1")
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
period_range(end="2017Q1")
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
period_range(periods=5)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
period_range()
|
||||
|
||||
# too many params
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
period_range(start="2017Q1", end="2018Q1", periods=8, freq="Q")
|
||||
|
||||
# start/end NaT
|
||||
msg = "start and end must not be NaT"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
period_range(start=NaT, end="2018Q1")
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
period_range(start="2017Q1", end=NaT)
|
||||
|
||||
# invalid periods param
|
||||
msg = "periods must be a number, got foo"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
period_range(start="2017Q1", periods="foo")
|
||||
@@ -0,0 +1,17 @@
|
||||
"""Tests for PeriodIndex behaving like a vectorized Period scalar"""
|
||||
|
||||
from pandas import Timedelta, date_range, period_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestPeriodIndexOps:
    """``start_time`` / ``end_time`` of a monthly ``PeriodIndex``."""

    def test_start_time(self):
        """``start_time`` matches a month-start ``date_range`` over the same span."""
        months = period_range(freq="M", start="2016-01-01", end="2016-05-31")
        month_starts = date_range("2016-01-01", end="2016-05-31", freq="MS")
        tm.assert_index_equal(months.start_time, month_starts)

    def test_end_time(self):
        """``end_time`` is one nanosecond before the day after each month end."""
        months = period_range(freq="M", start="2016-01-01", end="2016-05-31")
        month_ends = date_range("2016-01-01", end="2016-05-31", freq="M")
        # shift from midnight of the last day to the final nanosecond of that day
        to_last_instant = Timedelta(1, "D") - Timedelta(1, "ns")
        month_ends += to_last_instant
        tm.assert_index_equal(months.end_time, month_ends)
|
||||
@@ -0,0 +1,355 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, PeriodIndex, date_range, period_range
|
||||
import pandas.core.indexes.period as period
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def _permute(obj):
    """Return the elements of ``obj`` taken in a random order.

    ``obj`` must support ``len`` and ``take``; the order comes from
    ``np.random.permutation`` and therefore depends on NumPy's global RNG.
    """
    shuffled_positions = np.random.permutation(len(obj))
    return obj.take(shuffled_positions)
|
||||
|
||||
|
||||
class TestPeriodIndex:
    """Join and set-operation behaviour of ``PeriodIndex``."""

    def test_joins(self, join_type):
        """Joining with a shorter slice keeps the index type and frequency."""
        daily = period_range("1/1/2000", "1/20/2000", freq="D")
        shorter = daily[:-5]

        joined = daily.join(shorter, how=join_type)

        assert isinstance(joined, PeriodIndex)
        assert joined.freq == daily.freq

    def test_join_self(self, join_type):
        """Joining an index with itself hands back the very same object."""
        daily = period_range("1/1/2000", "1/20/2000", freq="D")
        assert daily is daily.join(daily, how=join_type)

    def test_join_does_not_recur(self):
        """Outer-joining the frame's row index with its columns gives an object Index."""
        frame = tm.makeCustomDataframe(
            3,
            2,
            data_gen_f=lambda *args: np.random.randint(2),
            c_idx_type="p",
            r_idx_type="dt",
        )
        row_labels = frame.iloc[:2, 0].index
        col_labels = frame.columns

        joined = row_labels.join(col_labels, how="outer")
        wanted = Index(
            [row_labels[0], row_labels[1], col_labels[0], col_labels[1]], object
        )
        tm.assert_index_equal(joined, wanted)

    @pytest.mark.parametrize("sort", [None, False])
    def test_union(self, sort):
        """Union of assorted index pairs, with and without sorting."""
        # each entry is (caller, argument, expected result when sort=False)
        cases = [
            (
                pd.period_range("1/6/2000", freq="D", periods=5),
                pd.period_range("1/1/2000", freq="D", periods=5),
                pd.PeriodIndex(
                    [
                        "2000-01-06",
                        "2000-01-07",
                        "2000-01-08",
                        "2000-01-09",
                        "2000-01-10",
                        "2000-01-01",
                        "2000-01-02",
                        "2000-01-03",
                        "2000-01-04",
                        "2000-01-05",
                    ],
                    freq="D",
                ),
            ),
            (
                pd.period_range("1/1/2000", freq="D", periods=5),
                pd.period_range("1/4/2000", freq="D", periods=5),
                pd.period_range("1/1/2000", freq="D", periods=8),
            ),
            (
                pd.period_range("1/1/2000", freq="D", periods=5),
                pd.PeriodIndex([], freq="D"),
                pd.period_range("1/1/2000", freq="D", periods=5),
            ),
            (
                pd.period_range("2000-01-01 09:00", freq="H", periods=5),
                pd.period_range("2000-01-02 09:00", freq="H", periods=5),
                pd.PeriodIndex(
                    [
                        "2000-01-01 09:00",
                        "2000-01-01 10:00",
                        "2000-01-01 11:00",
                        "2000-01-01 12:00",
                        "2000-01-01 13:00",
                        "2000-01-02 09:00",
                        "2000-01-02 10:00",
                        "2000-01-02 11:00",
                        "2000-01-02 12:00",
                        "2000-01-02 13:00",
                    ],
                    freq="H",
                ),
            ),
            (
                pd.PeriodIndex(
                    ["2000-01-01 09:01", "2000-01-01 09:03", "2000-01-01 09:05"],
                    freq="T",
                ),
                pd.PeriodIndex(
                    ["2000-01-01 09:01", "2000-01-01 09:05", "2000-01-01 09:08"],
                    freq="T",
                ),
                pd.PeriodIndex(
                    [
                        "2000-01-01 09:01",
                        "2000-01-01 09:03",
                        "2000-01-01 09:05",
                        "2000-01-01 09:08",
                    ],
                    freq="T",
                ),
            ),
            (
                pd.period_range("2000-01-01", freq="M", periods=7),
                pd.period_range("2000-04-01", freq="M", periods=7),
                pd.period_range("2000-01-01", freq="M", periods=10),
            ),
            (
                pd.period_range("2003-01-01", freq="A", periods=5),
                pd.period_range("1998-01-01", freq="A", periods=8),
                pd.PeriodIndex(
                    [
                        "2003",
                        "2004",
                        "2005",
                        "2006",
                        "2007",
                        "1998",
                        "1999",
                        "2000",
                        "2001",
                        "2002",
                    ],
                    freq="A",
                ),
            ),
            (
                pd.PeriodIndex(
                    ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"],
                    freq="D",
                ),
                pd.period_range("1/6/2000", freq="D", periods=5),
                pd.PeriodIndex(
                    [
                        "1/3/2000",
                        "1/2/2000",
                        "1/1/2000",
                        "1/5/2000",
                        "1/4/2000",
                        "1/6/2000",
                        "1/7/2000",
                        "1/8/2000",
                        "1/9/2000",
                        "1/10/2000",
                    ],
                    freq="D",
                ),
            ),
        ]

        for caller, argument, unsorted_expected in cases:
            combined = caller.union(argument, sort=sort)
            # with sort=None the result is compared against the sorted expectation
            wanted = (
                unsorted_expected.sort_values() if sort is None else unsorted_expected
            )
            tm.assert_index_equal(combined, wanted)

    @pytest.mark.parametrize("sort", [None, False])
    def test_union_misc(self, sort):
        """Union of overlapping slices, shuffled inputs and mismatched freqs."""
        daily = period_range("1/1/2000", "1/20/2000", freq="D")
        head, tail = daily[:-5], daily[10:]

        tm.assert_index_equal(head.union(tail, sort=sort), daily)

        # not in order
        shuffled_head = _permute(head)
        shuffled_tail = _permute(tail)
        combined = shuffled_head.union(shuffled_tail, sort=sort)
        if sort is None:
            tm.assert_index_equal(combined, daily)
        assert tm.equalContents(combined, daily)

        # raise if different frequencies
        daily = period_range("1/1/2000", "1/20/2000", freq="D")
        weekly = period_range("1/1/2000", "1/20/2000", freq="W-WED")
        with pytest.raises(period.IncompatibleFrequency):
            daily.union(weekly, sort=sort)

        two_daily = period_range("1/1/2000", "1/20/2000", freq="2D")
        with pytest.raises(period.IncompatibleFrequency):
            daily.join(two_daily)

    def test_union_dataframe_index(self):
        """A frame built from two monthly series spans both of their ranges."""
        later_months = pd.period_range("1/1/1999", "1/1/2012", freq="M")
        later_series = pd.Series(np.random.randn(len(later_months)), later_months)

        earlier_months = pd.period_range("1/1/1980", "12/1/2001", freq="M")
        earlier_series = pd.Series(
            np.random.randn(len(earlier_months)), earlier_months
        )

        frame = pd.DataFrame({"s1": later_series, "s2": earlier_series})

        full_span = pd.period_range("1/1/1980", "1/1/2012", freq="M")
        tm.assert_index_equal(frame.index, full_span)

    @pytest.mark.parametrize("sort", [None, False])
    def test_intersection(self, sort):
        """Intersection of overlapping slices, shuffled inputs and mismatched freqs."""
        daily = period_range("1/1/2000", "1/20/2000", freq="D")
        overlap = daily[10:-5]

        common = daily[:-5].intersection(daily[10:], sort=sort)
        tm.assert_index_equal(common, overlap)

        # not in order
        shuffled_head = _permute(daily[:-5])
        shuffled_tail = _permute(daily[10:])
        common = shuffled_head.intersection(shuffled_tail, sort=sort)
        if sort is None:
            tm.assert_index_equal(common, daily[10:-5])
        assert tm.equalContents(common, daily[10:-5])

        # raise if different frequencies
        daily = period_range("1/1/2000", "1/20/2000", freq="D")
        weekly = period_range("1/1/2000", "1/20/2000", freq="W-WED")
        with pytest.raises(period.IncompatibleFrequency):
            daily.intersection(weekly, sort=sort)

        two_daily = period_range("1/1/2000", "1/20/2000", freq="2D")
        with pytest.raises(period.IncompatibleFrequency):
            daily.intersection(two_daily, sort=sort)

    @pytest.mark.parametrize("sort", [None, False])
    def test_intersection_cases(self, sort):
        """Name and freq handling of intersection for ordered and unordered bases."""
        june = period_range("6/1/2000", "6/30/2000", freq="D", name="idx")

        ordered_cases = [
            # if target has the same name, it is preserved
            (
                period_range("5/15/2000", "6/20/2000", freq="D", name="idx"),
                period_range("6/1/2000", "6/20/2000", freq="D", name="idx"),
            ),
            # if target name is different, it will be reset
            (
                period_range("5/15/2000", "6/20/2000", freq="D", name="other"),
                period_range("6/1/2000", "6/20/2000", freq="D", name=None),
            ),
            # a July target shares nothing with the June base
            (
                period_range("7/1/2000", "7/31/2000", freq="D", name="idx"),
                PeriodIndex([], name="idx", freq="D"),
            ),
        ]
        for target, wanted in ordered_cases:
            common = june.intersection(target, sort=sort)
            tm.assert_index_equal(common, wanted)
            assert common.name == wanted.name
            assert common.freq == wanted.freq

        # non-monotonic
        scrambled = PeriodIndex(
            ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"],
            freq="D",
            name="idx",
        )

        unordered_cases = [
            (
                PeriodIndex(
                    ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
                    freq="D",
                    name="idx",
                ),
                PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name="idx"),
            ),
            (
                PeriodIndex(
                    ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
                    freq="D",
                    name="other",
                ),
                PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name=None),
            ),
            (
                period_range("7/1/2000", "7/31/2000", freq="D", name="idx"),
                PeriodIndex([], freq="D", name="idx"),
            ),
        ]
        for target, unsorted_wanted in unordered_cases:
            common = scrambled.intersection(target, sort=sort)
            wanted = (
                unsorted_wanted.sort_values() if sort is None else unsorted_wanted
            )
            tm.assert_index_equal(common, wanted)
            assert common.name == wanted.name
            assert common.freq == "D"

        # empty same freq
        minutes = date_range("6/1/2000", "6/15/2000", freq="T")
        assert len(minutes[0:0].intersection(minutes)) == 0
        assert len(minutes.intersection(minutes[0:0])) == 0

    @pytest.mark.parametrize("sort", [None, False])
    def test_difference(self, sort):
        """Difference of assorted index pairs, with and without sorting."""
        # diff
        unordered_days = ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"]
        unordered_hours = [
            "2000-01-01 10:00",
            "2000-01-01 09:00",
            "2000-01-01 12:00",
            "2000-01-01 11:00",
            "2000-01-01 13:00",
        ]
        unordered_months = [
            "2000-02-01",
            "2000-01-01",
            "2000-06-01",
            "2000-07-01",
            "2000-05-01",
            "2000-03-01",
            "2000-04-01",
        ]
        unordered_years = ["2003", "2007", "2006", "2005", "2004"]

        # callers whose expected result is the caller itself
        days_disjoint = pd.PeriodIndex(unordered_days, freq="D")
        days_vs_empty = pd.PeriodIndex(unordered_days, freq="D")
        hours_disjoint = pd.PeriodIndex(unordered_hours, freq="H")

        # each entry is (caller, argument, expected result when sort=False)
        cases = [
            (
                days_disjoint,
                pd.period_range("1/6/2000", freq="D", periods=5),
                days_disjoint,
            ),
            (
                pd.PeriodIndex(unordered_days, freq="D"),
                pd.period_range("1/4/2000", freq="D", periods=5),
                pd.PeriodIndex(["1/3/2000", "1/2/2000", "1/1/2000"], freq="D"),
            ),
            (
                days_vs_empty,
                pd.PeriodIndex([], freq="D"),
                days_vs_empty,
            ),
            (
                hours_disjoint,
                pd.period_range("2000-01-02 09:00", freq="H", periods=5),
                hours_disjoint,
            ),
            (
                pd.PeriodIndex(
                    ["2000-01-01 09:03", "2000-01-01 09:01", "2000-01-01 09:05"],
                    freq="T",
                ),
                pd.PeriodIndex(["2000-01-01 09:01", "2000-01-01 09:05"], freq="T"),
                pd.PeriodIndex(["2000-01-01 09:03"], freq="T"),
            ),
            (
                pd.PeriodIndex(unordered_months, freq="M"),
                pd.period_range("2000-04-01", freq="M", periods=7),
                pd.PeriodIndex(["2000-02-01", "2000-01-01", "2000-03-01"], freq="M"),
            ),
            (
                pd.PeriodIndex(unordered_years, freq="A"),
                pd.period_range("1998-01-01", freq="A", periods=8),
                pd.PeriodIndex(["2007", "2006"], freq="A"),
            ),
        ]

        for caller, argument, unsorted_expected in cases:
            remaining = caller.difference(argument, sort=sort)
            wanted = (
                unsorted_expected.sort_values() if sort is None else unsorted_expected
            )
            tm.assert_index_equal(remaining, wanted)
|
||||
@@ -0,0 +1,352 @@
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs.ccalendar import MONTHS
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Period,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
to_datetime,
|
||||
)
|
||||
import pandas.core.indexes.period as period
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestPeriodRepresentation:
    """
    Period ordinals should line up with NumPy's epoch-based units.
    """

    def _check_freq(self, freq, base_date):
        """Assert that ten periods starting at ``base_date`` have ordinals 0..9."""
        ten_periods = period_range(start=base_date, periods=10, freq=freq)
        zero_to_nine = np.arange(10, dtype=np.int64)

        tm.assert_numpy_array_equal(ten_periods.asi8, zero_to_nine)

    def test_annual(self):
        """Annual periods count from 1970."""
        self._check_freq("A", 1970)

    def test_monthly(self):
        """Monthly periods count from 1970-01."""
        self._check_freq("M", "1970-01")

    @pytest.mark.parametrize("freq", ["W-THU", "D", "B", "H", "T", "S", "L", "U", "N"])
    def test_freq(self, freq):
        """Sub-monthly frequencies count from 1970-01-01."""
        self._check_freq(freq, "1970-01-01")

    def test_negone_ordinals(self):
        """Periods with ordinal -1 can be repr'd and fall in 1969."""
        common_freqs = ["A", "M", "Q", "D", "H", "T", "S"]

        # converting the day before the epoch to each frequency must be printable
        day_before_epoch = Period(ordinal=-1, freq="D")
        for target_freq in common_freqs:
            repr(day_before_epoch.asfreq(target_freq))

        # a -1 ordinal built directly at each frequency lands in 1969
        for own_freq in common_freqs:
            minus_one = Period(ordinal=-1, freq=own_freq)
            repr(minus_one)
            assert minus_one.year == 1969

        # business-day and weekly periods only need to be printable
        for own_freq in ("B", "W"):
            repr(Period(ordinal=-1, freq=own_freq))
|
||||
|
||||
|
||||
class TestPeriodIndex:
    """Conversions between period-based and timestamp-based indexes."""

    def test_to_timestamp(self):
        """``Series.to_timestamp`` on annual and hourly period indexes."""
        index = period_range(freq="A", start="1/1/2001", end="12/1/2009")
        series = Series(1, index=index, name="foo")

        # how="end": last nanosecond of each year-end day
        exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC")
        result = series.to_timestamp(how="end")
        exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns")
        tm.assert_index_equal(result.index, exp_index)
        assert result.name == "foo"

        # how="start": first day of each year
        exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN")
        result = series.to_timestamp(how="start")
        tm.assert_index_equal(result.index, exp_index)

        def _get_with_delta(delta, freq="A-DEC"):
            # year-end range with both endpoints shifted by ``delta``
            return date_range(
                to_datetime("1/1/2001") + delta,
                to_datetime("12/31/2009") + delta,
                freq=freq,
            )

        # target freq "H": end is the last nanosecond of the final hour
        delta = timedelta(hours=23)
        result = series.to_timestamp("H", "end")
        exp_index = _get_with_delta(delta)
        exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns")
        tm.assert_index_equal(result.index, exp_index)

        # target freq "T": end is the last nanosecond of the final minute
        delta = timedelta(hours=23, minutes=59)
        result = series.to_timestamp("T", "end")
        exp_index = _get_with_delta(delta)
        exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns")
        tm.assert_index_equal(result.index, exp_index)

        # target freq "S": end is the last nanosecond of the final second
        result = series.to_timestamp("S", "end")
        delta = timedelta(hours=23, minutes=59, seconds=59)
        exp_index = _get_with_delta(delta)
        exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns")
        tm.assert_index_equal(result.index, exp_index)

        # hourly periods
        index = period_range(freq="H", start="1/1/2001", end="1/2/2001")
        series = Series(1, index=index, name="foo")

        exp_index = date_range("1/1/2001 00:59:59", end="1/2/2001 00:59:59", freq="H")
        result = series.to_timestamp(how="end")
        exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns")
        tm.assert_index_equal(result.index, exp_index)
        assert result.name == "foo"

    def test_to_timestamp_freq(self):
        """Annual (A-DEC) periods convert to an AS-JAN ``DatetimeIndex``."""
        idx = pd.period_range("2017", periods=12, freq="A-DEC")
        result = idx.to_timestamp()
        expected = pd.date_range("2017", periods=12, freq="AS-JAN")
        tm.assert_index_equal(result, expected)

    def test_to_timestamp_repr_is_code(self):
        """``repr`` of a ``Timestamp`` evaluates back to an equal ``Timestamp``."""
        zs = [
            Timestamp("99-04-17 00:00:00", tz="UTC"),
            Timestamp("2001-04-17 00:00:00", tz="UTC"),
            Timestamp("2001-04-17 00:00:00", tz="America/Los_Angeles"),
            Timestamp("2001-04-17 00:00:00", tz=None),
        ]
        for z in zs:
            # eval is applied only to reprs of the objects built just above
            assert eval(repr(z)) == z

    def test_to_timestamp_to_period_astype(self):
        """``DatetimeIndex.astype('period[...]')`` keeps NaT and the name."""
        idx = DatetimeIndex([pd.NaT, "2011-01-01", "2011-02-01"], name="idx")

        res = idx.astype("period[M]")
        exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx")
        tm.assert_index_equal(res, exp)

        res = idx.astype("period[3M]")
        exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx")
        tm.assert_index_equal(res, exp)

    def test_dti_to_period(self):
        """``DatetimeIndex.to_period`` with default, daily and 3-daily freq."""
        dti = pd.date_range(start="1/1/2005", end="12/1/2005", freq="M")
        pi1 = dti.to_period()
        pi2 = dti.to_period(freq="D")
        pi3 = dti.to_period(freq="3D")

        assert pi1[0] == Period("Jan 2005", freq="M")
        assert pi2[0] == Period("1/31/2005", freq="D")
        assert pi3[0] == Period("1/31/2005", freq="3D")

        assert pi1[-1] == Period("Nov 2005", freq="M")
        assert pi2[-1] == Period("11/30/2005", freq="D")
        # compare with ``==``: the former ``assert x, y`` form used the Period
        # as the assertion message, so the check could never fail
        assert pi3[-1] == Period("11/30/2005", freq="3D")

        tm.assert_index_equal(pi1, period_range("1/1/2005", "11/1/2005", freq="M"))
        tm.assert_index_equal(
            pi2, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("D")
        )
        tm.assert_index_equal(
            pi3, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("3D")
        )

    @pytest.mark.parametrize("month", MONTHS)
    def test_to_period_quarterly(self, month):
        """Quarterly periods survive a ``to_timestamp``/``to_period`` round trip."""
        # make sure we can make the round trip
        freq = "Q-{month}".format(month=month)
        rng = period_range("1989Q3", "1991Q3", freq=freq)
        stamps = rng.to_timestamp()
        result = stamps.to_period(freq)
        tm.assert_index_equal(rng, result)

    @pytest.mark.parametrize("off", ["BQ", "QS", "BQS"])
    def test_to_period_quarterlyish(self, off):
        """Quarter-like date offsets convert to ``Q-DEC`` periods."""
        rng = date_range("01-Jan-2012", periods=8, freq=off)
        prng = rng.to_period()
        assert prng.freq == "Q-DEC"

    @pytest.mark.parametrize("off", ["BA", "AS", "BAS"])
    def test_to_period_annualish(self, off):
        """Year-like date offsets convert to ``A-DEC`` periods."""
        rng = date_range("01-Jan-2012", periods=8, freq=off)
        prng = rng.to_period()
        assert prng.freq == "A-DEC"

    def test_to_period_monthish(self):
        """Month-like date offsets convert to ``M``; ``EOM`` is rejected."""
        offsets = ["MS", "BM"]
        for off in offsets:
            rng = date_range("01-Jan-2012", periods=8, freq=off)
            prng = rng.to_period()
            assert prng.freq == "M"

        rng = date_range("01-Jan-2012", periods=8, freq="M")
        prng = rng.to_period()
        assert prng.freq == "M"

        msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG
        with pytest.raises(ValueError, match=msg):
            date_range("01-Jan-2012", periods=8, freq="EOM")

    def test_period_dt64_round_trip(self):
        """Business-day timestamps survive ``to_period`` then ``to_timestamp``."""
        dti = date_range("1/1/2000", "1/7/2002", freq="B")
        pi = dti.to_period()
        tm.assert_index_equal(pi.to_timestamp(), dti)

        dti = date_range("1/1/2000", "1/7/2002", freq="B")
        pi = dti.to_period(freq="H")
        tm.assert_index_equal(pi.to_timestamp(), dti)

    def test_combine_first(self):
        """``Series.combine_first`` behaves the same on datetime and period indexes."""
        # GH#3367
        didx = pd.date_range(start="1950-01-31", end="1950-07-31", freq="M")
        pidx = pd.period_range(
            start=pd.Period("1950-1"), end=pd.Period("1950-7"), freq="M"
        )
        # check to be consistent with DatetimeIndex
        for idx in [didx, pidx]:
            a = pd.Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx)
            b = pd.Series([9, 9, 9, 9, 9, 9, 9], index=idx)

            result = a.combine_first(b)
            expected = pd.Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64)
            tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("freq", ["D", "2D"])
    def test_searchsorted(self, freq):
        """``searchsorted`` locates matching periods and rejects other freqs."""
        pidx = pd.PeriodIndex(
            ["2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05"],
            freq=freq,
        )

        p1 = pd.Period("2014-01-01", freq=freq)
        assert pidx.searchsorted(p1) == 0

        p2 = pd.Period("2014-01-04", freq=freq)
        assert pidx.searchsorted(p2) == 3

        msg = "Input has different freq=H from PeriodIndex"
        with pytest.raises(period.IncompatibleFrequency, match=msg):
            pidx.searchsorted(pd.Period("2014-01-01", freq="H"))

        msg = "Input has different freq=5D from PeriodIndex"
        with pytest.raises(period.IncompatibleFrequency, match=msg):
            pidx.searchsorted(pd.Period("2014-01-01", freq="5D"))
|
||||
|
||||
|
||||
class TestPeriodIndexConversion:
    """``PeriodIndex`` conversions to lists and to timestamp indexes."""

    def test_tolist(self):
        """``tolist`` yields ``Period`` objects that rebuild the same index."""
        index = period_range(freq="A", start="1/1/2001", end="12/1/2009")
        rs = index.tolist()
        for x in rs:
            assert isinstance(x, Period)

        recon = PeriodIndex(rs)
        tm.assert_index_equal(index, recon)

    def test_to_timestamp_pi_nat(self):
        """NaT entries and the name survive ``to_timestamp``/``to_period``."""
        # GH#7228
        index = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx")

        result = index.to_timestamp("D")
        expected = DatetimeIndex(
            [pd.NaT, datetime(2011, 1, 1), datetime(2011, 2, 1)], name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.name == "idx"

        result2 = result.to_period(freq="M")
        tm.assert_index_equal(result2, index)
        assert result2.name == "idx"

        result3 = result.to_period(freq="3M")
        exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx")
        tm.assert_index_equal(result3, exp)
        assert result3.freqstr == "3M"

        # a negative multiple is not a valid period span
        msg = "Frequency must be positive, because it represents span: -2A"
        with pytest.raises(ValueError, match=msg):
            result.to_period(freq="-2A")

    def test_to_timestamp_preserve_name(self):
        """``to_timestamp`` carries the index name over."""
        index = period_range(freq="A", start="1/1/2001", end="12/1/2009", name="foo")
        assert index.name == "foo"

        conv = index.to_timestamp("D")
        assert conv.name == "foo"

    def test_to_timestamp_quarterly_bug(self):
        """Vectorised end-of-quarter conversion matches the per-element result."""
        years = np.arange(1960, 2000).repeat(4)
        quarters = np.tile(list(range(1, 5)), 40)

        pindex = PeriodIndex(year=years, quarter=quarters)

        stamps = pindex.to_timestamp("D", "end")
        expected = DatetimeIndex([x.to_timestamp("D", "end") for x in pindex])
        tm.assert_index_equal(stamps, expected)

    def test_to_timestamp_pi_mult(self):
        """``to_timestamp`` on a ``2M`` index, at period start and period end."""
        idx = PeriodIndex(["2011-01", "NaT", "2011-02"], freq="2M", name="idx")

        result = idx.to_timestamp()
        expected = DatetimeIndex(["2011-01-01", "NaT", "2011-02-01"], name="idx")
        tm.assert_index_equal(result, expected)

        result = idx.to_timestamp(how="E")
        expected = DatetimeIndex(["2011-02-28", "NaT", "2011-03-31"], name="idx")
        # move from midnight of the last day to that day's final nanosecond
        expected = expected + Timedelta(1, "D") - Timedelta(1, "ns")
        tm.assert_index_equal(result, expected)

    def test_to_timestamp_pi_combined(self):
        """``to_timestamp`` on a combined ``1D1H`` frequency index."""
        idx = period_range(start="2011", periods=2, freq="1D1H", name="idx")

        result = idx.to_timestamp()
        expected = DatetimeIndex(["2011-01-01 00:00", "2011-01-02 01:00"], name="idx")
        tm.assert_index_equal(result, expected)

        result = idx.to_timestamp(how="E")
        expected = DatetimeIndex(
            ["2011-01-02 00:59:59", "2011-01-03 01:59:59"], name="idx"
        )
        expected = expected + Timedelta(1, "s") - Timedelta(1, "ns")
        tm.assert_index_equal(result, expected)

        result = idx.to_timestamp(how="E", freq="H")
        expected = DatetimeIndex(["2011-01-02 00:00", "2011-01-03 01:00"], name="idx")
        expected = expected + Timedelta(1, "h") - Timedelta(1, "ns")
        tm.assert_index_equal(result, expected)

    def test_period_astype_to_timestamp(self):
        """``astype('datetime64[ns...]')`` with default and ``how='end'``."""
        pi = pd.PeriodIndex(["2011-01", "2011-02", "2011-03"], freq="M")

        exp = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"])
        tm.assert_index_equal(pi.astype("datetime64[ns]"), exp)

        exp = pd.DatetimeIndex(["2011-01-31", "2011-02-28", "2011-03-31"])
        exp = exp + Timedelta(1, "D") - Timedelta(1, "ns")
        tm.assert_index_equal(pi.astype("datetime64[ns]", how="end"), exp)

        exp = pd.DatetimeIndex(
            ["2011-01-01", "2011-02-01", "2011-03-01"], tz="US/Eastern"
        )
        res = pi.astype("datetime64[ns, US/Eastern]")
        # assert on the value already computed instead of converting a second time
        tm.assert_index_equal(res, exp)

        exp = pd.DatetimeIndex(
            ["2011-01-31", "2011-02-28", "2011-03-31"], tz="US/Eastern"
        )
        exp = exp + Timedelta(1, "D") - Timedelta(1, "ns")
        res = pi.astype("datetime64[ns, US/Eastern]", how="end")
        tm.assert_index_equal(res, exp)

    def test_to_timestamp_1703(self):
        """First element of a converted daily index is the expected ``Timestamp``."""
        index = period_range("1/1/2012", periods=4, freq="D")

        result = index.to_timestamp()
        assert result[0] == Timestamp("1/1/2012")
|
||||
2807
venv/lib/python3.6/site-packages/pandas/tests/indexes/test_base.py
Normal file
2807
venv/lib/python3.6/site-packages/pandas/tests/indexes/test_base.py
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,358 @@
|
||||
"""
|
||||
Collection of tests asserting things that should be true for
|
||||
any index subclass. Makes use of the `indices` fixture defined
|
||||
in pandas/tests/indexes/conftest.py.
|
||||
"""
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import iNaT
|
||||
|
||||
from pandas.core.dtypes.common import needs_i8_conversion
|
||||
|
||||
import pandas as pd
|
||||
from pandas import CategoricalIndex, MultiIndex, RangeIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCommon:
|
||||
def test_droplevel(self, indices):
    """``droplevel`` on a flat index: no-op for ``[]``, errors otherwise."""
    # GH 21115
    if isinstance(indices, MultiIndex):
        # Tested separately in test_multi.py
        return

    # dropping no levels leaves the index unchanged
    unchanged = indices.droplevel([])
    assert unchanged.equals(indices)

    name_is_tuple = isinstance(indices.name, tuple)
    for level in (indices.name, [indices.name]):
        if name_is_tuple and level is indices.name:
            # GH 21121 : droplevel with tuple name
            continue
        with pytest.raises(ValueError):
            indices.droplevel(level)

    unknown_level = re.escape("'Level wrong must be same as name (None)'")
    for level in ("wrong", ["wrong"]):
        with pytest.raises(KeyError, match=unknown_level):
            indices.droplevel(level)
|
||||
|
||||
def test_constructor_non_hashable_name(self, indices):
    """An unhashable name is rejected by both ``rename`` and ``set_names``."""
    # GH 20527
    if isinstance(indices, MultiIndex):
        pytest.skip("multiindex handled in test_multi.py")

    unhashable = [["1"]]
    expected_error = "Index.name must be a hashable type"

    # With .rename()
    with pytest.raises(TypeError, match=expected_error):
        indices.rename(name=unhashable)

    # With .set_names()
    with pytest.raises(TypeError, match=expected_error):
        indices.set_names(names=unhashable)
|
||||
|
||||
def test_constructor_unwraps_index(self, indices):
    """Re-wrapping an index in its own type yields equal underlying ``_data``."""
    if isinstance(indices, MultiIndex):
        # pytest.skip raises by itself; no ``raise`` keyword is needed
        pytest.skip("MultiIndex has no ._data")
    a = indices
    b = type(a)(a)
    tm.assert_equal(a._data, b._data)
|
||||
|
||||
@pytest.mark.parametrize("itm", [101, "no_int"])
# FutureWarning from non-tuple sequence of nd indexing
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_getitem_error(self, indices, itm):
    """Indexing with each parametrized ``itm`` raises ``IndexError``."""
    bad_key = itm
    with pytest.raises(IndexError):
        indices[bad_key]
|
||||
|
||||
@pytest.mark.parametrize(
    "fname, sname, expected_name",
    [
        ("A", "A", "A"),
        ("A", "B", None),
        ("A", None, None),
        (None, "B", None),
        (None, None, None),
    ],
)
def test_corner_union(self, indices, fname, sname, expected_name):
    """Name of a union for every full/empty pairing of an index with itself."""
    # GH 9943 9862
    # Test unions with various name combinations
    # Do not test MultiIndex or repeats
    if isinstance(indices, MultiIndex) or not indices.is_unique:
        pytest.skip("Not for MultiIndex or repeated indices")

    def full():
        # fresh copy holding every element
        return indices.copy()

    def empty():
        # same type with every element dropped
        return indices.drop(indices)

    # (left operand, right operand, expected result), in the original order:
    # copy.union(copy), copy.union(empty), empty.union(copy), empty.union(empty)
    scenarios = (
        (full, full, full),
        (full, empty, full),
        (empty, full, full),
        (empty, empty, empty),
    )
    for make_first, make_second, make_expected in scenarios:
        first = make_first().set_names(fname)
        second = make_second().set_names(sname)
        union = first.union(second)
        expected = make_expected().set_names(expected_name)
        tm.assert_index_equal(union, expected)
|
||||
|
||||
def test_to_flat_index(self, indices):
    """``to_flat_index`` on a non-MultiIndex must return an equal index (# 22866)."""
    is_multi = isinstance(indices, MultiIndex)
    if is_multi:
        pytest.skip("Separate expectation for MultiIndex")

    flattened = indices.to_flat_index()
    tm.assert_index_equal(flattened, indices)
|
||||
|
||||
def test_wrong_number_names(self, indices):
    """Assigning three names to ``.names`` must raise ValueError matching "^Length"."""
    too_many_names = ["apple", "banana", "carrot"]
    with pytest.raises(ValueError, match="^Length"):
        indices.names = too_many_names
|
||||
|
||||
def test_set_name_methods(self, indices):
    """Check ``set_names`` and in-place ``rename`` on a single-level index.

    ``set_names`` must return a renamed object and leave ``indices``
    untouched.  ``rename(..., inplace=True)`` must return None and update
    both ``name`` and ``names``; a tuple name is stored as-is.

    The original name is put back in a ``finally`` block, mirroring
    ``test_pickle`` in this file, so the in-place renames do not leak out of
    this test even when an assertion fails.
    NOTE(review): this presumes the ``indices`` fixture object may be shared
    between tests -- confirm against the fixture definition.
    """
    new_name = "This is the new name for this index"

    # don't test a MultiIndex here (it is tested separately)
    if isinstance(indices, MultiIndex):
        pytest.skip("Skip check for MultiIndex")
    original_name = indices.name
    new_ind = indices.set_names([new_name])
    assert new_ind.name == new_name
    assert indices.name == original_name

    try:
        res = indices.rename(new_name, inplace=True)

        # should return None
        assert res is None
        assert indices.name == new_name
        assert indices.names == [new_name]
        # FIXME: dont leave commented-out
        # with pytest.raises(TypeError, match="list-like"):
        #    # should still fail even if it would be the right length
        #    ind.set_names("a")
        with pytest.raises(ValueError, match="Level must be None"):
            indices.set_names("a", level=0)

        # rename in place just leaves tuples and other containers alone
        name = ("A", "B")
        indices.rename(name, inplace=True)
        assert indices.name == name
        assert indices.names == [name]
    finally:
        # undo the in-place renames regardless of the test outcome
        indices.name = original_name
|
||||
|
||||
def test_dtype_str(self, indices):
    """Reading ``dtype_str`` warns (FutureWarning) and yields ``str(indices.dtype)``."""
    with tm.assert_produces_warning(FutureWarning):
        reported = indices.dtype_str
        assert isinstance(reported, str)
        assert reported == str(indices.dtype)
|
||||
|
||||
def test_hash_error(self, indices):
    """``hash(indices)`` must raise TypeError whose message names the index class."""
    expected_msg = "unhashable type: {0.__name__!r}".format(type(indices))
    with pytest.raises(TypeError, match=expected_msg):
        hash(indices)
|
||||
|
||||
def test_copy_and_deepcopy(self, indices):
    """``copy.copy``/``copy.deepcopy`` give distinct but equal objects; ``copy(name=...)`` renames."""
    from copy import copy, deepcopy

    if isinstance(indices, MultiIndex):
        pytest.skip("Skip check for MultiIndex")

    for duplicate in (copy, deepcopy):
        clone = duplicate(indices)
        # a new object ...
        assert clone is not indices
        # ... holding the same contents
        assert clone.equals(indices)

    renamed = indices.copy(deep=True, name="banana")
    assert renamed.name == "banana"
|
||||
|
||||
def test_unique(self, indices):
    """``unique(level=...)`` matches ``drop_duplicates`` and rejects invalid levels."""
    # don't test a MultiIndex here (it is tested separately)
    # don't test a CategoricalIndex because categories change (GH 18291)
    if isinstance(indices, (MultiIndex, CategoricalIndex)):
        pytest.skip("Skip check for MultiIndex/CategoricalIndex")

    # GH 17896
    deduplicated = indices.drop_duplicates()
    valid_levels = (0, indices.name, None)
    for level in valid_levels:
        tm.assert_index_equal(indices.unique(level=level), deduplicated)

    # an integer level beyond the single available one
    too_many = "Too many levels: Index has only 1 level, not 4"
    with pytest.raises(IndexError, match=too_many):
        indices.unique(level=3)

    # a label that is not the index name
    escaped_name = re.escape(repr(indices.name))
    wrong_level = r"Level wrong must be same as name \({}\)".format(escaped_name)
    with pytest.raises(KeyError, match=wrong_level):
        indices.unique(level="wrong")
|
||||
|
||||
def test_get_unique_index(self, indices):
    """Exercise ``_get_unique_index`` with repeated values, then with a missing value.

    The first half repeats element 0 five times and expects a one-element
    result for both ``dropna`` settings.  The second half (skipped when the
    index cannot hold NA) overwrites the first repeated value with
    ``iNaT``/``np.nan`` and checks the result with and without ``dropna``.
    """
    # MultiIndex tested separately
    if not len(indices) or isinstance(indices, MultiIndex):
        pytest.skip("Skip check for empty Index and MultiIndex")

    repeated = indices[[0] * 5]
    single = indices[[0]]

    # We test against `single`, so first we make sure it's unique
    # and doesn't contain nans.
    assert single.is_unique is True
    try:
        assert single.hasnans is False
    except NotImplementedError:
        pass

    for dropna in (False, True):
        tm.assert_index_equal(repeated._get_unique_index(dropna=dropna), single)

    # nans:
    if not indices._can_hold_na:
        pytest.skip("Skip na-check if index cannot hold na")

    # pick the raw values and the matching missing-value marker
    if needs_i8_conversion(indices):
        raw, missing = indices.asi8, iNaT
    else:
        raw, missing = indices.values, np.nan
    vals = raw[[0] * 5]
    vals[0] = missing

    # first two entries: the missing marker followed by the original value
    vals_unique = vals[:2]
    with_nan = indices._shallow_copy(vals)
    unique_with_nan = indices._shallow_copy(vals_unique)
    assert unique_with_nan.is_unique is True

    assert with_nan.dtype == indices.dtype
    assert unique_with_nan.dtype == indices.dtype

    expectations = ((False, unique_with_nan), (True, single))
    for dropna, expected in expectations:
        for candidate in (with_nan, unique_with_nan):
            outcome = candidate._get_unique_index(dropna=dropna)
            tm.assert_index_equal(outcome, expected)
|
||||
|
||||
def test_sort(self, indices):
    """Calling ``sort()`` on an index must raise TypeError with the documented message."""
    expected_msg = "cannot sort an Index object in-place, use sort_values instead"
    with pytest.raises(TypeError, match=expected_msg):
        indices.sort()
|
||||
|
||||
def test_mutability(self, indices):
    """Item assignment on a non-empty index must raise TypeError."""
    if len(indices) == 0:
        pytest.skip("Skip check for empty Index")

    expected_msg = "Index does not support mutable operations"
    with pytest.raises(TypeError, match=expected_msg):
        indices[0] = indices[0]
|
||||
|
||||
def test_view(self, indices):
    """A default ``view()`` carries the same name as the source index."""
    viewed = indices.view()
    assert viewed.name == indices.name
|
||||
|
||||
def test_compat(self, indices):
    """``tolist()`` must agree with iterating the index into a list."""
    as_list = indices.tolist()
    assert as_list == list(indices)
|
||||
|
||||
def test_searchsorted_monotonic(self, indices):
    """Check ``_searchsorted_monotonic`` (and ``searchsorted`` when increasing).

    GH17271.  The first element is searched for on both sides.
    ``_searchsorted_monotonic`` is checked for increasing and decreasing
    indices, ``searchsorted`` only for increasing ones, and a non-monotonic
    index must raise ValueError.
    """
    # not implemented for tuple searches in MultiIndex
    # or Intervals searches in IntervalIndex
    if isinstance(indices, (MultiIndex, pd.IntervalIndex)):
        pytest.skip("Skip check for MultiIndex/IntervalIndex")

    # nothing to test if the index is empty
    if indices.empty:
        pytest.skip("Skip check for empty Index")
    value = indices[0]

    # determine the expected results (handle dupes for 'right'):
    # argmin of the equality mask is the first position that differs
    expected_left = 0
    expected_right = (indices == value).argmin()
    if expected_right == 0:
        # all values are the same, expected_right should be length
        expected_right = len(indices)

    increasing = indices.is_monotonic_increasing
    if not increasing and not indices.is_monotonic_decreasing:
        # non-monotonic should raise.
        with pytest.raises(ValueError):
            indices._searchsorted_monotonic(value, side="left")
        return

    by_side = (("left", expected_left), ("right", expected_right))

    # test _searchsorted_monotonic in all cases
    for side, expected in by_side:
        found = indices._searchsorted_monotonic(value, side=side)
        assert expected == found

    # test searchsorted only for increasing
    if increasing:
        for side, expected in by_side:
            found = indices.searchsorted(value, side=side)
            assert expected == found
|
||||
|
||||
def test_pickle(self, indices):
    """A pickle round trip of the index (temporarily named "foo") must compare equal.

    The original name is restored in a ``finally`` block so that a failing
    round trip or assertion cannot leave the fixture object named "foo".
    """
    original_name, indices.name = indices.name, "foo"
    try:
        unpickled = tm.round_trip_pickle(indices)
        assert indices.equals(unpickled)
    finally:
        # always undo the temporary rename
        indices.name = original_name
|
||||
|
||||
@pytest.mark.parametrize("keep", ["first", "last", False])
def test_duplicated(self, indices, keep):
    """Cross-check ``Index.duplicated`` against ``Series.duplicated`` on a random resample."""
    if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)):
        # MultiIndex tested separately in:
        # tests/indexes/multi/test_unique_and_duplicates
        pytest.skip("Skip check for empty Index, MultiIndex, RangeIndex")

    holder = type(indices)

    base = holder(indices)
    if base.has_duplicates:
        # We are testing the duplicated-method here, so we need to know
        # exactly which indices are duplicate and how (for the result).
        # This is not possible if "base" has duplicates already, which we
        # therefore remove. This is seemingly circular, as drop_duplicates
        # invokes duplicated, but in the end, it all works out because we
        # cross-check with Series.duplicated, which is tested separately.
        base = base.drop_duplicates()

    n, k = len(base), 10
    # k * n positions drawn at random from range(n)
    positions = np.random.choice(n, k * n)
    expected = pd.Series(positions).duplicated(keep=keep).values
    resampled = holder(base.values[positions])

    tm.assert_numpy_array_equal(resampled.duplicated(keep=keep), expected)
|
||||
|
||||
def test_has_duplicates(self, indices):
    """An index built from five copies of one element is non-unique and has duplicates."""
    holder = type(indices)
    if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)):
        # MultiIndex tested separately in:
        # tests/indexes/multi/test_unique_and_duplicates.
        # RangeIndex is unique by definition.
        pytest.skip("Skip check for empty Index, MultiIndex, and RangeIndex")

    first_element = indices[0]
    repeated = holder([first_element] * 5)
    assert repeated.is_unique is False
    assert repeated.has_duplicates is True
|
||||
@@ -0,0 +1,116 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.indexes.frozen import FrozenList, FrozenNDArray
|
||||
from pandas.tests.test_base import CheckImmutable, CheckStringMixin
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestFrozenList(CheckImmutable, CheckStringMixin):
    """Tests for ``FrozenList``; the mixins read the class/instance attributes set here."""

    mutable_methods = ("extend", "pop", "remove", "insert")
    unicode_container = FrozenList(["\u05d0", "\u05d1", "c"])

    def setup_method(self, _):
        # plain list, its frozen counterpart, and the class under test
        self.lst = [1, 2, 3, 4, 5]
        self.container = FrozenList(self.lst)
        self.klass = FrozenList

    def test_add(self):
        """Adding a tuple on either side yields the concatenated FrozenList."""
        extra = (1, 2, 3)

        appended = self.container + extra
        self.check_result(appended, FrozenList(self.lst + [1, 2, 3]))

        prepended = extra + self.container
        self.check_result(prepended, FrozenList([1, 2, 3] + self.lst))

    def test_iadd(self):
        """``+=`` rebinds the name to a longer result and leaves the other alias alone."""
        grown = untouched = self.container

        grown += [5]
        self.check_result(grown, self.lst + [5])

        # Other shouldn't be mutated.
        self.check_result(untouched, self.lst)

    def test_union(self):
        """``union`` with a tuple gives the list followed by the tuple's items."""
        combined = self.container.union((1, 2, 3))
        self.check_result(combined, FrozenList(self.lst + [1, 2, 3]))

    def test_difference(self):
        """``difference`` drops the listed value."""
        remaining = self.container.difference([2])
        self.check_result(remaining, FrozenList([1, 3, 4, 5]))

    def test_difference_dupe(self):
        """``difference`` drops every occurrence of the listed value."""
        with_dupes = FrozenList([1, 2, 3, 2])
        remaining = with_dupes.difference([2])
        self.check_result(remaining, FrozenList([1, 3]))

    def test_tricky_container_to_bytes_raises(self):
        """``bytes()`` on the unicode container raises TypeError (GH 26447)."""
        # GH 26447
        expected_msg = "^'str' object cannot be interpreted as an integer$"
        with pytest.raises(TypeError, match=expected_msg):
            bytes(self.unicode_container)
|
||||
|
||||
|
||||
class TestFrozenNDArray(CheckImmutable, CheckStringMixin):
    """Tests for ``FrozenNDArray``; the mixins read the class/instance attributes set here."""

    mutable_methods = ("put", "itemset", "fill")

    def setup_method(self, _):
        self.lst = [3, 5, 7, -2]
        self.klass = FrozenNDArray

        # construct the containers with FutureWarning silenced
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("ignore", FutureWarning)

            self.container = FrozenNDArray(self.lst)
            self.unicode_container = FrozenNDArray(["\u05d0", "\u05d1", "c"])

    def test_constructor_warns(self):
        """Constructing a FrozenNDArray emits a FutureWarning."""
        # see gh-9031
        with tm.assert_produces_warning(FutureWarning):
            FrozenNDArray([1, 2, 3])

    def test_tricky_container_to_bytes(self):
        """``bytes()`` on the unicode container must not raise."""
        bytes(self.unicode_container)

    def test_shallow_copying(self):
        """Views and shallow copies keep the frozen type unless asked otherwise."""
        snapshot = self.container.copy()
        default_view = self.container.view()
        assert isinstance(default_view, FrozenNDArray)
        ndarray_view = self.container.view(np.ndarray)
        assert not isinstance(ndarray_view, FrozenNDArray)
        assert self.container.view() is not self.container
        tm.assert_numpy_array_equal(self.container, snapshot)

        # Shallow copy should be the same too
        assert isinstance(self.container._shallow_copy(), FrozenNDArray)

        # setting should not be allowed
        def assign_first(target):
            target[0] = 16

        self.check_mutable_error(assign_first, self.container)

    def test_values(self):
        """``values()`` returns an independent array: editing it leaves the container intact."""
        snapshot = self.container.view(np.ndarray).copy()
        replacement = snapshot[0] + 15

        extracted = self.container.values()
        tm.assert_numpy_array_equal(snapshot, extracted)

        assert snapshot is not extracted
        extracted[0] = replacement

        assert isinstance(self.container, FrozenNDArray)
        tm.assert_numpy_array_equal(self.container.values(), snapshot)
        assert extracted[0] == replacement

    def test_searchsorted(self):
        """``searchsorted(7)`` gives 2; the ``v=`` keyword form also warns."""
        position = 2
        assert self.container.searchsorted(7) == position

        with tm.assert_produces_warning(FutureWarning):
            assert self.container.searchsorted(v=7) == position
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,121 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Float64Index,
|
||||
Index,
|
||||
Int64Index,
|
||||
TimedeltaIndex,
|
||||
UInt64Index,
|
||||
_np_version_under1p17,
|
||||
)
|
||||
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [
        np.exp,
        np.exp2,
        np.expm1,
        np.log,
        np.log2,
        np.log10,
        np.log1p,
        np.sqrt,
        np.sin,
        np.cos,
        np.tan,
        np.arcsin,
        np.arccos,
        np.arctan,
        np.sinh,
        np.cosh,
        np.tanh,
        np.arcsinh,
        np.arccosh,
        np.arctanh,
        np.deg2rad,
        np.rad2deg,
    ],
    ids=lambda x: x.__name__,
)
def test_numpy_ufuncs_basic(indices, func):
    """Apply a float-valued numpy ufunc to each index fixture.

    Numeric indexes must give a Float64Index equal to the ufunc applied to
    the raw values; datetime-like indexes and any other non-empty index must
    raise; other empty indexes are not checked.
    """
    # test ufuncs of numpy, see:
    # http://docs.scipy.org/doc/numpy/reference/ufuncs.html

    if isinstance(indices, DatetimeIndexOpsMixin):
        # raise TypeError or ValueError (PeriodIndex)
        must_raise = True
    elif isinstance(indices, (Float64Index, Int64Index, UInt64Index)):
        # coerces to float (e.g. np.sin)
        with np.errstate(all="ignore"):
            result = func(indices)
            exp = Index(func(indices.values), name=indices.name)

        tm.assert_index_equal(result, exp)
        assert isinstance(result, Float64Index)
        return
    else:
        # raise AttributeError or TypeError, unless the index is empty
        must_raise = len(indices) != 0

    if must_raise:
        with pytest.raises(Exception), np.errstate(all="ignore"):
            func(indices)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func", [np.isfinite, np.isinf, np.isnan, np.signbit], ids=lambda x: x.__name__
)
def test_numpy_ufuncs_other(indices, func):
    """Apply a predicate-style numpy ufunc to each index fixture.

    Depending on the index type the call must either return a plain
    ``np.ndarray`` (not an Index) or raise; other empty indexes are not
    checked.
    """
    # test ufuncs of numpy, see:
    # http://docs.scipy.org/doc/numpy/reference/ufuncs.html

    if isinstance(indices, (DatetimeIndex, TimedeltaIndex)):
        # ok under numpy >= 1.17 (isfinite only); otherwise raises
        gives_array = not _np_version_under1p17 and func in [np.isfinite]
        must_raise = not gives_array
    elif isinstance(indices, DatetimeIndexOpsMixin):
        # raise TypeError or ValueError (PeriodIndex)
        gives_array, must_raise = False, True
    elif isinstance(indices, (Float64Index, Int64Index, UInt64Index)):
        gives_array, must_raise = True, False
    else:
        gives_array, must_raise = False, len(indices) != 0

    if gives_array:
        # Results in bool array
        result = func(indices)
        assert isinstance(result, np.ndarray)
        assert not isinstance(result, Index)
    elif must_raise:
        with pytest.raises(Exception):
            func(indices)
|
||||
|
||||
|
||||
def test_elementwise_comparison_warning():
    """Comparing an integer Index with a string must surface a FutureWarning."""
    # https://github.com/pandas-dev/pandas/issues/22698#issuecomment-458968300
    # np.array([1, 2]) == 'a' returns False, and produces a
    # FutureWarning that it'll be [False, False] in the future.
    # We just want to ensure that comes through.
    # When NumPy dev actually enforces this change, we'll need to skip
    # this test.
    int_index = Index([1, 2])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        int_index == "a"
|
||||
1039
venv/lib/python3.6/site-packages/pandas/tests/indexes/test_range.py
Normal file
1039
venv/lib/python3.6/site-packages/pandas/tests/indexes/test_range.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,113 @@
|
||||
"""
|
||||
The tests in this package are to ensure the proper resultant dtypes of
|
||||
set operations.
|
||||
"""
|
||||
from collections import OrderedDict
|
||||
import itertools as it
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_dtype_equal
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Float64Index, Int64Index, RangeIndex, UInt64Index
|
||||
from pandas.api.types import pandas_dtype
|
||||
from pandas.tests.indexes.conftest import indices_list
|
||||
import pandas.util.testing as tm
|
||||
|
||||
# Maps a pair of index classes to the pair of ``tm`` factory functions used to
# build one index of each kind; insertion order is preserved.
# NOTE(review): the (Float64Index, RangeIndex) entry uses tm.makeIntIndex
# rather than tm.makeRangeIndex -- confirm whether that is intentional.
COMPATIBLE_INCONSISTENT_PAIRS = OrderedDict()
COMPATIBLE_INCONSISTENT_PAIRS[(Int64Index, RangeIndex)] = (
    tm.makeIntIndex,
    tm.makeRangeIndex,
)
COMPATIBLE_INCONSISTENT_PAIRS[(Float64Index, Int64Index)] = (
    tm.makeFloatIndex,
    tm.makeIntIndex,
)
COMPATIBLE_INCONSISTENT_PAIRS[(Float64Index, RangeIndex)] = (
    tm.makeFloatIndex,
    tm.makeIntIndex,
)
COMPATIBLE_INCONSISTENT_PAIRS[(Float64Index, UInt64Index)] = (
    tm.makeFloatIndex,
    tm.makeUIntIndex,
)
|
||||
|
||||
|
||||
@pytest.fixture(
    params=list(it.combinations(indices_list, 2)),
    ids=lambda pair: type(pair[0]).__name__ + type(pair[1]).__name__,
)
def index_pair(request):
    """
    Yield every 2-combination of the entries in ``indices_list``.

    Each test id is the two class names concatenated.
    """
    pair = request.param
    return pair
|
||||
|
||||
|
||||
def test_union_same_types(indices):
    """The union of two sorted copies of an index keeps that index's dtype."""
    # Union with a non-unique, non-monotonic index raises error
    # Only needed for bool index factory
    left = indices.sort_values()
    right = indices.sort_values()
    combined = left.union(right)
    assert combined.dtype == left.dtype
|
||||
|
||||
|
||||
def test_union_different_types(index_pair):
    """Unions of incompatible index types must be object dtype in both directions.

    GH 23525.  Pairs listed in COMPATIBLE_INCONSISTENT_PAIRS, pairs involving
    a MultiIndex, and pairs with equal dtypes are marked xfail.
    """
    # GH 23525
    idx1, idx2 = index_pair
    # class pair in a stable order so it can be looked up in the mapping
    type_pair = tuple(sorted([type(idx1), type(idx2)], key=str))
    if type_pair in COMPATIBLE_INCONSISTENT_PAIRS:
        pytest.xfail("This test only considers non compatible indexes.")

    involves_multi = any(isinstance(idx, pd.MultiIndex) for idx in index_pair)
    if involves_multi:
        pytest.xfail("This test doesn't consider multiindixes.")

    if is_dtype_equal(idx1.dtype, idx2.dtype):
        pytest.xfail("This test only considers non matching dtypes.")

    # A union with a CategoricalIndex (even as dtype('O')) and a
    # non-CategoricalIndex can only be made if both indices are monotonic.
    # This is true before this PR as well.

    # Union with a non-unique, non-monotonic index raises error
    # This applies to the boolean index
    idx1 = idx1.sort_values()
    idx2 = idx2.sort_values()

    object_dtype = np.dtype("O")
    assert idx1.union(idx2).dtype == object_dtype
    assert idx2.union(idx1).dtype == object_dtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize("idx_fact1,idx_fact2", COMPATIBLE_INCONSISTENT_PAIRS.values())
def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2):
    """For compatible pairs, a union in either order takes one of the operand dtypes."""
    # GH 23525
    first = idx_fact1(10)
    second = idx_fact2(20)

    forward = first.union(second)
    backward = second.union(first)

    allowed = (first.dtype, second.dtype)
    assert forward.dtype in allowed
    assert backward.dtype in allowed
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "left, right, expected",
    [
        ("int64", "int64", "int64"),
        ("int64", "uint64", "object"),
        ("int64", "float64", "float64"),
        ("uint64", "float64", "float64"),
        ("uint64", "uint64", "uint64"),
        ("float64", "float64", "float64"),
        ("datetime64[ns]", "int64", "object"),
        ("datetime64[ns]", "uint64", "object"),
        ("datetime64[ns]", "float64", "object"),
        ("datetime64[ns, CET]", "int64", "object"),
        ("datetime64[ns, CET]", "uint64", "object"),
        ("datetime64[ns, CET]", "float64", "object"),
        ("Period[D]", "int64", "object"),
        ("Period[D]", "uint64", "object"),
        ("Period[D]", "float64", "object"),
    ],
)
def test_union_dtypes(left, right, expected):
    """``a | b`` for two empty indexes of the given dtypes has the expected dtype."""
    left_dtype = pandas_dtype(left)
    right_dtype = pandas_dtype(right)
    empty_left = pd.Index([], dtype=left_dtype)
    empty_right = pd.Index([], dtype=right_dtype)
    combined = empty_left | empty_right
    assert combined.dtype == expected
|
||||
@@ -0,0 +1,292 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import NullFrequencyError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Timedelta, TimedeltaIndex, timedelta_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        pd.offsets.Hour(2),
        timedelta(hours=2),
        np.timedelta64(2, "h"),
        Timedelta(hours=2),
    ],
    ids=str,
)
def delta(request):
    """Parametrized fixture: several ways of representing two hours."""
    two_hours = request.param
    return two_hours
|
||||
|
||||
|
||||
@pytest.fixture(params=["B", "D"])
def freq(request):
    """Parametrized fixture yielding the frequency strings "B" and "D"."""
    alias = request.param
    return alias
|
||||
|
||||
|
||||
class TestTimedeltaIndexArithmetic:
    """Index-specific arithmetic tests for TimedeltaIndex: shift, integer ops, in-place ops."""

    # Addition and Subtraction Operations

    # -------------------------------------------------------------
    # TimedeltaIndex.shift is used by __add__/__sub__

    def test_tdi_shift_empty(self):
        """Shifting an empty index returns an equal empty index."""
        # GH#9903
        empty = pd.TimedeltaIndex([], name="xxx")
        for periods in (0, 3):
            tm.assert_index_equal(empty.shift(periods, freq="H"), empty)

    def test_tdi_shift_hours(self):
        """Shift by whole hours in both directions."""
        # GH#9903
        base = pd.TimedeltaIndex(["5 hours", "6 hours", "9 hours"], name="xxx")
        tm.assert_index_equal(base.shift(0, freq="H"), base)

        later = pd.TimedeltaIndex(["8 hours", "9 hours", "12 hours"], name="xxx")
        tm.assert_index_equal(base.shift(3, freq="H"), later)

        earlier = pd.TimedeltaIndex(["2 hours", "3 hours", "6 hours"], name="xxx")
        tm.assert_index_equal(base.shift(-3, freq="H"), earlier)

    def test_tdi_shift_minutes(self):
        """Shift by minutes ("T") in both directions."""
        # GH#9903
        base = pd.TimedeltaIndex(["5 hours", "6 hours", "9 hours"], name="xxx")
        tm.assert_index_equal(base.shift(0, freq="T"), base)

        later = pd.TimedeltaIndex(["05:03:00", "06:03:00", "9:03:00"], name="xxx")
        tm.assert_index_equal(base.shift(3, freq="T"), later)

        earlier = pd.TimedeltaIndex(["04:57:00", "05:57:00", "8:57:00"], name="xxx")
        tm.assert_index_equal(base.shift(-3, freq="T"), earlier)

    def test_tdi_shift_int(self):
        """``shift(1)`` without ``freq`` moves every element by one day here."""
        # GH#8083
        trange = pd.to_timedelta(range(5), unit="d") + pd.offsets.Hour(1)
        shifted = trange.shift(1)
        labels = ["{} days 01:00:00".format(day) for day in range(1, 6)]
        expected = TimedeltaIndex(labels, freq="D")
        tm.assert_index_equal(shifted, expected)

    def test_tdi_shift_nonstandard_freq(self):
        """Shift by three steps of the composite frequency "2D 1s"."""
        # GH#8083
        trange = pd.to_timedelta(range(5), unit="d") + pd.offsets.Hour(1)
        shifted = trange.shift(3, freq="2D 1s")
        labels = ["{} days 01:00:03".format(day) for day in range(6, 11)]
        expected = TimedeltaIndex(labels, freq="D")
        tm.assert_index_equal(shifted, expected)

    def test_shift_no_freq(self):
        """Shifting an index built with ``freq=None`` raises NullFrequencyError."""
        # GH#19147
        no_freq = TimedeltaIndex(["1 days 01:00:00", "2 days 01:00:00"], freq=None)
        with pytest.raises(NullFrequencyError):
            no_freq.shift(2)

    # -------------------------------------------------------------
    # Binary operations TimedeltaIndex and integer

    def test_tdi_add_int(self, one):
        """Adding ``one`` warns and moves the hourly range forward one hour."""
        # Variants of `one` for #19012
        hourly = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            summed = hourly + one
        expected = timedelta_range("1 days 10:00:00", freq="H", periods=10)
        tm.assert_index_equal(summed, expected)

    def test_tdi_iadd_int(self, one):
        """In-place ``+= one`` warns and moves the hourly range forward one hour."""
        hourly = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        expected = timedelta_range("1 days 10:00:00", freq="H", periods=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            hourly += one
        tm.assert_index_equal(hourly, expected)

    def test_tdi_sub_int(self, one):
        """Subtracting ``one`` warns and moves the hourly range back one hour."""
        hourly = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            reduced = hourly - one
        expected = timedelta_range("1 days 08:00:00", freq="H", periods=10)
        tm.assert_index_equal(reduced, expected)

    def test_tdi_isub_int(self, one):
        """In-place ``-= one`` warns and moves the hourly range back one hour."""
        hourly = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        expected = timedelta_range("1 days 08:00:00", freq="H", periods=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            hourly -= one
        tm.assert_index_equal(hourly, expected)

    # -------------------------------------------------------------
    # __add__/__sub__ with integer arrays

    @pytest.mark.parametrize("box", [np.array, pd.Index])
    def test_tdi_add_integer_array(self, box):
        """Adding an integer array (either operand order) warns and shifts per element."""
        # GH#19959
        hourly = timedelta_range("1 days 09:00:00", freq="H", periods=3)
        steps = box([4, 3, 2])
        expected = TimedeltaIndex(["1 day 13:00:00"] * 3)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            forward = hourly + steps
        tm.assert_index_equal(forward, expected)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            reflected = steps + hourly
        tm.assert_index_equal(reflected, expected)

    @pytest.mark.parametrize("box", [np.array, pd.Index])
    def test_tdi_sub_integer_array(self, box):
        """Subtracting an integer array warns; the reflected form gives the negation."""
        # GH#19959
        hourly = timedelta_range("9H", freq="H", periods=3)
        steps = box([4, 3, 2])
        expected = TimedeltaIndex(["5H", "7H", "9H"])

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            forward = hourly - steps
        tm.assert_index_equal(forward, expected)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            reflected = steps - hourly
        tm.assert_index_equal(reflected, -expected)

    @pytest.mark.parametrize("box", [np.array, pd.Index])
    def test_tdi_addsub_integer_array_no_freq(self, box):
        """Integer-array add/sub on this index (built without ``freq``) raises NullFrequencyError."""
        # GH#19959
        tdi = TimedeltaIndex(["1 Day", "NaT", "3 Hours"])
        other = box([14, -1, 16])

        # evaluated lazily, one per raises-block, in the listed order
        operations = (
            lambda: tdi + other,
            lambda: other + tdi,
            lambda: tdi - other,
            lambda: other - tdi,
        )
        for operation in operations:
            with pytest.raises(NullFrequencyError):
                operation()

    # -------------------------------------------------------------
    # Binary operations TimedeltaIndex and timedelta-like
    # Note: add and sub are tested in tests.test_arithmetic, in-place
    # tests are kept here because their behavior is Index-specific

    def test_tdi_iadd_timedeltalike(self, delta):
        """In-place addition of a two-hour delta moves the daily range by two hours."""
        # only test adding/sub offsets as + is now numeric
        daily = timedelta_range("1 days", "10 days")
        expected = timedelta_range("1 days 02:00:00", "10 days 02:00:00", freq="D")
        daily += delta
        tm.assert_index_equal(daily, expected)

    def test_tdi_isub_timedeltalike(self, delta):
        """In-place subtraction of a two-hour delta moves the daily range back two hours."""
        # only test adding/sub offsets as - is now numeric
        daily = timedelta_range("1 days", "10 days")
        expected = timedelta_range("0 days 22:00:00", "9 days 22:00:00")
        daily -= delta
        tm.assert_index_equal(daily, expected)

    # -------------------------------------------------------------

    # TODO: after #24365 this probably belongs in scalar tests
    def test_ops_ndarray(self):
        """Arithmetic between a scalar Timedelta and raw numpy arrays."""
        one_day = Timedelta("1 day")

        # timedelta, timedelta
        td_values = pd.to_timedelta(["1 day"]).values
        two_days = pd.to_timedelta(["2 days"]).values
        tm.assert_numpy_array_equal(one_day + td_values, two_days)
        tm.assert_numpy_array_equal(td_values + one_day, two_days)
        msg = r"unsupported operand type\(s\) for \+: 'Timedelta' and 'int'"
        with pytest.raises(TypeError, match=msg):
            one_day + np.array([1])
        msg = r"unsupported operand type\(s\) for \+: 'numpy.ndarray' and 'Timedelta'"
        with pytest.raises(TypeError, match=msg):
            np.array([1]) + one_day

        zero_days = pd.to_timedelta(["0 days"]).values
        tm.assert_numpy_array_equal(one_day - td_values, zero_days)
        tm.assert_numpy_array_equal(-td_values + one_day, zero_days)
        msg = r"unsupported operand type\(s\) for -: 'Timedelta' and 'int'"
        with pytest.raises(TypeError, match=msg):
            one_day - np.array([1])
        msg = r"unsupported operand type\(s\) for -: 'numpy.ndarray' and 'Timedelta'"
        with pytest.raises(TypeError, match=msg):
            np.array([1]) - one_day

        doubled = pd.to_timedelta(["2 days"]).values
        tm.assert_numpy_array_equal(one_day * np.array([2]), doubled)
        tm.assert_numpy_array_equal(np.array([2]) * one_day, doubled)
        msg = (
            "ufunc '?multiply'? cannot use operands with types"
            r" dtype\('<m8\[ns\]'\) and dtype\('<m8\[ns\]'\)"
        )
        with pytest.raises(TypeError, match=msg):
            one_day * td_values
        with pytest.raises(TypeError, match=msg):
            td_values * one_day

        unit_ratio = np.array([1], dtype=np.float64)
        tm.assert_numpy_array_equal(one_day / td_values, unit_ratio)
        tm.assert_numpy_array_equal(
            td_values / one_day, np.array([1], dtype=np.float64)
        )

        # timedelta, datetime
        dt_values = pd.to_datetime(["2000-01-01"]).values
        next_day = pd.to_datetime(["2000-01-02"]).values
        tm.assert_numpy_array_equal(one_day + dt_values, next_day)
        tm.assert_numpy_array_equal(dt_values + one_day, next_day)

        previous_day = pd.to_datetime(["1999-12-31"]).values
        tm.assert_numpy_array_equal(-one_day + dt_values, previous_day)
        tm.assert_numpy_array_equal(dt_values - one_day, previous_day)

    def test_tdi_ops_attributes(self):
        """Arithmetic results keep the name and carry the expected ``freq``."""
        every_other_day = timedelta_range("2 days", periods=5, freq="2D", name="x")

        shifted_up = every_other_day + 1 * every_other_day.freq
        exp = timedelta_range("4 days", periods=5, freq="2D", name="x")
        tm.assert_index_equal(shifted_up, exp)
        assert shifted_up.freq == "2D"

        shifted_down = every_other_day - 2 * every_other_day.freq
        exp = timedelta_range("-2 days", periods=5, freq="2D", name="x")
        tm.assert_index_equal(shifted_down, exp)
        assert shifted_down.freq == "2D"

        doubled = every_other_day * 2
        exp = timedelta_range("4 days", periods=5, freq="4D", name="x")
        tm.assert_index_equal(doubled, exp)
        assert doubled.freq == "4D"

        halved = every_other_day / 2
        exp = timedelta_range("1 days", periods=5, freq="D", name="x")
        tm.assert_index_equal(halved, exp)
        assert halved.freq == "D"

        negated = -every_other_day
        exp = timedelta_range("-2 days", periods=5, freq="-2D", name="x")
        tm.assert_index_equal(negated, exp)
        assert negated.freq == "-2D"

        around_zero = pd.timedelta_range("-2 days", periods=5, freq="D", name="x")

        magnitudes = abs(around_zero)
        exp = TimedeltaIndex(
            ["2 days", "1 days", "0 days", "1 days", "2 days"], name="x"
        )
        tm.assert_index_equal(magnitudes, exp)
        assert magnitudes.freq is None
|
||||
@@ -0,0 +1,123 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Float64Index,
|
||||
Index,
|
||||
Int64Index,
|
||||
NaT,
|
||||
Timedelta,
|
||||
TimedeltaIndex,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestTimedeltaIndex:
    """Tests for ``TimedeltaIndex.astype`` across the target dtypes."""

    def test_astype_object(self):
        """An index without NaT casts to an object Index of Timedelta scalars."""
        tdi = timedelta_range(start="1 days", periods=4, freq="D", name="idx")
        scalars = [
            Timedelta(text) for text in ("1 days", "2 days", "3 days", "4 days")
        ]

        as_object = tdi.astype(object)
        wanted = Index(scalars, dtype=object, name="idx")
        tm.assert_index_equal(as_object, wanted)
        assert tdi.tolist() == scalars

    def test_astype_object_with_nat(self):
        """Same as ``test_astype_object`` but with a NaT in third position."""
        tdi = TimedeltaIndex(
            [timedelta(days=1), timedelta(days=2), NaT, timedelta(days=4)],
            name="idx",
        )
        scalars = [Timedelta("1 days"), Timedelta("2 days"), NaT, Timedelta("4 days")]

        as_object = tdi.astype(object)
        wanted = Index(scalars, dtype=object, name="idx")
        tm.assert_index_equal(as_object, wanted)
        assert tdi.tolist() == scalars

    def test_astype(self):
        """Cast an index holding missing values to object, int, str and i8."""
        # GH 13149, GH 13209
        tdi = TimedeltaIndex([1e14, "NaT", NaT, np.NaN])

        as_object = tdi.astype(object)
        wanted = Index([Timedelta("1 days 03:46:40")] + [NaT] * 3, dtype=object)
        tm.assert_index_equal(as_object, wanted)

        # the three missing entries are expected as -9223372036854775808
        as_int = tdi.astype(int)
        wanted = Int64Index(
            [100000000000000] + [-9223372036854775808] * 3, dtype=np.int64
        )
        tm.assert_index_equal(as_int, wanted)

        as_str = tdi.astype(str)
        wanted = Index(str(item) for item in tdi)
        tm.assert_index_equal(as_str, wanted)

        daily = timedelta_range("1 days", periods=10)
        as_i8 = daily.astype("i8")
        tm.assert_index_equal(as_i8, Index(daily.asi8))
        tm.assert_numpy_array_equal(daily.asi8, as_i8.values)

    def test_astype_uint(self):
        """Both unsigned targets are expected to produce the same UInt64Index."""
        tdi = timedelta_range("1H", periods=2)
        wanted = pd.UInt64Index(
            np.array([3600000000000, 90000000000000], dtype="uint64")
        )

        for target in ("uint64", "uint32"):
            tm.assert_index_equal(tdi.astype(target), wanted)

    def test_astype_timedelta64(self):
        """Unit-less ``timedelta64`` gives floats; ``[ns]`` round-trips."""
        # GH 13149, GH 13209
        tdi = TimedeltaIndex([1e14, "NaT", NaT, np.NaN])

        unitless = tdi.astype("timedelta64")
        wanted = Float64Index([1e14] + [np.NaN] * 3, dtype="float64")
        tm.assert_index_equal(unitless, wanted)

        # default call: equal values, but a different object
        copied = tdi.astype("timedelta64[ns]")
        tm.assert_index_equal(copied, tdi)
        assert copied is not tdi

        # copy=False: the very same object comes back
        same = tdi.astype("timedelta64[ns]", copy=False)
        tm.assert_index_equal(same, tdi)
        assert same is tdi

    @pytest.mark.parametrize("dtype", [float, "datetime64", "datetime64[ns]"])
    def test_astype_raises(self, dtype):
        """Float and datetime64 targets are rejected with a TypeError."""
        # GH 13149, GH 13209
        tdi = TimedeltaIndex([1e14, "NaT", NaT, np.NaN])

        with pytest.raises(TypeError, match="Cannot cast TimedeltaArray to dtype"):
            tdi.astype(dtype)

    def test_astype_category(self):
        """``"category"`` works on the index and on its backing array."""
        tdi = pd.timedelta_range("1H", periods=2, freq="H")
        wanted_index = pd.CategoricalIndex([pd.Timedelta("1H"), pd.Timedelta("2H")])

        tm.assert_index_equal(tdi.astype("category"), wanted_index)
        tm.assert_categorical_equal(
            tdi._data.astype("category"), wanted_index.values
        )

    def test_astype_array_fallback(self):
        """``bool`` works on the index and on its backing array."""
        tdi = pd.timedelta_range("1H", periods=2)

        from_index = tdi.astype(bool)
        tm.assert_index_equal(from_index, pd.Index(np.array([True, True])))

        from_array = tdi._data.astype(bool)
        tm.assert_numpy_array_equal(from_array, np.array([True, True]))
|
||||
@@ -0,0 +1,229 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Timedelta, TimedeltaIndex, timedelta_range, to_timedelta
|
||||
from pandas.core.arrays import TimedeltaArray
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestTimedeltaIndex:
    """Tests for building a ``TimedeltaIndex`` from various inputs and keywords."""

    def test_verify_integrity_deprecated(self):
        """Passing ``verify_integrity`` is expected to emit a FutureWarning."""
        # GH#23919
        with tm.assert_produces_warning(FutureWarning):
            TimedeltaIndex(["1 Day"], verify_integrity=False)

    def test_range_kwargs_deprecated(self):
        """Passing ``start``/``end`` is expected to emit a FutureWarning."""
        # GH#23919
        with tm.assert_produces_warning(FutureWarning):
            TimedeltaIndex(start="1 Day", end="3 Days", freq="D")

    def test_int64_nocopy(self):
        """int64 input with ``copy=False`` must be wrapped, not copied."""
        # GH#23539 check that a copy isn't made when we pass int64 data
        # and copy=False
        arr = np.arange(10, dtype=np.int64)
        tdi = TimedeltaIndex(arr, copy=False)
        assert tdi._data._data.base is arr

    def test_infer_from_tdi(self):
        """``freq="infer"`` on an index that already has a freq reuses it."""
        # GH#23539
        # fast-path for inferring a frequency if the passed data already
        # has one
        tdi = pd.timedelta_range("1 second", periods=10 ** 7, freq="1s")

        result = pd.TimedeltaIndex(tdi, freq="infer")
        assert result.freq == tdi.freq

        # check that inferred_freq was not called by checking that the
        # value has not been cached
        assert "inferred_freq" not in getattr(result, "_cache", {})

    def test_infer_from_tdi_mismatch(self):
        """A ``freq`` that contradicts the data's own freq raises ValueError."""
        # GH#23539
        # fast-path for invalidating a frequency if the passed data already
        # has one and it does not match the `freq` input
        tdi = pd.timedelta_range("1 second", periods=100, freq="1s")

        msg = (
            "Inferred frequency .* from passed values does "
            "not conform to passed frequency"
        )
        with pytest.raises(ValueError, match=msg):
            TimedeltaIndex(tdi, freq="D")

        with pytest.raises(ValueError, match=msg):
            # GH#23789
            TimedeltaArray(tdi, freq="D")

    def test_dt64_data_invalid(self):
        """tz-aware datetimes raise; naive datetime64 data only warns."""
        # GH#23539
        # passing tz-aware DatetimeIndex raises, naive or ndarray[datetime64]
        # does not yet, but will in the future
        dti = pd.date_range("2016-01-01", periods=3)

        msg = "cannot be converted to timedelta64"
        with pytest.raises(TypeError, match=msg):
            TimedeltaIndex(dti.tz_localize("Europe/Brussels"))

        with tm.assert_produces_warning(FutureWarning):
            TimedeltaIndex(dti)

        with tm.assert_produces_warning(FutureWarning):
            TimedeltaIndex(np.asarray(dti))

    def test_float64_ns_rounded(self):
        """Unit-less floats are nanoseconds; fractions are dropped, NaN is NaT."""
        # GH#23539 without specifying a unit, floats are regarded as nanos,
        # and fractional portions are truncated
        tdi = TimedeltaIndex([2.3, 9.7])
        expected = TimedeltaIndex([2, 9])
        tm.assert_index_equal(tdi, expected)

        # integral floats are non-lossy
        tdi = TimedeltaIndex([2.0, 9.0])
        expected = TimedeltaIndex([2, 9])
        tm.assert_index_equal(tdi, expected)

        # NaNs get converted to NaT
        tdi = TimedeltaIndex([2.0, np.nan])
        expected = TimedeltaIndex([pd.Timedelta(nanoseconds=2), pd.NaT])
        tm.assert_index_equal(tdi, expected)

    def test_float64_unit_conversion(self):
        """Fractional floats with ``unit="D"`` keep their fractional days."""
        # GH#23539
        tdi = TimedeltaIndex([1.5, 2.25], unit="D")
        expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)])
        tm.assert_index_equal(tdi, expected)

    def test_construction_base_constructor(self):
        """``pd.Index`` on timedelta-like data matches ``pd.TimedeltaIndex``."""
        arr = [pd.Timedelta("1 days"), pd.NaT, pd.Timedelta("3 days")]
        tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr))
        tm.assert_index_equal(pd.Index(np.array(arr)), pd.TimedeltaIndex(np.array(arr)))

        arr = [np.nan, pd.NaT, pd.Timedelta("1 days")]
        tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr))
        tm.assert_index_equal(pd.Index(np.array(arr)), pd.TimedeltaIndex(np.array(arr)))

    def test_constructor(self):
        """Mixed scalar inputs and integers with ``unit`` build the expected index."""
        expected = TimedeltaIndex(
            [
                "1 days",
                "1 days 00:00:05",
                "2 days",
                "2 days 00:00:02",
                "0 days 00:00:03",
            ]
        )
        result = TimedeltaIndex(
            [
                "1 days",
                "1 days, 00:00:05",
                np.timedelta64(2, "D"),
                timedelta(days=2, seconds=2),
                pd.offsets.Second(3),
            ]
        )
        tm.assert_index_equal(result, expected)

        # A second, identical construction of ``result`` (labelled "unicode")
        # used to follow here. Its value was never compared with anything
        # before ``expected`` was reassigned, so the dead statement is gone.

        expected = TimedeltaIndex(
            ["0 days 00:00:00", "0 days 00:00:01", "0 days 00:00:02"]
        )
        tm.assert_index_equal(TimedeltaIndex(range(3), unit="s"), expected)

        expected = TimedeltaIndex(
            ["0 days 00:00:00", "0 days 00:00:05", "0 days 00:00:09"]
        )
        tm.assert_index_equal(TimedeltaIndex([0, 5, 9], unit="s"), expected)

        expected = TimedeltaIndex(
            ["0 days 00:00:00.400", "0 days 00:00:00.450", "0 days 00:00:01.200"]
        )
        tm.assert_index_equal(TimedeltaIndex([400, 450, 1200], unit="ms"), expected)

    def test_constructor_iso(self):
        """ISO 8601 duration strings are parsed by ``to_timedelta``."""
        # GH #21877
        expected = timedelta_range("1s", periods=9, freq="s")
        durations = ["P0DT0H0M{}S".format(i) for i in range(1, 10)]
        result = to_timedelta(durations)
        tm.assert_index_equal(result, expected)

    def test_constructor_coverage(self):
        """Assorted accepted inputs and rejected argument combinations."""
        # a float ``periods`` of 10.5 is expected to behave like 10
        rng = timedelta_range("1 days", periods=10.5)
        exp = timedelta_range("1 days", periods=10)
        tm.assert_index_equal(rng, exp)

        msg = "periods must be a number, got foo"
        with pytest.raises(TypeError, match=msg):
            timedelta_range(start="1 days", periods="foo", freq="D")

        with pytest.raises(ValueError):
            with tm.assert_produces_warning(FutureWarning):
                TimedeltaIndex(start="1 days", end="10 days")

        with pytest.raises(TypeError):
            TimedeltaIndex("1 days")

        # generator expression
        gen = (timedelta(i) for i in range(10))
        result = TimedeltaIndex(gen)
        expected = TimedeltaIndex([timedelta(i) for i in range(10)])
        tm.assert_index_equal(result, expected)

        # NumPy string array
        strings = np.array(["1 days", "2 days", "3 days"])
        result = TimedeltaIndex(strings)
        expected = to_timedelta([1, 2, 3], unit="d")
        tm.assert_index_equal(result, expected)

        from_ints = TimedeltaIndex(expected.asi8)
        tm.assert_index_equal(from_ints, expected)

        # non-conforming freq
        msg = (
            "Inferred frequency None from passed values does not conform to"
            " passed frequency D"
        )
        with pytest.raises(ValueError, match=msg):
            TimedeltaIndex(["1 days", "2 days", "4 days"], freq="D")

        msg = (
            "Of the four parameters: start, end, periods, and freq, exactly"
            " three must be specified"
        )
        with pytest.raises(ValueError, match=msg):
            timedelta_range(periods=10, freq="D")

    def test_constructor_name(self):
        """``name`` is stored, and can be overridden when re-wrapping an index."""
        idx = timedelta_range(start="1 days", periods=1, freq="D", name="TEST")
        assert idx.name == "TEST"

        # GH10025
        idx2 = TimedeltaIndex(idx, name="something else")
        assert idx2.name == "something else"

    def test_constructor_no_precision_warns(self):
        """Unit-less ``timedelta64`` dtype warns and gives the ``[ns]`` result."""
        # GH-24753, GH-24739
        expected = pd.TimedeltaIndex(["2000"], dtype="timedelta64[ns]")

        # we set the stacklevel for DatetimeIndex
        with tm.assert_produces_warning(FutureWarning):
            result = pd.TimedeltaIndex(["2000"], dtype="timedelta64")
        tm.assert_index_equal(result, expected)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = pd.Index(["2000"], dtype="timedelta64")
        tm.assert_index_equal(result, expected)

    def test_constructor_wrong_precision_raises(self):
        """A ``timedelta64[us]`` dtype is rejected with a ValueError."""
        with pytest.raises(ValueError):
            pd.TimedeltaIndex(["2000"], dtype="timedelta64[us]")
|
||||
@@ -0,0 +1,90 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import TimedeltaIndex
|
||||
|
||||
|
||||
class TestTimedeltaIndexRendering:
    """Tests for the textual renderings of ``TimedeltaIndex``."""

    @pytest.mark.parametrize("method", ["__repr__", "__str__"])
    def test_representation(self, method):
        """``__repr__`` and ``__str__`` produce the expected one-line text."""
        pairs = [
            (
                TimedeltaIndex([], freq="D"),
                "TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')",
            ),
            (
                TimedeltaIndex(["1 days"], freq="D"),
                "TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='D')",
            ),
            (
                TimedeltaIndex(["1 days", "2 days"], freq="D"),
                "TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='D')",
            ),
            (
                TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D"),
                "TimedeltaIndex(['1 days', '2 days', '3 days'], "
                "dtype='timedelta64[ns]', freq='D')",
            ),
            (
                TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]),
                "TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', "
                "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)",
            ),
        ]

        with pd.option_context("display.width", 300):
            for tdi, text in pairs:
                rendered = getattr(tdi, method)()
                assert rendered == text

    def test_representation_to_series(self):
        """``repr`` of a Series built from the index matches the expected text."""
        pairs = [
            (
                TimedeltaIndex([], freq="D"),
                "Series([], dtype: timedelta64[ns])",
            ),
            (
                TimedeltaIndex(["1 days"], freq="D"),
                "0 1 days\ndtype: timedelta64[ns]",
            ),
            (
                TimedeltaIndex(["1 days", "2 days"], freq="D"),
                "0 1 days\n1 2 days\ndtype: timedelta64[ns]",
            ),
            (
                TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D"),
                "0 1 days\n1 2 days\n2 3 days\ndtype: timedelta64[ns]",
            ),
            (
                TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]),
                "0 1 days 00:00:01\n"
                "1 2 days 00:00:00\n"
                "2 3 days 00:00:00\n"
                "dtype: timedelta64[ns]",
            ),
        ]

        with pd.option_context("display.width", 300):
            for tdi, text in pairs:
                rendered = repr(pd.Series(tdi))
                assert rendered == text

    def test_summary(self):
        """``_summary()`` reports entry count, range and, if set, the freq."""
        # GH#9116
        pairs = [
            (
                TimedeltaIndex([], freq="D"),
                "TimedeltaIndex: 0 entries\nFreq: D",
            ),
            (
                TimedeltaIndex(["1 days"], freq="D"),
                "TimedeltaIndex: 1 entries, 1 days to 1 days\nFreq: D",
            ),
            (
                TimedeltaIndex(["1 days", "2 days"], freq="D"),
                "TimedeltaIndex: 2 entries, 1 days to 2 days\nFreq: D",
            ),
            (
                TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D"),
                "TimedeltaIndex: 3 entries, 1 days to 3 days\nFreq: D",
            ),
            (
                TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]),
                "TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days 00:00:00",
            ),
        ]

        for tdi, text in pairs:
            summary = tdi._summary()
            assert summary == text
|
||||
@@ -0,0 +1,345 @@
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, Timedelta, TimedeltaIndex, timedelta_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestGetItem:
    """Tests for ``TimedeltaIndex.__getitem__`` and invalid ``get_loc`` keys."""

    def test_ellipsis(self):
        """``idx[...]`` gives an equal index that is a distinct object."""
        # GH#21282
        tdi = timedelta_range("1 day", "31 day", freq="D", name="idx")

        everything = tdi[...]
        assert everything.equals(tdi)
        assert everything is not tdi

    def test_getitem(self):
        """Scalar access, and slices whose result keeps a matching freq."""
        tdi = timedelta_range("1 day", "31 day", freq="D", name="idx")

        first = tdi[0]
        assert first == Timedelta("1 day")

        # (slice, start and end of the expected range, expected freq)
        forward_slices = [
            (slice(0, 5), "1 day", "5 day", "D"),
            (slice(0, 10, 2), "1 day", "9 day", "2D"),
            (slice(-20, -5, 3), "12 day", "24 day", "3D"),
        ]
        for key, start, end, freq in forward_slices:
            picked = tdi[key]
            wanted = timedelta_range(start, end, freq=freq, name="idx")
            tm.assert_index_equal(picked, wanted)
            assert picked.freq == wanted.freq

        # a negative step is expected to give a negative freq
        picked = tdi[4::-1]
        wanted = TimedeltaIndex(
            ["5 day", "4 day", "3 day", "2 day", "1 day"], freq="-1D", name="idx"
        )
        tm.assert_index_equal(picked, wanted)
        assert picked.freq == wanted.freq

    @pytest.mark.parametrize(
        "key",
        [pd.Timestamp("1970-01-01"), pd.Timestamp("1970-01-02"), datetime(1970, 1, 1)],
    )
    def test_timestamp_invalid_key(self, key):
        """Datetime-like keys passed to ``get_loc`` raise TypeError."""
        # GH#20464
        tdi = pd.timedelta_range(0, periods=10)
        with pytest.raises(TypeError):
            tdi.get_loc(key)
|
||||
|
||||
|
||||
class TestWhere:
    """Empty placeholder, kept for symmetry with the DatetimeIndex and
    PeriodIndex tests."""
|
||||
|
||||
|
||||
class TestTake:
    """Tests for ``TimedeltaIndex.take``."""

    def test_take(self):
        """Evenly spaced positions keep a freq; irregular ones have none."""
        # GH 10295
        tdi = timedelta_range("1 day", "31 day", freq="D", name="idx")

        head = tdi.take([0])
        assert head == Timedelta("1 day")

        tail = tdi.take([-1])
        assert tail == Timedelta("31 day")

        # (positions, start and end of the expected range, expected freq)
        spaced = [
            ([0, 1, 2], "1 day", "3 day", "D"),
            ([0, 2, 4], "1 day", "5 day", "2D"),
            ([7, 4, 1], "8 day", "2 day", "-3D"),
        ]
        for positions, start, end, freq in spaced:
            taken = tdi.take(positions)
            wanted = timedelta_range(start, end, freq=freq, name="idx")
            tm.assert_index_equal(taken, wanted)
            assert taken.freq == wanted.freq

        # (positions, expected values) where no freq is expected
        irregular = [
            ([3, 2, 5], ["4 day", "3 day", "6 day"]),
            ([-3, 2, 5], ["29 day", "3 day", "6 day"]),
        ]
        for positions, values in irregular:
            taken = tdi.take(positions)
            wanted = TimedeltaIndex(values, name="idx")
            tm.assert_index_equal(taken, wanted)
            assert taken.freq is None

    def test_take_invalid_kwargs(self):
        """Unknown keywords, ``out`` and ``mode`` are all rejected."""
        tdi = timedelta_range("1 day", "31 day", freq="D", name="idx")
        positions = [1, 6, 5, 9, 10, 13, 15, 3]

        # (exception type, message pattern, offending keyword arguments)
        rejected = [
            (
                TypeError,
                r"take\(\) got an unexpected keyword argument 'foo'",
                {"foo": 2},
            ),
            (ValueError, "the 'out' parameter is not supported", {"out": positions}),
            (ValueError, "the 'mode' parameter is not supported", {"mode": "clip"}),
        ]
        for exc_type, pattern, extra in rejected:
            with pytest.raises(exc_type, match=pattern):
                tdi.take(positions, **extra)

    # TODO: This method came from test_timedelta; de-dup with version above
    def test_take2(self):
        """``take`` and list indexing agree and both drop the freq."""
        labels = ["1day 02:00:00", "1 day 04:00:00", "1 day 10:00:00"]
        tdi = timedelta_range(start="1d", end="2d", freq="H", name="idx")
        wanted = TimedeltaIndex(labels, freq=None, name="idx")

        positions = [2, 4, 10]
        via_take = tdi.take(positions)
        via_getitem = tdi[positions]

        for picked in (via_take, via_getitem):
            tm.assert_index_equal(picked, wanted)
            assert isinstance(picked, TimedeltaIndex)
            assert picked.freq is None
            assert picked.name == wanted.name

    def test_take_fill_value(self):
        """With ``fill_value`` given, position -1 is filled with NaT."""
        # GH 12631
        tdi = TimedeltaIndex(["1 days", "2 days", "3 days"], name="xxx")
        positions = np.array([1, 0, -1])

        plain = tdi.take(positions)
        tm.assert_index_equal(
            plain, TimedeltaIndex(["2 days", "1 days", "3 days"], name="xxx")
        )

        # fill_value
        filled = tdi.take(positions, fill_value=True)
        tm.assert_index_equal(
            filled, TimedeltaIndex(["2 days", "1 days", "NaT"], name="xxx")
        )

        # allow_fill=False
        unfilled = tdi.take(positions, allow_fill=False, fill_value=True)
        tm.assert_index_equal(
            unfilled, TimedeltaIndex(["2 days", "1 days", "3 days"], name="xxx")
        )

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        for too_negative in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                tdi.take(np.array([1, 0, too_negative]), fill_value=True)

        with pytest.raises(IndexError):
            tdi.take(np.array([1, -5]))
|
||||
|
||||
|
||||
class TestTimedeltaIndex:
    """Tests for ``insert``, ``delete``, ``get_loc`` and ``get_indexer``."""

    def test_insert(self):
        """Insertion of timedeltas, other objects and missing values."""
        tdi = TimedeltaIndex(["4day", "1day", "2day"], name="idx")

        grown = tdi.insert(2, timedelta(days=5))
        wanted = TimedeltaIndex(["4day", "1day", "5day", "2day"], name="idx")
        tm.assert_index_equal(grown, wanted)

        # insertion of non-datetime should coerce to object index
        grown = tdi.insert(1, "inserted")
        wanted = Index(
            [Timedelta("4day"), "inserted", Timedelta("1day"), Timedelta("2day")],
            name="idx",
        )
        assert not isinstance(grown, TimedeltaIndex)
        tm.assert_index_equal(grown, wanted)
        assert grown.name == wanted.name

        tdi = timedelta_range("1day 00:00:01", periods=3, freq="s", name="idx")

        # preserve freq
        prepended = TimedeltaIndex(
            ["1day", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"],
            name="idx",
            freq="s",
        )
        appended = TimedeltaIndex(
            ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:04"],
            name="idx",
            freq="s",
        )

        # reset freq to None
        duplicated = TimedeltaIndex(
            ["1day 00:00:01", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"],
            name="idx",
            freq=None,
        )
        gapped = TimedeltaIndex(
            ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:05"],
            name="idx",
            freq=None,
        )

        # (position, inserted value, expected index)
        scenarios = [
            (0, Timedelta("1day"), prepended),
            (-3, Timedelta("1day"), prepended),
            (3, Timedelta("1day 00:00:04"), appended),
            (1, Timedelta("1day 00:00:01"), duplicated),
            (3, Timedelta("1day 00:00:05"), gapped),
        ]
        for position, value, wanted in scenarios:
            grown = tdi.insert(position, value)
            tm.assert_index_equal(grown, wanted)
            assert grown.name == wanted.name
            assert grown.freq == wanted.freq

        # GH 18295 (test missing)
        wanted = TimedeltaIndex(["1day", pd.NaT, "2day", "3day"])
        for missing in (np.nan, pd.NaT, None):
            grown = timedelta_range("1day", "3day").insert(1, missing)
            tm.assert_index_equal(grown, wanted)

    def test_delete(self):
        """Deleting an end keeps the freq; deleting from the middle resets it."""
        tdi = timedelta_range(start="1 Days", periods=5, freq="D", name="idx")

        # preserve freq
        without_first = timedelta_range(
            start="2 Days", periods=4, freq="D", name="idx"
        )
        without_last = timedelta_range(
            start="1 Days", periods=4, freq="D", name="idx"
        )

        # reset freq to None
        without_second = TimedeltaIndex(
            ["1 day", "3 day", "4 day", "5 day"], freq=None, name="idx"
        )

        scenarios = [
            (0, without_first),
            (-5, without_first),
            (-1, without_last),
            (4, without_last),
            (1, without_second),
        ]
        for position, wanted in scenarios:
            shrunk = tdi.delete(position)
            tm.assert_index_equal(shrunk, wanted)
            assert shrunk.name == wanted.name
            assert shrunk.freq == wanted.freq

        with pytest.raises((IndexError, ValueError)):
            # either depending on numpy version
            tdi.delete(5)

    def test_delete_slice(self):
        """Deleting a run of positions, given as a tuple or as a slice."""
        tdi = timedelta_range(start="1 days", periods=10, freq="D", name="idx")

        # preserve freq
        without_head = timedelta_range(
            start="4 days", periods=7, freq="D", name="idx"
        )
        without_tail = timedelta_range(
            start="1 days", periods=7, freq="D", name="idx"
        )

        # reset freq to None
        without_middle = TimedeltaIndex(
            ["1 d", "2 d", "3 d", "7 d", "8 d", "9 d", "10d"], freq=None, name="idx"
        )

        scenarios = [
            ((0, 1, 2), without_head),
            ((7, 8, 9), without_tail),
            ((3, 4, 5), without_middle),
        ]
        for positions, wanted in scenarios:
            as_slice = slice(positions[0], positions[-1] + 1)
            for locator in (positions, as_slice):
                shrunk = tdi.delete(locator)
                tm.assert_index_equal(shrunk, wanted)
                assert shrunk.name == wanted.name
                assert shrunk.freq == wanted.freq

    def test_get_loc(self):
        """``get_loc`` with each fill method, tolerances and string keys."""
        tdi = pd.to_timedelta(["0 days", "1 days", "2 days"])
        middle = tdi[1]

        for method in [None, "pad", "backfill", "nearest"]:
            for key in (middle, middle.to_pytimedelta(), str(middle)):
                assert tdi.get_loc(key, method) == 1

        for tolerance in (Timedelta(0), np.timedelta64(0, "s"), timedelta(0)):
            assert tdi.get_loc(middle, "pad", tolerance=tolerance) == 1

        with pytest.raises(ValueError, match="unit abbreviation w/o a number"):
            tdi.get_loc(middle, method="nearest", tolerance="foo")

        with pytest.raises(ValueError, match="tolerance size must match"):
            tdi.get_loc(
                middle,
                method="nearest",
                tolerance=[
                    Timedelta(0).to_timedelta64(),
                    Timedelta(0).to_timedelta64(),
                ],
            )

        # a key between two entries resolves according to the method
        between = [("pad", 1), ("backfill", 2), ("nearest", 1)]
        for method, position in between:
            assert tdi.get_loc("1 day 1 hour", method) == position

        # GH 16909
        assert tdi.get_loc(middle.to_timedelta64()) == 1

        # GH 16896
        assert tdi.get_loc("0 days") == 0

    def test_get_loc_nat(self):
        """Every spelling of a missing value locates the NaT entry."""
        tdi = TimedeltaIndex(["1 days 01:00:00", "NaT", "2 days 01:00:00"])

        for missing in (pd.NaT, None, float("nan"), np.nan):
            assert tdi.get_loc(missing) == 1

    def test_get_indexer(self):
        """``get_indexer`` with exact matches, fill methods and a tolerance."""
        tdi = pd.to_timedelta(["0 days", "1 days", "2 days"])
        tm.assert_numpy_array_equal(
            tdi.get_indexer(tdi), np.array([0, 1, 2], dtype=np.intp)
        )

        target = pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
        by_method = [
            ("pad", [-1, 0, 1]),
            ("backfill", [0, 1, 2]),
            ("nearest", [0, 1, 1]),
        ]
        for method, positions in by_method:
            tm.assert_numpy_array_equal(
                tdi.get_indexer(target, method), np.array(positions, dtype=np.intp)
            )

        within_hour = tdi.get_indexer(
            target, "nearest", tolerance=Timedelta("1 hour")
        )
        tm.assert_numpy_array_equal(within_hour, np.array([0, -1, 1], dtype=np.intp))
|
||||
@@ -0,0 +1,303 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.generic import ABCDateOffset
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series, TimedeltaIndex, timedelta_range
|
||||
from pandas.tests.test_base import Ops
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from pandas.tseries.offsets import Day, Hour
|
||||
|
||||
|
||||
class TestTimedeltaIndexOps(Ops):
|
||||
def setup_method(self, method):
    """Run the parent setup, then keep only the TimedeltaIndex objects."""
    super().setup_method(method)
    self.is_valid_objs = [
        candidate for candidate in self.objs if isinstance(candidate, TimedeltaIndex)
    ]
    self.not_valid_objs = []
|
||||
|
||||
def test_ops_properties(self):
    """Check the field and object ops, restricted to TimedeltaIndex objects."""

    def is_tdi(candidate):
        return isinstance(candidate, TimedeltaIndex)

    for ops in (TimedeltaIndex._field_ops, TimedeltaIndex._object_ops):
        self.check_ops_properties(ops, is_tdi)
|
||||
|
||||
def test_value_counts_unique(self):
    """``value_counts`` and ``unique`` on repeated and missing timedeltas."""
    # GH 7735
    hourly = timedelta_range("1 days 09:00:00", freq="H", periods=10)
    # create repeated values, 'n'th element is repeated by n+1 times
    repeated = TimedeltaIndex(np.repeat(hourly.values, range(1, len(hourly) + 1)))

    counts_index = timedelta_range("1 days 18:00:00", freq="-1H", periods=10)
    wanted_counts = Series(range(10, 0, -1), index=counts_index, dtype="int64")
    for container in [repeated, Series(repeated)]:
        tm.assert_series_equal(container.value_counts(), wanted_counts)

    wanted_unique = timedelta_range("1 days 09:00:00", freq="H", periods=10)
    tm.assert_index_equal(repeated.unique(), wanted_unique)

    with_nat = TimedeltaIndex(
        [
            "1 days 09:00:00",
            "1 days 09:00:00",
            "1 days 09:00:00",
            "1 days 08:00:00",
            "1 days 08:00:00",
            pd.NaT,
        ]
    )

    # by default the NaT entry is not counted
    counts_index = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"])
    wanted_counts = Series([3, 2], index=counts_index)
    for container in [with_nat, Series(with_nat)]:
        tm.assert_series_equal(container.value_counts(), wanted_counts)

    # dropna=False adds a row for NaT
    counts_index = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", pd.NaT])
    wanted_counts = Series([3, 2, 1], index=counts_index)
    for container in [with_nat, Series(with_nat)]:
        tm.assert_series_equal(container.value_counts(dropna=False), wanted_counts)

    tm.assert_index_equal(with_nat.unique(), counts_index)
|
||||
|
||||
def test_nonunique_contains(self):
|
||||
# GH 9512
|
||||
for idx in map(
|
||||
TimedeltaIndex,
|
||||
(
|
||||
[0, 1, 0],
|
||||
[0, 0, -1],
|
||||
[0, -1, -1],
|
||||
["00:01:00", "00:01:00", "00:02:00"],
|
||||
["00:01:00", "00:01:00", "00:00:01"],
|
||||
),
|
||||
):
|
||||
assert idx[0] in idx
|
||||
|
||||
def test_unknown_attribute(self):
    """Accessing a missing attribute on a Series raises AttributeError."""
    # see gh-9680
    index = pd.timedelta_range(start=0, periods=10, freq="1s")
    series = pd.Series(np.random.normal(size=10), index=index)
    assert "foo" not in series.__dict__

    with pytest.raises(
        AttributeError, match="'Series' object has no attribute 'foo'"
    ):
        series.foo
|
||||
|
||||
def test_order(self):
|
||||
# GH 10295
|
||||
idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"], freq="D", name="idx")
|
||||
idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"], freq="H", name="idx")
|
||||
|
||||
for idx in [idx1, idx2]:
|
||||
ordered = idx.sort_values()
|
||||
tm.assert_index_equal(ordered, idx)
|
||||
assert ordered.freq == idx.freq
|
||||
|
||||
ordered = idx.sort_values(ascending=False)
|
||||
expected = idx[::-1]
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
assert ordered.freq == expected.freq
|
||||
assert ordered.freq.n == -1
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True)
|
||||
tm.assert_index_equal(ordered, idx)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
|
||||
assert ordered.freq == idx.freq
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
|
||||
tm.assert_index_equal(ordered, idx[::-1])
|
||||
assert ordered.freq == expected.freq
|
||||
assert ordered.freq.n == -1
|
||||
|
||||
idx1 = TimedeltaIndex(
|
||||
["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
|
||||
)
|
||||
exp1 = TimedeltaIndex(
|
||||
["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
|
||||
)
|
||||
|
||||
idx2 = TimedeltaIndex(
|
||||
["1 day", "3 day", "5 day", "2 day", "1 day"], name="idx2"
|
||||
)
|
||||
|
||||
# TODO(wesm): unused?
|
||||
# exp2 = TimedeltaIndex(['1 day', '1 day', '2 day',
|
||||
# '3 day', '5 day'], name='idx2')
|
||||
|
||||
# idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute',
|
||||
# '2 minute', pd.NaT], name='idx3')
|
||||
# exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute',
|
||||
# '5 minute'], name='idx3')
|
||||
|
||||
for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]:
|
||||
ordered = idx.sort_values()
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
assert ordered.freq is None
|
||||
|
||||
ordered = idx.sort_values(ascending=False)
|
||||
tm.assert_index_equal(ordered, expected[::-1])
|
||||
assert ordered.freq is None
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True)
|
||||
tm.assert_index_equal(ordered, expected)
|
||||
|
||||
exp = np.array([0, 4, 3, 1, 2])
|
||||
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
|
||||
assert ordered.freq is None
|
||||
|
||||
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
|
||||
tm.assert_index_equal(ordered, expected[::-1])
|
||||
|
||||
exp = np.array([2, 1, 3, 4, 0])
|
||||
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
|
||||
assert ordered.freq is None
|
||||
|
||||
def test_drop_duplicates_metadata(self):
|
||||
# GH 10115
|
||||
idx = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
|
||||
result = idx.drop_duplicates()
|
||||
tm.assert_index_equal(idx, result)
|
||||
assert idx.freq == result.freq
|
||||
|
||||
idx_dup = idx.append(idx)
|
||||
assert idx_dup.freq is None # freq is reset
|
||||
result = idx_dup.drop_duplicates()
|
||||
tm.assert_index_equal(idx, result)
|
||||
assert result.freq is None
|
||||
|
||||
def test_drop_duplicates(self):
|
||||
# to check Index/Series compat
|
||||
base = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
|
||||
idx = base.append(base[:5])
|
||||
|
||||
res = idx.drop_duplicates()
|
||||
tm.assert_index_equal(res, base)
|
||||
res = Series(idx).drop_duplicates()
|
||||
tm.assert_series_equal(res, Series(base))
|
||||
|
||||
res = idx.drop_duplicates(keep="last")
|
||||
exp = base[5:].append(base[:5])
|
||||
tm.assert_index_equal(res, exp)
|
||||
res = Series(idx).drop_duplicates(keep="last")
|
||||
tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
|
||||
|
||||
res = idx.drop_duplicates(keep=False)
|
||||
tm.assert_index_equal(res, base[5:])
|
||||
res = Series(idx).drop_duplicates(keep=False)
|
||||
tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"]
|
||||
)
|
||||
def test_infer_freq(self, freq):
|
||||
# GH#11018
|
||||
idx = pd.timedelta_range("1", freq=freq, periods=10)
|
||||
result = pd.TimedeltaIndex(idx.asi8, freq="infer")
|
||||
tm.assert_index_equal(idx, result)
|
||||
assert result.freq == freq
|
||||
|
||||
def test_shift(self):
|
||||
pass # handled in test_arithmetic.py
|
||||
|
||||
def test_repeat(self):
|
||||
index = pd.timedelta_range("1 days", periods=2, freq="D")
|
||||
exp = pd.TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"])
|
||||
for res in [index.repeat(2), np.repeat(index, 2)]:
|
||||
tm.assert_index_equal(res, exp)
|
||||
assert res.freq is None
|
||||
|
||||
index = TimedeltaIndex(["1 days", "NaT", "3 days"])
|
||||
exp = TimedeltaIndex(
|
||||
[
|
||||
"1 days",
|
||||
"1 days",
|
||||
"1 days",
|
||||
"NaT",
|
||||
"NaT",
|
||||
"NaT",
|
||||
"3 days",
|
||||
"3 days",
|
||||
"3 days",
|
||||
]
|
||||
)
|
||||
for res in [index.repeat(3), np.repeat(index, 3)]:
|
||||
tm.assert_index_equal(res, exp)
|
||||
assert res.freq is None
|
||||
|
||||
def test_nat(self):
|
||||
assert pd.TimedeltaIndex._na_value is pd.NaT
|
||||
assert pd.TimedeltaIndex([])._na_value is pd.NaT
|
||||
|
||||
idx = pd.TimedeltaIndex(["1 days", "2 days"])
|
||||
assert idx._can_hold_na
|
||||
|
||||
tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
|
||||
assert idx.hasnans is False
|
||||
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))
|
||||
|
||||
idx = pd.TimedeltaIndex(["1 days", "NaT"])
|
||||
assert idx._can_hold_na
|
||||
|
||||
tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
|
||||
assert idx.hasnans is True
|
||||
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))
|
||||
|
||||
def test_equals(self):
|
||||
# GH 13107
|
||||
idx = pd.TimedeltaIndex(["1 days", "2 days", "NaT"])
|
||||
assert idx.equals(idx)
|
||||
assert idx.equals(idx.copy())
|
||||
assert idx.equals(idx.astype(object))
|
||||
assert idx.astype(object).equals(idx)
|
||||
assert idx.astype(object).equals(idx.astype(object))
|
||||
assert not idx.equals(list(idx))
|
||||
assert not idx.equals(pd.Series(idx))
|
||||
|
||||
idx2 = pd.TimedeltaIndex(["2 days", "1 days", "NaT"])
|
||||
assert not idx.equals(idx2)
|
||||
assert not idx.equals(idx2.copy())
|
||||
assert not idx.equals(idx2.astype(object))
|
||||
assert not idx.astype(object).equals(idx2)
|
||||
assert not idx.astype(object).equals(idx2.astype(object))
|
||||
assert not idx.equals(list(idx2))
|
||||
assert not idx.equals(pd.Series(idx2))
|
||||
|
||||
@pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []])
|
||||
@pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])
|
||||
def test_freq_setter(self, values, freq):
|
||||
# GH 20678
|
||||
idx = TimedeltaIndex(values)
|
||||
|
||||
# can set to an offset, converting from string if necessary
|
||||
idx.freq = freq
|
||||
assert idx.freq == freq
|
||||
assert isinstance(idx.freq, ABCDateOffset)
|
||||
|
||||
# can reset to None
|
||||
idx.freq = None
|
||||
assert idx.freq is None
|
||||
|
||||
def test_freq_setter_errors(self):
|
||||
# GH 20678
|
||||
idx = TimedeltaIndex(["0 days", "2 days", "4 days"])
|
||||
|
||||
# setting with an incompatible freq
|
||||
msg = (
|
||||
"Inferred frequency 2D from passed values does not conform to "
|
||||
"passed frequency 5D"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.freq = "5D"
|
||||
|
||||
# setting with a non-fixed frequency
|
||||
msg = r"<2 \* BusinessDays> is a non-fixed frequency"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.freq = "2B"
|
||||
|
||||
# setting with non-freq string
|
||||
with pytest.raises(ValueError, match="Invalid frequency"):
|
||||
idx.freq = "foo"
|
||||
@@ -0,0 +1,90 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series, Timedelta, timedelta_range
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
class TestSlicing:
|
||||
def test_slice_keeps_name(self):
|
||||
# GH4226
|
||||
dr = pd.timedelta_range("1d", "5d", freq="H", name="timebucket")
|
||||
assert dr[1:].name == dr.name
|
||||
|
||||
def test_partial_slice(self):
|
||||
rng = timedelta_range("1 day 10:11:12", freq="h", periods=500)
|
||||
s = Series(np.arange(len(rng)), index=rng)
|
||||
|
||||
result = s["5 day":"6 day"]
|
||||
expected = s.iloc[86:134]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = s["5 day":]
|
||||
expected = s.iloc[86:]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = s[:"6 day"]
|
||||
expected = s.iloc[:134]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = s["6 days, 23:11:12"]
|
||||
assert result == s.iloc[133]
|
||||
|
||||
msg = r"^Timedelta\('50 days 00:00:00'\)$"
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
s["50 days"]
|
||||
|
||||
def test_partial_slice_high_reso(self):
|
||||
|
||||
# higher reso
|
||||
rng = timedelta_range("1 day 10:11:12", freq="us", periods=2000)
|
||||
s = Series(np.arange(len(rng)), index=rng)
|
||||
|
||||
result = s["1 day 10:11:12":]
|
||||
expected = s.iloc[0:]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = s["1 day 10:11:12.001":]
|
||||
expected = s.iloc[1000:]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = s["1 days, 10:11:12.001001"]
|
||||
assert result == s.iloc[1001]
|
||||
|
||||
def test_slice_with_negative_step(self):
|
||||
ts = Series(np.arange(20), timedelta_range("0", periods=20, freq="H"))
|
||||
SLC = pd.IndexSlice
|
||||
|
||||
def assert_slices_equivalent(l_slc, i_slc):
|
||||
assert_series_equal(ts[l_slc], ts.iloc[i_slc])
|
||||
assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])
|
||||
assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])
|
||||
|
||||
assert_slices_equivalent(SLC[Timedelta(hours=7) :: -1], SLC[7::-1])
|
||||
assert_slices_equivalent(SLC["7 hours"::-1], SLC[7::-1])
|
||||
|
||||
assert_slices_equivalent(SLC[: Timedelta(hours=7) : -1], SLC[:6:-1])
|
||||
assert_slices_equivalent(SLC[:"7 hours":-1], SLC[:6:-1])
|
||||
|
||||
assert_slices_equivalent(SLC["15 hours":"7 hours":-1], SLC[15:6:-1])
|
||||
assert_slices_equivalent(
|
||||
SLC[Timedelta(hours=15) : Timedelta(hours=7) : -1], SLC[15:6:-1]
|
||||
)
|
||||
assert_slices_equivalent(
|
||||
SLC["15 hours" : Timedelta(hours=7) : -1], SLC[15:6:-1]
|
||||
)
|
||||
assert_slices_equivalent(
|
||||
SLC[Timedelta(hours=15) : "7 hours" : -1], SLC[15:6:-1]
|
||||
)
|
||||
|
||||
assert_slices_equivalent(SLC["7 hours":"15 hours":-1], SLC[:0])
|
||||
|
||||
def test_slice_with_zero_step_raises(self):
|
||||
ts = Series(np.arange(20), timedelta_range("0", periods=20, freq="H"))
|
||||
with pytest.raises(ValueError, match="slice step cannot be zero"):
|
||||
ts[::0]
|
||||
with pytest.raises(ValueError, match="slice step cannot be zero"):
|
||||
ts.loc[::0]
|
||||
with pytest.raises(ValueError, match="slice step cannot be zero"):
|
||||
ts.loc[::0]
|
||||
@@ -0,0 +1,71 @@
|
||||
"""
|
||||
Tests for TimedeltaIndex methods behaving like their Timedelta counterparts
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, Series, Timedelta, TimedeltaIndex, timedelta_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestVectorizedTimedelta:
|
||||
def test_tdi_total_seconds(self):
|
||||
# GH#10939
|
||||
# test index
|
||||
rng = timedelta_range("1 days, 10:11:12.100123456", periods=2, freq="s")
|
||||
expt = [
|
||||
1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456.0 / 1e9,
|
||||
1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456.0 / 1e9,
|
||||
]
|
||||
tm.assert_almost_equal(rng.total_seconds(), Index(expt))
|
||||
|
||||
# test Series
|
||||
ser = Series(rng)
|
||||
s_expt = Series(expt, index=[0, 1])
|
||||
tm.assert_series_equal(ser.dt.total_seconds(), s_expt)
|
||||
|
||||
# with nat
|
||||
ser[1] = np.nan
|
||||
s_expt = Series(
|
||||
[1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456.0 / 1e9, np.nan],
|
||||
index=[0, 1],
|
||||
)
|
||||
tm.assert_series_equal(ser.dt.total_seconds(), s_expt)
|
||||
|
||||
# with both nat
|
||||
ser = Series([np.nan, np.nan], dtype="timedelta64[ns]")
|
||||
tm.assert_series_equal(
|
||||
ser.dt.total_seconds(), Series([np.nan, np.nan], index=[0, 1])
|
||||
)
|
||||
|
||||
def test_tdi_round(self):
|
||||
td = pd.timedelta_range(start="16801 days", periods=5, freq="30Min")
|
||||
elt = td[1]
|
||||
|
||||
expected_rng = TimedeltaIndex(
|
||||
[
|
||||
Timedelta("16801 days 00:00:00"),
|
||||
Timedelta("16801 days 00:00:00"),
|
||||
Timedelta("16801 days 01:00:00"),
|
||||
Timedelta("16801 days 02:00:00"),
|
||||
Timedelta("16801 days 02:00:00"),
|
||||
]
|
||||
)
|
||||
expected_elt = expected_rng[1]
|
||||
|
||||
tm.assert_index_equal(td.round(freq="H"), expected_rng)
|
||||
assert elt.round(freq="H") == expected_elt
|
||||
|
||||
msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
td.round(freq="foo")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
elt.round(freq="foo")
|
||||
|
||||
msg = "<MonthEnd> is a non-fixed frequency"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
td.round(freq="M")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
elt.round(freq="M")
|
||||
@@ -0,0 +1,181 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Int64Index, TimedeltaIndex, timedelta_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from pandas.tseries.offsets import Hour
|
||||
|
||||
|
||||
class TestTimedeltaIndex:
|
||||
def test_union(self):
|
||||
|
||||
i1 = timedelta_range("1day", periods=5)
|
||||
i2 = timedelta_range("3day", periods=5)
|
||||
result = i1.union(i2)
|
||||
expected = timedelta_range("1day", periods=7)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
i1 = Int64Index(np.arange(0, 20, 2))
|
||||
i2 = timedelta_range(start="1 day", periods=10, freq="D")
|
||||
i1.union(i2) # Works
|
||||
i2.union(i1) # Fails with "AttributeError: can't set attribute"
|
||||
|
||||
def test_union_coverage(self):
|
||||
|
||||
idx = TimedeltaIndex(["3d", "1d", "2d"])
|
||||
ordered = TimedeltaIndex(idx.sort_values(), freq="infer")
|
||||
result = ordered.union(idx)
|
||||
tm.assert_index_equal(result, ordered)
|
||||
|
||||
result = ordered[:0].union(ordered)
|
||||
tm.assert_index_equal(result, ordered)
|
||||
assert result.freq == ordered.freq
|
||||
|
||||
def test_union_bug_1730(self):
|
||||
|
||||
rng_a = timedelta_range("1 day", periods=4, freq="3H")
|
||||
rng_b = timedelta_range("1 day", periods=4, freq="4H")
|
||||
|
||||
result = rng_a.union(rng_b)
|
||||
exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b))))
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
def test_union_bug_1745(self):
|
||||
|
||||
left = TimedeltaIndex(["1 day 15:19:49.695000"])
|
||||
right = TimedeltaIndex(
|
||||
["2 day 13:04:21.322000", "1 day 15:27:24.873000", "1 day 15:31:05.350000"]
|
||||
)
|
||||
|
||||
result = left.union(right)
|
||||
exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right))))
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
def test_union_bug_4564(self):
|
||||
|
||||
left = timedelta_range("1 day", "30d")
|
||||
right = left + pd.offsets.Minute(15)
|
||||
|
||||
result = left.union(right)
|
||||
exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right))))
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
def test_intersection_bug_1708(self):
|
||||
index_1 = timedelta_range("1 day", periods=4, freq="h")
|
||||
index_2 = index_1 + pd.offsets.Hour(5)
|
||||
|
||||
result = index_1 & index_2
|
||||
assert len(result) == 0
|
||||
|
||||
index_1 = timedelta_range("1 day", periods=4, freq="h")
|
||||
index_2 = index_1 + pd.offsets.Hour(1)
|
||||
|
||||
result = index_1 & index_2
|
||||
expected = timedelta_range("1 day 01:00:00", periods=3, freq="h")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("sort", [None, False])
|
||||
def test_intersection_equal(self, sort):
|
||||
# GH 24471 Test intersection outcome given the sort keyword
|
||||
# for equal indicies intersection should return the original index
|
||||
first = timedelta_range("1 day", periods=4, freq="h")
|
||||
second = timedelta_range("1 day", periods=4, freq="h")
|
||||
intersect = first.intersection(second, sort=sort)
|
||||
if sort is None:
|
||||
tm.assert_index_equal(intersect, second.sort_values())
|
||||
assert tm.equalContents(intersect, second)
|
||||
|
||||
# Corner cases
|
||||
inter = first.intersection(first, sort=sort)
|
||||
assert inter is first
|
||||
|
||||
@pytest.mark.parametrize("period_1, period_2", [(0, 4), (4, 0)])
|
||||
@pytest.mark.parametrize("sort", [None, False])
|
||||
def test_intersection_zero_length(self, period_1, period_2, sort):
|
||||
# GH 24471 test for non overlap the intersection should be zero length
|
||||
index_1 = timedelta_range("1 day", periods=period_1, freq="h")
|
||||
index_2 = timedelta_range("1 day", periods=period_2, freq="h")
|
||||
expected = timedelta_range("1 day", periods=0, freq="h")
|
||||
result = index_1.intersection(index_2, sort=sort)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("sort", [None, False])
|
||||
def test_zero_length_input_index(self, sort):
|
||||
# GH 24966 test for 0-len intersections are copied
|
||||
index_1 = timedelta_range("1 day", periods=0, freq="h")
|
||||
index_2 = timedelta_range("1 day", periods=3, freq="h")
|
||||
result = index_1.intersection(index_2, sort=sort)
|
||||
assert index_1 is not result
|
||||
assert index_2 is not result
|
||||
tm.assert_copy(result, index_1)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"rng, expected",
|
||||
# if target has the same name, it is preserved
|
||||
[
|
||||
(
|
||||
timedelta_range("1 day", periods=5, freq="h", name="idx"),
|
||||
timedelta_range("1 day", periods=4, freq="h", name="idx"),
|
||||
),
|
||||
# if target name is different, it will be reset
|
||||
(
|
||||
timedelta_range("1 day", periods=5, freq="h", name="other"),
|
||||
timedelta_range("1 day", periods=4, freq="h", name=None),
|
||||
),
|
||||
# if no overlap exists return empty index
|
||||
(
|
||||
timedelta_range("1 day", periods=10, freq="h", name="idx")[5:],
|
||||
TimedeltaIndex([], name="idx"),
|
||||
),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("sort", [None, False])
|
||||
def test_intersection(self, rng, expected, sort):
|
||||
# GH 4690 (with tz)
|
||||
base = timedelta_range("1 day", periods=4, freq="h", name="idx")
|
||||
result = base.intersection(rng, sort=sort)
|
||||
if sort is None:
|
||||
expected = expected.sort_values()
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.name == expected.name
|
||||
assert result.freq == expected.freq
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"rng, expected",
|
||||
# part intersection works
|
||||
[
|
||||
(
|
||||
TimedeltaIndex(["5 hour", "2 hour", "4 hour", "9 hour"], name="idx"),
|
||||
TimedeltaIndex(["2 hour", "4 hour"], name="idx"),
|
||||
),
|
||||
# reordered part intersection
|
||||
(
|
||||
TimedeltaIndex(["2 hour", "5 hour", "5 hour", "1 hour"], name="other"),
|
||||
TimedeltaIndex(["1 hour", "2 hour"], name=None),
|
||||
),
|
||||
# reveresed index
|
||||
(
|
||||
TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx")[
|
||||
::-1
|
||||
],
|
||||
TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx"),
|
||||
),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("sort", [None, False])
|
||||
def test_intersection_non_monotonic(self, rng, expected, sort):
|
||||
# 24471 non-monotonic
|
||||
base = TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx")
|
||||
result = base.intersection(rng, sort=sort)
|
||||
if sort is None:
|
||||
expected = expected.sort_values()
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.name == expected.name
|
||||
|
||||
# if reveresed order, frequency is still the same
|
||||
if all(base == rng[::-1]) and sort is None:
|
||||
assert isinstance(result.freq, Hour)
|
||||
else:
|
||||
assert result.freq is None
|
||||
@@ -0,0 +1,356 @@
|
||||
from datetime import timedelta
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Int64Index,
|
||||
Series,
|
||||
Timedelta,
|
||||
TimedeltaIndex,
|
||||
date_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import (
|
||||
assert_almost_equal,
|
||||
assert_index_equal,
|
||||
assert_series_equal,
|
||||
)
|
||||
|
||||
from ..datetimelike import DatetimeLike
|
||||
|
||||
randn = np.random.randn
|
||||
|
||||
|
||||
class TestTimedeltaIndex(DatetimeLike):
    """TimedeltaIndex tests layered on the shared ``DatetimeLike`` test base."""

    _holder = TimedeltaIndex

    def setup_method(self, method):
        """Create the index fixtures and expose them through ``setup_indices``."""
        self.indices = dict(index=tm.makeTimedeltaIndex(10))
        self.setup_indices()

    def create_index(self):
        """Return a five-element index: 0..4 days, each offset by one hour."""
        return pd.to_timedelta(range(5), unit="d") + pd.offsets.Hour(1)

    def test_numeric_compat(self):
        # Dummy method to override super's version; this test is now done
        # in test_arithmetic.py
        pass

    def test_shift(self):
        pass  # this is handled in test_arithmetic.py

    def test_pickle_compat_construction(self):
        # Overrides the inherited test with a no-op.
        pass

    def test_fillna_timedelta(self):
        """``fillna`` with timedelta values and with a non-timedelta value."""
        # GH 11343
        idx = pd.TimedeltaIndex(["1 day", pd.NaT, "3 day"])

        exp = pd.TimedeltaIndex(["1 day", "2 day", "3 day"])
        tm.assert_index_equal(idx.fillna(pd.Timedelta("2 day")), exp)

        # This result used to be computed but never compared with ``exp``.
        exp = pd.TimedeltaIndex(["1 day", "3 hour", "3 day"])
        tm.assert_index_equal(idx.fillna(pd.Timedelta("3 hour")), exp)

        # Filling with a string is expected to give an object-dtype index.
        exp = pd.Index(
            [pd.Timedelta("1 day"), "x", pd.Timedelta("3 day")], dtype=object
        )
        tm.assert_index_equal(idx.fillna("x"), exp)

    @pytest.mark.parametrize("sort", [None, False])
    def test_difference_freq(self, sort):
        """``difference`` of two daily ranges yields an index with no freq."""
        # GH14323: Difference of TimedeltaIndex should not preserve frequency

        index = timedelta_range("0 days", "5 days", freq="D")

        other = timedelta_range("1 days", "4 days", freq="D")
        expected = TimedeltaIndex(["0 days", "5 days"], freq=None)
        idx_diff = index.difference(other, sort)
        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal("freq", idx_diff, expected)

        other = timedelta_range("2 days", "5 days", freq="D")
        idx_diff = index.difference(other, sort)
        expected = TimedeltaIndex(["0 days", "1 days"], freq=None)
        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal("freq", idx_diff, expected)

    @pytest.mark.parametrize("sort", [None, False])
    def test_difference_sort(self, sort):
        """``difference`` on an unsorted index, for each ``sort`` option."""
        index = pd.TimedeltaIndex(
            ["5 days", "3 days", "2 days", "4 days", "1 days", "0 days"]
        )

        other = timedelta_range("1 days", "4 days", freq="D")
        idx_diff = index.difference(other, sort)

        expected = TimedeltaIndex(["5 days", "0 days"], freq=None)

        # With sort=None the result is compared against the sorted expectation.
        if sort is None:
            expected = expected.sort_values()

        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal("freq", idx_diff, expected)

        other = timedelta_range("2 days", "5 days", freq="D")
        idx_diff = index.difference(other, sort)
        expected = TimedeltaIndex(["1 days", "0 days"], freq=None)

        if sort is None:
            expected = expected.sort_values()

        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal("freq", idx_diff, expected)

    def test_isin(self):
        """``isin`` against itself, a list of itself, and a mixed list."""
        index = tm.makeTimedeltaIndex(4)
        result = index.isin(index)
        assert result.all()

        result = index.isin(list(index))
        assert result.all()

        assert_almost_equal(
            index.isin([index[2], 5]), np.array([False, False, True, False])
        )

    def test_factorize(self):
        """``factorize`` codes and uniques, with and without ``sort=True``."""
        idx1 = TimedeltaIndex(["1 day", "1 day", "2 day", "2 day", "3 day", "3 day"])

        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
        exp_idx = TimedeltaIndex(["1 day", "2 day", "3 day"])

        arr, idx = idx1.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

        arr, idx = idx1.factorize(sort=True)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

        # freq must be preserved
        idx3 = timedelta_range("1 day", periods=4, freq="s")
        exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
        arr, idx = idx3.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, idx3)

    def test_join_self(self, join_type):
        """Joining an index with itself returns an equal index."""
        index = timedelta_range("1 day", periods=10)
        joined = index.join(index, how=join_type)
        tm.assert_index_equal(index, joined)

    def test_does_not_convert_mixed_integer(self):
        """Outer join of timedelta columns with an integer index is object dtype."""
        df = tm.makeCustomDataframe(
            10,
            10,
            data_gen_f=lambda *args, **kwargs: randn(),
            r_idx_type="i",
            c_idx_type="td",
        )
        str(df)

        cols = df.columns.join(df.index, how="outer")
        joined = cols.join(df.columns)
        assert cols.dtype == np.dtype("O")
        assert cols.dtype == joined.dtype
        tm.assert_index_equal(cols, joined)

    def test_sort_values(self):
        """``sort_values`` monotonicity and returned indexers."""
        idx = TimedeltaIndex(["4d", "1d", "2d"])

        ordered = idx.sort_values()
        assert ordered.is_monotonic

        ordered = idx.sort_values(ascending=False)
        assert ordered[::-1].is_monotonic

        ordered, dexer = idx.sort_values(return_indexer=True)
        assert ordered.is_monotonic

        tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0]), check_dtype=False)

        ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
        assert ordered[::-1].is_monotonic

        tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1]), check_dtype=False)

    def test_get_duplicates(self):
        """``get_duplicates`` warns and returns the repeated values."""
        idx = TimedeltaIndex(["1 day", "2 day", "2 day", "3 day", "3day", "4day"])

        with tm.assert_produces_warning(FutureWarning):
            # Deprecated - see GH20239
            result = idx.get_duplicates()

        ex = TimedeltaIndex(["2 day", "3day"])
        tm.assert_index_equal(result, ex)

    def test_argmin_argmax(self):
        """``argmin`` / ``argmax`` return the positions of the extremes."""
        idx = TimedeltaIndex(["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"])
        assert idx.argmin() == 1
        assert idx.argmax() == 0

    def test_misc_coverage(self):
        """Assorted checks: ``groupby`` element type and ``equals`` with lists."""
        rng = timedelta_range("1 day", periods=5)
        result = rng.groupby(rng.days)
        assert isinstance(list(result.values())[0][0], Timedelta)

        idx = TimedeltaIndex(["3d", "1d", "2d"])
        assert not idx.equals(list(idx))

        non_td = Index(list("abc"))
        assert not idx.equals(list(non_td))

    def test_map(self):
        """``map`` with a callable matches element-wise application."""
        # test_map_dictlike generally tests

        rng = timedelta_range("1 day", periods=10)

        f = lambda x: x.days
        result = rng.map(f)
        exp = Int64Index([f(x) for x in rng])
        tm.assert_index_equal(result, exp)

    def test_pass_TimedeltaIndex_to_index(self):
        """``Index(tdi, dtype=object)`` holds the same values as the pytimedeltas."""
        rng = timedelta_range("1 days", "10 days")
        idx = Index(rng, dtype=object)

        expected = Index(rng.to_pytimedelta(), dtype=object)

        tm.assert_numpy_array_equal(idx.values, expected.values)

    def test_pickle(self):
        """A timedelta range survives a pickle round trip."""
        rng = timedelta_range("1 days", periods=10)
        rng_p = tm.round_trip_pickle(rng)
        tm.assert_index_equal(rng, rng_p)

    def test_hash_error(self):
        """``hash`` on the index raises TypeError naming the index type."""
        index = timedelta_range("1 days", periods=10)
        with pytest.raises(
            TypeError, match=("unhashable type: {0.__name__!r}".format(type(index)))
        ):
            hash(index)

    def test_append_join_nondatetimeindex(self):
        """Appending and joining with a string index both complete."""
        rng = timedelta_range("1 days", periods=10)
        idx = Index(["a", "b", "c", "d"])

        result = rng.append(idx)
        assert isinstance(result[0], Timedelta)

        # it works
        rng.join(idx, how="outer")

    def test_append_numpy_bug_1681(self):
        """Appending a timedelta-indexed frame to an empty frame keeps column B."""
        td = timedelta_range("1 days", "10 days", freq="2D")
        a = DataFrame()
        c = DataFrame({"A": "foo", "B": td}, index=td)
        str(c)

        result = a.append(c)
        assert (result["B"] == td).all()

    def test_fields(self):
        """Component accessors on the index and through ``Series.dt``."""
        rng = timedelta_range("1 days, 10:11:12.100123456", periods=2, freq="s")
        tm.assert_index_equal(rng.days, Index([1, 1], dtype="int64"))
        tm.assert_index_equal(
            rng.seconds,
            Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], dtype="int64"),
        )
        tm.assert_index_equal(
            rng.microseconds, Index([100 * 1000 + 123, 100 * 1000 + 123], dtype="int64")
        )
        tm.assert_index_equal(rng.nanoseconds, Index([456, 456], dtype="int64"))

        # These component names are not attributes of the index.
        msg = "'TimedeltaIndex' object has no attribute '{}'"
        with pytest.raises(AttributeError, match=msg.format("hours")):
            rng.hours
        with pytest.raises(AttributeError, match=msg.format("minutes")):
            rng.minutes
        with pytest.raises(AttributeError, match=msg.format("milliseconds")):
            rng.milliseconds

        # with nat
        s = Series(rng)
        s[1] = np.nan

        tm.assert_series_equal(s.dt.days, Series([1, np.nan], index=[0, 1]))
        tm.assert_series_equal(
            s.dt.seconds, Series([10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1])
        )

        # preserve name (GH15589)
        rng.name = "name"
        assert rng.days.name == "name"

    def test_freq_conversion(self):
        """Division by a timedelta64 and ``astype`` to day/second resolution."""
        # doc example

        # series
        td = Series(date_range("20130101", periods=4)) - Series(
            date_range("20121201", periods=4)
        )
        td[2] += timedelta(minutes=5, seconds=3)
        td[3] = np.nan

        result = td / np.timedelta64(1, "D")
        expected = Series([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan])
        assert_series_equal(result, expected)

        result = td.astype("timedelta64[D]")
        expected = Series([31, 31, 31, np.nan])
        assert_series_equal(result, expected)

        result = td / np.timedelta64(1, "s")
        expected = Series([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, np.nan])
        assert_series_equal(result, expected)

        result = td.astype("timedelta64[s]")
        assert_series_equal(result, expected)

        # tdi
        td = TimedeltaIndex(td)

        result = td / np.timedelta64(1, "D")
        expected = Index([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan])
        assert_index_equal(result, expected)

        result = td.astype("timedelta64[D]")
        expected = Index([31, 31, 31, np.nan])
        assert_index_equal(result, expected)

        result = td / np.timedelta64(1, "s")
        expected = Index([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, np.nan])
        assert_index_equal(result, expected)

        result = td.astype("timedelta64[s]")
        assert_index_equal(result, expected)

    @pytest.mark.parametrize("unit", ["Y", "y", "M"])
    def test_unit_m_y_deprecated(self, unit):
        """Constructing with a "Y", "y" or "M" unit emits a FutureWarning."""
        with tm.assert_produces_warning(FutureWarning) as w:
            TimedeltaIndex([1, 3, 7], unit)
        msg = r".* units are deprecated .*"
        assert re.match(msg, str(w[0].message))
|
||||
|
||||
|
||||
class TestTimeSeries:
    """Scalar access on a timedelta-valued Series."""

    def test_series_box_timedelta(self):
        """Elements read through ``[]`` and ``.iat`` are ``Timedelta`` objects."""
        ser = Series(timedelta_range("1 day 1 s", periods=5, freq="h"))

        via_getitem = ser[1]
        assert isinstance(via_getitem, Timedelta)

        via_iat = ser.iat[2]
        assert isinstance(via_iat, Timedelta)
|
||||
@@ -0,0 +1,80 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import timedelta_range, to_timedelta
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from pandas.tseries.offsets import Day, Second
|
||||
|
||||
|
||||
class TestTimedeltas:
    """Tests for ``timedelta_range`` construction and argument validation."""

    def test_timedelta_range(self):
        """Compare ``timedelta_range`` output with equivalent ``to_timedelta`` calls.

        Also covers rejection of 2-D input by ``to_timedelta`` (GH 11776) and
        string-based slicing of a timedelta-indexed frame (issue10583).
        """
        result = timedelta_range("0 days", periods=5, freq="D")
        expected = to_timedelta(np.arange(5), unit="D")
        tm.assert_index_equal(result, expected)

        result = timedelta_range("0 days", "10 days", freq="D")
        expected = to_timedelta(np.arange(11), unit="D")
        tm.assert_index_equal(result, expected)

        result = timedelta_range("1 days, 00:00:02", "5 days, 00:00:02", freq="D")
        expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day()
        tm.assert_index_equal(result, expected)

        result = timedelta_range("1 days, 00:00:02", periods=5, freq="2D")
        expected = to_timedelta([1, 3, 5, 7, 9], unit="D") + Second(2)
        tm.assert_index_equal(result, expected)

        result = timedelta_range("0 days", freq="30T", periods=50)
        expected = to_timedelta(np.arange(50), unit="T") * 30
        tm.assert_index_equal(result, expected)

        # GH 11776
        two_dim_array = np.arange(10).reshape(2, 5)
        two_dim_frame = pd.DataFrame(np.arange(10).reshape(2, 5))
        for arg in (two_dim_array, two_dim_frame):
            with pytest.raises(TypeError, match="1-d array"):
                to_timedelta(arg)
            # The rejection must not depend on the error-handling mode.
            for errors in ("ignore", "raise", "coerce"):
                with pytest.raises(TypeError, match="1-d array"):
                    to_timedelta(arg, errors=errors)

        # issue10583
        frame = pd.DataFrame(np.random.normal(size=(10, 4)))
        frame.index = pd.timedelta_range(start="0s", periods=10, freq="s")
        sliced_by_timedelta = frame.loc[pd.Timedelta("0s") :, :]
        sliced_by_string = frame.loc["0s":, :]
        tm.assert_frame_equal(sliced_by_timedelta, sliced_by_string)

    @pytest.mark.parametrize(
        "periods, freq",
        [
            (3, "2D"),
            (5, "D"),
            (6, "19H12T"),
            (7, "16H"),
            (9, "12H"),
        ],
    )
    def test_linspace_behavior(self, periods, freq):
        """A range given by ``periods`` equals the one given by the matching ``freq``."""
        # GH 20976
        by_periods = timedelta_range(start="0 days", end="4 days", periods=periods)
        by_freq = timedelta_range(start="0 days", end="4 days", freq=freq)
        tm.assert_index_equal(by_periods, by_freq)

    def test_errors(self):
        """``timedelta_range`` raises ValueError unless exactly three params are given."""
        msg = (
            "Of the four parameters: start, end, periods, and freq, "
            "exactly three must be specified"
        )

        # not enough params
        too_few = (
            {"start": "0 days"},
            {"end": "5 days"},
            {"periods": 2},
            {},
        )
        for kwargs in too_few:
            with pytest.raises(ValueError, match=msg):
                timedelta_range(**kwargs)

        # too many params
        with pytest.raises(ValueError, match=msg):
            timedelta_range(start="0 days", end="5 days", periods=10, freq="H")
|
||||
@@ -0,0 +1,221 @@
|
||||
from datetime import time, timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslib import iNaT
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series, TimedeltaIndex, isna, to_timedelta
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
class TestTimedeltas:
|
||||
def test_to_timedelta(self):
|
||||
def conv(v):
|
||||
return v.astype("m8[ns]")
|
||||
|
||||
d1 = np.timedelta64(1, "D")
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
assert to_timedelta("1 days 06:05:01.00003", box=False) == conv(
|
||||
d1
|
||||
+ np.timedelta64(6 * 3600 + 5 * 60 + 1, "s")
|
||||
+ np.timedelta64(30, "us")
|
||||
)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
assert to_timedelta("15.5us", box=False) == conv(
|
||||
np.timedelta64(15500, "ns")
|
||||
)
|
||||
|
||||
# empty string
|
||||
result = to_timedelta("", box=False)
|
||||
assert result.astype("int64") == iNaT
|
||||
|
||||
result = to_timedelta(["", ""])
|
||||
assert isna(result).all()
|
||||
|
||||
# pass thru
|
||||
result = to_timedelta(np.array([np.timedelta64(1, "s")]))
|
||||
expected = pd.Index(np.array([np.timedelta64(1, "s")]))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
# ints
|
||||
result = np.timedelta64(0, "ns")
|
||||
expected = to_timedelta(0, box=False)
|
||||
assert result == expected
|
||||
|
||||
# Series
|
||||
expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)])
|
||||
result = to_timedelta(Series(["1d", "1days 00:00:01"]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# with units
|
||||
result = TimedeltaIndex(
|
||||
[np.timedelta64(0, "ns"), np.timedelta64(10, "s").astype("m8[ns]")]
|
||||
)
|
||||
expected = to_timedelta([0, 10], unit="s")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
# single element conversion
|
||||
v = timedelta(seconds=1)
|
||||
result = to_timedelta(v, box=False)
|
||||
expected = np.timedelta64(timedelta(seconds=1))
|
||||
assert result == expected
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
v = np.timedelta64(timedelta(seconds=1))
|
||||
result = to_timedelta(v, box=False)
|
||||
expected = np.timedelta64(timedelta(seconds=1))
|
||||
assert result == expected
|
||||
|
||||
# arrays of various dtypes
|
||||
arr = np.array([1] * 5, dtype="int64")
|
||||
result = to_timedelta(arr, unit="s")
|
||||
expected = TimedeltaIndex([np.timedelta64(1, "s")] * 5)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
arr = np.array([1] * 5, dtype="int64")
|
||||
result = to_timedelta(arr, unit="m")
|
||||
expected = TimedeltaIndex([np.timedelta64(1, "m")] * 5)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
arr = np.array([1] * 5, dtype="int64")
|
||||
result = to_timedelta(arr, unit="h")
|
||||
expected = TimedeltaIndex([np.timedelta64(1, "h")] * 5)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
arr = np.array([1] * 5, dtype="timedelta64[s]")
|
||||
result = to_timedelta(arr)
|
||||
expected = TimedeltaIndex([np.timedelta64(1, "s")] * 5)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
arr = np.array([1] * 5, dtype="timedelta64[D]")
|
||||
result = to_timedelta(arr)
|
||||
expected = TimedeltaIndex([np.timedelta64(1, "D")] * 5)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
# Test with lists as input when box=false
|
||||
expected = np.array(np.arange(3) * 1000000000, dtype="timedelta64[ns]")
|
||||
result = to_timedelta(range(3), unit="s", box=False)
|
||||
tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = to_timedelta(np.arange(3), unit="s", box=False)
|
||||
tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = to_timedelta([0, 1, 2], unit="s", box=False)
|
||||
tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
# Tests with fractional seconds as input:
|
||||
expected = np.array(
|
||||
[0, 500000000, 800000000, 1200000000], dtype="timedelta64[ns]"
|
||||
)
|
||||
result = to_timedelta([0.0, 0.5, 0.8, 1.2], unit="s", box=False)
|
||||
tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
def test_to_timedelta_invalid(self):
|
||||
|
||||
# bad value for errors parameter
|
||||
msg = "errors must be one of"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_timedelta(["foo"], errors="never")
|
||||
|
||||
# these will error
|
||||
msg = "invalid unit abbreviation: foo"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_timedelta([1, 2], unit="foo")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_timedelta(1, unit="foo")
|
||||
|
||||
# time not supported ATM
|
||||
msg = (
|
||||
"Value must be Timedelta, string, integer, float, timedelta or"
|
||||
" convertible"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_timedelta(time(second=1))
|
||||
assert to_timedelta(time(second=1), errors="coerce") is pd.NaT
|
||||
|
||||
msg = "unit abbreviation w/o a number"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_timedelta(["foo", "bar"])
|
||||
tm.assert_index_equal(
|
||||
TimedeltaIndex([pd.NaT, pd.NaT]),
|
||||
to_timedelta(["foo", "bar"], errors="coerce"),
|
||||
)
|
||||
|
||||
tm.assert_index_equal(
|
||||
TimedeltaIndex(["1 day", pd.NaT, "1 min"]),
|
||||
to_timedelta(["1 day", "bar", "1 min"], errors="coerce"),
|
||||
)
|
||||
|
||||
# gh-13613: these should not error because errors='ignore'
|
||||
invalid_data = "apple"
|
||||
assert invalid_data == to_timedelta(invalid_data, errors="ignore")
|
||||
|
||||
invalid_data = ["apple", "1 days"]
|
||||
tm.assert_numpy_array_equal(
|
||||
np.array(invalid_data, dtype=object),
|
||||
to_timedelta(invalid_data, errors="ignore"),
|
||||
)
|
||||
|
||||
invalid_data = pd.Index(["apple", "1 days"])
|
||||
tm.assert_index_equal(invalid_data, to_timedelta(invalid_data, errors="ignore"))
|
||||
|
||||
invalid_data = Series(["apple", "1 days"])
|
||||
tm.assert_series_equal(
|
||||
invalid_data, to_timedelta(invalid_data, errors="ignore")
|
||||
)
|
||||
|
||||
def test_to_timedelta_via_apply(self):
|
||||
# GH 5458
|
||||
expected = Series([np.timedelta64(1, "s")])
|
||||
result = Series(["00:00:01"]).apply(to_timedelta)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = Series([to_timedelta("00:00:01")])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_to_timedelta_on_missing_values(self):
|
||||
# GH5438
|
||||
timedelta_NaT = np.timedelta64("NaT")
|
||||
|
||||
actual = pd.to_timedelta(Series(["00:00:01", np.nan]))
|
||||
expected = Series(
|
||||
[np.timedelta64(1000000000, "ns"), timedelta_NaT], dtype="<m8[ns]"
|
||||
)
|
||||
assert_series_equal(actual, expected)
|
||||
|
||||
actual = pd.to_timedelta(Series(["00:00:01", pd.NaT]))
|
||||
assert_series_equal(actual, expected)
|
||||
|
||||
actual = pd.to_timedelta(np.nan)
|
||||
assert actual.value == timedelta_NaT.astype("int64")
|
||||
|
||||
actual = pd.to_timedelta(pd.NaT)
|
||||
assert actual.value == timedelta_NaT.astype("int64")
|
||||
|
||||
def test_to_timedelta_float(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/25077
|
||||
arr = np.arange(0, 1, 1e-6)[-10:]
|
||||
result = pd.to_timedelta(arr, unit="s")
|
||||
expected_asi8 = np.arange(999990000, int(1e9), 1000, dtype="int64")
|
||||
tm.assert_numpy_array_equal(result.asi8, expected_asi8)
|
||||
|
||||
def test_to_timedelta_box_deprecated(self):
|
||||
result = np.timedelta64(0, "ns")
|
||||
|
||||
# Deprecated - see GH24416
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
to_timedelta(0, box=False)
|
||||
|
||||
expected = to_timedelta(0).to_timedelta64()
|
||||
assert result == expected
|
||||
Reference in New Issue
Block a user