8th day of python challenges 111-117

This commit is contained in:
abd.shallal
2019-08-04 15:26:35 +03:00
parent b04c1b055f
commit 627802c383
3215 changed files with 760227 additions and 491 deletions

View File

@@ -0,0 +1,121 @@
import numpy as np
import pytest
import pandas as pd
from pandas import PeriodIndex, period_range
import pandas.util.testing as tm
class TestPeriodIndexArithmetic:
# ---------------------------------------------------------------
# PeriodIndex.shift is used by __add__ and __sub__
def test_pi_shift_ndarray(self):
idx = PeriodIndex(
["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx"
)
result = idx.shift(np.array([1, 2, 3, 4]))
expected = PeriodIndex(
["2011-02", "2011-04", "NaT", "2011-08"], freq="M", name="idx"
)
tm.assert_index_equal(result, expected)
result = idx.shift(np.array([1, -2, 3, -4]))
expected = PeriodIndex(
["2011-02", "2010-12", "NaT", "2010-12"], freq="M", name="idx"
)
tm.assert_index_equal(result, expected)
def test_shift(self):
pi1 = period_range(freq="A", start="1/1/2001", end="12/1/2009")
pi2 = period_range(freq="A", start="1/1/2002", end="12/1/2010")
tm.assert_index_equal(pi1.shift(0), pi1)
assert len(pi1) == len(pi2)
tm.assert_index_equal(pi1.shift(1), pi2)
pi1 = period_range(freq="A", start="1/1/2001", end="12/1/2009")
pi2 = period_range(freq="A", start="1/1/2000", end="12/1/2008")
assert len(pi1) == len(pi2)
tm.assert_index_equal(pi1.shift(-1), pi2)
pi1 = period_range(freq="M", start="1/1/2001", end="12/1/2009")
pi2 = period_range(freq="M", start="2/1/2001", end="1/1/2010")
assert len(pi1) == len(pi2)
tm.assert_index_equal(pi1.shift(1), pi2)
pi1 = period_range(freq="M", start="1/1/2001", end="12/1/2009")
pi2 = period_range(freq="M", start="12/1/2000", end="11/1/2009")
assert len(pi1) == len(pi2)
tm.assert_index_equal(pi1.shift(-1), pi2)
pi1 = period_range(freq="D", start="1/1/2001", end="12/1/2009")
pi2 = period_range(freq="D", start="1/2/2001", end="12/2/2009")
assert len(pi1) == len(pi2)
tm.assert_index_equal(pi1.shift(1), pi2)
pi1 = period_range(freq="D", start="1/1/2001", end="12/1/2009")
pi2 = period_range(freq="D", start="12/31/2000", end="11/30/2009")
assert len(pi1) == len(pi2)
tm.assert_index_equal(pi1.shift(-1), pi2)
def test_shift_corner_cases(self):
# GH#9903
idx = pd.PeriodIndex([], name="xxx", freq="H")
with pytest.raises(TypeError):
# period shift doesn't accept freq
idx.shift(1, freq="H")
tm.assert_index_equal(idx.shift(0), idx)
tm.assert_index_equal(idx.shift(3), idx)
idx = pd.PeriodIndex(
["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"],
name="xxx",
freq="H",
)
tm.assert_index_equal(idx.shift(0), idx)
exp = pd.PeriodIndex(
["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"],
name="xxx",
freq="H",
)
tm.assert_index_equal(idx.shift(3), exp)
exp = pd.PeriodIndex(
["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"],
name="xxx",
freq="H",
)
tm.assert_index_equal(idx.shift(-3), exp)
def test_shift_nat(self):
idx = PeriodIndex(
["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx"
)
result = idx.shift(1)
expected = PeriodIndex(
["2011-02", "2011-03", "NaT", "2011-05"], freq="M", name="idx"
)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
def test_shift_gh8083(self):
# test shift for PeriodIndex
# GH#8083
drange = pd.period_range("20130101", periods=5, freq="D")
result = drange.shift(1)
expected = PeriodIndex(
["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"],
freq="D",
)
tm.assert_index_equal(result, expected)
def test_shift_periods(self):
# GH #22458 : argument 'n' was deprecated in favor of 'periods'
idx = period_range(freq="A", start="1/1/2001", end="12/1/2009")
tm.assert_index_equal(idx.shift(periods=0), idx)
tm.assert_index_equal(idx.shift(0), idx)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=True):
tm.assert_index_equal(idx.shift(n=0), idx)

View File

@@ -0,0 +1,149 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, PeriodIndex, Series, period_range
from pandas.util import testing as tm
class TestPeriodIndex:
def test_asfreq(self):
pi1 = period_range(freq="A", start="1/1/2001", end="1/1/2001")
pi2 = period_range(freq="Q", start="1/1/2001", end="1/1/2001")
pi3 = period_range(freq="M", start="1/1/2001", end="1/1/2001")
pi4 = period_range(freq="D", start="1/1/2001", end="1/1/2001")
pi5 = period_range(freq="H", start="1/1/2001", end="1/1/2001 00:00")
pi6 = period_range(freq="Min", start="1/1/2001", end="1/1/2001 00:00")
pi7 = period_range(freq="S", start="1/1/2001", end="1/1/2001 00:00:00")
assert pi1.asfreq("Q", "S") == pi2
assert pi1.asfreq("Q", "s") == pi2
assert pi1.asfreq("M", "start") == pi3
assert pi1.asfreq("D", "StarT") == pi4
assert pi1.asfreq("H", "beGIN") == pi5
assert pi1.asfreq("Min", "S") == pi6
assert pi1.asfreq("S", "S") == pi7
assert pi2.asfreq("A", "S") == pi1
assert pi2.asfreq("M", "S") == pi3
assert pi2.asfreq("D", "S") == pi4
assert pi2.asfreq("H", "S") == pi5
assert pi2.asfreq("Min", "S") == pi6
assert pi2.asfreq("S", "S") == pi7
assert pi3.asfreq("A", "S") == pi1
assert pi3.asfreq("Q", "S") == pi2
assert pi3.asfreq("D", "S") == pi4
assert pi3.asfreq("H", "S") == pi5
assert pi3.asfreq("Min", "S") == pi6
assert pi3.asfreq("S", "S") == pi7
assert pi4.asfreq("A", "S") == pi1
assert pi4.asfreq("Q", "S") == pi2
assert pi4.asfreq("M", "S") == pi3
assert pi4.asfreq("H", "S") == pi5
assert pi4.asfreq("Min", "S") == pi6
assert pi4.asfreq("S", "S") == pi7
assert pi5.asfreq("A", "S") == pi1
assert pi5.asfreq("Q", "S") == pi2
assert pi5.asfreq("M", "S") == pi3
assert pi5.asfreq("D", "S") == pi4
assert pi5.asfreq("Min", "S") == pi6
assert pi5.asfreq("S", "S") == pi7
assert pi6.asfreq("A", "S") == pi1
assert pi6.asfreq("Q", "S") == pi2
assert pi6.asfreq("M", "S") == pi3
assert pi6.asfreq("D", "S") == pi4
assert pi6.asfreq("H", "S") == pi5
assert pi6.asfreq("S", "S") == pi7
assert pi7.asfreq("A", "S") == pi1
assert pi7.asfreq("Q", "S") == pi2
assert pi7.asfreq("M", "S") == pi3
assert pi7.asfreq("D", "S") == pi4
assert pi7.asfreq("H", "S") == pi5
assert pi7.asfreq("Min", "S") == pi6
msg = "How must be one of S or E"
with pytest.raises(ValueError, match=msg):
pi7.asfreq("T", "foo")
result1 = pi1.asfreq("3M")
result2 = pi1.asfreq("M")
expected = period_range(freq="M", start="2001-12", end="2001-12")
tm.assert_numpy_array_equal(result1.asi8, expected.asi8)
assert result1.freqstr == "3M"
tm.assert_numpy_array_equal(result2.asi8, expected.asi8)
assert result2.freqstr == "M"
def test_asfreq_nat(self):
idx = PeriodIndex(["2011-01", "2011-02", "NaT", "2011-04"], freq="M")
result = idx.asfreq(freq="Q")
expected = PeriodIndex(["2011Q1", "2011Q1", "NaT", "2011Q2"], freq="Q")
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("freq", ["D", "3D"])
def test_asfreq_mult_pi(self, freq):
pi = PeriodIndex(["2001-01", "2001-02", "NaT", "2001-03"], freq="2M")
result = pi.asfreq(freq)
exp = PeriodIndex(["2001-02-28", "2001-03-31", "NaT", "2001-04-30"], freq=freq)
tm.assert_index_equal(result, exp)
assert result.freq == exp.freq
result = pi.asfreq(freq, how="S")
exp = PeriodIndex(["2001-01-01", "2001-02-01", "NaT", "2001-03-01"], freq=freq)
tm.assert_index_equal(result, exp)
assert result.freq == exp.freq
def test_asfreq_combined_pi(self):
pi = pd.PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq="H")
exp = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq="25H")
for freq, how in zip(["1D1H", "1H1D"], ["S", "E"]):
result = pi.asfreq(freq, how=how)
tm.assert_index_equal(result, exp)
assert result.freq == exp.freq
for freq in ["1D1H", "1H1D"]:
pi = pd.PeriodIndex(
["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq=freq
)
result = pi.asfreq("H")
exp = PeriodIndex(["2001-01-02 00:00", "2001-01-03 02:00", "NaT"], freq="H")
tm.assert_index_equal(result, exp)
assert result.freq == exp.freq
pi = pd.PeriodIndex(
["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq=freq
)
result = pi.asfreq("H", how="S")
exp = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq="H")
tm.assert_index_equal(result, exp)
assert result.freq == exp.freq
def test_asfreq_ts(self):
index = period_range(freq="A", start="1/1/2001", end="12/31/2010")
ts = Series(np.random.randn(len(index)), index=index)
df = DataFrame(np.random.randn(len(index), 3), index=index)
result = ts.asfreq("D", how="end")
df_result = df.asfreq("D", how="end")
exp_index = index.asfreq("D", how="end")
assert len(result) == len(ts)
tm.assert_index_equal(result.index, exp_index)
tm.assert_index_equal(df_result.index, exp_index)
result = ts.asfreq("D", how="start")
assert len(result) == len(ts)
tm.assert_index_equal(result.index, index.asfreq("D", how="start"))
def test_astype_asfreq(self):
pi1 = PeriodIndex(["2011-01-01", "2011-02-01", "2011-03-01"], freq="D")
exp = PeriodIndex(["2011-01", "2011-02", "2011-03"], freq="M")
tm.assert_index_equal(pi1.asfreq("M"), exp)
tm.assert_index_equal(pi1.astype("period[M]"), exp)
exp = PeriodIndex(["2011-01", "2011-02", "2011-03"], freq="3M")
tm.assert_index_equal(pi1.asfreq("3M"), exp)
tm.assert_index_equal(pi1.astype("period[3M]"), exp)

View File

@@ -0,0 +1,128 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Index, Int64Index, NaT, Period, PeriodIndex, period_range
import pandas.util.testing as tm
class TestPeriodIndexAsType:
@pytest.mark.parametrize("dtype", [float, "timedelta64", "timedelta64[ns]"])
def test_astype_raises(self, dtype):
# GH#13149, GH#13209
idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq="D")
msg = "Cannot cast PeriodArray to dtype"
with pytest.raises(TypeError, match=msg):
idx.astype(dtype)
def test_astype_conversion(self):
# GH#13149, GH#13209
idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq="D")
result = idx.astype(object)
expected = Index(
[Period("2016-05-16", freq="D")] + [Period(NaT, freq="D")] * 3,
dtype="object",
)
tm.assert_index_equal(result, expected)
result = idx.astype(np.int64)
expected = Int64Index([16937] + [-9223372036854775808] * 3, dtype=np.int64)
tm.assert_index_equal(result, expected)
result = idx.astype(str)
expected = Index(str(x) for x in idx)
tm.assert_index_equal(result, expected)
idx = period_range("1990", "2009", freq="A")
result = idx.astype("i8")
tm.assert_index_equal(result, Index(idx.asi8))
tm.assert_numpy_array_equal(result.values, idx.asi8)
def test_astype_uint(self):
arr = period_range("2000", periods=2)
expected = pd.UInt64Index(np.array([10957, 10958], dtype="uint64"))
tm.assert_index_equal(arr.astype("uint64"), expected)
tm.assert_index_equal(arr.astype("uint32"), expected)
def test_astype_object(self):
idx = pd.PeriodIndex([], freq="M")
exp = np.array([], dtype=object)
tm.assert_numpy_array_equal(idx.astype(object).values, exp)
tm.assert_numpy_array_equal(idx._mpl_repr(), exp)
idx = pd.PeriodIndex(["2011-01", pd.NaT], freq="M")
exp = np.array([pd.Period("2011-01", freq="M"), pd.NaT], dtype=object)
tm.assert_numpy_array_equal(idx.astype(object).values, exp)
tm.assert_numpy_array_equal(idx._mpl_repr(), exp)
exp = np.array([pd.Period("2011-01-01", freq="D"), pd.NaT], dtype=object)
idx = pd.PeriodIndex(["2011-01-01", pd.NaT], freq="D")
tm.assert_numpy_array_equal(idx.astype(object).values, exp)
tm.assert_numpy_array_equal(idx._mpl_repr(), exp)
# TODO: de-duplicate this version (from test_ops) with the one above
# (from test_period)
def test_astype_object2(self):
idx = pd.period_range(start="2013-01-01", periods=4, freq="M", name="idx")
expected_list = [
pd.Period("2013-01-31", freq="M"),
pd.Period("2013-02-28", freq="M"),
pd.Period("2013-03-31", freq="M"),
pd.Period("2013-04-30", freq="M"),
]
expected = pd.Index(expected_list, dtype=object, name="idx")
result = idx.astype(object)
assert isinstance(result, Index)
assert result.dtype == object
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert idx.tolist() == expected_list
idx = PeriodIndex(
["2013-01-01", "2013-01-02", "NaT", "2013-01-04"], freq="D", name="idx"
)
expected_list = [
pd.Period("2013-01-01", freq="D"),
pd.Period("2013-01-02", freq="D"),
pd.Period("NaT", freq="D"),
pd.Period("2013-01-04", freq="D"),
]
expected = pd.Index(expected_list, dtype=object, name="idx")
result = idx.astype(object)
assert isinstance(result, Index)
assert result.dtype == object
tm.assert_index_equal(result, expected)
for i in [0, 1, 3]:
assert result[i] == expected[i]
assert result[2] is pd.NaT
assert result.name == expected.name
result_list = idx.tolist()
for i in [0, 1, 3]:
assert result_list[i] == expected_list[i]
assert result_list[2] is pd.NaT
def test_astype_category(self):
obj = pd.period_range("2000", periods=2)
result = obj.astype("category")
expected = pd.CategoricalIndex(
[pd.Period("2000-01-01", freq="D"), pd.Period("2000-01-02", freq="D")]
)
tm.assert_index_equal(result, expected)
result = obj._data.astype("category")
expected = expected.values
tm.assert_categorical_equal(result, expected)
def test_astype_array_fallback(self):
obj = pd.period_range("2000", periods=2)
result = obj.astype(bool)
expected = pd.Index(np.array([True, True]))
tm.assert_index_equal(result, expected)
result = obj._data.astype(bool)
expected = np.array([True, True])
tm.assert_numpy_array_equal(result, expected)

View File

@@ -0,0 +1,562 @@
import numpy as np
import pytest
from pandas._libs.tslibs.period import IncompatibleFrequency
from pandas.core.dtypes.dtypes import PeriodDtype
import pandas as pd
from pandas import Index, Period, PeriodIndex, Series, date_range, offsets, period_range
import pandas.core.indexes.period as period
import pandas.util.testing as tm
class TestPeriodIndex:
def setup_method(self, method):
pass
def test_construction_base_constructor(self):
# GH 13664
arr = [pd.Period("2011-01", freq="M"), pd.NaT, pd.Period("2011-03", freq="M")]
tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr))
tm.assert_index_equal(pd.Index(np.array(arr)), pd.PeriodIndex(np.array(arr)))
arr = [np.nan, pd.NaT, pd.Period("2011-03", freq="M")]
tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr))
tm.assert_index_equal(pd.Index(np.array(arr)), pd.PeriodIndex(np.array(arr)))
arr = [pd.Period("2011-01", freq="M"), pd.NaT, pd.Period("2011-03", freq="D")]
tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object))
tm.assert_index_equal(
pd.Index(np.array(arr)), pd.Index(np.array(arr), dtype=object)
)
def test_constructor_use_start_freq(self):
# GH #1118
p = Period("4/2/2012", freq="B")
with tm.assert_produces_warning(FutureWarning):
index = PeriodIndex(start=p, periods=10)
expected = period_range(start="4/2/2012", periods=10, freq="B")
tm.assert_index_equal(index, expected)
index = period_range(start=p, periods=10)
tm.assert_index_equal(index, expected)
def test_constructor_field_arrays(self):
# GH #1264
years = np.arange(1990, 2010).repeat(4)[2:-2]
quarters = np.tile(np.arange(1, 5), 20)[2:-2]
index = PeriodIndex(year=years, quarter=quarters, freq="Q-DEC")
expected = period_range("1990Q3", "2009Q2", freq="Q-DEC")
tm.assert_index_equal(index, expected)
index2 = PeriodIndex(year=years, quarter=quarters, freq="2Q-DEC")
tm.assert_numpy_array_equal(index.asi8, index2.asi8)
index = PeriodIndex(year=years, quarter=quarters)
tm.assert_index_equal(index, expected)
years = [2007, 2007, 2007]
months = [1, 2]
msg = "Mismatched Period array lengths"
with pytest.raises(ValueError, match=msg):
PeriodIndex(year=years, month=months, freq="M")
with pytest.raises(ValueError, match=msg):
PeriodIndex(year=years, month=months, freq="2M")
msg = "Can either instantiate from fields or endpoints, but not both"
with pytest.raises(ValueError, match=msg):
PeriodIndex(
year=years, month=months, freq="M", start=Period("2007-01", freq="M")
)
years = [2007, 2007, 2007]
months = [1, 2, 3]
idx = PeriodIndex(year=years, month=months, freq="M")
exp = period_range("2007-01", periods=3, freq="M")
tm.assert_index_equal(idx, exp)
def test_constructor_U(self):
# U was used as undefined period
with pytest.raises(ValueError, match="Invalid frequency: X"):
period_range("2007-1-1", periods=500, freq="X")
def test_constructor_nano(self):
idx = period_range(
start=Period(ordinal=1, freq="N"), end=Period(ordinal=4, freq="N"), freq="N"
)
exp = PeriodIndex(
[
Period(ordinal=1, freq="N"),
Period(ordinal=2, freq="N"),
Period(ordinal=3, freq="N"),
Period(ordinal=4, freq="N"),
],
freq="N",
)
tm.assert_index_equal(idx, exp)
def test_constructor_arrays_negative_year(self):
years = np.arange(1960, 2000, dtype=np.int64).repeat(4)
quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40)
pindex = PeriodIndex(year=years, quarter=quarters)
tm.assert_index_equal(pindex.year, pd.Index(years))
tm.assert_index_equal(pindex.quarter, pd.Index(quarters))
def test_constructor_invalid_quarters(self):
msg = "Quarter must be 1 <= q <= 4"
with pytest.raises(ValueError, match=msg):
PeriodIndex(year=range(2000, 2004), quarter=list(range(4)), freq="Q-DEC")
def test_constructor_corner(self):
msg = "Not enough parameters to construct Period range"
with pytest.raises(ValueError, match=msg):
PeriodIndex(periods=10, freq="A")
start = Period("2007", freq="A-JUN")
end = Period("2010", freq="A-DEC")
msg = "start and end must have same freq"
with pytest.raises(ValueError, match=msg):
PeriodIndex(start=start, end=end)
msg = (
"Of the three parameters: start, end, and periods, exactly two"
" must be specified"
)
with pytest.raises(ValueError, match=msg):
PeriodIndex(start=start)
with pytest.raises(ValueError, match=msg):
PeriodIndex(end=end)
result = period_range("2007-01", periods=10.5, freq="M")
exp = period_range("2007-01", periods=10, freq="M")
tm.assert_index_equal(result, exp)
def test_constructor_fromarraylike(self):
idx = period_range("2007-01", periods=20, freq="M")
# values is an array of Period, thus can retrieve freq
tm.assert_index_equal(PeriodIndex(idx.values), idx)
tm.assert_index_equal(PeriodIndex(list(idx.values)), idx)
msg = "freq not specified and cannot be inferred"
with pytest.raises(ValueError, match=msg):
PeriodIndex(idx._ndarray_values)
with pytest.raises(ValueError, match=msg):
PeriodIndex(list(idx._ndarray_values))
msg = "'Period' object is not iterable"
with pytest.raises(TypeError, match=msg):
PeriodIndex(data=Period("2007", freq="A"))
result = PeriodIndex(iter(idx))
tm.assert_index_equal(result, idx)
result = PeriodIndex(idx)
tm.assert_index_equal(result, idx)
result = PeriodIndex(idx, freq="M")
tm.assert_index_equal(result, idx)
result = PeriodIndex(idx, freq=offsets.MonthEnd())
tm.assert_index_equal(result, idx)
assert result.freq == "M"
result = PeriodIndex(idx, freq="2M")
tm.assert_index_equal(result, idx.asfreq("2M"))
assert result.freq == "2M"
result = PeriodIndex(idx, freq=offsets.MonthEnd(2))
tm.assert_index_equal(result, idx.asfreq("2M"))
assert result.freq == "2M"
result = PeriodIndex(idx, freq="D")
exp = idx.asfreq("D", "e")
tm.assert_index_equal(result, exp)
def test_constructor_datetime64arr(self):
vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64)
vals = vals.view(np.dtype("M8[us]"))
msg = r"Wrong dtype: datetime64\[us\]"
with pytest.raises(ValueError, match=msg):
PeriodIndex(vals, freq="D")
@pytest.mark.parametrize("box", [None, "series", "index"])
def test_constructor_datetime64arr_ok(self, box):
# https://github.com/pandas-dev/pandas/issues/23438
data = pd.date_range("2017", periods=4, freq="M")
if box is None:
data = data._values
elif box == "series":
data = pd.Series(data)
result = PeriodIndex(data, freq="D")
expected = PeriodIndex(
["2017-01-31", "2017-02-28", "2017-03-31", "2017-04-30"], freq="D"
)
tm.assert_index_equal(result, expected)
def test_constructor_dtype(self):
# passing a dtype with a tz should localize
idx = PeriodIndex(["2013-01", "2013-03"], dtype="period[M]")
exp = PeriodIndex(["2013-01", "2013-03"], freq="M")
tm.assert_index_equal(idx, exp)
assert idx.dtype == "period[M]"
idx = PeriodIndex(["2013-01-05", "2013-03-05"], dtype="period[3D]")
exp = PeriodIndex(["2013-01-05", "2013-03-05"], freq="3D")
tm.assert_index_equal(idx, exp)
assert idx.dtype == "period[3D]"
# if we already have a freq and its not the same, then asfreq
# (not changed)
idx = PeriodIndex(["2013-01-01", "2013-01-02"], freq="D")
res = PeriodIndex(idx, dtype="period[M]")
exp = PeriodIndex(["2013-01", "2013-01"], freq="M")
tm.assert_index_equal(res, exp)
assert res.dtype == "period[M]"
res = PeriodIndex(idx, freq="M")
tm.assert_index_equal(res, exp)
assert res.dtype == "period[M]"
msg = "specified freq and dtype are different"
with pytest.raises(period.IncompatibleFrequency, match=msg):
PeriodIndex(["2011-01"], freq="M", dtype="period[D]")
def test_constructor_empty(self):
idx = pd.PeriodIndex([], freq="M")
assert isinstance(idx, PeriodIndex)
assert len(idx) == 0
assert idx.freq == "M"
with pytest.raises(ValueError, match="freq not specified"):
pd.PeriodIndex([])
def test_constructor_pi_nat(self):
idx = PeriodIndex(
[Period("2011-01", freq="M"), pd.NaT, Period("2011-01", freq="M")]
)
exp = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M")
tm.assert_index_equal(idx, exp)
idx = PeriodIndex(
np.array([Period("2011-01", freq="M"), pd.NaT, Period("2011-01", freq="M")])
)
tm.assert_index_equal(idx, exp)
idx = PeriodIndex(
[pd.NaT, pd.NaT, Period("2011-01", freq="M"), Period("2011-01", freq="M")]
)
exp = PeriodIndex(["NaT", "NaT", "2011-01", "2011-01"], freq="M")
tm.assert_index_equal(idx, exp)
idx = PeriodIndex(
np.array(
[
pd.NaT,
pd.NaT,
Period("2011-01", freq="M"),
Period("2011-01", freq="M"),
]
)
)
tm.assert_index_equal(idx, exp)
idx = PeriodIndex([pd.NaT, pd.NaT, "2011-01", "2011-01"], freq="M")
tm.assert_index_equal(idx, exp)
with pytest.raises(ValueError, match="freq not specified"):
PeriodIndex([pd.NaT, pd.NaT])
with pytest.raises(ValueError, match="freq not specified"):
PeriodIndex(np.array([pd.NaT, pd.NaT]))
with pytest.raises(ValueError, match="freq not specified"):
PeriodIndex(["NaT", "NaT"])
with pytest.raises(ValueError, match="freq not specified"):
PeriodIndex(np.array(["NaT", "NaT"]))
def test_constructor_incompat_freq(self):
msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)"
with pytest.raises(period.IncompatibleFrequency, match=msg):
PeriodIndex(
[Period("2011-01", freq="M"), pd.NaT, Period("2011-01", freq="D")]
)
with pytest.raises(period.IncompatibleFrequency, match=msg):
PeriodIndex(
np.array(
[Period("2011-01", freq="M"), pd.NaT, Period("2011-01", freq="D")]
)
)
# first element is pd.NaT
with pytest.raises(period.IncompatibleFrequency, match=msg):
PeriodIndex(
[pd.NaT, Period("2011-01", freq="M"), Period("2011-01", freq="D")]
)
with pytest.raises(period.IncompatibleFrequency, match=msg):
PeriodIndex(
np.array(
[pd.NaT, Period("2011-01", freq="M"), Period("2011-01", freq="D")]
)
)
def test_constructor_mixed(self):
idx = PeriodIndex(["2011-01", pd.NaT, Period("2011-01", freq="M")])
exp = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M")
tm.assert_index_equal(idx, exp)
idx = PeriodIndex(["NaT", pd.NaT, Period("2011-01", freq="M")])
exp = PeriodIndex(["NaT", "NaT", "2011-01"], freq="M")
tm.assert_index_equal(idx, exp)
idx = PeriodIndex([Period("2011-01-01", freq="D"), pd.NaT, "2012-01-01"])
exp = PeriodIndex(["2011-01-01", "NaT", "2012-01-01"], freq="D")
tm.assert_index_equal(idx, exp)
def test_constructor_simple_new(self):
idx = period_range("2007-01", name="p", periods=2, freq="M")
result = idx._simple_new(idx, name="p", freq=idx.freq)
tm.assert_index_equal(result, idx)
result = idx._simple_new(idx.astype("i8"), name="p", freq=idx.freq)
tm.assert_index_equal(result, idx)
def test_constructor_simple_new_empty(self):
# GH13079
idx = PeriodIndex([], freq="M", name="p")
result = idx._simple_new(idx, name="p", freq="M")
tm.assert_index_equal(result, idx)
@pytest.mark.parametrize("floats", [[1.1, 2.1], np.array([1.1, 2.1])])
def test_constructor_floats(self, floats):
msg = r"PeriodIndex\._simple_new does not accept floats"
with pytest.raises(TypeError, match=msg):
pd.PeriodIndex._simple_new(floats, freq="M")
msg = "PeriodIndex does not allow floating point in construction"
with pytest.raises(TypeError, match=msg):
pd.PeriodIndex(floats, freq="M")
def test_constructor_nat(self):
msg = "start and end must not be NaT"
with pytest.raises(ValueError, match=msg):
period_range(start="NaT", end="2011-01-01", freq="M")
with pytest.raises(ValueError, match=msg):
period_range(start="2011-01-01", end="NaT", freq="M")
def test_constructor_year_and_quarter(self):
year = pd.Series([2001, 2002, 2003])
quarter = year - 2000
idx = PeriodIndex(year=year, quarter=quarter)
strs = ["{t[0]:d}Q{t[1]:d}".format(t=t) for t in zip(quarter, year)]
lops = list(map(Period, strs))
p = PeriodIndex(lops)
tm.assert_index_equal(p, idx)
@pytest.mark.parametrize(
"func, warning", [(PeriodIndex, FutureWarning), (period_range, None)]
)
def test_constructor_freq_mult(self, func, warning):
# GH #7811
with tm.assert_produces_warning(warning):
# must be the same, but for sure...
pidx = func(start="2014-01", freq="2M", periods=4)
expected = PeriodIndex(["2014-01", "2014-03", "2014-05", "2014-07"], freq="2M")
tm.assert_index_equal(pidx, expected)
with tm.assert_produces_warning(warning):
pidx = func(start="2014-01-02", end="2014-01-15", freq="3D")
expected = PeriodIndex(
["2014-01-02", "2014-01-05", "2014-01-08", "2014-01-11", "2014-01-14"],
freq="3D",
)
tm.assert_index_equal(pidx, expected)
with tm.assert_produces_warning(warning):
pidx = func(end="2014-01-01 17:00", freq="4H", periods=3)
expected = PeriodIndex(
["2014-01-01 09:00", "2014-01-01 13:00", "2014-01-01 17:00"], freq="4H"
)
tm.assert_index_equal(pidx, expected)
msg = "Frequency must be positive, because it represents span: -1M"
with pytest.raises(ValueError, match=msg):
PeriodIndex(["2011-01"], freq="-1M")
msg = "Frequency must be positive, because it represents span: 0M"
with pytest.raises(ValueError, match=msg):
PeriodIndex(["2011-01"], freq="0M")
msg = "Frequency must be positive, because it represents span: 0M"
with pytest.raises(ValueError, match=msg):
period_range("2011-01", periods=3, freq="0M")
@pytest.mark.parametrize("freq", ["A", "M", "D", "T", "S"])
@pytest.mark.parametrize("mult", [1, 2, 3, 4, 5])
def test_constructor_freq_mult_dti_compat(self, mult, freq):
freqstr = str(mult) + freq
pidx = period_range(start="2014-04-01", freq=freqstr, periods=10)
expected = date_range(start="2014-04-01", freq=freqstr, periods=10).to_period(
freqstr
)
tm.assert_index_equal(pidx, expected)
def test_constructor_freq_combined(self):
for freq in ["1D1H", "1H1D"]:
pidx = PeriodIndex(["2016-01-01", "2016-01-02"], freq=freq)
expected = PeriodIndex(["2016-01-01 00:00", "2016-01-02 00:00"], freq="25H")
for freq in ["1D1H", "1H1D"]:
pidx = period_range(start="2016-01-01", periods=2, freq=freq)
expected = PeriodIndex(["2016-01-01 00:00", "2016-01-02 01:00"], freq="25H")
tm.assert_index_equal(pidx, expected)
def test_constructor_range_based_deprecated(self):
with tm.assert_produces_warning(FutureWarning):
pi = PeriodIndex(freq="A", start="1/1/2001", end="12/1/2009")
assert len(pi) == 9
def test_constructor_range_based_deprecated_different_freq(self):
with tm.assert_produces_warning(FutureWarning) as m:
PeriodIndex(start="2000", periods=2)
warning, = m
assert 'freq="A-DEC"' in str(warning.message)
def test_constructor(self):
pi = period_range(freq="A", start="1/1/2001", end="12/1/2009")
assert len(pi) == 9
pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009")
assert len(pi) == 4 * 9
pi = period_range(freq="M", start="1/1/2001", end="12/1/2009")
assert len(pi) == 12 * 9
pi = period_range(freq="D", start="1/1/2001", end="12/31/2009")
assert len(pi) == 365 * 9 + 2
pi = period_range(freq="B", start="1/1/2001", end="12/31/2009")
assert len(pi) == 261 * 9
pi = period_range(freq="H", start="1/1/2001", end="12/31/2001 23:00")
assert len(pi) == 365 * 24
pi = period_range(freq="Min", start="1/1/2001", end="1/1/2001 23:59")
assert len(pi) == 24 * 60
pi = period_range(freq="S", start="1/1/2001", end="1/1/2001 23:59:59")
assert len(pi) == 24 * 60 * 60
start = Period("02-Apr-2005", "B")
i1 = period_range(start=start, periods=20)
assert len(i1) == 20
assert i1.freq == start.freq
assert i1[0] == start
end_intv = Period("2006-12-31", "W")
i1 = period_range(end=end_intv, periods=10)
assert len(i1) == 10
assert i1.freq == end_intv.freq
assert i1[-1] == end_intv
end_intv = Period("2006-12-31", "1w")
i2 = period_range(end=end_intv, periods=10)
assert len(i1) == len(i2)
assert (i1 == i2).all()
assert i1.freq == i2.freq
end_intv = Period("2006-12-31", ("w", 1))
i2 = period_range(end=end_intv, periods=10)
assert len(i1) == len(i2)
assert (i1 == i2).all()
assert i1.freq == i2.freq
end_intv = Period("2005-05-01", "B")
i1 = period_range(start=start, end=end_intv)
# infer freq from first element
i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")])
assert len(i2) == 2
assert i2[0] == end_intv
i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")]))
assert len(i2) == 2
assert i2[0] == end_intv
# Mixed freq should fail
vals = [end_intv, Period("2006-12-31", "w")]
msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)"
with pytest.raises(IncompatibleFrequency, match=msg):
PeriodIndex(vals)
vals = np.array(vals)
with pytest.raises(IncompatibleFrequency, match=msg):
PeriodIndex(vals)
def test_constructor_error(self):
start = Period("02-Apr-2005", "B")
end_intv = Period("2006-12-31", ("w", 1))
msg = "start and end must have same freq"
with pytest.raises(ValueError, match=msg):
PeriodIndex(start=start, end=end_intv)
msg = (
"Of the three parameters: start, end, and periods, "
"exactly two must be specified"
)
with pytest.raises(ValueError, match=msg):
PeriodIndex(start=start)
@pytest.mark.parametrize(
"freq", ["M", "Q", "A", "D", "B", "T", "S", "L", "U", "N", "H"]
)
def test_recreate_from_data(self, freq):
org = period_range(start="2001/04/01", freq=freq, periods=1)
idx = PeriodIndex(org.values, freq=freq)
tm.assert_index_equal(idx, org)
def test_map_with_string_constructor(self):
raw = [2005, 2007, 2009]
index = PeriodIndex(raw, freq="A")
expected = Index([str(num) for num in raw])
res = index.map(str)
# should return an Index
assert isinstance(res, Index)
# preserve element types
assert all(isinstance(resi, str) for resi in res)
# lastly, values should compare equal
tm.assert_index_equal(res, expected)
class TestSeriesPeriod:
def setup_method(self, method):
self.series = Series(period_range("2000-01-01", periods=10, freq="D"))
def test_constructor_cant_cast_period(self):
msg = "Cannot cast PeriodArray to dtype float64"
with pytest.raises(TypeError, match=msg):
Series(period_range("2000-01-01", periods=10, freq="D"), dtype=float)
def test_constructor_cast_object(self):
s = Series(period_range("1/1/2000", periods=10), dtype=PeriodDtype("D"))
exp = Series(period_range("1/1/2000", periods=10))
tm.assert_series_equal(s, exp)

View File

@@ -0,0 +1,211 @@
import numpy as np
import pytest
import pandas as pd
from pandas import PeriodIndex
import pandas.util.testing as tm
def test_to_native_types():
index = PeriodIndex(["2017-01-01", "2017-01-02", "2017-01-03"], freq="D")
# First, with no arguments.
expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype="=U10")
result = index.to_native_types()
tm.assert_numpy_array_equal(result, expected)
# No NaN values, so na_rep has no effect
result = index.to_native_types(na_rep="pandas")
tm.assert_numpy_array_equal(result, expected)
# Make sure slicing works
expected = np.array(["2017-01-01", "2017-01-03"], dtype="=U10")
result = index.to_native_types([0, 2])
tm.assert_numpy_array_equal(result, expected)
# Make sure date formatting works
expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype="=U10")
result = index.to_native_types(date_format="%m-%Y-%d")
tm.assert_numpy_array_equal(result, expected)
# NULL object handling should work
index = PeriodIndex(["2017-01-01", pd.NaT, "2017-01-03"], freq="D")
expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object)
result = index.to_native_types()
tm.assert_numpy_array_equal(result, expected)
expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object)
result = index.to_native_types(na_rep="pandas")
tm.assert_numpy_array_equal(result, expected)
class TestPeriodIndexRendering:
def test_frame_repr(self):
df = pd.DataFrame({"A": [1, 2, 3]}, index=pd.date_range("2000", periods=3))
result = repr(df)
expected = " A\n2000-01-01 1\n2000-01-02 2\n2000-01-03 3"
assert result == expected
@pytest.mark.parametrize("method", ["__repr__", "__str__"])
def test_representation(self, method):
# GH#7601
idx1 = PeriodIndex([], freq="D")
idx2 = PeriodIndex(["2011-01-01"], freq="D")
idx3 = PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
idx4 = PeriodIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
idx5 = PeriodIndex(["2011", "2012", "2013"], freq="A")
idx6 = PeriodIndex(["2011-01-01 09:00", "2012-02-01 10:00", "NaT"], freq="H")
idx7 = pd.period_range("2013Q1", periods=1, freq="Q")
idx8 = pd.period_range("2013Q1", periods=2, freq="Q")
idx9 = pd.period_range("2013Q1", periods=3, freq="Q")
idx10 = PeriodIndex(["2011-01-01", "2011-02-01"], freq="3D")
exp1 = "PeriodIndex([], dtype='period[D]', freq='D')"
exp2 = "PeriodIndex(['2011-01-01'], dtype='period[D]', freq='D')"
exp3 = "PeriodIndex(['2011-01-01', '2011-01-02'], dtype='period[D]', freq='D')"
exp4 = (
"PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
"dtype='period[D]', freq='D')"
)
exp5 = (
"PeriodIndex(['2011', '2012', '2013'], dtype='period[A-DEC]', "
"freq='A-DEC')"
)
exp6 = (
"PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], "
"dtype='period[H]', freq='H')"
)
exp7 = "PeriodIndex(['2013Q1'], dtype='period[Q-DEC]', freq='Q-DEC')"
exp8 = "PeriodIndex(['2013Q1', '2013Q2'], dtype='period[Q-DEC]', freq='Q-DEC')"
exp9 = (
"PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], "
"dtype='period[Q-DEC]', freq='Q-DEC')"
)
exp10 = (
"PeriodIndex(['2011-01-01', '2011-02-01'], "
"dtype='period[3D]', freq='3D')"
)
for idx, expected in zip(
[idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9, idx10],
[exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9, exp10],
):
result = getattr(idx, method)()
assert result == expected
def test_representation_to_series(self):
# GH#10971
idx1 = PeriodIndex([], freq="D")
idx2 = PeriodIndex(["2011-01-01"], freq="D")
idx3 = PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
idx4 = PeriodIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
idx5 = PeriodIndex(["2011", "2012", "2013"], freq="A")
idx6 = PeriodIndex(["2011-01-01 09:00", "2012-02-01 10:00", "NaT"], freq="H")
idx7 = pd.period_range("2013Q1", periods=1, freq="Q")
idx8 = pd.period_range("2013Q1", periods=2, freq="Q")
idx9 = pd.period_range("2013Q1", periods=3, freq="Q")
exp1 = """Series([], dtype: period[D])"""
exp2 = """0 2011-01-01
dtype: period[D]"""
exp3 = """0 2011-01-01
1 2011-01-02
dtype: period[D]"""
exp4 = """0 2011-01-01
1 2011-01-02
2 2011-01-03
dtype: period[D]"""
exp5 = """0 2011
1 2012
2 2013
dtype: period[A-DEC]"""
exp6 = """0 2011-01-01 09:00
1 2012-02-01 10:00
2 NaT
dtype: period[H]"""
exp7 = """0 2013Q1
dtype: period[Q-DEC]"""
exp8 = """0 2013Q1
1 2013Q2
dtype: period[Q-DEC]"""
exp9 = """0 2013Q1
1 2013Q2
2 2013Q3
dtype: period[Q-DEC]"""
for idx, expected in zip(
[idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9],
[exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9],
):
result = repr(pd.Series(idx))
assert result == expected
def test_summary(self):
# GH#9116
idx1 = PeriodIndex([], freq="D")
idx2 = PeriodIndex(["2011-01-01"], freq="D")
idx3 = PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
idx4 = PeriodIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
idx5 = PeriodIndex(["2011", "2012", "2013"], freq="A")
idx6 = PeriodIndex(["2011-01-01 09:00", "2012-02-01 10:00", "NaT"], freq="H")
idx7 = pd.period_range("2013Q1", periods=1, freq="Q")
idx8 = pd.period_range("2013Q1", periods=2, freq="Q")
idx9 = pd.period_range("2013Q1", periods=3, freq="Q")
exp1 = """PeriodIndex: 0 entries
Freq: D"""
exp2 = """PeriodIndex: 1 entries, 2011-01-01 to 2011-01-01
Freq: D"""
exp3 = """PeriodIndex: 2 entries, 2011-01-01 to 2011-01-02
Freq: D"""
exp4 = """PeriodIndex: 3 entries, 2011-01-01 to 2011-01-03
Freq: D"""
exp5 = """PeriodIndex: 3 entries, 2011 to 2013
Freq: A-DEC"""
exp6 = """PeriodIndex: 3 entries, 2011-01-01 09:00 to NaT
Freq: H"""
exp7 = """PeriodIndex: 1 entries, 2013Q1 to 2013Q1
Freq: Q-DEC"""
exp8 = """PeriodIndex: 2 entries, 2013Q1 to 2013Q2
Freq: Q-DEC"""
exp9 = """PeriodIndex: 3 entries, 2013Q1 to 2013Q3
Freq: Q-DEC"""
for idx, expected in zip(
[idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9],
[exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9],
):
result = idx._summary()
assert result == expected

View File

@@ -0,0 +1,687 @@
from datetime import datetime, timedelta
import numpy as np
import pytest
from pandas._libs.tslibs import period as libperiod
import pandas as pd
from pandas import DatetimeIndex, Period, PeriodIndex, Series, notna, period_range
from pandas.util import testing as tm
class TestGetItem:
def test_ellipsis(self):
# GH#21282
idx = period_range("2011-01-01", "2011-01-31", freq="D", name="idx")
result = idx[...]
assert result.equals(idx)
assert result is not idx
def test_getitem(self):
idx1 = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")
for idx in [idx1]:
result = idx[0]
assert result == pd.Period("2011-01-01", freq="D")
result = idx[-1]
assert result == pd.Period("2011-01-31", freq="D")
result = idx[0:5]
expected = pd.period_range("2011-01-01", "2011-01-05", freq="D", name="idx")
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == "D"
result = idx[0:10:2]
expected = pd.PeriodIndex(
["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-07", "2011-01-09"],
freq="D",
name="idx",
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == "D"
result = idx[-20:-5:3]
expected = pd.PeriodIndex(
["2011-01-12", "2011-01-15", "2011-01-18", "2011-01-21", "2011-01-24"],
freq="D",
name="idx",
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == "D"
result = idx[4::-1]
expected = PeriodIndex(
["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"],
freq="D",
name="idx",
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == "D"
def test_getitem_index(self):
idx = period_range("2007-01", periods=10, freq="M", name="x")
result = idx[[1, 3, 5]]
exp = pd.PeriodIndex(["2007-02", "2007-04", "2007-06"], freq="M", name="x")
tm.assert_index_equal(result, exp)
result = idx[[True, True, False, False, False, True, True, False, False, False]]
exp = pd.PeriodIndex(
["2007-01", "2007-02", "2007-06", "2007-07"], freq="M", name="x"
)
tm.assert_index_equal(result, exp)
def test_getitem_partial(self):
rng = period_range("2007-01", periods=50, freq="M")
ts = Series(np.random.randn(len(rng)), rng)
with pytest.raises(KeyError, match=r"^'2006'$"):
ts["2006"]
result = ts["2008"]
assert (result.index.year == 2008).all()
result = ts["2008":"2009"]
assert len(result) == 24
result = ts["2008-1":"2009-12"]
assert len(result) == 24
result = ts["2008Q1":"2009Q4"]
assert len(result) == 24
result = ts[:"2009"]
assert len(result) == 36
result = ts["2009":]
assert len(result) == 50 - 24
exp = result
result = ts[24:]
tm.assert_series_equal(exp, result)
ts = ts[10:].append(ts[10:])
msg = "left slice bound for non-unique label: '2008'"
with pytest.raises(KeyError, match=msg):
ts[slice("2008", "2009")]
def test_getitem_datetime(self):
rng = period_range(start="2012-01-01", periods=10, freq="W-MON")
ts = Series(range(len(rng)), index=rng)
dt1 = datetime(2011, 10, 2)
dt4 = datetime(2012, 4, 20)
rs = ts[dt1:dt4]
tm.assert_series_equal(rs, ts)
def test_getitem_nat(self):
idx = pd.PeriodIndex(["2011-01", "NaT", "2011-02"], freq="M")
assert idx[0] == pd.Period("2011-01", freq="M")
assert idx[1] is pd.NaT
s = pd.Series([0, 1, 2], index=idx)
assert s[pd.NaT] == 1
s = pd.Series(idx, index=idx)
assert s[pd.Period("2011-01", freq="M")] == pd.Period("2011-01", freq="M")
assert s[pd.NaT] is pd.NaT
def test_getitem_list_periods(self):
# GH 7710
rng = period_range(start="2012-01-01", periods=10, freq="D")
ts = Series(range(len(rng)), index=rng)
exp = ts.iloc[[1]]
tm.assert_series_equal(ts[[Period("2012-01-02", freq="D")]], exp)
def test_getitem_seconds(self):
# GH#6716
didx = pd.date_range(start="2013/01/01 09:00:00", freq="S", periods=4000)
pidx = period_range(start="2013/01/01 09:00:00", freq="S", periods=4000)
for idx in [didx, pidx]:
# getitem against index should raise ValueError
values = [
"2014",
"2013/02",
"2013/01/02",
"2013/02/01 9H",
"2013/02/01 09:00",
]
for v in values:
# GH7116
# these show deprecations as we are trying
# to slice with non-integer indexers
# with pytest.raises(IndexError):
# idx[v]
continue
s = Series(np.random.rand(len(idx)), index=idx)
tm.assert_series_equal(s["2013/01/01 10:00"], s[3600:3660])
tm.assert_series_equal(s["2013/01/01 9H"], s[:3600])
for d in ["2013/01/01", "2013/01", "2013"]:
tm.assert_series_equal(s[d], s)
def test_getitem_day(self):
# GH#6716
# Confirm DatetimeIndex and PeriodIndex works identically
didx = pd.date_range(start="2013/01/01", freq="D", periods=400)
pidx = period_range(start="2013/01/01", freq="D", periods=400)
for idx in [didx, pidx]:
# getitem against index should raise ValueError
values = [
"2014",
"2013/02",
"2013/01/02",
"2013/02/01 9H",
"2013/02/01 09:00",
]
for v in values:
# GH7116
# these show deprecations as we are trying
# to slice with non-integer indexers
# with pytest.raises(IndexError):
# idx[v]
continue
s = Series(np.random.rand(len(idx)), index=idx)
tm.assert_series_equal(s["2013/01"], s[0:31])
tm.assert_series_equal(s["2013/02"], s[31:59])
tm.assert_series_equal(s["2014"], s[365:])
invalid = ["2013/02/01 9H", "2013/02/01 09:00"]
for v in invalid:
with pytest.raises(KeyError, match=v):
s[v]
class TestWhere:
@pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
def test_where(self, klass):
i = period_range("20130101", periods=5, freq="D")
cond = [True] * len(i)
expected = i
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)
cond = [False] + [True] * (len(i) - 1)
expected = PeriodIndex([pd.NaT] + i[1:].tolist(), freq="D")
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)
def test_where_other(self):
i = period_range("20130101", periods=5, freq="D")
for arr in [np.nan, pd.NaT]:
result = i.where(notna(i), other=np.nan)
expected = i
tm.assert_index_equal(result, expected)
i2 = i.copy()
i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), freq="D")
result = i.where(notna(i2), i2)
tm.assert_index_equal(result, i2)
i2 = i.copy()
i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), freq="D")
result = i.where(notna(i2), i2.values)
tm.assert_index_equal(result, i2)
class TestTake:
def test_take(self):
# GH#10295
idx1 = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")
for idx in [idx1]:
result = idx.take([0])
assert result == pd.Period("2011-01-01", freq="D")
result = idx.take([5])
assert result == pd.Period("2011-01-06", freq="D")
result = idx.take([0, 1, 2])
expected = pd.period_range("2011-01-01", "2011-01-03", freq="D", name="idx")
tm.assert_index_equal(result, expected)
assert result.freq == "D"
assert result.freq == expected.freq
result = idx.take([0, 2, 4])
expected = pd.PeriodIndex(
["2011-01-01", "2011-01-03", "2011-01-05"], freq="D", name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == "D"
result = idx.take([7, 4, 1])
expected = pd.PeriodIndex(
["2011-01-08", "2011-01-05", "2011-01-02"], freq="D", name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == "D"
result = idx.take([3, 2, 5])
expected = PeriodIndex(
["2011-01-04", "2011-01-03", "2011-01-06"], freq="D", name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == "D"
result = idx.take([-3, 2, 5])
expected = PeriodIndex(
["2011-01-29", "2011-01-03", "2011-01-06"], freq="D", name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == "D"
def test_take_misc(self):
index = period_range(start="1/1/10", end="12/31/12", freq="D", name="idx")
expected = PeriodIndex(
[
datetime(2010, 1, 6),
datetime(2010, 1, 7),
datetime(2010, 1, 9),
datetime(2010, 1, 13),
],
freq="D",
name="idx",
)
taken1 = index.take([5, 6, 8, 12])
taken2 = index[[5, 6, 8, 12]]
for taken in [taken1, taken2]:
tm.assert_index_equal(taken, expected)
assert isinstance(taken, PeriodIndex)
assert taken.freq == index.freq
assert taken.name == expected.name
def test_take_fill_value(self):
# GH#12631
idx = pd.PeriodIndex(
["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", freq="D"
)
result = idx.take(np.array([1, 0, -1]))
expected = pd.PeriodIndex(
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", freq="D"
)
tm.assert_index_equal(result, expected)
# fill_value
result = idx.take(np.array([1, 0, -1]), fill_value=True)
expected = pd.PeriodIndex(
["2011-02-01", "2011-01-01", "NaT"], name="xxx", freq="D"
)
tm.assert_index_equal(result, expected)
# allow_fill=False
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
expected = pd.PeriodIndex(
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", freq="D"
)
tm.assert_index_equal(result, expected)
msg = (
"When allow_fill=True and fill_value is not None, "
"all indices must be >= -1"
)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -2]), fill_value=True)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -5]), fill_value=True)
msg = "index -5 is out of bounds for size 3"
with pytest.raises(IndexError, match=msg):
idx.take(np.array([1, -5]))
class TestIndexing:
def test_get_loc_msg(self):
idx = period_range("2000-1-1", freq="A", periods=10)
bad_period = Period("2012", "A")
with pytest.raises(KeyError, match=r"^Period\('2012', 'A-DEC'\)$"):
idx.get_loc(bad_period)
try:
idx.get_loc(bad_period)
except KeyError as inst:
assert inst.args[0] == bad_period
def test_get_loc_nat(self):
didx = DatetimeIndex(["2011-01-01", "NaT", "2011-01-03"])
pidx = PeriodIndex(["2011-01-01", "NaT", "2011-01-03"], freq="M")
# check DatetimeIndex compat
for idx in [didx, pidx]:
assert idx.get_loc(pd.NaT) == 1
assert idx.get_loc(None) == 1
assert idx.get_loc(float("nan")) == 1
assert idx.get_loc(np.nan) == 1
def test_get_loc(self):
    """Check get_loc on unique, duplicated and non-monotonic PeriodIndexes.

    For each index the lookup is made with both the Period and its string
    form. The first two indexes are also checked for the errors raised on
    an unparseable string, a float key and an index passed as the key.
    """
    # GH 17717
    p0 = pd.Period("2017-09-01")
    p1 = pd.Period("2017-09-02")
    p2 = pd.Period("2017-09-03")
    # get the location of p1/p2 from
    # monotonic increasing PeriodIndex with non-duplicate
    idx0 = pd.PeriodIndex([p0, p1, p2])
    # NOTE(review): these names say "idx1" but hold the expectations for idx0.
    expected_idx1_p1 = 1
    expected_idx1_p2 = 2
    assert idx0.get_loc(p1) == expected_idx1_p1
    assert idx0.get_loc(str(p1)) == expected_idx1_p1
    assert idx0.get_loc(p2) == expected_idx1_p2
    assert idx0.get_loc(str(p2)) == expected_idx1_p2
    # A string that is not a period, and a float, both raise KeyError.
    msg = "Cannot interpret 'foo' as period"
    with pytest.raises(KeyError, match=msg):
        idx0.get_loc("foo")
    with pytest.raises(KeyError, match=r"^1\.1$"):
        idx0.get_loc(1.1)
    # Passing the index itself as the key raises TypeError.
    msg = (
        r"'PeriodIndex\(\['2017-09-01', '2017-09-02', '2017-09-03'\],"
        r" dtype='period\[D\]', freq='D'\)' is an invalid key"
    )
    with pytest.raises(TypeError, match=msg):
        idx0.get_loc(idx0)
    # get the location of p1/p2 from
    # monotonic increasing PeriodIndex with duplicate
    idx1 = pd.PeriodIndex([p1, p1, p2])
    # The duplicated key is expected as a slice over its positions.
    expected_idx1_p1 = slice(0, 2)
    expected_idx1_p2 = 2
    assert idx1.get_loc(p1) == expected_idx1_p1
    assert idx1.get_loc(str(p1)) == expected_idx1_p1
    assert idx1.get_loc(p2) == expected_idx1_p2
    assert idx1.get_loc(str(p2)) == expected_idx1_p2
    msg = "Cannot interpret 'foo' as period"
    with pytest.raises(KeyError, match=msg):
        idx1.get_loc("foo")
    with pytest.raises(KeyError, match=r"^1\.1$"):
        idx1.get_loc(1.1)
    msg = (
        r"'PeriodIndex\(\['2017-09-02', '2017-09-02', '2017-09-03'\],"
        r" dtype='period\[D\]', freq='D'\)' is an invalid key"
    )
    with pytest.raises(TypeError, match=msg):
        idx1.get_loc(idx1)
    # get the location of p1/p2 from
    # non-monotonic increasing/decreasing PeriodIndex with duplicate
    idx2 = pd.PeriodIndex([p2, p1, p2])
    # The duplicated key is expected as a boolean mask here.
    expected_idx2_p1 = 1
    expected_idx2_p2 = np.array([True, False, True])
    assert idx2.get_loc(p1) == expected_idx2_p1
    assert idx2.get_loc(str(p1)) == expected_idx2_p1
    tm.assert_numpy_array_equal(idx2.get_loc(p2), expected_idx2_p2)
    tm.assert_numpy_array_equal(idx2.get_loc(str(p2)), expected_idx2_p2)
def test_is_monotonic_increasing(self):
# GH 17717
p0 = pd.Period("2017-09-01")
p1 = pd.Period("2017-09-02")
p2 = pd.Period("2017-09-03")
idx_inc0 = pd.PeriodIndex([p0, p1, p2])
idx_inc1 = pd.PeriodIndex([p0, p1, p1])
idx_dec0 = pd.PeriodIndex([p2, p1, p0])
idx_dec1 = pd.PeriodIndex([p2, p1, p1])
idx = pd.PeriodIndex([p1, p2, p0])
assert idx_inc0.is_monotonic_increasing is True
assert idx_inc1.is_monotonic_increasing is True
assert idx_dec0.is_monotonic_increasing is False
assert idx_dec1.is_monotonic_increasing is False
assert idx.is_monotonic_increasing is False
def test_is_monotonic_decreasing(self):
# GH 17717
p0 = pd.Period("2017-09-01")
p1 = pd.Period("2017-09-02")
p2 = pd.Period("2017-09-03")
idx_inc0 = pd.PeriodIndex([p0, p1, p2])
idx_inc1 = pd.PeriodIndex([p0, p1, p1])
idx_dec0 = pd.PeriodIndex([p2, p1, p0])
idx_dec1 = pd.PeriodIndex([p2, p1, p1])
idx = pd.PeriodIndex([p1, p2, p0])
assert idx_inc0.is_monotonic_decreasing is False
assert idx_inc1.is_monotonic_decreasing is False
assert idx_dec0.is_monotonic_decreasing is True
assert idx_dec1.is_monotonic_decreasing is True
assert idx.is_monotonic_decreasing is False
def test_contains(self):
# GH 17717
p0 = pd.Period("2017-09-01")
p1 = pd.Period("2017-09-02")
p2 = pd.Period("2017-09-03")
p3 = pd.Period("2017-09-04")
ps0 = [p0, p1, p2]
idx0 = pd.PeriodIndex(ps0)
for p in ps0:
assert p in idx0
assert str(p) in idx0
assert "2017-09-01 00:00:01" in idx0
assert "2017-09" in idx0
assert p3 not in idx0
def test_get_value(self):
# GH 17717
p0 = pd.Period("2017-09-01")
p1 = pd.Period("2017-09-02")
p2 = pd.Period("2017-09-03")
idx0 = pd.PeriodIndex([p0, p1, p2])
input0 = np.array([1, 2, 3])
expected0 = 2
result0 = idx0.get_value(input0, p1)
assert result0 == expected0
idx1 = pd.PeriodIndex([p1, p1, p2])
input1 = np.array([1, 2, 3])
expected1 = np.array([1, 2])
result1 = idx1.get_value(input1, p1)
tm.assert_numpy_array_equal(result1, expected1)
idx2 = pd.PeriodIndex([p1, p2, p1])
input2 = np.array([1, 2, 3])
expected2 = np.array([1, 3])
result2 = idx2.get_value(input2, p1)
tm.assert_numpy_array_equal(result2, expected2)
def test_get_indexer(self):
# GH 17717
p1 = pd.Period("2017-09-01")
p2 = pd.Period("2017-09-04")
p3 = pd.Period("2017-09-07")
tp0 = pd.Period("2017-08-31")
tp1 = pd.Period("2017-09-02")
tp2 = pd.Period("2017-09-05")
tp3 = pd.Period("2017-09-09")
idx = pd.PeriodIndex([p1, p2, p3])
tm.assert_numpy_array_equal(
idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp)
)
target = pd.PeriodIndex([tp0, tp1, tp2, tp3])
tm.assert_numpy_array_equal(
idx.get_indexer(target, "pad"), np.array([-1, 0, 1, 2], dtype=np.intp)
)
tm.assert_numpy_array_equal(
idx.get_indexer(target, "backfill"), np.array([0, 1, 2, -1], dtype=np.intp)
)
tm.assert_numpy_array_equal(
idx.get_indexer(target, "nearest"), np.array([0, 0, 1, 2], dtype=np.intp)
)
res = idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 day"))
tm.assert_numpy_array_equal(res, np.array([0, 0, 1, -1], dtype=np.intp))
def test_get_indexer_non_unique(self):
# GH 17717
p1 = pd.Period("2017-09-02")
p2 = pd.Period("2017-09-03")
p3 = pd.Period("2017-09-04")
p4 = pd.Period("2017-09-05")
idx1 = pd.PeriodIndex([p1, p2, p1])
idx2 = pd.PeriodIndex([p2, p1, p3, p4])
result = idx1.get_indexer_non_unique(idx2)
expected_indexer = np.array([1, 0, 2, -1, -1], dtype=np.intp)
expected_missing = np.array([2, 3], dtype=np.int64)
tm.assert_numpy_array_equal(result[0], expected_indexer)
tm.assert_numpy_array_equal(result[1], expected_missing)
# TODO: This method came from test_period; de-dup with version above
def test_get_loc2(self):
    """Check get_loc with the ``method`` and ``tolerance`` arguments."""
    idx = pd.period_range("2000-01-01", periods=3)
    # For every method, the second entry is found at position 1 whether the
    # key is the Period, an hourly Period, a Timestamp, a datetime or a str.
    for method in [None, "pad", "backfill", "nearest"]:
        assert idx.get_loc(idx[1], method) == 1
        assert idx.get_loc(idx[1].asfreq("H", how="start"), method) == 1
        assert idx.get_loc(idx[1].to_timestamp(), method) == 1
        assert idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method) == 1
        assert idx.get_loc(str(idx[1]), method) == 1
    # From here on the index holds every other day of a five-day range.
    idx = pd.period_range("2000-01-01", periods=5)[::2]
    # The same one-day tolerance given as a str, a Timedelta, a
    # numpy timedelta64 and a datetime.timedelta.
    assert idx.get_loc("2000-01-02T12", method="nearest", tolerance="1 day") == 1
    assert (
        idx.get_loc("2000-01-02T12", method="nearest", tolerance=pd.Timedelta("1D"))
        == 1
    )
    assert (
        idx.get_loc(
            "2000-01-02T12", method="nearest", tolerance=np.timedelta64(1, "D")
        )
        == 1
    )
    assert (
        idx.get_loc("2000-01-02T12", method="nearest", tolerance=timedelta(1)) == 1
    )
    # An unparseable tolerance string.
    msg = "unit abbreviation w/o a number"
    with pytest.raises(ValueError, match=msg):
        idx.get_loc("2000-01-10", method="nearest", tolerance="foo")
    # A tolerance of one hour is rejected for this daily index.
    msg = "Input has different freq=None from PeriodArray\\(freq=D\\)"
    with pytest.raises(ValueError, match=msg):
        idx.get_loc("2000-01-10", method="nearest", tolerance="1 hour")
    # A valid tolerance, but no entry within one day of the key.
    with pytest.raises(KeyError, match=r"^Period\('2000-01-10', 'D'\)$"):
        idx.get_loc("2000-01-10", method="nearest", tolerance="1 day")
    # A two-element tolerance list for a single key.
    with pytest.raises(
        ValueError, match="list-like tolerance size must match target index size"
    ):
        idx.get_loc(
            "2000-01-10",
            method="nearest",
            tolerance=[
                pd.Timedelta("1 day").to_timedelta64(),
                pd.Timedelta("1 day").to_timedelta64(),
            ],
        )
# TODO: This method came from test_period; de-dup with version above
def test_get_indexer2(self):
    """Check get_indexer on an hourly index with methods and tolerances."""
    # Three daily periods converted to hourly frequency with how="start".
    idx = pd.period_range("2000-01-01", periods=3).asfreq("H", how="start")
    tm.assert_numpy_array_equal(
        idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp)
    )
    target = pd.PeriodIndex(
        ["1999-12-31T23", "2000-01-01T12", "2000-01-02T01"], freq="H"
    )
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
    )
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
    )
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
    )
    # With a one-hour tolerance the middle target finds no match.
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, "nearest", tolerance="1 hour"),
        np.array([0, -1, 1], dtype=np.intp),
    )
    # A tolerance of one minute is rejected for this hourly index.
    msg = "Input has different freq=None from PeriodArray\\(freq=H\\)"
    with pytest.raises(ValueError, match=msg):
        idx.get_indexer(target, "nearest", tolerance="1 minute")
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, "nearest", tolerance="1 day"),
        np.array([0, 1, 1], dtype=np.intp),
    )
    # One tolerance per target element, converted to numpy timedelta64.
    tol_raw = [
        pd.Timedelta("1 hour"),
        pd.Timedelta("1 hour"),
        np.timedelta64(1, "D"),
    ]
    tm.assert_numpy_array_equal(
        idx.get_indexer(
            target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw]
        ),
        np.array([0, -1, 1], dtype=np.intp),
    )
    # A tolerance list containing a month-unit timedelta is rejected.
    tol_bad = [
        pd.Timedelta("2 hour").to_timedelta64(),
        pd.Timedelta("1 hour").to_timedelta64(),
        np.timedelta64(1, "M"),
    ]
    with pytest.raises(
        libperiod.IncompatibleFrequency, match="Input has different freq=None from"
    ):
        idx.get_indexer(target, "nearest", tolerance=tol_bad)
def test_indexing(self):
# GH 4390, iat incorrectly indexing
index = period_range("1/1/2001", periods=10)
s = Series(np.random.randn(10), index=index)
expected = s[index[0]]
result = s.iat[0]
assert expected == result
def test_period_index_indexer(self):
# GH4125
idx = pd.period_range("2002-01", "2003-12", freq="M")
df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx)
tm.assert_frame_equal(df, df.loc[idx])
tm.assert_frame_equal(df, df.loc[list(idx)])
tm.assert_frame_equal(df, df.loc[list(idx)])
tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]])
tm.assert_frame_equal(df, df.loc[list(idx)])

View File

@@ -0,0 +1,347 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DatetimeIndex, Index, NaT, PeriodIndex, Series
from pandas.core.arrays import PeriodArray
from pandas.tests.test_base import Ops
import pandas.util.testing as tm
class TestPeriodIndexOps(Ops):
def setup_method(self, method):
    """Split the inherited ``self.objs`` into date-like indexes and the rest."""
    super().setup_method(method)
    # An object counts as valid when it is a DatetimeIndex or a PeriodIndex.
    index_types = (DatetimeIndex, PeriodIndex)
    self.is_valid_objs = [o for o in self.objs if isinstance(o, index_types)]
    self.not_valid_objs = [o for o in self.objs if not isinstance(o, index_types)]
def test_ops_properties(self):
    """Run the inherited property checks for each PeriodArray ops list."""

    def is_period_index(obj):
        return isinstance(obj, PeriodIndex)

    ops_lists = (
        PeriodArray._field_ops,
        PeriodArray._object_ops,
        PeriodArray._bool_ops,
    )
    for ops in ops_lists:
        self.check_ops_properties(ops, is_period_index)
def test_resolution(self):
for freq, expected in zip(
["A", "Q", "M", "D", "H", "T", "S", "L", "U"],
[
"day",
"day",
"day",
"day",
"hour",
"minute",
"second",
"millisecond",
"microsecond",
],
):
idx = pd.period_range(start="2013-04-01", periods=30, freq=freq)
assert idx.resolution == expected
def test_value_counts_unique(self):
    """Check value_counts() and unique() on repeated and NaT-containing data."""
    # GH 7735
    idx = pd.period_range("2011-01-01 09:00", freq="H", periods=10)
    # create repeated values, 'n'th element is repeated by n+1 times
    idx = PeriodIndex(np.repeat(idx._values, range(1, len(idx) + 1)), freq="H")
    # Expected index for the counts: latest hour (most repeats) first.
    exp_idx = PeriodIndex(
        [
            "2011-01-01 18:00",
            "2011-01-01 17:00",
            "2011-01-01 16:00",
            "2011-01-01 15:00",
            "2011-01-01 14:00",
            "2011-01-01 13:00",
            "2011-01-01 12:00",
            "2011-01-01 11:00",
            "2011-01-01 10:00",
            "2011-01-01 09:00",
        ],
        freq="H",
    )
    expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
    # The index and a Series wrapping it must give the same counts.
    for obj in [idx, Series(idx)]:
        tm.assert_series_equal(obj.value_counts(), expected)
    expected = pd.period_range("2011-01-01 09:00", freq="H", periods=10)
    tm.assert_index_equal(idx.unique(), expected)
    # Second data set: two repeated hours plus one NaT.
    idx = PeriodIndex(
        [
            "2013-01-01 09:00",
            "2013-01-01 09:00",
            "2013-01-01 09:00",
            "2013-01-01 08:00",
            "2013-01-01 08:00",
            NaT,
        ],
        freq="H",
    )
    # Default value_counts() leaves NaT out of the result.
    exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00"], freq="H")
    expected = Series([3, 2], index=exp_idx)
    for obj in [idx, Series(idx)]:
        tm.assert_series_equal(obj.value_counts(), expected)
    # dropna=False counts NaT as well; unique() keeps it too.
    exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00", NaT], freq="H")
    expected = Series([3, 2, 1], index=exp_idx)
    for obj in [idx, Series(idx)]:
        tm.assert_series_equal(obj.value_counts(dropna=False), expected)
    tm.assert_index_equal(idx.unique(), exp_idx)
def test_drop_duplicates_metadata(self):
# GH 10115
idx = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")
result = idx.drop_duplicates()
tm.assert_index_equal(idx, result)
assert idx.freq == result.freq
idx_dup = idx.append(idx) # freq will not be reset
result = idx_dup.drop_duplicates()
tm.assert_index_equal(idx, result)
assert idx.freq == result.freq
def test_drop_duplicates(self):
# to check Index/Series compat
base = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")
idx = base.append(base[:5])
res = idx.drop_duplicates()
tm.assert_index_equal(res, base)
res = Series(idx).drop_duplicates()
tm.assert_series_equal(res, Series(base))
res = idx.drop_duplicates(keep="last")
exp = base[5:].append(base[:5])
tm.assert_index_equal(res, exp)
res = Series(idx).drop_duplicates(keep="last")
tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
res = idx.drop_duplicates(keep=False)
tm.assert_index_equal(res, base[5:])
res = Series(idx).drop_duplicates(keep=False)
tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
def test_order_compat(self):
    """Check sort_values() on PeriodIndex against a plain integer Index."""

    def _check_freq(index, expected_index):
        # freq is only compared for PeriodIndex; other index types are skipped.
        if isinstance(index, PeriodIndex):
            assert index.freq == expected_index.freq

    # Already-sorted input.
    pidx = PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A")
    # for compatibility check
    iidx = Index([2011, 2012, 2013], name="idx")
    for idx in [pidx, iidx]:
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        _check_freq(ordered, idx)
        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, idx[::-1])
        _check_freq(ordered, idx[::-1])
        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
        _check_freq(ordered, idx)
        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, idx[::-1])
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
        _check_freq(ordered, idx[::-1])
    # Unsorted input with a duplicate.
    pidx = PeriodIndex(
        ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A"
    )
    pexpected = PeriodIndex(
        ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A"
    )
    # for compatibility check
    iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx")
    iexpected = Index([2011, 2011, 2012, 2013, 2015], name="idx")
    for idx, expected in [(pidx, pexpected), (iidx, iexpected)]:
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, expected)
        _check_freq(ordered, idx)
        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        _check_freq(ordered, idx)
        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)
        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        _check_freq(ordered, idx)
        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        _check_freq(ordered, idx)
    # Input containing NaT: expected first when ascending, last when descending.
    pidx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
    result = pidx.sort_values()
    expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")
    tm.assert_index_equal(result, expected)
    assert result.freq == "D"
    result = pidx.sort_values(ascending=False)
    expected = PeriodIndex(["2013", "2011", "2011", "NaT"], name="pidx", freq="D")
    tm.assert_index_equal(result, expected)
    assert result.freq == "D"
def test_order(self):
    """Check sort_values() values, indexer and freq for several PeriodIndexes."""
    # Already-sorted input, at three multiples of the daily frequency.
    for freq in ["D", "2D", "4D"]:
        idx = PeriodIndex(
            ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx"
        )
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq
        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq == freq
        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
        assert ordered.freq == idx.freq
        assert ordered.freq == freq
        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq == freq
    # Unsorted inputs: idx1 and idx2 hold the same values under different
    # names; idx3 has NaT in the two positions where they have 2011-01-01.
    idx1 = PeriodIndex(
        ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
        freq="D",
        name="idx1",
    )
    exp1 = PeriodIndex(
        ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
        freq="D",
        name="idx1",
    )
    idx2 = PeriodIndex(
        ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
        freq="D",
        name="idx2",
    )
    exp2 = PeriodIndex(
        ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
        freq="D",
        name="idx2",
    )
    idx3 = PeriodIndex(
        [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], freq="D", name="idx3"
    )
    exp3 = PeriodIndex(
        [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], freq="D", name="idx3"
    )
    # All three share the same expected indexers.
    for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]:
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == "D"
        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq == "D"
        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)
        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq == "D"
        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq == "D"
def test_shift(self):
# This is tested in test_arithmetic
pass
def test_nat(self):
    """PeriodIndex uses NaT as its NA value and reports where NaT occurs."""
    assert pd.PeriodIndex._na_value is NaT
    assert pd.PeriodIndex([], freq="M")._na_value is NaT

    # (input values, expected _isnan mask, expected hasnans, expected _nan_idxs)
    cases = [
        (["2011-01-01", "2011-01-02"], [False, False], False, []),
        (["2011-01-01", "NaT"], [False, True], True, [1]),
    ]
    for values, nan_mask, has_nans, nan_positions in cases:
        idx = pd.PeriodIndex(values, freq="D")
        assert idx._can_hold_na
        tm.assert_numpy_array_equal(idx._isnan, np.array(nan_mask))
        assert idx.hasnans is has_nans
        tm.assert_numpy_array_equal(
            idx._nan_idxs, np.array(nan_positions, dtype=np.intp)
        )
@pytest.mark.parametrize("freq", ["D", "M"])
def test_equals(self, freq):
    """``equals`` accepts copies and object casts but rejects another freq."""
    # GH#13107
    idx = pd.PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq)

    assert idx.equals(idx)
    assert idx.equals(idx.copy())
    assert idx.equals(idx.astype(object))
    assert idx.astype(object).equals(idx)
    assert idx.astype(object).equals(idx.astype(object))
    # list and Series wrappers are never "equal" to the index itself
    assert not idx.equals(list(idx))
    assert not idx.equals(pd.Series(idx))

    def assert_never_equal(other):
        # ``other`` must differ from ``idx`` in every representation checked.
        assert not idx.equals(other)
        assert not idx.equals(other.copy())
        assert not idx.equals(other.astype(object))
        assert not idx.astype(object).equals(other)
        assert not idx.equals(list(other))
        assert not idx.equals(pd.Series(other))

    # same labels, different freq
    hourly = pd.PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="H")
    assert_never_equal(hourly)

    # same underlying i8 values, different freq
    same_ordinals = pd.PeriodIndex._simple_new(
        idx._values._simple_new(idx._values.asi8, freq="H")
    )
    tm.assert_numpy_array_equal(idx.asi8, same_ordinals.asi8)
    assert_never_equal(same_ordinals)
def test_freq_setter_deprecated(self):
    """Reading ``freq`` is silent; assigning to it emits a FutureWarning."""
    # GH 20678
    quarterly = pd.period_range("2018Q1", periods=4, freq="Q")

    # the getter must not warn
    with tm.assert_produces_warning(None):
        quarterly.freq

    # the setter must warn
    with tm.assert_produces_warning(FutureWarning):
        quarterly.freq = pd.offsets.Day()

View File

@@ -0,0 +1,135 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Period, Series, period_range
from pandas.util import testing as tm
class TestPeriodIndex:
    """Slicing tests for Series and DataFrames indexed by periods."""

    def setup_method(self, method):
        """No per-test state is needed."""
        pass

    def test_slice_with_negative_step(self):
        """Label slices with a negative step match the positional equivalent."""
        ts = Series(np.arange(20), period_range("2014-01", periods=20, freq="M"))
        SLC = pd.IndexSlice

        def assert_slices_equivalent(l_slc, i_slc):
            # Plain ``[]`` and ``.loc`` label slicing must both agree with
            # positional ``.iloc`` slicing.
            tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc])
            tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])

        assert_slices_equivalent(SLC[Period("2014-10") :: -1], SLC[9::-1])
        assert_slices_equivalent(SLC["2014-10"::-1], SLC[9::-1])

        assert_slices_equivalent(SLC[: Period("2014-10") : -1], SLC[:8:-1])
        assert_slices_equivalent(SLC[:"2014-10":-1], SLC[:8:-1])

        assert_slices_equivalent(SLC["2015-02":"2014-10":-1], SLC[13:8:-1])
        assert_slices_equivalent(
            SLC[Period("2015-02") : Period("2014-10") : -1], SLC[13:8:-1]
        )
        assert_slices_equivalent(SLC["2015-02" : Period("2014-10") : -1], SLC[13:8:-1])
        assert_slices_equivalent(SLC[Period("2015-02") : "2014-10" : -1], SLC[13:8:-1])

        # start label earlier than stop label with a negative step: empty result
        assert_slices_equivalent(SLC["2014-10":"2015-02":-1], SLC[:0])

    def test_slice_with_zero_step_raises(self):
        """A zero slice step raises ValueError for both ``[]`` and ``.loc``."""
        ts = Series(np.arange(20), period_range("2014-01", periods=20, freq="M"))
        with pytest.raises(ValueError, match="slice step cannot be zero"):
            ts[::0]
        with pytest.raises(ValueError, match="slice step cannot be zero"):
            ts.loc[::0]

    def test_slice_keep_name(self):
        """Slicing an index keeps its name."""
        idx = period_range("20010101", periods=10, freq="D", name="bob")
        assert idx.name == idx[1:].name

    def test_pindex_slice_index(self):
        """Indexing a monthly Series with a year string selects that year's rows."""
        pi = period_range(start="1/1/10", end="12/31/12", freq="M")
        s = Series(np.random.rand(len(pi)), index=pi)

        res = s["2010"]
        exp = s[0:12]
        tm.assert_series_equal(res, exp)

        res = s["2011"]
        exp = s[12:24]
        tm.assert_series_equal(res, exp)

    def test_range_slice_day(self):
        """String range slices on daily data, for DatetimeIndex and PeriodIndex."""
        # GH#6716
        didx = pd.date_range(start="2013/01/01", freq="D", periods=400)
        pidx = period_range(start="2013/01/01", freq="D", periods=400)

        # slicing the index itself with a string label should raise TypeError
        values = [
            "2014",
            "2013/02",
            "2013/01/02",
            "2013/02/01 9H",
            "2013/02/01 09:00",
        ]

        for idx in [didx, pidx]:
            for v in values:
                with pytest.raises(TypeError):
                    idx[v:]

            s = Series(np.random.rand(len(idx)), index=idx)

            tm.assert_series_equal(s["2013/01/02":], s[1:])
            tm.assert_series_equal(s["2013/01/02":"2013/01/05"], s[1:5])
            tm.assert_series_equal(s["2013/02":], s[31:])
            tm.assert_series_equal(s["2014":], s[365:])

    def test_range_slice_seconds(self):
        """String range slices on second-resolution data, for both index types."""
        # GH#6716
        didx = pd.date_range(start="2013/01/01 09:00:00", freq="S", periods=4000)
        pidx = period_range(start="2013/01/01 09:00:00", freq="S", periods=4000)

        # slicing the index itself with a string label should raise TypeError
        values = [
            "2014",
            "2013/02",
            "2013/01/02",
            "2013/02/01 9H",
            "2013/02/01 09:00",
        ]

        for idx in [didx, pidx]:
            for v in values:
                with pytest.raises(TypeError):
                    idx[v:]

            s = Series(np.random.rand(len(idx)), index=idx)

            tm.assert_series_equal(s["2013/01/01 09:05":"2013/01/01 09:10"], s[300:660])
            tm.assert_series_equal(
                s["2013/01/01 10:00":"2013/01/01 10:05"], s[3600:3960]
            )
            tm.assert_series_equal(s["2013/01/01 10H":], s[3600:])
            tm.assert_series_equal(s[:"2013/01/01 09:30"], s[:1860])
            # a coarser start label at or before the first entry selects everything
            for d in ["2013/01/01", "2013/01", "2013"]:
                tm.assert_series_equal(s[d:], s)

    def test_range_slice_outofbounds(self):
        """Range slices partly or wholly outside the index clip or come back empty."""
        # GH#5407
        didx = pd.date_range(start="2013/10/01", freq="D", periods=10)
        pidx = period_range(start="2013/10/01", freq="D", periods=10)

        for idx in [didx, pidx]:
            df = DataFrame({"units": list(range(100, 110))}, index=idx)
            # empty frame with the same index type and an int64 "units" column
            empty = DataFrame(index=type(idx)([], freq="D"), columns=["units"])
            empty["units"] = empty["units"].astype("int64")

            tm.assert_frame_equal(df["2013/09/01":"2013/09/30"], empty)
            tm.assert_frame_equal(df["2013/09/30":"2013/10/02"], df.iloc[:2])
            tm.assert_frame_equal(df["2013/10/01":"2013/10/02"], df.iloc[:2])
            tm.assert_frame_equal(df["2013/10/02":"2013/09/30"], empty)
            tm.assert_frame_equal(df["2013/10/15":"2013/10/17"], empty)
            tm.assert_frame_equal(df["2013-06":"2013-09"], empty)
            tm.assert_frame_equal(df["2013-11":"2013-12"], empty)

View File

@@ -0,0 +1,630 @@
import numpy as np
import pytest
from pandas._libs.tslibs.period import IncompatibleFrequency
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
NaT,
Period,
PeriodIndex,
Series,
date_range,
offsets,
period_range,
)
from pandas.util import testing as tm
from ..datetimelike import DatetimeLike
class TestPeriodIndex(DatetimeLike):
    """Tests for PeriodIndex, extending the ``DatetimeLike`` base test class."""

    _holder = PeriodIndex

    def setup_method(self, method):
        self.indices = dict(
            index=tm.makePeriodIndex(10),
            index_dec=period_range("20130101", periods=10, freq="D")[::-1],
        )
        self.setup_indices()

    def create_index(self):
        return period_range("20130101", periods=5, freq="D")

    def test_pickle_compat_construction(self):
        # Intentionally a no-op.
        # NOTE(review): presumably disables an inherited test - confirm.
        pass

    @pytest.mark.parametrize("freq", ["D", "M", "A"])
    def test_pickle_round_trip(self, freq):
        """An index containing several NaT spellings survives a pickle round trip."""
        idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.nan], freq=freq)
        result = tm.round_trip_pickle(idx)
        tm.assert_index_equal(result, idx)

    def test_where(self):
        # This is handled in test_indexing
        pass

    @pytest.mark.parametrize("use_numpy", [True, False])
    @pytest.mark.parametrize(
        "index",
        [
            pd.period_range("2000-01-01", periods=3, freq="D"),
            pd.period_range("2001-01-01", periods=3, freq="2D"),
            pd.PeriodIndex(["2001-01", "NaT", "2003-01"], freq="M"),
        ],
    )
    def test_repeat_freqstr(self, index, use_numpy):
        """``repeat`` (method or ``np.repeat``) keeps the index's freqstr."""
        # GH10183
        expected = PeriodIndex([p for p in index for _ in range(3)])
        result = np.repeat(index, 3) if use_numpy else index.repeat(3)
        tm.assert_index_equal(result, expected)
        assert result.freqstr == index.freqstr

    def test_fillna_period(self):
        """``fillna`` keeps PeriodIndex for a same-freq fill, else gives object."""
        # GH 11343
        idx = pd.PeriodIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], freq="H")

        # same-freq Period: result is still a PeriodIndex
        exp = pd.PeriodIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], freq="H"
        )
        tm.assert_index_equal(idx.fillna(pd.Period("2011-01-01 10:00", freq="H")), exp)

        # non-Period fill value: result is an object Index
        exp = pd.Index(
            [
                pd.Period("2011-01-01 09:00", freq="H"),
                "x",
                pd.Period("2011-01-01 11:00", freq="H"),
            ],
            dtype=object,
        )
        tm.assert_index_equal(idx.fillna("x"), exp)

        # Period with a different freq: result is an object Index
        exp = pd.Index(
            [
                pd.Period("2011-01-01 09:00", freq="H"),
                pd.Period("2011-01-01", freq="D"),
                pd.Period("2011-01-01 11:00", freq="H"),
            ],
            dtype=object,
        )
        tm.assert_index_equal(idx.fillna(pd.Period("2011-01-01", freq="D")), exp)

    def test_no_millisecond_field(self):
        """``millisecond`` is missing on both the DatetimeIndex class and instances."""
        msg = "type object 'DatetimeIndex' has no attribute 'millisecond'"
        with pytest.raises(AttributeError, match=msg):
            DatetimeIndex.millisecond

        msg = "'DatetimeIndex' object has no attribute 'millisecond'"
        with pytest.raises(AttributeError, match=msg):
            DatetimeIndex([]).millisecond

    @pytest.mark.parametrize("sort", [None, False])
    def test_difference_freq(self, sort):
        # GH14323: difference of Period MUST preserve frequency
        # but the ability to union results must be preserved

        index = period_range("20160920", "20160925", freq="D")

        other = period_range("20160921", "20160924", freq="D")
        expected = PeriodIndex(["20160920", "20160925"], freq="D")
        idx_diff = index.difference(other, sort)
        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal("freq", idx_diff, expected)

        other = period_range("20160922", "20160925", freq="D")
        idx_diff = index.difference(other, sort)
        expected = PeriodIndex(["20160920", "20160921"], freq="D")
        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal("freq", idx_diff, expected)

    def test_hash_error(self):
        """Hashing the index raises TypeError naming the index type."""
        index = period_range("20010101", periods=10)
        msg = "unhashable type: '{}'".format(type(index).__name__)
        with pytest.raises(TypeError, match=msg):
            hash(index)

    def test_make_time_series(self):
        """A Series can be built on a period_range index."""
        index = period_range(freq="A", start="1/1/2001", end="12/1/2009")
        series = Series(1, index=index)
        assert isinstance(series, Series)

    def test_shallow_copy_empty(self):
        # GH13067
        idx = PeriodIndex([], freq="M")
        result = idx._shallow_copy()
        expected = idx

        tm.assert_index_equal(result, expected)

    def test_shallow_copy_i8(self):
        # GH-24391
        pi = period_range("2018-01-01", periods=3, freq="2D")
        result = pi._shallow_copy(pi.asi8, freq=pi.freq)
        tm.assert_index_equal(result, pi)

    def test_shallow_copy_changing_freq_raises(self):
        """``_shallow_copy`` with a freq that differs from the data's raises."""
        pi = period_range("2018-01-01", periods=3, freq="2D")
        msg = "specified freq and dtype are different"
        with pytest.raises(IncompatibleFrequency, match=msg):
            pi._shallow_copy(pi, freq="H")

    def test_dtype_str(self):
        """``dtype_str`` matches ``str(dtype)`` and emits a FutureWarning."""
        pi = pd.PeriodIndex([], freq="M")
        with tm.assert_produces_warning(FutureWarning):
            assert pi.dtype_str == "period[M]"
            assert pi.dtype_str == str(pi.dtype)

        # Build the index outside the context so that only the ``dtype_str``
        # accesses can account for the expected warning.
        pi = pd.PeriodIndex([], freq="3M")
        with tm.assert_produces_warning(FutureWarning):
            assert pi.dtype_str == "period[3M]"
            assert pi.dtype_str == str(pi.dtype)

    def test_view_asi8(self):
        """``view("i8")`` and ``asi8`` expose the same int64 values."""
        idx = pd.PeriodIndex([], freq="M")

        exp = np.array([], dtype=np.int64)
        tm.assert_numpy_array_equal(idx.view("i8"), exp)
        tm.assert_numpy_array_equal(idx.asi8, exp)

        idx = pd.PeriodIndex(["2011-01", pd.NaT], freq="M")

        # NaT is expected as the minimum int64 value
        exp = np.array([492, -9223372036854775808], dtype=np.int64)
        tm.assert_numpy_array_equal(idx.view("i8"), exp)
        tm.assert_numpy_array_equal(idx.asi8, exp)

        exp = np.array([14975, -9223372036854775808], dtype=np.int64)
        idx = pd.PeriodIndex(["2011-01-01", pd.NaT], freq="D")
        tm.assert_numpy_array_equal(idx.view("i8"), exp)
        tm.assert_numpy_array_equal(idx.asi8, exp)

    def test_values(self):
        """``values``/``to_numpy`` are object arrays; ``_ndarray_values`` is int64."""
        idx = pd.PeriodIndex([], freq="M")

        # The builtin ``object`` replaces the ``np.object`` alias, which newer
        # NumPy releases no longer provide.
        exp = np.array([], dtype=object)
        tm.assert_numpy_array_equal(idx.values, exp)
        tm.assert_numpy_array_equal(idx.to_numpy(), exp)
        with tm.assert_produces_warning(FutureWarning):
            tm.assert_numpy_array_equal(idx.get_values(), exp)
        exp = np.array([], dtype=np.int64)
        tm.assert_numpy_array_equal(idx._ndarray_values, exp)

        idx = pd.PeriodIndex(["2011-01", pd.NaT], freq="M")

        exp = np.array([pd.Period("2011-01", freq="M"), pd.NaT], dtype=object)
        tm.assert_numpy_array_equal(idx.values, exp)
        tm.assert_numpy_array_equal(idx.to_numpy(), exp)
        exp = np.array([492, -9223372036854775808], dtype=np.int64)
        tm.assert_numpy_array_equal(idx._ndarray_values, exp)

        idx = pd.PeriodIndex(["2011-01-01", pd.NaT], freq="D")

        exp = np.array([pd.Period("2011-01-01", freq="D"), pd.NaT], dtype=object)
        tm.assert_numpy_array_equal(idx.values, exp)
        tm.assert_numpy_array_equal(idx.to_numpy(), exp)
        exp = np.array([14975, -9223372036854775808], dtype=np.int64)
        tm.assert_numpy_array_equal(idx._ndarray_values, exp)

    def test_period_index_length(self):
        """Lengths, freq inference and error cases for index construction."""
        pi = period_range(freq="A", start="1/1/2001", end="12/1/2009")
        assert len(pi) == 9

        pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009")
        assert len(pi) == 4 * 9

        pi = period_range(freq="M", start="1/1/2001", end="12/1/2009")
        assert len(pi) == 12 * 9

        start = Period("02-Apr-2005", "B")
        i1 = period_range(start=start, periods=20)
        assert len(i1) == 20
        assert i1.freq == start.freq
        assert i1[0] == start

        end_intv = Period("2006-12-31", "W")
        i1 = period_range(end=end_intv, periods=10)
        assert len(i1) == 10
        assert i1.freq == end_intv.freq
        assert i1[-1] == end_intv

        # alternative spellings of the weekly freq give the same index
        end_intv = Period("2006-12-31", "1w")
        i2 = period_range(end=end_intv, periods=10)
        assert len(i1) == len(i2)
        assert (i1 == i2).all()
        assert i1.freq == i2.freq

        end_intv = Period("2006-12-31", ("w", 1))
        i2 = period_range(end=end_intv, periods=10)
        assert len(i1) == len(i2)
        assert (i1 == i2).all()
        assert i1.freq == i2.freq

        msg = "start and end must have same freq"
        with pytest.raises(ValueError, match=msg):
            period_range(start=start, end=end_intv)

        end_intv = Period("2005-05-01", "B")
        i1 = period_range(start=start, end=end_intv)

        msg = (
            "Of the three parameters: start, end, and periods, exactly two"
            " must be specified"
        )
        with pytest.raises(ValueError, match=msg):
            period_range(start=start)

        # infer freq from first element
        i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")])
        assert len(i2) == 2
        assert i2[0] == end_intv

        i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")]))
        assert len(i2) == 2
        assert i2[0] == end_intv

        # Mixed freq should fail
        vals = [end_intv, Period("2006-12-31", "w")]
        msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)"
        with pytest.raises(IncompatibleFrequency, match=msg):
            PeriodIndex(vals)
        vals = np.array(vals)
        with pytest.raises(ValueError, match=msg):
            PeriodIndex(vals)

    def test_fields(self):
        # year, month, day, hour, minute
        # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter
        # qyear
        pi = period_range(freq="A", start="1/1/2001", end="12/1/2005")
        self._check_all_fields(pi)

        pi = period_range(freq="Q", start="1/1/2001", end="12/1/2002")
        self._check_all_fields(pi)

        pi = period_range(freq="M", start="1/1/2001", end="1/1/2002")
        self._check_all_fields(pi)

        # NOTE(review): start is later than end in the next two ranges, so
        # they look empty - confirm this is intended.
        pi = period_range(freq="D", start="12/1/2001", end="6/1/2001")
        self._check_all_fields(pi)

        pi = period_range(freq="B", start="12/1/2001", end="6/1/2001")
        self._check_all_fields(pi)

        pi = period_range(freq="H", start="12/31/2001", end="1/1/2002 23:00")
        self._check_all_fields(pi)

        pi = period_range(freq="Min", start="12/31/2001", end="1/1/2002 00:20")
        self._check_all_fields(pi)

        pi = period_range(
            freq="S", start="12/31/2001 00:00:00", end="12/31/2001 00:05:00"
        )
        self._check_all_fields(pi)

        end_intv = Period("2006-12-31", "W")
        i1 = period_range(end=end_intv, periods=10)
        self._check_all_fields(i1)

    def _check_all_fields(self, periodindex):
        """Compare each field on the index and on ``Series.dt`` with each Period."""
        fields = [
            "year",
            "month",
            "day",
            "hour",
            "minute",
            "second",
            "weekofyear",
            "week",
            "dayofweek",
            "dayofyear",
            "quarter",
            "qyear",
            "days_in_month",
        ]

        periods = list(periodindex)
        s = pd.Series(periodindex)

        for field in fields:
            field_idx = getattr(periodindex, field)
            assert len(periodindex) == len(field_idx)
            for x, val in zip(periods, field_idx):
                assert getattr(x, field) == val

            # the ``.dt`` accessor is not checked for an empty Series
            if len(s) == 0:
                continue

            field_s = getattr(s.dt, field)
            assert len(periodindex) == len(field_s)
            for x, val in zip(periods, field_s):
                assert getattr(x, field) == val

    def test_period_set_index_reindex(self):
        # GH 6631
        df = DataFrame(np.random.random(6))
        idx1 = period_range("2011/01/01", periods=6, freq="M")
        idx2 = period_range("2013", periods=6, freq="A")

        df = df.set_index(idx1)
        tm.assert_index_equal(df.index, idx1)
        df = df.set_index(idx2)
        tm.assert_index_equal(df.index, idx2)

    def test_factorize(self):
        """``factorize`` codes and uniques, with and without ``sort=True``."""
        idx1 = PeriodIndex(
            ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M"
        )

        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
        exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")

        arr, idx = idx1.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

        arr, idx = idx1.factorize(sort=True)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

        idx2 = pd.PeriodIndex(
            ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M"
        )

        exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
        arr, idx = idx2.factorize(sort=True)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

        # unsorted: uniques are expected in order of first appearance
        exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
        exp_idx = PeriodIndex(["2014-03", "2014-02", "2014-01"], freq="M")
        arr, idx = idx2.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

    def test_is_(self):
        """``is_`` holds for views of an index but not for new or derived ones."""

        def make_index():
            # Local factory; named so it does not shadow ``self.create_index``.
            return period_range(freq="A", start="1/1/2001", end="12/1/2009")

        index = make_index()
        assert index.is_(index)
        assert not index.is_(make_index())
        assert index.is_(index.view())
        assert index.is_(index.view().view().view().view().view())
        assert index.view().is_(index)
        ind2 = index.view()
        index.name = "Apple"
        assert ind2.is_(index)
        assert not index.is_(index[:])
        assert not index.is_(index.asfreq("M"))
        assert not index.is_(index.asfreq("A"))
        assert not index.is_(index - 2)
        assert not index.is_(index - 0)

    def test_contains(self):
        """Membership requires a matching freq, not just a matching label."""
        rng = period_range("2007-01", freq="M", periods=10)

        assert Period("2007-01", freq="M") in rng
        assert Period("2007-01", freq="D") not in rng
        assert Period("2007-01", freq="2M") not in rng

    def test_contains_nat(self):
        # see gh-13582
        idx = period_range("2007-01", freq="M", periods=10)
        assert pd.NaT not in idx
        assert None not in idx
        assert float("nan") not in idx
        assert np.nan not in idx

        idx = pd.PeriodIndex(["2011-01", "NaT", "2011-02"], freq="M")
        assert pd.NaT in idx
        assert None in idx
        assert float("nan") in idx
        assert np.nan in idx

    def test_periods_number_check(self):
        """Passing start, end and a third positional argument raises ValueError."""
        msg = (
            "Of the three parameters: start, end, and periods, exactly two"
            " must be specified"
        )
        with pytest.raises(ValueError, match=msg):
            period_range("2011-1-1", "2012-1-1", "B")

    def test_start_time(self):
        # GH 17157
        index = period_range(freq="M", start="2016-01-01", end="2016-05-31")
        expected_index = date_range("2016-01-01", end="2016-05-31", freq="MS")
        tm.assert_index_equal(index.start_time, expected_index)

    def test_end_time(self):
        # GH 17157
        index = period_range(freq="M", start="2016-01-01", end="2016-05-31")
        expected_index = date_range("2016-01-01", end="2016-05-31", freq="M")
        # expected end_time: one nanosecond before the day after each month end
        expected_index = expected_index.shift(1, freq="D").shift(-1, freq="ns")
        tm.assert_index_equal(index.end_time, expected_index)

    def test_index_duplicate_periods(self):
        """Integer-year lookup on an index with duplicate periods."""
        # monotonic
        idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN")
        ts = Series(np.random.randn(len(idx)), index=idx)

        result = ts[2007]
        expected = ts[1:3]
        tm.assert_series_equal(result, expected)
        # writing through the result is expected to be visible in ``ts``
        result[:] = 1
        assert (ts[1:3] == 1).all()

        # not monotonic
        idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN")
        ts = Series(np.random.randn(len(idx)), index=idx)

        result = ts[2007]
        expected = ts[idx == 2007]
        tm.assert_series_equal(result, expected)

    def test_index_unique(self):
        """``unique``/``nunique`` on indexes that contain duplicate periods."""
        idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN")
        expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN")
        tm.assert_index_equal(idx.unique(), expected)
        assert idx.nunique() == 3

        # duplicates that are not adjacent
        idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN")
        expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN")
        tm.assert_index_equal(idx.unique(), expected)
        assert idx.nunique() == 3

    def test_shift(self):
        # This is tested in test_arithmetic
        pass

    @td.skip_if_32bit
    def test_ndarray_compat_properties(self):
        super().test_ndarray_compat_properties()

    def test_negative_ordinals(self):
        """Negative and zero ordinals are accepted by Period and PeriodIndex."""
        Period(ordinal=-1000, freq="A")
        Period(ordinal=0, freq="A")

        idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq="A")
        idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq="A")
        tm.assert_index_equal(idx1, idx2)

    def test_pindex_fieldaccessor_nat(self):
        """Field accessors report -1 at NaT positions and keep the index name."""
        idx = PeriodIndex(
            ["2011-01", "2011-02", "NaT", "2012-03", "2012-04"], freq="D", name="name"
        )

        exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name="name")
        tm.assert_index_equal(idx.year, exp)
        exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name="name")
        tm.assert_index_equal(idx.month, exp)

    def test_pindex_qaccess(self):
        """A quarter string label looks up the matching positional element."""
        pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q")
        s = Series(np.random.rand(len(pi)), index=pi).cumsum()
        # Todo: fix these accessors!
        assert s["05Q4"] == s[2]

    def test_pindex_multiples(self):
        """A "2M" freq gives every second month via each construction route."""
        # the start/end constructor keywords are expected to warn
        with tm.assert_produces_warning(FutureWarning):
            pi = PeriodIndex(start="1/1/11", end="12/31/11", freq="2M")
        expected = PeriodIndex(
            ["2011-01", "2011-03", "2011-05", "2011-07", "2011-09", "2011-11"],
            freq="2M",
        )
        tm.assert_index_equal(pi, expected)
        assert pi.freq == offsets.MonthEnd(2)
        assert pi.freqstr == "2M"

        pi = period_range(start="1/1/11", end="12/31/11", freq="2M")
        tm.assert_index_equal(pi, expected)
        assert pi.freq == offsets.MonthEnd(2)
        assert pi.freqstr == "2M"

        pi = period_range(start="1/1/11", periods=6, freq="2M")
        tm.assert_index_equal(pi, expected)
        assert pi.freq == offsets.MonthEnd(2)
        assert pi.freqstr == "2M"

    def test_iteration(self):
        """Iterating yields Period objects that carry the index's freq."""
        index = period_range(start="1/1/10", periods=4, freq="B")

        result = list(index)
        assert isinstance(result[0], Period)
        assert result[0].freq == index.freq

    def test_is_full(self):
        """``is_full`` is true without gaps and raises on a non-monotonic index."""
        index = PeriodIndex([2005, 2007, 2009], freq="A")
        assert not index.is_full

        index = PeriodIndex([2005, 2006, 2007], freq="A")
        assert index.is_full

        index = PeriodIndex([2005, 2005, 2007], freq="A")
        assert not index.is_full

        index = PeriodIndex([2005, 2005, 2006], freq="A")
        assert index.is_full

        index = PeriodIndex([2006, 2005, 2005], freq="A")
        with pytest.raises(ValueError, match="Index is not monotonic"):
            index.is_full

        # an empty slice counts as full
        assert index[:0].is_full

    def test_with_multi_index(self):
        # #1705
        index = date_range("1/1/2012", periods=4, freq="12H")
        index_as_arrays = [index.to_period(freq="D"), index.hour]

        s = Series([0, 1, 2, 3], index_as_arrays)

        assert isinstance(s.index.levels[0], PeriodIndex)

        assert isinstance(s.index.values[0][0], Period)

    def test_convert_array_of_periods(self):
        """``pd.Index`` built from a list of Periods is a PeriodIndex."""
        rng = period_range("1/1/2000", periods=20, freq="D")
        periods = list(rng)

        result = pd.Index(periods)
        assert isinstance(result, PeriodIndex)

    def test_append_concat(self):
        # #1815
        d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC")
        d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC")

        s1 = Series(np.random.randn(10), d1)
        s2 = Series(np.random.randn(10), d2)

        s1 = s1.to_period()
        s2 = s2.to_period()

        # drops index
        result = pd.concat([s1, s2])
        assert isinstance(result.index, PeriodIndex)
        assert result.index[0] == s1.index[0]

    def test_pickle_freq(self):
        # GH2891
        prng = period_range("1/1/2011", "1/1/2012", freq="M")
        new_prng = tm.round_trip_pickle(prng)
        assert new_prng.freq == offsets.MonthEnd()
        assert new_prng.freqstr == "M"

    def test_map(self):
        # test_map_dictlike generally tests

        index = PeriodIndex([2005, 2007, 2009], freq="A")
        result = index.map(lambda x: x.ordinal)
        exp = Index([x.ordinal for x in index])
        tm.assert_index_equal(result, exp)

    def test_join_self(self, join_type):
        """Joining an index with itself returns the very same object."""
        index = period_range("1/1/2000", periods=10)
        joined = index.join(index, how=join_type)
        assert index is joined

    def test_insert(self):
        # GH 18295 (test missing)
        expected = PeriodIndex(
            ["2017Q1", pd.NaT, "2017Q2", "2017Q3", "2017Q4"], freq="Q"
        )
        # every NA spelling must be inserted as NaT
        for na in (np.nan, pd.NaT, None):
            result = period_range("2017Q1", periods=4, freq="Q").insert(1, na)
            tm.assert_index_equal(result, expected)
def test_maybe_convert_timedelta():
    """A Day(2) offset and the int 2 both give 2; a BusinessDay offset raises."""
    daily = PeriodIndex(["2000", "2001"], freq="D")

    two_days = offsets.Day(2)
    assert daily._maybe_convert_timedelta(two_days) == 2
    assert daily._maybe_convert_timedelta(2) == 2

    business_day = offsets.BusinessDay()
    expected_message = r"Input has different freq=B from PeriodIndex\(freq=D\)"
    with pytest.raises(ValueError, match=expected_message):
        daily._maybe_convert_timedelta(business_day)

View File

@@ -0,0 +1,99 @@
import pytest
from pandas import NaT, Period, PeriodIndex, date_range, period_range
import pandas.util.testing as tm
class TestPeriodRange:
    """Tests for the ``period_range`` constructor function."""

    @pytest.mark.parametrize("freq", ["D", "W", "M", "Q", "A"])
    def test_construction_from_string(self, freq):
        """String bounds: every pairing of start/end/periods gives the same index."""
        # non-empty
        expected = date_range(
            start="2017-01-01", periods=5, freq=freq, name="foo"
        ).to_period()
        start = str(expected[0])
        end = str(expected[-1])

        for bounds in (
            {"start": start, "end": end},
            {"start": start, "periods": 5},
            {"end": end, "periods": 5},
        ):
            result = period_range(freq=freq, name="foo", **bounds)
            tm.assert_index_equal(result, expected)

        # empty
        expected = PeriodIndex([], freq=freq, name="foo")

        for bounds in (
            {"start": start, "periods": 0},
            {"end": end, "periods": 0},
            {"start": end, "end": start},
        ):
            result = period_range(freq=freq, name="foo", **bounds)
            tm.assert_index_equal(result, expected)

    def test_construction_from_period(self):
        """Period bounds whose freq differs from the requested freq."""
        # upsampling
        q_start = Period("2017Q1", freq="Q")
        q_end = Period("2018Q1", freq="Q")
        expected = date_range(
            start="2017-03-31", end="2018-03-31", freq="M", name="foo"
        ).to_period()
        result = period_range(start=q_start, end=q_end, freq="M", name="foo")
        tm.assert_index_equal(result, expected)

        # downsampling
        m_start = Period("2017-1", freq="M")
        m_end = Period("2019-12", freq="M")
        expected = date_range(
            start="2017-01-31", end="2019-12-31", freq="Q", name="foo"
        ).to_period()
        result = period_range(start=m_start, end=m_end, freq="Q", name="foo")
        tm.assert_index_equal(result, expected)

        # empty
        expected = PeriodIndex([], freq="W", name="foo")

        for bounds in (
            {"start": m_start, "periods": 0},
            {"end": m_end, "periods": 0},
            {"start": m_end, "end": m_start},
        ):
            result = period_range(freq="W", name="foo", **bounds)
            tm.assert_index_equal(result, expected)

    def test_errors(self):
        """Invalid argument combinations raise with the expected messages."""
        msg = (
            "Of the three parameters: start, end, and periods, "
            "exactly two must be specified"
        )

        # not enough params
        for kwargs in ({"start": "2017Q1"}, {"end": "2017Q1"}, {"periods": 5}, {}):
            with pytest.raises(ValueError, match=msg):
                period_range(**kwargs)

        # too many params
        with pytest.raises(ValueError, match=msg):
            period_range(start="2017Q1", end="2018Q1", periods=8, freq="Q")

        # start/end NaT
        msg = "start and end must not be NaT"
        for kwargs in (
            {"start": NaT, "end": "2018Q1"},
            {"start": "2017Q1", "end": NaT},
        ):
            with pytest.raises(ValueError, match=msg):
                period_range(**kwargs)

        # invalid periods param
        msg = "periods must be a number, got foo"
        with pytest.raises(TypeError, match=msg):
            period_range(start="2017Q1", periods="foo")

View File

@@ -0,0 +1,17 @@
"""Tests for PeriodIndex behaving like a vectorized Period scalar"""
from pandas import Timedelta, date_range, period_range
import pandas.util.testing as tm
class TestPeriodIndexOps:
    """``start_time`` / ``end_time`` of a monthly PeriodIndex."""

    def test_start_time(self):
        """``start_time`` equals the month-start timestamps of the same span."""
        months = period_range(freq="M", start="2016-01-01", end="2016-05-31")
        month_starts = date_range("2016-01-01", end="2016-05-31", freq="MS")
        tm.assert_index_equal(months.start_time, month_starts)

    def test_end_time(self):
        """``end_time`` is one nanosecond before the day after each month end."""
        months = period_range(freq="M", start="2016-01-01", end="2016-05-31")
        month_ends = date_range("2016-01-01", end="2016-05-31", freq="M")
        last_instants = month_ends + (Timedelta(1, "D") - Timedelta(1, "ns"))
        tm.assert_index_equal(months.end_time, last_instants)

View File

@@ -0,0 +1,355 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Index, PeriodIndex, date_range, period_range
import pandas.core.indexes.period as period
import pandas.util.testing as tm
def _permute(obj):
return obj.take(np.random.permutation(len(obj)))
class TestPeriodIndex:
def test_joins(self, join_type):
    """Joining with a shortened slice of itself keeps the index type and freq."""
    daily = period_range("1/1/2000", "1/20/2000", freq="D")
    shortened = daily[:-5]

    joined = daily.join(shortened, how=join_type)

    assert isinstance(joined, PeriodIndex)
    assert joined.freq == daily.freq
def test_join_self(self, join_type):
    """Joining an index with itself returns the very same object."""
    daily = period_range("1/1/2000", "1/20/2000", freq="D")
    joined = daily.join(daily, how=join_type)
    assert daily is joined
def test_join_does_not_recur(self):
    """Outer join of a row index with the frame's columns gives an object Index."""
    frame = tm.makeCustomDataframe(
        3,
        2,
        data_gen_f=lambda *args: np.random.randint(2),
        c_idx_type="p",
        r_idx_type="dt",
    )
    first_rows = frame.iloc[:2, 0]

    joined = first_rows.index.join(frame.columns, how="outer")

    # expected: the two row labels followed by the two column labels
    row_labels = [first_rows.index[pos] for pos in range(2)]
    column_labels = [frame.columns[pos] for pos in range(2)]
    expected = Index(row_labels + column_labels, object)
    tm.assert_index_equal(joined, expected)
@pytest.mark.parametrize("sort", [None, False])
def test_union(self, sort):
    """Union of same-freq indexes; expectations are sorted when ``sort`` is None."""
    # Each case is (rng, other, expected); ``expected`` lists the elements of
    # ``rng`` first, followed by the elements of ``other`` not already in it.
    cases = [
        # disjoint daily ranges, the later one first
        (
            pd.period_range("1/6/2000", freq="D", periods=5),
            pd.period_range("1/1/2000", freq="D", periods=5),
            pd.PeriodIndex(
                [
                    "2000-01-{:02d}".format(day)
                    for day in (6, 7, 8, 9, 10, 1, 2, 3, 4, 5)
                ],
                freq="D",
            ),
        ),
        # overlapping daily ranges
        (
            pd.period_range("1/1/2000", freq="D", periods=5),
            pd.period_range("1/4/2000", freq="D", periods=5),
            pd.period_range("1/1/2000", freq="D", periods=8),
        ),
        # union with an empty index
        (
            pd.period_range("1/1/2000", freq="D", periods=5),
            pd.PeriodIndex([], freq="D"),
            pd.period_range("1/1/2000", freq="D", periods=5),
        ),
        # hourly ranges on two different days
        (
            pd.period_range("2000-01-01 09:00", freq="H", periods=5),
            pd.period_range("2000-01-02 09:00", freq="H", periods=5),
            pd.PeriodIndex(
                [
                    "2000-01-{:02d} {:02d}:00".format(day, hour)
                    for day in (1, 2)
                    for hour in range(9, 14)
                ],
                freq="H",
            ),
        ),
        # minute-frequency indexes with gaps
        (
            pd.PeriodIndex(
                ["2000-01-01 09:01", "2000-01-01 09:03", "2000-01-01 09:05"], freq="T"
            ),
            pd.PeriodIndex(
                ["2000-01-01 09:01", "2000-01-01 09:05", "2000-01-01 09:08"], freq="T"
            ),
            pd.PeriodIndex(
                [
                    "2000-01-01 09:01",
                    "2000-01-01 09:03",
                    "2000-01-01 09:05",
                    "2000-01-01 09:08",
                ],
                freq="T",
            ),
        ),
        # overlapping monthly ranges
        (
            pd.period_range("2000-01-01", freq="M", periods=7),
            pd.period_range("2000-04-01", freq="M", periods=7),
            pd.period_range("2000-01-01", freq="M", periods=10),
        ),
        # annual ranges, the other one starting earlier
        (
            pd.period_range("2003-01-01", freq="A", periods=5),
            pd.period_range("1998-01-01", freq="A", periods=8),
            pd.PeriodIndex(
                [
                    str(year)
                    for year in list(range(2003, 2008)) + list(range(1998, 2003))
                ],
                freq="A",
            ),
        ),
        # unordered daily index joined with a later range
        (
            pd.PeriodIndex(
                ["1/{}/2000".format(day) for day in (3, 2, 1, 5, 4)], freq="D"
            ),
            pd.period_range("1/6/2000", freq="D", periods=5),
            pd.PeriodIndex(
                [
                    "1/{}/2000".format(day)
                    for day in (3, 2, 1, 5, 4, 6, 7, 8, 9, 10)
                ],
                freq="D",
            ),
        ),
    ]

    for rng, other, expected in cases:
        result_union = rng.union(other, sort=sort)
        wanted = expected.sort_values() if sort is None else expected
        tm.assert_index_equal(result_union, wanted)
@pytest.mark.parametrize("sort", [None, False])
def test_union_misc(self, sort):
    """Union of overlapping slices, shuffled inputs, and mismatched freqs."""
    full = period_range("1/1/2000", "1/20/2000", freq="D")
    head = full[:-5]
    tail = full[10:]

    result = head.union(tail, sort=sort)
    tm.assert_index_equal(result, full)

    # not in order
    result = _permute(head).union(_permute(tail), sort=sort)
    if sort is None:
        tm.assert_index_equal(result, full)
    assert tm.equalContents(result, full)

    # raise if different frequencies
    daily = period_range("1/1/2000", "1/20/2000", freq="D")
    weekly = period_range("1/1/2000", "1/20/2000", freq="W-WED")
    with pytest.raises(period.IncompatibleFrequency):
        daily.union(weekly, sort=sort)

    # this second freq-mismatch check goes through ``join``
    two_daily = period_range("1/1/2000", "1/20/2000", freq="2D")
    with pytest.raises(period.IncompatibleFrequency):
        daily.join(two_daily)
def test_union_dataframe_index(self):
rng1 = pd.period_range("1/1/1999", "1/1/2012", freq="M")
s1 = pd.Series(np.random.randn(len(rng1)), rng1)
rng2 = pd.period_range("1/1/1980", "12/1/2001", freq="M")
s2 = pd.Series(np.random.randn(len(rng2)), rng2)
df = pd.DataFrame({"s1": s1, "s2": s2})
exp = pd.period_range("1/1/1980", "1/1/2012", freq="M")
tm.assert_index_equal(df.index, exp)
@pytest.mark.parametrize("sort", [None, False])
def test_intersection(self, sort):
    """Check intersection of overlapping daily ranges and freq mismatches.

    ``sort`` is forwarded unchanged to ``intersection``.
    """
    daily = period_range("1/1/2000", "1/20/2000", freq="D")
    head, tail = daily[:-5], daily[10:]

    common = head.intersection(tail, sort=sort)
    tm.assert_index_equal(common, daily[10:-5])

    # not in order
    left = _permute(head)
    right = _permute(tail)
    common = left.intersection(right, sort=sort)
    if sort is None:
        # Exact ordering is only asserted when sort is None.
        tm.assert_index_equal(common, daily[10:-5])
    assert tm.equalContents(common, daily[10:-5])

    # raise if different frequencies
    for other_freq in ("W-WED", "2D"):
        other = period_range("1/1/2000", "1/20/2000", freq=other_freq)
        with pytest.raises(period.IncompatibleFrequency):
            daily.intersection(other, sort=sort)
@pytest.mark.parametrize("sort", [None, False])
def test_intersection_cases(self, sort):
base = period_range("6/1/2000", "6/30/2000", freq="D", name="idx")
# if target has the same name, it is preserved
rng2 = period_range("5/15/2000", "6/20/2000", freq="D", name="idx")
expected2 = period_range("6/1/2000", "6/20/2000", freq="D", name="idx")
# if target name is different, it will be reset
rng3 = period_range("5/15/2000", "6/20/2000", freq="D", name="other")
expected3 = period_range("6/1/2000", "6/20/2000", freq="D", name=None)
rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx")
expected4 = PeriodIndex([], name="idx", freq="D")
for (rng, expected) in [
(rng2, expected2),
(rng3, expected3),
(rng4, expected4),
]:
result = base.intersection(rng, sort=sort)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
# non-monotonic
base = PeriodIndex(
["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"],
freq="D",
name="idx",
)
rng2 = PeriodIndex(
["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
freq="D",
name="idx",
)
expected2 = PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name="idx")
rng3 = PeriodIndex(
["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
freq="D",
name="other",
)
expected3 = PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name=None)
rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx")
expected4 = PeriodIndex([], freq="D", name="idx")
for (rng, expected) in [
(rng2, expected2),
(rng3, expected3),
(rng4, expected4),
]:
result = base.intersection(rng, sort=sort)
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == "D"
# empty same freq
rng = date_range("6/1/2000", "6/15/2000", freq="T")
result = rng[0:0].intersection(rng)
assert len(result) == 0
result = rng.intersection(rng[0:0])
assert len(result) == 0
@pytest.mark.parametrize("sort", [None, False])
def test_difference(self, sort):
# diff
period_rng = ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"]
rng1 = pd.PeriodIndex(period_rng, freq="D")
other1 = pd.period_range("1/6/2000", freq="D", periods=5)
expected1 = rng1
rng2 = pd.PeriodIndex(period_rng, freq="D")
other2 = pd.period_range("1/4/2000", freq="D", periods=5)
expected2 = pd.PeriodIndex(["1/3/2000", "1/2/2000", "1/1/2000"], freq="D")
rng3 = pd.PeriodIndex(period_rng, freq="D")
other3 = pd.PeriodIndex([], freq="D")
expected3 = rng3
period_rng = [
"2000-01-01 10:00",
"2000-01-01 09:00",
"2000-01-01 12:00",
"2000-01-01 11:00",
"2000-01-01 13:00",
]
rng4 = pd.PeriodIndex(period_rng, freq="H")
other4 = pd.period_range("2000-01-02 09:00", freq="H", periods=5)
expected4 = rng4
rng5 = pd.PeriodIndex(
["2000-01-01 09:03", "2000-01-01 09:01", "2000-01-01 09:05"], freq="T"
)
other5 = pd.PeriodIndex(["2000-01-01 09:01", "2000-01-01 09:05"], freq="T")
expected5 = pd.PeriodIndex(["2000-01-01 09:03"], freq="T")
period_rng = [
"2000-02-01",
"2000-01-01",
"2000-06-01",
"2000-07-01",
"2000-05-01",
"2000-03-01",
"2000-04-01",
]
rng6 = pd.PeriodIndex(period_rng, freq="M")
other6 = pd.period_range("2000-04-01", freq="M", periods=7)
expected6 = pd.PeriodIndex(["2000-02-01", "2000-01-01", "2000-03-01"], freq="M")
period_rng = ["2003", "2007", "2006", "2005", "2004"]
rng7 = pd.PeriodIndex(period_rng, freq="A")
other7 = pd.period_range("1998-01-01", freq="A", periods=8)
expected7 = pd.PeriodIndex(["2007", "2006"], freq="A")
for rng, other, expected in [
(rng1, other1, expected1),
(rng2, other2, expected2),
(rng3, other3, expected3),
(rng4, other4, expected4),
(rng5, other5, expected5),
(rng6, other6, expected6),
(rng7, other7, expected7),
]:
result_difference = rng.difference(other, sort=sort)
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result_difference, expected)

View File

@@ -0,0 +1,352 @@
from datetime import datetime, timedelta
import numpy as np
import pytest
from pandas._libs.tslibs.ccalendar import MONTHS
import pandas as pd
from pandas import (
DatetimeIndex,
Period,
PeriodIndex,
Series,
Timedelta,
Timestamp,
date_range,
period_range,
to_datetime,
)
import pandas.core.indexes.period as period
import pandas.util.testing as tm
class TestPeriodRepresentation:
    """
    Integer representation of periods (stated goal: match NumPy units).
    """

    def _check_freq(self, freq, base_date):
        """Assert that ten periods starting at ``base_date`` have ordinals 0..9."""
        ordinals = period_range(start=base_date, periods=10, freq=freq).asi8
        tm.assert_numpy_array_equal(ordinals, np.arange(10, dtype=np.int64))

    def test_annual(self):
        """Annual periods counted from 1970."""
        self._check_freq("A", 1970)

    def test_monthly(self):
        """Monthly periods counted from 1970-01."""
        self._check_freq("M", "1970-01")

    @pytest.mark.parametrize("freq", ["W-THU", "D", "B", "H", "T", "S", "L", "U", "N"])
    def test_freq(self, freq):
        """Weekly and finer frequencies counted from 1970-01-01."""
        self._check_freq(freq, "1970-01-01")

    def test_negone_ordinals(self):
        """Periods with ordinal -1 can be repr'd and report the year 1969."""
        codes = ["A", "M", "Q", "D", "H", "T", "S"]

        # Convert a single daily period to every frequency in turn.
        daily = Period(ordinal=-1, freq="D")
        for code in codes:
            repr(daily.asfreq(code))

        # Build the ordinal -1 period directly at each frequency.
        for code in codes:
            per = Period(ordinal=-1, freq=code)
            repr(per)
            assert per.year == 1969

        # For business-day and weekly periods only repr is exercised.
        for code in ("B", "W"):
            repr(Period(ordinal=-1, freq=code))
class TestPeriodIndex:
def test_to_timestamp(self):
index = period_range(freq="A", start="1/1/2001", end="12/1/2009")
series = Series(1, index=index, name="foo")
exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC")
result = series.to_timestamp(how="end")
exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns")
tm.assert_index_equal(result.index, exp_index)
assert result.name == "foo"
exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN")
result = series.to_timestamp(how="start")
tm.assert_index_equal(result.index, exp_index)
def _get_with_delta(delta, freq="A-DEC"):
return date_range(
to_datetime("1/1/2001") + delta,
to_datetime("12/31/2009") + delta,
freq=freq,
)
delta = timedelta(hours=23)
result = series.to_timestamp("H", "end")
exp_index = _get_with_delta(delta)
exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns")
tm.assert_index_equal(result.index, exp_index)
delta = timedelta(hours=23, minutes=59)
result = series.to_timestamp("T", "end")
exp_index = _get_with_delta(delta)
exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns")
tm.assert_index_equal(result.index, exp_index)
result = series.to_timestamp("S", "end")
delta = timedelta(hours=23, minutes=59, seconds=59)
exp_index = _get_with_delta(delta)
exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns")
tm.assert_index_equal(result.index, exp_index)
index = period_range(freq="H", start="1/1/2001", end="1/2/2001")
series = Series(1, index=index, name="foo")
exp_index = date_range("1/1/2001 00:59:59", end="1/2/2001 00:59:59", freq="H")
result = series.to_timestamp(how="end")
exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns")
tm.assert_index_equal(result.index, exp_index)
assert result.name == "foo"
def test_to_timestamp_freq(self):
idx = pd.period_range("2017", periods=12, freq="A-DEC")
result = idx.to_timestamp()
expected = pd.date_range("2017", periods=12, freq="AS-JAN")
tm.assert_index_equal(result, expected)
def test_to_timestamp_repr_is_code(self):
zs = [
Timestamp("99-04-17 00:00:00", tz="UTC"),
Timestamp("2001-04-17 00:00:00", tz="UTC"),
Timestamp("2001-04-17 00:00:00", tz="America/Los_Angeles"),
Timestamp("2001-04-17 00:00:00", tz=None),
]
for z in zs:
assert eval(repr(z)) == z
def test_to_timestamp_to_period_astype(self):
idx = DatetimeIndex([pd.NaT, "2011-01-01", "2011-02-01"], name="idx")
res = idx.astype("period[M]")
exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx")
tm.assert_index_equal(res, exp)
res = idx.astype("period[3M]")
exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx")
tm.assert_index_equal(res, exp)
def test_dti_to_period(self):
dti = pd.date_range(start="1/1/2005", end="12/1/2005", freq="M")
pi1 = dti.to_period()
pi2 = dti.to_period(freq="D")
pi3 = dti.to_period(freq="3D")
assert pi1[0] == Period("Jan 2005", freq="M")
assert pi2[0] == Period("1/31/2005", freq="D")
assert pi3[0] == Period("1/31/2005", freq="3D")
assert pi1[-1] == Period("Nov 2005", freq="M")
assert pi2[-1] == Period("11/30/2005", freq="D")
assert pi3[-1], Period("11/30/2005", freq="3D")
tm.assert_index_equal(pi1, period_range("1/1/2005", "11/1/2005", freq="M"))
tm.assert_index_equal(
pi2, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("D")
)
tm.assert_index_equal(
pi3, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("3D")
)
@pytest.mark.parametrize("month", MONTHS)
def test_to_period_quarterly(self, month):
# make sure we can make the round trip
freq = "Q-{month}".format(month=month)
rng = period_range("1989Q3", "1991Q3", freq=freq)
stamps = rng.to_timestamp()
result = stamps.to_period(freq)
tm.assert_index_equal(rng, result)
@pytest.mark.parametrize("off", ["BQ", "QS", "BQS"])
def test_to_period_quarterlyish(self, off):
rng = date_range("01-Jan-2012", periods=8, freq=off)
prng = rng.to_period()
assert prng.freq == "Q-DEC"
@pytest.mark.parametrize("off", ["BA", "AS", "BAS"])
def test_to_period_annualish(self, off):
rng = date_range("01-Jan-2012", periods=8, freq=off)
prng = rng.to_period()
assert prng.freq == "A-DEC"
def test_to_period_monthish(self):
offsets = ["MS", "BM"]
for off in offsets:
rng = date_range("01-Jan-2012", periods=8, freq=off)
prng = rng.to_period()
assert prng.freq == "M"
rng = date_range("01-Jan-2012", periods=8, freq="M")
prng = rng.to_period()
assert prng.freq == "M"
msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG
with pytest.raises(ValueError, match=msg):
date_range("01-Jan-2012", periods=8, freq="EOM")
def test_period_dt64_round_trip(self):
dti = date_range("1/1/2000", "1/7/2002", freq="B")
pi = dti.to_period()
tm.assert_index_equal(pi.to_timestamp(), dti)
dti = date_range("1/1/2000", "1/7/2002", freq="B")
pi = dti.to_period(freq="H")
tm.assert_index_equal(pi.to_timestamp(), dti)
def test_combine_first(self):
# GH#3367
didx = pd.date_range(start="1950-01-31", end="1950-07-31", freq="M")
pidx = pd.period_range(
start=pd.Period("1950-1"), end=pd.Period("1950-7"), freq="M"
)
# check to be consistent with DatetimeIndex
for idx in [didx, pidx]:
a = pd.Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx)
b = pd.Series([9, 9, 9, 9, 9, 9, 9], index=idx)
result = a.combine_first(b)
expected = pd.Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("freq", ["D", "2D"])
def test_searchsorted(self, freq):
pidx = pd.PeriodIndex(
["2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05"],
freq=freq,
)
p1 = pd.Period("2014-01-01", freq=freq)
assert pidx.searchsorted(p1) == 0
p2 = pd.Period("2014-01-04", freq=freq)
assert pidx.searchsorted(p2) == 3
msg = "Input has different freq=H from PeriodIndex"
with pytest.raises(period.IncompatibleFrequency, match=msg):
pidx.searchsorted(pd.Period("2014-01-01", freq="H"))
msg = "Input has different freq=5D from PeriodIndex"
with pytest.raises(period.IncompatibleFrequency, match=msg):
pidx.searchsorted(pd.Period("2014-01-01", freq="5D"))
class TestPeriodIndexConversion:
def test_tolist(self):
index = period_range(freq="A", start="1/1/2001", end="12/1/2009")
rs = index.tolist()
for x in rs:
assert isinstance(x, Period)
recon = PeriodIndex(rs)
tm.assert_index_equal(index, recon)
def test_to_timestamp_pi_nat(self):
# GH#7228
index = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx")
result = index.to_timestamp("D")
expected = DatetimeIndex(
[pd.NaT, datetime(2011, 1, 1), datetime(2011, 2, 1)], name="idx"
)
tm.assert_index_equal(result, expected)
assert result.name == "idx"
result2 = result.to_period(freq="M")
tm.assert_index_equal(result2, index)
assert result2.name == "idx"
result3 = result.to_period(freq="3M")
exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx")
tm.assert_index_equal(result3, exp)
assert result3.freqstr == "3M"
msg = "Frequency must be positive, because it represents span: -2A"
with pytest.raises(ValueError, match=msg):
result.to_period(freq="-2A")
def test_to_timestamp_preserve_name(self):
index = period_range(freq="A", start="1/1/2001", end="12/1/2009", name="foo")
assert index.name == "foo"
conv = index.to_timestamp("D")
assert conv.name == "foo"
def test_to_timestamp_quarterly_bug(self):
years = np.arange(1960, 2000).repeat(4)
quarters = np.tile(list(range(1, 5)), 40)
pindex = PeriodIndex(year=years, quarter=quarters)
stamps = pindex.to_timestamp("D", "end")
expected = DatetimeIndex([x.to_timestamp("D", "end") for x in pindex])
tm.assert_index_equal(stamps, expected)
def test_to_timestamp_pi_mult(self):
idx = PeriodIndex(["2011-01", "NaT", "2011-02"], freq="2M", name="idx")
result = idx.to_timestamp()
expected = DatetimeIndex(["2011-01-01", "NaT", "2011-02-01"], name="idx")
tm.assert_index_equal(result, expected)
result = idx.to_timestamp(how="E")
expected = DatetimeIndex(["2011-02-28", "NaT", "2011-03-31"], name="idx")
expected = expected + Timedelta(1, "D") - Timedelta(1, "ns")
tm.assert_index_equal(result, expected)
def test_to_timestamp_pi_combined(self):
idx = period_range(start="2011", periods=2, freq="1D1H", name="idx")
result = idx.to_timestamp()
expected = DatetimeIndex(["2011-01-01 00:00", "2011-01-02 01:00"], name="idx")
tm.assert_index_equal(result, expected)
result = idx.to_timestamp(how="E")
expected = DatetimeIndex(
["2011-01-02 00:59:59", "2011-01-03 01:59:59"], name="idx"
)
expected = expected + Timedelta(1, "s") - Timedelta(1, "ns")
tm.assert_index_equal(result, expected)
result = idx.to_timestamp(how="E", freq="H")
expected = DatetimeIndex(["2011-01-02 00:00", "2011-01-03 01:00"], name="idx")
expected = expected + Timedelta(1, "h") - Timedelta(1, "ns")
tm.assert_index_equal(result, expected)
def test_period_astype_to_timestamp(self):
pi = pd.PeriodIndex(["2011-01", "2011-02", "2011-03"], freq="M")
exp = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"])
tm.assert_index_equal(pi.astype("datetime64[ns]"), exp)
exp = pd.DatetimeIndex(["2011-01-31", "2011-02-28", "2011-03-31"])
exp = exp + Timedelta(1, "D") - Timedelta(1, "ns")
tm.assert_index_equal(pi.astype("datetime64[ns]", how="end"), exp)
exp = pd.DatetimeIndex(
["2011-01-01", "2011-02-01", "2011-03-01"], tz="US/Eastern"
)
res = pi.astype("datetime64[ns, US/Eastern]")
tm.assert_index_equal(pi.astype("datetime64[ns, US/Eastern]"), exp)
exp = pd.DatetimeIndex(
["2011-01-31", "2011-02-28", "2011-03-31"], tz="US/Eastern"
)
exp = exp + Timedelta(1, "D") - Timedelta(1, "ns")
res = pi.astype("datetime64[ns, US/Eastern]", how="end")
tm.assert_index_equal(res, exp)
def test_to_timestamp_1703(self):
index = period_range("1/1/2012", periods=4, freq="D")
result = index.to_timestamp()
assert result[0] == Timestamp("1/1/2012")