970 lines
32 KiB
Python
970 lines
32 KiB
Python
from datetime import datetime, time
|
|
from itertools import product
|
|
|
|
import numpy as np
|
|
import pytest
|
|
import pytz
|
|
|
|
import pandas as pd
|
|
from pandas import (
|
|
DataFrame,
|
|
DatetimeIndex,
|
|
Index,
|
|
MultiIndex,
|
|
Series,
|
|
Timestamp,
|
|
date_range,
|
|
period_range,
|
|
to_datetime,
|
|
)
|
|
from pandas.tests.frame.common import TestData
|
|
import pandas.util.testing as tm
|
|
from pandas.util.testing import (
|
|
assert_frame_equal,
|
|
assert_index_equal,
|
|
assert_series_equal,
|
|
)
|
|
|
|
import pandas.tseries.offsets as offsets
|
|
|
|
|
|
@pytest.fixture(params=product([True, False], [True, False]))
|
|
def close_open_fixture(request):
|
|
return request.param
|
|
|
|
|
|
class TestDataFrameTimeSeriesMethods(TestData):
|
|
def test_diff(self):
|
|
the_diff = self.tsframe.diff(1)
|
|
|
|
assert_series_equal(
|
|
the_diff["A"], self.tsframe["A"] - self.tsframe["A"].shift(1)
|
|
)
|
|
|
|
# int dtype
|
|
a = 10000000000000000
|
|
b = a + 1
|
|
s = Series([a, b])
|
|
|
|
rs = DataFrame({"s": s}).diff()
|
|
assert rs.s[1] == 1
|
|
|
|
# mixed numeric
|
|
tf = self.tsframe.astype("float32")
|
|
the_diff = tf.diff(1)
|
|
assert_series_equal(the_diff["A"], tf["A"] - tf["A"].shift(1))
|
|
|
|
# issue 10907
|
|
df = pd.DataFrame({"y": pd.Series([2]), "z": pd.Series([3])})
|
|
df.insert(0, "x", 1)
|
|
result = df.diff(axis=1)
|
|
expected = pd.DataFrame(
|
|
{"x": np.nan, "y": pd.Series(1), "z": pd.Series(1)}
|
|
).astype("float64")
|
|
assert_frame_equal(result, expected)
|
|
|
|
@pytest.mark.parametrize("tz", [None, "UTC"])
|
|
def test_diff_datetime_axis0(self, tz):
|
|
# GH 18578
|
|
df = DataFrame(
|
|
{
|
|
0: date_range("2010", freq="D", periods=2, tz=tz),
|
|
1: date_range("2010", freq="D", periods=2, tz=tz),
|
|
}
|
|
)
|
|
|
|
result = df.diff(axis=0)
|
|
expected = DataFrame(
|
|
{
|
|
0: pd.TimedeltaIndex(["NaT", "1 days"]),
|
|
1: pd.TimedeltaIndex(["NaT", "1 days"]),
|
|
}
|
|
)
|
|
assert_frame_equal(result, expected)
|
|
|
|
@pytest.mark.parametrize("tz", [None, "UTC"])
|
|
def test_diff_datetime_axis1(self, tz):
|
|
# GH 18578
|
|
df = DataFrame(
|
|
{
|
|
0: date_range("2010", freq="D", periods=2, tz=tz),
|
|
1: date_range("2010", freq="D", periods=2, tz=tz),
|
|
}
|
|
)
|
|
if tz is None:
|
|
result = df.diff(axis=1)
|
|
expected = DataFrame(
|
|
{
|
|
0: pd.TimedeltaIndex(["NaT", "NaT"]),
|
|
1: pd.TimedeltaIndex(["0 days", "0 days"]),
|
|
}
|
|
)
|
|
assert_frame_equal(result, expected)
|
|
else:
|
|
with pytest.raises(NotImplementedError):
|
|
result = df.diff(axis=1)
|
|
|
|
def test_diff_timedelta(self):
|
|
# GH 4533
|
|
df = DataFrame(
|
|
dict(
|
|
time=[Timestamp("20130101 9:01"), Timestamp("20130101 9:02")],
|
|
value=[1.0, 2.0],
|
|
)
|
|
)
|
|
|
|
res = df.diff()
|
|
exp = DataFrame(
|
|
[[pd.NaT, np.nan], [pd.Timedelta("00:01:00"), 1]], columns=["time", "value"]
|
|
)
|
|
assert_frame_equal(res, exp)
|
|
|
|
def test_diff_mixed_dtype(self):
|
|
df = DataFrame(np.random.randn(5, 3))
|
|
df["A"] = np.array([1, 2, 3, 4, 5], dtype=object)
|
|
|
|
result = df.diff()
|
|
assert result[0].dtype == np.float64
|
|
|
|
def test_diff_neg_n(self):
|
|
rs = self.tsframe.diff(-1)
|
|
xp = self.tsframe - self.tsframe.shift(-1)
|
|
assert_frame_equal(rs, xp)
|
|
|
|
def test_diff_float_n(self):
|
|
rs = self.tsframe.diff(1.0)
|
|
xp = self.tsframe.diff(1)
|
|
assert_frame_equal(rs, xp)
|
|
|
|
def test_diff_axis(self):
|
|
# GH 9727
|
|
df = DataFrame([[1.0, 2.0], [3.0, 4.0]])
|
|
assert_frame_equal(df.diff(axis=1), DataFrame([[np.nan, 1.0], [np.nan, 1.0]]))
|
|
assert_frame_equal(df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]]))
|
|
|
|
def test_pct_change(self):
|
|
rs = self.tsframe.pct_change(fill_method=None)
|
|
assert_frame_equal(rs, self.tsframe / self.tsframe.shift(1) - 1)
|
|
|
|
rs = self.tsframe.pct_change(2)
|
|
filled = self.tsframe.fillna(method="pad")
|
|
assert_frame_equal(rs, filled / filled.shift(2) - 1)
|
|
|
|
rs = self.tsframe.pct_change(fill_method="bfill", limit=1)
|
|
filled = self.tsframe.fillna(method="bfill", limit=1)
|
|
assert_frame_equal(rs, filled / filled.shift(1) - 1)
|
|
|
|
rs = self.tsframe.pct_change(freq="5D")
|
|
filled = self.tsframe.fillna(method="pad")
|
|
assert_frame_equal(
|
|
rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled)
|
|
)
|
|
|
|
def test_pct_change_shift_over_nas(self):
|
|
s = Series([1.0, 1.5, np.nan, 2.5, 3.0])
|
|
|
|
df = DataFrame({"a": s, "b": s})
|
|
|
|
chg = df.pct_change()
|
|
expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2])
|
|
edf = DataFrame({"a": expected, "b": expected})
|
|
assert_frame_equal(chg, edf)
|
|
|
|
@pytest.mark.parametrize(
|
|
"freq, periods, fill_method, limit",
|
|
[
|
|
("5B", 5, None, None),
|
|
("3B", 3, None, None),
|
|
("3B", 3, "bfill", None),
|
|
("7B", 7, "pad", 1),
|
|
("7B", 7, "bfill", 3),
|
|
("14B", 14, None, None),
|
|
],
|
|
)
|
|
def test_pct_change_periods_freq(self, freq, periods, fill_method, limit):
|
|
# GH 7292
|
|
rs_freq = self.tsframe.pct_change(
|
|
freq=freq, fill_method=fill_method, limit=limit
|
|
)
|
|
rs_periods = self.tsframe.pct_change(
|
|
periods, fill_method=fill_method, limit=limit
|
|
)
|
|
assert_frame_equal(rs_freq, rs_periods)
|
|
|
|
empty_ts = DataFrame(index=self.tsframe.index, columns=self.tsframe.columns)
|
|
rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit)
|
|
rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit)
|
|
assert_frame_equal(rs_freq, rs_periods)
|
|
|
|
def test_frame_ctor_datetime64_column(self):
|
|
rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
|
|
dates = np.asarray(rng)
|
|
|
|
df = DataFrame({"A": np.random.randn(len(rng)), "B": dates})
|
|
assert np.issubdtype(df["B"].dtype, np.dtype("M8[ns]"))
|
|
|
|
def test_frame_append_datetime64_column(self):
|
|
rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
|
|
df = DataFrame(index=np.arange(len(rng)))
|
|
|
|
df["A"] = rng
|
|
assert np.issubdtype(df["A"].dtype, np.dtype("M8[ns]"))
|
|
|
|
def test_frame_datetime64_pre1900_repr(self):
|
|
df = DataFrame({"year": date_range("1/1/1700", periods=50, freq="A-DEC")})
|
|
# it works!
|
|
repr(df)
|
|
|
|
def test_frame_append_datetime64_col_other_units(self):
|
|
n = 100
|
|
|
|
units = ["h", "m", "s", "ms", "D", "M", "Y"]
|
|
|
|
ns_dtype = np.dtype("M8[ns]")
|
|
|
|
for unit in units:
|
|
dtype = np.dtype("M8[{unit}]".format(unit=unit))
|
|
vals = np.arange(n, dtype=np.int64).view(dtype)
|
|
|
|
df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
|
|
df[unit] = vals
|
|
|
|
ex_vals = to_datetime(vals.astype("O")).values
|
|
|
|
assert df[unit].dtype == ns_dtype
|
|
assert (df[unit].values == ex_vals).all()
|
|
|
|
# Test insertion into existing datetime64 column
|
|
df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
|
|
df["dates"] = np.arange(n, dtype=np.int64).view(ns_dtype)
|
|
|
|
for unit in units:
|
|
dtype = np.dtype("M8[{unit}]".format(unit=unit))
|
|
vals = np.arange(n, dtype=np.int64).view(dtype)
|
|
|
|
tmp = df.copy()
|
|
|
|
tmp["dates"] = vals
|
|
ex_vals = to_datetime(vals.astype("O")).values
|
|
|
|
assert (tmp["dates"].values == ex_vals).all()
|
|
|
|
def test_shift(self):
|
|
# naive shift
|
|
shiftedFrame = self.tsframe.shift(5)
|
|
tm.assert_index_equal(shiftedFrame.index, self.tsframe.index)
|
|
|
|
shiftedSeries = self.tsframe["A"].shift(5)
|
|
assert_series_equal(shiftedFrame["A"], shiftedSeries)
|
|
|
|
shiftedFrame = self.tsframe.shift(-5)
|
|
tm.assert_index_equal(shiftedFrame.index, self.tsframe.index)
|
|
|
|
shiftedSeries = self.tsframe["A"].shift(-5)
|
|
assert_series_equal(shiftedFrame["A"], shiftedSeries)
|
|
|
|
# shift by 0
|
|
unshifted = self.tsframe.shift(0)
|
|
assert_frame_equal(unshifted, self.tsframe)
|
|
|
|
# shift by DateOffset
|
|
shiftedFrame = self.tsframe.shift(5, freq=offsets.BDay())
|
|
assert len(shiftedFrame) == len(self.tsframe)
|
|
|
|
shiftedFrame2 = self.tsframe.shift(5, freq="B")
|
|
assert_frame_equal(shiftedFrame, shiftedFrame2)
|
|
|
|
d = self.tsframe.index[0]
|
|
shifted_d = d + offsets.BDay(5)
|
|
assert_series_equal(
|
|
self.tsframe.xs(d), shiftedFrame.xs(shifted_d), check_names=False
|
|
)
|
|
|
|
# shift int frame
|
|
int_shifted = self.intframe.shift(1) # noqa
|
|
|
|
# Shifting with PeriodIndex
|
|
ps = tm.makePeriodFrame()
|
|
shifted = ps.shift(1)
|
|
unshifted = shifted.shift(-1)
|
|
tm.assert_index_equal(shifted.index, ps.index)
|
|
tm.assert_index_equal(unshifted.index, ps.index)
|
|
tm.assert_numpy_array_equal(
|
|
unshifted.iloc[:, 0].dropna().values, ps.iloc[:-1, 0].values
|
|
)
|
|
|
|
shifted2 = ps.shift(1, "B")
|
|
shifted3 = ps.shift(1, offsets.BDay())
|
|
assert_frame_equal(shifted2, shifted3)
|
|
assert_frame_equal(ps, shifted2.shift(-1, "B"))
|
|
|
|
msg = "does not match PeriodIndex freq"
|
|
with pytest.raises(ValueError, match=msg):
|
|
ps.shift(freq="D")
|
|
|
|
# shift other axis
|
|
# GH 6371
|
|
df = DataFrame(np.random.rand(10, 5))
|
|
expected = pd.concat(
|
|
[DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]],
|
|
ignore_index=True,
|
|
axis=1,
|
|
)
|
|
result = df.shift(1, axis=1)
|
|
assert_frame_equal(result, expected)
|
|
|
|
# shift named axis
|
|
df = DataFrame(np.random.rand(10, 5))
|
|
expected = pd.concat(
|
|
[DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]],
|
|
ignore_index=True,
|
|
axis=1,
|
|
)
|
|
result = df.shift(1, axis="columns")
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_shift_bool(self):
|
|
df = DataFrame({"high": [True, False], "low": [False, False]})
|
|
rs = df.shift(1)
|
|
xp = DataFrame(
|
|
np.array([[np.nan, np.nan], [True, False]], dtype=object),
|
|
columns=["high", "low"],
|
|
)
|
|
assert_frame_equal(rs, xp)
|
|
|
|
def test_shift_categorical(self):
|
|
# GH 9416
|
|
s1 = pd.Series(["a", "b", "c"], dtype="category")
|
|
s2 = pd.Series(["A", "B", "C"], dtype="category")
|
|
df = DataFrame({"one": s1, "two": s2})
|
|
rs = df.shift(1)
|
|
xp = DataFrame({"one": s1.shift(1), "two": s2.shift(1)})
|
|
assert_frame_equal(rs, xp)
|
|
|
|
def test_shift_fill_value(self):
|
|
# GH #24128
|
|
df = DataFrame(
|
|
[1, 2, 3, 4, 5], index=date_range("1/1/2000", periods=5, freq="H")
|
|
)
|
|
exp = DataFrame(
|
|
[0, 1, 2, 3, 4], index=date_range("1/1/2000", periods=5, freq="H")
|
|
)
|
|
result = df.shift(1, fill_value=0)
|
|
assert_frame_equal(result, exp)
|
|
|
|
exp = DataFrame(
|
|
[0, 0, 1, 2, 3], index=date_range("1/1/2000", periods=5, freq="H")
|
|
)
|
|
result = df.shift(2, fill_value=0)
|
|
assert_frame_equal(result, exp)
|
|
|
|
def test_shift_empty(self):
|
|
# Regression test for #8019
|
|
df = DataFrame({"foo": []})
|
|
rs = df.shift(-1)
|
|
|
|
assert_frame_equal(df, rs)
|
|
|
|
def test_shift_duplicate_columns(self):
|
|
# GH 9092; verify that position-based shifting works
|
|
# in the presence of duplicate columns
|
|
column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]]
|
|
data = np.random.randn(20, 5)
|
|
|
|
shifted = []
|
|
for columns in column_lists:
|
|
df = pd.DataFrame(data.copy(), columns=columns)
|
|
for s in range(5):
|
|
df.iloc[:, s] = df.iloc[:, s].shift(s + 1)
|
|
df.columns = range(5)
|
|
shifted.append(df)
|
|
|
|
# sanity check the base case
|
|
nulls = shifted[0].isna().sum()
|
|
assert_series_equal(nulls, Series(range(1, 6), dtype="int64"))
|
|
|
|
# check all answers are the same
|
|
assert_frame_equal(shifted[0], shifted[1])
|
|
assert_frame_equal(shifted[0], shifted[2])
|
|
|
|
def test_tshift(self):
|
|
# PeriodIndex
|
|
ps = tm.makePeriodFrame()
|
|
shifted = ps.tshift(1)
|
|
unshifted = shifted.tshift(-1)
|
|
|
|
assert_frame_equal(unshifted, ps)
|
|
|
|
shifted2 = ps.tshift(freq="B")
|
|
assert_frame_equal(shifted, shifted2)
|
|
|
|
shifted3 = ps.tshift(freq=offsets.BDay())
|
|
assert_frame_equal(shifted, shifted3)
|
|
|
|
with pytest.raises(ValueError, match="does not match"):
|
|
ps.tshift(freq="M")
|
|
|
|
# DatetimeIndex
|
|
shifted = self.tsframe.tshift(1)
|
|
unshifted = shifted.tshift(-1)
|
|
|
|
assert_frame_equal(self.tsframe, unshifted)
|
|
|
|
shifted2 = self.tsframe.tshift(freq=self.tsframe.index.freq)
|
|
assert_frame_equal(shifted, shifted2)
|
|
|
|
inferred_ts = DataFrame(
|
|
self.tsframe.values,
|
|
Index(np.asarray(self.tsframe.index)),
|
|
columns=self.tsframe.columns,
|
|
)
|
|
shifted = inferred_ts.tshift(1)
|
|
unshifted = shifted.tshift(-1)
|
|
assert_frame_equal(shifted, self.tsframe.tshift(1))
|
|
assert_frame_equal(unshifted, inferred_ts)
|
|
|
|
no_freq = self.tsframe.iloc[[0, 5, 7], :]
|
|
msg = "Freq was not given and was not set in the index"
|
|
with pytest.raises(ValueError, match=msg):
|
|
no_freq.tshift()
|
|
|
|
def test_truncate(self):
|
|
ts = self.tsframe[::3]
|
|
|
|
start, end = self.tsframe.index[3], self.tsframe.index[6]
|
|
|
|
start_missing = self.tsframe.index[2]
|
|
end_missing = self.tsframe.index[7]
|
|
|
|
# neither specified
|
|
truncated = ts.truncate()
|
|
assert_frame_equal(truncated, ts)
|
|
|
|
# both specified
|
|
expected = ts[1:3]
|
|
|
|
truncated = ts.truncate(start, end)
|
|
assert_frame_equal(truncated, expected)
|
|
|
|
truncated = ts.truncate(start_missing, end_missing)
|
|
assert_frame_equal(truncated, expected)
|
|
|
|
# start specified
|
|
expected = ts[1:]
|
|
|
|
truncated = ts.truncate(before=start)
|
|
assert_frame_equal(truncated, expected)
|
|
|
|
truncated = ts.truncate(before=start_missing)
|
|
assert_frame_equal(truncated, expected)
|
|
|
|
# end specified
|
|
expected = ts[:3]
|
|
|
|
truncated = ts.truncate(after=end)
|
|
assert_frame_equal(truncated, expected)
|
|
|
|
truncated = ts.truncate(after=end_missing)
|
|
assert_frame_equal(truncated, expected)
|
|
|
|
msg = "Truncate: 2000-01-06 00:00:00 must be after 2000-02-04 00:00:00"
|
|
with pytest.raises(ValueError, match=msg):
|
|
ts.truncate(
|
|
before=ts.index[-1] - ts.index.freq, after=ts.index[0] + ts.index.freq
|
|
)
|
|
|
|
def test_truncate_copy(self):
|
|
index = self.tsframe.index
|
|
truncated = self.tsframe.truncate(index[5], index[10])
|
|
truncated.values[:] = 5.0
|
|
assert not (self.tsframe.values[5:11] == 5).any()
|
|
|
|
def test_truncate_nonsortedindex(self):
|
|
# GH 17935
|
|
|
|
df = pd.DataFrame({"A": ["a", "b", "c", "d", "e"]}, index=[5, 3, 2, 9, 0])
|
|
msg = "truncate requires a sorted index"
|
|
with pytest.raises(ValueError, match=msg):
|
|
df.truncate(before=3, after=9)
|
|
|
|
rng = pd.date_range("2011-01-01", "2012-01-01", freq="W")
|
|
ts = pd.DataFrame(
|
|
{"A": np.random.randn(len(rng)), "B": np.random.randn(len(rng))}, index=rng
|
|
)
|
|
msg = "truncate requires a sorted index"
|
|
with pytest.raises(ValueError, match=msg):
|
|
ts.sort_values("A", ascending=False).truncate(
|
|
before="2011-11", after="2011-12"
|
|
)
|
|
|
|
df = pd.DataFrame(
|
|
{
|
|
3: np.random.randn(5),
|
|
20: np.random.randn(5),
|
|
2: np.random.randn(5),
|
|
0: np.random.randn(5),
|
|
},
|
|
columns=[3, 20, 2, 0],
|
|
)
|
|
msg = "truncate requires a sorted index"
|
|
with pytest.raises(ValueError, match=msg):
|
|
df.truncate(before=2, after=20, axis=1)
|
|
|
|
def test_asfreq(self):
|
|
offset_monthly = self.tsframe.asfreq(offsets.BMonthEnd())
|
|
rule_monthly = self.tsframe.asfreq("BM")
|
|
|
|
tm.assert_almost_equal(offset_monthly["A"], rule_monthly["A"])
|
|
|
|
filled = rule_monthly.asfreq("B", method="pad") # noqa
|
|
# TODO: actually check that this worked.
|
|
|
|
# don't forget!
|
|
filled_dep = rule_monthly.asfreq("B", method="pad") # noqa
|
|
|
|
# test does not blow up on length-0 DataFrame
|
|
zero_length = self.tsframe.reindex([])
|
|
result = zero_length.asfreq("BM")
|
|
assert result is not zero_length
|
|
|
|
def test_asfreq_datetimeindex(self):
|
|
df = DataFrame(
|
|
{"A": [1, 2, 3]},
|
|
index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)],
|
|
)
|
|
df = df.asfreq("B")
|
|
assert isinstance(df.index, DatetimeIndex)
|
|
|
|
ts = df["A"].asfreq("B")
|
|
assert isinstance(ts.index, DatetimeIndex)
|
|
|
|
def test_asfreq_fillvalue(self):
|
|
# test for fill value during upsampling, related to issue 3715
|
|
|
|
# setup
|
|
rng = pd.date_range("1/1/2016", periods=10, freq="2S")
|
|
ts = pd.Series(np.arange(len(rng)), index=rng)
|
|
df = pd.DataFrame({"one": ts})
|
|
|
|
# insert pre-existing missing value
|
|
df.loc["2016-01-01 00:00:08", "one"] = None
|
|
|
|
actual_df = df.asfreq(freq="1S", fill_value=9.0)
|
|
expected_df = df.asfreq(freq="1S").fillna(9.0)
|
|
expected_df.loc["2016-01-01 00:00:08", "one"] = None
|
|
assert_frame_equal(expected_df, actual_df)
|
|
|
|
expected_series = ts.asfreq(freq="1S").fillna(9.0)
|
|
actual_series = ts.asfreq(freq="1S", fill_value=9.0)
|
|
assert_series_equal(expected_series, actual_series)
|
|
|
|
@pytest.mark.parametrize(
|
|
"data,idx,expected_first,expected_last",
|
|
[
|
|
({"A": [1, 2, 3]}, [1, 1, 2], 1, 2),
|
|
({"A": [1, 2, 3]}, [1, 2, 2], 1, 2),
|
|
({"A": [1, 2, 3, 4]}, ["d", "d", "d", "d"], "d", "d"),
|
|
({"A": [1, np.nan, 3]}, [1, 1, 2], 1, 2),
|
|
({"A": [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2),
|
|
({"A": [1, np.nan, 3]}, [1, 2, 2], 1, 2),
|
|
],
|
|
)
|
|
def test_first_last_valid(self, data, idx, expected_first, expected_last):
|
|
N = len(self.frame.index)
|
|
mat = np.random.randn(N)
|
|
mat[:5] = np.nan
|
|
mat[-5:] = np.nan
|
|
|
|
frame = DataFrame({"foo": mat}, index=self.frame.index)
|
|
index = frame.first_valid_index()
|
|
|
|
assert index == frame.index[5]
|
|
|
|
index = frame.last_valid_index()
|
|
assert index == frame.index[-6]
|
|
|
|
# GH12800
|
|
empty = DataFrame()
|
|
assert empty.last_valid_index() is None
|
|
assert empty.first_valid_index() is None
|
|
|
|
# GH17400: no valid entries
|
|
frame[:] = np.nan
|
|
assert frame.last_valid_index() is None
|
|
assert frame.first_valid_index() is None
|
|
|
|
# GH20499: its preserves freq with holes
|
|
frame.index = date_range("20110101", periods=N, freq="B")
|
|
frame.iloc[1] = 1
|
|
frame.iloc[-2] = 1
|
|
assert frame.first_valid_index() == frame.index[1]
|
|
assert frame.last_valid_index() == frame.index[-2]
|
|
assert frame.first_valid_index().freq == frame.index.freq
|
|
assert frame.last_valid_index().freq == frame.index.freq
|
|
|
|
# GH 21441
|
|
df = DataFrame(data, index=idx)
|
|
assert expected_first == df.first_valid_index()
|
|
assert expected_last == df.last_valid_index()
|
|
|
|
def test_first_subset(self):
|
|
ts = tm.makeTimeDataFrame(freq="12h")
|
|
result = ts.first("10d")
|
|
assert len(result) == 20
|
|
|
|
ts = tm.makeTimeDataFrame(freq="D")
|
|
result = ts.first("10d")
|
|
assert len(result) == 10
|
|
|
|
result = ts.first("3M")
|
|
expected = ts[:"3/31/2000"]
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = ts.first("21D")
|
|
expected = ts[:21]
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = ts[:0].first("3M")
|
|
assert_frame_equal(result, ts[:0])
|
|
|
|
def test_first_raises(self):
|
|
# GH20725
|
|
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
|
|
with pytest.raises(TypeError): # index is not a DatetimeIndex
|
|
df.first("1D")
|
|
|
|
def test_last_subset(self):
|
|
ts = tm.makeTimeDataFrame(freq="12h")
|
|
result = ts.last("10d")
|
|
assert len(result) == 20
|
|
|
|
ts = tm.makeTimeDataFrame(nper=30, freq="D")
|
|
result = ts.last("10d")
|
|
assert len(result) == 10
|
|
|
|
result = ts.last("21D")
|
|
expected = ts["2000-01-10":]
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = ts.last("21D")
|
|
expected = ts[-21:]
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = ts[:0].last("3M")
|
|
assert_frame_equal(result, ts[:0])
|
|
|
|
def test_last_raises(self):
|
|
# GH20725
|
|
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
|
|
with pytest.raises(TypeError): # index is not a DatetimeIndex
|
|
df.last("1D")
|
|
|
|
def test_at_time(self):
|
|
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
|
|
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
|
|
rs = ts.at_time(rng[1])
|
|
assert (rs.index.hour == rng[1].hour).all()
|
|
assert (rs.index.minute == rng[1].minute).all()
|
|
assert (rs.index.second == rng[1].second).all()
|
|
|
|
result = ts.at_time("9:30")
|
|
expected = ts.at_time(time(9, 30))
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = ts.loc[time(9, 30)]
|
|
expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)]
|
|
|
|
assert_frame_equal(result, expected)
|
|
|
|
# midnight, everything
|
|
rng = date_range("1/1/2000", "1/31/2000")
|
|
ts = DataFrame(np.random.randn(len(rng), 3), index=rng)
|
|
|
|
result = ts.at_time(time(0, 0))
|
|
assert_frame_equal(result, ts)
|
|
|
|
# time doesn't exist
|
|
rng = date_range("1/1/2012", freq="23Min", periods=384)
|
|
ts = DataFrame(np.random.randn(len(rng), 2), rng)
|
|
rs = ts.at_time("16:00")
|
|
assert len(rs) == 0
|
|
|
|
@pytest.mark.parametrize(
|
|
"hour", ["1:00", "1:00AM", time(1), time(1, tzinfo=pytz.UTC)]
|
|
)
|
|
def test_at_time_errors(self, hour):
|
|
# GH 24043
|
|
dti = pd.date_range("2018", periods=3, freq="H")
|
|
df = pd.DataFrame(list(range(len(dti))), index=dti)
|
|
if getattr(hour, "tzinfo", None) is None:
|
|
result = df.at_time(hour)
|
|
expected = df.iloc[1:2]
|
|
tm.assert_frame_equal(result, expected)
|
|
else:
|
|
with pytest.raises(ValueError, match="Index must be timezone"):
|
|
df.at_time(hour)
|
|
|
|
def test_at_time_tz(self):
|
|
# GH 24043
|
|
dti = pd.date_range("2018", periods=3, freq="H", tz="US/Pacific")
|
|
df = pd.DataFrame(list(range(len(dti))), index=dti)
|
|
result = df.at_time(time(4, tzinfo=pytz.timezone("US/Eastern")))
|
|
expected = df.iloc[1:2]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_at_time_raises(self):
|
|
# GH20725
|
|
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
|
|
with pytest.raises(TypeError): # index is not a DatetimeIndex
|
|
df.at_time("00:00")
|
|
|
|
@pytest.mark.parametrize("axis", ["index", "columns", 0, 1])
|
|
def test_at_time_axis(self, axis):
|
|
# issue 8839
|
|
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
|
|
ts = DataFrame(np.random.randn(len(rng), len(rng)))
|
|
ts.index, ts.columns = rng, rng
|
|
|
|
indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)]
|
|
|
|
if axis in ["index", 0]:
|
|
expected = ts.loc[indices, :]
|
|
elif axis in ["columns", 1]:
|
|
expected = ts.loc[:, indices]
|
|
|
|
result = ts.at_time("9:30", axis=axis)
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_between_time(self, close_open_fixture):
|
|
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
|
|
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
|
|
stime = time(0, 0)
|
|
etime = time(1, 0)
|
|
inc_start, inc_end = close_open_fixture
|
|
|
|
filtered = ts.between_time(stime, etime, inc_start, inc_end)
|
|
exp_len = 13 * 4 + 1
|
|
if not inc_start:
|
|
exp_len -= 5
|
|
if not inc_end:
|
|
exp_len -= 4
|
|
|
|
assert len(filtered) == exp_len
|
|
for rs in filtered.index:
|
|
t = rs.time()
|
|
if inc_start:
|
|
assert t >= stime
|
|
else:
|
|
assert t > stime
|
|
|
|
if inc_end:
|
|
assert t <= etime
|
|
else:
|
|
assert t < etime
|
|
|
|
result = ts.between_time("00:00", "01:00")
|
|
expected = ts.between_time(stime, etime)
|
|
assert_frame_equal(result, expected)
|
|
|
|
# across midnight
|
|
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
|
|
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
|
|
stime = time(22, 0)
|
|
etime = time(9, 0)
|
|
|
|
filtered = ts.between_time(stime, etime, inc_start, inc_end)
|
|
exp_len = (12 * 11 + 1) * 4 + 1
|
|
if not inc_start:
|
|
exp_len -= 4
|
|
if not inc_end:
|
|
exp_len -= 4
|
|
|
|
assert len(filtered) == exp_len
|
|
for rs in filtered.index:
|
|
t = rs.time()
|
|
if inc_start:
|
|
assert (t >= stime) or (t <= etime)
|
|
else:
|
|
assert (t > stime) or (t <= etime)
|
|
|
|
if inc_end:
|
|
assert (t <= etime) or (t >= stime)
|
|
else:
|
|
assert (t < etime) or (t >= stime)
|
|
|
|
def test_between_time_raises(self):
|
|
# GH20725
|
|
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
|
|
with pytest.raises(TypeError): # index is not a DatetimeIndex
|
|
df.between_time(start_time="00:00", end_time="12:00")
|
|
|
|
def test_between_time_axis(self, axis):
|
|
# issue 8839
|
|
rng = date_range("1/1/2000", periods=100, freq="10min")
|
|
ts = DataFrame(np.random.randn(len(rng), len(rng)))
|
|
stime, etime = ("08:00:00", "09:00:00")
|
|
exp_len = 7
|
|
|
|
if axis in ["index", 0]:
|
|
ts.index = rng
|
|
assert len(ts.between_time(stime, etime)) == exp_len
|
|
assert len(ts.between_time(stime, etime, axis=0)) == exp_len
|
|
|
|
if axis in ["columns", 1]:
|
|
ts.columns = rng
|
|
selected = ts.between_time(stime, etime, axis=1).columns
|
|
assert len(selected) == exp_len
|
|
|
|
def test_between_time_axis_raises(self, axis):
|
|
# issue 8839
|
|
rng = date_range("1/1/2000", periods=100, freq="10min")
|
|
mask = np.arange(0, len(rng))
|
|
rand_data = np.random.randn(len(rng), len(rng))
|
|
ts = DataFrame(rand_data, index=rng, columns=rng)
|
|
stime, etime = ("08:00:00", "09:00:00")
|
|
|
|
msg = "Index must be DatetimeIndex"
|
|
if axis in ["columns", 1]:
|
|
ts.index = mask
|
|
with pytest.raises(TypeError, match=msg):
|
|
ts.between_time(stime, etime)
|
|
with pytest.raises(TypeError, match=msg):
|
|
ts.between_time(stime, etime, axis=0)
|
|
|
|
if axis in ["index", 0]:
|
|
ts.columns = mask
|
|
with pytest.raises(TypeError, match=msg):
|
|
ts.between_time(stime, etime, axis=1)
|
|
|
|
def test_operation_on_NaT(self):
|
|
# Both NaT and Timestamp are in DataFrame.
|
|
df = pd.DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]})
|
|
|
|
res = df.min()
|
|
exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"])
|
|
tm.assert_series_equal(res, exp)
|
|
|
|
res = df.max()
|
|
exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"])
|
|
tm.assert_series_equal(res, exp)
|
|
|
|
# GH12941, only NaTs are in DataFrame.
|
|
df = pd.DataFrame({"foo": [pd.NaT, pd.NaT]})
|
|
|
|
res = df.min()
|
|
exp = pd.Series([pd.NaT], index=["foo"])
|
|
tm.assert_series_equal(res, exp)
|
|
|
|
res = df.max()
|
|
exp = pd.Series([pd.NaT], index=["foo"])
|
|
tm.assert_series_equal(res, exp)
|
|
|
|
def test_datetime_assignment_with_NaT_and_diff_time_units(self):
|
|
# GH 7492
|
|
data_ns = np.array([1, "nat"], dtype="datetime64[ns]")
|
|
result = pd.Series(data_ns).to_frame()
|
|
result["new"] = data_ns
|
|
expected = pd.DataFrame(
|
|
{0: [1, None], "new": [1, None]}, dtype="datetime64[ns]"
|
|
)
|
|
tm.assert_frame_equal(result, expected)
|
|
# OutOfBoundsDatetime error shouldn't occur
|
|
data_s = np.array([1, "nat"], dtype="datetime64[s]")
|
|
result["new"] = data_s
|
|
expected = pd.DataFrame(
|
|
{0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]"
|
|
)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_frame_to_period(self):
|
|
K = 5
|
|
|
|
dr = date_range("1/1/2000", "1/1/2001")
|
|
pr = period_range("1/1/2000", "1/1/2001")
|
|
df = DataFrame(np.random.randn(len(dr), K), index=dr)
|
|
df["mix"] = "a"
|
|
|
|
pts = df.to_period()
|
|
exp = df.copy()
|
|
exp.index = pr
|
|
assert_frame_equal(pts, exp)
|
|
|
|
pts = df.to_period("M")
|
|
tm.assert_index_equal(pts.index, exp.index.asfreq("M"))
|
|
|
|
df = df.T
|
|
pts = df.to_period(axis=1)
|
|
exp = df.copy()
|
|
exp.columns = pr
|
|
assert_frame_equal(pts, exp)
|
|
|
|
pts = df.to_period("M", axis=1)
|
|
tm.assert_index_equal(pts.columns, exp.columns.asfreq("M"))
|
|
|
|
msg = "No axis named 2 for object type <class 'pandas.core.frame.DataFrame'>"
|
|
with pytest.raises(ValueError, match=msg):
|
|
df.to_period(axis=2)
|
|
|
|
@pytest.mark.parametrize("fn", ["tz_localize", "tz_convert"])
|
|
def test_tz_convert_and_localize(self, fn):
|
|
l0 = date_range("20140701", periods=5, freq="D")
|
|
l1 = date_range("20140701", periods=5, freq="D")
|
|
|
|
int_idx = Index(range(5))
|
|
|
|
if fn == "tz_convert":
|
|
l0 = l0.tz_localize("UTC")
|
|
l1 = l1.tz_localize("UTC")
|
|
|
|
for idx in [l0, l1]:
|
|
|
|
l0_expected = getattr(idx, fn)("US/Pacific")
|
|
l1_expected = getattr(idx, fn)("US/Pacific")
|
|
|
|
df1 = DataFrame(np.ones(5), index=l0)
|
|
df1 = getattr(df1, fn)("US/Pacific")
|
|
assert_index_equal(df1.index, l0_expected)
|
|
|
|
# MultiIndex
|
|
# GH7846
|
|
df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1]))
|
|
|
|
df3 = getattr(df2, fn)("US/Pacific", level=0)
|
|
assert not df3.index.levels[0].equals(l0)
|
|
assert_index_equal(df3.index.levels[0], l0_expected)
|
|
assert_index_equal(df3.index.levels[1], l1)
|
|
assert not df3.index.levels[1].equals(l1_expected)
|
|
|
|
df3 = getattr(df2, fn)("US/Pacific", level=1)
|
|
assert_index_equal(df3.index.levels[0], l0)
|
|
assert not df3.index.levels[0].equals(l0_expected)
|
|
assert_index_equal(df3.index.levels[1], l1_expected)
|
|
assert not df3.index.levels[1].equals(l1)
|
|
|
|
df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0]))
|
|
|
|
# TODO: untested
|
|
df5 = getattr(df4, fn)("US/Pacific", level=1) # noqa
|
|
|
|
assert_index_equal(df3.index.levels[0], l0)
|
|
assert not df3.index.levels[0].equals(l0_expected)
|
|
assert_index_equal(df3.index.levels[1], l1_expected)
|
|
assert not df3.index.levels[1].equals(l1)
|
|
|
|
# Bad Inputs
|
|
|
|
# Not DatetimeIndex / PeriodIndex
|
|
with pytest.raises(TypeError, match="DatetimeIndex"):
|
|
df = DataFrame(index=int_idx)
|
|
df = getattr(df, fn)("US/Pacific")
|
|
|
|
# Not DatetimeIndex / PeriodIndex
|
|
with pytest.raises(TypeError, match="DatetimeIndex"):
|
|
df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0]))
|
|
df = getattr(df, fn)("US/Pacific", level=0)
|
|
|
|
# Invalid level
|
|
with pytest.raises(ValueError, match="not valid"):
|
|
df = DataFrame(index=l0)
|
|
df = getattr(df, fn)("US/Pacific", level=1)
|