8th day of python challenges 111-117
This commit is contained in:
@@ -0,0 +1,158 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Series
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.indexes.period import period_range
|
||||
|
||||
# Names of the resampler methods that the parametrized fixtures iterate over.
downsample_methods = [
    "min", "max", "first", "last", "sum", "mean", "sem",
    "median", "prod", "var", "std", "ohlc", "quantile",
]  # fmt: skip
upsample_methods = ["count", "size"]
series_methods = ["nunique"]
# Every method name, in the order: downsample, upsample, series-only.
resample_methods = [*downsample_methods, *upsample_methods, *series_methods]
|
||||
|
||||
|
||||
@pytest.fixture(params=downsample_methods)
def downsample_method(request):
    """Return the entry of ``downsample_methods`` chosen for this test run."""
    method_name = request.param
    return method_name
|
||||
|
||||
|
||||
@pytest.fixture(params=upsample_methods)
def upsample_method(request):
    """Return the entry of ``upsample_methods`` chosen for this test run."""
    method_name = request.param
    return method_name
|
||||
|
||||
|
||||
@pytest.fixture(params=resample_methods)
def resample_method(request):
    """Return the entry of ``resample_methods`` chosen for this test run."""
    method_name = request.param
    return method_name
|
||||
|
||||
|
||||
@pytest.fixture
def simple_date_range_series():
    """
    Factory fixture returning a builder of random Series on a date_range index.

    The returned callable takes ``start``, ``end`` and an optional ``freq``
    (default ``"D"``) and fills the Series with ``np.random.randn`` values.
    """

    def _simple_date_range_series(start, end, freq="D"):
        idx = date_range(start, end, freq=freq)
        values = np.random.randn(len(idx))
        return Series(values, index=idx)

    return _simple_date_range_series
|
||||
|
||||
|
||||
@pytest.fixture
def simple_period_range_series():
    """
    Factory fixture returning a builder of random Series on a period_range index.

    The returned callable takes ``start``, ``end`` and an optional ``freq``
    (default ``"D"``) and fills the Series with ``np.random.randn`` values.
    """

    def _simple_period_range_series(start, end, freq="D"):
        idx = period_range(start, end, freq=freq)
        values = np.random.randn(len(idx))
        return Series(values, index=idx)

    return _simple_period_range_series
|
||||
|
||||
|
||||
@pytest.fixture
def _index_start():
    """Start value handed to the index factory by the ``index`` fixture."""
    start = datetime(2005, 1, 1)
    return start
|
||||
|
||||
|
||||
@pytest.fixture
def _index_end():
    """End value handed to the index factory by the ``index`` fixture."""
    end = datetime(2005, 1, 10)
    return end
|
||||
|
||||
|
||||
@pytest.fixture
def _index_freq():
    """Frequency string handed to the index factory by the ``index`` fixture."""
    freq = "D"
    return freq
|
||||
|
||||
|
||||
@pytest.fixture
def _index_name():
    """Name handed to the index factory by the ``index`` fixture (none by default)."""
    name = None
    return name
|
||||
|
||||
|
||||
@pytest.fixture
def index(_index_factory, _index_start, _index_end, _index_freq, _index_name):
    """Build the index under test.

    Calls ``_index_factory`` with the start and end positionally and the
    frequency and name as keywords, all taken from the sibling fixtures.
    """
    factory_kwargs = {"freq": _index_freq, "name": _index_name}
    return _index_factory(_index_start, _index_end, **factory_kwargs)
|
||||
|
||||
|
||||
@pytest.fixture
def _static_values(index):
    """Return ``0, 1, ..., len(index) - 1`` as the data for ``series``/``frame``."""
    n_rows = len(index)
    return np.arange(n_rows)
|
||||
|
||||
|
||||
@pytest.fixture
def _series_name():
    """Name given to the Series built by the ``series`` fixture (none by default)."""
    name = None
    return name
|
||||
|
||||
|
||||
@pytest.fixture
def series(index, _series_name, _static_values):
    """Series holding ``_static_values`` on ``index``, named ``_series_name``."""
    return Series(data=_static_values, index=index, name=_series_name)
|
||||
|
||||
|
||||
@pytest.fixture
def empty_series(series):
    """Zero-length slice of the ``series`` fixture."""
    nothing = slice(None, 0)
    return series[nothing]
|
||||
|
||||
|
||||
@pytest.fixture
def frame(index, _series_name, _static_values):
    """Single-column DataFrame (column ``"value"``) of ``_static_values`` on ``index``.

    ``_series_name`` is requested but deliberately not used in the body.
    """
    data = {"value": _static_values}
    return DataFrame(data, index=index)
|
||||
|
||||
|
||||
@pytest.fixture
def empty_frame(series):
    """DataFrame with no columns, indexed by a zero-length slice of
    the ``series`` fixture's index."""
    return DataFrame(index=series.index[:0])
|
||||
|
||||
|
||||
@pytest.fixture(params=[Series, DataFrame])
def series_and_frame(request, series, frame):
    """Return the ``series`` fixture or the ``frame`` fixture.

    The fixture is parametrized over the ``Series`` and ``DataFrame`` classes;
    the class selected for the current run decides which object is returned.

    Raises
    ------
    ValueError
        If the fixture is ever parametrized with a class other than
        ``Series`` or ``DataFrame``.
    """
    container = request.param
    # Classes are compared by identity; ``==`` on classes is the same check
    # spelled less clearly.
    if container is Series:
        return series
    if container is DataFrame:
        return frame
    # The original fell through here and silently handed ``None`` to the test.
    raise ValueError(
        "series_and_frame got an unsupported param: {!r}".format(container)
    )
|
@@ -0,0 +1,228 @@
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series
|
||||
from pandas.core.groupby.groupby import DataError
|
||||
from pandas.core.groupby.grouper import Grouper
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.indexes.period import PeriodIndex, period_range
|
||||
from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import (
|
||||
assert_almost_equal,
|
||||
assert_frame_equal,
|
||||
assert_index_equal,
|
||||
assert_series_equal,
|
||||
)
|
||||
|
||||
# pytest lets a test's direct parametrization replace the value of a fixture,
# and this works even when the test does not name that fixture in its own
# signature.
# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa
# This module uses that mechanism to override fixture values from conftest.py.
# Each tuple below is laid out as
# '_index_factory,_series_name,_index_start,_index_end'.
DATE_RANGE = (
    date_range,
    "dti",
    datetime(2005, 1, 1),
    datetime(2005, 1, 10),
)
PERIOD_RANGE = (
    period_range,
    "pi",
    datetime(2005, 1, 1),
    datetime(2005, 1, 10),
)
TIMEDELTA_RANGE = (
    timedelta_range,
    "tdi",
    "1 day",
    "10 day",
)
|
||||
|
||||
# Decorator that parametrizes a test over the DATE_RANGE, PERIOD_RANGE and
# TIMEDELTA_RANGE tuples, one run per tuple.
all_ts = pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end",
    [
        DATE_RANGE,
        PERIOD_RANGE,
        TIMEDELTA_RANGE,
    ],
)
|
||||
|
||||
|
||||
@pytest.fixture
def create_index(_index_factory):
    """Return a callable that forwards its arguments to ``_index_factory``."""

    def _make_index(*args, **kwargs):
        """Call ``_index_factory`` with ``args``/``kwargs`` and return the result."""
        return _index_factory(*args, **kwargs)

    return _make_index
|
||||
|
||||
|
||||
@pytest.mark.parametrize("freq", ["2D", "1H"])
@pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq(series_and_frame, freq, create_index):
    """``resample(freq).asfreq()`` must match reindexing onto a ``freq`` index
    spanning the object's first and last labels."""
    obj = series_and_frame

    result = obj.resample(freq).asfreq()

    first_label, last_label = obj.index[0], obj.index[-1]
    target_index = create_index(first_label, last_label, freq=freq)
    expected = obj.reindex(target_index)

    assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq_fill_value(series, create_index):
    """Check ``asfreq`` with and without ``fill_value`` (issue 3715).

    First the plain Series is upsampled to "1H" and compared with a reindex;
    then a one-column frame with its second row set to None is upsampled with
    ``fill_value=4.0`` and compared with a reindex using the same fill value.
    """
    s = series

    # Series, no fill value.
    series_result = s.resample("1H").asfreq()
    hourly_index = create_index(s.index[0], s.index[-1], freq="1H")
    series_expected = s.reindex(hourly_index)
    assert_series_equal(series_result, series_expected)

    # Frame with a missing row, explicit fill value.
    frame = s.to_frame("value")
    frame.iloc[1] = None
    frame_result = frame.resample("1H").asfreq(fill_value=4.0)
    hourly_index = create_index(frame.index[0], frame.index[-1], freq="1H")
    frame_expected = frame.reindex(hourly_index, fill_value=4.0)
    assert_frame_equal(frame_result, frame_expected)
|
||||
|
||||
|
||||
@all_ts
def test_resample_interpolate(frame):
    """``resample("1T").interpolate()`` must equal ``asfreq()`` followed by
    ``interpolate()`` (issue 12925)."""
    via_asfreq = frame.resample("1T").asfreq().interpolate()
    direct = frame.resample("1T").interpolate()
    assert_frame_equal(via_asfreq, direct)
|
||||
|
||||
|
||||
def test_raises_on_non_datetimelike_index():
    """Resampling a default-constructed DataFrame must raise ``TypeError``."""
    # A bare DataFrame() does not carry a datetime-like index.
    non_datetimelike = DataFrame()
    msg = (
        "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
        "but got an instance of 'Index'"
    )
    with pytest.raises(TypeError, match=msg):
        non_datetimelike.resample("A").mean()
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_empty_series(freq, empty_series, resample_method):
    """Resampling an empty Series returns it with the index moved to ``freq``.

    GH12771 & GH12868. The "ohlc" method is skipped (see GH13083).
    """
    if resample_method == "ohlc":
        pytest.skip("need to test for ohlc from GH13083")

    s = empty_series
    aggregate = getattr(s.resample(freq), resample_method)
    result = aggregate()

    # Expected: the same (empty) data, with the index carrying the new freq.
    expected = s.copy()
    source_index = s.index
    if isinstance(source_index, PeriodIndex):
        expected.index = source_index.asfreq(freq=freq)
    else:
        expected.index = source_index._shallow_copy(freq=freq)

    assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
    assert_series_equal(result, expected, check_dtype=False)
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_empty_dataframe(empty_frame, freq, resample_method):
    """Resampling an empty DataFrame keeps it empty with the index at ``freq``.

    GH13212. For "size" the expected object is an empty Series (GH14962);
    for every other method it is a copy of the input frame.
    """
    df = empty_frame
    # count retains dimensions too
    aggregate = getattr(df.resample(freq), resample_method)
    result = aggregate()

    if resample_method == "size":
        # GH14962
        expected = Series([])
    else:
        expected = df.copy()

    source_index = df.index
    if isinstance(source_index, PeriodIndex):
        expected.index = source_index.asfreq(freq=freq)
    else:
        expected.index = source_index._shallow_copy(freq=freq)

    assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
    assert_almost_equal(result, expected, check_dtype=False)

    # test size for GH13212 (currently stays as df)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
# Builtin types replace the ``np.float`` / ``np.int`` / ``np.object`` aliases:
# those were plain aliases of the builtins and are gone from newer NumPy.
@pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"])
def test_resample_empty_dtypes(index, dtype, resample_method):
    """Smoke test: resampling an empty Series must not crash (GH #10228).

    Empty series were sometimes causing a segfault (for the functions with
    Cython bounds-checking disabled) or an IndexError. The test only runs each
    resample method; it makes no assertion about the result.
    """
    empty_series = Series([], index=index, dtype=dtype)
    try:
        getattr(empty_series.resample("d"), resample_method)()
    except DataError:
        # Ignore these since some combinations are invalid
        # (ex: doing mean with dtype of object)
        pass
|
||||
|
||||
|
||||
@all_ts
def test_resample_loffset_arg_type(frame, create_index):
    """Check ``loffset`` for every spelling of the aggregation argument.

    GH 13218, 15002. The string, dict and list forms of "mean" are each
    applied to "2D" bins with ``loffset="2H"``, once through ``.agg(arg)`` and
    once through the ``how=arg`` keyword (which must emit a FutureWarning).
    With a TimedeltaIndex the comparison is currently expected to fail
    (GH 13022, 7687), so the test asserts that failure instead.
    """
    df = frame
    values = df.values
    # Mean of each consecutive pair of rows: one value per 2-row bin.
    expected_means = [values[i : i + 2].mean() for i in range(0, len(values), 2)]
    # ``periods`` is a count, so use floor division rather than passing a float.
    expected_index = create_index(df.index[0], periods=len(df.index) // 2, freq="2D")

    # loffset coerces PeriodIndex to DateTimeIndex
    if isinstance(expected_index, PeriodIndex):
        expected_index = expected_index.to_timestamp()

    expected_index += timedelta(hours=2)
    expected = DataFrame({"value": expected_means}, index=expected_index)

    for arg in ["mean", {"value": "mean"}, ["mean"]]:

        result_agg = df.resample("2D", loffset="2H").agg(arg)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result_how = df.resample("2D", how=arg, loffset="2H")

        # A list argument produces two-level columns; it is the last item in
        # the loop, so mutating ``expected`` here does not affect other cases.
        if isinstance(arg, list):
            expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])

        # GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
        if isinstance(expected.index, TimedeltaIndex):
            msg = "DataFrame are different"
            with pytest.raises(AssertionError, match=msg):
                assert_frame_equal(result_agg, expected)
            with pytest.raises(AssertionError, match=msg):
                assert_frame_equal(result_how, expected)
        else:
            assert_frame_equal(result_agg, expected)
            assert_frame_equal(result_how, expected)
|
||||
|
||||
|
||||
@all_ts
def test_apply_to_empty_series(empty_series):
    """On an empty Series, ``apply`` with a constant lambda must give the same
    result as ``apply(np.sum)`` for each tested frequency (GH 14313)."""
    s = empty_series
    for rule in ("M", "D", "H"):
        from_lambda = s.resample(rule).apply(lambda x: 1)
        from_sum = s.resample(rule).apply(np.sum)

        assert_series_equal(from_lambda, from_sum, check_dtype=False)
|
||||
|
||||
|
||||
@all_ts
def test_resampler_is_iterable(series):
    """Iterating a resampler yields the same (key, group) pairs as iterating
    the equivalent ``groupby(Grouper(...))`` (GH 15314)."""
    freq = "H"
    grouped = series.groupby(Grouper(freq=freq, convention="start"))
    resampled = series.resample(freq)

    for resampled_item, grouped_item in zip(resampled, grouped):
        resampled_key, resampled_values = resampled_item
        grouped_key, grouped_values = grouped_item
        assert resampled_key == grouped_key
        assert_series_equal(resampled_values, grouped_values)
|
||||
|
||||
|
||||
@all_ts
def test_resample_quantile(series):
    """``Resampler.quantile(q)`` must equal aggregating each bin with
    ``Series.quantile(q)`` (GH 15023)."""
    s = series
    q = 0.75
    freq = "H"

    result = s.resample(freq).quantile(q)

    per_bin = s.resample(freq).agg(lambda x: x.quantile(q))
    expected = per_bin.rename(s.name)

    tm.assert_series_equal(result, expected)
|
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,880 @@
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import dateutil
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._libs.tslibs.ccalendar import DAYS, MONTHS
|
||||
from pandas._libs.tslibs.period import IncompatibleFrequency
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series, Timestamp
|
||||
from pandas.core.indexes.base import InvalidIndexError
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.indexes.period import Period, PeriodIndex, period_range
|
||||
from pandas.core.resample import _get_period_range_edges
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import (
|
||||
assert_almost_equal,
|
||||
assert_frame_equal,
|
||||
assert_series_equal,
|
||||
)
|
||||
|
||||
import pandas.tseries.offsets as offsets
|
||||
|
||||
|
||||
@pytest.fixture()
def _index_factory():
    """Index factory for this module's fixtures: ``period_range``."""
    factory = period_range
    return factory
|
||||
|
||||
|
||||
@pytest.fixture
def _series_name():
    """Series name for this module's fixtures: ``"pi"``."""
    name = "pi"
    return name
|
||||
|
||||
|
||||
class TestPeriodIndex:
|
||||
@pytest.mark.parametrize("freq", ["2D", "1H", "2H"])
@pytest.mark.parametrize("kind", ["period", None, "timestamp"])
def test_asfreq(self, series_and_frame, freq, kind):
    """Check ``resample(freq, kind=kind).asfreq()`` on period-indexed data.

    GH 12884, 15944: make sure .asfreq() returns PeriodIndex (except
    kind='timestamp').
    """
    obj = series_and_frame
    stamped = obj.to_timestamp()

    if kind != "timestamp":
        # Left-closed timestamp grid from the first period's start up to the
        # start of the period following the last one, converted back to periods.
        first_start = obj.index[0].to_timestamp(how="start")
        after_last = (obj.index[-1] + obj.index.freq).to_timestamp(how="start")
        grid = date_range(start=first_start, end=after_last, freq=freq, closed="left")
        expected = stamped.reindex(grid).to_period(freq)
    else:
        expected = stamped.resample(freq).asfreq()

    result = obj.resample(freq, kind=kind).asfreq()
    assert_almost_equal(result, expected)
|
||||
|
||||
def test_asfreq_fill_value(self, series):
    """Check ``asfreq(fill_value=...)`` with ``kind="timestamp"`` (issue 3715).

    Runs once on the Series with fill value 4.0 and once on its one-column
    frame with fill value 3.0; each is compared with a timestamp reindex.
    """
    s = series

    # Series case.
    series_start = s.index[0].to_timestamp(how="start")
    series_end = (s.index[-1]).to_timestamp(how="start")
    hourly = date_range(series_start, series_end, freq="1H")
    series_expected = s.to_timestamp().reindex(hourly, fill_value=4.0)
    series_result = s.resample("1H", kind="timestamp").asfreq(fill_value=4.0)
    assert_series_equal(series_result, series_expected)

    # Frame case.
    frame = s.to_frame("value")
    frame_start = frame.index[0].to_timestamp(how="start")
    frame_end = (frame.index[-1]).to_timestamp(how="start")
    hourly = date_range(frame_start, frame_end, freq="1H")
    frame_expected = frame.to_timestamp().reindex(hourly, fill_value=3.0)
    frame_result = frame.resample("1H", kind="timestamp").asfreq(fill_value=3.0)
    assert_frame_equal(frame_result, frame_expected)
|
||||
|
||||
@pytest.mark.parametrize("freq", ["H", "12H", "2D", "W"])
@pytest.mark.parametrize("kind", [None, "period", "timestamp"])
@pytest.mark.parametrize("kwargs", [{"on": "date"}, {"level": "d"}])
def test_selection(self, index, freq, kind, kwargs):
    """Resampling via ``on=`` or ``level=`` must raise ``NotImplementedError``.

    This is a bug, these should be implemented (GH 14008).
    """
    positions = np.arange(len(index), dtype=np.int64)
    row_index = pd.MultiIndex.from_arrays([positions, index], names=["v", "d"])
    df = DataFrame({"date": index, "a": positions}, index=row_index)

    msg = (
        "Resampling from level= or on= selection with a PeriodIndex is"
        r" not currently supported, use \.set_index\(\.\.\.\) to"
        " explicitly set index"
    )
    with pytest.raises(NotImplementedError, match=msg):
        df.resample(freq, kind=kind, **kwargs)
|
||||
|
||||
@pytest.mark.parametrize("month", MONTHS)
@pytest.mark.parametrize("meth", ["ffill", "bfill"])
@pytest.mark.parametrize("conv", ["start", "end"])
@pytest.mark.parametrize("targ", ["D", "B", "M"])
def test_annual_upsample_cases(
    self, targ, conv, meth, month, simple_period_range_series
):
    """Upsample an ``A-<month>`` series to ``targ`` with fill method ``meth``
    and compare against a timestamp round trip of the result."""
    annual_freq = "A-{month}".format(month=month)
    ts = simple_period_range_series("1/1/1990", "12/31/1991", freq=annual_freq)

    fill = getattr(ts.resample(targ, convention=conv), meth)
    result = fill()

    stamped = result.to_timestamp(targ, how=conv)
    expected = stamped.asfreq(targ, meth).to_period()
    assert_series_equal(result, expected)
|
||||
|
||||
def test_basic_downsample(self, simple_period_range_series):
    """Resampling an "M" series to "a-dec" must equal grouping by year; the
    "a-dec" and "a" spellings must agree."""
    ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M")
    result = ts.resample("a-dec").mean()

    by_year = ts.groupby(ts.index.year).mean()
    by_year.index = period_range("1/1/1990", "6/30/1995", freq="a-dec")
    assert_series_equal(result, by_year)

    # this is ok
    for rule in ("a-dec", "a"):
        assert_series_equal(ts.resample(rule).mean(), result)
|
||||
|
||||
@pytest.mark.parametrize(
    "rule,expected_error_msg",
    [
        ("a-dec", "<YearEnd: month=12>"),
        ("q-mar", "<QuarterEnd: startingMonth=3>"),
        ("M", "<MonthEnd>"),
        ("w-thu", "<Week: weekday=3>"),
    ],
)
def test_not_subperiod(self, simple_period_range_series, rule, expected_error_msg):
    """Resampling a "w-wed" series to an incompatible rule must raise
    ``IncompatibleFrequency`` naming both frequencies."""
    # These are incompatible period rules for resampling
    weekly = simple_period_range_series("1/1/1990", "6/30/1995", freq="w-wed")
    template = (
        "Frequency <Week: weekday=2> cannot be resampled to {}, as they"
        " are not sub or super periods"
    )
    msg = template.format(expected_error_msg)
    with pytest.raises(IncompatibleFrequency, match=msg):
        weekly.resample(rule).mean()
|
||||
|
||||
@pytest.mark.parametrize("freq", ["D", "2D"])
def test_basic_upsample(self, freq, simple_period_range_series):
    """Forward-fill upsampling of an "a-dec" series to ``freq`` must match
    a timestamp ``asfreq`` round trip."""
    monthly = simple_period_range_series("1/1/1990", "6/30/1995", freq="M")
    annual = monthly.resample("a-dec").mean()

    resampled = annual.resample(freq, convention="end").ffill()

    stamped = annual.to_timestamp(freq, how="end")
    expected = stamped.asfreq(freq, "ffill").to_period(freq)
    assert_series_equal(resampled, expected)
|
||||
|
||||
def test_upsample_with_limit(self):
    """``ffill(limit=2)`` on an "A" to "M" upsample must match a reindex with
    ``method="ffill", limit=2``."""
    annual_index = period_range("1/1/2000", periods=5, freq="A")
    ts = Series(np.random.randn(len(annual_index)), annual_index)

    result = ts.resample("M", convention="end").ffill(limit=2)
    monthly = ts.asfreq("M")
    expected = monthly.reindex(result.index, method="ffill", limit=2)
    assert_series_equal(result, expected)
|
||||
|
||||
def test_annual_upsample(self, simple_period_range_series):
    """Forward-fill upsampling of "A-DEC" data.

    First checks that a frame column and the standalone Series agree when
    upsampled to "D"; then checks an "M" upsample against an explicit
    ``asfreq`` + ``reindex``.
    """
    annual = simple_period_range_series("1/1/1990", "12/31/1995", freq="A-DEC")
    df = DataFrame({"a": annual})
    frame_filled = df.resample("D").ffill()
    column_filled = df["a"].resample("D").ffill()
    assert_series_equal(frame_filled["a"], column_filled)

    years = period_range("2000", "2003", freq="A-DEC")
    ts = Series([1, 2, 3, 4], index=years)

    result = ts.resample("M").ffill()
    months = period_range("2000-01", "2003-12", freq="M")
    expected = ts.asfreq("M", how="start").reindex(months, method="ffill")
    assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("month", MONTHS)
@pytest.mark.parametrize("target", ["D", "B", "M"])
@pytest.mark.parametrize("convention", ["start", "end"])
def test_quarterly_upsample(
    self, month, target, convention, simple_period_range_series
):
    """Forward-fill upsampling of a ``Q-<month>`` series to ``target`` must
    match a timestamp ``asfreq`` round trip of the result."""
    quarterly_freq = "Q-{month}".format(month=month)
    ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=quarterly_freq)

    result = ts.resample(target, convention=convention).ffill()

    stamped = result.to_timestamp(target, how=convention)
    expected = stamped.asfreq(target, "ffill").to_period()
    assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("target", ["D", "B"])
@pytest.mark.parametrize("convention", ["start", "end"])
def test_monthly_upsample(self, target, convention, simple_period_range_series):
    """Forward-fill upsampling of an "M" series to ``target`` must match a
    timestamp ``asfreq`` round trip of the result."""
    ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M")

    result = ts.resample(target, convention=convention).ffill()

    stamped = result.to_timestamp(target, how=convention)
    expected = stamped.asfreq(target, "ffill").to_period()
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_basic(self):
    """Mean-resample 100 "s"-spaced values (with a NaN stretch) to "T" periods.

    GH3609. The same expected result must come from the period-converted
    Series and from the timestamp Series resampled with ``kind="period"``.
    """
    stamps = date_range("20130101", freq="s", periods=100, name="idx")
    s = Series(range(100), index=stamps, dtype="float")
    s[10:30] = np.nan

    bins = [Period("2013-01-01 00:00", "T"), Period("2013-01-01 00:01", "T")]
    expected = Series([34.5, 79.5], index=PeriodIndex(bins, name="idx"))

    from_periods = s.to_period().resample("T", kind="period").mean()
    assert_series_equal(from_periods, expected)

    from_stamps = s.resample("T", kind="period").mean()
    assert_series_equal(from_stamps, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "freq,expected_vals", [("M", [31, 29, 31, 9]), ("2M", [31 + 29, 31 + 9])]
)
def test_resample_count(self, freq, expected_vals):
    """``count`` over a 100-period series starting at "2000" must give the
    listed per-bin counts (GH12774)."""
    source_index = pd.period_range(start="2000", periods=100)
    series = Series(1, index=source_index)

    result = series.resample(freq).count()

    n_bins = len(expected_vals)
    bin_index = pd.period_range(start="2000", freq=freq, periods=n_bins)
    expected = Series(expected_vals, index=bin_index)
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_same_freq(self, resample_method):
    """Resampling an "M" series to "M" must return the series unchanged for
    every resample method (GH12770)."""
    monthly_index = pd.period_range(start="2000", periods=3, freq="M")
    series = Series(range(3), index=monthly_index)

    aggregate = getattr(series.resample("M"), resample_method)
    result = aggregate()

    assert_series_equal(result, series)
|
||||
|
||||
def test_resample_incompat_freq(self):
    """Resampling an "M" period series to "W" must raise
    ``IncompatibleFrequency``."""
    msg = (
        "Frequency <MonthEnd> cannot be resampled to <Week: weekday=6>, "
        "as they are not sub or super periods"
    )
    with pytest.raises(IncompatibleFrequency, match=msg):
        monthly_index = pd.period_range(start="2000", periods=3, freq="M")
        monthly = Series(range(3), index=monthly_index)
        monthly.resample("W").mean()
|
||||
|
||||
def test_with_local_timezone_pytz(self):
    """Resample a tz-converted (pytz) hourly series to "D" periods; see gh-5430."""
    local_timezone = pytz.timezone("America/Los_Angeles")

    # Two UTC instants one day apart.
    start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=pytz.utc)
    end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=pytz.utc)

    hourly = pd.date_range(start, end, freq="H")
    series = Series(1, index=hourly).tz_convert(local_timezone)
    result = series.resample("D", kind="period").mean()

    # Create the expected series
    # Index is moved back a day with the timezone conversion from UTC to
    # Pacific
    daily_periods = pd.period_range(start=start, end=end, freq="D")
    expected = Series(1, index=daily_periods - offsets.Day())
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_with_pytz(self):
    """Resample 48 hourly "US/Eastern" values to "D" and check the result's
    values, index and timezone (GH 13238)."""
    hourly = pd.date_range("2017-01-01", periods=48, freq="H", tz="US/Eastern")
    s = Series(2, index=hourly)

    result = s.resample("D").mean()

    days = pd.DatetimeIndex(["2017-01-01", "2017-01-02"], tz="US/Eastern")
    expected = Series(2, index=days)
    assert_series_equal(result, expected)

    # Especially assert that the timezone is LMT for pytz
    assert result.index.tz == pytz.timezone("US/Eastern")
|
||||
|
||||
def test_with_local_timezone_dateutil(self):
    """Resample a tz-converted (dateutil) hourly series to "D" periods; see
    gh-5430."""
    local_timezone = "dateutil/America/Los_Angeles"
    utc = dateutil.tz.tzutc

    # Two UTC instants one day apart.
    start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=utc())
    end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=utc())

    hourly = pd.date_range(start, end, freq="H", name="idx")
    series = Series(1, index=hourly).tz_convert(local_timezone)
    result = series.resample("D", kind="period").mean()

    # Create the expected series
    # Index is moved back a day with the timezone conversion from UTC to
    # Pacific
    daily_periods = pd.period_range(start=start, end=end, freq="D", name="idx")
    expected = Series(1, index=daily_periods - offsets.Day())
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_nonexistent_time_bin_edge(self):
    """Two resampling scenarios on tz-aware data (GH 19375 and GH 23742)."""
    # GH 19375: a "15T" series localized to US/Pacific, resampled at "900S",
    # must come back unchanged.
    quarter_hours = date_range("2017-03-12", "2017-03-12 1:45:00", freq="15T")
    naive = Series(np.zeros(len(quarter_hours)), index=quarter_hours)
    expected = naive.tz_localize("US/Pacific")
    result = expected.resample("900S").mean()
    tm.assert_series_equal(result, expected)

    # GH 23742: daily grouping of hourly America/Sao_Paulo data must produce
    # the left-closed daily index built with nonexistent="shift_forward".
    hours = date_range(start="2017-10-10", end="2017-10-20", freq="1H")
    hours = hours.tz_localize("UTC").tz_convert("America/Sao_Paulo")
    df = DataFrame(data=list(range(len(hours))), index=hours)
    result = df.groupby(pd.Grouper(freq="1D")).count()
    expected = date_range(
        start="2017-10-09",
        end="2017-10-20",
        freq="D",
        tz="America/Sao_Paulo",
        nonexistent="shift_forward",
        closed="left",
    )
    tm.assert_index_equal(result.index, expected)
|
||||
|
||||
def test_resample_ambiguous_time_bin_edge(self):
    """A "30T" Europe/London series resampled at "30T" must come back
    unchanged (GH 10117)."""
    half_hours = pd.date_range(
        "2014-10-25 22:00:00", "2014-10-26 00:30:00", freq="30T", tz="Europe/London"
    )
    zeros = np.zeros(len(half_hours))
    expected = Series(zeros, index=half_hours)

    result = expected.resample("30T").mean()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_fill_method_and_how_upsample(self):
    """Forward-filling a "Q" series to "M" must equal forward-filling and then
    taking ``last`` per "M" bin (GH2073)."""
    quarters = date_range("2010-01-01", periods=9, freq="Q")
    s = Series(np.arange(9, dtype="int64"), index=quarters)

    filled = s.resample("M").ffill()
    filled_then_last = s.resample("M").ffill().resample("M").last()
    assert_series_equal(filled, filled_then_last.astype("int64"))
|
||||
|
||||
@pytest.mark.parametrize("day", DAYS)
@pytest.mark.parametrize("target", ["D", "B"])
@pytest.mark.parametrize("convention", ["start", "end"])
def test_weekly_upsample(self, day, target, convention, simple_period_range_series):
    """Forward-fill upsampling of a ``W-<day>`` series to ``target`` must
    match a timestamp ``asfreq`` round trip of the result."""
    weekly_freq = "W-{day}".format(day=day)
    ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=weekly_freq)

    result = ts.resample(target, convention=convention).ffill()

    stamped = result.to_timestamp(target, how=convention)
    expected = stamped.asfreq(target, "ffill").to_period()
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_to_timestamps(self, simple_period_range_series):
    """``kind="timestamp"`` resampling must equal converting to timestamps
    (``how="start"``) first and resampling afterwards."""
    ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M")

    result = ts.resample("A-DEC", kind="timestamp").mean()

    stamped = ts.to_timestamp(how="start")
    expected = stamped.resample("A-DEC").mean()
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_to_quarterly(self, simple_period_range_series):
    """Forward-fill upsampling from annual to quarterly periods.

    The first loop pairs ``A-<month>`` with ``Q-<month>`` for every entry of
    MONTHS; the second resamples "A-JUN" to "Q-MAR" under both conventions.
    """
    for month in MONTHS:
        annual_freq = "A-{month}".format(month=month)
        quarterly_freq = "Q-{month}".format(month=month)

        ts = simple_period_range_series("1990", "1992", freq=annual_freq)
        quar_ts = ts.resample(quarterly_freq).ffill()

        stamps = ts.to_timestamp("D", how="start")
        first_day = ts.index[0].asfreq("D", "start")
        last_day = ts.index[-1].asfreq("D", "end")
        qdates = period_range(first_day, last_day, freq=quarterly_freq)

        expected = stamps.reindex(qdates.to_timestamp("D", "s"), method="ffill")
        expected.index = qdates

        assert_series_equal(quar_ts, expected)

    # conforms, but different month
    ts = simple_period_range_series("1990", "1992", freq="A-JUN")

    for how in ["start", "end"]:
        result = ts.resample("Q-MAR", convention=how).ffill()
        as_quarters = ts.asfreq("Q-MAR", how=how)
        expected = as_quarters.reindex(result.index, method="ffill")

        assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_fill_missing(self):
    """Forward-filling a gapped "A" period series must equal doing the same
    on its timestamp form and converting back to "A" periods."""
    gapped_years = PeriodIndex([2000, 2005, 2007, 2009], freq="A")
    s = Series(np.random.randn(4), index=gapped_years)

    filled = s.resample("A").ffill()

    stamps = s.to_timestamp()
    expected = stamps.resample("A").ffill().to_period("A")
    assert_series_equal(filled, expected)
|
||||
|
||||
def test_cant_fill_missing_dups(self):
    """Forward-fill resampling on an index with duplicate periods must raise
    ``InvalidIndexError``."""
    duplicated_years = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq="A")
    s = Series(np.random.randn(5), index=duplicated_years)

    msg = "Reindexing only valid with uniquely valued Index objects"
    with pytest.raises(InvalidIndexError, match=msg):
        s.resample("A").ffill()
|
||||
|
||||
@pytest.mark.parametrize("freq", ["5min"])
@pytest.mark.parametrize("kind", ["period", None, "timestamp"])
def test_resample_5minute(self, freq, kind):
    """Mean-resampling a "T" period series to ``freq`` must match the
    timestamp-based result (converted to periods unless kind="timestamp")."""
    minutes = period_range("1/1/2000", "1/5/2000", freq="T")
    ts = Series(np.random.randn(len(minutes)), index=minutes)

    expected = ts.to_timestamp().resample(freq).mean()
    if kind != "timestamp":
        expected = expected.to_period(freq)

    result = ts.resample(freq, kind=kind).mean()
    assert_series_equal(result, expected)
|
||||
|
||||
def test_upsample_daily_business_daily(self, simple_period_range_series):
    """``asfreq`` upsampling: a "B" series to "D", then a default-freq series
    to "H" with the "s" convention; each is compared with an explicit reindex."""
    business = simple_period_range_series("1/1/2000", "2/1/2000", freq="B")

    result = business.resample("D").asfreq()
    daily_range = period_range("1/3/2000", "2/1/2000")
    expected = business.asfreq("D").reindex(daily_range)
    assert_series_equal(result, expected)

    daily = simple_period_range_series("1/1/2000", "2/1/2000")
    result = daily.resample("H", convention="s").asfreq()
    hourly_range = period_range("1/1/2000", "2/1/2000 23:00", freq="H")
    expected = daily.asfreq("H", how="s").reindex(hourly_range)
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_irregular_sparse(self):
|
||||
dr = date_range(start="1/1/2012", freq="5min", periods=1000)
|
||||
s = Series(np.array(100), index=dr)
|
||||
# subset the data.
|
||||
subset = s[:"2012-01-04 06:55"]
|
||||
|
||||
result = subset.resample("10min").apply(len)
|
||||
expected = s.resample("10min").apply(len).loc[result.index]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_weekly_all_na(self):
|
||||
rng = date_range("1/1/2000", periods=10, freq="W-WED")
|
||||
ts = Series(np.random.randn(len(rng)), index=rng)
|
||||
|
||||
result = ts.resample("W-THU").asfreq()
|
||||
|
||||
assert result.isna().all()
|
||||
|
||||
result = ts.resample("W-THU").asfreq().ffill()[:-1]
|
||||
expected = ts.asfreq("W-THU").ffill()
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_tz_localized(self):
    """Resampling tz-aware data agrees with naive resampling re-localized."""
    days = date_range(start="2012-4-13", end="2012-5-1")
    ts = Series(range(len(days)), index=days)

    ts_local = ts.tz_localize("UTC").tz_convert("America/Los_Angeles")
    result = ts_local.resample("W").mean()

    # Expected: drop the tzinfo from every stamp, resample the naive
    # series, then attach the zone again.
    ts_local_naive = ts_local.copy()
    naive_stamps = []
    for stamp in ts_local_naive.index.to_pydatetime():
        naive_stamps.append(stamp.replace(tzinfo=None))
    ts_local_naive.index = naive_stamps

    weekly_naive = ts_local_naive.resample("W").mean()
    exp = weekly_naive.tz_localize("America/Los_Angeles")
    assert_series_equal(result, exp)

    # it works
    result = ts_local.resample("D").mean()

    # #2245
    sydney_minutes = date_range(
        "2001-09-20 15:59", "2001-09-20 16:00", freq="T", tz="Australia/Sydney"
    )
    s = Series([1, 2], index=sydney_minutes)

    result = s.resample("D", closed="right", label="right").mean()
    expected = Series(
        [1.5],
        index=date_range("2001-09-21", periods=1, freq="D", tz="Australia/Sydney"),
    )
    assert_series_equal(result, expected)

    # for good measure
    result = s.resample("D", kind="period").mean()
    expected = Series([1.5], index=period_range("2001-09-20", periods=1, freq="D"))
    assert_series_equal(result, expected)

    # GH 6397
    # comparing an offset that doesn't propagate tz's
    hourly_est = date_range("1/1/2011", periods=20000, freq="H").tz_localize("EST")
    frame = DataFrame(index=hourly_est)
    frame["first"] = np.random.randn(len(hourly_est))
    frame["second"] = np.cumsum(np.random.randn(len(hourly_est)))

    column_order = ["first", "second"]
    first_totals = frame.resample("A").sum()["first"]
    second_means = frame.resample("A").mean()["second"]
    expected = DataFrame(
        {"first": first_totals, "second": second_means}, columns=column_order
    )

    aggregated = frame.resample("A").agg({"first": np.sum, "second": np.mean})
    result = aggregated.reindex(columns=column_order)
    assert_frame_equal(result, expected)
|
||||
|
||||
def test_closed_left_corner(self):
|
||||
# #1465
|
||||
s = Series(
|
||||
np.random.randn(21),
|
||||
index=date_range(start="1/1/2012 9:30", freq="1min", periods=21),
|
||||
)
|
||||
s[0] = np.nan
|
||||
|
||||
result = s.resample("10min", closed="left", label="right").mean()
|
||||
exp = s[1:].resample("10min", closed="left", label="right").mean()
|
||||
assert_series_equal(result, exp)
|
||||
|
||||
result = s.resample("10min", closed="left", label="left").mean()
|
||||
exp = s[1:].resample("10min", closed="left", label="left").mean()
|
||||
|
||||
ex_index = date_range(start="1/1/2012 9:30", freq="10min", periods=3)
|
||||
|
||||
tm.assert_index_equal(result.index, ex_index)
|
||||
assert_series_equal(result, exp)
|
||||
|
||||
def test_quarterly_resampling(self):
    """Quarterly periods averaged per year match the timestamp route."""
    quarters = period_range("2000Q1", periods=10, freq="Q-DEC")
    ts = Series(np.arange(10), index=quarters)

    via_timestamps = ts.to_timestamp().resample("A").mean()
    exp = via_timestamps.to_period()

    result = ts.resample("A").mean()
    assert_series_equal(result, exp)
|
||||
|
||||
def test_resample_weekly_bug_1726(self):
|
||||
# 8/6/12 is a Monday
|
||||
ind = date_range(start="8/6/2012", end="8/26/2012", freq="D")
|
||||
n = len(ind)
|
||||
data = [[x] * 5 for x in range(n)]
|
||||
df = DataFrame(data, columns=["open", "high", "low", "close", "vol"], index=ind)
|
||||
|
||||
# it works!
|
||||
df.resample("W-MON", closed="left", label="left").first()
|
||||
|
||||
def test_resample_with_dst_time_change(self):
    """12-hour right-closed bins over America/Chicago data (GH 15549)."""
    # GH 15549
    utc_nanos = [1457537600000000000, 1458059600000000000]
    chicago_index = (
        pd.DatetimeIndex(utc_nanos).tz_localize("UTC").tz_convert("America/Chicago")
    )
    df = pd.DataFrame([1, 2], index=chicago_index)
    result = df.resample("12h", closed="right", label="right").last().ffill()

    # The expected labels move from a -06:00 to a -05:00 offset part-way
    # through, with the local hour shifting from 00/12 to 01/13.
    expected_index_values = [
        "2016-03-09 12:00:00-06:00",
        "2016-03-10 00:00:00-06:00",
        "2016-03-10 12:00:00-06:00",
        "2016-03-11 00:00:00-06:00",
        "2016-03-11 12:00:00-06:00",
        "2016-03-12 00:00:00-06:00",
        "2016-03-12 12:00:00-06:00",
        "2016-03-13 00:00:00-06:00",
        "2016-03-13 13:00:00-05:00",
        "2016-03-14 01:00:00-05:00",
        "2016-03-14 13:00:00-05:00",
        "2016-03-15 01:00:00-05:00",
        "2016-03-15 13:00:00-05:00",
    ]
    expected_index = pd.to_datetime(expected_index_values, utc=True).tz_convert(
        "America/Chicago"
    )
    # The first value is carried forward into every bin except the last.
    expected_values = [1.0] * 12 + [2.0]
    expected = pd.DataFrame(expected_values, index=expected_index)
    assert_frame_equal(result, expected)
|
||||
|
||||
def test_resample_bms_2752(self):
|
||||
# GH2753
|
||||
foo = Series(index=pd.bdate_range("20000101", "20000201"))
|
||||
res1 = foo.resample("BMS").mean()
|
||||
res2 = foo.resample("BMS").mean().resample("B").mean()
|
||||
assert res1.index[0] == Timestamp("20000103")
|
||||
assert res1.index[0] == res2.index[0]
|
||||
|
||||
# def test_monthly_convention_span(self):
|
||||
# rng = period_range('2000-01', periods=3, freq='M')
|
||||
# ts = Series(np.arange(3), index=rng)
|
||||
|
||||
# # hacky way to get same thing
|
||||
# exp_index = period_range('2000-01-01', '2000-03-31', freq='D')
|
||||
# expected = ts.asfreq('D', how='end').reindex(exp_index)
|
||||
# expected = expected.fillna(method='bfill')
|
||||
|
||||
# result = ts.resample('D', convention='span').mean()
|
||||
|
||||
# assert_series_equal(result, expected)
|
||||
|
||||
def test_default_right_closed_label(self):
    """Default resampling equals explicit closed/label "right" for these targets."""
    # (frequency of the input index, frequency resampled to)
    freq_pairs = [("D", "M"), ("Q", "A"), ("M", "Q"), ("D", "W")]

    for from_freq, to_freq in freq_pairs:
        idx = date_range(start="8/15/2012", periods=100, freq=from_freq)
        df = DataFrame(np.random.randn(len(idx), 2), idx)

        resampled = df.resample(to_freq).mean()
        explicit = df.resample(to_freq, closed="right", label="right").mean()
        assert_frame_equal(resampled, explicit)
|
||||
|
||||
def test_default_left_closed_label(self):
    """Default resampling equals explicit closed/label "left" for these targets."""
    # (frequency of the input index, frequency resampled to)
    freq_pairs = [("D", "MS"), ("Q", "AS"), ("M", "QS"), ("H", "D"), ("T", "H")]

    for from_freq, to_freq in freq_pairs:
        idx = date_range(start="8/15/2012", periods=100, freq=from_freq)
        df = DataFrame(np.random.randn(len(idx), 2), idx)

        resampled = df.resample(to_freq).mean()
        explicit = df.resample(to_freq, closed="left", label="left").mean()
        assert_frame_equal(resampled, explicit)
|
||||
|
||||
def test_all_values_single_bin(self):
    """Twelve monthly periods of one year fall into a single annual bin."""
    # 2070
    months = period_range(start="2012-01-01", end="2012-12-31", freq="M")
    s = Series(np.random.randn(len(months)), index=months)

    yearly = s.resample("A").mean()
    # The first annual value must be the mean of the whole series.
    tm.assert_almost_equal(yearly[0], s.mean())
|
||||
|
||||
def test_evenly_divisible_with_no_extra_bins(self):
    """Resampling yields exactly the expected bins (4076)."""
    # 4076
    # Nine daily rows in 5-day bins: rows 0-4 and rows 5-8.
    nine_days = DataFrame(
        np.random.randn(9, 3), index=date_range("2000-1-1", periods=9)
    )
    result = nine_days.resample("5D").mean()
    first_bin = nine_days.iloc[0:5].mean()
    second_bin = nine_days.iloc[5:].mean()
    expected = pd.concat([first_bin, second_bin], axis=1).T
    expected.index = [Timestamp("2000-1-1"), Timestamp("2000-1-6")]
    assert_frame_equal(result, expected)

    # Two records per day for 28 days, resampled in 7-day bins.
    daily = date_range(start="2001-5-4", periods=28)
    rest_one = {
        "REST_KEY": 1,
        "DLY_TRN_QT": 80,
        "DLY_SLS_AMT": 90,
        "COOP_DLY_TRN_QT": 30,
        "COOP_DLY_SLS_AMT": 20,
    }
    rest_two = {
        "REST_KEY": 2,
        "DLY_TRN_QT": 70,
        "DLY_SLS_AMT": 10,
        "COOP_DLY_TRN_QT": 50,
        "COOP_DLY_SLS_AMT": 20,
    }
    df = DataFrame(
        [rest_one] * 28 + [rest_two] * 28, index=daily.append(daily)
    ).sort_index()

    weekly = date_range("2001-5-4", periods=4, freq="7D")

    # Every 7-day bin holds 14 rows in every column.
    count_row = {
        "REST_KEY": 14,
        "DLY_TRN_QT": 14,
        "DLY_SLS_AMT": 14,
        "COOP_DLY_TRN_QT": 14,
        "COOP_DLY_SLS_AMT": 14,
    }
    expected = DataFrame([count_row] * 4, index=weekly)
    result = df.resample("7D").count()
    assert_frame_equal(result, expected)

    # Per-bin totals: seven days of both records added together.
    sum_row = {
        "REST_KEY": 21,
        "DLY_TRN_QT": 1050,
        "DLY_SLS_AMT": 700,
        "COOP_DLY_TRN_QT": 560,
        "COOP_DLY_SLS_AMT": 280,
    }
    expected = DataFrame([sum_row] * 4, index=weekly)
    result = df.resample("7D").sum()
    assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("kind", ["period", None, "timestamp"])
@pytest.mark.parametrize("agg_arg", ["mean", {"value": "mean"}, ["mean"]])
def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg):
    """
    Passing ``loffset`` yields a DatetimeIndex-labelled result for every ``kind``.

    Parameters
    ----------
    frame : DataFrame
        Fixture frame; its values are averaged two rows at a time to build
        the expected "value" column.
    kind : {"period", None, "timestamp"}
        Forwarded to ``resample``.
    agg_arg : str, dict or list
        Aggregation spec, used both through ``.agg(...)`` and through the
        deprecated ``how=`` keyword (which must emit a FutureWarning).
    """
    # make sure passing loffset returns DatetimeIndex in all cases
    # basic method taken from Base.test_resample_loffset_arg_type()
    df = frame
    expected_means = [
        df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)
    ]
    # ``periods`` is a count, so use floor division to pass an int rather
    # than the float produced by true division.
    expected_index = period_range(df.index[0], periods=len(df.index) // 2, freq="2D")

    # loffset coerces PeriodIndex to DateTimeIndex
    expected_index = expected_index.to_timestamp()
    expected_index += timedelta(hours=2)
    expected = DataFrame({"value": expected_means}, index=expected_index)

    result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result_how = df.resample("2D", how=agg_arg, loffset="2H", kind=kind)
    # A list spec produces two-level (column, function) column labels.
    if isinstance(agg_arg, list):
        expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
    assert_frame_equal(result_agg, expected)
    assert_frame_equal(result_how, expected)
|
||||
|
||||
@pytest.mark.parametrize("freq, period_mult", [("H", 24), ("12H", 2)])
@pytest.mark.parametrize("kind", [None, "period"])
def test_upsampling_ohlc(self, freq, period_mult, kind):
    """OHLC on upsampled daily periods matches the extended timestamp result."""
    # GH 13083
    days = period_range(start="2000", freq="D", periods=10)
    s = Series(range(len(days)), index=days)
    via_timestamps = s.to_timestamp().resample(freq).ohlc()
    expected = via_timestamps.to_period(freq)

    # timestamp-based resampling doesn't include all sub-periods
    # of the last original period, so extend accordingly:
    n_sub_periods = period_mult * len(days)
    new_index = period_range(start="2000", freq=freq, periods=n_sub_periods)
    expected = expected.reindex(new_index)

    result = s.resample(freq, kind=kind).ohlc()
    assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "periods, values",
    [
        (
            [
                pd.NaT,
                "1970-01-01 00:00:00",
                pd.NaT,
                "1970-01-01 00:00:02",
                "1970-01-01 00:00:03",
            ],
            [2, 3, 5, 7, 11],
        ),
        (
            [
                pd.NaT,
                pd.NaT,
                "1970-01-01 00:00:00",
                pd.NaT,
                pd.NaT,
                pd.NaT,
                "1970-01-01 00:00:02",
                "1970-01-01 00:00:03",
                pd.NaT,
                pd.NaT,
            ],
            [1, 2, 3, 5, 6, 8, 7, 11, 12, 13],
        ),
    ],
)
@pytest.mark.parametrize(
    "freq, expected_values",
    [
        ("1s", [3, np.NaN, 7, 11]),
        ("2s", [3, (7 + 11) // 2]),
        ("3s", [(3 + 7) // 2, 11]),
    ],
)
def test_resample_with_nat(self, periods, values, freq, expected_values):
    """Rows labelled NaT do not contribute to the resampled means."""
    # GH 13224
    second_periods = PeriodIndex(periods, freq="S")
    frame = DataFrame(values, index=second_periods)

    result = frame.resample(freq).mean()

    # Expected bins start at the epoch, one per expected value.
    expected_index = period_range(
        "1970-01-01 00:00:00", periods=len(expected_values), freq=freq
    )
    expected = DataFrame(expected_values, index=expected_index)
    assert_frame_equal(result, expected)
|
||||
|
||||
def test_resample_with_only_nat(self):
    """A frame whose period index is entirely NaT resamples to an empty frame."""
    # GH 13224
    all_nat = PeriodIndex([pd.NaT] * 3, freq="S")
    frame = DataFrame([2, 3, 5], index=all_nat)

    empty_index = PeriodIndex(data=[], freq=all_nat.freq)
    expected = DataFrame(index=empty_index)

    result = frame.resample("1s").mean()
    assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "start,end,start_freq,end_freq,base",
    [
        ("19910905", "19910909 03:00", "H", "24H", 10),
        ("19910905", "19910909 12:00", "H", "24H", 10),
        ("19910905", "19910909 23:00", "H", "24H", 10),
        ("19910905 10:00", "19910909", "H", "24H", 10),
        ("19910905 10:00", "19910909 10:00", "H", "24H", 10),
        ("19910905", "19910909 10:00", "H", "24H", 10),
        ("19910905 12:00", "19910909", "H", "24H", 10),
        ("19910905 12:00", "19910909 03:00", "H", "24H", 10),
        ("19910905 12:00", "19910909 12:00", "H", "24H", 10),
        ("19910905 12:00", "19910909 12:00", "H", "24H", 34),
        ("19910905 12:00", "19910909 12:00", "H", "17H", 10),
        ("19910905 12:00", "19910909 12:00", "H", "17H", 3),
        ("19910905 12:00", "19910909 1:00", "H", "M", 3),
        ("19910905", "19910913 06:00", "2H", "24H", 10),
        ("19910905", "19910905 01:39", "Min", "5Min", 3),
        ("19910905", "19910905 03:18", "2Min", "5Min", 3),
    ],
)
def test_resample_with_non_zero_base(self, start, end, start_freq, end_freq, base):
    """Period resampling with ``base`` matches timestamp resampling with it."""
    # GH 23882
    periods = pd.period_range(start, end, freq=start_freq)
    s = pd.Series(0, index=periods)
    s = s + np.arange(len(s))

    result = s.resample(end_freq, base=base).mean()
    result = result.to_timestamp(end_freq)
    # to_timestamp casts 24H -> D
    if end_freq == "24H":
        result = result.asfreq(end_freq)

    expected = s.to_timestamp().resample(end_freq, base=base).mean()
    assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "first,last,offset,exp_first,exp_last",
    [
        ("19910905", "19920406", "D", "19910905", "19920406"),
        ("19910905 00:00", "19920406 06:00", "D", "19910905", "19920406"),
        (
            "19910905 06:00",
            "19920406 06:00",
            "H",
            "19910905 06:00",
            "19920406 06:00",
        ),
        ("19910906", "19920406", "M", "1991-09", "1992-04"),
        ("19910831", "19920430", "M", "1991-08", "1992-04"),
        ("1991-08", "1992-04", "M", "1991-08", "1992-04"),
    ],
)
def test_get_period_range_edges(self, first, last, offset, exp_first, exp_last):
    """``_get_period_range_edges`` returns the expected (first, last) periods."""
    first_period = pd.Period(first)
    last_period = pd.Period(last)

    # Expected edges are periods at the target frequency.
    expected = (
        pd.Period(exp_first, freq=offset),
        pd.Period(exp_last, freq=offset),
    )

    freq_offset = pd.tseries.frequencies.to_offset(offset)
    result = _get_period_range_edges(first_period, last_period, freq_offset)
    assert result == expected
|
@@ -0,0 +1,567 @@
|
||||
from collections import OrderedDict
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
# Shared module-level data: minute-frequency stamps from 2005-01-01 to
# 2005-01-10, a random series on them, and a three-column frame in which
# "A" and "B" are that series and "C" is a running integer.
dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="Min")

test_series = Series(np.random.rand(len(dti)), index=dti)
_test_frame = DataFrame(
    {
        "A": test_series,
        "B": test_series,
        "C": np.arange(len(dti)),
    }
)
|
||||
|
||||
|
||||
@pytest.fixture
def test_frame():
    """Return a fresh copy of the module-level ``_test_frame``."""
    frame_copy = _test_frame.copy()
    return frame_copy
|
||||
|
||||
|
||||
def test_str():
    """The string form of an hourly resampler contains its settings."""
    expected_fragment = (
        "DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, "
        "label=left, convention=start, base=0]"
    )
    hourly = test_series.resample("H")
    assert expected_fragment in str(hourly)
|
||||
|
||||
|
||||
def test_api():
    """Hourly means keep the container type and have 217 entries."""
    checks = (
        (test_series, Series),
        (test_series.to_frame(), DataFrame),
    )
    for obj, expected_type in checks:
        result = obj.resample("H").mean()
        assert isinstance(result, expected_type)
        assert len(result) == 217
|
||||
|
||||
|
||||
def test_groupby_resample_api():
    """Resampling inside ``groupby(...).apply`` forward-fills per group."""
    # GH 12448
    # .groupby(...).resample(...) hitting warnings
    # when appropriate
    weekly = pd.date_range(start="2016-01-01", periods=4, freq="W")
    df = DataFrame(
        {"date": weekly, "group": [1, 1, 2, 2], "val": [5, 6, 7, 8]}
    ).set_index("date")

    # replication step
    group_one_days = pd.date_range("2016-01-03", periods=8).tolist()
    group_two_days = pd.date_range("2016-01-17", periods=8).tolist()
    index = pd.MultiIndex.from_arrays(
        [[1] * 8 + [2] * 8, group_one_days + group_two_days],
        names=["group", "date"],
    )
    expected = DataFrame({"val": [5] * 7 + [6] + [7] * 7 + [8]}, index=index)

    def _daily_ffill(group):
        # Upsample one group to daily frequency, carrying values forward.
        return group.resample("1D").ffill()

    result = df.groupby("group").apply(_daily_ffill)[["val"]]
    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_api():
    """``groupby(...).resample(on=...)`` equals resampling on a set index."""
    # GH 15021
    # .groupby(...).resample(on=...) results in an unexpected
    # keyword warning.
    columns = {
        "key": ["A", "B"] * 5,
        "dates": pd.date_range("2016-01-01", periods=10),
        "values": np.random.randn(10),
    }
    df = DataFrame(columns)

    indexed = df.set_index("dates")
    expected = indexed.groupby("key").resample("D").mean()

    result = df.groupby("key").resample("D", on="dates").mean()
    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_pipe(test_frame):
    """``Resampler.pipe`` applies a callable to the resampler itself."""
    # GH17905

    def max_minus_mean(resampler):
        return resampler.max() - resampler.mean()

    # series
    r = test_series.resample("H")
    expected = r.max() - r.mean()
    result = r.pipe(max_minus_mean)
    tm.assert_series_equal(result, expected)

    # dataframe
    r = test_frame.resample("H")
    expected = r.max() - r.mean()
    result = r.pipe(max_minus_mean)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem(test_frame):
|
||||
|
||||
r = test_frame.resample("H")
|
||||
tm.assert_index_equal(r._selected_obj.columns, test_frame.columns)
|
||||
|
||||
r = test_frame.resample("H")["B"]
|
||||
assert r._selected_obj.name == test_frame.columns[1]
|
||||
|
||||
# technically this is allowed
|
||||
r = test_frame.resample("H")["A", "B"]
|
||||
tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]])
|
||||
|
||||
r = test_frame.resample("H")["A", "B"]
|
||||
tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("key", [["D"], ["A", "D"]])
def test_select_bad_cols(key, test_frame):
    """Selecting a missing column raises a KeyError naming only that column."""
    resampler = test_frame.resample("H")
    # 'A' should not be referenced as a bad column...
    # will have to rethink regex if you change message!
    expected_msg = r"^\"Columns not found: 'D'\"$"
    with pytest.raises(KeyError, match=expected_msg):
        resampler[key]
|
||||
|
||||
|
||||
def test_attribute_access(test_frame):
|
||||
|
||||
r = test_frame.resample("H")
|
||||
tm.assert_series_equal(r.A.sum(), r["A"].sum())
|
||||
|
||||
|
||||
def test_api_compat_before_use():
    """Grouper attributes are readable before and after an aggregation."""
    # make sure that we are setting the binner
    # on these attributes
    for attr in ("groups", "ngroups", "indices"):
        seconds = pd.date_range("1/1/2012", periods=100, freq="S")
        ts = Series(np.arange(len(seconds)), index=seconds)
        # A fresh resampler per attribute, so the first access below
        # happens before anything else has used it.
        rs = ts.resample("30s")

        # before use
        getattr(rs, attr)

        # after grouper is initialized is ok
        rs.mean()
        getattr(rs, attr)
|
||||
|
||||
|
||||
def tests_skip_nuisance(test_frame):
    """Summing with an added string column "D" is compared against A/B/C only."""
    df = test_frame
    df["D"] = "foo"
    r = df.resample("H")

    # Explicit two-column selection vs. the per-column sums side by side.
    expected = pd.concat([r.A.sum(), r.B.sum()], axis=1)
    result = r[["A", "B"]].sum()
    assert_frame_equal(result, expected)

    # Summing everything is expected to equal summing A, B and C.
    expected = r[["A", "B", "C"]].sum()
    result = r.sum()
    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_downsample_but_actually_upsampling():
    """``resample("20s").asfreq()`` picks every 20th one-second value."""
    # this is reindex / asfreq
    seconds = pd.date_range("1/1/2012", periods=100, freq="S")
    ts = Series(np.arange(len(seconds), dtype="int64"), index=seconds)

    every_20s = pd.date_range("2012-01-01 00:00:00", freq="20s", periods=5)
    expected = Series([0, 20, 40, 60, 80], index=every_20s)

    result = ts.resample("20s").asfreq()
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_combined_up_downsampling_of_irregular():
    """Deprecated ``how``/``fill_method`` keywords equal ``.mean().ffill()``."""
    # since we are really doing an operation like this
    # ts2.resample('2s').mean().ffill()
    # preserve these semantics
    seconds = pd.date_range("1/1/2012", periods=100, freq="S")
    ts = Series(np.arange(len(seconds)), index=seconds)
    irregular_positions = [0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]
    ts2 = ts.iloc[irregular_positions]

    # The keyword form must emit a FutureWarning.
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ts2.resample("2s", how="mean", fill_method="ffill")
    expected = ts2.resample("2s").mean().ffill()
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform():
    """``Resampler.transform`` equals the Grouper-based groupby transform."""
    resampler = test_series.resample("20min")
    grouped = test_series.groupby(pd.Grouper(freq="20min"))

    expected = grouped.transform("mean")
    result = resampler.transform("mean")
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_fillna():
    """``Resampler.fillna(method=...)`` mirrors ffill/bfill; other values raise."""
    # need to upsample here
    two_second_index = pd.date_range("1/1/2012", periods=10, freq="2S")
    ts = Series(np.arange(len(two_second_index), dtype="int64"), index=two_second_index)
    r = ts.resample("s")

    # The keyword form must agree with the dedicated method of the same name.
    for method in ("ffill", "bfill"):
        expected = getattr(r, method)()
        result = r.fillna(method=method)
        assert_series_equal(result, expected)

    msg = (
        r"Invalid fill method\. Expecting pad \(ffill\), backfill"
        r" \(bfill\) or nearest\. Got 0"
    )
    with pytest.raises(ValueError, match=msg):
        r.fillna(0)
|
||||
|
||||
|
||||
def test_apply_without_aggregation():
    """An identity ``apply`` returns the original series for both APIs."""
    # both resample and groupby should work w/o aggregation
    resampled = test_series.resample("20min")
    grouped = test_series.groupby(pd.Grouper(freq="20min"))

    for obj in (grouped, resampled):
        result = obj.apply(lambda x: x)
        assert_series_equal(result, test_series)
|
||||
|
||||
|
||||
def test_agg_consistency():
    """Renaming-dict ``agg`` is the same with and without a selection list."""
    # make sure that we are consistent across
    # similar aggregations with and w/o selection list
    second_index = pd.date_range("1/1/2012", freq="S", periods=1000)
    df = DataFrame(
        np.random.randn(1000, 3), index=second_index, columns=["A", "B", "C"]
    )

    r = df.resample("3T")

    # Both spellings are evaluated under the FutureWarning check.
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        expected = r[["A", "B", "C"]].agg({"r1": "mean", "r2": "sum"})
        result = r.agg({"r1": "mean", "r2": "sum"})
    assert_frame_equal(result, expected, check_like=True)
|
||||
|
||||
|
||||
# TODO: once GH 14008 is fixed, move these tests into
|
||||
# `Base` test class
|
||||
|
||||
|
||||
def test_agg():
    """
    ``aggregate`` gives the same result through every entry point in ``cases``.

    The same seeded 10-day frame is aggregated in 2-day bins through four
    objects: ``resample`` on the index, ``resample(on="date")``,
    ``resample(level="date")`` on a MultiIndex, and ``groupby`` with a
    ``pd.Grouper``. Each aggregation spec below is run against all four and
    compared with an expectation assembled from per-column results of the
    plain index resampler. Nested renaming dicts must additionally emit a
    FutureWarning.
    """
    # test with all three Resampler apis and TimeGrouper

    np.random.seed(1234)
    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    index.name = "date"
    df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
    df_col = df.reset_index()
    df_mult = df_col.copy()
    df_mult.index = pd.MultiIndex.from_arrays(
        [range(10), df.index], names=["index", "date"]
    )
    r = df.resample("2D")
    cases = [
        r,
        df_col.resample("2D", on="date"),
        df_mult.resample("2D", level="date"),
        df.groupby(pd.Grouper(freq="2D")),
    ]

    # Per-column building blocks for the expectations below.
    a_mean = r["A"].mean()
    a_std = r["A"].std()
    a_sum = r["A"].sum()
    b_mean = r["B"].mean()
    b_std = r["B"].std()
    b_sum = r["B"].sum()

    # list of functions applied to every column
    expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
    expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
    for t in cases:
        result = t.aggregate([np.mean, np.std])
        assert_frame_equal(result, expected)

    # one function per column
    expected = pd.concat([a_mean, b_std], axis=1)
    for t in cases:
        result = t.aggregate({"A": np.mean, "B": np.std})
        assert_frame_equal(result, expected, check_like=True)

    # list of functions for a single column
    expected = pd.concat([a_mean, a_std], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")])
    for t in cases:
        result = t.aggregate({"A": ["mean", "std"]})
        assert_frame_equal(result, expected)

    # list of functions on a selected column
    expected = pd.concat([a_mean, a_sum], axis=1)
    expected.columns = ["mean", "sum"]
    for t in cases:
        result = t["A"].aggregate(["mean", "sum"])
        assert_frame_equal(result, expected)

    # nested renaming dict for one column
    expected = pd.concat([a_mean, a_sum], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "sum")])
    for t in cases:
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = t.aggregate({"A": {"mean": "mean", "sum": "sum"}})
        assert_frame_equal(result, expected, check_like=True)

    # nested renaming dicts for both columns
    expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
    expected.columns = pd.MultiIndex.from_tuples(
        [("A", "mean"), ("A", "sum"), ("B", "mean2"), ("B", "sum2")]
    )
    for t in cases:
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = t.aggregate(
                {
                    "A": {"mean": "mean", "sum": "sum"},
                    "B": {"mean2": "mean", "sum2": "sum"},
                }
            )
        assert_frame_equal(result, expected, check_like=True)

    # lists of functions for both columns
    expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
    expected.columns = pd.MultiIndex.from_tuples(
        [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")]
    )
    for t in cases:
        result = t.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]})
        assert_frame_equal(result, expected, check_like=True)
|
||||
|
||||
|
||||
def test_agg_misc():
    """Assorted ``agg`` specs behave alike for every object in ``cases``."""
    # test with all three Resampler apis and TimeGrouper

    np.random.seed(1234)
    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    index.name = "date"
    df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
    df_col = df.reset_index()
    df_mult = df_col.copy()
    df_mult.index = pd.MultiIndex.from_arrays(
        [range(10), df.index], names=["index", "date"]
    )

    r = df.resample("2D")
    cases = [
        r,
        df_col.resample("2D", on="date"),
        df_mult.resample("2D", level="date"),
        df.groupby(pd.Grouper(freq="2D")),
    ]

    # NOTE: ``obj`` is the loop variable of every loop below and is also
    # read *between* loops, where it still refers to the last entry of
    # ``cases``. That carry-over is intentional here and must be kept.

    # passed lambda
    for obj in cases:
        result = obj.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)})
        rcustom = obj["B"].apply(lambda x: np.std(x, ddof=1))
        expected = pd.concat([r["A"].sum(), rcustom], axis=1)
        assert_frame_equal(result, expected, check_like=True)

    # agg with renamers
    expected = pd.concat(
        [obj["A"].sum(), obj["B"].sum(), obj["A"].mean(), obj["B"].mean()], axis=1
    )
    expected.columns = pd.MultiIndex.from_tuples(
        [("result1", "A"), ("result1", "B"), ("result2", "A"), ("result2", "B")]
    )

    for obj in cases:
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = obj[["A", "B"]].agg(
                OrderedDict([("result1", np.sum), ("result2", np.mean)])
            )
        assert_frame_equal(result, expected, check_like=True)

    # agg with different hows
    expected = pd.concat(
        [obj["A"].sum(), obj["A"].std(), obj["B"].mean(), obj["B"].std()], axis=1
    )
    expected.columns = pd.MultiIndex.from_tuples(
        [("A", "sum"), ("A", "std"), ("B", "mean"), ("B", "std")]
    )
    for obj in cases:
        result = obj.agg(
            OrderedDict([("A", ["sum", "std"]), ("B", ["mean", "std"])])
        )
        assert_frame_equal(result, expected, check_like=True)

    # equivalent of using a selection list / or not
    for obj in cases:
        result = obj[["A", "B"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
        assert_frame_equal(result, expected, check_like=True)

    # series like aggs
    for obj in cases:
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = obj["A"].agg({"A": ["sum", "std"]})
        expected = pd.concat([obj["A"].sum(), obj["A"].std()], axis=1)
        expected.columns = pd.MultiIndex.from_tuples([("A", "sum"), ("A", "std")])
        assert_frame_equal(result, expected, check_like=True)

        expected = pd.concat(
            [obj["A"].agg(["sum", "std"]), obj["A"].agg(["mean", "std"])], axis=1
        )
        expected.columns = pd.MultiIndex.from_tuples(
            [("A", "sum"), ("A", "std"), ("B", "mean"), ("B", "std")]
        )
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = obj["A"].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
        assert_frame_equal(result, expected, check_like=True)

    # errors
    # invalid names in the agg specification
    msg = "\"Column 'B' does not exist!\""
    for obj in cases:
        with pytest.raises(KeyError, match=msg):
            obj[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
|
||||
|
||||
|
||||
def test_agg_nested_dicts():
    """Nested-dict ``agg`` specs either raise or warn, for every case."""
    np.random.seed(1234)
    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    index.name = "date"
    df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
    df_col = df.reset_index()
    df_mult = df_col.copy()
    df_mult.index = pd.MultiIndex.from_arrays(
        [range(10), df.index], names=["index", "date"]
    )
    r = df.resample("2D")
    cases = [
        r,
        df_col.resample("2D", on="date"),
        df_mult.resample("2D", level="date"),
        df.groupby(pd.Grouper(freq="2D")),
    ]

    # Outer keys that are not columns cannot be used for renaming.
    msg = r"cannot perform renaming for r(1|2) with a nested dictionary"
    for obj in cases:
        with pytest.raises(pd.core.base.SpecificationError, match=msg):
            obj.aggregate(
                {"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}}
            )

    # Outer keys that are columns: allowed, but with a FutureWarning,
    # both with and without an explicit column selection.
    for obj in cases:
        pieces = [
            obj["A"].mean(),
            obj["A"].std(),
            obj["B"].mean(),
            obj["B"].std(),
        ]
        expected = pd.concat(pieces, axis=1)
        expected.columns = pd.MultiIndex.from_tuples(
            [("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")]
        )

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = obj[["A", "B"]].agg(
                {"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}
            )
        assert_frame_equal(result, expected, check_like=True)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = obj.agg(
                {"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}
            )
        assert_frame_equal(result, expected, check_like=True)
|
||||
|
||||
|
||||
def test_try_aggregate_non_existing_column():
    """GH 16766: aggregating a column the frame lacks raises ``KeyError``."""
    samples = [(0, 1.0, 2.0), (1, 2.0, 2.0), (2, 3.0, 1.5)]
    records = [
        {"dt": datetime(2017, 6, 1, hour), "x": x, "y": y}
        for hour, x, y in samples
    ]
    df = DataFrame(records).set_index("dt")

    # "z" is deliberately absent from the frame.
    spec = {"x": ["mean"], "y": ["median"], "z": ["sum"]}
    msg = "\"Column 'z' does not exist!\""
    with pytest.raises(KeyError, match=msg):
        df.resample("30T").agg(spec)
|
||||
|
||||
|
||||
def test_selection_api_validation():
    """GH 13500: validate the ``on=`` / ``level=`` selection arguments."""
    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    positions = np.arange(len(index), dtype=np.int64)
    df = DataFrame(
        {"date": index, "a": positions},
        index=pd.MultiIndex.from_arrays([positions, index], names=["v", "d"]),
    )
    df_exp = DataFrame({"a": positions}, index=index)

    # Each entry: (exception type, message pattern, selection keywords).
    bad_selections = [
        # non DatetimeIndex
        (
            TypeError,
            (
                "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex,"
                " but got an instance of 'Int64Index'"
            ),
            {"level": "v"},
        ),
        (
            ValueError,
            "The Grouper cannot specify both a key and a level!",
            {"on": "date", "level": "d"},
        ),
        (TypeError, "unhashable type: 'list'", {"on": ["a", "date"]}),
        (
            KeyError,
            r"\"Level \['a', 'date'\] not found\"",
            {"level": ["a", "date"]},
        ),
    ]
    for exc_type, msg, selection in bad_selections:
        with pytest.raises(exc_type, match=msg):
            df.resample("2D", **selection)

    # upsampling not allowed
    msg = (
        "Upsampling from level= or on= selection is not supported, use"
        r" \.set_index\(\.\.\.\) to explicitly set index to datetime-like"
    )
    for selection in ({"level": "d"}, {"on": "date"}):
        with pytest.raises(ValueError, match=msg):
            df.resample("2D", **selection).asfreq()

    # Downsampling through either selection is compared against resampling
    # a plain DatetimeIndex; only the index name is adjusted per case.
    exp = df_exp.resample("2D").sum()
    for index_name, selection in (("date", {"on": "date"}), ("d", {"level": "d"})):
        exp.index.name = index_name
        assert_frame_equal(exp, df.resample("2D", **selection).sum())
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "col_name", ["t2", "t2x", "t2q", "T_2M", "t2p", "t2m", "t2m1", "T2M"]
)
def test_agg_with_datetime_index_list_agg_func(col_name):
    """GH 22660: list aggregation works for date-like column names.

    The parametrized column names would get converted to dates by our date
    parser.  Some would result in OutOfBoundsError (ValueError) while others
    would result in OverflowError when passed into Timestamp.  We catch
    these errors and move on to the correct branch.
    """
    quarter_hours = pd.date_range(
        start="2017-01-01", freq="15min", periods=200, tz="Europe/Berlin"
    )
    df = pd.DataFrame(list(range(200)), index=quarter_hours, columns=[col_name])
    result = df.resample("1d").aggregate(["mean"])

    days = pd.date_range(
        start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"
    )
    columns = pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]])
    expected = pd.DataFrame([47.5, 143.5, 195.5], index=days, columns=columns)
    assert_frame_equal(result, expected)
|
@@ -0,0 +1,278 @@
|
||||
from textwrap import dedent
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series, Timestamp
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
# Shared frame for the tests in this module: 40 rows at one-second spacing,
# "A" holds 1/2/3 in runs of 20/12/8 rows and "B" counts 0..39.
test_frame = DataFrame(
    data={"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)},
    index=date_range(start="1/1/2000", periods=40, freq="s"),
)
|
||||
|
||||
|
||||
def test_tab_complete_ipython6_warning(ip):
    """Tab-completing on a resampler inside IPython emits no warning."""
    from IPython.core.completer import provisionalcompleter

    snippet = dedent(
        """\
    import pandas.util.testing as tm
    s = tm.makeTimeSeries()
    rs = s.resample("D")
    """
    )
    ip.run_code(snippet)

    # The completions generator is drained so the completer actually runs.
    with tm.assert_produces_warning(None), provisionalcompleter("ignore"):
        list(ip.Completer.completions("rs.", 1))
|
||||
|
||||
|
||||
def test_deferred_with_groupby():
    """GH 12486: deferred resample ops on a groupby match a per-group apply."""
    rows = [
        ["2010-01-01", "A", 2],
        ["2010-01-02", "A", 3],
        ["2010-01-05", "A", 8],
        ["2010-01-10", "A", 7],
        ["2010-01-13", "A", 3],
        ["2010-01-01", "B", 5],
        ["2010-01-03", "B", 2],
        ["2010-01-04", "B", 1],
        ["2010-01-11", "B", 7],
        ["2010-01-14", "B", 3],
    ]
    scores = DataFrame(rows, columns=["date", "id", "score"])
    scores["date"] = pd.to_datetime(scores["date"])

    def _daily_asfreq(group):
        return group.set_index("date").resample("D").asfreq()

    expected = scores.groupby("id").apply(_daily_asfreq)
    result = scores.set_index("date").groupby("id").resample("D").asfreq()
    assert_frame_equal(result, expected)

    # Same comparison for a forward fill on weekly data.
    weekly = DataFrame(
        {
            "date": pd.date_range(start="2016-01-01", periods=4, freq="W"),
            "group": [1, 1, 2, 2],
            "val": [5, 6, 7, 8],
        }
    ).set_index("date")

    def _daily_ffill(group):
        return group.resample("1D").ffill()

    expected = weekly.groupby("group").apply(_daily_ffill)
    result = weekly.groupby("group").resample("1D").ffill()
    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem():
    """Selecting column ``B`` before, during or after resampling agrees."""
    grouped = test_frame.groupby("A")
    expected = grouped.B.apply(lambda ser: ser.resample("2s").mean())

    # The three positions at which the column can be picked.
    selections = (
        lambda: grouped.resample("2s").B.mean(),
        lambda: grouped.B.resample("2s").mean(),
        lambda: grouped.resample("2s").mean().B,
    )
    for compute in selections:
        assert_series_equal(compute(), expected)
|
||||
|
||||
|
||||
def test_getitem_multiple():
    """GH 13174: repeated calls after a selection give the same result."""
    records = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}]
    df = DataFrame(records, index=pd.date_range("2016-01-01", periods=2))
    resampler = df.groupby("id").resample("1D")

    expected_index = pd.MultiIndex.from_tuples(
        [(1, Timestamp("2016-01-01")), (2, Timestamp("2016-01-02"))],
        names=["id", None],
    )
    expected = Series([1, 1], index=expected_index, name="buyer")

    # Multiple calls after selection used to cause an issue with aliasing,
    # so the same select-and-count is performed twice.
    for _ in range(2):
        assert_series_equal(resampler["buyer"].count(), expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_api_with_getitem():
    """GH 17813: ``resample(on=...)`` after groupby matches using the index."""
    df = pd.DataFrame(
        {
            "id": list("aabbb"),
            "date": pd.date_range("1-1-2016", periods=5),
            "data": 1,
        }
    )
    via_index = df.set_index("date").groupby("id").resample("2D")["data"].sum()
    via_on = df.groupby("id").resample("2D", on="date")["data"].sum()
    assert_series_equal(via_on, via_index)
|
||||
|
||||
|
||||
def test_nearest():
    """GH 17496: ``nearest`` fill when upsampling minutes to 20 seconds."""
    minutes = pd.date_range("1/1/2000", periods=3, freq="T")
    result = Series(range(3), index=minutes).resample("20s").nearest()

    stamps = [
        "2000-01-01 00:00:00",
        "2000-01-01 00:00:20",
        "2000-01-01 00:00:40",
        "2000-01-01 00:01:00",
        "2000-01-01 00:01:20",
        "2000-01-01 00:01:40",
        "2000-01-01 00:02:00",
    ]
    expected_index = pd.DatetimeIndex(stamps, dtype="datetime64[ns]", freq="20S")
    expected = Series([0, 0, 1, 1, 1, 2, 2], index=expected_index)
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_methods():
    """Each resampler method on a groupby matches a per-group ``apply``."""
    g = test_frame.groupby("A")
    r = g.resample("2s")

    def _via_apply(groups, name, **kwargs):
        # Reference result: resample every group separately and call `name`.
        return groups.apply(
            lambda part: getattr(part.resample("2s"), name)(**kwargs)
        )

    reducers = ["first", "last", "median", "sem", "sum", "mean", "min", "max"]
    for name in reducers:
        assert_frame_equal(getattr(r, name)(), _via_apply(g, name))

    # `size` yields a Series, `count` a DataFrame.
    assert_series_equal(r.size(), _via_apply(g, "size"))
    assert_frame_equal(r.count(), _via_apply(g, "count"))

    # series only
    assert_series_equal(r.B.nunique(), _via_apply(g.B, "nunique"))

    for name in ["nearest", "backfill", "ffill", "asfreq"]:
        assert_frame_equal(getattr(r, name)(), _via_apply(g, name))

    assert_frame_equal(r.ohlc(), _via_apply(g, "ohlc"))

    for name in ["std", "var"]:
        assert_frame_equal(getattr(r, name)(ddof=1), _via_apply(g, name, ddof=1))
|
||||
|
||||
|
||||
def test_apply():
    """Applying a resample-and-sum function matches ``resample().sum()``."""
    grouped = test_frame.groupby("A")
    resampler = grouped.resample("2s")

    # reduction
    expected = grouped.resample("2s").sum()

    def sum_per_bin(chunk):
        return chunk.resample("2s").sum()

    assert_frame_equal(resampler.apply(sum_per_bin), expected)

    def apply_sum_per_bin(chunk):
        return chunk.resample("2s").apply(lambda col: col.sum())

    assert_frame_equal(grouped.apply(apply_sum_per_bin), expected)
|
||||
|
||||
|
||||
def test_apply_with_mutated_index():
|
||||
# GH 15169
|
||||
index = pd.date_range("1-1-2015", "12-31-15", freq="D")
|
||||
df = DataFrame(data={"col1": np.random.rand(len(index))}, index=index)
|
||||
|
||||
def f(x):
|
||||
s = Series([1, 2], index=["a", "b"])
|
||||
return s
|
||||
|
||||
expected = df.groupby(pd.Grouper(freq="M")).apply(f)
|
||||
|
||||
result = df.resample("M").apply(f)
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
# A case for series
|
||||
expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f)
|
||||
result = df["col1"].resample("M").apply(f)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_groupby_with_label():
    """GH 13235: ``label="left"`` is honoured for resample on a groupby."""
    every_other_day = date_range("2000-01-01", freq="2D", periods=5)
    df = DataFrame(
        index=every_other_day,
        data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]},
    )
    result = df.groupby("col0").resample("1W", label="left").sum()

    group_keys = np.array([0, 0, 1, 2])
    week_labels = pd.to_datetime(
        np.array(["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"])
    )
    expected_index = pd.MultiIndex.from_arrays(
        [group_keys, week_labels], names=["col0", None]
    )
    expected = DataFrame(
        data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=expected_index
    )

    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_consistency_with_window():
    """Grouped resample and grouped rolling return the same index layout.

    Both results must have a two-level index whose first level holds the
    group keys.
    """
    expected_keys = pd.Int64Index([1, 2, 3], name="A")

    operations = (
        lambda grouped: grouped.resample("2s").mean(),
        lambda grouped: grouped.rolling(20).mean(),
    )
    for operate in operations:
        result = operate(test_frame.groupby("A"))
        assert result.index.nlevels == 2
        tm.assert_index_equal(result.index.levels[0], expected_keys)
|
||||
|
||||
|
||||
def test_median_duplicate_columns():
    """GH 14233: median works when all columns share one label.

    The result is compared with the same data under unique column labels.
    """
    seconds = pd.date_range("2012-01-01", periods=20, freq="s")
    duplicated = DataFrame(
        np.random.randn(20, 3), columns=list("aaa"), index=seconds
    )
    relabelled = duplicated.copy()
    relabelled.columns = ["a", "b", "c"]

    expected = relabelled.resample("5s").median()
    result = duplicated.resample("5s").median()
    # Only the values are under test, so align the labels before comparing.
    expected.columns = result.columns
    assert_frame_equal(result, expected)
|
@@ -0,0 +1,279 @@
|
||||
from datetime import datetime
|
||||
from operator import methodcaller
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series
|
||||
from pandas.core.groupby.grouper import Grouper
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
# Shared series for the tests in this module: 1000 random normal values on
# consecutive dates starting 2000-01-01.
test_series = Series(
    data=np.random.randn(1000),
    index=date_range(start="1/1/2000", periods=1000),
)
|
||||
|
||||
|
||||
def test_apply():
    """Apply through an annual Grouper matches grouping by ``x.year``."""

    def last_three_sorted(values):
        return values.sort_values()[-3:]

    annual = Grouper(freq="A", label="right", closed="right")
    applied = test_series.groupby(annual).apply(last_three_sorted)
    expected = test_series.groupby(lambda stamp: stamp.year).apply(
        last_three_sorted
    )

    # Drop the outer (group key) level on both sides before comparing.
    applied.index = applied.index.droplevel(0)
    expected.index = expected.index.droplevel(0)
    assert_series_equal(applied, expected)
|
||||
|
||||
|
||||
def test_count():
    """``count`` via an annual Grouper and via resample match a by-year groupby.

    Every third value is set to NaN first so that ``count`` has missing
    values to skip.
    """
    # Work on a copy: writing NaNs into the module-level ``test_series``
    # would leak into every test that runs afterwards and make the outcome
    # of those tests depend on execution order.
    series = test_series.copy()
    series[::3] = np.nan

    expected = series.groupby(lambda x: x.year).count()

    grouper = Grouper(freq="A", label="right", closed="right")
    result = series.groupby(grouper).count()
    # The by-year groupby is labelled with ints; reuse the result's labels.
    expected.index = result.index
    assert_series_equal(result, expected)

    result = series.resample("A").count()
    expected.index = result.index
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_numpy_reduction():
    """Annual ``resample().prod()`` matches ``agg(np.prod)`` grouped by year."""
    resampled = test_series.resample("A", closed="right").prod()

    by_year = test_series.groupby(lambda stamp: stamp.year).agg(np.prod)
    # The by-year groupby is labelled with ints; reuse the resampled labels.
    by_year.index = resampled.index

    assert_series_equal(resampled, by_year)
|
||||
|
||||
|
||||
def test_apply_iteration():
|
||||
# #2300
|
||||
N = 1000
|
||||
ind = pd.date_range(start="2000-01-01", freq="D", periods=N)
|
||||
df = DataFrame({"open": 1, "close": 2}, index=ind)
|
||||
tg = Grouper(freq="M")
|
||||
|
||||
_, grouper, _ = tg._get_grouper(df)
|
||||
|
||||
# Errors
|
||||
grouped = df.groupby(grouper, group_keys=False)
|
||||
|
||||
def f(df):
|
||||
return df["close"] / df["open"]
|
||||
|
||||
# it works!
|
||||
result = grouped.apply(f)
|
||||
tm.assert_index_equal(result.index, df.index)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "name, func",
    [
        ("Int64Index", tm.makeIntIndex),
        ("Index", tm.makeUnicodeIndex),
        ("Float64Index", tm.makeFloatIndex),
        ("MultiIndex", lambda m: tm.makeCustomIndex(m, 2)),
    ],
)
def test_fails_on_no_datetime_index(name, func):
    """A frequency Grouper raises ``TypeError`` on a non-datetime-like index."""
    size = 2
    index = func(size)
    df = DataFrame({"a": np.random.randn(size)}, index=index)

    template = (
        "Only valid with DatetimeIndex, TimedeltaIndex "
        "or PeriodIndex, but got an instance of '{}'"
    )
    with pytest.raises(TypeError, match=template.format(name)):
        df.groupby(Grouper(freq="D"))
|
||||
|
||||
|
||||
def test_aaa_group_order():
    """GH 12840: check TimeGrouper performs stable sorts.

    Rows are assigned to five days in rotation, so the group for day ``k``
    must be exactly every fifth row starting at offset ``k - 1``.
    """
    n = 20
    data = np.random.randn(n, 4)
    df = DataFrame(data, columns=["A", "B", "C", "D"])
    df["key"] = [datetime(2013, 1, day) for day in range(1, 6)] * 4
    grouped = df.groupby(Grouper(key="key", freq="D"))

    for offset in range(5):
        day = datetime(2013, 1, offset + 1)
        tm.assert_frame_equal(grouped.get_group(day), df[offset::5])
|
||||
|
||||
|
||||
def test_aggregate_normal(resample_method):
    """Check TimeGrouper's aggregation is identical as normal groupby.

    The same random data is grouped once by an integer key and once by a
    daily ``Grouper`` on a datetime key laid out the same way; the
    aggregation named by ``resample_method`` must agree once the integer
    labels are replaced by the matching dates.
    """
    if resample_method == "ohlc":
        pytest.xfail(reason="DataError: No numeric types to aggregate")

    data = np.random.randn(20, 4)
    normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
    normal_df["key"] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
    dt_df["key"] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        datetime(2013, 1, 3),
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ] * 4

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    expected = getattr(normal_grouped, resample_method)()
    dt_result = getattr(dt_grouped, resample_method)()
    expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
    tm.assert_equal(expected, dt_result)

    # TODO: 'nth' doesn't work yet when a TimeGrouper is used, so it is not
    # checked here.  Once it does, compare ``normal_grouped.nth(3)``
    # (re-indexed with the same date_range as above) against
    # ``dt_grouped.nth(3)`` using assert_frame_equal.
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method, method_args, unit",
    [
        ("sum", {}, 0),
        ("sum", {"min_count": 0}, 0),
        ("sum", {"min_count": 1}, np.nan),
        ("prod", {}, 1),
        ("prod", {"min_count": 0}, 1),
        ("prod", {"min_count": 1}, np.nan),
    ],
)
def test_resample_entirly_nat_window(method, method_args, unit):
    """``sum``/``prod`` over an all-NaN bin yield ``unit`` for that bin."""
    # Two zeros followed by two NaNs: the second 2-day bin is entirely NaN.
    values = pd.Series(
        [0] * 2 + [np.nan] * 2, index=pd.date_range("2017", periods=4)
    )
    result = getattr(values.resample("2d"), method)(**method_args)

    bin_labels = pd.to_datetime(["2017-01-01", "2017-01-03"])
    expected = pd.Series([0.0, unit], index=bin_labels)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func, fill_value",
    [("min", np.nan), ("max", np.nan), ("sum", 0), ("prod", 1), ("count", 0)],
)
def test_aggregate_with_nat(func, fill_value):
    """Check TimeGrouper's aggregation is identical as normal groupby.

    The key column has a missing entry (NaN / NaT) in every third slot, so
    the plain groupby yields no group 3.  The expected frame pads that
    group with ``fill_value`` before being relabelled with dates.

    If NaT is included, 'var', 'std', 'mean', 'first', 'last' and 'nth'
    don't work yet, so they are not parametrized here.
    """
    n = 20
    data = np.random.randn(n, 4).astype("int64")
    normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
    normal_df["key"] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
    dt_df["key"] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        pd.NaT,
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ] * 4

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    normal_result = getattr(normal_grouped, func)()
    dt_result = getattr(dt_grouped, func)()

    pad = DataFrame([[fill_value] * 4], index=[3], columns=["A", "B", "C", "D"])
    # pd.concat is the supported equivalent of the deprecated
    # DataFrame.append for stacking the padding row.
    expected = pd.concat([normal_result, pad])
    expected = expected.sort_index()
    expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
    assert_frame_equal(expected, dt_result)
    assert dt_result.index.name == "key"
|
||||
|
||||
|
||||
def test_aggregate_with_nat_size():
|
||||
# GH 9925
|
||||
n = 20
|
||||
data = np.random.randn(n, 4).astype("int64")
|
||||
normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
|
||||
normal_df["key"] = [1, 2, np.nan, 4, 5] * 4
|
||||
|
||||
dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
|
||||
dt_df["key"] = [
|
||||
datetime(2013, 1, 1),
|
||||
datetime(2013, 1, 2),
|
||||
pd.NaT,
|
||||
datetime(2013, 1, 4),
|
||||
datetime(2013, 1, 5),
|
||||
] * 4
|
||||
|
||||
normal_grouped = normal_df.groupby("key")
|
||||
dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))
|
||||
|
||||
normal_result = normal_grouped.size()
|
||||
dt_result = dt_grouped.size()
|
||||
|
||||
pad = Series([0], index=[3])
|
||||
expected = normal_result.append(pad)
|
||||
expected = expected.sort_index()
|
||||
expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
|
||||
assert_series_equal(expected, dt_result)
|
||||
assert dt_result.index.name == "key"
|
||||
|
||||
|
||||
def test_repr():
|
||||
# GH18203
|
||||
result = repr(Grouper(key="A", freq="H"))
|
||||
expected = (
|
||||
"TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, "
|
||||
"closed='left', label='left', how='mean', "
|
||||
"convention='e', base=0)"
|
||||
)
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method, method_args, expected_values",
    [
        ("sum", {}, [1, 0, 1]),
        ("sum", {"min_count": 0}, [1, 0, 1]),
        ("sum", {"min_count": 1}, [1, np.nan, 1]),
        ("sum", {"min_count": 2}, [np.nan, np.nan, np.nan]),
        ("prod", {}, [1, 1, 1]),
        ("prod", {"min_count": 0}, [1, 1, 1]),
        ("prod", {"min_count": 1}, [1, np.nan, 1]),
        ("prod", {"min_count": 2}, [np.nan, np.nan, np.nan]),
    ],
)
def test_upsample_sum(method, method_args, expected_values):
    """``sum``/``prod`` with ``min_count`` when upsampling hourly to 30 min."""
    hourly = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H"))
    resampled = hourly.resample("30T")

    half_hours = pd.to_datetime(
        ["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"]
    )
    result = getattr(resampled, method)(**method_args)
    expected = pd.Series(expected_values, index=half_hours)
    tm.assert_series_equal(result, expected)
|
@@ -0,0 +1,128 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series
|
||||
from pandas.core.indexes.timedeltas import timedelta_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
|
||||
def test_asfreq_bug():
    """``asfreq`` on timedelta labels 0 and 3 minutes fills the gap with NaN."""
    endpoints = [timedelta(), timedelta(minutes=3)]
    df = DataFrame(data=[1, 3], index=endpoints)
    result = df.resample("1T").asfreq()

    every_minute = timedelta_range("0 day", periods=4, freq="1T")
    expected = DataFrame(data=[1, np.nan, np.nan, 3], index=every_minute)
    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_with_nat():
    """GH 13223: resample a frame whose timedelta index contains ``NaT``."""
    labels = pd.to_timedelta(["0s", pd.NaT, "2s"])
    frame = DataFrame({"value": [2, 3, 5]}, labels)
    result = frame.resample("1s").mean()

    every_second = timedelta_range("0 day", periods=3, freq="1S")
    expected = DataFrame({"value": [2.5, np.nan, 5.0]}, index=every_second)
    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_as_freq_with_subperiod():
    """GH 13022: ``asfreq`` from a 5-minute to a 2-minute timedelta grid."""
    five_minute = timedelta_range("00:00:00", "00:10:00", freq="5T")
    df = DataFrame(data={"value": [1, 5, 10]}, index=five_minute)
    result = df.resample("2T").asfreq()

    two_minute = timedelta_range("00:00:00", "00:10:00", freq="2T")
    expected = DataFrame(
        data={"value": [1, np.nan, np.nan, np.nan, np.nan, 10]},
        index=two_minute,
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_with_timedeltas():
    """30-minute sums over a minutely timedelta index, frame and series."""
    n_minutes = 1480

    # Reference: sum consecutive blocks of 30 rows, then label by timedelta.
    expected = DataFrame({"A": np.arange(n_minutes)})
    expected = expected.groupby(expected.index // 30).sum()
    expected.index = pd.timedelta_range("0 days", freq="30T", periods=50)

    df = DataFrame(
        {"A": np.arange(n_minutes)},
        index=pd.to_timedelta(np.arange(n_minutes), unit="T"),
    )
    frame_result = df.resample("30T").sum()
    assert_frame_equal(frame_result, expected)

    series_result = df["A"].resample("30T").sum()
    assert_series_equal(series_result, expected["A"])
|
||||
|
||||
|
||||
def test_resample_single_period_timedelta():
    """Five one-second values summed into 2-second bins give ``[1, 5, 4]``."""
    seconds = pd.timedelta_range("1 day", freq="s", periods=5)
    values = Series(list(range(5)), index=seconds)
    result = values.resample("2s").sum()

    two_seconds = pd.timedelta_range("1 day", freq="2s", periods=3)
    expected = Series([1, 5, 4], index=two_seconds)
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_timedelta_idempotency():
|
||||
|
||||
# GH 12072
|
||||
index = pd.timedelta_range("0", periods=9, freq="10L")
|
||||
series = Series(range(9), index=index)
|
||||
result = series.resample("10L").mean()
|
||||
expected = series
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_base_with_timedeltaindex():
|
||||
|
||||
# GH 10530
|
||||
rng = timedelta_range(start="0s", periods=25, freq="s")
|
||||
ts = Series(np.random.randn(len(rng)), index=rng)
|
||||
|
||||
with_base = ts.resample("2s", base=5).mean()
|
||||
without_base = ts.resample("2s").mean()
|
||||
|
||||
exp_without_base = timedelta_range(start="0s", end="25s", freq="2s")
|
||||
exp_with_base = timedelta_range(start="5s", end="29s", freq="2s")
|
||||
|
||||
tm.assert_index_equal(without_base.index, exp_without_base)
|
||||
tm.assert_index_equal(with_base.index, exp_with_base)
|
||||
|
||||
|
||||
def test_resample_categorical_data_with_timedeltaindex():
|
||||
# GH #12169
|
||||
df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s"))
|
||||
df["Group"] = df["Group_obj"].astype("category")
|
||||
result = df.resample("10s").agg(lambda x: (x.value_counts().index[0]))
|
||||
expected = DataFrame(
|
||||
{"Group_obj": ["A", "A"], "Group": ["A", "A"]},
|
||||
index=pd.to_timedelta([0, 10], unit="s"),
|
||||
)
|
||||
expected = expected.reindex(["Group_obj", "Group"], axis=1)
|
||||
expected["Group"] = expected["Group_obj"].astype("category")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_timedelta_values():
|
||||
# GH 13119
|
||||
# check that timedelta dtype is preserved when NaT values are
|
||||
# introduced by the resampling
|
||||
|
||||
times = timedelta_range("1 day", "4 day", freq="4D")
|
||||
df = DataFrame({"time": times}, index=times)
|
||||
|
||||
times2 = timedelta_range("1 day", "4 day", freq="2D")
|
||||
exp = Series(times2, index=times2, name="time")
|
||||
exp.iloc[1] = pd.NaT
|
||||
|
||||
res = df.resample("2D").first()["time"]
|
||||
tm.assert_series_equal(res, exp)
|
||||
res = df["time"].resample("2D").first()
|
||||
tm.assert_series_equal(res, exp)
|
Reference in New Issue
Block a user