8th day of python challenges 111-117

abd.shallal
2019-08-04 15:26:35 +03:00
parent b04c1b055f
commit 627802c383
3215 changed files with 760227 additions and 491 deletions


@@ -0,0 +1,158 @@
from datetime import datetime
import numpy as np
import pytest
from pandas import DataFrame, Series
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import period_range
# The various methods we support
downsample_methods = [
"min",
"max",
"first",
"last",
"sum",
"mean",
"sem",
"median",
"prod",
"var",
"std",
"ohlc",
"quantile",
]
upsample_methods = ["count", "size"]
series_methods = ["nunique"]
resample_methods = downsample_methods + upsample_methods + series_methods
@pytest.fixture(params=downsample_methods)
def downsample_method(request):
"""Fixture for parametrization of Grouper downsample methods."""
return request.param
@pytest.fixture(params=upsample_methods)
def upsample_method(request):
"""Fixture for parametrization of Grouper upsample methods."""
return request.param
@pytest.fixture(params=resample_methods)
def resample_method(request):
"""Fixture for parametrization of Grouper resample methods."""
return request.param
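# Illustrative sketch (not part of the original file): a test that accepts one of
# the fixtures above is collected once per method name, e.g.
#
#     def test_some_downsample_behaviour(downsample_method):
#         ...  # runs once each for "min", "max", "first", ..., "quantile"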
@pytest.fixture
def simple_date_range_series():
"""
Series with date range index and random data for test purposes.
"""
def _simple_date_range_series(start, end, freq="D"):
rng = date_range(start, end, freq=freq)
return Series(np.random.randn(len(rng)), index=rng)
return _simple_date_range_series
@pytest.fixture
def simple_period_range_series():
"""
Series with period range index and random data for test purposes.
"""
def _simple_period_range_series(start, end, freq="D"):
rng = period_range(start, end, freq=freq)
return Series(np.random.randn(len(rng)), index=rng)
return _simple_period_range_series
@pytest.fixture
def _index_start():
"""Fixture for parametrization of index, series and frame."""
return datetime(2005, 1, 1)
@pytest.fixture
def _index_end():
"""Fixture for parametrization of index, series and frame."""
return datetime(2005, 1, 10)
@pytest.fixture
def _index_freq():
"""Fixture for parametrization of index, series and frame."""
return "D"
@pytest.fixture
def _index_name():
"""Fixture for parametrization of index, series and frame."""
return None
@pytest.fixture
def index(_index_factory, _index_start, _index_end, _index_freq, _index_name):
"""Fixture for parametrization of date_range, period_range and
timedelta_range indexes"""
return _index_factory(_index_start, _index_end, freq=_index_freq, name=_index_name)
@pytest.fixture
def _static_values(index):
"""Fixture for parametrization of values used in parametrization of
Series and DataFrames with date_range, period_range and
timedelta_range indexes"""
return np.arange(len(index))
@pytest.fixture
def _series_name():
"""Fixture for parametrization of Series name for Series used with
date_range, period_range and timedelta_range indexes"""
return None
@pytest.fixture
def series(index, _series_name, _static_values):
"""Fixture for parametrization of Series with date_range, period_range and
timedelta_range indexes"""
return Series(_static_values, index=index, name=_series_name)
@pytest.fixture
def empty_series(series):
"""Fixture for parametrization of empty Series with date_range,
period_range and timedelta_range indexes"""
return series[:0]
@pytest.fixture
def frame(index, _series_name, _static_values):
"""Fixture for parametrization of DataFrame with date_range, period_range
and timedelta_range indexes"""
# _series_name is intentionally unused
return DataFrame({"value": _static_values}, index=index)
@pytest.fixture
def empty_frame(series):
"""Fixture for parametrization of empty DataFrame with date_range,
period_range and timedelta_range indexes"""
index = series.index[:0]
return DataFrame(index=index)
@pytest.fixture(params=[Series, DataFrame])
def series_and_frame(request, series, frame):
"""Fixture for parametrization of Series and DataFrame with date_range,
period_range and timedelta_range indexes"""
if request.param == Series:
return series
if request.param == DataFrame:
return frame
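# Added note (a sketch, not in the original conftest): downstream test modules
# customize these fixtures by re-registering the private ones, e.g. the
# period-index tests further below override `_index_factory`:
#
#     @pytest.fixture
#     def _index_factory():
#         return period_range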


@@ -0,0 +1,228 @@
from datetime import datetime, timedelta
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Series
from pandas.core.groupby.groupby import DataError
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import PeriodIndex, period_range
from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range
import pandas.util.testing as tm
from pandas.util.testing import (
assert_almost_equal,
assert_frame_equal,
assert_index_equal,
assert_series_equal,
)
# a fixture value can be overridden by the test parameter value. Note that the
# value of the fixture can be overridden this way even if the test doesn't use
# it directly (doesn't mention it in the function prototype).
# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa
# in this module we override the fixture values defined in conftest.py
# tuples of '_index_factory,_series_name,_index_start,_index_end'
DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10))
PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10))
TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day")
all_ts = pytest.mark.parametrize(
"_index_factory,_series_name,_index_start,_index_end",
[DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE],
)
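# Illustrative sketch (not in the original file) of how the override works: a test
# decorated with @all_ts is parametrized over the three index factories, and the
# conftest fixtures (`index`, `series`, `frame`, ...) are rebuilt from each tuple:
#
#     @all_ts
#     def test_sketch(series):
#         ...  # runs for date_range-, period_range- and timedelta_range-backed data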
@pytest.fixture
def create_index(_index_factory):
def _create_index(*args, **kwargs):
""" return the _index_factory created using the args, kwargs """
return _index_factory(*args, **kwargs)
return _create_index
@pytest.mark.parametrize("freq", ["2D", "1H"])
@pytest.mark.parametrize(
"_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq(series_and_frame, freq, create_index):
obj = series_and_frame
result = obj.resample(freq).asfreq()
new_index = create_index(obj.index[0], obj.index[-1], freq=freq)
expected = obj.reindex(new_index)
assert_almost_equal(result, expected)
@pytest.mark.parametrize(
"_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq_fill_value(series, create_index):
# test for fill value during resampling, issue 3715
s = series
result = s.resample("1H").asfreq()
new_index = create_index(s.index[0], s.index[-1], freq="1H")
expected = s.reindex(new_index)
assert_series_equal(result, expected)
frame = s.to_frame("value")
frame.iloc[1] = None
result = frame.resample("1H").asfreq(fill_value=4.0)
new_index = create_index(frame.index[0], frame.index[-1], freq="1H")
expected = frame.reindex(new_index, fill_value=4.0)
assert_frame_equal(result, expected)
@all_ts
def test_resample_interpolate(frame):
# GH 12925
df = frame
assert_frame_equal(
df.resample("1T").asfreq().interpolate(), df.resample("1T").interpolate()
)
def test_raises_on_non_datetimelike_index():
# this is a non-datetimelike index
xp = DataFrame()
msg = (
"Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex,"
" but got an instance of 'Index'"
)
with pytest.raises(TypeError, match=msg):
xp.resample("A").mean()
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_empty_series(freq, empty_series, resample_method):
# GH12771 & GH12868
if resample_method == "ohlc":
pytest.skip("need to test for ohlc from GH13083")
s = empty_series
result = getattr(s.resample(freq), resample_method)()
expected = s.copy()
if isinstance(s.index, PeriodIndex):
expected.index = s.index.asfreq(freq=freq)
else:
expected.index = s.index._shallow_copy(freq=freq)
assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq
assert_series_equal(result, expected, check_dtype=False)
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_empty_dataframe(empty_frame, freq, resample_method):
# GH13212
df = empty_frame
# count retains dimensions too
result = getattr(df.resample(freq), resample_method)()
if resample_method != "size":
expected = df.copy()
else:
# GH14962
expected = Series([])
if isinstance(df.index, PeriodIndex):
expected.index = df.index.asfreq(freq=freq)
else:
expected.index = df.index._shallow_copy(freq=freq)
assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq
assert_almost_equal(result, expected, check_dtype=False)
# test size for GH13212 (currently stays as df)
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
@pytest.mark.parametrize("dtype", [np.float, np.int, np.object, "datetime64[ns]"])
def test_resample_empty_dtypes(index, dtype, resample_method):
# Empty series were sometimes causing a segfault (for the functions
# with Cython bounds-checking disabled) or an IndexError. We just run
# them to ensure they no longer do. (GH #10228)
empty_series = Series([], index, dtype)
try:
getattr(empty_series.resample("d"), resample_method)()
except DataError:
# Ignore these since some combinations are invalid
# (ex: doing mean with dtype of np.object)
pass
@all_ts
def test_resample_loffset_arg_type(frame, create_index):
# GH 13218, 15002
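# loffset adjusts the labels of the resulting bins by the given offset ("2H" here)
# without changing how the values are grouped, hence the `+ timedelta(hours=2)` below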
df = frame
expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)]
expected_index = create_index(df.index[0], periods=len(df.index) / 2, freq="2D")
# loffset coerces PeriodIndex to DateTimeIndex
if isinstance(expected_index, PeriodIndex):
expected_index = expected_index.to_timestamp()
expected_index += timedelta(hours=2)
expected = DataFrame({"value": expected_means}, index=expected_index)
for arg in ["mean", {"value": "mean"}, ["mean"]]:
result_agg = df.resample("2D", loffset="2H").agg(arg)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result_how = df.resample("2D", how=arg, loffset="2H")
if isinstance(arg, list):
expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
# GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
if isinstance(expected.index, TimedeltaIndex):
msg = "DataFrame are different"
with pytest.raises(AssertionError, match=msg):
assert_frame_equal(result_agg, expected)
with pytest.raises(AssertionError, match=msg):
assert_frame_equal(result_how, expected)
else:
assert_frame_equal(result_agg, expected)
assert_frame_equal(result_how, expected)
@all_ts
def test_apply_to_empty_series(empty_series):
# GH 14313
s = empty_series
for freq in ["M", "D", "H"]:
result = s.resample(freq).apply(lambda x: 1)
expected = s.resample(freq).apply(np.sum)
assert_series_equal(result, expected, check_dtype=False)
@all_ts
def test_resampler_is_iterable(series):
# GH 15314
freq = "H"
tg = Grouper(freq=freq, convention="start")
grouped = series.groupby(tg)
resampled = series.resample(freq)
for (rk, rv), (gk, gv) in zip(resampled, grouped):
assert rk == gk
assert_series_equal(rv, gv)
@all_ts
def test_resample_quantile(series):
# GH 15023
s = series
q = 0.75
freq = "H"
result = s.resample(freq).quantile(q)
expected = s.resample(freq).agg(lambda x: x.quantile(q)).rename(s.name)
tm.assert_series_equal(result, expected)

File diff suppressed because it is too large


@@ -0,0 +1,880 @@
from datetime import datetime, timedelta
import dateutil
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs.ccalendar import DAYS, MONTHS
from pandas._libs.tslibs.period import IncompatibleFrequency
import pandas as pd
from pandas import DataFrame, Series, Timestamp
from pandas.core.indexes.base import InvalidIndexError
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import Period, PeriodIndex, period_range
from pandas.core.resample import _get_period_range_edges
import pandas.util.testing as tm
from pandas.util.testing import (
assert_almost_equal,
assert_frame_equal,
assert_series_equal,
)
import pandas.tseries.offsets as offsets
@pytest.fixture()
def _index_factory():
return period_range
@pytest.fixture
def _series_name():
return "pi"
class TestPeriodIndex:
@pytest.mark.parametrize("freq", ["2D", "1H", "2H"])
@pytest.mark.parametrize("kind", ["period", None, "timestamp"])
def test_asfreq(self, series_and_frame, freq, kind):
# GH 12884, 15944
# make sure .asfreq() returns PeriodIndex (except kind='timestamp')
obj = series_and_frame
if kind == "timestamp":
expected = obj.to_timestamp().resample(freq).asfreq()
else:
start = obj.index[0].to_timestamp(how="start")
end = (obj.index[-1] + obj.index.freq).to_timestamp(how="start")
new_index = date_range(start=start, end=end, freq=freq, closed="left")
expected = obj.to_timestamp().reindex(new_index).to_period(freq)
result = obj.resample(freq, kind=kind).asfreq()
assert_almost_equal(result, expected)
def test_asfreq_fill_value(self, series):
# test for fill value during resampling, issue 3715
s = series
new_index = date_range(
s.index[0].to_timestamp(how="start"),
(s.index[-1]).to_timestamp(how="start"),
freq="1H",
)
expected = s.to_timestamp().reindex(new_index, fill_value=4.0)
result = s.resample("1H", kind="timestamp").asfreq(fill_value=4.0)
assert_series_equal(result, expected)
frame = s.to_frame("value")
new_index = date_range(
frame.index[0].to_timestamp(how="start"),
(frame.index[-1]).to_timestamp(how="start"),
freq="1H",
)
expected = frame.to_timestamp().reindex(new_index, fill_value=3.0)
result = frame.resample("1H", kind="timestamp").asfreq(fill_value=3.0)
assert_frame_equal(result, expected)
@pytest.mark.parametrize("freq", ["H", "12H", "2D", "W"])
@pytest.mark.parametrize("kind", [None, "period", "timestamp"])
@pytest.mark.parametrize("kwargs", [dict(on="date"), dict(level="d")])
def test_selection(self, index, freq, kind, kwargs):
# This is a bug, these should be implemented
# GH 14008
rng = np.arange(len(index), dtype=np.int64)
df = DataFrame(
{"date": index, "a": rng},
index=pd.MultiIndex.from_arrays([rng, index], names=["v", "d"]),
)
msg = (
"Resampling from level= or on= selection with a PeriodIndex is"
r" not currently supported, use \.set_index\(\.\.\.\) to"
" explicitly set index"
)
with pytest.raises(NotImplementedError, match=msg):
df.resample(freq, kind=kind, **kwargs)
@pytest.mark.parametrize("month", MONTHS)
@pytest.mark.parametrize("meth", ["ffill", "bfill"])
@pytest.mark.parametrize("conv", ["start", "end"])
@pytest.mark.parametrize("targ", ["D", "B", "M"])
def test_annual_upsample_cases(
self, targ, conv, meth, month, simple_period_range_series
):
ts = simple_period_range_series(
"1/1/1990", "12/31/1991", freq="A-{month}".format(month=month)
)
result = getattr(ts.resample(targ, convention=conv), meth)()
expected = result.to_timestamp(targ, how=conv)
expected = expected.asfreq(targ, meth).to_period()
assert_series_equal(result, expected)
def test_basic_downsample(self, simple_period_range_series):
ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M")
result = ts.resample("a-dec").mean()
expected = ts.groupby(ts.index.year).mean()
expected.index = period_range("1/1/1990", "6/30/1995", freq="a-dec")
assert_series_equal(result, expected)
# this is ok
assert_series_equal(ts.resample("a-dec").mean(), result)
assert_series_equal(ts.resample("a").mean(), result)
@pytest.mark.parametrize(
"rule,expected_error_msg",
[
("a-dec", "<YearEnd: month=12>"),
("q-mar", "<QuarterEnd: startingMonth=3>"),
("M", "<MonthEnd>"),
("w-thu", "<Week: weekday=3>"),
],
)
def test_not_subperiod(self, simple_period_range_series, rule, expected_error_msg):
# These are incompatible period rules for resampling
ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="w-wed")
msg = (
"Frequency <Week: weekday=2> cannot be resampled to {}, as they"
" are not sub or super periods"
).format(expected_error_msg)
with pytest.raises(IncompatibleFrequency, match=msg):
ts.resample(rule).mean()
@pytest.mark.parametrize("freq", ["D", "2D"])
def test_basic_upsample(self, freq, simple_period_range_series):
ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M")
result = ts.resample("a-dec").mean()
resampled = result.resample(freq, convention="end").ffill()
expected = result.to_timestamp(freq, how="end")
expected = expected.asfreq(freq, "ffill").to_period(freq)
assert_series_equal(resampled, expected)
def test_upsample_with_limit(self):
rng = period_range("1/1/2000", periods=5, freq="A")
ts = Series(np.random.randn(len(rng)), rng)
result = ts.resample("M", convention="end").ffill(limit=2)
expected = ts.asfreq("M").reindex(result.index, method="ffill", limit=2)
assert_series_equal(result, expected)
def test_annual_upsample(self, simple_period_range_series):
ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="A-DEC")
df = DataFrame({"a": ts})
rdf = df.resample("D").ffill()
exp = df["a"].resample("D").ffill()
assert_series_equal(rdf["a"], exp)
rng = period_range("2000", "2003", freq="A-DEC")
ts = Series([1, 2, 3, 4], index=rng)
result = ts.resample("M").ffill()
ex_index = period_range("2000-01", "2003-12", freq="M")
expected = ts.asfreq("M", how="start").reindex(ex_index, method="ffill")
assert_series_equal(result, expected)
@pytest.mark.parametrize("month", MONTHS)
@pytest.mark.parametrize("target", ["D", "B", "M"])
@pytest.mark.parametrize("convention", ["start", "end"])
def test_quarterly_upsample(
self, month, target, convention, simple_period_range_series
):
freq = "Q-{month}".format(month=month)
ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq)
result = ts.resample(target, convention=convention).ffill()
expected = result.to_timestamp(target, how=convention)
expected = expected.asfreq(target, "ffill").to_period()
assert_series_equal(result, expected)
@pytest.mark.parametrize("target", ["D", "B"])
@pytest.mark.parametrize("convention", ["start", "end"])
def test_monthly_upsample(self, target, convention, simple_period_range_series):
ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M")
result = ts.resample(target, convention=convention).ffill()
expected = result.to_timestamp(target, how=convention)
expected = expected.asfreq(target, "ffill").to_period()
assert_series_equal(result, expected)
def test_resample_basic(self):
# GH3609
s = Series(
range(100),
index=date_range("20130101", freq="s", periods=100, name="idx"),
dtype="float",
)
s[10:30] = np.nan
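# expected means: the first minute averages positions 0-9 and 30-59 (10-29 are NaN
# and skipped) -> 34.5; the second minute averages 60-99 -> 79.5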
index = PeriodIndex(
[Period("2013-01-01 00:00", "T"), Period("2013-01-01 00:01", "T")],
name="idx",
)
expected = Series([34.5, 79.5], index=index)
result = s.to_period().resample("T", kind="period").mean()
assert_series_equal(result, expected)
result2 = s.resample("T", kind="period").mean()
assert_series_equal(result2, expected)
@pytest.mark.parametrize(
"freq,expected_vals", [("M", [31, 29, 31, 9]), ("2M", [31 + 29, 31 + 9])]
)
def test_resample_count(self, freq, expected_vals):
# GH12774
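# 100 daily periods starting 2000-01-01: 31 (Jan) + 29 (Feb 2000, a leap year)
# + 31 (Mar) + 9 (Apr) = 100, which gives the expected monthly counts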
series = Series(1, index=pd.period_range(start="2000", periods=100))
result = series.resample(freq).count()
expected_index = pd.period_range(
start="2000", freq=freq, periods=len(expected_vals)
)
expected = Series(expected_vals, index=expected_index)
assert_series_equal(result, expected)
def test_resample_same_freq(self, resample_method):
# GH12770
series = Series(
range(3), index=pd.period_range(start="2000", periods=3, freq="M")
)
expected = series
result = getattr(series.resample("M"), resample_method)()
assert_series_equal(result, expected)
def test_resample_incompat_freq(self):
msg = (
"Frequency <MonthEnd> cannot be resampled to <Week: weekday=6>,"
" as they are not sub or super periods"
)
with pytest.raises(IncompatibleFrequency, match=msg):
Series(
range(3), index=pd.period_range(start="2000", periods=3, freq="M")
).resample("W").mean()
def test_with_local_timezone_pytz(self):
# see gh-5430
local_timezone = pytz.timezone("America/Los_Angeles")
start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=pytz.utc)
# 1 day later
end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=pytz.utc)
index = pd.date_range(start, end, freq="H")
series = Series(1, index=index)
series = series.tz_convert(local_timezone)
result = series.resample("D", kind="period").mean()
# Create the expected series
# Index is moved back a day with the timezone conversion from UTC to
# Pacific
expected_index = pd.period_range(start=start, end=end, freq="D") - offsets.Day()
expected = Series(1, index=expected_index)
assert_series_equal(result, expected)
def test_resample_with_pytz(self):
# GH 13238
s = Series(
2, index=pd.date_range("2017-01-01", periods=48, freq="H", tz="US/Eastern")
)
result = s.resample("D").mean()
expected = Series(
2, index=pd.DatetimeIndex(["2017-01-01", "2017-01-02"], tz="US/Eastern")
)
assert_series_equal(result, expected)
# Especially assert that the timezone is LMT for pytz
assert result.index.tz == pytz.timezone("US/Eastern")
def test_with_local_timezone_dateutil(self):
# see gh-5430
local_timezone = "dateutil/America/Los_Angeles"
start = datetime(
year=2013, month=11, day=1, hour=0, minute=0, tzinfo=dateutil.tz.tzutc()
)
# 1 day later
end = datetime(
year=2013, month=11, day=2, hour=0, minute=0, tzinfo=dateutil.tz.tzutc()
)
index = pd.date_range(start, end, freq="H", name="idx")
series = Series(1, index=index)
series = series.tz_convert(local_timezone)
result = series.resample("D", kind="period").mean()
# Create the expected series
# Index is moved back a day with the timezone conversion from UTC to
# Pacific
expected_index = (
pd.period_range(start=start, end=end, freq="D", name="idx") - offsets.Day()
)
expected = Series(1, index=expected_index)
assert_series_equal(result, expected)
def test_resample_nonexistent_time_bin_edge(self):
# GH 19375
index = date_range("2017-03-12", "2017-03-12 1:45:00", freq="15T")
s = Series(np.zeros(len(index)), index=index)
expected = s.tz_localize("US/Pacific")
result = expected.resample("900S").mean()
tm.assert_series_equal(result, expected)
# GH 23742
index = date_range(start="2017-10-10", end="2017-10-20", freq="1H")
index = index.tz_localize("UTC").tz_convert("America/Sao_Paulo")
df = DataFrame(data=list(range(len(index))), index=index)
result = df.groupby(pd.Grouper(freq="1D")).count()
expected = date_range(
start="2017-10-09",
end="2017-10-20",
freq="D",
tz="America/Sao_Paulo",
nonexistent="shift_forward",
closed="left",
)
tm.assert_index_equal(result.index, expected)
def test_resample_ambiguous_time_bin_edge(self):
# GH 10117
idx = pd.date_range(
"2014-10-25 22:00:00", "2014-10-26 00:30:00", freq="30T", tz="Europe/London"
)
expected = Series(np.zeros(len(idx)), index=idx)
result = expected.resample("30T").mean()
tm.assert_series_equal(result, expected)
def test_fill_method_and_how_upsample(self):
# GH2073
s = Series(
np.arange(9, dtype="int64"),
index=date_range("2010-01-01", periods=9, freq="Q"),
)
last = s.resample("M").ffill()
both = s.resample("M").ffill().resample("M").last().astype("int64")
assert_series_equal(last, both)
@pytest.mark.parametrize("day", DAYS)
@pytest.mark.parametrize("target", ["D", "B"])
@pytest.mark.parametrize("convention", ["start", "end"])
def test_weekly_upsample(self, day, target, convention, simple_period_range_series):
freq = "W-{day}".format(day=day)
ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq)
result = ts.resample(target, convention=convention).ffill()
expected = result.to_timestamp(target, how=convention)
expected = expected.asfreq(target, "ffill").to_period()
assert_series_equal(result, expected)
def test_resample_to_timestamps(self, simple_period_range_series):
ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M")
result = ts.resample("A-DEC", kind="timestamp").mean()
expected = ts.to_timestamp(how="start").resample("A-DEC").mean()
assert_series_equal(result, expected)
def test_resample_to_quarterly(self, simple_period_range_series):
for month in MONTHS:
ts = simple_period_range_series(
"1990", "1992", freq="A-{month}".format(month=month)
)
quar_ts = ts.resample("Q-{month}".format(month=month)).ffill()
stamps = ts.to_timestamp("D", how="start")
qdates = period_range(
ts.index[0].asfreq("D", "start"),
ts.index[-1].asfreq("D", "end"),
freq="Q-{month}".format(month=month),
)
expected = stamps.reindex(qdates.to_timestamp("D", "s"), method="ffill")
expected.index = qdates
assert_series_equal(quar_ts, expected)
# conforms, but different month
ts = simple_period_range_series("1990", "1992", freq="A-JUN")
for how in ["start", "end"]:
result = ts.resample("Q-MAR", convention=how).ffill()
expected = ts.asfreq("Q-MAR", how=how)
expected = expected.reindex(result.index, method="ffill")
# .to_timestamp('D')
# expected = expected.resample('Q-MAR').ffill()
assert_series_equal(result, expected)
def test_resample_fill_missing(self):
rng = PeriodIndex([2000, 2005, 2007, 2009], freq="A")
s = Series(np.random.randn(4), index=rng)
stamps = s.to_timestamp()
filled = s.resample("A").ffill()
expected = stamps.resample("A").ffill().to_period("A")
assert_series_equal(filled, expected)
def test_cant_fill_missing_dups(self):
rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq="A")
s = Series(np.random.randn(5), index=rng)
msg = "Reindexing only valid with uniquely valued Index objects"
with pytest.raises(InvalidIndexError, match=msg):
s.resample("A").ffill()
@pytest.mark.parametrize("freq", ["5min"])
@pytest.mark.parametrize("kind", ["period", None, "timestamp"])
def test_resample_5minute(self, freq, kind):
rng = period_range("1/1/2000", "1/5/2000", freq="T")
ts = Series(np.random.randn(len(rng)), index=rng)
expected = ts.to_timestamp().resample(freq).mean()
if kind != "timestamp":
expected = expected.to_period(freq)
result = ts.resample(freq, kind=kind).mean()
assert_series_equal(result, expected)
def test_upsample_daily_business_daily(self, simple_period_range_series):
ts = simple_period_range_series("1/1/2000", "2/1/2000", freq="B")
result = ts.resample("D").asfreq()
expected = ts.asfreq("D").reindex(period_range("1/3/2000", "2/1/2000"))
assert_series_equal(result, expected)
ts = simple_period_range_series("1/1/2000", "2/1/2000")
result = ts.resample("H", convention="s").asfreq()
exp_rng = period_range("1/1/2000", "2/1/2000 23:00", freq="H")
expected = ts.asfreq("H", how="s").reindex(exp_rng)
assert_series_equal(result, expected)
def test_resample_irregular_sparse(self):
dr = date_range(start="1/1/2012", freq="5min", periods=1000)
s = Series(np.array(100), index=dr)
# subset the data.
subset = s[:"2012-01-04 06:55"]
result = subset.resample("10min").apply(len)
expected = s.resample("10min").apply(len).loc[result.index]
assert_series_equal(result, expected)
def test_resample_weekly_all_na(self):
rng = date_range("1/1/2000", periods=10, freq="W-WED")
ts = Series(np.random.randn(len(rng)), index=rng)
result = ts.resample("W-THU").asfreq()
assert result.isna().all()
result = ts.resample("W-THU").asfreq().ffill()[:-1]
expected = ts.asfreq("W-THU").ffill()
assert_series_equal(result, expected)
def test_resample_tz_localized(self):
dr = date_range(start="2012-4-13", end="2012-5-1")
ts = Series(range(len(dr)), index=dr)
ts_utc = ts.tz_localize("UTC")
ts_local = ts_utc.tz_convert("America/Los_Angeles")
result = ts_local.resample("W").mean()
ts_local_naive = ts_local.copy()
ts_local_naive.index = [
x.replace(tzinfo=None) for x in ts_local_naive.index.to_pydatetime()
]
exp = ts_local_naive.resample("W").mean().tz_localize("America/Los_Angeles")
assert_series_equal(result, exp)
# it works
result = ts_local.resample("D").mean()
# GH 2245
idx = date_range(
"2001-09-20 15:59", "2001-09-20 16:00", freq="T", tz="Australia/Sydney"
)
s = Series([1, 2], index=idx)
result = s.resample("D", closed="right", label="right").mean()
ex_index = date_range("2001-09-21", periods=1, freq="D", tz="Australia/Sydney")
expected = Series([1.5], index=ex_index)
assert_series_equal(result, expected)
# for good measure
result = s.resample("D", kind="period").mean()
ex_index = period_range("2001-09-20", periods=1, freq="D")
expected = Series([1.5], index=ex_index)
assert_series_equal(result, expected)
# GH 6397
# comparing an offset that doesn't propagate tz's
rng = date_range("1/1/2011", periods=20000, freq="H")
rng = rng.tz_localize("EST")
ts = DataFrame(index=rng)
ts["first"] = np.random.randn(len(rng))
ts["second"] = np.cumsum(np.random.randn(len(rng)))
expected = DataFrame(
{
"first": ts.resample("A").sum()["first"],
"second": ts.resample("A").mean()["second"],
},
columns=["first", "second"],
)
result = (
ts.resample("A")
.agg({"first": np.sum, "second": np.mean})
.reindex(columns=["first", "second"])
)
assert_frame_equal(result, expected)
def test_closed_left_corner(self):
# GH 1465
s = Series(
np.random.randn(21),
index=date_range(start="1/1/2012 9:30", freq="1min", periods=21),
)
s[0] = np.nan
result = s.resample("10min", closed="left", label="right").mean()
exp = s[1:].resample("10min", closed="left", label="right").mean()
assert_series_equal(result, exp)
result = s.resample("10min", closed="left", label="left").mean()
exp = s[1:].resample("10min", closed="left", label="left").mean()
ex_index = date_range(start="1/1/2012 9:30", freq="10min", periods=3)
tm.assert_index_equal(result.index, ex_index)
assert_series_equal(result, exp)
def test_quarterly_resampling(self):
rng = period_range("2000Q1", periods=10, freq="Q-DEC")
ts = Series(np.arange(10), index=rng)
result = ts.resample("A").mean()
exp = ts.to_timestamp().resample("A").mean().to_period()
assert_series_equal(result, exp)
def test_resample_weekly_bug_1726(self):
# 8/6/12 is a Monday
ind = date_range(start="8/6/2012", end="8/26/2012", freq="D")
n = len(ind)
data = [[x] * 5 for x in range(n)]
df = DataFrame(data, columns=["open", "high", "low", "close", "vol"], index=ind)
# it works!
df.resample("W-MON", closed="left", label="left").first()
def test_resample_with_dst_time_change(self):
# GH 15549
index = (
pd.DatetimeIndex([1457537600000000000, 1458059600000000000])
.tz_localize("UTC")
.tz_convert("America/Chicago")
)
df = pd.DataFrame([1, 2], index=index)
result = df.resample("12h", closed="right", label="right").last().ffill()
expected_index_values = [
"2016-03-09 12:00:00-06:00",
"2016-03-10 00:00:00-06:00",
"2016-03-10 12:00:00-06:00",
"2016-03-11 00:00:00-06:00",
"2016-03-11 12:00:00-06:00",
"2016-03-12 00:00:00-06:00",
"2016-03-12 12:00:00-06:00",
"2016-03-13 00:00:00-06:00",
"2016-03-13 13:00:00-05:00",
"2016-03-14 01:00:00-05:00",
"2016-03-14 13:00:00-05:00",
"2016-03-15 01:00:00-05:00",
"2016-03-15 13:00:00-05:00",
]
index = pd.to_datetime(expected_index_values, utc=True).tz_convert(
"America/Chicago"
)
expected = pd.DataFrame(
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0],
index=index,
)
assert_frame_equal(result, expected)
def test_resample_bms_2752(self):
# GH2753
foo = Series(index=pd.bdate_range("20000101", "20000201"))
res1 = foo.resample("BMS").mean()
res2 = foo.resample("BMS").mean().resample("B").mean()
assert res1.index[0] == Timestamp("20000103")
assert res1.index[0] == res2.index[0]
# def test_monthly_convention_span(self):
# rng = period_range('2000-01', periods=3, freq='M')
# ts = Series(np.arange(3), index=rng)
# # hacky way to get same thing
# exp_index = period_range('2000-01-01', '2000-03-31', freq='D')
# expected = ts.asfreq('D', how='end').reindex(exp_index)
# expected = expected.fillna(method='bfill')
# result = ts.resample('D', convention='span').mean()
# assert_series_equal(result, expected)
def test_default_right_closed_label(self):
end_freq = ["D", "Q", "M", "D"]
end_types = ["M", "A", "Q", "W"]
for from_freq, to_freq in zip(end_freq, end_types):
idx = date_range(start="8/15/2012", periods=100, freq=from_freq)
df = DataFrame(np.random.randn(len(idx), 2), idx)
resampled = df.resample(to_freq).mean()
assert_frame_equal(
resampled, df.resample(to_freq, closed="right", label="right").mean()
)
def test_default_left_closed_label(self):
others = ["MS", "AS", "QS", "D", "H"]
others_freq = ["D", "Q", "M", "H", "T"]
for from_freq, to_freq in zip(others_freq, others):
idx = date_range(start="8/15/2012", periods=100, freq=from_freq)
df = DataFrame(np.random.randn(len(idx), 2), idx)
resampled = df.resample(to_freq).mean()
assert_frame_equal(
resampled, df.resample(to_freq, closed="left", label="left").mean()
)
def test_all_values_single_bin(self):
# 2070
index = period_range(start="2012-01-01", end="2012-12-31", freq="M")
s = Series(np.random.randn(len(index)), index=index)
result = s.resample("A").mean()
tm.assert_almost_equal(result[0], s.mean())
def test_evenly_divisible_with_no_extra_bins(self):
# 4076
# when the frequency is evenly divisible, extra bins were sometimes created
df = DataFrame(np.random.randn(9, 3), index=date_range("2000-1-1", periods=9))
result = df.resample("5D").mean()
expected = pd.concat([df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T
expected.index = [Timestamp("2000-1-1"), Timestamp("2000-1-6")]
assert_frame_equal(result, expected)
index = date_range(start="2001-5-4", periods=28)
df = DataFrame(
[
{
"REST_KEY": 1,
"DLY_TRN_QT": 80,
"DLY_SLS_AMT": 90,
"COOP_DLY_TRN_QT": 30,
"COOP_DLY_SLS_AMT": 20,
}
]
* 28
+ [
{
"REST_KEY": 2,
"DLY_TRN_QT": 70,
"DLY_SLS_AMT": 10,
"COOP_DLY_TRN_QT": 50,
"COOP_DLY_SLS_AMT": 20,
}
]
* 28,
index=index.append(index),
).sort_index()
index = date_range("2001-5-4", periods=4, freq="7D")
expected = DataFrame(
[
{
"REST_KEY": 14,
"DLY_TRN_QT": 14,
"DLY_SLS_AMT": 14,
"COOP_DLY_TRN_QT": 14,
"COOP_DLY_SLS_AMT": 14,
}
]
* 4,
index=index,
)
result = df.resample("7D").count()
assert_frame_equal(result, expected)
expected = DataFrame(
[
{
"REST_KEY": 21,
"DLY_TRN_QT": 1050,
"DLY_SLS_AMT": 700,
"COOP_DLY_TRN_QT": 560,
"COOP_DLY_SLS_AMT": 280,
}
]
* 4,
index=index,
)
result = df.resample("7D").sum()
assert_frame_equal(result, expected)
@pytest.mark.parametrize("kind", ["period", None, "timestamp"])
@pytest.mark.parametrize("agg_arg", ["mean", {"value": "mean"}, ["mean"]])
def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg):
# make sure passing loffset returns DatetimeIndex in all cases
# basic method taken from Base.test_resample_loffset_arg_type()
df = frame
expected_means = [
df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)
]
expected_index = period_range(df.index[0], periods=len(df.index) / 2, freq="2D")
# loffset coerces PeriodIndex to DateTimeIndex
expected_index = expected_index.to_timestamp()
expected_index += timedelta(hours=2)
expected = DataFrame({"value": expected_means}, index=expected_index)
result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result_how = df.resample("2D", how=agg_arg, loffset="2H", kind=kind)
if isinstance(agg_arg, list):
expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
assert_frame_equal(result_agg, expected)
assert_frame_equal(result_how, expected)
@pytest.mark.parametrize("freq, period_mult", [("H", 24), ("12H", 2)])
@pytest.mark.parametrize("kind", [None, "period"])
def test_upsampling_ohlc(self, freq, period_mult, kind):
# GH 13083
pi = period_range(start="2000", freq="D", periods=10)
s = Series(range(len(pi)), index=pi)
expected = s.to_timestamp().resample(freq).ohlc().to_period(freq)
# timestamp-based resampling doesn't include all sub-periods
# of the last original period, so extend accordingly:
new_index = period_range(start="2000", freq=freq, periods=period_mult * len(pi))
expected = expected.reindex(new_index)
result = s.resample(freq, kind=kind).ohlc()
assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"periods, values",
[
(
[
pd.NaT,
"1970-01-01 00:00:00",
pd.NaT,
"1970-01-01 00:00:02",
"1970-01-01 00:00:03",
],
[2, 3, 5, 7, 11],
),
(
[
pd.NaT,
pd.NaT,
"1970-01-01 00:00:00",
pd.NaT,
pd.NaT,
pd.NaT,
"1970-01-01 00:00:02",
"1970-01-01 00:00:03",
pd.NaT,
pd.NaT,
],
[1, 2, 3, 5, 6, 8, 7, 11, 12, 13],
),
],
)
@pytest.mark.parametrize(
"freq, expected_values",
[
("1s", [3, np.NaN, 7, 11]),
("2s", [3, int((7 + 11) / 2)]),
("3s", [int((3 + 7) / 2), 11]),
],
)
def test_resample_with_nat(self, periods, values, freq, expected_values):
# GH 13224
index = PeriodIndex(periods, freq="S")
frame = DataFrame(values, index=index)
expected_index = period_range(
"1970-01-01 00:00:00", periods=len(expected_values), freq=freq
)
expected = DataFrame(expected_values, index=expected_index)
result = frame.resample(freq).mean()
assert_frame_equal(result, expected)
def test_resample_with_only_nat(self):
# GH 13224
pi = PeriodIndex([pd.NaT] * 3, freq="S")
frame = DataFrame([2, 3, 5], index=pi)
expected_index = PeriodIndex(data=[], freq=pi.freq)
expected = DataFrame(index=expected_index)
result = frame.resample("1s").mean()
assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"start,end,start_freq,end_freq,base",
[
("19910905", "19910909 03:00", "H", "24H", 10),
("19910905", "19910909 12:00", "H", "24H", 10),
("19910905", "19910909 23:00", "H", "24H", 10),
("19910905 10:00", "19910909", "H", "24H", 10),
("19910905 10:00", "19910909 10:00", "H", "24H", 10),
("19910905", "19910909 10:00", "H", "24H", 10),
("19910905 12:00", "19910909", "H", "24H", 10),
("19910905 12:00", "19910909 03:00", "H", "24H", 10),
("19910905 12:00", "19910909 12:00", "H", "24H", 10),
("19910905 12:00", "19910909 12:00", "H", "24H", 34),
("19910905 12:00", "19910909 12:00", "H", "17H", 10),
("19910905 12:00", "19910909 12:00", "H", "17H", 3),
("19910905 12:00", "19910909 1:00", "H", "M", 3),
("19910905", "19910913 06:00", "2H", "24H", 10),
("19910905", "19910905 01:39", "Min", "5Min", 3),
("19910905", "19910905 03:18", "2Min", "5Min", 3),
],
)
def test_resample_with_non_zero_base(self, start, end, start_freq, end_freq, base):
# GH 23882
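# `base` shifts where the resampling bins are anchored (e.g. base=10 with a "24H"
# target frequency anchors the daily bins at 10:00 rather than midnight)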
s = pd.Series(0, index=pd.period_range(start, end, freq=start_freq))
s = s + np.arange(len(s))
result = s.resample(end_freq, base=base).mean()
result = result.to_timestamp(end_freq)
# to_timestamp casts 24H -> D
result = result.asfreq(end_freq) if end_freq == "24H" else result
expected = s.to_timestamp().resample(end_freq, base=base).mean()
assert_series_equal(result, expected)
@pytest.mark.parametrize(
"first,last,offset,exp_first,exp_last",
[
("19910905", "19920406", "D", "19910905", "19920406"),
("19910905 00:00", "19920406 06:00", "D", "19910905", "19920406"),
(
"19910905 06:00",
"19920406 06:00",
"H",
"19910905 06:00",
"19920406 06:00",
),
("19910906", "19920406", "M", "1991-09", "1992-04"),
("19910831", "19920430", "M", "1991-08", "1992-04"),
("1991-08", "1992-04", "M", "1991-08", "1992-04"),
],
)
def test_get_period_range_edges(self, first, last, offset, exp_first, exp_last):
first = pd.Period(first)
last = pd.Period(last)
exp_first = pd.Period(exp_first, freq=offset)
exp_last = pd.Period(exp_last, freq=offset)
offset = pd.tseries.frequencies.to_offset(offset)
result = _get_period_range_edges(first, last, offset)
expected = (exp_first, exp_last)
assert result == expected


@@ -0,0 +1,567 @@
from collections import OrderedDict
from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Series
from pandas.core.indexes.datetimes import date_range
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="Min")
test_series = Series(np.random.rand(len(dti)), dti)
_test_frame = DataFrame({"A": test_series, "B": test_series, "C": np.arange(len(dti))})
@pytest.fixture
def test_frame():
return _test_frame.copy()
def test_str():
r = test_series.resample("H")
assert (
"DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, "
"label=left, convention=start, base=0]" in str(r)
)
def test_api():
r = test_series.resample("H")
result = r.mean()
assert isinstance(result, Series)
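# minute data from 2005-01-01 00:00 through 2005-01-10 00:00 resampled hourly
# -> 9 * 24 + 1 == 217 bins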
assert len(result) == 217
r = test_series.to_frame().resample("H")
result = r.mean()
assert isinstance(result, DataFrame)
assert len(result) == 217
def test_groupby_resample_api():
# GH 12448
# .groupby(...).resample(...) hitting warnings
# when appropriate
df = DataFrame(
{
"date": pd.date_range(start="2016-01-01", periods=4, freq="W"),
"group": [1, 1, 2, 2],
"val": [5, 6, 7, 8],
}
).set_index("date")
# replication step
i = (
pd.date_range("2016-01-03", periods=8).tolist()
+ pd.date_range("2016-01-17", periods=8).tolist()
)
index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i], names=["group", "date"])
expected = DataFrame({"val": [5] * 7 + [6] + [7] * 7 + [8]}, index=index)
result = df.groupby("group").apply(lambda x: x.resample("1D").ffill())[["val"]]
assert_frame_equal(result, expected)
def test_groupby_resample_on_api():
# GH 15021
# .groupby(...).resample(on=...) results in an unexpected
# keyword warning.
df = DataFrame(
{
"key": ["A", "B"] * 5,
"dates": pd.date_range("2016-01-01", periods=10),
"values": np.random.randn(10),
}
)
expected = df.set_index("dates").groupby("key").resample("D").mean()
result = df.groupby("key").resample("D", on="dates").mean()
assert_frame_equal(result, expected)
def test_pipe(test_frame):
# GH17905
# series
r = test_series.resample("H")
expected = r.max() - r.mean()
result = r.pipe(lambda x: x.max() - x.mean())
tm.assert_series_equal(result, expected)
# dataframe
r = test_frame.resample("H")
expected = r.max() - r.mean()
result = r.pipe(lambda x: x.max() - x.mean())
tm.assert_frame_equal(result, expected)
def test_getitem(test_frame):
r = test_frame.resample("H")
tm.assert_index_equal(r._selected_obj.columns, test_frame.columns)
r = test_frame.resample("H")["B"]
assert r._selected_obj.name == test_frame.columns[1]
# technically this is allowed
r = test_frame.resample("H")["A", "B"]
tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]])
r = test_frame.resample("H")["A", "B"]
tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]])
@pytest.mark.parametrize("key", [["D"], ["A", "D"]])
def test_select_bad_cols(key, test_frame):
g = test_frame.resample("H")
# 'A' should not be referenced as a bad column...
# will have to rethink regex if you change message!
msg = r"^\"Columns not found: 'D'\"$"
with pytest.raises(KeyError, match=msg):
g[key]
def test_attribute_access(test_frame):
r = test_frame.resample("H")
tm.assert_series_equal(r.A.sum(), r["A"].sum())
def test_api_compat_before_use():
# make sure that we are setting the binner
# on these attributes
for attr in ["groups", "ngroups", "indices"]:
rng = pd.date_range("1/1/2012", periods=100, freq="S")
ts = Series(np.arange(len(rng)), index=rng)
rs = ts.resample("30s")
# before use
getattr(rs, attr)
# after grouper is initialized is ok
rs.mean()
getattr(rs, attr)
def tests_skip_nuisance(test_frame):
df = test_frame
df["D"] = "foo"
r = df.resample("H")
result = r[["A", "B"]].sum()
expected = pd.concat([r.A.sum(), r.B.sum()], axis=1)
assert_frame_equal(result, expected)
expected = r[["A", "B", "C"]].sum()
result = r.sum()
assert_frame_equal(result, expected)
def test_downsample_but_actually_upsampling():
# this is reindex / asfreq
rng = pd.date_range("1/1/2012", periods=100, freq="S")
ts = Series(np.arange(len(rng), dtype="int64"), index=rng)
result = ts.resample("20s").asfreq()
expected = Series(
[0, 20, 40, 60, 80],
index=pd.date_range("2012-01-01 00:00:00", freq="20s", periods=5),
)
assert_series_equal(result, expected)
def test_combined_up_downsampling_of_irregular():
# since we are really doing an operation like this
# ts2.resample('2s').mean().ffill()
# preserve these semantics
rng = pd.date_range("1/1/2012", periods=100, freq="S")
ts = Series(np.arange(len(rng)), index=rng)
ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ts2.resample("2s", how="mean", fill_method="ffill")
expected = ts2.resample("2s").mean().ffill()
assert_series_equal(result, expected)
def test_transform():
r = test_series.resample("20min")
expected = test_series.groupby(pd.Grouper(freq="20min")).transform("mean")
result = r.transform("mean")
assert_series_equal(result, expected)
def test_fillna():
# need to upsample here
rng = pd.date_range("1/1/2012", periods=10, freq="2S")
ts = Series(np.arange(len(rng), dtype="int64"), index=rng)
r = ts.resample("s")
expected = r.ffill()
result = r.fillna(method="ffill")
assert_series_equal(result, expected)
expected = r.bfill()
result = r.fillna(method="bfill")
assert_series_equal(result, expected)
msg = (
r"Invalid fill method\. Expecting pad \(ffill\), backfill"
r" \(bfill\) or nearest\. Got 0"
)
with pytest.raises(ValueError, match=msg):
r.fillna(0)
def test_apply_without_aggregation():
# both resample and groupby should work w/o aggregation
r = test_series.resample("20min")
g = test_series.groupby(pd.Grouper(freq="20min"))
for t in [g, r]:
result = t.apply(lambda x: x)
assert_series_equal(result, test_series)
def test_agg_consistency():
# make sure that we are consistent across
# similar aggregations with and w/o selection list
df = DataFrame(
np.random.randn(1000, 3),
index=pd.date_range("1/1/2012", freq="S", periods=1000),
columns=["A", "B", "C"],
)
r = df.resample("3T")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
expected = r[["A", "B", "C"]].agg({"r1": "mean", "r2": "sum"})
result = r.agg({"r1": "mean", "r2": "sum"})
assert_frame_equal(result, expected, check_like=True)
# TODO: once GH 14008 is fixed, move these tests into
# `Base` test class
def test_agg():
# test with all three Resampler apis and TimeGrouper
np.random.seed(1234)
index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
index.name = "date"
df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
df_col = df.reset_index()
df_mult = df_col.copy()
df_mult.index = pd.MultiIndex.from_arrays(
[range(10), df.index], names=["index", "date"]
)
r = df.resample("2D")
cases = [
r,
df_col.resample("2D", on="date"),
df_mult.resample("2D", level="date"),
df.groupby(pd.Grouper(freq="2D")),
]
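# all four objects above are expected to aggregate identically: resampling on the
# DatetimeIndex, on a date column (on=), on a MultiIndex level (level=), and a
# groupby with pd.Grouper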
a_mean = r["A"].mean()
a_std = r["A"].std()
a_sum = r["A"].sum()
b_mean = r["B"].mean()
b_std = r["B"].std()
b_sum = r["B"].sum()
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
for t in cases:
result = t.aggregate([np.mean, np.std])
assert_frame_equal(result, expected)
expected = pd.concat([a_mean, b_std], axis=1)
for t in cases:
result = t.aggregate({"A": np.mean, "B": np.std})
assert_frame_equal(result, expected, check_like=True)
expected = pd.concat([a_mean, a_std], axis=1)
expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")])
for t in cases:
result = t.aggregate({"A": ["mean", "std"]})
assert_frame_equal(result, expected)
expected = pd.concat([a_mean, a_sum], axis=1)
expected.columns = ["mean", "sum"]
for t in cases:
result = t["A"].aggregate(["mean", "sum"])
assert_frame_equal(result, expected)
expected = pd.concat([a_mean, a_sum], axis=1)
expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "sum")])
for t in cases:
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = t.aggregate({"A": {"mean": "mean", "sum": "sum"}})
assert_frame_equal(result, expected, check_like=True)
expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
expected.columns = pd.MultiIndex.from_tuples(
[("A", "mean"), ("A", "sum"), ("B", "mean2"), ("B", "sum2")]
)
for t in cases:
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = t.aggregate(
{
"A": {"mean": "mean", "sum": "sum"},
"B": {"mean2": "mean", "sum2": "sum"},
}
)
assert_frame_equal(result, expected, check_like=True)
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
expected.columns = pd.MultiIndex.from_tuples(
[("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")]
)
for t in cases:
result = t.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]})
assert_frame_equal(result, expected, check_like=True)
expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
expected.columns = pd.MultiIndex.from_tuples(
[
("r1", "A", "mean"),
("r1", "A", "sum"),
("r2", "B", "mean"),
("r2", "B", "sum"),
]
)
def test_agg_misc():
# test with all three Resampler apis and TimeGrouper
np.random.seed(1234)
index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
index.name = "date"
df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
df_col = df.reset_index()
df_mult = df_col.copy()
df_mult.index = pd.MultiIndex.from_arrays(
[range(10), df.index], names=["index", "date"]
)
r = df.resample("2D")
cases = [
r,
df_col.resample("2D", on="date"),
df_mult.resample("2D", level="date"),
df.groupby(pd.Grouper(freq="2D")),
]
# passed lambda
for t in cases:
result = t.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)})
rcustom = t["B"].apply(lambda x: np.std(x, ddof=1))
expected = pd.concat([r["A"].sum(), rcustom], axis=1)
assert_frame_equal(result, expected, check_like=True)
# agg with renamers
expected = pd.concat(
[t["A"].sum(), t["B"].sum(), t["A"].mean(), t["B"].mean()], axis=1
)
expected.columns = pd.MultiIndex.from_tuples(
[("result1", "A"), ("result1", "B"), ("result2", "A"), ("result2", "B")]
)
for t in cases:
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = t[["A", "B"]].agg(
OrderedDict([("result1", np.sum), ("result2", np.mean)])
)
assert_frame_equal(result, expected, check_like=True)
# agg with different hows
expected = pd.concat(
[t["A"].sum(), t["A"].std(), t["B"].mean(), t["B"].std()], axis=1
)
expected.columns = pd.MultiIndex.from_tuples(
[("A", "sum"), ("A", "std"), ("B", "mean"), ("B", "std")]
)
for t in cases:
result = t.agg(OrderedDict([("A", ["sum", "std"]), ("B", ["mean", "std"])]))
assert_frame_equal(result, expected, check_like=True)
# equivalent of using a selection list / or not
for t in cases:
result = t[["A", "B"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
assert_frame_equal(result, expected, check_like=True)
# series like aggs
for t in cases:
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = t["A"].agg({"A": ["sum", "std"]})
expected = pd.concat([t["A"].sum(), t["A"].std()], axis=1)
expected.columns = pd.MultiIndex.from_tuples([("A", "sum"), ("A", "std")])
assert_frame_equal(result, expected, check_like=True)
expected = pd.concat(
[t["A"].agg(["sum", "std"]), t["A"].agg(["mean", "std"])], axis=1
)
expected.columns = pd.MultiIndex.from_tuples(
[("A", "sum"), ("A", "std"), ("B", "mean"), ("B", "std")]
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = t["A"].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
assert_frame_equal(result, expected, check_like=True)
# errors
# invalid names in the agg specification
msg = "\"Column 'B' does not exist!\""
for t in cases:
with pytest.raises(KeyError, match=msg):
t[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
def test_agg_nested_dicts():
np.random.seed(1234)
index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
index.name = "date"
df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
df_col = df.reset_index()
df_mult = df_col.copy()
df_mult.index = pd.MultiIndex.from_arrays(
[range(10), df.index], names=["index", "date"]
)
r = df.resample("2D")
cases = [
r,
df_col.resample("2D", on="date"),
df_mult.resample("2D", level="date"),
df.groupby(pd.Grouper(freq="2D")),
]
msg = r"cannot perform renaming for r(1|2) with a nested dictionary"
for t in cases:
with pytest.raises(pd.core.base.SpecificationError, match=msg):
t.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}})
for t in cases:
expected = pd.concat(
[t["A"].mean(), t["A"].std(), t["B"].mean(), t["B"].std()], axis=1
)
expected.columns = pd.MultiIndex.from_tuples(
[("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")]
)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = t[["A", "B"]].agg(
{"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}
)
assert_frame_equal(result, expected, check_like=True)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = t.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})
assert_frame_equal(result, expected, check_like=True)
def test_try_aggregate_non_existing_column():
# GH 16766
data = [
{"dt": datetime(2017, 6, 1, 0), "x": 1.0, "y": 2.0},
{"dt": datetime(2017, 6, 1, 1), "x": 2.0, "y": 2.0},
{"dt": datetime(2017, 6, 1, 2), "x": 3.0, "y": 1.5},
]
df = DataFrame(data).set_index("dt")
# Error as we don't have 'z' column
msg = "\"Column 'z' does not exist!\""
with pytest.raises(KeyError, match=msg):
df.resample("30T").agg({"x": ["mean"], "y": ["median"], "z": ["sum"]})
def test_selection_api_validation():
# GH 13500
index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
rng = np.arange(len(index), dtype=np.int64)
df = DataFrame(
{"date": index, "a": rng},
index=pd.MultiIndex.from_arrays([rng, index], names=["v", "d"]),
)
df_exp = DataFrame({"a": rng}, index=index)
# non DatetimeIndex
msg = (
"Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex,"
" but got an instance of 'Int64Index'"
)
with pytest.raises(TypeError, match=msg):
df.resample("2D", level="v")
msg = "The Grouper cannot specify both a key and a level!"
with pytest.raises(ValueError, match=msg):
df.resample("2D", on="date", level="d")
msg = "unhashable type: 'list'"
with pytest.raises(TypeError, match=msg):
df.resample("2D", on=["a", "date"])
msg = r"\"Level \['a', 'date'\] not found\""
with pytest.raises(KeyError, match=msg):
df.resample("2D", level=["a", "date"])
# upsampling not allowed
msg = (
"Upsampling from level= or on= selection is not supported, use"
r" \.set_index\(\.\.\.\) to explicitly set index to datetime-like"
)
with pytest.raises(ValueError, match=msg):
df.resample("2D", level="d").asfreq()
with pytest.raises(ValueError, match=msg):
df.resample("2D", on="date").asfreq()
exp = df_exp.resample("2D").sum()
exp.index.name = "date"
assert_frame_equal(exp, df.resample("2D", on="date").sum())
exp.index.name = "d"
assert_frame_equal(exp, df.resample("2D", level="d").sum())
@pytest.mark.parametrize(
"col_name", ["t2", "t2x", "t2q", "T_2M", "t2p", "t2m", "t2m1", "T2M"]
)
def test_agg_with_datetime_index_list_agg_func(col_name):
# GH 22660
# The parametrized column names would get converted to dates by our
# date parser. Some would result in OutOfBoundsError (ValueError) while
# others would result in OverflowError when passed into Timestamp.
# We catch these errors and move on to the correct branch.
df = pd.DataFrame(
list(range(200)),
index=pd.date_range(
start="2017-01-01", freq="15min", periods=200, tz="Europe/Berlin"
),
columns=[col_name],
)
result = df.resample("1d").aggregate(["mean"])
expected = pd.DataFrame(
[47.5, 143.5, 195.5],
index=pd.date_range(
start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"
),
columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]),
)
assert_frame_equal(result, expected)


@@ -0,0 +1,278 @@
from textwrap import dedent
import numpy as np
import pandas as pd
from pandas import DataFrame, Series, Timestamp
from pandas.core.indexes.datetimes import date_range
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
test_frame = DataFrame(
{"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)},
index=date_range("1/1/2000", freq="s", periods=40),
)
def test_tab_complete_ipython6_warning(ip):
from IPython.core.completer import provisionalcompleter
code = dedent(
"""\
import pandas.util.testing as tm
s = tm.makeTimeSeries()
rs = s.resample("D")
"""
)
ip.run_code(code)
with tm.assert_produces_warning(None):
with provisionalcompleter("ignore"):
list(ip.Completer.completions("rs.", 1))
def test_deferred_with_groupby():
# GH 12486
# support deferred resample ops with groupby
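# i.e. df.groupby(...).resample(...) should match applying the same resample
# inside each group via groupby(...).apply(...)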
data = [
["2010-01-01", "A", 2],
["2010-01-02", "A", 3],
["2010-01-05", "A", 8],
["2010-01-10", "A", 7],
["2010-01-13", "A", 3],
["2010-01-01", "B", 5],
["2010-01-03", "B", 2],
["2010-01-04", "B", 1],
["2010-01-11", "B", 7],
["2010-01-14", "B", 3],
]
df = DataFrame(data, columns=["date", "id", "score"])
df.date = pd.to_datetime(df.date)
def f(x):
return x.set_index("date").resample("D").asfreq()
expected = df.groupby("id").apply(f)
result = df.set_index("date").groupby("id").resample("D").asfreq()
assert_frame_equal(result, expected)
df = DataFrame(
{
"date": pd.date_range(start="2016-01-01", periods=4, freq="W"),
"group": [1, 1, 2, 2],
"val": [5, 6, 7, 8],
}
).set_index("date")
def f(x):
return x.resample("1D").ffill()
expected = df.groupby("group").apply(f)
result = df.groupby("group").resample("1D").ffill()
assert_frame_equal(result, expected)
def test_getitem():
g = test_frame.groupby("A")
expected = g.B.apply(lambda x: x.resample("2s").mean())
result = g.resample("2s").B.mean()
assert_series_equal(result, expected)
result = g.B.resample("2s").mean()
assert_series_equal(result, expected)
result = g.resample("2s").mean().B
assert_series_equal(result, expected)
def test_getitem_multiple():
# GH 13174
# multiple calls after selection causing an issue with aliasing
data = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}]
df = DataFrame(data, index=pd.date_range("2016-01-01", periods=2))
r = df.groupby("id").resample("1D")
result = r["buyer"].count()
expected = Series(
[1, 1],
index=pd.MultiIndex.from_tuples(
[(1, Timestamp("2016-01-01")), (2, Timestamp("2016-01-02"))],
names=["id", None],
),
name="buyer",
)
assert_series_equal(result, expected)
result = r["buyer"].count()
assert_series_equal(result, expected)
def test_groupby_resample_on_api_with_getitem():
# GH 17813
df = pd.DataFrame(
{"id": list("aabbb"), "date": pd.date_range("1-1-2016", periods=5), "data": 1}
)
exp = df.set_index("date").groupby("id").resample("2D")["data"].sum()
result = df.groupby("id").resample("2D", on="date")["data"].sum()
assert_series_equal(result, exp)
def test_nearest():
# GH 17496
# Resample nearest
index = pd.date_range("1/1/2000", periods=3, freq="T")
result = Series(range(3), index=index).resample("20s").nearest()
expected = Series(
[0, 0, 1, 1, 1, 2, 2],
index=pd.DatetimeIndex(
[
"2000-01-01 00:00:00",
"2000-01-01 00:00:20",
"2000-01-01 00:00:40",
"2000-01-01 00:01:00",
"2000-01-01 00:01:20",
"2000-01-01 00:01:40",
"2000-01-01 00:02:00",
],
dtype="datetime64[ns]",
freq="20S",
),
)
assert_series_equal(result, expected)
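# nearest() fills each upsampled 20s timestamp with the value of the closest
# original minute-frequency point, which is why the expected values step from
# 0 to 1 (and 1 to 2) once a timestamp lies closer to the next minute than to
# the previous one.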
def test_methods():
g = test_frame.groupby("A")
r = g.resample("2s")
for f in ["first", "last", "median", "sem", "sum", "mean", "min", "max"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
assert_frame_equal(result, expected)
for f in ["size"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
assert_series_equal(result, expected)
for f in ["count"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
assert_frame_equal(result, expected)
# series only
for f in ["nunique"]:
result = getattr(r.B, f)()
expected = g.B.apply(lambda x: getattr(x.resample("2s"), f)())
assert_series_equal(result, expected)
for f in ["nearest", "backfill", "ffill", "asfreq"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
assert_frame_equal(result, expected)
result = r.ohlc()
expected = g.apply(lambda x: x.resample("2s").ohlc())
assert_frame_equal(result, expected)
for f in ["std", "var"]:
result = getattr(r, f)(ddof=1)
expected = g.apply(lambda x: getattr(x.resample("2s"), f)(ddof=1))
assert_frame_equal(result, expected)
def test_apply():
g = test_frame.groupby("A")
r = g.resample("2s")
# reduction
expected = g.resample("2s").sum()
def f(x):
return x.resample("2s").sum()
result = r.apply(f)
assert_frame_equal(result, expected)
def f(x):
return x.resample("2s").apply(lambda y: y.sum())
result = g.apply(f)
assert_frame_equal(result, expected)
def test_apply_with_mutated_index():
# GH 15169
index = pd.date_range("1-1-2015", "12-31-15", freq="D")
df = DataFrame(data={"col1": np.random.rand(len(index))}, index=index)
def f(x):
s = Series([1, 2], index=["a", "b"])
return s
expected = df.groupby(pd.Grouper(freq="M")).apply(f)
result = df.resample("M").apply(f)
assert_frame_equal(result, expected)
# A case for series
expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f)
result = df["col1"].resample("M").apply(f)
assert_series_equal(result, expected)
def test_resample_groupby_with_label():
# GH 13235
index = date_range("2000-01-01", freq="2D", periods=5)
df = DataFrame(index=index, data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]})
result = df.groupby("col0").resample("1W", label="left").sum()
mi = [
np.array([0, 0, 1, 2]),
pd.to_datetime(
np.array(["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"])
),
]
mindex = pd.MultiIndex.from_arrays(mi, names=["col0", None])
expected = DataFrame(
data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=mindex
)
assert_frame_equal(result, expected)
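# label="left" stamps each weekly bin with its left edge, which is why the
# bin containing 2000-01-01 is labelled 1999-12-26 in the expected index above.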
def test_consistency_with_window():
# consistent return values with window
df = test_frame
expected = pd.Int64Index([1, 2, 3], name="A")
result = df.groupby("A").resample("2s").mean()
assert result.index.nlevels == 2
tm.assert_index_equal(result.index.levels[0], expected)
result = df.groupby("A").rolling(20).mean()
assert result.index.nlevels == 2
tm.assert_index_equal(result.index.levels[0], expected)
def test_median_duplicate_columns():
# GH 14233
df = DataFrame(
np.random.randn(20, 3),
columns=list("aaa"),
index=pd.date_range("2012-01-01", periods=20, freq="s"),
)
df2 = df.copy()
df2.columns = ["a", "b", "c"]
expected = df2.resample("5s").median()
result = df.resample("5s").median()
expected.columns = result.columns
assert_frame_equal(result, expected)

View File

@@ -0,0 +1,279 @@
from datetime import datetime
from operator import methodcaller
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Series
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexes.datetimes import date_range
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
test_series = Series(np.random.randn(1000), index=date_range("1/1/2000", periods=1000))
def test_apply():
grouper = Grouper(freq="A", label="right", closed="right")
grouped = test_series.groupby(grouper)
def f(x):
return x.sort_values()[-3:]
applied = grouped.apply(f)
expected = test_series.groupby(lambda x: x.year).apply(f)
applied.index = applied.index.droplevel(0)
expected.index = expected.index.droplevel(0)
assert_series_equal(applied, expected)
def test_count():
test_series[::3] = np.nan
expected = test_series.groupby(lambda x: x.year).count()
grouper = Grouper(freq="A", label="right", closed="right")
result = test_series.groupby(grouper).count()
expected.index = result.index
assert_series_equal(result, expected)
result = test_series.resample("A").count()
expected.index = result.index
assert_series_equal(result, expected)
def test_numpy_reduction():
result = test_series.resample("A", closed="right").prod()
expected = test_series.groupby(lambda x: x.year).agg(np.prod)
expected.index = result.index
assert_series_equal(result, expected)
def test_apply_iteration():
# #2300
N = 1000
ind = pd.date_range(start="2000-01-01", freq="D", periods=N)
df = DataFrame({"open": 1, "close": 2}, index=ind)
tg = Grouper(freq="M")
_, grouper, _ = tg._get_grouper(df)
# Errors
grouped = df.groupby(grouper, group_keys=False)
def f(df):
return df["close"] / df["open"]
# it works!
result = grouped.apply(f)
tm.assert_index_equal(result.index, df.index)
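# group_keys=False keeps the original DatetimeIndex on the applied result
# instead of prepending the monthly group key, so result.index matches df.index.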
@pytest.mark.parametrize(
"name, func",
[
("Int64Index", tm.makeIntIndex),
("Index", tm.makeUnicodeIndex),
("Float64Index", tm.makeFloatIndex),
("MultiIndex", lambda m: tm.makeCustomIndex(m, 2)),
],
)
def test_fails_on_no_datetime_index(name, func):
n = 2
index = func(n)
df = DataFrame({"a": np.random.randn(n)}, index=index)
msg = (
"Only valid with DatetimeIndex, TimedeltaIndex "
"or PeriodIndex, but got an instance of '{}'".format(name)
)
with pytest.raises(TypeError, match=msg):
df.groupby(Grouper(freq="D"))
def test_aaa_group_order():
# GH 12840
# check that TimeGrouper performs stable sorts
n = 20
data = np.random.randn(n, 4)
df = DataFrame(data, columns=["A", "B", "C", "D"])
df["key"] = [
datetime(2013, 1, 1),
datetime(2013, 1, 2),
datetime(2013, 1, 3),
datetime(2013, 1, 4),
datetime(2013, 1, 5),
] * 4
grouped = df.groupby(Grouper(key="key", freq="D"))
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)), df[::5])
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)), df[1::5])
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)), df[2::5])
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)), df[3::5])
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)), df[4::5])
def test_aggregate_normal(resample_method):
"""Check TimeGrouper's aggregation is identical as normal groupby."""
if resample_method == "ohlc":
pytest.xfail(reason="DataError: No numeric types to aggregate")
data = np.random.randn(20, 4)
normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
normal_df["key"] = [1, 2, 3, 4, 5] * 4
dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
dt_df["key"] = [
datetime(2013, 1, 1),
datetime(2013, 1, 2),
datetime(2013, 1, 3),
datetime(2013, 1, 4),
datetime(2013, 1, 5),
] * 4
normal_grouped = normal_df.groupby("key")
dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))
expected = getattr(normal_grouped, resample_method)()
dt_result = getattr(dt_grouped, resample_method)()
expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
tm.assert_equal(expected, dt_result)
# if TimeGrouper is used, 'nth' doesn't work yet
"""
for func in ['nth']:
expected = getattr(normal_grouped, func)(3)
expected.index = date_range(start='2013-01-01',
freq='D', periods=5, name='key')
dt_result = getattr(dt_grouped, func)(3)
assert_frame_equal(expected, dt_result)
"""
@pytest.mark.parametrize(
"method, method_args, unit",
[
("sum", dict(), 0),
("sum", dict(min_count=0), 0),
("sum", dict(min_count=1), np.nan),
("prod", dict(), 1),
("prod", dict(min_count=0), 1),
("prod", dict(min_count=1), np.nan),
],
)
def test_resample_entirely_nat_window(method, method_args, unit):
s = pd.Series([0] * 2 + [np.nan] * 2, index=pd.date_range("2017", periods=4))
result = methodcaller(method, **method_args)(s.resample("2d"))
expected = pd.Series(
[0.0, unit], index=pd.to_datetime(["2017-01-01", "2017-01-03"])
)
tm.assert_series_equal(result, expected)
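# With min_count=1 the second 2-day window, which contains only NaN, yields
# NaN instead of the reduction's identity value (0 for sum, 1 for prod);
# the default min_count=0 keeps the identity value.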
@pytest.mark.parametrize(
"func, fill_value",
[("min", np.nan), ("max", np.nan), ("sum", 0), ("prod", 1), ("count", 0)],
)
def test_aggregate_with_nat(func, fill_value):
# check that TimeGrouper's aggregation is identical to a normal groupby;
# if NaT is included, 'var', 'std', 'mean', 'first', 'last'
# and 'nth' don't work yet
n = 20
data = np.random.randn(n, 4).astype("int64")
normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
normal_df["key"] = [1, 2, np.nan, 4, 5] * 4
dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
dt_df["key"] = [
datetime(2013, 1, 1),
datetime(2013, 1, 2),
pd.NaT,
datetime(2013, 1, 4),
datetime(2013, 1, 5),
] * 4
normal_grouped = normal_df.groupby("key")
dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))
normal_result = getattr(normal_grouped, func)()
dt_result = getattr(dt_grouped, func)()
pad = DataFrame([[fill_value] * 4], index=[3], columns=["A", "B", "C", "D"])
expected = normal_result.append(pad)
expected = expected.sort_index()
expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
assert_frame_equal(expected, dt_result)
assert dt_result.index.name == "key"
def test_aggregate_with_nat_size():
# GH 9925
n = 20
data = np.random.randn(n, 4).astype("int64")
normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
normal_df["key"] = [1, 2, np.nan, 4, 5] * 4
dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
dt_df["key"] = [
datetime(2013, 1, 1),
datetime(2013, 1, 2),
pd.NaT,
datetime(2013, 1, 4),
datetime(2013, 1, 5),
] * 4
normal_grouped = normal_df.groupby("key")
dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))
normal_result = normal_grouped.size()
dt_result = dt_grouped.size()
pad = Series([0], index=[3])
expected = normal_result.append(pad)
expected = expected.sort_index()
expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
assert_series_equal(expected, dt_result)
assert dt_result.index.name == "key"
def test_repr():
# GH18203
result = repr(Grouper(key="A", freq="H"))
expected = (
"TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, "
"closed='left', label='left', how='mean', "
"convention='e', base=0)"
)
assert result == expected
@pytest.mark.parametrize(
"method, method_args, expected_values",
[
("sum", dict(), [1, 0, 1]),
("sum", dict(min_count=0), [1, 0, 1]),
("sum", dict(min_count=1), [1, np.nan, 1]),
("sum", dict(min_count=2), [np.nan, np.nan, np.nan]),
("prod", dict(), [1, 1, 1]),
("prod", dict(min_count=0), [1, 1, 1]),
("prod", dict(min_count=1), [1, np.nan, 1]),
("prod", dict(min_count=2), [np.nan, np.nan, np.nan]),
],
)
def test_upsample_sum(method, method_args, expected_values):
s = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H"))
resampled = s.resample("30T")
index = pd.to_datetime(
["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"]
)
result = methodcaller(method, **method_args)(resampled)
expected = pd.Series(expected_values, index=index)
tm.assert_series_equal(result, expected)
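# Upsampling the hourly series to 30T introduces an empty 00:30 bin;
# min_count controls whether that bin becomes the identity value
# (0 for sum, 1 for prod) or NaN, and min_count=2 turns every bin into NaN
# because no bin holds more than one original value.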

View File

@@ -0,0 +1,128 @@
from datetime import timedelta
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from pandas.core.indexes.timedeltas import timedelta_range
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
def test_asfreq_bug():
df = DataFrame(data=[1, 3], index=[timedelta(), timedelta(minutes=3)])
result = df.resample("1T").asfreq()
expected = DataFrame(
data=[1, np.nan, np.nan, 3],
index=timedelta_range("0 day", periods=4, freq="1T"),
)
assert_frame_equal(result, expected)
def test_resample_with_nat():
# GH 13223
index = pd.to_timedelta(["0s", pd.NaT, "2s"])
result = DataFrame({"value": [2, 3, 5]}, index).resample("1s").mean()
expected = DataFrame(
{"value": [2.5, np.nan, 5.0]},
index=timedelta_range("0 day", periods=3, freq="1S"),
)
assert_frame_equal(result, expected)
def test_resample_as_freq_with_subperiod():
# GH 13022
index = timedelta_range("00:00:00", "00:10:00", freq="5T")
df = DataFrame(data={"value": [1, 5, 10]}, index=index)
result = df.resample("2T").asfreq()
expected_data = {"value": [1, np.nan, np.nan, np.nan, np.nan, 10]}
expected = DataFrame(
data=expected_data, index=timedelta_range("00:00:00", "00:10:00", freq="2T")
)
tm.assert_frame_equal(result, expected)
def test_resample_with_timedeltas():
expected = DataFrame({"A": np.arange(1480)})
expected = expected.groupby(expected.index // 30).sum()
expected.index = pd.timedelta_range("0 days", freq="30T", periods=50)
df = DataFrame(
{"A": np.arange(1480)}, index=pd.to_timedelta(np.arange(1480), unit="T")
)
result = df.resample("30T").sum()
assert_frame_equal(result, expected)
s = df["A"]
result = s.resample("30T").sum()
assert_series_equal(result, expected["A"])
def test_resample_single_period_timedelta():
s = Series(list(range(5)), index=pd.timedelta_range("1 day", freq="s", periods=5))
result = s.resample("2s").sum()
expected = Series(
[1, 5, 4], index=pd.timedelta_range("1 day", freq="2s", periods=3)
)
assert_series_equal(result, expected)
def test_resample_timedelta_idempotency():
# GH 12072
index = pd.timedelta_range("0", periods=9, freq="10L")
series = Series(range(9), index=index)
result = series.resample("10L").mean()
expected = series
assert_series_equal(result, expected)
def test_resample_base_with_timedeltaindex():
# GH 10530
rng = timedelta_range(start="0s", periods=25, freq="s")
ts = Series(np.random.randn(len(rng)), index=rng)
with_base = ts.resample("2s", base=5).mean()
without_base = ts.resample("2s").mean()
exp_without_base = timedelta_range(start="0s", end="25s", freq="2s")
exp_with_base = timedelta_range(start="5s", end="29s", freq="2s")
tm.assert_index_equal(without_base.index, exp_without_base)
tm.assert_index_equal(with_base.index, exp_with_base)
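# base shifts the resampling bin edges: with base=5 the expected labels run
# from "5s" to "29s" instead of the unshifted "0s" to "24s" grid.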
def test_resample_categorical_data_with_timedeltaindex():
# GH #12169
df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s"))
df["Group"] = df["Group_obj"].astype("category")
result = df.resample("10s").agg(lambda x: (x.value_counts().index[0]))
expected = DataFrame(
{"Group_obj": ["A", "A"], "Group": ["A", "A"]},
index=pd.to_timedelta([0, 10], unit="s"),
)
expected = expected.reindex(["Group_obj", "Group"], axis=1)
expected["Group"] = expected["Group_obj"].astype("category")
tm.assert_frame_equal(result, expected)
def test_resample_timedelta_values():
# GH 13119
# check that timedelta dtype is preserved when NaT values are
# introduced by the resampling
times = timedelta_range("1 day", "4 day", freq="4D")
df = DataFrame({"time": times}, index=times)
times2 = timedelta_range("1 day", "4 day", freq="2D")
exp = Series(times2, index=times2, name="time")
exp.iloc[1] = pd.NaT
res = df.resample("2D").first()["time"]
tm.assert_series_equal(res, exp)
res = df["time"].resample("2D").first()
tm.assert_series_equal(res, exp)