8th day of Python challenges 111-117
@@ -0,0 +1,23 @@
from datetime import datetime

import numpy as np
from numpy.random import randn

from pandas import DataFrame, Series, bdate_range

N, K = 100, 10


class Base:

    _nan_locs = np.arange(20, 40)
    _inf_locs = np.array([])

    def _create_data(self):
        arr = randn(N)
        arr[self._nan_locs] = np.NaN

        self.arr = arr
        self.rng = bdate_range(datetime(2009, 1, 1), periods=N)
        self.series = Series(arr.copy(), index=self.rng)
        self.frame = DataFrame(randn(N, K), index=self.rng, columns=np.arange(K))
@@ -0,0 +1,49 @@
import pytest


@pytest.fixture(params=[True, False])
def raw(request):
    return request.param


@pytest.fixture(
    params=[
        "triang",
        "blackman",
        "hamming",
        "bartlett",
        "bohman",
        "blackmanharris",
        "nuttall",
        "barthann",
    ]
)
def win_types(request):
    return request.param


@pytest.fixture(params=["kaiser", "gaussian", "general_gaussian", "exponential"])
def win_types_special(request):
    return request.param


@pytest.fixture(
    params=["sum", "mean", "median", "max", "min", "var", "std", "kurt", "skew"]
)
def arithmetic_win_operators(request):
    return request.param


@pytest.fixture(params=["right", "left", "both", "neither"])
def closed(request):
    return request.param


@pytest.fixture(params=[True, False])
def center(request):
    return request.param


@pytest.fixture(params=[None, 1])
def min_periods(request):
    return request.param
venv/lib/python3.6/site-packages/pandas/tests/window/test_api.py (new file, 367 lines)
@@ -0,0 +1,367 @@
from collections import OrderedDict
import warnings
from warnings import catch_warnings

import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import DataFrame, Index, Series, Timestamp, concat
from pandas.core.base import SpecificationError
from pandas.tests.window.common import Base
import pandas.util.testing as tm


class TestApi(Base):
    def setup_method(self, method):
        self._create_data()

    def test_getitem(self):

        r = self.frame.rolling(window=5)
        tm.assert_index_equal(r._selected_obj.columns, self.frame.columns)

        r = self.frame.rolling(window=5)[1]
        assert r._selected_obj.name == self.frame.columns[1]

        # technically this is allowed
        r = self.frame.rolling(window=5)[1, 3]
        tm.assert_index_equal(r._selected_obj.columns, self.frame.columns[[1, 3]])

        r = self.frame.rolling(window=5)[[1, 3]]
        tm.assert_index_equal(r._selected_obj.columns, self.frame.columns[[1, 3]])

    def test_select_bad_cols(self):
        df = DataFrame([[1, 2]], columns=["A", "B"])
        g = df.rolling(window=5)
        with pytest.raises(KeyError, match="Columns not found: 'C'"):
            g[["C"]]
        with pytest.raises(KeyError, match="^[^A]+$"):
            # A should not be referenced as a bad column...
            # will have to rethink regex if you change message!
            g[["A", "C"]]

    def test_attribute_access(self):

        df = DataFrame([[1, 2]], columns=["A", "B"])
        r = df.rolling(window=5)
        tm.assert_series_equal(r.A.sum(), r["A"].sum())
        msg = "'Rolling' object has no attribute 'F'"
        with pytest.raises(AttributeError, match=msg):
            r.F

    def tests_skip_nuisance(self):

        df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
        r = df.rolling(window=3)
        result = r[["A", "B"]].sum()
        expected = DataFrame(
            {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
            columns=list("AB"),
        )
        tm.assert_frame_equal(result, expected)

    def test_skip_sum_object_raises(self):
        df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
        r = df.rolling(window=3)
        result = r.sum()
        expected = DataFrame(
            {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
            columns=list("AB"),
        )
        tm.assert_frame_equal(result, expected)

    def test_agg(self):
        df = DataFrame({"A": range(5), "B": range(0, 10, 2)})

        r = df.rolling(window=3)
        a_mean = r["A"].mean()
        a_std = r["A"].std()
        a_sum = r["A"].sum()
        b_mean = r["B"].mean()
        b_std = r["B"].std()
        b_sum = r["B"].sum()

        result = r.aggregate([np.mean, np.std])
        expected = concat([a_mean, a_std, b_mean, b_std], axis=1)
        expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
        tm.assert_frame_equal(result, expected)

        result = r.aggregate({"A": np.mean, "B": np.std})

        expected = concat([a_mean, b_std], axis=1)
        tm.assert_frame_equal(result, expected, check_like=True)

        result = r.aggregate({"A": ["mean", "std"]})
        expected = concat([a_mean, a_std], axis=1)
        expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")])
        tm.assert_frame_equal(result, expected)

        result = r["A"].aggregate(["mean", "sum"])
        expected = concat([a_mean, a_sum], axis=1)
        expected.columns = ["mean", "sum"]
        tm.assert_frame_equal(result, expected)

        with catch_warnings(record=True):
            # using a dict with renaming
            warnings.simplefilter("ignore", FutureWarning)
            result = r.aggregate({"A": {"mean": "mean", "sum": "sum"}})
        expected = concat([a_mean, a_sum], axis=1)
        expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "sum")])
        tm.assert_frame_equal(result, expected, check_like=True)

        with catch_warnings(record=True):
            warnings.simplefilter("ignore", FutureWarning)
            result = r.aggregate(
                {
                    "A": {"mean": "mean", "sum": "sum"},
                    "B": {"mean2": "mean", "sum2": "sum"},
                }
            )
        expected = concat([a_mean, a_sum, b_mean, b_sum], axis=1)
        exp_cols = [("A", "mean"), ("A", "sum"), ("B", "mean2"), ("B", "sum2")]
        expected.columns = pd.MultiIndex.from_tuples(exp_cols)
        tm.assert_frame_equal(result, expected, check_like=True)

        result = r.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]})
        expected = concat([a_mean, a_std, b_mean, b_std], axis=1)

        exp_cols = [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")]
        expected.columns = pd.MultiIndex.from_tuples(exp_cols)
        tm.assert_frame_equal(result, expected, check_like=True)

    def test_agg_apply(self, raw):

        # passed lambda
        df = DataFrame({"A": range(5), "B": range(0, 10, 2)})

        r = df.rolling(window=3)
        a_sum = r["A"].sum()

        result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)})
        rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw)
        expected = concat([a_sum, rcustom], axis=1)
        tm.assert_frame_equal(result, expected, check_like=True)

    def test_agg_consistency(self):

        df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
        r = df.rolling(window=3)

        result = r.agg([np.sum, np.mean]).columns
        expected = pd.MultiIndex.from_product([list("AB"), ["sum", "mean"]])
        tm.assert_index_equal(result, expected)

        result = r["A"].agg([np.sum, np.mean]).columns
        expected = Index(["sum", "mean"])
        tm.assert_index_equal(result, expected)

        result = r.agg({"A": [np.sum, np.mean]}).columns
        expected = pd.MultiIndex.from_tuples([("A", "sum"), ("A", "mean")])
        tm.assert_index_equal(result, expected)

    def test_agg_nested_dicts(self):

        # API change for disallowing these types of nested dicts
        df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
        r = df.rolling(window=3)

        msg = r"cannot perform renaming for (r1|r2) with a nested dictionary"
        with pytest.raises(SpecificationError, match=msg):
            r.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}})

        expected = concat(
            [r["A"].mean(), r["A"].std(), r["B"].mean(), r["B"].std()], axis=1
        )
        expected.columns = pd.MultiIndex.from_tuples(
            [("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")]
        )
        with catch_warnings(record=True):
            warnings.simplefilter("ignore", FutureWarning)
            result = r[["A", "B"]].agg(
                {"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}
            )
        tm.assert_frame_equal(result, expected, check_like=True)

        with catch_warnings(record=True):
            warnings.simplefilter("ignore", FutureWarning)
            result = r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})
        expected.columns = pd.MultiIndex.from_tuples(
            [
                ("A", "ra", "mean"),
                ("A", "ra", "std"),
                ("B", "rb", "mean"),
                ("B", "rb", "std"),
            ]
        )
        tm.assert_frame_equal(result, expected, check_like=True)

    def test_count_nonnumeric_types(self):
        # GH12541
        cols = [
            "int",
            "float",
            "string",
            "datetime",
            "timedelta",
            "periods",
            "fl_inf",
            "fl_nan",
            "str_nan",
            "dt_nat",
            "periods_nat",
        ]

        df = DataFrame(
            {
                "int": [1, 2, 3],
                "float": [4.0, 5.0, 6.0],
                "string": list("abc"),
                "datetime": pd.date_range("20170101", periods=3),
                "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"),
                "periods": [
                    pd.Period("2012-01"),
                    pd.Period("2012-02"),
                    pd.Period("2012-03"),
                ],
                "fl_inf": [1.0, 2.0, np.Inf],
                "fl_nan": [1.0, 2.0, np.NaN],
                "str_nan": ["aa", "bb", np.NaN],
                "dt_nat": [
                    Timestamp("20170101"),
                    Timestamp("20170203"),
                    Timestamp(None),
                ],
                "periods_nat": [
                    pd.Period("2012-01"),
                    pd.Period("2012-02"),
                    pd.Period(None),
                ],
            },
            columns=cols,
        )

        expected = DataFrame(
            {
                "int": [1.0, 2.0, 2.0],
                "float": [1.0, 2.0, 2.0],
                "string": [1.0, 2.0, 2.0],
                "datetime": [1.0, 2.0, 2.0],
                "timedelta": [1.0, 2.0, 2.0],
                "periods": [1.0, 2.0, 2.0],
                "fl_inf": [1.0, 2.0, 2.0],
                "fl_nan": [1.0, 2.0, 1.0],
                "str_nan": [1.0, 2.0, 1.0],
                "dt_nat": [1.0, 2.0, 1.0],
                "periods_nat": [1.0, 2.0, 1.0],
            },
            columns=cols,
        )

        result = df.rolling(window=2).count()
        tm.assert_frame_equal(result, expected)

        result = df.rolling(1).count()
        expected = df.notna().astype(float)
        tm.assert_frame_equal(result, expected)

    @td.skip_if_no_scipy
    @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning")
    def test_window_with_args(self):
        # make sure that we are aggregating window functions correctly with arg
        r = Series(np.random.randn(100)).rolling(
            window=10, min_periods=1, win_type="gaussian"
        )
        expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1)
        expected.columns = ["<lambda>", "<lambda>"]
        result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=0.01)])
        tm.assert_frame_equal(result, expected)

        def a(x):
            return x.mean(std=10)

        def b(x):
            return x.mean(std=0.01)

        expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1)
        expected.columns = ["a", "b"]
        result = r.aggregate([a, b])
        tm.assert_frame_equal(result, expected)

    def test_preserve_metadata(self):
        # GH 10565
        s = Series(np.arange(100), name="foo")

        s2 = s.rolling(30).sum()
        s3 = s.rolling(20).sum()
        assert s2.name == "foo"
        assert s3.name == "foo"

    @pytest.mark.parametrize(
        "func,window_size,expected_vals",
        [
            (
                "rolling",
                2,
                [
                    [np.nan, np.nan, np.nan, np.nan],
                    [15.0, 20.0, 25.0, 20.0],
                    [25.0, 30.0, 35.0, 30.0],
                    [np.nan, np.nan, np.nan, np.nan],
                    [20.0, 30.0, 35.0, 30.0],
                    [35.0, 40.0, 60.0, 40.0],
                    [60.0, 80.0, 85.0, 80],
                ],
            ),
            (
                "expanding",
                None,
                [
                    [10.0, 10.0, 20.0, 20.0],
                    [15.0, 20.0, 25.0, 20.0],
                    [20.0, 30.0, 30.0, 20.0],
                    [10.0, 10.0, 30.0, 30.0],
                    [20.0, 30.0, 35.0, 30.0],
                    [26.666667, 40.0, 50.0, 30.0],
                    [40.0, 80.0, 60.0, 30.0],
                ],
            ),
        ],
    )
    def test_multiple_agg_funcs(self, func, window_size, expected_vals):
        # GH 15072
        df = pd.DataFrame(
            [
                ["A", 10, 20],
                ["A", 20, 30],
                ["A", 30, 40],
                ["B", 10, 30],
                ["B", 30, 40],
                ["B", 40, 80],
                ["B", 80, 90],
            ],
            columns=["stock", "low", "high"],
        )

        f = getattr(df.groupby("stock"), func)
        if window_size:
            window = f(window_size)
        else:
            window = f()

        index = pd.MultiIndex.from_tuples(
            [("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)],
            names=["stock", None],
        )
        columns = pd.MultiIndex.from_tuples(
            [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")]
        )
        expected = pd.DataFrame(expected_vals, index=index, columns=columns)

        result = window.agg(
            OrderedDict((("low", ["mean", "max"]), ("high", ["mean", "min"])))
        )

        tm.assert_frame_equal(result, expected)
@@ -0,0 +1,242 @@
|
||||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Series
|
||||
from pandas.core.base import DataError
|
||||
import pandas.util.testing as tm
|
||||
|
||||
# gh-12373 : rolling functions error on float32 data
|
||||
# make sure rolling functions works for different dtypes
|
||||
#
|
||||
# NOTE that these are yielded tests and so _create_data
|
||||
# is explicitly called.
|
||||
#
|
||||
# further note that we are only checking rolling for fully dtype
|
||||
# compliance (though both expanding and ewm inherit)
|
||||
|
||||
|
||||
class Dtype:
|
||||
window = 2
|
||||
|
||||
funcs = {
|
||||
"count": lambda v: v.count(),
|
||||
"max": lambda v: v.max(),
|
||||
"min": lambda v: v.min(),
|
||||
"sum": lambda v: v.sum(),
|
||||
"mean": lambda v: v.mean(),
|
||||
"std": lambda v: v.std(),
|
||||
"var": lambda v: v.var(),
|
||||
"median": lambda v: v.median(),
|
||||
}
|
||||
|
||||
def get_expects(self):
|
||||
expects = {
|
||||
"sr1": {
|
||||
"count": Series([1, 2, 2, 2, 2], dtype="float64"),
|
||||
"max": Series([np.nan, 1, 2, 3, 4], dtype="float64"),
|
||||
"min": Series([np.nan, 0, 1, 2, 3], dtype="float64"),
|
||||
"sum": Series([np.nan, 1, 3, 5, 7], dtype="float64"),
|
||||
"mean": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"),
|
||||
"std": Series([np.nan] + [np.sqrt(0.5)] * 4, dtype="float64"),
|
||||
"var": Series([np.nan, 0.5, 0.5, 0.5, 0.5], dtype="float64"),
|
||||
"median": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"),
|
||||
},
|
||||
"sr2": {
|
||||
"count": Series([1, 2, 2, 2, 2], dtype="float64"),
|
||||
"max": Series([np.nan, 10, 8, 6, 4], dtype="float64"),
|
||||
"min": Series([np.nan, 8, 6, 4, 2], dtype="float64"),
|
||||
"sum": Series([np.nan, 18, 14, 10, 6], dtype="float64"),
|
||||
"mean": Series([np.nan, 9, 7, 5, 3], dtype="float64"),
|
||||
"std": Series([np.nan] + [np.sqrt(2)] * 4, dtype="float64"),
|
||||
"var": Series([np.nan, 2, 2, 2, 2], dtype="float64"),
|
||||
"median": Series([np.nan, 9, 7, 5, 3], dtype="float64"),
|
||||
},
|
||||
"sr3": {
|
||||
"count": Series([1, 2, 2, 1, 1], dtype="float64"),
|
||||
"max": Series([np.nan, 1, 2, np.nan, np.nan], dtype="float64"),
|
||||
"min": Series([np.nan, 0, 1, np.nan, np.nan], dtype="float64"),
|
||||
"sum": Series([np.nan, 1, 3, np.nan, np.nan], dtype="float64"),
|
||||
"mean": Series([np.nan, 0.5, 1.5, np.nan, np.nan], dtype="float64"),
|
||||
"std": Series(
|
||||
[np.nan] + [np.sqrt(0.5)] * 2 + [np.nan] * 2, dtype="float64"
|
||||
),
|
||||
"var": Series([np.nan, 0.5, 0.5, np.nan, np.nan], dtype="float64"),
|
||||
"median": Series([np.nan, 0.5, 1.5, np.nan, np.nan], dtype="float64"),
|
||||
},
|
||||
"df": {
|
||||
"count": DataFrame(
|
||||
{0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])},
|
||||
dtype="float64",
|
||||
),
|
||||
"max": DataFrame(
|
||||
{0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])},
|
||||
dtype="float64",
|
||||
),
|
||||
"min": DataFrame(
|
||||
{0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])},
|
||||
dtype="float64",
|
||||
),
|
||||
"sum": DataFrame(
|
||||
{
|
||||
0: Series([np.nan, 2, 6, 10, 14]),
|
||||
1: Series([np.nan, 4, 8, 12, 16]),
|
||||
},
|
||||
dtype="float64",
|
||||
),
|
||||
"mean": DataFrame(
|
||||
{0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
|
||||
dtype="float64",
|
||||
),
|
||||
"std": DataFrame(
|
||||
{
|
||||
0: Series([np.nan] + [np.sqrt(2)] * 4),
|
||||
1: Series([np.nan] + [np.sqrt(2)] * 4),
|
||||
},
|
||||
dtype="float64",
|
||||
),
|
||||
"var": DataFrame(
|
||||
{0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])},
|
||||
dtype="float64",
|
||||
),
|
||||
"median": DataFrame(
|
||||
{0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
|
||||
dtype="float64",
|
||||
),
|
||||
},
|
||||
}
|
||||
return expects
|
||||
|
||||
def _create_dtype_data(self, dtype):
|
||||
sr1 = Series(np.arange(5), dtype=dtype)
|
||||
sr2 = Series(np.arange(10, 0, -2), dtype=dtype)
|
||||
sr3 = sr1.copy()
|
||||
sr3[3] = np.NaN
|
||||
df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype)
|
||||
|
||||
data = {"sr1": sr1, "sr2": sr2, "sr3": sr3, "df": df}
|
||||
|
||||
return data
|
||||
|
||||
def _create_data(self):
|
||||
self.data = self._create_dtype_data(self.dtype)
|
||||
self.expects = self.get_expects()
|
||||
|
||||
def test_dtypes(self):
|
||||
self._create_data()
|
||||
for f_name, d_name in product(self.funcs.keys(), self.data.keys()):
|
||||
|
||||
f = self.funcs[f_name]
|
||||
d = self.data[d_name]
|
||||
exp = self.expects[d_name][f_name]
|
||||
self.check_dtypes(f, f_name, d, d_name, exp)
|
||||
|
||||
def check_dtypes(self, f, f_name, d, d_name, exp):
|
||||
roll = d.rolling(window=self.window)
|
||||
result = f(roll)
|
||||
tm.assert_almost_equal(result, exp)
|
||||
|
||||
|
||||
class TestDtype_object(Dtype):
|
||||
dtype = object
|
||||
|
||||
|
||||
class Dtype_integer(Dtype):
|
||||
pass
|
||||
|
||||
|
||||
class TestDtype_int8(Dtype_integer):
|
||||
dtype = np.int8
|
||||
|
||||
|
||||
class TestDtype_int16(Dtype_integer):
|
||||
dtype = np.int16
|
||||
|
||||
|
||||
class TestDtype_int32(Dtype_integer):
|
||||
dtype = np.int32
|
||||
|
||||
|
||||
class TestDtype_int64(Dtype_integer):
|
||||
dtype = np.int64
|
||||
|
||||
|
||||
class Dtype_uinteger(Dtype):
|
||||
pass
|
||||
|
||||
|
||||
class TestDtype_uint8(Dtype_uinteger):
|
||||
dtype = np.uint8
|
||||
|
||||
|
||||
class TestDtype_uint16(Dtype_uinteger):
|
||||
dtype = np.uint16
|
||||
|
||||
|
||||
class TestDtype_uint32(Dtype_uinteger):
|
||||
dtype = np.uint32
|
||||
|
||||
|
||||
class TestDtype_uint64(Dtype_uinteger):
|
||||
dtype = np.uint64
|
||||
|
||||
|
||||
class Dtype_float(Dtype):
|
||||
pass
|
||||
|
||||
|
||||
class TestDtype_float16(Dtype_float):
|
||||
dtype = np.float16
|
||||
|
||||
|
||||
class TestDtype_float32(Dtype_float):
|
||||
dtype = np.float32
|
||||
|
||||
|
||||
class TestDtype_float64(Dtype_float):
|
||||
dtype = np.float64
|
||||
|
||||
|
||||
class TestDtype_category(Dtype):
|
||||
dtype = "category"
|
||||
include_df = False
|
||||
|
||||
def _create_dtype_data(self, dtype):
|
||||
sr1 = Series(range(5), dtype=dtype)
|
||||
sr2 = Series(range(10, 0, -2), dtype=dtype)
|
||||
|
||||
data = {"sr1": sr1, "sr2": sr2}
|
||||
|
||||
return data
|
||||
|
||||
|
||||
class DatetimeLike(Dtype):
|
||||
def check_dtypes(self, f, f_name, d, d_name, exp):
|
||||
|
||||
roll = d.rolling(window=self.window)
|
||||
if f_name == "count":
|
||||
result = f(roll)
|
||||
tm.assert_almost_equal(result, exp)
|
||||
|
||||
else:
|
||||
with pytest.raises(DataError):
|
||||
f(roll)
|
||||
|
||||
|
||||
class TestDtype_timedelta(DatetimeLike):
|
||||
dtype = np.dtype("m8[ns]")
|
||||
|
||||
|
||||
class TestDtype_datetime(DatetimeLike):
|
||||
dtype = np.dtype("M8[ns]")
|
||||
|
||||
|
||||
class TestDtype_datetime64UTC(DatetimeLike):
|
||||
dtype = "datetime64[ns, UTC]"
|
||||
|
||||
def _create_data(self):
|
||||
pytest.skip(
|
||||
"direct creation of extension dtype "
|
||||
"datetime64[ns, UTC] is not supported ATM"
|
||||
)
|
@@ -0,0 +1,70 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import UnsupportedFunctionCall
|
||||
|
||||
from pandas import DataFrame, Series
|
||||
import pandas.core.window as rwindow
|
||||
from pandas.tests.window.common import Base
|
||||
|
||||
|
||||
class TestEWM(Base):
|
||||
def setup_method(self, method):
|
||||
self._create_data()
|
||||
|
||||
def test_doc_string(self):
|
||||
|
||||
df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
|
||||
df
|
||||
df.ewm(com=0.5).mean()
|
||||
|
||||
@pytest.mark.parametrize("which", ["series", "frame"])
|
||||
def test_constructor(self, which):
|
||||
o = getattr(self, which)
|
||||
c = o.ewm
|
||||
|
||||
# valid
|
||||
c(com=0.5)
|
||||
c(span=1.5)
|
||||
c(alpha=0.5)
|
||||
c(halflife=0.75)
|
||||
c(com=0.5, span=None)
|
||||
c(alpha=0.5, com=None)
|
||||
c(halflife=0.75, alpha=None)
|
||||
|
||||
# not valid: mutually exclusive
|
||||
with pytest.raises(ValueError):
|
||||
c(com=0.5, alpha=0.5)
|
||||
with pytest.raises(ValueError):
|
||||
c(span=1.5, halflife=0.75)
|
||||
with pytest.raises(ValueError):
|
||||
c(alpha=0.5, span=1.5)
|
||||
|
||||
# not valid: com < 0
|
||||
with pytest.raises(ValueError):
|
||||
c(com=-0.5)
|
||||
|
||||
# not valid: span < 1
|
||||
with pytest.raises(ValueError):
|
||||
c(span=0.5)
|
||||
|
||||
# not valid: halflife <= 0
|
||||
with pytest.raises(ValueError):
|
||||
c(halflife=0)
|
||||
|
||||
# not valid: alpha <= 0 or alpha > 1
|
||||
for alpha in (-0.5, 1.5):
|
||||
with pytest.raises(ValueError):
|
||||
c(alpha=alpha)
|
||||
|
||||
@pytest.mark.parametrize("method", ["std", "mean", "var"])
|
||||
def test_numpy_compat(self, method):
|
||||
# see gh-12811
|
||||
e = rwindow.EWM(Series([2, 4, 6]), alpha=0.5)
|
||||
|
||||
msg = "numpy operations are not valid with window objects"
|
||||
|
||||
with pytest.raises(UnsupportedFunctionCall, match=msg):
|
||||
getattr(e, method)(1, 2, 3)
|
||||
with pytest.raises(UnsupportedFunctionCall, match=msg):
|
||||
getattr(e, method)(dtype=np.float64)
|
@@ -0,0 +1,115 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import UnsupportedFunctionCall
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series
|
||||
import pandas.core.window as rwindow
|
||||
from pandas.tests.window.common import Base
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestExpanding(Base):
|
||||
def setup_method(self, method):
|
||||
self._create_data()
|
||||
|
||||
def test_doc_string(self):
|
||||
|
||||
df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
|
||||
df
|
||||
df.expanding(2).sum()
|
||||
|
||||
@pytest.mark.parametrize("which", ["series", "frame"])
|
||||
def test_constructor(self, which):
|
||||
# GH 12669
|
||||
|
||||
o = getattr(self, which)
|
||||
c = o.expanding
|
||||
|
||||
# valid
|
||||
c(min_periods=1)
|
||||
c(min_periods=1, center=True)
|
||||
c(min_periods=1, center=False)
|
||||
|
||||
# not valid
|
||||
for w in [2.0, "foo", np.array([2])]:
|
||||
with pytest.raises(ValueError):
|
||||
c(min_periods=w)
|
||||
with pytest.raises(ValueError):
|
||||
c(min_periods=1, center=w)
|
||||
|
||||
@pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"])
|
||||
def test_numpy_compat(self, method):
|
||||
# see gh-12811
|
||||
e = rwindow.Expanding(Series([2, 4, 6]), window=2)
|
||||
|
||||
msg = "numpy operations are not valid with window objects"
|
||||
|
||||
with pytest.raises(UnsupportedFunctionCall, match=msg):
|
||||
getattr(e, method)(1, 2, 3)
|
||||
with pytest.raises(UnsupportedFunctionCall, match=msg):
|
||||
getattr(e, method)(dtype=np.float64)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"expander",
|
||||
[
|
||||
1,
|
||||
pytest.param(
|
||||
"ls",
|
||||
marks=pytest.mark.xfail(
|
||||
reason="GH#16425 expanding with offset not supported"
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_empty_df_expanding(self, expander):
|
||||
# GH 15819 Verifies that datetime and integer expanding windows can be
|
||||
# applied to empty DataFrames
|
||||
|
||||
expected = DataFrame()
|
||||
result = DataFrame().expanding(expander).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# Verifies that datetime and integer expanding windows can be applied
|
||||
# to empty DataFrames with datetime index
|
||||
expected = DataFrame(index=pd.DatetimeIndex([]))
|
||||
result = DataFrame(index=pd.DatetimeIndex([])).expanding(expander).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_missing_minp_zero(self):
|
||||
# https://github.com/pandas-dev/pandas/pull/18921
|
||||
# minp=0
|
||||
x = pd.Series([np.nan])
|
||||
result = x.expanding(min_periods=0).sum()
|
||||
expected = pd.Series([0.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# minp=1
|
||||
result = x.expanding(min_periods=1).sum()
|
||||
expected = pd.Series([np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame])
|
||||
def test_iter_raises(self, klass):
|
||||
# https://github.com/pandas-dev/pandas/issues/11704
|
||||
# Iteration over a Window
|
||||
obj = klass([1, 2, 3, 4])
|
||||
with pytest.raises(NotImplementedError):
|
||||
iter(obj.expanding(2))
|
||||
|
||||
def test_expanding_axis(self, axis_frame):
|
||||
# see gh-23372.
|
||||
df = DataFrame(np.ones((10, 20)))
|
||||
axis = df._get_axis_number(axis_frame)
|
||||
|
||||
if axis == 0:
|
||||
expected = DataFrame(
|
||||
{i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)}
|
||||
)
|
||||
else:
|
||||
# axis == 1
|
||||
expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10)
|
||||
|
||||
result = df.expanding(3, axis=axis_frame).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
@@ -0,0 +1,176 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestGrouperGrouping:
|
||||
def setup_method(self, method):
|
||||
self.series = Series(np.arange(10))
|
||||
self.frame = DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)})
|
||||
|
||||
def test_mutated(self):
|
||||
|
||||
msg = r"group\(\) got an unexpected keyword argument 'foo'"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
self.frame.groupby("A", foo=1)
|
||||
|
||||
g = self.frame.groupby("A")
|
||||
assert not g.mutated
|
||||
g = self.frame.groupby("A", mutated=True)
|
||||
assert g.mutated
|
||||
|
||||
def test_getitem(self):
|
||||
g = self.frame.groupby("A")
|
||||
g_mutated = self.frame.groupby("A", mutated=True)
|
||||
|
||||
expected = g_mutated.B.apply(lambda x: x.rolling(2).mean())
|
||||
|
||||
result = g.rolling(2).mean().B
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = g.rolling(2).B.mean()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = g.B.rolling(2).mean()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = self.frame.B.groupby(self.frame.A).rolling(2).mean()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_getitem_multiple(self):
|
||||
|
||||
# GH 13174
|
||||
g = self.frame.groupby("A")
|
||||
r = g.rolling(2)
|
||||
g_mutated = self.frame.groupby("A", mutated=True)
|
||||
expected = g_mutated.B.apply(lambda x: x.rolling(2).count())
|
||||
|
||||
result = r.B.count()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = r.B.count()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_rolling(self):
|
||||
g = self.frame.groupby("A")
|
||||
r = g.rolling(window=4)
|
||||
|
||||
for f in ["sum", "mean", "min", "max", "count", "kurt", "skew"]:
|
||||
|
||||
result = getattr(r, f)()
|
||||
expected = g.apply(lambda x: getattr(x.rolling(4), f)())
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
for f in ["std", "var"]:
|
||||
result = getattr(r, f)(ddof=1)
|
||||
expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = r.quantile(0.5)
|
||||
expected = g.apply(lambda x: x.rolling(4).quantile(0.5))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_rolling_corr_cov(self):
|
||||
g = self.frame.groupby("A")
|
||||
r = g.rolling(window=4)
|
||||
|
||||
for f in ["corr", "cov"]:
|
||||
result = getattr(r, f)(self.frame)
|
||||
|
||||
def func(x):
|
||||
return getattr(x.rolling(4), f)(self.frame)
|
||||
|
||||
expected = g.apply(func)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = getattr(r.B, f)(pairwise=True)
|
||||
|
||||
def func(x):
|
||||
return getattr(x.B.rolling(4), f)(pairwise=True)
|
||||
|
||||
expected = g.apply(func)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_rolling_apply(self, raw):
|
||||
g = self.frame.groupby("A")
|
||||
r = g.rolling(window=4)
|
||||
|
||||
# reduction
|
||||
result = r.apply(lambda x: x.sum(), raw=raw)
|
||||
expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_rolling_apply_mutability(self):
|
||||
# GH 14013
|
||||
df = pd.DataFrame({"A": ["foo"] * 3 + ["bar"] * 3, "B": [1] * 6})
|
||||
g = df.groupby("A")
|
||||
|
||||
mi = pd.MultiIndex.from_tuples(
|
||||
[("bar", 3), ("bar", 4), ("bar", 5), ("foo", 0), ("foo", 1), ("foo", 2)]
|
||||
)
|
||||
|
||||
mi.names = ["A", None]
|
||||
# Grouped column should not be a part of the output
|
||||
expected = pd.DataFrame([np.nan, 2.0, 2.0] * 2, columns=["B"], index=mi)
|
||||
|
||||
result = g.rolling(window=2).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# Call an arbitrary function on the groupby
|
||||
g.sum()
|
||||
|
||||
# Make sure nothing has been mutated
|
||||
result = g.rolling(window=2).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_expanding(self):
|
||||
g = self.frame.groupby("A")
|
||||
r = g.expanding()
|
||||
|
||||
for f in ["sum", "mean", "min", "max", "count", "kurt", "skew"]:
|
||||
|
||||
result = getattr(r, f)()
|
||||
expected = g.apply(lambda x: getattr(x.expanding(), f)())
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
for f in ["std", "var"]:
|
||||
result = getattr(r, f)(ddof=0)
|
||||
expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = r.quantile(0.5)
|
||||
expected = g.apply(lambda x: x.expanding().quantile(0.5))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_expanding_corr_cov(self):
|
||||
g = self.frame.groupby("A")
|
||||
r = g.expanding()
|
||||
|
||||
for f in ["corr", "cov"]:
|
||||
result = getattr(r, f)(self.frame)
|
||||
|
||||
def func(x):
|
||||
return getattr(x.expanding(), f)(self.frame)
|
||||
|
||||
expected = g.apply(func)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = getattr(r.B, f)(pairwise=True)
|
||||
|
||||
def func(x):
|
||||
return getattr(x.B.expanding(), f)(pairwise=True)
|
||||
|
||||
expected = g.apply(func)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_expanding_apply(self, raw):
|
||||
g = self.frame.groupby("A")
|
||||
r = g.expanding()
|
||||
|
||||
# reduction
|
||||
result = r.apply(lambda x: x.sum(), raw=raw)
|
||||
expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw))
|
||||
tm.assert_frame_equal(result, expected)
|
venv/lib/python3.6/site-packages/pandas/tests/window/test_moments.py (new file, 2562 lines; diff suppressed because it is too large)
@@ -0,0 +1,183 @@
|
||||
import warnings
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Series
|
||||
from pandas.core.sorting import safe_sort
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestPairwise:
|
||||
|
||||
# GH 7738
|
||||
df1s = [
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", "C"]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1.0, 0]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0.0, 1]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", 1]),
|
||||
DataFrame([[2.0, 4.0], [1.0, 2.0], [5.0, 2.0], [8.0, 1.0]], columns=[1, 0.0]),
|
||||
DataFrame([[2, 4.0], [1, 2.0], [5, 2.0], [8, 1.0]], columns=[0, 1.0]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.0]], columns=[1.0, "X"]),
|
||||
]
|
||||
df2 = DataFrame(
|
||||
[[None, 1, 1], [None, 1, 2], [None, 3, 2], [None, 8, 1]],
|
||||
columns=["Y", "Z", "X"],
|
||||
)
|
||||
s = Series([1, 1, 3, 8])
|
||||
|
||||
def compare(self, result, expected):
|
||||
|
||||
# since we have sorted the results
|
||||
# we can only compare non-nans
|
||||
result = result.dropna().values
|
||||
expected = expected.dropna().values
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
@pytest.mark.parametrize("f", [lambda x: x.cov(), lambda x: x.corr()])
|
||||
def test_no_flex(self, f):
|
||||
|
||||
# DataFrame methods (which do not call _flex_binary_moment())
|
||||
|
||||
results = [f(df) for df in self.df1s]
|
||||
for (df, result) in zip(self.df1s, results):
|
||||
tm.assert_index_equal(result.index, df.columns)
|
||||
tm.assert_index_equal(result.columns, df.columns)
|
||||
for i, result in enumerate(results):
|
||||
if i > 0:
|
||||
self.compare(result, results[0])
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: x.expanding().cov(pairwise=True),
|
||||
lambda x: x.expanding().corr(pairwise=True),
|
||||
lambda x: x.rolling(window=3).cov(pairwise=True),
|
||||
lambda x: x.rolling(window=3).corr(pairwise=True),
|
||||
lambda x: x.ewm(com=3).cov(pairwise=True),
|
||||
lambda x: x.ewm(com=3).corr(pairwise=True),
|
||||
],
|
||||
)
|
||||
def test_pairwise_with_self(self, f):
|
||||
|
||||
# DataFrame with itself, pairwise=True
|
||||
# note that we may construct the 1st level of the MI
|
||||
# in a non-monotonic way, so compare accordingly
|
||||
results = []
|
||||
for i, df in enumerate(self.df1s):
|
||||
result = f(df)
|
||||
tm.assert_index_equal(result.index.levels[0], df.index, check_names=False)
|
||||
tm.assert_numpy_array_equal(
|
||||
safe_sort(result.index.levels[1]), safe_sort(df.columns.unique())
|
||||
)
|
||||
tm.assert_index_equal(result.columns, df.columns)
|
||||
results.append(df)
|
||||
|
||||
for i, result in enumerate(results):
|
||||
if i > 0:
|
||||
self.compare(result, results[0])
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: x.expanding().cov(pairwise=False),
|
||||
lambda x: x.expanding().corr(pairwise=False),
|
||||
lambda x: x.rolling(window=3).cov(pairwise=False),
|
||||
lambda x: x.rolling(window=3).corr(pairwise=False),
|
||||
lambda x: x.ewm(com=3).cov(pairwise=False),
|
||||
lambda x: x.ewm(com=3).corr(pairwise=False),
|
||||
],
|
||||
)
|
||||
def test_no_pairwise_with_self(self, f):
|
||||
|
||||
# DataFrame with itself, pairwise=False
|
||||
results = [f(df) for df in self.df1s]
|
||||
for (df, result) in zip(self.df1s, results):
|
||||
tm.assert_index_equal(result.index, df.index)
|
||||
tm.assert_index_equal(result.columns, df.columns)
|
||||
for i, result in enumerate(results):
|
||||
if i > 0:
|
||||
self.compare(result, results[0])
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x, y: x.expanding().cov(y, pairwise=True),
|
||||
lambda x, y: x.expanding().corr(y, pairwise=True),
|
||||
lambda x, y: x.rolling(window=3).cov(y, pairwise=True),
|
||||
lambda x, y: x.rolling(window=3).corr(y, pairwise=True),
|
||||
lambda x, y: x.ewm(com=3).cov(y, pairwise=True),
|
||||
lambda x, y: x.ewm(com=3).corr(y, pairwise=True),
|
||||
],
|
||||
)
|
||||
def test_pairwise_with_other(self, f):
|
||||
|
||||
# DataFrame with another DataFrame, pairwise=True
|
||||
results = [f(df, self.df2) for df in self.df1s]
|
||||
for (df, result) in zip(self.df1s, results):
|
||||
tm.assert_index_equal(result.index.levels[0], df.index, check_names=False)
|
||||
tm.assert_numpy_array_equal(
|
||||
safe_sort(result.index.levels[1]), safe_sort(self.df2.columns.unique())
|
||||
)
|
||||
for i, result in enumerate(results):
|
||||
if i > 0:
|
||||
self.compare(result, results[0])
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x, y: x.expanding().cov(y, pairwise=False),
|
||||
lambda x, y: x.expanding().corr(y, pairwise=False),
|
||||
lambda x, y: x.rolling(window=3).cov(y, pairwise=False),
|
||||
lambda x, y: x.rolling(window=3).corr(y, pairwise=False),
|
||||
lambda x, y: x.ewm(com=3).cov(y, pairwise=False),
|
||||
lambda x, y: x.ewm(com=3).corr(y, pairwise=False),
|
||||
],
|
||||
)
|
||||
def test_no_pairwise_with_other(self, f):
|
||||
|
||||
# DataFrame with another DataFrame, pairwise=False
|
||||
results = [
|
||||
f(df, self.df2) if df.columns.is_unique else None for df in self.df1s
|
||||
]
|
||||
for (df, result) in zip(self.df1s, results):
|
||||
if result is not None:
|
||||
with warnings.catch_warnings(record=True):
|
||||
warnings.simplefilter("ignore", RuntimeWarning)
|
||||
# we can have int and str columns
|
||||
expected_index = df.index.union(self.df2.index)
|
||||
expected_columns = df.columns.union(self.df2.columns)
|
||||
tm.assert_index_equal(result.index, expected_index)
|
||||
tm.assert_index_equal(result.columns, expected_columns)
|
||||
else:
|
||||
with pytest.raises(ValueError, match="'arg1' columns are not unique"):
|
||||
f(df, self.df2)
|
||||
with pytest.raises(ValueError, match="'arg2' columns are not unique"):
|
||||
f(self.df2, df)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x, y: x.expanding().cov(y),
|
||||
lambda x, y: x.expanding().corr(y),
|
||||
lambda x, y: x.rolling(window=3).cov(y),
|
||||
lambda x, y: x.rolling(window=3).corr(y),
|
||||
lambda x, y: x.ewm(com=3).cov(y),
|
||||
lambda x, y: x.ewm(com=3).corr(y),
|
||||
],
|
||||
)
|
||||
def test_pairwise_with_series(self, f):
|
||||
|
||||
# DataFrame with a Series
|
||||
results = [f(df, self.s) for df in self.df1s] + [
|
||||
f(self.s, df) for df in self.df1s
|
||||
]
|
||||
for (df, result) in zip(self.df1s, results):
|
||||
tm.assert_index_equal(result.index, df.index)
|
||||
tm.assert_index_equal(result.columns, df.columns)
|
||||
for i, result in enumerate(results):
|
||||
if i > 0:
|
||||
self.compare(result, results[0])
|
@@ -0,0 +1,328 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import UnsupportedFunctionCall
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series
|
||||
import pandas.core.window as rwindow
|
||||
from pandas.tests.window.common import Base
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestRolling(Base):
|
||||
def setup_method(self, method):
|
||||
self._create_data()
|
||||
|
||||
def test_doc_string(self):
|
||||
|
||||
df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
|
||||
df
|
||||
df.rolling(2).sum()
|
||||
df.rolling(2, min_periods=1).sum()
|
||||
|
||||
@pytest.mark.parametrize("which", ["series", "frame"])
|
||||
def test_constructor(self, which):
|
||||
# GH 12669
|
||||
|
||||
o = getattr(self, which)
|
||||
c = o.rolling
|
||||
|
||||
# valid
|
||||
c(window=2)
|
||||
c(window=2, min_periods=1)
|
||||
c(window=2, min_periods=1, center=True)
|
||||
c(window=2, min_periods=1, center=False)
|
||||
|
||||
# GH 13383
|
||||
with pytest.raises(ValueError):
|
||||
c(0)
|
||||
c(-1)
|
||||
|
||||
# not valid
|
||||
for w in [2.0, "foo", np.array([2])]:
|
||||
with pytest.raises(ValueError):
|
||||
c(window=w)
|
||||
with pytest.raises(ValueError):
|
||||
c(window=2, min_periods=w)
|
||||
with pytest.raises(ValueError):
|
||||
c(window=2, min_periods=1, center=w)
|
||||
|
||||
@td.skip_if_no_scipy
|
||||
@pytest.mark.parametrize("which", ["series", "frame"])
|
||||
def test_constructor_with_win_type(self, which):
|
||||
# GH 13383
|
||||
o = getattr(self, which)
|
||||
c = o.rolling
|
||||
with pytest.raises(ValueError):
|
||||
c(-1, win_type="boxcar")
|
||||
|
||||
@pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3)])
|
||||
def test_constructor_with_timedelta_window(self, window):
|
||||
# GH 15440
|
||||
n = 10
|
||||
df = DataFrame(
|
||||
{"value": np.arange(n)},
|
||||
index=pd.date_range("2015-12-24", periods=n, freq="D"),
|
||||
)
|
||||
expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3))
|
||||
|
||||
result = df.rolling(window=window).sum()
|
||||
expected = DataFrame(
|
||||
{"value": expected_data},
|
||||
index=pd.date_range("2015-12-24", periods=n, freq="D"),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
expected = df.rolling("3D").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3), "3D"])
|
||||
def test_constructor_timedelta_window_and_minperiods(self, window, raw):
|
||||
# GH 15305
|
||||
n = 10
|
||||
df = DataFrame(
|
||||
{"value": np.arange(n)},
|
||||
index=pd.date_range("2017-08-08", periods=n, freq="D"),
|
||||
)
|
||||
expected = DataFrame(
|
||||
{"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))},
|
||||
index=pd.date_range("2017-08-08", periods=n, freq="D"),
|
||||
)
|
||||
result_roll_sum = df.rolling(window=window, min_periods=2).sum()
|
||||
result_roll_generic = df.rolling(window=window, min_periods=2).apply(
|
||||
sum, raw=raw
|
||||
)
|
||||
tm.assert_frame_equal(result_roll_sum, expected)
|
||||
tm.assert_frame_equal(result_roll_generic, expected)
|
||||
|
||||
@pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"])
|
||||
def test_numpy_compat(self, method):
|
||||
# see gh-12811
|
||||
r = rwindow.Rolling(Series([2, 4, 6]), window=2)
|
||||
|
||||
msg = "numpy operations are not valid with window objects"
|
||||
|
||||
with pytest.raises(UnsupportedFunctionCall, match=msg):
|
||||
getattr(r, method)(1, 2, 3)
|
||||
with pytest.raises(UnsupportedFunctionCall, match=msg):
|
||||
getattr(r, method)(dtype=np.float64)
|
||||
|
||||
def test_closed(self):
|
||||
df = DataFrame({"A": [0, 1, 2, 3, 4]})
|
||||
# closed only allowed for datetimelike
|
||||
with pytest.raises(ValueError):
|
||||
df.rolling(window=3, closed="neither")
|
||||
|
||||
@pytest.mark.parametrize("closed", ["neither", "left"])
|
||||
def test_closed_empty(self, closed, arithmetic_win_operators):
|
||||
# GH 26005
|
||||
func_name = arithmetic_win_operators
|
||||
ser = pd.Series(
|
||||
data=np.arange(5), index=pd.date_range("2000", periods=5, freq="2D")
|
||||
)
|
||||
roll = ser.rolling("1D", closed=closed)
|
||||
|
||||
result = getattr(roll, func_name)()
|
||||
expected = pd.Series([np.nan] * 5, index=ser.index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("func", ["min", "max"])
|
||||
def test_closed_one_entry(self, func):
|
||||
# GH24718
|
||||
ser = pd.Series(data=[2], index=pd.date_range("2000", periods=1))
|
||||
result = getattr(ser.rolling("10D", closed="left"), func)()
|
||||
tm.assert_series_equal(result, pd.Series([np.nan], index=ser.index))
|
||||
|
||||
@pytest.mark.parametrize("func", ["min", "max"])
|
||||
def test_closed_one_entry_groupby(self, func):
|
||||
# GH24718
|
||||
ser = pd.DataFrame(
|
||||
data={"A": [1, 1, 2], "B": [3, 2, 1]},
|
||||
index=pd.date_range("2000", periods=3),
|
||||
)
|
||||
result = getattr(
|
||||
ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func
|
||||
)()
|
||||
exp_idx = pd.MultiIndex.from_arrays(
|
||||
arrays=[[1, 1, 2], ser.index], names=("A", None)
|
||||
)
|
||||
expected = pd.Series(data=[np.nan, 3, np.nan], index=exp_idx, name="B")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("input_dtype", ["int", "float"])
|
||||
@pytest.mark.parametrize(
|
||||
"func,closed,expected",
|
||||
[
|
||||
("min", "right", [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]),
|
||||
("min", "both", [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]),
|
||||
("min", "neither", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]),
|
||||
("min", "left", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]),
|
||||
("max", "right", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
|
||||
("max", "both", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
|
||||
("max", "neither", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
|
||||
("max", "left", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
|
||||
],
|
||||
)
|
||||
def test_closed_min_max_datetime(self, input_dtype, func, closed, expected):
|
||||
# see gh-21704
|
||||
ser = pd.Series(
|
||||
data=np.arange(10).astype(input_dtype),
|
||||
index=pd.date_range("2000", periods=10),
|
||||
)
|
||||
|
||||
result = getattr(ser.rolling("3D", closed=closed), func)()
|
||||
expected = pd.Series(expected, index=ser.index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_closed_uneven(self):
|
||||
# see gh-21704
|
||||
ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10))
|
||||
|
||||
# uneven
|
||||
ser = ser.drop(index=ser.index[[1, 5]])
|
||||
result = ser.rolling("3D", closed="left").min()
|
||||
expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6], index=ser.index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func,closed,expected",
|
||||
[
|
||||
("min", "right", [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]),
|
||||
("min", "both", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]),
|
||||
("min", "neither", [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]),
|
||||
("min", "left", [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]),
|
||||
("max", "right", [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]),
|
||||
("max", "both", [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]),
|
||||
("max", "neither", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]),
|
||||
("max", "left", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan]),
|
||||
],
|
||||
)
|
||||
def test_closed_min_max_minp(self, func, closed, expected):
|
||||
# see gh-21704
|
||||
ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10))
|
||||
ser[ser.index[-3:]] = np.nan
|
||||
result = getattr(ser.rolling("3D", min_periods=2, closed=closed), func)()
|
||||
expected = pd.Series(expected, index=ser.index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"closed,expected",
|
||||
[
|
||||
("right", [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]),
|
||||
("both", [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]),
|
||||
("neither", [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]),
|
||||
("left", [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]),
|
||||
],
|
||||
)
|
||||
def test_closed_median_quantile(self, closed, expected):
|
||||
# GH 26005
|
||||
ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10))
|
||||
roll = ser.rolling("3D", closed=closed)
|
||||
expected = pd.Series(expected, index=ser.index)
|
||||
|
||||
result = roll.median()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = roll.quantile(0.5)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("roller", ["1s", 1])
|
||||
def tests_empty_df_rolling(self, roller):
|
||||
# GH 15819 Verifies that datetime and integer rolling windows can be
|
||||
# applied to empty DataFrames
|
||||
expected = DataFrame()
|
||||
result = DataFrame().rolling(roller).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# Verifies that datetime and integer rolling windows can be applied to
|
||||
# empty DataFrames with datetime index
|
||||
expected = DataFrame(index=pd.DatetimeIndex([]))
|
||||
result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_empty_window_median_quantile(self):
|
||||
# GH 26005
|
||||
expected = pd.Series([np.nan, np.nan, np.nan])
|
||||
roll = pd.Series(np.arange(3)).rolling(0)
|
||||
|
||||
result = roll.median()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = roll.quantile(0.1)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_missing_minp_zero(self):
|
||||
# https://github.com/pandas-dev/pandas/pull/18921
|
||||
# minp=0
|
||||
x = pd.Series([np.nan])
|
||||
result = x.rolling(1, min_periods=0).sum()
|
||||
expected = pd.Series([0.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# minp=1
|
||||
result = x.rolling(1, min_periods=1).sum()
|
||||
expected = pd.Series([np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_missing_minp_zero_variable(self):
|
||||
# https://github.com/pandas-dev/pandas/pull/18921
|
||||
x = pd.Series(
|
||||
[np.nan] * 4,
|
||||
index=pd.DatetimeIndex(
|
||||
["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"]
|
||||
),
|
||||
)
|
||||
result = x.rolling(pd.Timedelta("2d"), min_periods=0).sum()
|
||||
expected = pd.Series(0.0, index=x.index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_multi_index_names(self):
|
||||
|
||||
# GH 16789, 16825
|
||||
cols = pd.MultiIndex.from_product(
|
||||
[["A", "B"], ["C", "D", "E"]], names=["1", "2"]
|
||||
)
|
||||
df = DataFrame(np.ones((10, 6)), columns=cols)
|
||||
result = df.rolling(3).cov()
|
||||
|
||||
tm.assert_index_equal(result.columns, df.columns)
|
||||
assert result.index.names == [None, "1", "2"]
|
||||
|
||||
@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame])
|
||||
def test_iter_raises(self, klass):
|
||||
# https://github.com/pandas-dev/pandas/issues/11704
|
||||
# Iteration over a Window
|
||||
obj = klass([1, 2, 3, 4])
|
||||
with pytest.raises(NotImplementedError):
|
||||
iter(obj.rolling(2))
|
||||
|
||||
def test_rolling_axis_sum(self, axis_frame):
|
||||
# see gh-23372.
|
||||
df = DataFrame(np.ones((10, 20)))
|
||||
axis = df._get_axis_number(axis_frame)
|
||||
|
||||
if axis == 0:
|
||||
expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)})
|
||||
else:
|
||||
# axis == 1
|
||||
expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10)
|
||||
|
||||
result = df.rolling(3, axis=axis_frame).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_rolling_axis_count(self, axis_frame):
|
||||
# see gh-26055
|
||||
df = DataFrame({"x": range(3), "y": range(3)})
|
||||
|
||||
axis = df._get_axis_number(axis_frame)
|
||||
|
||||
if axis in [0, "index"]:
|
||||
expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]})
|
||||
else:
|
||||
expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]})
|
||||
|
||||
result = df.rolling(2, axis=axis_frame).count()
|
||||
tm.assert_frame_equal(result, expected)
|
@@ -0,0 +1,692 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Index, Series, Timestamp, date_range, to_datetime
|
||||
import pandas.util.testing as tm
|
||||
|
||||
import pandas.tseries.offsets as offsets
|
||||
|
||||
|
||||
class TestRollingTS:
|
||||
|
||||
# rolling time-series friendly
|
||||
# xref GH13327
|
||||
|
||||
def setup_method(self, method):
|
||||
|
||||
self.regular = DataFrame(
|
||||
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
|
||||
).set_index("A")
|
||||
|
||||
self.ragged = DataFrame({"B": range(5)})
|
||||
self.ragged.index = [
|
||||
Timestamp("20130101 09:00:00"),
|
||||
Timestamp("20130101 09:00:02"),
|
||||
Timestamp("20130101 09:00:03"),
|
||||
Timestamp("20130101 09:00:05"),
|
||||
Timestamp("20130101 09:00:06"),
|
||||
]
|
||||
|
||||
def test_doc_string(self):
|
||||
|
||||
df = DataFrame(
|
||||
{"B": [0, 1, 2, np.nan, 4]},
|
||||
index=[
|
||||
Timestamp("20130101 09:00:00"),
|
||||
Timestamp("20130101 09:00:02"),
|
||||
Timestamp("20130101 09:00:03"),
|
||||
Timestamp("20130101 09:00:05"),
|
||||
Timestamp("20130101 09:00:06"),
|
||||
],
|
||||
)
|
||||
df
|
||||
df.rolling("2s").sum()
|
||||
|
||||
def test_valid(self):
|
||||
|
||||
df = self.regular
|
||||
|
||||
# not a valid freq
|
||||
with pytest.raises(ValueError):
|
||||
df.rolling(window="foobar")
|
||||
|
||||
# not a datetimelike index
|
||||
with pytest.raises(ValueError):
|
||||
df.reset_index().rolling(window="foobar")
|
||||
|
||||
# non-fixed freqs
|
||||
for freq in ["2MS", offsets.MonthBegin(2)]:
|
||||
with pytest.raises(ValueError):
|
||||
df.rolling(window=freq)
|
||||
|
||||
for freq in ["1D", offsets.Day(2), "2ms"]:
|
||||
df.rolling(window=freq)
|
||||
|
||||
# non-integer min_periods
|
||||
for minp in [1.0, "foo", np.array([1, 2, 3])]:
|
||||
with pytest.raises(ValueError):
|
||||
df.rolling(window="1D", min_periods=minp)
|
||||
|
||||
# center is not implemented
|
||||
with pytest.raises(NotImplementedError):
|
||||
df.rolling(window="1D", center=True)
|
||||
|
||||
def test_on(self):
|
||||
|
||||
df = self.regular
|
||||
|
||||
# not a valid column
|
||||
with pytest.raises(ValueError):
|
||||
df.rolling(window="2s", on="foobar")
|
||||
|
||||
# column is valid
|
||||
df = df.copy()
|
||||
df["C"] = date_range("20130101", periods=len(df))
|
||||
df.rolling(window="2d", on="C").sum()
|
||||
|
||||
# invalid columns
|
||||
with pytest.raises(ValueError):
|
||||
df.rolling(window="2d", on="B")
|
||||
|
||||
# ok even though on non-selected
|
||||
df.rolling(window="2d", on="C").B.sum()
|
||||
|
||||
def test_monotonic_on(self):
|
||||
|
||||
# on/index must be monotonic
|
||||
df = DataFrame(
|
||||
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
|
||||
)
|
||||
|
||||
assert df.A.is_monotonic
|
||||
df.rolling("2s", on="A").sum()
|
||||
|
||||
df = df.set_index("A")
|
||||
assert df.index.is_monotonic
|
||||
df.rolling("2s").sum()
|
||||
|
||||
# non-monotonic
|
||||
df.index = reversed(df.index.tolist())
|
||||
assert not df.index.is_monotonic
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
df.rolling("2s").sum()
|
||||
|
||||
df = df.reset_index()
|
||||
with pytest.raises(ValueError):
|
||||
df.rolling("2s", on="A").sum()
|
||||
|
||||
    def test_frame_on(self):

        df = DataFrame(
            {"B": range(5), "C": date_range("20130101 09:00:00", periods=5, freq="3s")}
        )

        df["A"] = [
            Timestamp("20130101 09:00:00"),
            Timestamp("20130101 09:00:02"),
            Timestamp("20130101 09:00:03"),
            Timestamp("20130101 09:00:05"),
            Timestamp("20130101 09:00:06"),
        ]

        # we are simulating the use of 'on' by setting the index
        expected = df.set_index("A").rolling("2s").B.sum().reset_index(drop=True)

        result = df.rolling("2s", on="A").B.sum()
        tm.assert_series_equal(result, expected)

        # test as a frame
        # we should be ignoring the 'on' as an aggregation column
        # note that the expected is setting, computing, and resetting
        # so the columns need to be switched compared
        # to the actual result where they are ordered as in the
        # original
        expected = (
            df.set_index("A").rolling("2s")[["B"]].sum().reset_index()[["B", "A"]]
        )

        result = df.rolling("2s", on="A")[["B"]].sum()
        tm.assert_frame_equal(result, expected)

    def test_frame_on2(self):

        # using multiple aggregation columns
        df = DataFrame(
            {
                "A": [0, 1, 2, 3, 4],
                "B": [0, 1, 2, np.nan, 4],
                "C": Index(
                    [
                        Timestamp("20130101 09:00:00"),
                        Timestamp("20130101 09:00:02"),
                        Timestamp("20130101 09:00:03"),
                        Timestamp("20130101 09:00:05"),
                        Timestamp("20130101 09:00:06"),
                    ]
                ),
            },
            columns=["A", "C", "B"],
        )

        expected1 = DataFrame(
            {"A": [0.0, 1, 3, 3, 7], "B": [0, 1, 3, np.nan, 4], "C": df["C"]},
            columns=["A", "C", "B"],
        )

        result = df.rolling("2s", on="C").sum()
        expected = expected1
        tm.assert_frame_equal(result, expected)

        expected = Series([0, 1, 3, np.nan, 4], name="B")
        result = df.rolling("2s", on="C").B.sum()
        tm.assert_series_equal(result, expected)

        expected = expected1[["A", "B", "C"]]
        result = df.rolling("2s", on="C")[["A", "B", "C"]].sum()
        tm.assert_frame_equal(result, expected)

    def test_basic_regular(self):

        df = self.regular.copy()

        df.index = date_range("20130101", periods=5, freq="D")
        expected = df.rolling(window=1, min_periods=1).sum()
        result = df.rolling(window="1D").sum()
        tm.assert_frame_equal(result, expected)

        df.index = date_range("20130101", periods=5, freq="2D")
        expected = df.rolling(window=1, min_periods=1).sum()
        result = df.rolling(window="2D", min_periods=1).sum()
        tm.assert_frame_equal(result, expected)

        expected = df.rolling(window=1, min_periods=1).sum()
        result = df.rolling(window="2D", min_periods=1).sum()
        tm.assert_frame_equal(result, expected)

        expected = df.rolling(window=1).sum()
        result = df.rolling(window="2D").sum()
        tm.assert_frame_equal(result, expected)

    def test_min_periods(self):

        # compare for min_periods
        df = self.regular

        # the two calls are slightly different: an offset window defaults to
        # min_periods=1, so it should match an integer window with min_periods=1
        expected = df.rolling(2, min_periods=1).sum()
        result = df.rolling("2s").sum()
        tm.assert_frame_equal(result, expected)

        expected = df.rolling(2, min_periods=1).sum()
        result = df.rolling("2s", min_periods=1).sum()
        tm.assert_frame_equal(result, expected)

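    # closed semantics for an offset window ending at time t with span 2s:
    # 'right' covers (t-2s, t], 'both' covers [t-2s, t], 'left' covers
    # [t-2s, t) and 'neither' covers (t-2s, t); the expected sums in
    # test_closed below follow directly from these interval definitions.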
    def test_closed(self):

        # xref GH13965

        df = DataFrame(
            {"A": [1] * 5},
            index=[
                Timestamp("20130101 09:00:01"),
                Timestamp("20130101 09:00:02"),
                Timestamp("20130101 09:00:03"),
                Timestamp("20130101 09:00:04"),
                Timestamp("20130101 09:00:06"),
            ],
        )

        # closed must be 'right', 'left', 'both', 'neither'
        with pytest.raises(ValueError):
            self.regular.rolling(window="2s", closed="blabla")

        expected = df.copy()
        expected["A"] = [1.0, 2, 2, 2, 1]
        result = df.rolling("2s", closed="right").sum()
        tm.assert_frame_equal(result, expected)

        # default should be 'right'
        result = df.rolling("2s").sum()
        tm.assert_frame_equal(result, expected)

        expected = df.copy()
        expected["A"] = [1.0, 2, 3, 3, 2]
        result = df.rolling("2s", closed="both").sum()
        tm.assert_frame_equal(result, expected)

        expected = df.copy()
        expected["A"] = [np.nan, 1.0, 2, 2, 1]
        result = df.rolling("2s", closed="left").sum()
        tm.assert_frame_equal(result, expected)

        expected = df.copy()
        expected["A"] = [np.nan, 1.0, 1, 1, np.nan]
        result = df.rolling("2s", closed="neither").sum()
        tm.assert_frame_equal(result, expected)

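    # For the ragged index (00s, 02s, 03s, 05s, 06s) a right-closed "2s"
    # window contains {00}, {02}, {02, 03}, {05}, {05, 06} respectively,
    # which is why the expected sums below are [0, 1, 3, 3, 7] for B = range(5).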
    def test_ragged_sum(self):

        df = self.ragged
        result = df.rolling(window="1s", min_periods=1).sum()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).sum()
        expected = df.copy()
        expected["B"] = [0.0, 1, 3, 3, 7]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=2).sum()
        expected = df.copy()
        expected["B"] = [np.nan, np.nan, 3, np.nan, 7]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="3s", min_periods=1).sum()
        expected = df.copy()
        expected["B"] = [0.0, 1, 3, 5, 7]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="3s").sum()
        expected = df.copy()
        expected["B"] = [0.0, 1, 3, 5, 7]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="4s", min_periods=1).sum()
        expected = df.copy()
        expected["B"] = [0.0, 1, 3, 6, 9]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="4s", min_periods=3).sum()
        expected = df.copy()
        expected["B"] = [np.nan, np.nan, 3, 6, 9]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).sum()
        expected = df.copy()
        expected["B"] = [0.0, 1, 3, 6, 10]
        tm.assert_frame_equal(result, expected)

    def test_ragged_mean(self):

        df = self.ragged
        result = df.rolling(window="1s", min_periods=1).mean()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).mean()
        expected = df.copy()
        expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
        tm.assert_frame_equal(result, expected)

    def test_ragged_median(self):

        df = self.ragged
        result = df.rolling(window="1s", min_periods=1).median()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).median()
        expected = df.copy()
        expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
        tm.assert_frame_equal(result, expected)

    def test_ragged_quantile(self):

        df = self.ragged
        result = df.rolling(window="1s", min_periods=1).quantile(0.5)
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).quantile(0.5)
        expected = df.copy()
        expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
        tm.assert_frame_equal(result, expected)

    def test_ragged_std(self):

        df = self.ragged
        result = df.rolling(window="1s", min_periods=1).std(ddof=0)
        expected = df.copy()
        expected["B"] = [0.0] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="1s", min_periods=1).std(ddof=1)
        expected = df.copy()
        expected["B"] = [np.nan] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="3s", min_periods=1).std(ddof=0)
        expected = df.copy()
        expected["B"] = [0.0] + [0.5] * 4
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).std(ddof=1)
        expected = df.copy()
        expected["B"] = [np.nan, 0.707107, 1.0, 1.0, 1.290994]
        tm.assert_frame_equal(result, expected)

    def test_ragged_var(self):

        df = self.ragged
        result = df.rolling(window="1s", min_periods=1).var(ddof=0)
        expected = df.copy()
        expected["B"] = [0.0] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="1s", min_periods=1).var(ddof=1)
        expected = df.copy()
        expected["B"] = [np.nan] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="3s", min_periods=1).var(ddof=0)
        expected = df.copy()
        expected["B"] = [0.0] + [0.25] * 4
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).var(ddof=1)
        expected = df.copy()
        expected["B"] = [np.nan, 0.5, 1.0, 1.0, 1 + 2 / 3.0]
        tm.assert_frame_equal(result, expected)

    def test_ragged_skew(self):

        df = self.ragged
        result = df.rolling(window="3s", min_periods=1).skew()
        expected = df.copy()
        expected["B"] = [np.nan] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).skew()
        expected = df.copy()
        expected["B"] = [np.nan] * 2 + [0.0, 0.0, 0.0]
        tm.assert_frame_equal(result, expected)

    def test_ragged_kurt(self):

        df = self.ragged
        result = df.rolling(window="3s", min_periods=1).kurt()
        expected = df.copy()
        expected["B"] = [np.nan] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).kurt()
        expected = df.copy()
        expected["B"] = [np.nan] * 4 + [-1.2]
        tm.assert_frame_equal(result, expected)

    def test_ragged_count(self):

        df = self.ragged
        result = df.rolling(window="1s", min_periods=1).count()
        expected = df.copy()
        expected["B"] = [1.0, 1, 1, 1, 1]
        tm.assert_frame_equal(result, expected)

        df = self.ragged
        result = df.rolling(window="1s").count()
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).count()
        expected = df.copy()
        expected["B"] = [1.0, 1, 2, 1, 2]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=2).count()
        expected = df.copy()
        expected["B"] = [np.nan, np.nan, 2, np.nan, 2]
        tm.assert_frame_equal(result, expected)

    def test_regular_min(self):

        df = DataFrame(
            {"A": date_range("20130101", periods=5, freq="s"), "B": [0.0, 1, 2, 3, 4]}
        ).set_index("A")
        result = df.rolling("1s").min()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        df = DataFrame(
            {"A": date_range("20130101", periods=5, freq="s"), "B": [5, 4, 3, 4, 5]}
        ).set_index("A")

        tm.assert_frame_equal(result, expected)
        result = df.rolling("2s").min()
        expected = df.copy()
        expected["B"] = [5.0, 4, 3, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling("5s").min()
        expected = df.copy()
        expected["B"] = [5.0, 4, 3, 3, 3]
        tm.assert_frame_equal(result, expected)

    def test_ragged_min(self):

        df = self.ragged

        result = df.rolling(window="1s", min_periods=1).min()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).min()
        expected = df.copy()
        expected["B"] = [0.0, 1, 1, 3, 3]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).min()
        expected = df.copy()
        expected["B"] = [0.0, 0, 0, 1, 1]
        tm.assert_frame_equal(result, expected)

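    # test_perf_min below compares a fixed 2-row (and 200-row) window with
    # the equivalent "2s" ("200s") offset window on a 1-second-frequency
    # index; the results should agree, so the elementwise difference is
    # checked against a small tolerance.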
    def test_perf_min(self):

        N = 10000

        dfp = DataFrame(
            {"B": np.random.randn(N)}, index=date_range("20130101", periods=N, freq="s")
        )
        expected = dfp.rolling(2, min_periods=1).min()
        result = dfp.rolling("2s").min()
        assert ((result - expected) < 0.01).all().bool()

        expected = dfp.rolling(200, min_periods=1).min()
        result = dfp.rolling("200s").min()
        assert ((result - expected) < 0.01).all().bool()

    def test_ragged_max(self):

        df = self.ragged

        result = df.rolling(window="1s", min_periods=1).max()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).max()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).max()
        expected = df.copy()
        expected["B"] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

    def test_ragged_apply(self, raw):

        df = self.ragged

        f = lambda x: 1
        result = df.rolling(window="1s", min_periods=1).apply(f, raw=raw)
        expected = df.copy()
        expected["B"] = 1.0
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="2s", min_periods=1).apply(f, raw=raw)
        expected = df.copy()
        expected["B"] = 1.0
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window="5s", min_periods=1).apply(f, raw=raw)
        expected = df.copy()
        expected["B"] = 1.0
        tm.assert_frame_equal(result, expected)

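    # In test_all below, self.regular has a 1-second-frequency index, so a
    # "1s" offset window always holds exactly one row and every aggregation
    # should match the corresponding integer window of size 1.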
    def test_all(self):

        # simple comparison of integer vs time-based windowing
        df = self.regular * 2
        er = df.rolling(window=1)
        r = df.rolling(window="1s")

        for f in [
            "sum",
            "mean",
            "count",
            "median",
            "std",
            "var",
            "kurt",
            "skew",
            "min",
            "max",
        ]:

            result = getattr(r, f)()
            expected = getattr(er, f)()
            tm.assert_frame_equal(result, expected)

        result = r.quantile(0.5)
        expected = er.quantile(0.5)
        tm.assert_frame_equal(result, expected)

    def test_all_apply(self, raw):

        df = self.regular * 2
        er = df.rolling(window=1)
        r = df.rolling(window="1s")

        result = r.apply(lambda x: 1, raw=raw)
        expected = er.apply(lambda x: 1, raw=raw)
        tm.assert_frame_equal(result, expected)

    def test_all2(self):

        # more sophisticated comparison of integer vs.
        # time-based windowing
        df = DataFrame(
            {"B": np.arange(50)}, index=date_range("20130101", periods=50, freq="H")
        )
        # in-range data
        dft = df.between_time("09:00", "16:00")

        r = dft.rolling(window="5H")

        for f in [
            "sum",
            "mean",
            "count",
            "median",
            "std",
            "var",
            "kurt",
            "skew",
            "min",
            "max",
        ]:

            result = getattr(r, f)()

            # we need to roll the days separately
            # to compare with a time-based roll
            # finally groupby-apply will return a multi-index
            # so we need to drop the day
            def agg_by_day(x):
                x = x.between_time("09:00", "16:00")
                return getattr(x.rolling(5, min_periods=1), f)()

            expected = (
                df.groupby(df.index.day)
                .apply(agg_by_day)
                .reset_index(level=0, drop=True)
            )

            tm.assert_frame_equal(result, expected)

    def test_groupby_monotonic(self):

        # GH 15130
        # we don't need to validate monotonicity when grouping

        data = [
            ["David", "1/1/2015", 100],
            ["David", "1/5/2015", 500],
            ["David", "5/30/2015", 50],
            ["David", "7/25/2015", 50],
            ["Ryan", "1/4/2014", 100],
            ["Ryan", "1/19/2015", 500],
            ["Ryan", "3/31/2016", 50],
            ["Joe", "7/1/2015", 100],
            ["Joe", "9/9/2015", 500],
            ["Joe", "10/15/2015", 50],
        ]

        df = DataFrame(data=data, columns=["name", "date", "amount"])
        df["date"] = to_datetime(df["date"])

        expected = (
            df.set_index("date")
            .groupby("name")
            .apply(lambda x: x.rolling("180D")["amount"].sum())
        )
        result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
        tm.assert_series_equal(result, expected)

    def test_non_monotonic(self):
        # GH 13966 (similar to #15130, closed by #15175)

        dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
        df = DataFrame(
            {
                "A": [1] * 20 + [2] * 12 + [3] * 8,
                "B": np.concatenate((dates, dates)),
                "C": np.arange(40),
            }
        )

        result = df.groupby("A").rolling("4s", on="B").C.mean()
        expected = (
            df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
        )
        tm.assert_series_equal(result, expected)

        df2 = df.sort_values("B")
        result = df2.groupby("A").rolling("4s", on="B").C.mean()
        tm.assert_series_equal(result, expected)

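    # test_rolling_cov_offset: for a Series of consecutive integers,
    # Series.rolling(...).cov() with no other Series given is the rolling
    # sample variance, which is 0.5 for any 2-element window and 1.0 for
    # any 3-element window, matching the expected values below.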
    def test_rolling_cov_offset(self):
        # GH16058

        idx = date_range("2017-01-01", periods=24, freq="1h")
        ss = Series(np.arange(len(idx)), index=idx)

        result = ss.rolling("2h").cov()
        expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx)
        tm.assert_series_equal(result, expected)

        expected2 = ss.rolling(2, min_periods=1).cov()
        tm.assert_series_equal(result, expected2)

        result = ss.rolling("3h").cov()
        expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx)
        tm.assert_series_equal(result, expected)

        expected2 = ss.rolling(3, min_periods=1).cov()
        tm.assert_series_equal(result, expected2)
@@ -0,0 +1,76 @@
import numpy as np
import pytest

from pandas.errors import UnsupportedFunctionCall
import pandas.util._test_decorators as td

import pandas as pd
from pandas import Series
import pandas.core.window as rwindow
from pandas.tests.window.common import Base


@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
class TestWindow(Base):
    def setup_method(self, method):
        self._create_data()

    @td.skip_if_no_scipy
    @pytest.mark.parametrize("which", ["series", "frame"])
    def test_constructor(self, which):
        # GH 12669

        o = getattr(self, which)
        c = o.rolling

        # valid
        c(win_type="boxcar", window=2, min_periods=1)
        c(win_type="boxcar", window=2, min_periods=1, center=True)
        c(win_type="boxcar", window=2, min_periods=1, center=False)

        # not valid
        for w in [2.0, "foo", np.array([2])]:
            with pytest.raises(ValueError):
                c(win_type="boxcar", window=2, min_periods=w)
            with pytest.raises(ValueError):
                c(win_type="boxcar", window=2, min_periods=1, center=w)

        for wt in ["foobar", 1]:
            with pytest.raises(ValueError):
                c(win_type=wt, window=2)

    @td.skip_if_no_scipy
    @pytest.mark.parametrize("which", ["series", "frame"])
    def test_constructor_with_win_type(self, which, win_types):
        # GH 12669
        o = getattr(self, which)
        c = o.rolling
        c(win_type=win_types, window=2)

@pytest.mark.parametrize("method", ["sum", "mean"])
|
||||
def test_numpy_compat(self, method):
|
||||
# see gh-12811
|
||||
w = rwindow.Window(Series([2, 4, 6]), window=[0, 2])
|
||||
|
||||
msg = "numpy operations are not valid with window objects"
|
||||
|
||||
with pytest.raises(UnsupportedFunctionCall, match=msg):
|
||||
getattr(w, method)(1, 2, 3)
|
||||
with pytest.raises(UnsupportedFunctionCall, match=msg):
|
||||
getattr(w, method)(dtype=np.float64)
|
||||
|
||||
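    # test_agg_function_support: win_type-based Window objects expose only a
    # small set of weighted aggregations (sum and mean at the time of this
    # test), so asking .agg() for anything else is expected to raise
    # AttributeError with the message matched below.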
    @td.skip_if_no_scipy
    @pytest.mark.parametrize("arg", ["median", "var", "std", "kurt", "skew"])
    def test_agg_function_support(self, arg):
        df = pd.DataFrame({"A": np.arange(5)})
        roll = df.rolling(2, win_type="triang")

        msg = "'{arg}' is not a valid function for 'Window' object".format(arg=arg)
        with pytest.raises(AttributeError, match=msg):
            roll.agg(arg)

        with pytest.raises(AttributeError, match=msg):
            roll.agg([arg])

        with pytest.raises(AttributeError, match=msg):
            roll.agg({"A": arg})