8th day of Python challenges 111-117

abd.shallal
2019-08-04 15:26:35 +03:00
parent b04c1b055f
commit 627802c383
3215 changed files with 760227 additions and 491 deletions

View File

@@ -0,0 +1,23 @@
from datetime import datetime
import numpy as np
from numpy.random import randn
from pandas import DataFrame, Series, bdate_range
N, K = 100, 10
class Base:
_nan_locs = np.arange(20, 40)
_inf_locs = np.array([])
def _create_data(self):
arr = randn(N)
arr[self._nan_locs] = np.NaN
self.arr = arr
self.rng = bdate_range(datetime(2009, 1, 1), periods=N)
self.series = Series(arr.copy(), index=self.rng)
self.frame = DataFrame(randn(N, K), index=self.rng, columns=np.arange(K))

View File

@@ -0,0 +1,49 @@
import pytest
@pytest.fixture(params=[True, False])
def raw(request):
return request.param
@pytest.fixture(
params=[
"triang",
"blackman",
"hamming",
"bartlett",
"bohman",
"blackmanharris",
"nuttall",
"barthann",
]
)
def win_types(request):
return request.param
@pytest.fixture(params=["kaiser", "gaussian", "general_gaussian", "exponential"])
def win_types_special(request):
return request.param
@pytest.fixture(
params=["sum", "mean", "median", "max", "min", "var", "std", "kurt", "skew"]
)
def arithmetic_win_operators(request):
return request.param
@pytest.fixture(params=["right", "left", "both", "neither"])
def closed(request):
return request.param
@pytest.fixture(params=[True, False])
def center(request):
return request.param
@pytest.fixture(params=[None, 1])
def min_periods(request):
return request.param
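# Illustrative sketch (not part of the upstream conftest): pytest expands each
# parametrized fixture above into one test invocation per parameter value, so a
# test only has to name the fixture to run against every value.  The test name
# below is hypothetical.
import pandas as pd
def test_rolling_center_smoke(center, min_periods):
    # runs for center in (True, False) and min_periods in (None, 1): 4 cases
    result = pd.Series(range(5)).rolling(3, center=center, min_periods=min_periods).mean()
    assert len(result) == 5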

View File

@@ -0,0 +1,367 @@
from collections import OrderedDict
import warnings
from warnings import catch_warnings
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import DataFrame, Index, Series, Timestamp, concat
from pandas.core.base import SpecificationError
from pandas.tests.window.common import Base
import pandas.util.testing as tm
class TestApi(Base):
def setup_method(self, method):
self._create_data()
def test_getitem(self):
r = self.frame.rolling(window=5)
tm.assert_index_equal(r._selected_obj.columns, self.frame.columns)
r = self.frame.rolling(window=5)[1]
assert r._selected_obj.name == self.frame.columns[1]
# technically this is allowed
r = self.frame.rolling(window=5)[1, 3]
tm.assert_index_equal(r._selected_obj.columns, self.frame.columns[[1, 3]])
r = self.frame.rolling(window=5)[[1, 3]]
tm.assert_index_equal(r._selected_obj.columns, self.frame.columns[[1, 3]])
def test_select_bad_cols(self):
df = DataFrame([[1, 2]], columns=["A", "B"])
g = df.rolling(window=5)
with pytest.raises(KeyError, match="Columns not found: 'C'"):
g[["C"]]
with pytest.raises(KeyError, match="^[^A]+$"):
# 'A' should not be referenced as a bad column...
# the regex will have to be rethought if the message changes!
g[["A", "C"]]
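# Illustrative aside (not from the original file): pytest.raises(..., match=...)
# uses re.search, so the "^[^A]+$" pattern above simply asserts that the letter
# "A" never appears anywhere in the KeyError message.
import re
assert re.search("^[^A]+$", "Columns not found: 'C'") is not None
assert re.search("^[^A]+$", "Columns not found: 'A', 'C'") is None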
def test_attribute_access(self):
df = DataFrame([[1, 2]], columns=["A", "B"])
r = df.rolling(window=5)
tm.assert_series_equal(r.A.sum(), r["A"].sum())
msg = "'Rolling' object has no attribute 'F'"
with pytest.raises(AttributeError, match=msg):
r.F
def tests_skip_nuisance(self):
df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
r = df.rolling(window=3)
result = r[["A", "B"]].sum()
expected = DataFrame(
{"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
columns=list("AB"),
)
tm.assert_frame_equal(result, expected)
def test_skip_sum_object_raises(self):
df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
r = df.rolling(window=3)
result = r.sum()
expected = DataFrame(
{"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
columns=list("AB"),
)
tm.assert_frame_equal(result, expected)
def test_agg(self):
df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
r = df.rolling(window=3)
a_mean = r["A"].mean()
a_std = r["A"].std()
a_sum = r["A"].sum()
b_mean = r["B"].mean()
b_std = r["B"].std()
b_sum = r["B"].sum()
result = r.aggregate([np.mean, np.std])
expected = concat([a_mean, a_std, b_mean, b_std], axis=1)
expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
tm.assert_frame_equal(result, expected)
result = r.aggregate({"A": np.mean, "B": np.std})
expected = concat([a_mean, b_std], axis=1)
tm.assert_frame_equal(result, expected, check_like=True)
result = r.aggregate({"A": ["mean", "std"]})
expected = concat([a_mean, a_std], axis=1)
expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")])
tm.assert_frame_equal(result, expected)
result = r["A"].aggregate(["mean", "sum"])
expected = concat([a_mean, a_sum], axis=1)
expected.columns = ["mean", "sum"]
tm.assert_frame_equal(result, expected)
with catch_warnings(record=True):
# using a dict with renaming
warnings.simplefilter("ignore", FutureWarning)
result = r.aggregate({"A": {"mean": "mean", "sum": "sum"}})
expected = concat([a_mean, a_sum], axis=1)
expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "sum")])
tm.assert_frame_equal(result, expected, check_like=True)
with catch_warnings(record=True):
warnings.simplefilter("ignore", FutureWarning)
result = r.aggregate(
{
"A": {"mean": "mean", "sum": "sum"},
"B": {"mean2": "mean", "sum2": "sum"},
}
)
expected = concat([a_mean, a_sum, b_mean, b_sum], axis=1)
exp_cols = [("A", "mean"), ("A", "sum"), ("B", "mean2"), ("B", "sum2")]
expected.columns = pd.MultiIndex.from_tuples(exp_cols)
tm.assert_frame_equal(result, expected, check_like=True)
result = r.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]})
expected = concat([a_mean, a_std, b_mean, b_std], axis=1)
exp_cols = [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")]
expected.columns = pd.MultiIndex.from_tuples(exp_cols)
tm.assert_frame_equal(result, expected, check_like=True)
def test_agg_apply(self, raw):
# passed lambda
df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
r = df.rolling(window=3)
a_sum = r["A"].sum()
result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)})
rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw)
expected = concat([a_sum, rcustom], axis=1)
tm.assert_frame_equal(result, expected, check_like=True)
def test_agg_consistency(self):
df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
r = df.rolling(window=3)
result = r.agg([np.sum, np.mean]).columns
expected = pd.MultiIndex.from_product([list("AB"), ["sum", "mean"]])
tm.assert_index_equal(result, expected)
result = r["A"].agg([np.sum, np.mean]).columns
expected = Index(["sum", "mean"])
tm.assert_index_equal(result, expected)
result = r.agg({"A": [np.sum, np.mean]}).columns
expected = pd.MultiIndex.from_tuples([("A", "sum"), ("A", "mean")])
tm.assert_index_equal(result, expected)
def test_agg_nested_dicts(self):
# API change for disallowing these types of nested dicts
df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
r = df.rolling(window=3)
msg = r"cannot perform renaming for (r1|r2) with a nested dictionary"
with pytest.raises(SpecificationError, match=msg):
r.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}})
expected = concat(
[r["A"].mean(), r["A"].std(), r["B"].mean(), r["B"].std()], axis=1
)
expected.columns = pd.MultiIndex.from_tuples(
[("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")]
)
with catch_warnings(record=True):
warnings.simplefilter("ignore", FutureWarning)
result = r[["A", "B"]].agg(
{"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}
)
tm.assert_frame_equal(result, expected, check_like=True)
with catch_warnings(record=True):
warnings.simplefilter("ignore", FutureWarning)
result = r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})
expected.columns = pd.MultiIndex.from_tuples(
[
("A", "ra", "mean"),
("A", "ra", "std"),
("B", "rb", "mean"),
("B", "rb", "std"),
]
)
tm.assert_frame_equal(result, expected, check_like=True)
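# Illustrative sketch (not from the original file): with the nested renaming
# dicts above deprecated/disallowed, one supported way to get renamed output is
# to aggregate with plain lists and relabel the resulting MultiIndex columns.
import pandas as pd
df_demo = pd.DataFrame({"A": range(5), "B": range(0, 10, 2)})
renamed = df_demo.rolling(window=3).agg({"A": ["mean", "std"], "B": ["mean", "std"]})
renamed.columns = pd.MultiIndex.from_tuples(
    [("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")]
)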
def test_count_nonnumeric_types(self):
# GH12541
cols = [
"int",
"float",
"string",
"datetime",
"timedelta",
"periods",
"fl_inf",
"fl_nan",
"str_nan",
"dt_nat",
"periods_nat",
]
df = DataFrame(
{
"int": [1, 2, 3],
"float": [4.0, 5.0, 6.0],
"string": list("abc"),
"datetime": pd.date_range("20170101", periods=3),
"timedelta": pd.timedelta_range("1 s", periods=3, freq="s"),
"periods": [
pd.Period("2012-01"),
pd.Period("2012-02"),
pd.Period("2012-03"),
],
"fl_inf": [1.0, 2.0, np.Inf],
"fl_nan": [1.0, 2.0, np.NaN],
"str_nan": ["aa", "bb", np.NaN],
"dt_nat": [
Timestamp("20170101"),
Timestamp("20170203"),
Timestamp(None),
],
"periods_nat": [
pd.Period("2012-01"),
pd.Period("2012-02"),
pd.Period(None),
],
},
columns=cols,
)
expected = DataFrame(
{
"int": [1.0, 2.0, 2.0],
"float": [1.0, 2.0, 2.0],
"string": [1.0, 2.0, 2.0],
"datetime": [1.0, 2.0, 2.0],
"timedelta": [1.0, 2.0, 2.0],
"periods": [1.0, 2.0, 2.0],
"fl_inf": [1.0, 2.0, 2.0],
"fl_nan": [1.0, 2.0, 1.0],
"str_nan": [1.0, 2.0, 1.0],
"dt_nat": [1.0, 2.0, 1.0],
"periods_nat": [1.0, 2.0, 1.0],
},
columns=cols,
)
result = df.rolling(window=2).count()
tm.assert_frame_equal(result, expected)
result = df.rolling(1).count()
expected = df.notna().astype(float)
tm.assert_frame_equal(result, expected)
@td.skip_if_no_scipy
@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning")
def test_window_with_args(self):
# make sure that we are aggregating window functions correctly when an extra
# argument (std) is passed; see the sketch after this test
r = Series(np.random.randn(100)).rolling(
window=10, min_periods=1, win_type="gaussian"
)
expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1)
expected.columns = ["<lambda>", "<lambda>"]
result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=0.01)])
tm.assert_frame_equal(result, expected)
def a(x):
return x.mean(std=10)
def b(x):
return x.mean(std=0.01)
expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1)
expected.columns = ["a", "b"]
result = r.aggregate([a, b])
tm.assert_frame_equal(result, expected)
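# Illustrative sketch (not from the original file, requires scipy): with
# win_type="gaussian" the weights come from scipy's window functions, which
# need the extra std argument, so it is supplied to the aggregation call
# (mean(std=...)) rather than to rolling() itself.
import numpy as np
import pandas as pd
s_demo = pd.Series(np.arange(5, dtype="float64"))
wide = s_demo.rolling(3, win_type="gaussian", min_periods=1).mean(std=10)      # nearly a flat mean
narrow = s_demo.rolling(3, win_type="gaussian", min_periods=1).mean(std=0.01)  # weight piles on the centre tap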
def test_preserve_metadata(self):
# GH 10565
s = Series(np.arange(100), name="foo")
s2 = s.rolling(30).sum()
s3 = s.rolling(20).sum()
assert s2.name == "foo"
assert s3.name == "foo"
@pytest.mark.parametrize(
"func,window_size,expected_vals",
[
(
"rolling",
2,
[
[np.nan, np.nan, np.nan, np.nan],
[15.0, 20.0, 25.0, 20.0],
[25.0, 30.0, 35.0, 30.0],
[np.nan, np.nan, np.nan, np.nan],
[20.0, 30.0, 35.0, 30.0],
[35.0, 40.0, 60.0, 40.0],
[60.0, 80.0, 85.0, 80],
],
),
(
"expanding",
None,
[
[10.0, 10.0, 20.0, 20.0],
[15.0, 20.0, 25.0, 20.0],
[20.0, 30.0, 30.0, 20.0],
[10.0, 10.0, 30.0, 30.0],
[20.0, 30.0, 35.0, 30.0],
[26.666667, 40.0, 50.0, 30.0],
[40.0, 80.0, 60.0, 30.0],
],
),
],
)
def test_multiple_agg_funcs(self, func, window_size, expected_vals):
# GH 15072
df = pd.DataFrame(
[
["A", 10, 20],
["A", 20, 30],
["A", 30, 40],
["B", 10, 30],
["B", 30, 40],
["B", 40, 80],
["B", 80, 90],
],
columns=["stock", "low", "high"],
)
f = getattr(df.groupby("stock"), func)
if window_size:
window = f(window_size)
else:
window = f()
index = pd.MultiIndex.from_tuples(
[("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)],
names=["stock", None],
)
columns = pd.MultiIndex.from_tuples(
[("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")]
)
expected = pd.DataFrame(expected_vals, index=index, columns=columns)
result = window.agg(
OrderedDict((("low", ["mean", "max"]), ("high", ["mean", "min"])))
)
tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,242 @@
from itertools import product
import numpy as np
import pytest
from pandas import DataFrame, Series
from pandas.core.base import DataError
import pandas.util.testing as tm
# gh-12373: rolling functions error on float32 data
# make sure rolling functions work for different dtypes
#
# NOTE that these are yielded tests and so _create_data
# is explicitly called.
#
# further note that we are only checking rolling for full dtype
# compliance (though both expanding and ewm inherit)
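# Illustrative sketch (not from the original file): whatever the input dtype,
# the rolling reductions checked here are expected to come back as float64,
# which is what the hard-coded expectations below assume, e.g.
import numpy as np
import pandas as pd
assert pd.Series(np.arange(5), dtype=np.float32).rolling(window=2).mean().dtype == np.float64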
class Dtype:
window = 2
funcs = {
"count": lambda v: v.count(),
"max": lambda v: v.max(),
"min": lambda v: v.min(),
"sum": lambda v: v.sum(),
"mean": lambda v: v.mean(),
"std": lambda v: v.std(),
"var": lambda v: v.var(),
"median": lambda v: v.median(),
}
def get_expects(self):
expects = {
"sr1": {
"count": Series([1, 2, 2, 2, 2], dtype="float64"),
"max": Series([np.nan, 1, 2, 3, 4], dtype="float64"),
"min": Series([np.nan, 0, 1, 2, 3], dtype="float64"),
"sum": Series([np.nan, 1, 3, 5, 7], dtype="float64"),
"mean": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"),
"std": Series([np.nan] + [np.sqrt(0.5)] * 4, dtype="float64"),
"var": Series([np.nan, 0.5, 0.5, 0.5, 0.5], dtype="float64"),
"median": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"),
},
"sr2": {
"count": Series([1, 2, 2, 2, 2], dtype="float64"),
"max": Series([np.nan, 10, 8, 6, 4], dtype="float64"),
"min": Series([np.nan, 8, 6, 4, 2], dtype="float64"),
"sum": Series([np.nan, 18, 14, 10, 6], dtype="float64"),
"mean": Series([np.nan, 9, 7, 5, 3], dtype="float64"),
"std": Series([np.nan] + [np.sqrt(2)] * 4, dtype="float64"),
"var": Series([np.nan, 2, 2, 2, 2], dtype="float64"),
"median": Series([np.nan, 9, 7, 5, 3], dtype="float64"),
},
"sr3": {
"count": Series([1, 2, 2, 1, 1], dtype="float64"),
"max": Series([np.nan, 1, 2, np.nan, np.nan], dtype="float64"),
"min": Series([np.nan, 0, 1, np.nan, np.nan], dtype="float64"),
"sum": Series([np.nan, 1, 3, np.nan, np.nan], dtype="float64"),
"mean": Series([np.nan, 0.5, 1.5, np.nan, np.nan], dtype="float64"),
"std": Series(
[np.nan] + [np.sqrt(0.5)] * 2 + [np.nan] * 2, dtype="float64"
),
"var": Series([np.nan, 0.5, 0.5, np.nan, np.nan], dtype="float64"),
"median": Series([np.nan, 0.5, 1.5, np.nan, np.nan], dtype="float64"),
},
"df": {
"count": DataFrame(
{0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])},
dtype="float64",
),
"max": DataFrame(
{0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])},
dtype="float64",
),
"min": DataFrame(
{0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])},
dtype="float64",
),
"sum": DataFrame(
{
0: Series([np.nan, 2, 6, 10, 14]),
1: Series([np.nan, 4, 8, 12, 16]),
},
dtype="float64",
),
"mean": DataFrame(
{0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
dtype="float64",
),
"std": DataFrame(
{
0: Series([np.nan] + [np.sqrt(2)] * 4),
1: Series([np.nan] + [np.sqrt(2)] * 4),
},
dtype="float64",
),
"var": DataFrame(
{0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])},
dtype="float64",
),
"median": DataFrame(
{0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
dtype="float64",
),
},
}
return expects
def _create_dtype_data(self, dtype):
sr1 = Series(np.arange(5), dtype=dtype)
sr2 = Series(np.arange(10, 0, -2), dtype=dtype)
sr3 = sr1.copy()
sr3[3] = np.NaN
df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype)
data = {"sr1": sr1, "sr2": sr2, "sr3": sr3, "df": df}
return data
def _create_data(self):
self.data = self._create_dtype_data(self.dtype)
self.expects = self.get_expects()
def test_dtypes(self):
self._create_data()
for f_name, d_name in product(self.funcs.keys(), self.data.keys()):
f = self.funcs[f_name]
d = self.data[d_name]
exp = self.expects[d_name][f_name]
self.check_dtypes(f, f_name, d, d_name, exp)
def check_dtypes(self, f, f_name, d, d_name, exp):
roll = d.rolling(window=self.window)
result = f(roll)
tm.assert_almost_equal(result, exp)
class TestDtype_object(Dtype):
dtype = object
class Dtype_integer(Dtype):
pass
class TestDtype_int8(Dtype_integer):
dtype = np.int8
class TestDtype_int16(Dtype_integer):
dtype = np.int16
class TestDtype_int32(Dtype_integer):
dtype = np.int32
class TestDtype_int64(Dtype_integer):
dtype = np.int64
class Dtype_uinteger(Dtype):
pass
class TestDtype_uint8(Dtype_uinteger):
dtype = np.uint8
class TestDtype_uint16(Dtype_uinteger):
dtype = np.uint16
class TestDtype_uint32(Dtype_uinteger):
dtype = np.uint32
class TestDtype_uint64(Dtype_uinteger):
dtype = np.uint64
class Dtype_float(Dtype):
pass
class TestDtype_float16(Dtype_float):
dtype = np.float16
class TestDtype_float32(Dtype_float):
dtype = np.float32
class TestDtype_float64(Dtype_float):
dtype = np.float64
class TestDtype_category(Dtype):
dtype = "category"
include_df = False
def _create_dtype_data(self, dtype):
sr1 = Series(range(5), dtype=dtype)
sr2 = Series(range(10, 0, -2), dtype=dtype)
data = {"sr1": sr1, "sr2": sr2}
return data
class DatetimeLike(Dtype):
def check_dtypes(self, f, f_name, d, d_name, exp):
roll = d.rolling(window=self.window)
if f_name == "count":
result = f(roll)
tm.assert_almost_equal(result, exp)
else:
with pytest.raises(DataError):
f(roll)
class TestDtype_timedelta(DatetimeLike):
dtype = np.dtype("m8[ns]")
class TestDtype_datetime(DatetimeLike):
dtype = np.dtype("M8[ns]")
class TestDtype_datetime64UTC(DatetimeLike):
dtype = "datetime64[ns, UTC]"
def _create_data(self):
pytest.skip(
"direct creation of extension dtype "
"datetime64[ns, UTC] is not supported ATM"
)

View File

@@ -0,0 +1,70 @@
import numpy as np
import pytest
from pandas.errors import UnsupportedFunctionCall
from pandas import DataFrame, Series
import pandas.core.window as rwindow
from pandas.tests.window.common import Base
class TestEWM(Base):
def setup_method(self, method):
self._create_data()
def test_doc_string(self):
df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
df
df.ewm(com=0.5).mean()
@pytest.mark.parametrize("which", ["series", "frame"])
def test_constructor(self, which):
o = getattr(self, which)
c = o.ewm
# valid
c(com=0.5)
c(span=1.5)
c(alpha=0.5)
c(halflife=0.75)
c(com=0.5, span=None)
c(alpha=0.5, com=None)
c(halflife=0.75, alpha=None)
# not valid: mutually exclusive
with pytest.raises(ValueError):
c(com=0.5, alpha=0.5)
with pytest.raises(ValueError):
c(span=1.5, halflife=0.75)
with pytest.raises(ValueError):
c(alpha=0.5, span=1.5)
# not valid: com < 0
with pytest.raises(ValueError):
c(com=-0.5)
# not valid: span < 1
with pytest.raises(ValueError):
c(span=0.5)
# not valid: halflife <= 0
with pytest.raises(ValueError):
c(halflife=0)
# not valid: alpha <= 0 or alpha > 1
for alpha in (-0.5, 1.5):
with pytest.raises(ValueError):
c(alpha=alpha)
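# Illustrative sketch (not from the original test): com, span, halflife and
# alpha all describe the same smoothing factor (per the pandas ewm docs,
# roughly alpha = 1/(1+com) = 2/(span+1) = 1 - 0.5**(1/halflife)), which is
# why passing two of them at once is rejected above.
import numpy as np
import pandas as pd
s_demo = pd.Series([2.0, 4.0, 6.0])
assert np.allclose(s_demo.ewm(com=1).mean(), s_demo.ewm(alpha=0.5).mean())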
@pytest.mark.parametrize("method", ["std", "mean", "var"])
def test_numpy_compat(self, method):
# see gh-12811
e = rwindow.EWM(Series([2, 4, 6]), alpha=0.5)
msg = "numpy operations are not valid with window objects"
with pytest.raises(UnsupportedFunctionCall, match=msg):
getattr(e, method)(1, 2, 3)
with pytest.raises(UnsupportedFunctionCall, match=msg):
getattr(e, method)(dtype=np.float64)

View File

@@ -0,0 +1,115 @@
import numpy as np
import pytest
from pandas.errors import UnsupportedFunctionCall
import pandas as pd
from pandas import DataFrame, Series
import pandas.core.window as rwindow
from pandas.tests.window.common import Base
import pandas.util.testing as tm
class TestExpanding(Base):
def setup_method(self, method):
self._create_data()
def test_doc_string(self):
df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
df
df.expanding(2).sum()
@pytest.mark.parametrize("which", ["series", "frame"])
def test_constructor(self, which):
# GH 12669
o = getattr(self, which)
c = o.expanding
# valid
c(min_periods=1)
c(min_periods=1, center=True)
c(min_periods=1, center=False)
# not valid
for w in [2.0, "foo", np.array([2])]:
with pytest.raises(ValueError):
c(min_periods=w)
with pytest.raises(ValueError):
c(min_periods=1, center=w)
@pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"])
def test_numpy_compat(self, method):
# see gh-12811
e = rwindow.Expanding(Series([2, 4, 6]), window=2)
msg = "numpy operations are not valid with window objects"
with pytest.raises(UnsupportedFunctionCall, match=msg):
getattr(e, method)(1, 2, 3)
with pytest.raises(UnsupportedFunctionCall, match=msg):
getattr(e, method)(dtype=np.float64)
@pytest.mark.parametrize(
"expander",
[
1,
pytest.param(
"ls",
marks=pytest.mark.xfail(
reason="GH#16425 expanding with offset not supported"
),
),
],
)
def test_empty_df_expanding(self, expander):
# GH 15819 Verifies that datetime and integer expanding windows can be
# applied to empty DataFrames
expected = DataFrame()
result = DataFrame().expanding(expander).sum()
tm.assert_frame_equal(result, expected)
# Verifies that datetime and integer expanding windows can be applied
# to empty DataFrames with datetime index
expected = DataFrame(index=pd.DatetimeIndex([]))
result = DataFrame(index=pd.DatetimeIndex([])).expanding(expander).sum()
tm.assert_frame_equal(result, expected)
def test_missing_minp_zero(self):
# https://github.com/pandas-dev/pandas/pull/18921
# minp=0
x = pd.Series([np.nan])
result = x.expanding(min_periods=0).sum()
expected = pd.Series([0.0])
tm.assert_series_equal(result, expected)
# minp=1
result = x.expanding(min_periods=1).sum()
expected = pd.Series([np.nan])
tm.assert_series_equal(result, expected)
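# Illustrative sketch (not from the original file): min_periods=0 means a
# window needs no valid observations at all, so an all-NaN window falls back
# to the empty-sum value 0.0, while any positive min_periods keeps it NaN.
import numpy as np
import pandas as pd
x_demo = pd.Series([np.nan, 1.0])
x_demo.expanding(min_periods=0).sum()  # 0.0, 1.0
x_demo.expanding(min_periods=2).sum()  # NaN, NaN (only one non-NaN value ever seen)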
@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame])
def test_iter_raises(self, klass):
# https://github.com/pandas-dev/pandas/issues/11704
# Iteration over a Window
obj = klass([1, 2, 3, 4])
with pytest.raises(NotImplementedError):
iter(obj.expanding(2))
def test_expanding_axis(self, axis_frame):
# see gh-23372.
df = DataFrame(np.ones((10, 20)))
axis = df._get_axis_number(axis_frame)
if axis == 0:
expected = DataFrame(
{i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)}
)
else:
# axis == 1
expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10)
result = df.expanding(3, axis=axis_frame).sum()
tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,176 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Series
import pandas.util.testing as tm
class TestGrouperGrouping:
def setup_method(self, method):
self.series = Series(np.arange(10))
self.frame = DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)})
def test_mutated(self):
msg = r"group\(\) got an unexpected keyword argument 'foo'"
with pytest.raises(TypeError, match=msg):
self.frame.groupby("A", foo=1)
g = self.frame.groupby("A")
assert not g.mutated
g = self.frame.groupby("A", mutated=True)
assert g.mutated
def test_getitem(self):
g = self.frame.groupby("A")
g_mutated = self.frame.groupby("A", mutated=True)
expected = g_mutated.B.apply(lambda x: x.rolling(2).mean())
result = g.rolling(2).mean().B
tm.assert_series_equal(result, expected)
result = g.rolling(2).B.mean()
tm.assert_series_equal(result, expected)
result = g.B.rolling(2).mean()
tm.assert_series_equal(result, expected)
result = self.frame.B.groupby(self.frame.A).rolling(2).mean()
tm.assert_series_equal(result, expected)
def test_getitem_multiple(self):
# GH 13174
g = self.frame.groupby("A")
r = g.rolling(2)
g_mutated = self.frame.groupby("A", mutated=True)
expected = g_mutated.B.apply(lambda x: x.rolling(2).count())
result = r.B.count()
tm.assert_series_equal(result, expected)
result = r.B.count()
tm.assert_series_equal(result, expected)
def test_rolling(self):
g = self.frame.groupby("A")
r = g.rolling(window=4)
for f in ["sum", "mean", "min", "max", "count", "kurt", "skew"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.rolling(4), f)())
tm.assert_frame_equal(result, expected)
for f in ["std", "var"]:
result = getattr(r, f)(ddof=1)
expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1))
tm.assert_frame_equal(result, expected)
result = r.quantile(0.5)
expected = g.apply(lambda x: x.rolling(4).quantile(0.5))
tm.assert_frame_equal(result, expected)
def test_rolling_corr_cov(self):
g = self.frame.groupby("A")
r = g.rolling(window=4)
for f in ["corr", "cov"]:
result = getattr(r, f)(self.frame)
def func(x):
return getattr(x.rolling(4), f)(self.frame)
expected = g.apply(func)
tm.assert_frame_equal(result, expected)
result = getattr(r.B, f)(pairwise=True)
def func(x):
return getattr(x.B.rolling(4), f)(pairwise=True)
expected = g.apply(func)
tm.assert_series_equal(result, expected)
def test_rolling_apply(self, raw):
g = self.frame.groupby("A")
r = g.rolling(window=4)
# reduction
result = r.apply(lambda x: x.sum(), raw=raw)
expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw))
tm.assert_frame_equal(result, expected)
def test_rolling_apply_mutability(self):
# GH 14013
df = pd.DataFrame({"A": ["foo"] * 3 + ["bar"] * 3, "B": [1] * 6})
g = df.groupby("A")
mi = pd.MultiIndex.from_tuples(
[("bar", 3), ("bar", 4), ("bar", 5), ("foo", 0), ("foo", 1), ("foo", 2)]
)
mi.names = ["A", None]
# Grouped column should not be a part of the output
expected = pd.DataFrame([np.nan, 2.0, 2.0] * 2, columns=["B"], index=mi)
result = g.rolling(window=2).sum()
tm.assert_frame_equal(result, expected)
# Call an arbitrary function on the groupby
g.sum()
# Make sure nothing has been mutated
result = g.rolling(window=2).sum()
tm.assert_frame_equal(result, expected)
def test_expanding(self):
g = self.frame.groupby("A")
r = g.expanding()
for f in ["sum", "mean", "min", "max", "count", "kurt", "skew"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.expanding(), f)())
tm.assert_frame_equal(result, expected)
for f in ["std", "var"]:
result = getattr(r, f)(ddof=0)
expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0))
tm.assert_frame_equal(result, expected)
result = r.quantile(0.5)
expected = g.apply(lambda x: x.expanding().quantile(0.5))
tm.assert_frame_equal(result, expected)
def test_expanding_corr_cov(self):
g = self.frame.groupby("A")
r = g.expanding()
for f in ["corr", "cov"]:
result = getattr(r, f)(self.frame)
def func(x):
return getattr(x.expanding(), f)(self.frame)
expected = g.apply(func)
tm.assert_frame_equal(result, expected)
result = getattr(r.B, f)(pairwise=True)
def func(x):
return getattr(x.B.expanding(), f)(pairwise=True)
expected = g.apply(func)
tm.assert_series_equal(result, expected)
def test_expanding_apply(self, raw):
g = self.frame.groupby("A")
r = g.expanding()
# reduction
result = r.apply(lambda x: x.sum(), raw=raw)
expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw))
tm.assert_frame_equal(result, expected)

File diff suppressed because it is too large

View File

@@ -0,0 +1,183 @@
import warnings
import pytest
from pandas import DataFrame, Series
from pandas.core.sorting import safe_sort
import pandas.util.testing as tm
class TestPairwise:
# GH 7738
df1s = [
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]),
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]),
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]),
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", "C"]),
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1.0, 0]),
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0.0, 1]),
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", 1]),
DataFrame([[2.0, 4.0], [1.0, 2.0], [5.0, 2.0], [8.0, 1.0]], columns=[1, 0.0]),
DataFrame([[2, 4.0], [1, 2.0], [5, 2.0], [8, 1.0]], columns=[0, 1.0]),
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.0]], columns=[1.0, "X"]),
]
df2 = DataFrame(
[[None, 1, 1], [None, 1, 2], [None, 3, 2], [None, 8, 1]],
columns=["Y", "Z", "X"],
)
s = Series([1, 1, 3, 8])
def compare(self, result, expected):
# since we have sorted the results
# we can only compare non-nans
result = result.dropna().values
expected = expected.dropna().values
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
@pytest.mark.parametrize("f", [lambda x: x.cov(), lambda x: x.corr()])
def test_no_flex(self, f):
# DataFrame methods (which do not call _flex_binary_moment())
results = [f(df) for df in self.df1s]
for (df, result) in zip(self.df1s, results):
tm.assert_index_equal(result.index, df.columns)
tm.assert_index_equal(result.columns, df.columns)
for i, result in enumerate(results):
if i > 0:
self.compare(result, results[0])
@pytest.mark.parametrize(
"f",
[
lambda x: x.expanding().cov(pairwise=True),
lambda x: x.expanding().corr(pairwise=True),
lambda x: x.rolling(window=3).cov(pairwise=True),
lambda x: x.rolling(window=3).corr(pairwise=True),
lambda x: x.ewm(com=3).cov(pairwise=True),
lambda x: x.ewm(com=3).corr(pairwise=True),
],
)
def test_pairwise_with_self(self, f):
# DataFrame with itself, pairwise=True
# note that we may construct the 1st level of the MI
# in a non-monotonic way, so compare accordingly
results = []
for i, df in enumerate(self.df1s):
result = f(df)
tm.assert_index_equal(result.index.levels[0], df.index, check_names=False)
tm.assert_numpy_array_equal(
safe_sort(result.index.levels[1]), safe_sort(df.columns.unique())
)
tm.assert_index_equal(result.columns, df.columns)
results.append(result)
for i, result in enumerate(results):
if i > 0:
self.compare(result, results[0])
@pytest.mark.parametrize(
"f",
[
lambda x: x.expanding().cov(pairwise=False),
lambda x: x.expanding().corr(pairwise=False),
lambda x: x.rolling(window=3).cov(pairwise=False),
lambda x: x.rolling(window=3).corr(pairwise=False),
lambda x: x.ewm(com=3).cov(pairwise=False),
lambda x: x.ewm(com=3).corr(pairwise=False),
],
)
def test_no_pairwise_with_self(self, f):
# DataFrame with itself, pairwise=False
results = [f(df) for df in self.df1s]
for (df, result) in zip(self.df1s, results):
tm.assert_index_equal(result.index, df.index)
tm.assert_index_equal(result.columns, df.columns)
for i, result in enumerate(results):
if i > 0:
self.compare(result, results[0])
@pytest.mark.parametrize(
"f",
[
lambda x, y: x.expanding().cov(y, pairwise=True),
lambda x, y: x.expanding().corr(y, pairwise=True),
lambda x, y: x.rolling(window=3).cov(y, pairwise=True),
lambda x, y: x.rolling(window=3).corr(y, pairwise=True),
lambda x, y: x.ewm(com=3).cov(y, pairwise=True),
lambda x, y: x.ewm(com=3).corr(y, pairwise=True),
],
)
def test_pairwise_with_other(self, f):
# DataFrame with another DataFrame, pairwise=True
results = [f(df, self.df2) for df in self.df1s]
for (df, result) in zip(self.df1s, results):
tm.assert_index_equal(result.index.levels[0], df.index, check_names=False)
tm.assert_numpy_array_equal(
safe_sort(result.index.levels[1]), safe_sort(self.df2.columns.unique())
)
for i, result in enumerate(results):
if i > 0:
self.compare(result, results[0])
@pytest.mark.parametrize(
"f",
[
lambda x, y: x.expanding().cov(y, pairwise=False),
lambda x, y: x.expanding().corr(y, pairwise=False),
lambda x, y: x.rolling(window=3).cov(y, pairwise=False),
lambda x, y: x.rolling(window=3).corr(y, pairwise=False),
lambda x, y: x.ewm(com=3).cov(y, pairwise=False),
lambda x, y: x.ewm(com=3).corr(y, pairwise=False),
],
)
def test_no_pairwise_with_other(self, f):
# DataFrame with another DataFrame, pairwise=False
results = [
f(df, self.df2) if df.columns.is_unique else None for df in self.df1s
]
for (df, result) in zip(self.df1s, results):
if result is not None:
with warnings.catch_warnings(record=True):
warnings.simplefilter("ignore", RuntimeWarning)
# we can have int and str columns
expected_index = df.index.union(self.df2.index)
expected_columns = df.columns.union(self.df2.columns)
tm.assert_index_equal(result.index, expected_index)
tm.assert_index_equal(result.columns, expected_columns)
else:
with pytest.raises(ValueError, match="'arg1' columns are not unique"):
f(df, self.df2)
with pytest.raises(ValueError, match="'arg2' columns are not unique"):
f(self.df2, df)
@pytest.mark.parametrize(
"f",
[
lambda x, y: x.expanding().cov(y),
lambda x, y: x.expanding().corr(y),
lambda x, y: x.rolling(window=3).cov(y),
lambda x, y: x.rolling(window=3).corr(y),
lambda x, y: x.ewm(com=3).cov(y),
lambda x, y: x.ewm(com=3).corr(y),
],
)
def test_pairwise_with_series(self, f):
# DataFrame with a Series
results = [f(df, self.s) for df in self.df1s] + [
f(self.s, df) for df in self.df1s
]
for (df, result) in zip(self.df1s, results):
tm.assert_index_equal(result.index, df.index)
tm.assert_index_equal(result.columns, df.columns)
for i, result in enumerate(results):
if i > 0:
self.compare(result, results[0])

View File

@@ -0,0 +1,328 @@
from datetime import timedelta
import numpy as np
import pytest
from pandas.errors import UnsupportedFunctionCall
import pandas.util._test_decorators as td
import pandas as pd
from pandas import DataFrame, Series
import pandas.core.window as rwindow
from pandas.tests.window.common import Base
import pandas.util.testing as tm
class TestRolling(Base):
def setup_method(self, method):
self._create_data()
def test_doc_string(self):
df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
df
df.rolling(2).sum()
df.rolling(2, min_periods=1).sum()
@pytest.mark.parametrize("which", ["series", "frame"])
def test_constructor(self, which):
# GH 12669
o = getattr(self, which)
c = o.rolling
# valid
c(window=2)
c(window=2, min_periods=1)
c(window=2, min_periods=1, center=True)
c(window=2, min_periods=1, center=False)
# GH 13383
with pytest.raises(ValueError):
c(0)
c(-1)
# not valid
for w in [2.0, "foo", np.array([2])]:
with pytest.raises(ValueError):
c(window=w)
with pytest.raises(ValueError):
c(window=2, min_periods=w)
with pytest.raises(ValueError):
c(window=2, min_periods=1, center=w)
@td.skip_if_no_scipy
@pytest.mark.parametrize("which", ["series", "frame"])
def test_constructor_with_win_type(self, which):
# GH 13383
o = getattr(self, which)
c = o.rolling
with pytest.raises(ValueError):
c(-1, win_type="boxcar")
@pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3)])
def test_constructor_with_timedelta_window(self, window):
# GH 15440
n = 10
df = DataFrame(
{"value": np.arange(n)},
index=pd.date_range("2015-12-24", periods=n, freq="D"),
)
expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3))
result = df.rolling(window=window).sum()
expected = DataFrame(
{"value": expected_data},
index=pd.date_range("2015-12-24", periods=n, freq="D"),
)
tm.assert_frame_equal(result, expected)
expected = df.rolling("3D").sum()
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3), "3D"])
def test_constructor_timedelta_window_and_minperiods(self, window, raw):
# GH 15305
n = 10
df = DataFrame(
{"value": np.arange(n)},
index=pd.date_range("2017-08-08", periods=n, freq="D"),
)
expected = DataFrame(
{"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))},
index=pd.date_range("2017-08-08", periods=n, freq="D"),
)
result_roll_sum = df.rolling(window=window, min_periods=2).sum()
result_roll_generic = df.rolling(window=window, min_periods=2).apply(
sum, raw=raw
)
tm.assert_frame_equal(result_roll_sum, expected)
tm.assert_frame_equal(result_roll_generic, expected)
@pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"])
def test_numpy_compat(self, method):
# see gh-12811
r = rwindow.Rolling(Series([2, 4, 6]), window=2)
msg = "numpy operations are not valid with window objects"
with pytest.raises(UnsupportedFunctionCall, match=msg):
getattr(r, method)(1, 2, 3)
with pytest.raises(UnsupportedFunctionCall, match=msg):
getattr(r, method)(dtype=np.float64)
def test_closed(self):
df = DataFrame({"A": [0, 1, 2, 3, 4]})
# closed is only allowed for datetimelike indexes
with pytest.raises(ValueError):
df.rolling(window=3, closed="neither")
@pytest.mark.parametrize("closed", ["neither", "left"])
def test_closed_empty(self, closed, arithmetic_win_operators):
# GH 26005
func_name = arithmetic_win_operators
ser = pd.Series(
data=np.arange(5), index=pd.date_range("2000", periods=5, freq="2D")
)
roll = ser.rolling("1D", closed=closed)
result = getattr(roll, func_name)()
expected = pd.Series([np.nan] * 5, index=ser.index)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("func", ["min", "max"])
def test_closed_one_entry(self, func):
# GH24718
ser = pd.Series(data=[2], index=pd.date_range("2000", periods=1))
result = getattr(ser.rolling("10D", closed="left"), func)()
tm.assert_series_equal(result, pd.Series([np.nan], index=ser.index))
@pytest.mark.parametrize("func", ["min", "max"])
def test_closed_one_entry_groupby(self, func):
# GH24718
ser = pd.DataFrame(
data={"A": [1, 1, 2], "B": [3, 2, 1]},
index=pd.date_range("2000", periods=3),
)
result = getattr(
ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func
)()
exp_idx = pd.MultiIndex.from_arrays(
arrays=[[1, 1, 2], ser.index], names=("A", None)
)
expected = pd.Series(data=[np.nan, 3, np.nan], index=exp_idx, name="B")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("input_dtype", ["int", "float"])
@pytest.mark.parametrize(
"func,closed,expected",
[
("min", "right", [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]),
("min", "both", [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]),
("min", "neither", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]),
("min", "left", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]),
("max", "right", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
("max", "both", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
("max", "neither", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
("max", "left", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
],
)
def test_closed_min_max_datetime(self, input_dtype, func, closed, expected):
# see gh-21704
ser = pd.Series(
data=np.arange(10).astype(input_dtype),
index=pd.date_range("2000", periods=10),
)
result = getattr(ser.rolling("3D", closed=closed), func)()
expected = pd.Series(expected, index=ser.index)
tm.assert_series_equal(result, expected)
def test_closed_uneven(self):
# see gh-21704
ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10))
# uneven
ser = ser.drop(index=ser.index[[1, 5]])
result = ser.rolling("3D", closed="left").min()
expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6], index=ser.index)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"func,closed,expected",
[
("min", "right", [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]),
("min", "both", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]),
("min", "neither", [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]),
("min", "left", [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]),
("max", "right", [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]),
("max", "both", [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]),
("max", "neither", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]),
("max", "left", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan]),
],
)
def test_closed_min_max_minp(self, func, closed, expected):
# see gh-21704
ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10))
ser[ser.index[-3:]] = np.nan
result = getattr(ser.rolling("3D", min_periods=2, closed=closed), func)()
expected = pd.Series(expected, index=ser.index)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"closed,expected",
[
("right", [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]),
("both", [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]),
("neither", [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]),
("left", [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]),
],
)
def test_closed_median_quantile(self, closed, expected):
# GH 26005
ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10))
roll = ser.rolling("3D", closed=closed)
expected = pd.Series(expected, index=ser.index)
result = roll.median()
tm.assert_series_equal(result, expected)
result = roll.quantile(0.5)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("roller", ["1s", 1])
def tests_empty_df_rolling(self, roller):
# GH 15819 Verifies that datetime and integer rolling windows can be
# applied to empty DataFrames
expected = DataFrame()
result = DataFrame().rolling(roller).sum()
tm.assert_frame_equal(result, expected)
# Verifies that datetime and integer rolling windows can be applied to
# empty DataFrames with datetime index
expected = DataFrame(index=pd.DatetimeIndex([]))
result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum()
tm.assert_frame_equal(result, expected)
def test_empty_window_median_quantile(self):
# GH 26005
expected = pd.Series([np.nan, np.nan, np.nan])
roll = pd.Series(np.arange(3)).rolling(0)
result = roll.median()
tm.assert_series_equal(result, expected)
result = roll.quantile(0.1)
tm.assert_series_equal(result, expected)
def test_missing_minp_zero(self):
# https://github.com/pandas-dev/pandas/pull/18921
# minp=0
x = pd.Series([np.nan])
result = x.rolling(1, min_periods=0).sum()
expected = pd.Series([0.0])
tm.assert_series_equal(result, expected)
# minp=1
result = x.rolling(1, min_periods=1).sum()
expected = pd.Series([np.nan])
tm.assert_series_equal(result, expected)
def test_missing_minp_zero_variable(self):
# https://github.com/pandas-dev/pandas/pull/18921
x = pd.Series(
[np.nan] * 4,
index=pd.DatetimeIndex(
["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"]
),
)
result = x.rolling(pd.Timedelta("2d"), min_periods=0).sum()
expected = pd.Series(0.0, index=x.index)
tm.assert_series_equal(result, expected)
def test_multi_index_names(self):
# GH 16789, 16825
cols = pd.MultiIndex.from_product(
[["A", "B"], ["C", "D", "E"]], names=["1", "2"]
)
df = DataFrame(np.ones((10, 6)), columns=cols)
result = df.rolling(3).cov()
tm.assert_index_equal(result.columns, df.columns)
assert result.index.names == [None, "1", "2"]
@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame])
def test_iter_raises(self, klass):
# https://github.com/pandas-dev/pandas/issues/11704
# Iteration over a Window
obj = klass([1, 2, 3, 4])
with pytest.raises(NotImplementedError):
iter(obj.rolling(2))
def test_rolling_axis_sum(self, axis_frame):
# see gh-23372.
df = DataFrame(np.ones((10, 20)))
axis = df._get_axis_number(axis_frame)
if axis == 0:
expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)})
else:
# axis == 1
expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10)
result = df.rolling(3, axis=axis_frame).sum()
tm.assert_frame_equal(result, expected)
def test_rolling_axis_count(self, axis_frame):
# see gh-26055
df = DataFrame({"x": range(3), "y": range(3)})
axis = df._get_axis_number(axis_frame)
if axis in [0, "index"]:
expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]})
else:
expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]})
result = df.rolling(2, axis=axis_frame).count()
tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,692 @@
import numpy as np
import pytest
from pandas import DataFrame, Index, Series, Timestamp, date_range, to_datetime
import pandas.util.testing as tm
import pandas.tseries.offsets as offsets
class TestRollingTS:
# rolling time-series friendly
# xref GH13327
def setup_method(self, method):
self.regular = DataFrame(
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
).set_index("A")
self.ragged = DataFrame({"B": range(5)})
self.ragged.index = [
Timestamp("20130101 09:00:00"),
Timestamp("20130101 09:00:02"),
Timestamp("20130101 09:00:03"),
Timestamp("20130101 09:00:05"),
Timestamp("20130101 09:00:06"),
]
def test_doc_string(self):
df = DataFrame(
{"B": [0, 1, 2, np.nan, 4]},
index=[
Timestamp("20130101 09:00:00"),
Timestamp("20130101 09:00:02"),
Timestamp("20130101 09:00:03"),
Timestamp("20130101 09:00:05"),
Timestamp("20130101 09:00:06"),
],
)
df
df.rolling("2s").sum()
def test_valid(self):
df = self.regular
# not a valid freq
with pytest.raises(ValueError):
df.rolling(window="foobar")
# not a datetimelike index
with pytest.raises(ValueError):
df.reset_index().rolling(window="foobar")
# non-fixed freqs
for freq in ["2MS", offsets.MonthBegin(2)]:
with pytest.raises(ValueError):
df.rolling(window=freq)
for freq in ["1D", offsets.Day(2), "2ms"]:
df.rolling(window=freq)
# non-integer min_periods
for minp in [1.0, "foo", np.array([1, 2, 3])]:
with pytest.raises(ValueError):
df.rolling(window="1D", min_periods=minp)
# center is not implemented
with pytest.raises(NotImplementedError):
df.rolling(window="1D", center=True)
def test_on(self):
df = self.regular
# not a valid column
with pytest.raises(ValueError):
df.rolling(window="2s", on="foobar")
# column is valid
df = df.copy()
df["C"] = date_range("20130101", periods=len(df))
df.rolling(window="2d", on="C").sum()
# invalid columns
with pytest.raises(ValueError):
df.rolling(window="2d", on="B")
# ok even though 'on' is not among the selected columns
df.rolling(window="2d", on="C").B.sum()
def test_monotonic_on(self):
# on/index must be monotonic
df = DataFrame(
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
)
assert df.A.is_monotonic
df.rolling("2s", on="A").sum()
df = df.set_index("A")
assert df.index.is_monotonic
df.rolling("2s").sum()
# non-monotonic
df.index = reversed(df.index.tolist())
assert not df.index.is_monotonic
with pytest.raises(ValueError):
df.rolling("2s").sum()
df = df.reset_index()
with pytest.raises(ValueError):
df.rolling("2s", on="A").sum()
def test_frame_on(self):
df = DataFrame(
{"B": range(5), "C": date_range("20130101 09:00:00", periods=5, freq="3s")}
)
df["A"] = [
Timestamp("20130101 09:00:00"),
Timestamp("20130101 09:00:02"),
Timestamp("20130101 09:00:03"),
Timestamp("20130101 09:00:05"),
Timestamp("20130101 09:00:06"),
]
# we are simulating rolling on a datetime column by using 'on'
expected = df.set_index("A").rolling("2s").B.sum().reset_index(drop=True)
result = df.rolling("2s", on="A").B.sum()
tm.assert_series_equal(result, expected)
# test as a frame
# the 'on' column should be ignored as an aggregation column
# note that the expected is built by setting the index, computing,
# and resetting it, so its columns need to be reordered to match
# the actual result, which keeps the original column order
expected = (
df.set_index("A").rolling("2s")[["B"]].sum().reset_index()[["B", "A"]]
)
result = df.rolling("2s", on="A")[["B"]].sum()
tm.assert_frame_equal(result, expected)
def test_frame_on2(self):
# using multiple aggregation columns
df = DataFrame(
{
"A": [0, 1, 2, 3, 4],
"B": [0, 1, 2, np.nan, 4],
"C": Index(
[
Timestamp("20130101 09:00:00"),
Timestamp("20130101 09:00:02"),
Timestamp("20130101 09:00:03"),
Timestamp("20130101 09:00:05"),
Timestamp("20130101 09:00:06"),
]
),
},
columns=["A", "C", "B"],
)
expected1 = DataFrame(
{"A": [0.0, 1, 3, 3, 7], "B": [0, 1, 3, np.nan, 4], "C": df["C"]},
columns=["A", "C", "B"],
)
result = df.rolling("2s", on="C").sum()
expected = expected1
tm.assert_frame_equal(result, expected)
expected = Series([0, 1, 3, np.nan, 4], name="B")
result = df.rolling("2s", on="C").B.sum()
tm.assert_series_equal(result, expected)
expected = expected1[["A", "B", "C"]]
result = df.rolling("2s", on="C")[["A", "B", "C"]].sum()
tm.assert_frame_equal(result, expected)
def test_basic_regular(self):
df = self.regular.copy()
df.index = date_range("20130101", periods=5, freq="D")
expected = df.rolling(window=1, min_periods=1).sum()
result = df.rolling(window="1D").sum()
tm.assert_frame_equal(result, expected)
df.index = date_range("20130101", periods=5, freq="2D")
expected = df.rolling(window=1, min_periods=1).sum()
result = df.rolling(window="2D", min_periods=1).sum()
tm.assert_frame_equal(result, expected)
expected = df.rolling(window=1, min_periods=1).sum()
result = df.rolling(window="2D", min_periods=1).sum()
tm.assert_frame_equal(result, expected)
expected = df.rolling(window=1).sum()
result = df.rolling(window="2D").sum()
tm.assert_frame_equal(result, expected)
def test_min_periods(self):
# compare for min_periods
df = self.regular
# these are slightly different: "2s" defaults to min_periods=1; see the sketch after this test
expected = df.rolling(2, min_periods=1).sum()
result = df.rolling("2s").sum()
tm.assert_frame_equal(result, expected)
expected = df.rolling(2, min_periods=1).sum()
result = df.rolling("2s", min_periods=1).sum()
tm.assert_frame_equal(result, expected)
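# Illustrative sketch (not from the original file): an offset window such as
# "2s" defaults to min_periods=1, while an integer window defaults to
# min_periods equal to the window size, which is why the fixed-window variant
# above has to spell out min_periods=1 to match.
import pandas as pd
s_demo = pd.Series([1.0, 2.0, 3.0], index=pd.date_range("20130101", periods=3, freq="s"))
s_demo.rolling(2).sum()     # NaN, 3.0, 5.0
s_demo.rolling("2s").sum()  # 1.0, 3.0, 5.0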
def test_closed(self):
# xref GH13965
df = DataFrame(
{"A": [1] * 5},
index=[
Timestamp("20130101 09:00:01"),
Timestamp("20130101 09:00:02"),
Timestamp("20130101 09:00:03"),
Timestamp("20130101 09:00:04"),
Timestamp("20130101 09:00:06"),
],
)
# closed must be 'right', 'left', 'both', 'neither'
with pytest.raises(ValueError):
self.regular.rolling(window="2s", closed="blabla")
expected = df.copy()
expected["A"] = [1.0, 2, 2, 2, 1]
result = df.rolling("2s", closed="right").sum()
tm.assert_frame_equal(result, expected)
# default should be 'right'
result = df.rolling("2s").sum()
tm.assert_frame_equal(result, expected)
expected = df.copy()
expected["A"] = [1.0, 2, 3, 3, 2]
result = df.rolling("2s", closed="both").sum()
tm.assert_frame_equal(result, expected)
expected = df.copy()
expected["A"] = [np.nan, 1.0, 2, 2, 1]
result = df.rolling("2s", closed="left").sum()
tm.assert_frame_equal(result, expected)
expected = df.copy()
expected["A"] = [np.nan, 1.0, 1, 1, np.nan]
result = df.rolling("2s", closed="neither").sum()
tm.assert_frame_equal(result, expected)
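# Illustrative sketch (not from the original file): for a "2s" window ending at
# time t the summed interval is
#   closed="right"   -> (t - 2s, t]   (the default for offset windows)
#   closed="both"    -> [t - 2s, t]
#   closed="left"    -> [t - 2s, t)
#   closed="neither" -> (t - 2s, t)
# so at 09:00:03 above the candidate rows are 09:00:01, 09:00:02 and 09:00:03,
# giving sums of 2, 3, 2 and 1 respectively.
import pandas as pd
idx_demo = pd.DatetimeIndex(
    ["20130101 09:00:01", "20130101 09:00:02", "20130101 09:00:03",
     "20130101 09:00:04", "20130101 09:00:06"]
)
pd.Series(1.0, index=idx_demo).rolling("2s", closed="both").sum()  # 1, 2, 3, 3, 2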
def test_ragged_sum(self):
df = self.ragged
result = df.rolling(window="1s", min_periods=1).sum()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).sum()
expected = df.copy()
expected["B"] = [0.0, 1, 3, 3, 7]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=2).sum()
expected = df.copy()
expected["B"] = [np.nan, np.nan, 3, np.nan, 7]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="3s", min_periods=1).sum()
expected = df.copy()
expected["B"] = [0.0, 1, 3, 5, 7]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="3s").sum()
expected = df.copy()
expected["B"] = [0.0, 1, 3, 5, 7]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="4s", min_periods=1).sum()
expected = df.copy()
expected["B"] = [0.0, 1, 3, 6, 9]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="4s", min_periods=3).sum()
expected = df.copy()
expected["B"] = [np.nan, np.nan, 3, 6, 9]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).sum()
expected = df.copy()
expected["B"] = [0.0, 1, 3, 6, 10]
tm.assert_frame_equal(result, expected)
def test_ragged_mean(self):
df = self.ragged
result = df.rolling(window="1s", min_periods=1).mean()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).mean()
expected = df.copy()
expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
tm.assert_frame_equal(result, expected)
def test_ragged_median(self):
df = self.ragged
result = df.rolling(window="1s", min_periods=1).median()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).median()
expected = df.copy()
expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
tm.assert_frame_equal(result, expected)
def test_ragged_quantile(self):
df = self.ragged
result = df.rolling(window="1s", min_periods=1).quantile(0.5)
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).quantile(0.5)
expected = df.copy()
expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
tm.assert_frame_equal(result, expected)
def test_ragged_std(self):
df = self.ragged
result = df.rolling(window="1s", min_periods=1).std(ddof=0)
expected = df.copy()
expected["B"] = [0.0] * 5
tm.assert_frame_equal(result, expected)
result = df.rolling(window="1s", min_periods=1).std(ddof=1)
expected = df.copy()
expected["B"] = [np.nan] * 5
tm.assert_frame_equal(result, expected)
result = df.rolling(window="3s", min_periods=1).std(ddof=0)
expected = df.copy()
expected["B"] = [0.0] + [0.5] * 4
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).std(ddof=1)
expected = df.copy()
expected["B"] = [np.nan, 0.707107, 1.0, 1.0, 1.290994]
tm.assert_frame_equal(result, expected)
def test_ragged_var(self):
df = self.ragged
result = df.rolling(window="1s", min_periods=1).var(ddof=0)
expected = df.copy()
expected["B"] = [0.0] * 5
tm.assert_frame_equal(result, expected)
result = df.rolling(window="1s", min_periods=1).var(ddof=1)
expected = df.copy()
expected["B"] = [np.nan] * 5
tm.assert_frame_equal(result, expected)
result = df.rolling(window="3s", min_periods=1).var(ddof=0)
expected = df.copy()
expected["B"] = [0.0] + [0.25] * 4
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).var(ddof=1)
expected = df.copy()
expected["B"] = [np.nan, 0.5, 1.0, 1.0, 1 + 2 / 3.0]
tm.assert_frame_equal(result, expected)
def test_ragged_skew(self):
df = self.ragged
result = df.rolling(window="3s", min_periods=1).skew()
expected = df.copy()
expected["B"] = [np.nan] * 5
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).skew()
expected = df.copy()
expected["B"] = [np.nan] * 2 + [0.0, 0.0, 0.0]
tm.assert_frame_equal(result, expected)
def test_ragged_kurt(self):
df = self.ragged
result = df.rolling(window="3s", min_periods=1).kurt()
expected = df.copy()
expected["B"] = [np.nan] * 5
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).kurt()
expected = df.copy()
expected["B"] = [np.nan] * 4 + [-1.2]
tm.assert_frame_equal(result, expected)
def test_ragged_count(self):
df = self.ragged
result = df.rolling(window="1s", min_periods=1).count()
expected = df.copy()
expected["B"] = [1.0, 1, 1, 1, 1]
tm.assert_frame_equal(result, expected)
df = self.ragged
result = df.rolling(window="1s").count()
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).count()
expected = df.copy()
expected["B"] = [1.0, 1, 2, 1, 2]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=2).count()
expected = df.copy()
expected["B"] = [np.nan, np.nan, 2, np.nan, 2]
tm.assert_frame_equal(result, expected)
def test_regular_min(self):
df = DataFrame(
{"A": date_range("20130101", periods=5, freq="s"), "B": [0.0, 1, 2, 3, 4]}
).set_index("A")
result = df.rolling("1s").min()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
df = DataFrame(
{"A": date_range("20130101", periods=5, freq="s"), "B": [5, 4, 3, 4, 5]}
).set_index("A")
tm.assert_frame_equal(result, expected)
result = df.rolling("2s").min()
expected = df.copy()
expected["B"] = [5.0, 4, 3, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling("5s").min()
expected = df.copy()
expected["B"] = [5.0, 4, 3, 3, 3]
tm.assert_frame_equal(result, expected)
def test_ragged_min(self):
df = self.ragged
result = df.rolling(window="1s", min_periods=1).min()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).min()
expected = df.copy()
expected["B"] = [0.0, 1, 1, 3, 3]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).min()
expected = df.copy()
expected["B"] = [0.0, 0, 0, 1, 1]
tm.assert_frame_equal(result, expected)
def test_perf_min(self):
N = 10000
dfp = DataFrame(
{"B": np.random.randn(N)}, index=date_range("20130101", periods=N, freq="s")
)
expected = dfp.rolling(2, min_periods=1).min()
result = dfp.rolling("2s").min()
assert ((result - expected) < 0.01).all().bool()
expected = dfp.rolling(200, min_periods=1).min()
result = dfp.rolling("200s").min()
assert ((result - expected) < 0.01).all().bool()
def test_ragged_max(self):
df = self.ragged
result = df.rolling(window="1s", min_periods=1).max()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).max()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).max()
expected = df.copy()
expected["B"] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
def test_ragged_apply(self, raw):
df = self.ragged
f = lambda x: 1
result = df.rolling(window="1s", min_periods=1).apply(f, raw=raw)
expected = df.copy()
expected["B"] = 1.0
tm.assert_frame_equal(result, expected)
result = df.rolling(window="2s", min_periods=1).apply(f, raw=raw)
expected = df.copy()
expected["B"] = 1.0
tm.assert_frame_equal(result, expected)
result = df.rolling(window="5s", min_periods=1).apply(f, raw=raw)
expected = df.copy()
expected["B"] = 1.0
tm.assert_frame_equal(result, expected)
def test_all(self):
# simple comparison of integer vs time-based windowing
df = self.regular * 2
er = df.rolling(window=1)
r = df.rolling(window="1s")
for f in [
"sum",
"mean",
"count",
"median",
"std",
"var",
"kurt",
"skew",
"min",
"max",
]:
result = getattr(r, f)()
expected = getattr(er, f)()
tm.assert_frame_equal(result, expected)
result = r.quantile(0.5)
expected = er.quantile(0.5)
tm.assert_frame_equal(result, expected)
def test_all_apply(self, raw):
df = self.regular * 2
er = df.rolling(window=1)
r = df.rolling(window="1s")
result = r.apply(lambda x: 1, raw=raw)
expected = er.apply(lambda x: 1, raw=raw)
tm.assert_frame_equal(result, expected)
def test_all2(self):
# more sophisticated comparison of integer vs.
# time-based windowing
df = DataFrame(
{"B": np.arange(50)}, index=date_range("20130101", periods=50, freq="H")
)
# in-range data
dft = df.between_time("09:00", "16:00")
r = dft.rolling(window="5H")
for f in [
"sum",
"mean",
"count",
"median",
"std",
"var",
"kurt",
"skew",
"min",
"max",
]:
result = getattr(r, f)()
# we need to roll the days separately
# to compare with a time-based roll
# finally groupby-apply will return a multi-index
# so we need to drop the day
def agg_by_day(x):
x = x.between_time("09:00", "16:00")
return getattr(x.rolling(5, min_periods=1), f)()
expected = (
df.groupby(df.index.day)
.apply(agg_by_day)
.reset_index(level=0, drop=True)
)
tm.assert_frame_equal(result, expected)
def test_groupby_monotonic(self):
# GH 15130
# we don't need to validate monotonicity when grouping
data = [
["David", "1/1/2015", 100],
["David", "1/5/2015", 500],
["David", "5/30/2015", 50],
["David", "7/25/2015", 50],
["Ryan", "1/4/2014", 100],
["Ryan", "1/19/2015", 500],
["Ryan", "3/31/2016", 50],
["Joe", "7/1/2015", 100],
["Joe", "9/9/2015", 500],
["Joe", "10/15/2015", 50],
]
df = DataFrame(data=data, columns=["name", "date", "amount"])
df["date"] = to_datetime(df["date"])
expected = (
df.set_index("date")
.groupby("name")
.apply(lambda x: x.rolling("180D")["amount"].sum())
)
result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
tm.assert_series_equal(result, expected)
def test_non_monotonic(self):
# GH 13966 (similar to #15130, closed by #15175)
dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
df = DataFrame(
{
"A": [1] * 20 + [2] * 12 + [3] * 8,
"B": np.concatenate((dates, dates)),
"C": np.arange(40),
}
)
result = df.groupby("A").rolling("4s", on="B").C.mean()
expected = (
df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
)
tm.assert_series_equal(result, expected)
df2 = df.sort_values("B")
result = df2.groupby("A").rolling("4s", on="B").C.mean()
tm.assert_series_equal(result, expected)
def test_rolling_cov_offset(self):
# GH16058
idx = date_range("2017-01-01", periods=24, freq="1h")
ss = Series(np.arange(len(idx)), index=idx)
result = ss.rolling("2h").cov()
expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx)
tm.assert_series_equal(result, expected)
expected2 = ss.rolling(2, min_periods=1).cov()
tm.assert_series_equal(result, expected2)
result = ss.rolling("3h").cov()
expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx)
tm.assert_series_equal(result, expected)
expected2 = ss.rolling(3, min_periods=1).cov()
tm.assert_series_equal(result, expected2)

View File

@@ -0,0 +1,76 @@
import numpy as np
import pytest
from pandas.errors import UnsupportedFunctionCall
import pandas.util._test_decorators as td
import pandas as pd
from pandas import Series
import pandas.core.window as rwindow
from pandas.tests.window.common import Base
@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
class TestWindow(Base):
def setup_method(self, method):
self._create_data()
@td.skip_if_no_scipy
@pytest.mark.parametrize("which", ["series", "frame"])
def test_constructor(self, which):
# GH 12669
o = getattr(self, which)
c = o.rolling
# valid
c(win_type="boxcar", window=2, min_periods=1)
c(win_type="boxcar", window=2, min_periods=1, center=True)
c(win_type="boxcar", window=2, min_periods=1, center=False)
# not valid
for w in [2.0, "foo", np.array([2])]:
with pytest.raises(ValueError):
c(win_type="boxcar", window=2, min_periods=w)
with pytest.raises(ValueError):
c(win_type="boxcar", window=2, min_periods=1, center=w)
for wt in ["foobar", 1]:
with pytest.raises(ValueError):
c(win_type=wt, window=2)
@td.skip_if_no_scipy
@pytest.mark.parametrize("which", ["series", "frame"])
def test_constructor_with_win_type(self, which, win_types):
# GH 12669
o = getattr(self, which)
c = o.rolling
c(win_type=win_types, window=2)
@pytest.mark.parametrize("method", ["sum", "mean"])
def test_numpy_compat(self, method):
# see gh-12811
w = rwindow.Window(Series([2, 4, 6]), window=[0, 2])
msg = "numpy operations are not valid with window objects"
with pytest.raises(UnsupportedFunctionCall, match=msg):
getattr(w, method)(1, 2, 3)
with pytest.raises(UnsupportedFunctionCall, match=msg):
getattr(w, method)(dtype=np.float64)
@td.skip_if_no_scipy
@pytest.mark.parametrize("arg", ["median", "var", "std", "kurt", "skew"])
def test_agg_function_support(self, arg):
df = pd.DataFrame({"A": np.arange(5)})
roll = df.rolling(2, win_type="triang")
msg = "'{arg}' is not a valid function for 'Window' object".format(arg=arg)
with pytest.raises(AttributeError, match=msg):
roll.agg(arg)
with pytest.raises(AttributeError, match=msg):
roll.agg([arg])
with pytest.raises(AttributeError, match=msg):
roll.agg({"A": arg})
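# Illustrative sketch (not from the original file): weighted (win_type) windows
# only expose the weighted reductions, essentially sum and mean, so the other
# statistics rejected above have to be computed on a plain rolling window.
import numpy as np
import pandas as pd
df_demo = pd.DataFrame({"A": np.arange(5, dtype="float64")})
df_demo.rolling(2, win_type="triang").mean()  # weighted mean (needs scipy)
df_demo.rolling(2).median()                   # unweighted median on a plain window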