8th day of python challenges 111-117
This commit is contained in:
@@ -0,0 +1,289 @@
|
||||
from copy import deepcopy
|
||||
from distutils.version import LooseVersion
|
||||
from operator import methodcaller
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, MultiIndex, Series, date_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import (
|
||||
assert_almost_equal,
|
||||
assert_frame_equal,
|
||||
assert_series_equal,
|
||||
)
|
||||
|
||||
from .test_generic import Generic
|
||||
|
||||
# Record whether the optional xarray dependency can be imported.
try:
    import xarray
except ImportError:
    _XARRAY_INSTALLED = False
else:
    _XARRAY_INSTALLED = True
|
||||
|
||||
|
||||
class TestDataFrame(Generic):
|
||||
_typ = DataFrame
|
||||
_comparator = lambda self, x, y: assert_frame_equal(x, y)
|
||||
|
||||
def test_rename_mi(self):
|
||||
df = DataFrame(
|
||||
[11, 21, 31],
|
||||
index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]]),
|
||||
)
|
||||
df.rename(str.lower)
|
||||
|
||||
def test_set_axis_name(self):
|
||||
df = pd.DataFrame([[1, 2], [3, 4]])
|
||||
funcs = ["_set_axis_name", "rename_axis"]
|
||||
for func in funcs:
|
||||
result = methodcaller(func, "foo")(df)
|
||||
assert df.index.name is None
|
||||
assert result.index.name == "foo"
|
||||
|
||||
result = methodcaller(func, "cols", axis=1)(df)
|
||||
assert df.columns.name is None
|
||||
assert result.columns.name == "cols"
|
||||
|
||||
def test_set_axis_name_mi(self):
|
||||
df = DataFrame(
|
||||
np.empty((3, 3)),
|
||||
index=MultiIndex.from_tuples([("A", x) for x in list("aBc")]),
|
||||
columns=MultiIndex.from_tuples([("C", x) for x in list("xyz")]),
|
||||
)
|
||||
|
||||
level_names = ["L1", "L2"]
|
||||
funcs = ["_set_axis_name", "rename_axis"]
|
||||
for func in funcs:
|
||||
result = methodcaller(func, level_names)(df)
|
||||
assert result.index.names == level_names
|
||||
assert result.columns.names == [None, None]
|
||||
|
||||
result = methodcaller(func, level_names, axis=1)(df)
|
||||
assert result.columns.names == ["L1", "L2"]
|
||||
assert result.index.names == [None, None]
|
||||
|
||||
def test_nonzero_single_element(self):
    """Check ``DataFrame.bool()`` on 1x1 frames and that a two-element
    frame raises ``ValueError`` for both ``.bool()`` and ``bool()``."""

    # allow single item via bool method
    single_true = DataFrame([[True]])
    assert single_true.bool()

    single_false = DataFrame([[False]])
    assert not single_false.bool()

    two_elements = DataFrame([[False, False]])
    with pytest.raises(ValueError):
        two_elements.bool()
    with pytest.raises(ValueError):
        bool(two_elements)
|
||||
|
||||
def test_get_numeric_data_preserve_dtype(self):
    """Check ``_get_numeric_data`` on a single mixed-content column: the
    result is compared against an empty, object-dtype frame that keeps
    the original index."""

    # get the numeric data
    mixed = DataFrame({"A": [1, "2", 3.0]})
    numeric_part = mixed._get_numeric_data()
    no_columns = DataFrame(index=[0, 1, 2], dtype=object)
    self._compare(numeric_part, no_columns)
|
||||
|
||||
def test_metadata_propagation_indiv(self):
    """Check metadata propagation for groupby, resample, merge and concat.

    The merge and concat sections temporarily replace
    ``DataFrame._metadata`` and ``DataFrame.__finalize__`` with test
    versions.  The originals are restored in a ``finally`` clause so that a
    failing assertion cannot leave the patched class attributes in place
    for the rest of the test session.
    """

    # groupby
    df = DataFrame(
        {
            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
            "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
            "C": np.random.randn(8),
            "D": np.random.randn(8),
        }
    )
    result = df.groupby("A").sum()
    self.check_metadata(df, result)

    # resample
    df = DataFrame(
        np.random.randn(1000, 2),
        index=date_range("20130101", periods=1000, freq="s"),
    )
    result = df.resample("1T")
    self.check_metadata(df, result)

    # merging with override
    # GH 6923
    _metadata = DataFrame._metadata
    _finalize = DataFrame.__finalize__

    try:
        np.random.seed(10)
        df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=["a", "b"])
        df2 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=["c", "d"])
        DataFrame._metadata = ["filename"]
        df1.filename = "fname1.csv"
        df2.filename = "fname2.csv"

        def finalize(self, other, method=None, **kwargs):
            # On merge, join the left/right values with "|"; otherwise copy
            # the value from ``other`` (defaulting to "").
            for name in self._metadata:
                if method == "merge":
                    left, right = other.left, other.right
                    value = getattr(left, name, "") + "|" + getattr(right, name, "")
                    object.__setattr__(self, name, value)
                else:
                    object.__setattr__(self, name, getattr(other, name, ""))

            return self

        DataFrame.__finalize__ = finalize
        result = df1.merge(df2, left_on=["a"], right_on=["c"], how="inner")
        assert result.filename == "fname1.csv|fname2.csv"

        # concat
        # GH 6927
        DataFrame._metadata = ["filename"]
        df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=list("ab"))
        df1.filename = "foo"

        def finalize(self, other, method=None, **kwargs):
            # On concat, join the truthy values of all inputs with "+";
            # otherwise copy the value from ``other`` (defaulting to None).
            for name in self._metadata:
                if method == "concat":
                    value = "+".join(
                        [getattr(o, name) for o in other.objs if getattr(o, name, None)]
                    )
                    object.__setattr__(self, name, value)
                else:
                    object.__setattr__(self, name, getattr(other, name, None))

            return self

        DataFrame.__finalize__ = finalize

        result = pd.concat([df1, df1])
        assert result.filename == "foo+foo"
    finally:
        # reset
        DataFrame._metadata = _metadata
        DataFrame.__finalize__ = _finalize
|
||||
|
||||
def test_set_attribute(self):
|
||||
# Test for consistent setattr behavior when an attribute and a column
|
||||
# have the same name (Issue #8994)
|
||||
df = DataFrame({"x": [1, 2, 3]})
|
||||
|
||||
df.y = 2
|
||||
df["y"] = [2, 4, 6]
|
||||
df.y = 5
|
||||
|
||||
assert df.y == 5
|
||||
assert_series_equal(df["y"], Series([2, 4, 6], name="y"))
|
||||
|
||||
@pytest.mark.skipif(
    # ``or`` short-circuits, so xarray.__version__ is only read when the
    # import succeeded.
    not _XARRAY_INSTALLED
    or LooseVersion(xarray.__version__) < LooseVersion("0.10.0"),
    reason="xarray >= 0.10.0 required",
)
@pytest.mark.parametrize(
    "index",
    [
        "FloatIndex",
        "IntIndex",
        "StringIndex",
        "UnicodeIndex",
        "DateIndex",
        "PeriodIndex",
        "CategoricalIndex",
        "TimedeltaIndex",
    ],
)
def test_to_xarray_index_types(self, index):
    """Round-trip a mixed-dtype frame through ``to_xarray`` for one index kind.

    ``index`` is the suffix of a ``tm.make<index>`` factory; the factory is
    called with 3 to build the frame's index.
    """
    from xarray import Dataset

    make_index = getattr(tm, "make" + index)
    columns = {
        "a": list("abc"),
        "b": list(range(1, 4)),
        "c": np.arange(3, 6).astype("u1"),
        "d": np.arange(4.0, 7.0, dtype="float64"),
        "e": [True, False, True],
        "f": pd.Categorical(list("abc")),
        "g": pd.date_range("20130101", periods=3),
        "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
    }
    df = DataFrame(columns)

    df.index = make_index(3)
    df.index.name = "foo"
    df.columns.name = "bar"

    dataset = df.to_xarray()
    assert dataset.dims["foo"] == 3
    assert len(dataset.coords) == 1
    assert len(dataset.data_vars) == 8
    assert_almost_equal(list(dataset.coords.keys()), ["foo"])
    assert isinstance(dataset, Dataset)

    # idempotency
    # categoricals are not preserved
    # datetimes w/tz are not preserved
    # column names are lost
    expected = df.copy()
    expected["f"] = expected["f"].astype(object)
    expected["h"] = expected["h"].astype("datetime64[ns]")
    expected.columns.name = None
    round_tripped = dataset.to_dataframe()
    assert_frame_equal(
        round_tripped, expected, check_index_type=False, check_categorical=False
    )
|
||||
|
||||
@td.skip_if_no("xarray", min_version="0.7.0")
def test_to_xarray(self):
    """Check ``to_xarray`` on an empty slice and on a MultiIndex-ed frame,
    including the round trip back through ``to_dataframe``."""
    from xarray import Dataset

    columns = {
        "a": list("abc"),
        "b": list(range(1, 4)),
        "c": np.arange(3, 6).astype("u1"),
        "d": np.arange(4.0, 7.0, dtype="float64"),
        "e": [True, False, True],
        "f": pd.Categorical(list("abc")),
        "g": pd.date_range("20130101", periods=3),
        "h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
    }
    df = DataFrame(columns)

    # Zero-row slice: the named dimension exists with length 0.
    df.index.name = "foo"
    empty_dataset = df[0:0].to_xarray()
    assert empty_dataset.dims["foo"] == 0
    assert isinstance(empty_dataset, Dataset)

    # available in 0.7.1
    # MultiIndex
    df.index = pd.MultiIndex.from_product([["a"], range(3)], names=["one", "two"])
    dataset = df.to_xarray()
    assert dataset.dims["one"] == 1
    assert dataset.dims["two"] == 3
    assert len(dataset.coords) == 2
    assert len(dataset.data_vars) == 8
    assert_almost_equal(list(dataset.coords.keys()), ["one", "two"])
    assert isinstance(dataset, Dataset)

    round_tripped = dataset.to_dataframe()
    expected = df.copy()
    expected["f"] = expected["f"].astype(object)
    expected["h"] = expected["h"].astype("datetime64[ns]")
    expected.columns.name = None
    assert_frame_equal(round_tripped, expected, check_index_type=False)
|
||||
|
||||
def test_deepcopy_empty(self):
    """Deep-copy an empty frame that still has a column and compare the
    copy with the original (issue GH15370)."""
    original = DataFrame(data=[], index=[], columns=["A"])
    self._compare(deepcopy(original), original)
|
@@ -0,0 +1,950 @@
|
||||
from copy import copy, deepcopy
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, MultiIndex, Series, date_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Generic types test cases
|
||||
|
||||
|
||||
class Generic:
|
||||
@property
def _ndim(self):
    """Return ``_AXIS_LEN`` of the type under test (``self._typ``)."""
    typ = self._typ
    return typ._AXIS_LEN
|
||||
|
||||
def _axes(self):
    """Return ``_AXIS_ORDERS`` of the type under test (``self._typ``)."""
    typ = self._typ
    return typ._AXIS_ORDERS
|
||||
|
||||
def _construct(self, shape, value=None, dtype=None, **kwargs):
    """Construct an object of ``self._typ`` for the given shape.

    Parameters
    ----------
    shape : int or sequence of int
        An int is expanded to the same length along each of ``self._ndim``
        axes.
    value : scalar, array or None
        ``None`` fills the object with random normal data.  The scalar
        ``"empty"`` passes ``None`` as data and drops the info-axis entry
        from ``kwargs``.  Any other scalar fills the whole array.  An array
        is flattened and each element is repeated as needed to fill
        ``shape``.
    dtype : optional
        Used for the scalar-filled array and passed to the constructor.
    **kwargs
        Forwarded to the ``self._typ`` constructor.

    Raises
    ------
    Exception
        If the total size of ``shape`` is not a multiple of the length of
        the flattened ``value`` array.
    """
    if isinstance(shape, int):
        shape = tuple([shape] * self._ndim)

    if value is None:
        arr = np.random.randn(*shape)
    elif is_scalar(value):
        if value == "empty":
            arr = None

            # remove the info axis
            kwargs.pop(self._typ._info_axis_name, None)
        else:
            arr = np.empty(shape, dtype=dtype)
            arr.fill(value)
    else:
        arr = value.ravel()
        # Integer division: np.repeat documents ``repeats`` as int, and true
        # division would hand it a float even when the sizes divide evenly.
        repeats, remainder = divmod(int(np.prod(shape)), arr.shape[0])
        if remainder != 0:
            raise Exception("invalid value passed in _construct")

        arr = np.repeat(arr, repeats).reshape(shape)
    return self._typ(arr, dtype=dtype, **kwargs)
|
||||
|
||||
def _compare(self, result, expected):
    """Delegate the comparison of ``result`` and ``expected`` to
    ``self._comparator``."""
    comparator = self._comparator
    comparator(result, expected)
|
||||
|
||||
def test_rename(self):
    """Rename each axis in turn with a callable, a dict and a Series, and
    compare against a copy whose axis labels were lower-cased directly."""

    # single axis
    labels = list("ABCD")
    lower_by_label = {x: x.lower() for x in labels}
    # relabeling values passed into self.rename
    mappers = [str.lower, lower_by_label, Series(lower_by_label)]

    for axis in self._axes():
        obj = self._construct(4, **{axis: labels})

        for mapper in mappers:
            # rename a single axis
            renamed = obj.rename(**{axis: mapper})
            expected = obj.copy()
            setattr(expected, axis, list("abcd"))
            self._compare(renamed, expected)

    # multiple axes at once
|
||||
|
||||
def test_get_numeric_data(self):
    """Check ``_get_numeric_data`` / ``_get_bool_data`` on numeric and
    boolean objects built with ``_construct``."""
    size = 4
    axis_names = self._typ._AXIS_NAMES
    kwargs = {axis_names[i]: list(range(size)) for i in range(self._ndim)}

    # get the numeric data
    numeric_obj = self._construct(size, **kwargs)
    self._compare(numeric_obj._get_numeric_data(), numeric_obj)

    # non-inclusion
    bool_part = numeric_obj._get_bool_data()
    empty_obj = self._construct(size, value="empty", **kwargs)
    self._compare(bool_part, empty_obj)

    # get the bool data
    flags = np.array([True, True, False, True])
    bool_obj = self._construct(size, value=flags, **kwargs)
    self._compare(bool_obj._get_numeric_data(), bool_obj)

    # _get_numeric_data includes _get_bool_data, so can't test for
    # non-inclusion
|
||||
|
||||
def test_get_default(self):
|
||||
|
||||
# GH 7725
|
||||
d0 = "a", "b", "c", "d"
|
||||
d1 = np.arange(4, dtype="int64")
|
||||
others = "e", 10
|
||||
|
||||
for data, index in ((d0, d1), (d1, d0)):
|
||||
s = Series(data, index=index)
|
||||
for i, d in zip(index, data):
|
||||
assert s.get(i) == d
|
||||
assert s.get(i, d) == d
|
||||
assert s.get(i, "z") == d
|
||||
for other in others:
|
||||
assert s.get(other, "z") == "z"
|
||||
assert s.get(other, other) == other
|
||||
|
||||
def test_nonzero(self):
    """Check that truth-testing a constructed object raises ``ValueError``
    with the "ambiguous" message (GH 4633)."""

    # look at the boolean/nonzero behavior for objects
    ambiguous = "The truth value of a {} is ambiguous".format(self._typ.__name__)

    # Random data, constant 1 and constant NaN all behave the same way.
    for extra in ({}, {"value": 1}, {"value": np.nan}):
        obj = self._construct(shape=4, **extra)
        with pytest.raises(ValueError, match=ambiguous):
            bool(obj == 0)
        with pytest.raises(ValueError, match=ambiguous):
            bool(obj == 1)
        with pytest.raises(ValueError, match=ambiguous):
            bool(obj)

    # empty
    empty_obj = self._construct(shape=0)
    with pytest.raises(ValueError, match=ambiguous):
        bool(empty_obj)

    # invalid behaviors

    first = self._construct(shape=4, value=1)
    second = self._construct(shape=4, value=1)

    with pytest.raises(ValueError, match=ambiguous):
        if first:
            pass

    with pytest.raises(ValueError, match=ambiguous):
        first and second
    with pytest.raises(ValueError, match=ambiguous):
        first or second
    with pytest.raises(ValueError, match=ambiguous):
        not first
|
||||
|
||||
def test_downcast(self):
    """Check ``_data.downcast(dtypes="infer")`` on integer data, whole
    floats, fractional floats and floats very close to a whole number."""
    # test close downcasting

    def infer_downcast(obj):
        # Work on a copy so ``obj`` keeps its own ``_data``.
        converted = obj.copy()
        converted._data = obj._data.downcast(dtypes="infer")
        return converted

    ints = self._construct(shape=4, value=9, dtype=np.int64)
    self._compare(infer_downcast(ints), ints)

    whole_floats = self._construct(shape=4, value=9.0)
    expected = whole_floats.astype(np.int64)
    self._compare(infer_downcast(whole_floats), expected)

    fractional = self._construct(shape=4, value=9.5)
    self._compare(infer_downcast(fractional), fractional)

    # are close
    nearly_whole = self._construct(shape=4, value=9.000000000005)
    result = infer_downcast(nearly_whole)
    expected = nearly_whole.astype(np.int64)
    self._compare(result, expected)
|
||||
|
||||
def test_constructor_compound_dtypes(self):
    """Check that a compound dtype raises ``NotImplementedError`` in the
    constructor while simple dtypes are accepted (see gh-5191)."""

    def build(dtype):
        return self._construct(shape=3, value=1, dtype=dtype)

    type_name = self._typ.__name__
    msg = "compound dtypes are not implemented in the {} constructor".format(
        type_name
    )
    compound = [("A", "datetime64[h]"), ("B", "str"), ("C", "int32")]
    with pytest.raises(NotImplementedError, match=msg):
        build(compound)

    # these work (though results may be unexpected)
    for simple in ("int64", "float64", "M8[ns]"):
        build(simple)
|
||||
|
||||
def check_metadata(self, x, y=None):
    """Assert on every attribute named in ``x._metadata``.

    With ``y`` given, each attribute of ``x`` must equal the same attribute
    of ``y``; without it, each attribute of ``x`` must be ``None``.  Missing
    attributes are read as ``None`` on both objects.
    """
    for field in x._metadata:
        actual = getattr(x, field, None)
        if y is not None:
            assert actual == getattr(y, field, None)
        else:
            assert actual is None
|
||||
|
||||
def test_metadata_propagation(self):
    """Check which operations carry metadata over to their result.

    ``check_metadata(o, result)`` is used where the result is expected to
    match ``o``; ``check_metadata(result)`` where the metadata is expected
    to be unset.
    """
    arithmetic_ops = ("__add__", "__sub__", "__truediv__", "__mul__")
    comparison_ops = ("__eq__", "__le__", "__ge__")

    o = self._construct(shape=3)
    o.name = "foo"
    o2 = self._construct(shape=3)
    o2.name = "bar"

    # ----------
    # preserving
    # ----------

    # simple ops with scalars
    for op in arithmetic_ops:
        self.check_metadata(o, getattr(o, op)(1))

    # ops with like
    for op in arithmetic_ops:
        self.check_metadata(o, getattr(o, op)(o))

    # simple boolean
    for op in comparison_ops:
        same = getattr(o, op)(o)
        self.check_metadata(o, same)
        self.check_metadata(o, same & same)
        self.check_metadata(o, same | same)

    # combine_first
    self.check_metadata(o, o.combine_first(o2))

    # ---------------------------
    # non-preserving (by default)
    # ---------------------------

    # add non-like
    self.check_metadata(o + o2)

    # simple boolean
    for op in comparison_ops:

        # this is a name matching op
        compare = getattr(o, op)
        same = compare(o)
        different = compare(o2)
        self.check_metadata(different)
        self.check_metadata(same & different)
        self.check_metadata(same | different)
|
||||
|
||||
def test_head_tail(self):
    """Check ``head`` / ``tail`` against ``iloc`` slices for several index
    types (GH5370)."""
    obj = self._construct(shape=10)

    index_makers = (
        tm.makeFloatIndex,
        tm.makeIntIndex,
        tm.makeStringIndex,
        tm.makeUnicodeIndex,
        tm.makeDateIndex,
        tm.makePeriodIndex,
    )

    # check all index types
    for make_index in index_makers:
        axis_name = obj._get_axis_name(0)
        current_axis = getattr(obj, axis_name)
        setattr(obj, axis_name, make_index(len(current_axis)))

        obj.head()

        self._compare(obj.head(), obj.iloc[:5])
        self._compare(obj.tail(), obj.iloc[-5:])

        # 0-len
        self._compare(obj.head(0), obj.iloc[0:0])
        self._compare(obj.tail(0), obj.iloc[0:0])

        # bounded
        too_many = len(obj) + 1
        self._compare(obj.head(too_many), obj)
        self._compare(obj.tail(too_many), obj)

        # neg index
        self._compare(obj.head(-3), obj.head(7))
        self._compare(obj.tail(-3), obj.tail(7))
|
||||
|
||||
def test_sample(self):
    """Check ``sample`` on a constructed object: reproducibility with
    ``random_state``, validation of ``n`` / ``frac``, result lengths, and
    validation and handling of ``weights`` (issue 2419)."""

    o = self._construct(shape=10)

    ###
    # Check behavior of random_state argument
    ###

    # Check for stability when receives seed or random state -- run 10
    # times.
    for test in range(10):
        seed = np.random.randint(0, 100)
        self._compare(
            o.sample(n=4, random_state=seed), o.sample(n=4, random_state=seed)
        )
        self._compare(
            o.sample(frac=0.7, random_state=seed),
            o.sample(frac=0.7, random_state=seed),
        )

        self._compare(
            o.sample(n=4, random_state=np.random.RandomState(test)),
            o.sample(n=4, random_state=np.random.RandomState(test)),
        )

        self._compare(
            o.sample(frac=0.7, random_state=np.random.RandomState(test)),
            o.sample(frac=0.7, random_state=np.random.RandomState(test)),
        )

        # Re-seeding the global generator must reproduce the same samples.
        os1, os2 = [], []
        for _ in range(2):
            np.random.seed(test)
            os1.append(o.sample(n=4))
            os2.append(o.sample(frac=0.7))
        self._compare(*os1)
        self._compare(*os2)

    # Check for error when random_state argument invalid.
    with pytest.raises(ValueError):
        o.sample(random_state="astring!")

    ###
    # Check behavior of `frac` and `N`
    ###

    # Giving both frac and N throws error
    with pytest.raises(ValueError):
        o.sample(n=3, frac=0.3)

    # Check that raises right error for negative lengths
    with pytest.raises(ValueError):
        o.sample(n=-3)
    with pytest.raises(ValueError):
        o.sample(frac=-0.3)

    # Make sure float values of `n` give error
    with pytest.raises(ValueError):
        o.sample(n=3.2)

    # Check lengths are right
    # (the comparison sits outside len(); inside it, the assertion would
    # only check that the comparison result is non-empty)
    assert len(o.sample(n=4)) == 4
    assert len(o.sample(frac=0.34)) == 3
    assert len(o.sample(frac=0.36)) == 4

    ###
    # Check weights
    ###

    # Weight length must be right
    with pytest.raises(ValueError):
        o.sample(n=3, weights=[0, 1])

    bad_weights = [0.5] * 11
    with pytest.raises(ValueError):
        o.sample(n=3, weights=bad_weights)

    bad_weight_series = Series([0, 0, 0.2])
    with pytest.raises(ValueError):
        o.sample(n=4, weights=bad_weight_series)

    # Check won't accept negative weights
    bad_weights = [-0.1] * 10
    with pytest.raises(ValueError):
        o.sample(n=3, weights=bad_weights)

    # Check inf and -inf throw errors:
    weights_with_inf = [0.1] * 10
    weights_with_inf[0] = np.inf
    with pytest.raises(ValueError):
        o.sample(n=3, weights=weights_with_inf)

    weights_with_ninf = [0.1] * 10
    weights_with_ninf[0] = -np.inf
    with pytest.raises(ValueError):
        o.sample(n=3, weights=weights_with_ninf)

    # All zeros raises errors
    zero_weights = [0] * 10
    with pytest.raises(ValueError):
        o.sample(n=3, weights=zero_weights)

    # All missing weights
    nan_weights = [np.nan] * 10
    with pytest.raises(ValueError):
        o.sample(n=3, weights=nan_weights)

    # Check np.nan are replaced by zeros.
    weights_with_nan = [np.nan] * 10
    weights_with_nan[5] = 0.5
    self._compare(o.sample(n=1, axis=0, weights=weights_with_nan), o.iloc[5:6])

    # Check None are also replaced by zeros.
    weights_with_None = [None] * 10
    weights_with_None[5] = 0.5
    self._compare(o.sample(n=1, axis=0, weights=weights_with_None), o.iloc[5:6])
|
||||
|
||||
def test_size_compat(self):
    """Check that ``size`` is defined and equals the product of ``shape``
    (GH8846)."""
    obj = self._construct(shape=10)
    size = obj.size
    assert size == np.prod(obj.shape)
    assert size == 10 ** len(obj.axes)
|
||||
|
||||
def test_split_compat(self):
    """Check that ``np.array_split`` returns the requested number of
    pieces for a constructed object (xref GH8846)."""
    obj = self._construct(shape=10)
    for sections in (5, 2):
        assert len(np.array_split(obj, sections)) == sections
|
||||
|
||||
def test_unexpected_keyword(self):  # GH8597
    """Check that misspelled or unsupported keywords raise ``TypeError``
    mentioning "unexpected keyword"."""
    msg = "unexpected keyword"

    frame = DataFrame(np.random.randn(5, 2), columns=["jim", "joe"])
    categorical = pd.Categorical([0, 0, 2, 2, 3, np.nan])
    series = frame["joe"].copy()
    series[2] = np.nan

    with pytest.raises(TypeError, match=msg):
        frame.drop("joe", axis=1, in_place=True)

    with pytest.raises(TypeError, match=msg):
        frame.reindex([1, 0], inplace=True)

    with pytest.raises(TypeError, match=msg):
        categorical.fillna(0, inplace=True)

    with pytest.raises(TypeError, match=msg):
        series.fillna(0, in_place=True)
|
||||
|
||||
# See gh-12301
|
||||
def test_stat_unexpected_keyword(self):
    """Check that an unknown keyword passed to reduction methods raises
    ``TypeError`` mentioning "unexpected keyword"."""
    obj = self._construct(5)
    starwars = "Star Wars"
    errmsg = "unexpected keyword"

    # max: stat_function, var: stat_function_ddof,
    # sum: cum_function, any: logical_function
    for method_name in ("max", "var", "sum", "any"):
        with pytest.raises(TypeError, match=errmsg):
            getattr(obj, method_name)(epic=starwars)
|
||||
|
||||
def test_api_compat(self):
    """Check ``__name__`` and ``__qualname__`` of several methods on a
    constructed object (GH 12021)."""
    obj = self._construct(5)
    for method_name in ("sum", "cumsum", "any", "var"):
        method = getattr(obj, method_name)
        assert method.__name__ == method_name
        assert method.__qualname__.endswith(method_name)
|
||||
|
||||
def test_stat_non_defaults_args(self):
    """Check that passing ``out`` to reduction methods raises ``ValueError``
    with the "not supported" message."""
    obj = self._construct(5)
    out = np.array([0])
    errmsg = "the 'out' parameter is not supported"

    # max: stat_function, var: stat_function_ddof,
    # sum: cum_function, any: logical_function
    for method_name in ("max", "var", "sum", "any"):
        with pytest.raises(ValueError, match=errmsg):
            getattr(obj, method_name)(out=out)
|
||||
|
||||
def test_truncate_out_of_bounds(self):
    """Check that ``truncate`` with bounds outside the index returns the
    whole object, for a small and a big object (GH11382)."""
    trailing_dims = [1] * (self._ndim - 1)

    # (length, upper bound past the end): small first, then big
    for length, beyond in ((2e3, 3e3), (2e6, 3e6)):
        shape = [int(length)] + trailing_dims
        obj = self._construct(shape, dtype="int8", value=1)
        self._compare(obj.truncate(), obj)
        self._compare(obj.truncate(before=0, after=beyond), obj)
        self._compare(obj.truncate(before=-1, after=length), obj)
|
||||
|
||||
def test_validate_bool_args(self):
    """Check that non-boolean ``inplace`` values raise ``ValueError``.

    Each method is looked up through ``super(DataFrame, df)``, i.e. past
    ``DataFrame`` itself in the method resolution order.
    """
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    invalid_values = [1, "True", [1, 2, 3], 5.0]
    parent = super(DataFrame, df)

    # (method name, positional args, keyword args other than ``inplace``)
    calls = [
        ("rename_axis", (), {"mapper": {"a": "x", "b": "y"}, "axis": 1}),
        ("drop", ("a",), {"axis": 1}),
        ("sort_index", (), {}),
        ("_consolidate", (), {}),
        ("fillna", (), {"value": 0}),
        ("replace", (), {"to_replace": 1, "value": 7}),
        ("interpolate", (), {}),
        ("_where", (), {"cond": df.a > 2}),
        ("mask", (), {"cond": df.a > 2}),
    ]

    for value in invalid_values:
        for method_name, args, kwargs in calls:
            with pytest.raises(ValueError):
                getattr(parent, method_name)(*args, inplace=value, **kwargs)
|
||||
|
||||
def test_copy_and_deepcopy(self):
    """Check that every way of copying yields a distinct object that
    compares equal to the original (GH 15444)."""

    def shallow(x):
        return x.copy(deep=False)

    def deep(x):
        return x.copy(deep=True)

    copiers = [copy, deepcopy, shallow, deep]
    for shape in [0, 1, 2]:
        original = self._construct(shape)
        for make_copy in copiers:
            duplicate = make_copy(original)
            assert duplicate is not original
            self._compare(duplicate, original)
|
||||
|
||||
@pytest.mark.parametrize(
    "periods,fill_method,limit,exp",
    [
        (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]),
        (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]),
        (1, "bfill", None, [np.nan, 0, 0, 1, 1, 1.5, np.nan, np.nan]),
        (1, "bfill", 1, [np.nan, np.nan, 0, 1, 1, 1.5, np.nan, np.nan]),
        (-1, "ffill", None, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, 0, np.nan]),
        (-1, "ffill", 1, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, np.nan, np.nan]),
        (-1, "bfill", None, [0, 0, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]),
        (-1, "bfill", 1, [np.nan, 0, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]),
    ],
)
def test_pct_change(self, periods, fill_method, limit, exp):
    """Check ``pct_change`` for each parametrized combination of
    ``periods``, ``fill_method`` and ``limit`` against ``exp``."""
    values = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
    obj = self._typ(values)
    result = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit)
    # Exact type check: only a plain DataFrame takes the frame branch.
    if type(obj) is not DataFrame:
        tm.assert_series_equal(result, Series(exp))
    else:
        tm.assert_frame_equal(result, DataFrame(exp))
|
||||
|
||||
|
||||
class TestNDFrame:
|
||||
# tests that don't fit elsewhere
|
||||
|
||||
def test_sample(self):
    """Check ``sample`` behaviour that is specific to DataFrame / Series:
    weights given as a column name, the ``axis`` argument, and alignment
    of a weight Series with the sampled frame (issue 2419)."""
    # additional specific object based tests

    # A few dataframe test with degenerate weights.
    easy_weight_list = [0] * 10
    easy_weight_list[5] = 1

    df = pd.DataFrame(
        {
            "col1": range(10, 20),
            "col2": range(20, 30),
            "colString": ["a"] * 10,
            "easyweights": easy_weight_list,
        }
    )
    sample1 = df.sample(n=1, weights="easyweights")
    assert_frame_equal(sample1, df.iloc[5:6])

    # Ensure proper error if string given as weight for Series or
    # DataFrame with axis = 1.
    s = Series(range(10))
    with pytest.raises(ValueError):
        s.sample(n=3, weights="weight_column")

    with pytest.raises(ValueError):
        df.sample(n=1, weights="weight_column", axis=1)

    # Check weighting key error
    with pytest.raises(
        KeyError, match="'String passed to weights not a valid column'"
    ):
        df.sample(n=3, weights="not_a_real_column_name")

    # Check that re-normalizes weights that don't sum to one.
    weights_less_than_1 = [0] * 10
    weights_less_than_1[0] = 0.5
    tm.assert_frame_equal(df.sample(n=1, weights=weights_less_than_1), df.iloc[:1])

    ###
    # Test axis argument
    ###

    # Test axis argument
    df = pd.DataFrame({"col1": range(10), "col2": ["a"] * 10})
    second_column_weight = [0, 1]
    assert_frame_equal(
        df.sample(n=1, axis=1, weights=second_column_weight), df[["col2"]]
    )

    # Different axis arg types
    assert_frame_equal(
        df.sample(n=1, axis="columns", weights=second_column_weight), df[["col2"]]
    )

    weight = [0] * 10
    weight[5] = 0.5
    assert_frame_equal(df.sample(n=1, axis="rows", weights=weight), df.iloc[5:6])
    assert_frame_equal(df.sample(n=1, axis="index", weights=weight), df.iloc[5:6])

    # Check out of range axis values
    with pytest.raises(ValueError):
        df.sample(n=1, axis=2)

    with pytest.raises(ValueError):
        df.sample(n=1, axis="not_a_name")

    # Build the Series outside the raises block so only ``sample`` is
    # expected to raise.
    s = pd.Series(range(10))
    with pytest.raises(ValueError):
        s.sample(n=1, axis=1)

    # Test weight length compared to correct axis
    with pytest.raises(ValueError):
        df.sample(n=1, axis=1, weights=[0.5] * 10)

    # Check weights with axis = 1
    easy_weight_list = [0] * 3
    easy_weight_list[2] = 1

    df = pd.DataFrame(
        {"col1": range(10, 20), "col2": range(20, 30), "colString": ["a"] * 10}
    )
    sample1 = df.sample(n=1, axis=1, weights=easy_weight_list)
    assert_frame_equal(sample1, df[["colString"]])

    # Test default axes
    assert_frame_equal(
        df.sample(n=3, random_state=42), df.sample(n=3, axis=0, random_state=42)
    )

    # Test that function aligns weights with frame
    df = DataFrame({"col1": [5, 6, 7], "col2": ["a", "b", "c"]}, index=[9, 5, 3])
    s = Series([1, 0, 0], index=[3, 5, 9])
    assert_frame_equal(df.loc[[3]], df.sample(1, weights=s))

    # Weights have index values to be dropped because not in
    # sampled DataFrame
    s2 = Series([0.001, 0, 10000], index=[3, 5, 10])
    assert_frame_equal(df.loc[[3]], df.sample(1, weights=s2))

    # Weights have empty values to be filled with zeros
    s3 = Series([0.01, 0], index=[3, 5])
    assert_frame_equal(df.loc[[3]], df.sample(1, weights=s3))

    # No overlap in weight and sampled DataFrame indices
    s4 = Series([1, 0], index=[1, 2])
    with pytest.raises(ValueError):
        df.sample(1, weights=s4)
|
||||
|
||||
def test_squeeze(self):
    """Exercise ``squeeze`` on Series and DataFrame objects.

    Covers the no-op case, a single-column frame, zero-length objects,
    the ``axis`` argument on a 1x1 frame, and the ``ValueError`` raised
    for an unknown axis.
    """
    # noop
    for ser in (tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()):
        tm.assert_series_equal(ser.squeeze(), ser)
    frame = tm.makeTimeDataFrame()
    tm.assert_frame_equal(frame.squeeze(), frame)

    # squeezing
    df = tm.makeTimeDataFrame().reindex(columns=["A"])
    tm.assert_series_equal(df.squeeze(), df["A"])

    # don't fail with 0 length dimensions GH11229 & GH8999
    empty_series = Series([], name="five")
    empty_frame = DataFrame([empty_series])

    # A plain loop: the calls are made for their assertions, so there is
    # no reason to collect their (None) results in a throwaway list.
    for higher_dim in (empty_series, empty_frame):
        tm.assert_series_equal(empty_series, higher_dim.squeeze())

    # axis argument
    df = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
    assert df.shape == (1, 1)
    tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
    tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0])
    # With no axis given, the 1x1 frame collapses to its single value.
    assert df.squeeze() == df.iloc[0, 0]

    msg = "No axis named 2 for object type <class 'pandas.core.frame.DataFrame'>"
    with pytest.raises(ValueError, match=msg):
        df.squeeze(axis=2)

    msg = "No axis named x for object type <class 'pandas.core.frame.DataFrame'>"
    with pytest.raises(ValueError, match=msg):
        df.squeeze(axis="x")

    df = tm.makeTimeDataFrame(3)
    tm.assert_frame_equal(df.squeeze(axis=0), df)
|
||||
|
||||
def test_numpy_squeeze(self):
    """Check ``np.squeeze`` on a Series and on a single-column DataFrame."""
    ser = tm.makeFloatSeries()
    squeezed = np.squeeze(ser)
    tm.assert_series_equal(squeezed, ser)

    frame = tm.makeTimeDataFrame().reindex(columns=["A"])
    squeezed = np.squeeze(frame)
    tm.assert_series_equal(squeezed, frame["A"])
|
||||
|
||||
def test_transpose(self):
    """Series transpose returns an equal Series; a frame round-trips through two transposes."""
    makers = (tm.makeFloatSeries, tm.makeStringSeries, tm.makeObjectSeries)
    all_series = [make() for make in makers]
    for ser in all_series:
        # calls implementation in pandas/core/base.py
        tm.assert_series_equal(ser.transpose(), ser)

    frame = tm.makeTimeDataFrame()
    round_trip = frame.transpose().transpose()
    tm.assert_frame_equal(round_trip, frame)
|
||||
|
||||
def test_numpy_transpose(self):
    """Check ``np.transpose`` on Series/DataFrame and its rejection of ``axes``."""
    axes_msg = "the 'axes' parameter is not supported"

    ser = tm.makeFloatSeries()
    transposed = np.transpose(ser)
    tm.assert_series_equal(transposed, ser)

    with pytest.raises(ValueError, match=axes_msg):
        np.transpose(ser, axes=1)

    frame = tm.makeTimeDataFrame()
    twice = np.transpose(np.transpose(frame))
    tm.assert_frame_equal(twice, frame)

    with pytest.raises(ValueError, match=axes_msg):
        np.transpose(frame, axes=1)
|
||||
|
||||
def test_take(self):
    """Compare ``take`` with an expected object rebuilt from the raw values and index."""
    indices = [1, 5, -2, 6, 3, -1]

    for ser in (tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()):
        expected = Series(
            data=ser.values.take(indices),
            index=ser.index.take(indices),
            dtype=ser.dtype,
        )
        tm.assert_series_equal(ser.take(indices), expected)

    frame = tm.makeTimeDataFrame()
    expected = DataFrame(
        data=frame.values.take(indices, axis=0),
        index=frame.index.take(indices),
        columns=frame.columns,
    )
    tm.assert_frame_equal(frame.take(indices), expected)
|
||||
|
||||
def test_take_invalid_kwargs(self):
    """``take`` rejects an unknown keyword and the ``out`` / ``mode`` parameters."""
    indices = [-3, 2, 0, 1]
    targets = (tm.makeFloatSeries(), tm.makeTimeDataFrame())

    # (expected exception, message pattern, offending keyword arguments)
    cases = [
        (TypeError, r"take\(\) got an unexpected keyword argument 'foo'", {"foo": 2}),
        (ValueError, "the 'out' parameter is not supported", {"out": indices}),
        (ValueError, "the 'mode' parameter is not supported", {"mode": "clip"}),
    ]

    for obj in targets:
        for exc_type, msg, bad_kwargs in cases:
            with pytest.raises(exc_type, match=msg):
                obj.take(indices, **bad_kwargs)
|
||||
|
||||
def test_equals(self):
|
||||
s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
|
||||
s2 = s1.copy()
|
||||
assert s1.equals(s2)
|
||||
|
||||
s1[1] = 99
|
||||
assert not s1.equals(s2)
|
||||
|
||||
# NaNs compare as equal
|
||||
s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3])
|
||||
s2 = s1.copy()
|
||||
assert s1.equals(s2)
|
||||
|
||||
s2[0] = 9.9
|
||||
assert not s1.equals(s2)
|
||||
|
||||
idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")])
|
||||
s1 = Series([1, 2, np.nan], index=idx)
|
||||
s2 = s1.copy()
|
||||
assert s1.equals(s2)
|
||||
|
||||
# Add object dtype column with nans
|
||||
index = np.random.random(10)
|
||||
df1 = DataFrame(np.random.random(10), index=index, columns=["floats"])
|
||||
df1["text"] = "the sky is so blue. we could use more chocolate.".split()
|
||||
df1["start"] = date_range("2000-1-1", periods=10, freq="T")
|
||||
df1["end"] = date_range("2000-1-1", periods=10, freq="D")
|
||||
df1["diff"] = df1["end"] - df1["start"]
|
||||
df1["bool"] = np.arange(10) % 3 == 0
|
||||
df1.loc[::2] = np.nan
|
||||
df2 = df1.copy()
|
||||
assert df1["text"].equals(df2["text"])
|
||||
assert df1["start"].equals(df2["start"])
|
||||
assert df1["end"].equals(df2["end"])
|
||||
assert df1["diff"].equals(df2["diff"])
|
||||
assert df1["bool"].equals(df2["bool"])
|
||||
assert df1.equals(df2)
|
||||
assert not df1.equals(object)
|
||||
|
||||
# different dtype
|
||||
different = df1.copy()
|
||||
different["floats"] = different["floats"].astype("float32")
|
||||
assert not df1.equals(different)
|
||||
|
||||
# different index
|
||||
different_index = -index
|
||||
different = df2.set_index(different_index)
|
||||
assert not df1.equals(different)
|
||||
|
||||
# different columns
|
||||
different = df2.copy()
|
||||
different.columns = df2.columns[::-1]
|
||||
assert not df1.equals(different)
|
||||
|
||||
# DatetimeIndex
|
||||
index = pd.date_range("2000-1-1", periods=10, freq="T")
|
||||
df1 = df1.set_index(index)
|
||||
df2 = df1.copy()
|
||||
assert df1.equals(df2)
|
||||
|
||||
# MultiIndex
|
||||
df3 = df1.set_index(["text"], append=True)
|
||||
df2 = df1.set_index(["text"], append=True)
|
||||
assert df3.equals(df2)
|
||||
|
||||
df2 = df1.set_index(["floats"], append=True)
|
||||
assert not df3.equals(df2)
|
||||
|
||||
# NaN in index
|
||||
df3 = df1.set_index(["floats"], append=True)
|
||||
df2 = df1.set_index(["floats"], append=True)
|
||||
assert df3.equals(df2)
|
||||
|
||||
# GH 8437
|
||||
a = pd.Series([False, np.nan])
|
||||
b = pd.Series([False, np.nan])
|
||||
c = pd.Series(index=range(2))
|
||||
d = pd.Series(index=range(2))
|
||||
e = pd.Series(index=range(2))
|
||||
f = pd.Series(index=range(2))
|
||||
c[:-1] = d[:-1] = e[0] = f[0] = False
|
||||
assert a.equals(a)
|
||||
assert a.equals(b)
|
||||
assert a.equals(c)
|
||||
assert a.equals(d)
|
||||
assert a.equals(e)
|
||||
assert e.equals(f)
|
||||
|
||||
def test_pipe(self):
|
||||
df = DataFrame({"A": [1, 2, 3]})
|
||||
f = lambda x, y: x ** y
|
||||
result = df.pipe(f, 2)
|
||||
expected = DataFrame({"A": [1, 4, 9]})
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
result = df.A.pipe(f, 2)
|
||||
assert_series_equal(result, expected.A)
|
||||
|
||||
def test_pipe_tuple(self):
|
||||
df = DataFrame({"A": [1, 2, 3]})
|
||||
f = lambda x, y: y
|
||||
result = df.pipe((f, "y"), 0)
|
||||
assert_frame_equal(result, df)
|
||||
|
||||
result = df.A.pipe((f, "y"), 0)
|
||||
assert_series_equal(result, df.A)
|
||||
|
||||
def test_pipe_tuple_error(self):
    """``pipe`` raises ``ValueError`` when the tuple's keyword is also passed explicitly."""

    def second(x, y):
        return y

    df = DataFrame({"A": [1, 2, 3]})

    # Frame first, then the Series taken from column "A".
    for obj in (df, df.A):
        with pytest.raises(ValueError):
            obj.pipe((second, "y"), x=1, y=0)
|
||||
|
||||
@pytest.mark.parametrize("box", [pd.Series, pd.DataFrame])
def test_axis_classmethods(self, box):
    """Axis helper methods give the same answer on the class and on an instance."""
    obj = box()
    # Every key of the three class-level axis mappings is checked.
    mappings = (box._AXIS_NAMES, box._AXIS_NUMBERS, box._AXIS_ALIASES)
    values = [key for mapping in mappings for key in mapping.keys()]

    for axis_key in values:
        assert obj._get_axis_number(axis_key) == box._get_axis_number(axis_key)
        assert obj._get_axis_name(axis_key) == box._get_axis_name(axis_key)
        assert obj._get_block_manager_axis(axis_key) == box._get_block_manager_axis(
            axis_key
        )
|
||||
|
||||
def test_deprecated_to_dense(self):
    """``to_dense`` emits a ``FutureWarning`` and returns an equal object."""
    # GH 26557: DEPR
    # Deprecated 0.25.0
    cases = [
        (pd.DataFrame({"A": [1, 2, 3]}), tm.assert_frame_equal),
        (pd.Series([1, 2, 3]), tm.assert_series_equal),
    ]

    for obj, assert_equal in cases:
        with tm.assert_produces_warning(FutureWarning):
            result = obj.to_dense()
        assert_equal(result, obj)
|
||||
|
||||
def test_deprecated_get_dtype_counts(self):
    """``get_dtype_counts`` emits a ``FutureWarning``."""
    # GH 18262
    frame = DataFrame([1])
    expect_warning = tm.assert_produces_warning(FutureWarning)
    with expect_warning:
        frame.get_dtype_counts()
|
@@ -0,0 +1,339 @@
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.missing import array_equivalent
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# Fixtures
|
||||
# ========
|
||||
@pytest.fixture
def df():
    """Three-row DataFrame with the columns 'L1', 'L2' and 'L3'."""
    columns = {"L1": [1, 2, 3], "L2": [11, 12, 13], "L3": ["A", "B", "C"]}
    return pd.DataFrame(columns)
|
||||
|
||||
|
||||
@pytest.fixture(params=[[], ["L1"], ["L1", "L2"], ["L1", "L2", "L3"]])
def df_levels(request, df):
    """DataFrame whose 'L1', 'L2' and 'L3' are columns or index levels.

    The fixture parameter lists the names moved into the index; an empty
    list leaves the frame untouched.
    """
    index_names = request.param
    return df.set_index(index_names) if index_names else df
|
||||
|
||||
|
||||
@pytest.fixture
def df_ambig(df):
    """DataFrame with index levels 'L1' and 'L2' and column labels 'L1' and 'L3'."""
    frame = df.set_index(["L1", "L2"])

    # Re-create 'L1' as a column so the name exists both as a level and a label.
    frame["L1"] = frame["L3"]

    return frame
|
||||
|
||||
|
||||
@pytest.fixture
def df_duplabels(df):
    """DataFrame with index level 'L1' and column labels 'L2', 'L3' and 'L2' again."""
    indexed = df.set_index(["L1"])
    # Appending the 'L2' column a second time produces the duplicate label.
    pieces = [indexed, indexed["L2"]]
    return pd.concat(pieces, axis=1)
|
||||
|
||||
|
||||
# Test is label/level reference
|
||||
# =============================
|
||||
def get_labels_levels(df_levels):
    """Return ``(column labels, named index levels)`` of ``df_levels`` as two lists.

    Index levels whose name is ``None`` are left out.
    """
    expected_levels = []
    for level_name in df_levels.index.names:
        if level_name is not None:
            expected_levels.append(level_name)
    return list(df_levels.columns), expected_levels
|
||||
|
||||
|
||||
def assert_label_reference(frame, labels, axis):
    """Assert that each entry of ``labels`` is a label, and not a level, reference on ``axis``."""
    for name in labels:
        is_label = frame._is_label_reference(name, axis=axis)
        assert is_label

        is_level = frame._is_level_reference(name, axis=axis)
        assert not is_level

        is_either = frame._is_label_or_level_reference(name, axis=axis)
        assert is_either
|
||||
|
||||
|
||||
def assert_level_reference(frame, levels, axis):
    """Assert that each entry of ``levels`` is a level, and not a label, reference on ``axis``."""
    for name in levels:
        is_level = frame._is_level_reference(name, axis=axis)
        assert is_level

        is_label = frame._is_label_reference(name, axis=axis)
        assert not is_label

        is_either = frame._is_label_or_level_reference(name, axis=axis)
        assert is_either
|
||||
|
||||
|
||||
# DataFrame
|
||||
# ---------
|
||||
def test_is_level_or_label_reference_df_simple(df_levels, axis):
    """Columns are label references and named index levels are level references."""
    # Expectations come from the frame before any transposition.
    expected_labels, expected_levels = get_labels_levels(df_levels)

    # For the column axis the same checks run on the transposed frame.
    frame = df_levels.T if axis in {1, "columns"} else df_levels

    assert_level_reference(frame, expected_levels, axis=axis)
    assert_label_reference(frame, expected_labels, axis=axis)
|
||||
|
||||
|
||||
def test_is_level_reference_df_ambig(df_ambig, axis):
    """A name that is both a level and a label is treated as a label reference."""
    # For the column axis the same checks run on the transposed frame.
    frame = df_ambig.T if axis in {1, "columns"} else df_ambig

    # 'L1' is both an on-axis level and an off-axis label, so it should
    # reference the label rather than the level.
    assert_label_reference(frame, ["L1"], axis=axis)

    # 'L2' is an on-axis level only and is not ambiguous, so it is a
    # level reference.
    assert_level_reference(frame, ["L2"], axis=axis)

    # 'L3' is an off-axis label only, so it is not a level reference.
    assert_label_reference(frame, ["L3"], axis=axis)
|
||||
|
||||
|
||||
# Series
|
||||
# ------
|
||||
def test_is_level_reference_series_simple_axis0(df):
    """Index level names of a Series are level references; the Series' own name is not."""
    # Series 'L2' indexed by 'L1'
    single = df.set_index("L1")["L2"]
    assert_level_reference(single, ["L1"], axis=0)
    assert not single._is_level_reference("L2")

    # Series 'L3' indexed by 'L1' and 'L2'
    multi = df.set_index(["L1", "L2"])["L3"]
    assert_level_reference(multi, ["L1", "L2"], axis=0)
    assert not multi._is_level_reference("L3")
|
||||
|
||||
|
||||
def test_is_level_reference_series_axis1_error(df):
    """``_is_level_reference`` on a Series raises ``ValueError`` for ``axis=1``."""
    # Series 'L2' indexed by 'L1'
    ser = df.set_index("L1")["L2"]

    expect_error = pytest.raises(ValueError, match="No axis named 1")
    with expect_error:
        ser._is_level_reference("L1", axis=1)
|
||||
|
||||
|
||||
# Test _check_label_or_level_ambiguity_df
|
||||
# =======================================
|
||||
|
||||
# DataFrame
|
||||
# ---------
|
||||
def test_check_label_or_level_ambiguity_df(df_ambig, axis):
    """Only the name shared by a level and a label is reported as ambiguous."""
    # For the column axis the same checks run on the transposed frame.
    frame = df_ambig.T if axis in {1, "columns"} else df_ambig

    msg = (
        "'L1' is both an index level and a column label"
        if axis in {0, "index"}
        else "'L1' is both a column level and an index label"
    )

    # 'L1' is both an on-axis level and an off-axis label, so it is ambiguous.
    with pytest.raises(ValueError, match=msg):
        frame._check_label_or_level_ambiguity("L1", axis=axis)

    # 'L2' is an on-axis level only, and it is not ambiguous.
    frame._check_label_or_level_ambiguity("L2", axis=axis)

    # 'L3' is an off-axis label only, and it is not ambiguous.
    assert not frame._check_label_or_level_ambiguity("L3", axis=axis)
|
||||
|
||||
|
||||
# Series
|
||||
# ------
|
||||
def test_check_label_or_level_ambiguity_series(df):
|
||||
|
||||
# A series has no columns and therefore references are never ambiguous
|
||||
|
||||
# Make series with L1 as index
|
||||
s = df.set_index("L1").L2
|
||||
s._check_label_or_level_ambiguity("L1", axis=0)
|
||||
s._check_label_or_level_ambiguity("L2", axis=0)
|
||||
|
||||
# Make series with L1 and L2 as index
|
||||
s = df.set_index(["L1", "L2"]).L3
|
||||
s._check_label_or_level_ambiguity("L1", axis=0)
|
||||
s._check_label_or_level_ambiguity("L2", axis=0)
|
||||
s._check_label_or_level_ambiguity("L3", axis=0)
|
||||
|
||||
|
||||
def test_check_label_or_level_ambiguity_series_axis1_error(df):
    """The ambiguity check on a Series raises ``ValueError`` for ``axis=1``."""
    # Series 'L2' indexed by 'L1'
    ser = df.set_index("L1")["L2"]

    expect_error = pytest.raises(ValueError, match="No axis named 1")
    with expect_error:
        ser._check_label_or_level_ambiguity("L1", axis=1)
|
||||
|
||||
|
||||
# Test _get_label_or_level_values
|
||||
# ===============================
|
||||
def assert_label_values(frame, labels, axis):
    """Assert ``_get_label_or_level_values`` matches direct label lookup for each label.

    For axis 0 / "index" the expected values come from ``frame[label]``;
    for any other axis they come from ``frame.loc[label]``.
    """
    by_column = axis in {0, "index"}
    for label in labels:
        # The expected values are looked up before the method under test runs.
        selected = frame[label] if by_column else frame.loc[label]
        expected = selected._values

        result = frame._get_label_or_level_values(label, axis=axis)
        assert array_equivalent(expected, result)
|
||||
|
||||
|
||||
def assert_level_values(frame, levels, axis):
    """Assert ``_get_label_or_level_values`` matches ``get_level_values`` for each level.

    For axis 0 / "index" the level is read from ``frame.index``; for any
    other axis it is read from ``frame.columns``.
    """
    use_index = axis in {0, "index"}
    for level in levels:
        # The expected values are looked up before the method under test runs.
        if use_index:
            axis_labels = frame.index
        else:
            axis_labels = frame.columns
        expected = axis_labels.get_level_values(level=level)._values

        result = frame._get_label_or_level_values(level, axis=axis)
        assert array_equivalent(expected, result)
|
||||
|
||||
|
||||
# DataFrame
|
||||
# ---------
|
||||
def test_get_label_or_level_values_df_simple(df_levels, axis):
    """Values can be fetched by column label and by index level name."""
    # Expectations come from the frame before any transposition.
    expected_labels, expected_levels = get_labels_levels(df_levels)

    # For the column axis the same checks run on the transposed frame.
    frame = df_levels.T if axis in {1, "columns"} else df_levels

    assert_label_values(frame, expected_labels, axis=axis)
    assert_level_values(frame, expected_levels, axis=axis)
|
||||
|
||||
|
||||
def test_get_label_or_level_values_df_ambig(df_ambig, axis):
    """Unambiguous names still resolve on a frame that also holds an ambiguous one."""
    # For the column axis the same checks run on the transposed frame.
    frame = df_ambig.T if axis in {1, "columns"} else df_ambig

    # 'L2' is an on-axis level only, and it is not ambiguous.
    assert_level_values(frame, ["L2"], axis=axis)

    # 'L3' is an off-axis label only, and it is not ambiguous.
    assert_label_values(frame, ["L3"], axis=axis)
|
||||
|
||||
|
||||
def test_get_label_or_level_values_df_duplabels(df_duplabels, axis):
    """A duplicated label raises ``ValueError``; unique names resolve normally."""
    # For the column axis the same checks run on the transposed frame.
    frame = df_duplabels.T if axis in {1, "columns"} else df_duplabels

    # Level 'L1' is unambiguous.
    assert_level_values(frame, ["L1"], axis=axis)

    # Label 'L3' is unique.
    assert_label_values(frame, ["L3"], axis=axis)

    # Label 'L2' appears twice.
    expected_msg = (
        "The column label 'L2' is not unique"
        if axis in {0, "index"}
        else "The index label 'L2' is not unique"
    )

    with pytest.raises(ValueError, match=expected_msg):
        assert_label_values(frame, ["L2"], axis=axis)
|
||||
|
||||
|
||||
# Series
|
||||
# ------
|
||||
def test_get_label_or_level_values_series_axis0(df):
    """Level values of a Series can be fetched by index level name."""
    # Series 'L2' indexed by 'L1'
    single = df.set_index("L1")["L2"]
    assert_level_values(single, ["L1"], axis=0)

    # Series 'L3' indexed by 'L1' and 'L2'
    multi = df.set_index(["L1", "L2"])["L3"]
    assert_level_values(multi, ["L1", "L2"], axis=0)
|
||||
|
||||
|
||||
def test_get_label_or_level_values_series_axis1_error(df):
    """``_get_label_or_level_values`` on a Series raises ``ValueError`` for ``axis=1``."""
    # Series 'L2' indexed by 'L1'
    ser = df.set_index("L1")["L2"]

    expect_error = pytest.raises(ValueError, match="No axis named 1")
    with expect_error:
        ser._get_label_or_level_values("L1", axis=1)
|
||||
|
||||
|
||||
# Test _drop_labels_or_levels
|
||||
# ===========================
|
||||
def assert_labels_dropped(frame, labels, axis):
    """Assert each label is present in ``frame`` and absent after ``_drop_labels_or_levels``.

    Labels are looked for in the columns for axis 0 / "index" and in the
    index for any other axis.
    """
    holder = "columns" if axis in {0, "index"} else "index"
    for label in labels:
        # The drop runs first, then both membership checks.
        dropped = frame._drop_labels_or_levels(label, axis=axis)

        assert label in getattr(frame, holder)
        assert label not in getattr(dropped, holder)
|
||||
|
||||
|
||||
def assert_levels_dropped(frame, levels, axis):
    """Assert each level is present in ``frame`` and absent after ``_drop_labels_or_levels``.

    Level names are looked for in ``index.names`` for axis 0 / "index" and
    in ``columns.names`` for any other axis.
    """
    holder = "index" if axis in {0, "index"} else "columns"
    for level in levels:
        # The drop runs first, then both membership checks.
        dropped = frame._drop_labels_or_levels(level, axis=axis)

        assert level in getattr(frame, holder).names
        assert level not in getattr(dropped, holder).names
|
||||
|
||||
|
||||
# DataFrame
|
||||
# ---------
|
||||
def test_drop_labels_or_levels_df(df_levels, axis):
    """Labels and levels can each be dropped; an unknown name raises ``ValueError``."""
    # Expectations come from the frame before any transposition.
    expected_labels, expected_levels = get_labels_levels(df_levels)

    # For the column axis the same checks run on the transposed frame.
    frame = df_levels.T if axis in {1, "columns"} else df_levels

    assert_labels_dropped(frame, expected_labels, axis=axis)
    assert_levels_dropped(frame, expected_levels, axis=axis)

    # 'L4' is neither a label nor a level of the fixture frame.
    with pytest.raises(ValueError, match="not valid labels or levels"):
        frame._drop_labels_or_levels("L4", axis=axis)
|
||||
|
||||
|
||||
# Series
|
||||
# ------
|
||||
def test_drop_labels_or_levels_series(df):
    """Index levels of a Series can be dropped; an unknown name raises ``ValueError``."""
    invalid_msg = "not valid labels or levels"

    # Series 'L2' indexed by 'L1'
    single = df.set_index("L1")["L2"]
    assert_levels_dropped(single, ["L1"], axis=0)

    with pytest.raises(ValueError, match=invalid_msg):
        single._drop_labels_or_levels("L4", axis=0)

    # Series 'L3' indexed by 'L1' and 'L2'
    multi = df.set_index(["L1", "L2"])["L3"]
    assert_levels_dropped(multi, ["L1", "L2"], axis=0)

    with pytest.raises(ValueError, match=invalid_msg):
        multi._drop_labels_or_levels("L4", axis=0)
|
@@ -0,0 +1,269 @@
|
||||
from distutils.version import LooseVersion
|
||||
from operator import methodcaller
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex, Series, date_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_almost_equal, assert_series_equal
|
||||
|
||||
from .test_generic import Generic
|
||||
|
||||
try:
|
||||
import xarray
|
||||
|
||||
_XARRAY_INSTALLED = True
|
||||
except ImportError:
|
||||
_XARRAY_INSTALLED = False
|
||||
|
||||
|
||||
class TestSeries(Generic):
    """Series-specific tests built on the shared ``Generic`` test base."""

    _typ = Series
    _comparator = lambda self, x, y: assert_series_equal(x, y)

    def setup_method(self):
        """Create the named time series and string series stored on the instance."""
        self.ts = tm.makeTimeSeries()  # Was at top level in test_series
        self.ts.name = "ts"

        self.series = tm.makeStringSeries()
        self.series.name = "series"

    def test_rename_mi(self):
        """``rename`` with a callable runs on a MultiIndex-ed Series without raising."""
        s = Series(
            [11, 21, 31],
            index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]]),
        )
        s.rename(str.lower)

    def test_set_axis_name(self):
        """Both axis-naming methods name the result's index and leave the input alone."""
        s = Series([1, 2, 3], index=["a", "b", "c"])
        funcs = ["rename_axis", "_set_axis_name"]
        name = "foo"
        for func in funcs:
            result = methodcaller(func, name)(s)
            assert s.index.name is None
            assert result.index.name == name

    def test_set_axis_name_mi(self):
        """Both axis-naming methods rename MultiIndex levels on the result only."""
        s = Series(
            [11, 21, 31],
            index=MultiIndex.from_tuples(
                [("A", x) for x in ["a", "B", "c"]], names=["l1", "l2"]
            ),
        )
        funcs = ["rename_axis", "_set_axis_name"]
        for func in funcs:
            result = methodcaller(func, ["L1", "L2"])(s)
            assert s.index.name is None
            assert s.index.names == ["l1", "l2"]
            assert result.index.name is None
            # Compare with ``==``: written as ``assert names, [...]`` the list
            # is only the assertion message and the check can never fail.
            assert result.index.names == ["L1", "L2"]

    def test_set_axis_name_raises(self):
        """``_set_axis_name`` raises ``ValueError`` for ``axis=1`` on a Series."""
        s = pd.Series([1])
        with pytest.raises(ValueError):
            s._set_axis_name(name="a", axis=1)

    def test_get_numeric_data_preserve_dtype(self):
        """``_get_numeric_data`` / ``_get_bool_data`` results for several dtypes."""
        # get the numeric data
        o = Series([1, 2, 3])
        result = o._get_numeric_data()
        self._compare(result, o)

        o = Series([1, "2", 3.0])
        result = o._get_numeric_data()
        expected = Series([], dtype=object, index=pd.Index([], dtype=object))
        self._compare(result, expected)

        o = Series([True, False, True])
        result = o._get_numeric_data()
        self._compare(result, o)

        o = Series([True, False, True])
        result = o._get_bool_data()
        self._compare(result, o)

        o = Series(date_range("20130101", periods=3))
        result = o._get_numeric_data()
        expected = Series([], dtype="M8[ns]", index=pd.Index([], dtype=object))
        self._compare(result, expected)

    def test_nonzero_single_element(self):
        """``bool(series)`` always raises; ``.bool()`` accepts only one boolean element."""
        # allow single item via bool method
        s = Series([True])
        assert s.bool()

        s = Series([False])
        assert not s.bool()

        msg = "The truth value of a Series is ambiguous"
        # single item nan to raise
        for s in [Series([np.nan]), Series([pd.NaT]), Series([True]), Series([False])]:
            with pytest.raises(ValueError, match=msg):
                bool(s)

        msg = "bool cannot act on a non-boolean single element Series"
        for s in [Series([np.nan]), Series([pd.NaT])]:
            with pytest.raises(ValueError, match=msg):
                s.bool()

        # multiple bool are still an error
        msg = "The truth value of a Series is ambiguous"
        for s in [Series([True, True]), Series([False, False])]:
            with pytest.raises(ValueError, match=msg):
                bool(s)
            with pytest.raises(ValueError, match=msg):
                s.bool()

        # single non-bool are an error
        for s in [Series([1]), Series([0]), Series(["a"]), Series([0.0])]:
            msg = "The truth value of a Series is ambiguous"
            with pytest.raises(ValueError, match=msg):
                bool(s)
            msg = "bool cannot act on a non-boolean single element Series"
            with pytest.raises(ValueError, match=msg):
                s.bool()

    def test_metadata_propagation_indiv(self):
        """Metadata carries through transpose, resample and a patched ``concat``."""
        # check that the metadata matches up on the resulting ops

        o = Series(range(3), range(3))
        o.name = "foo"
        o2 = Series(range(3), range(3))
        o2.name = "bar"

        result = o.T
        self.check_metadata(o, result)

        # resample
        ts = Series(
            np.random.rand(1000),
            index=date_range("20130101", periods=1000, freq="s"),
            name="foo",
        )
        result = ts.resample("1T").mean()
        self.check_metadata(ts, result)

        result = ts.resample("1T").min()
        self.check_metadata(ts, result)

        result = ts.resample("1T").apply(lambda x: x.sum())
        self.check_metadata(ts, result)

        def finalize(self, other, method=None, **kwargs):
            # On concat, join the non-empty "filename" values of the inputs
            # with "+"; every other attribute is copied from ``other``.
            for name in self._metadata:
                if method == "concat" and name == "filename":
                    value = "+".join(
                        [
                            getattr(obj, name)
                            for obj in other.objs
                            if getattr(obj, name, None)
                        ]
                    )
                    object.__setattr__(self, name, value)
                else:
                    object.__setattr__(self, name, getattr(other, name, None))

            return self

        _metadata = Series._metadata
        _finalize = Series.__finalize__
        try:
            Series._metadata = ["name", "filename"]
            o.filename = "foo"
            o2.filename = "bar"
            Series.__finalize__ = finalize

            result = pd.concat([o, o2])
            assert result.filename == "foo+bar"
            assert result.name is None
        finally:
            # reset -- in ``finally`` so a failing assertion above cannot
            # leave the class-level patches in place for later tests
            Series._metadata = _metadata
            Series.__finalize__ = _finalize

    @pytest.mark.skipif(
        not _XARRAY_INSTALLED
        or LooseVersion(xarray.__version__) < LooseVersion("0.10.0"),
        reason="xarray >= 0.10.0 required",
    )
    @pytest.mark.parametrize(
        "index",
        [
            "FloatIndex",
            "IntIndex",
            "StringIndex",
            "UnicodeIndex",
            "DateIndex",
            "PeriodIndex",
            "TimedeltaIndex",
            "CategoricalIndex",
        ],
    )
    def test_to_xarray_index_types(self, index):
        """``to_xarray`` round-trips a Series for each parametrized index kind."""
        from xarray import DataArray

        # ``index`` is the name suffix of a ``tm.make<...>`` index factory.
        make_index = getattr(tm, "make{}".format(index))
        s = Series(range(6), index=make_index(6))
        s.index.name = "foo"
        result = s.to_xarray()
        repr(result)
        assert len(result) == 6
        assert len(result.coords) == 1
        assert_almost_equal(list(result.coords.keys()), ["foo"])
        assert isinstance(result, DataArray)

        # idempotency
        assert_series_equal(
            result.to_series(), s, check_index_type=False, check_categorical=True
        )

    @td.skip_if_no("xarray", min_version="0.7.0")
    def test_to_xarray(self):
        """``to_xarray`` on an empty Series and on a MultiIndex-ed Series."""
        from xarray import DataArray

        s = Series([])
        s.index.name = "foo"
        result = s.to_xarray()
        assert len(result) == 0
        assert len(result.coords) == 1
        assert_almost_equal(list(result.coords.keys()), ["foo"])
        assert isinstance(result, DataArray)

        s = Series(range(6))
        s.index.name = "foo"
        s.index = pd.MultiIndex.from_product(
            [["a", "b"], range(3)], names=["one", "two"]
        )
        result = s.to_xarray()
        assert len(result) == 2
        assert_almost_equal(list(result.coords.keys()), ["one", "two"])
        assert isinstance(result, DataArray)
        assert_series_equal(result.to_series(), s)

    def test_valid_deprecated(self):
        """``Series.valid`` emits a ``FutureWarning``."""
        # GH18800
        with tm.assert_produces_warning(FutureWarning):
            pd.Series([]).valid()

    @pytest.mark.parametrize(
        "s",
        [
            Series([np.arange(5)]),
            pd.date_range("1/1/2011", periods=24, freq="H"),
            pd.Series(range(5), index=pd.date_range("2017", periods=5)),
        ],
    )
    @pytest.mark.parametrize("shift_size", [0, 1, 2])
    def test_shift_always_copy(self, s, shift_size):
        """``shift`` returns a new object, even for a shift of zero."""
        # GH22397
        assert s.shift(shift_size) is not s

    @pytest.mark.parametrize("move_by_freq", [pd.Timedelta("1D"), pd.Timedelta("1M")])
    def test_datetime_shift_always_copy(self, move_by_freq):
        """``shift(freq=...)`` on a datetime-indexed Series returns a new object."""
        # GH22397
        s = pd.Series(range(5), index=pd.date_range("2017", periods=5))
        assert s.shift(freq=move_by_freq) is not s
|
Reference in New Issue
Block a user