8th day of python challenges 111-117

This commit is contained in:
abd.shallal
2019-08-04 15:26:35 +03:00
parent b04c1b055f
commit 627802c383
3215 changed files with 760227 additions and 491 deletions

View File

@@ -0,0 +1,231 @@
import numpy as np
import pytest
import pandas as pd
import pandas.util.testing as tm
# ------------------------------------------------------------------
# Helper Functions
def id_func(x):
if isinstance(x, tuple):
assert len(x) == 2
return x[0].__name__ + "-" + str(x[1])
else:
return x.__name__
# ------------------------------------------------------------------
@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def one(request):
# zero-dim integer array behaves like an integer
return request.param
zeros = [
box_cls([0] * 5, dtype=dtype)
for box_cls in [pd.Index, np.array]
for dtype in [np.int64, np.uint64, np.float64]
]
zeros.extend(
[box_cls([-0.0] * 5, dtype=np.float64) for box_cls in [pd.Index, np.array]]
)
zeros.extend([np.array(0, dtype=dtype) for dtype in [np.int64, np.uint64, np.float64]])
zeros.extend([np.array(-0.0, dtype=np.float64)])
zeros.extend([0, 0.0, -0.0])
@pytest.fixture(params=zeros)
def zero(request):
# For testing division by (or of) zero for Index with length 5, this
# gives several scalar-zeros and length-5 vector-zeros
return request.param
# ------------------------------------------------------------------
# Vector Fixtures
@pytest.fixture(
params=[
pd.Float64Index(np.arange(5, dtype="float64")),
pd.Int64Index(np.arange(5, dtype="int64")),
pd.UInt64Index(np.arange(5, dtype="uint64")),
pd.RangeIndex(5),
],
ids=lambda x: type(x).__name__,
)
def numeric_idx(request):
"""
Several types of numeric-dtypes Index objects
"""
return request.param
# ------------------------------------------------------------------
# Scalar Fixtures
@pytest.fixture(
params=[
pd.Timedelta("5m4s").to_pytimedelta(),
pd.Timedelta("5m4s"),
pd.Timedelta("5m4s").to_timedelta64(),
],
ids=lambda x: type(x).__name__,
)
def scalar_td(request):
"""
Several variants of Timedelta scalars representing 5 minutes and 4 seconds
"""
return request.param
@pytest.fixture(
params=[
pd.offsets.Day(3),
pd.offsets.Hour(72),
pd.Timedelta(days=3).to_pytimedelta(),
pd.Timedelta("72:00:00"),
np.timedelta64(3, "D"),
np.timedelta64(72, "h"),
],
ids=lambda x: type(x).__name__,
)
def three_days(request):
"""
Several timedelta-like and DateOffset objects that each represent
a 3-day timedelta
"""
return request.param
@pytest.fixture(
params=[
pd.offsets.Hour(2),
pd.offsets.Minute(120),
pd.Timedelta(hours=2).to_pytimedelta(),
pd.Timedelta(seconds=2 * 3600),
np.timedelta64(2, "h"),
np.timedelta64(120, "m"),
],
ids=lambda x: type(x).__name__,
)
def two_hours(request):
"""
Several timedelta-like and DateOffset objects that each represent
a 2-hour timedelta
"""
return request.param
_common_mismatch = [
pd.offsets.YearBegin(2),
pd.offsets.MonthBegin(1),
pd.offsets.Minute(),
]
@pytest.fixture(
params=[
pd.Timedelta(minutes=30).to_pytimedelta(),
np.timedelta64(30, "s"),
pd.Timedelta(seconds=30),
]
+ _common_mismatch
)
def not_hourly(request):
"""
Several timedelta-like and DateOffset instances that are _not_
compatible with Hourly frequencies.
"""
return request.param
@pytest.fixture(
params=[
np.timedelta64(4, "h"),
pd.Timedelta(hours=23).to_pytimedelta(),
pd.Timedelta("23:00:00"),
]
+ _common_mismatch
)
def not_daily(request):
"""
Several timedelta-like and DateOffset instances that are _not_
compatible with Daily frequencies.
"""
return request.param
@pytest.fixture(
params=[
np.timedelta64(365, "D"),
pd.Timedelta(days=365).to_pytimedelta(),
pd.Timedelta(days=365),
]
+ _common_mismatch
)
def mismatched_freq(request):
"""
Several timedelta-like and DateOffset instances that are _not_
compatible with Monthly or Annual frequencies.
"""
return request.param
# ------------------------------------------------------------------
@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame], ids=id_func)
def box(request):
"""
Several array-like containers that should have effectively identical
behavior with respect to arithmetic operations.
"""
return request.param
@pytest.fixture(
params=[pd.Index, pd.Series, pytest.param(pd.DataFrame, marks=pytest.mark.xfail)],
ids=id_func,
)
def box_df_fail(request):
"""
Fixture equivalent to `box` fixture but xfailing the DataFrame case.
"""
return request.param
@pytest.fixture(
params=[
(pd.Index, False),
(pd.Series, False),
(pd.DataFrame, False),
pytest.param((pd.DataFrame, True), marks=pytest.mark.xfail),
],
ids=id_func,
)
def box_transpose_fail(request):
"""
Fixture similar to `box` but testing both transpose cases for DataFrame,
with the tranpose=True case xfailed.
"""
# GH#23620
return request.param
@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, tm.to_array], ids=id_func)
def box_with_array(request):
"""
Fixture to test behavior for Index, Series, DataFrame, and pandas Array
classes
"""
return request.param
# alias so we can use the same fixture for multiple parameters in a test
box_with_array2 = box_with_array

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,318 @@
# Arithmetic tests for DataFrame/Series/Index/Array classes that should
# behave identically.
# Specifically for object dtype
from decimal import Decimal
import operator
import numpy as np
import pytest
import pandas as pd
from pandas import Series, Timestamp
from pandas.core import ops
import pandas.util.testing as tm
# ------------------------------------------------------------------
# Comparisons
class TestObjectComparisons:
def test_comparison_object_numeric_nas(self):
ser = Series(np.random.randn(10), dtype=object)
shifted = ser.shift(2)
ops = ["lt", "le", "gt", "ge", "eq", "ne"]
for op in ops:
func = getattr(operator, op)
result = func(ser, shifted)
expected = func(ser.astype(float), shifted.astype(float))
tm.assert_series_equal(result, expected)
def test_object_comparisons(self):
ser = Series(["a", "b", np.nan, "c", "a"])
result = ser == "a"
expected = Series([True, False, False, False, True])
tm.assert_series_equal(result, expected)
result = ser < "a"
expected = Series([False, False, False, False, False])
tm.assert_series_equal(result, expected)
result = ser != "a"
expected = -(ser == "a")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", [None, object])
def test_more_na_comparisons(self, dtype):
left = Series(["a", np.nan, "c"], dtype=dtype)
right = Series(["a", np.nan, "d"], dtype=dtype)
result = left == right
expected = Series([True, False, False])
tm.assert_series_equal(result, expected)
result = left != right
expected = Series([False, True, True])
tm.assert_series_equal(result, expected)
result = left == np.nan
expected = Series([False, False, False])
tm.assert_series_equal(result, expected)
result = left != np.nan
expected = Series([True, True, True])
tm.assert_series_equal(result, expected)
# ------------------------------------------------------------------
# Arithmetic
class TestArithmetic:
# TODO: parametrize
def test_pow_ops_object(self):
# GH#22922
# pow is weird with masking & 1, so testing here
a = Series([1, np.nan, 1, np.nan], dtype=object)
b = Series([1, np.nan, np.nan, 1], dtype=object)
result = a ** b
expected = Series(a.values ** b.values, dtype=object)
tm.assert_series_equal(result, expected)
result = b ** a
expected = Series(b.values ** a.values, dtype=object)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("op", [operator.add, ops.radd])
@pytest.mark.parametrize("other", ["category", "Int64"])
def test_add_extension_scalar(self, other, box, op):
# GH#22378
# Check that scalars satisfying is_extension_array_dtype(obj)
# do not incorrectly try to dispatch to an ExtensionArray operation
arr = pd.Series(["a", "b", "c"])
expected = pd.Series([op(x, other) for x in arr])
arr = tm.box_expected(arr, box)
expected = tm.box_expected(expected, box)
result = op(arr, other)
tm.assert_equal(result, expected)
def test_objarr_add_str(self, box):
ser = pd.Series(["x", np.nan, "x"])
expected = pd.Series(["xa", np.nan, "xa"])
ser = tm.box_expected(ser, box)
expected = tm.box_expected(expected, box)
result = ser + "a"
tm.assert_equal(result, expected)
def test_objarr_radd_str(self, box):
ser = pd.Series(["x", np.nan, "x"])
expected = pd.Series(["ax", np.nan, "ax"])
ser = tm.box_expected(ser, box)
expected = tm.box_expected(expected, box)
result = "a" + ser
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
"data",
[
[1, 2, 3],
[1.1, 2.2, 3.3],
[Timestamp("2011-01-01"), Timestamp("2011-01-02"), pd.NaT],
["x", "y", 1],
],
)
@pytest.mark.parametrize("dtype", [None, object])
def test_objarr_radd_str_invalid(self, dtype, data, box):
ser = Series(data, dtype=dtype)
ser = tm.box_expected(ser, box)
with pytest.raises(TypeError):
"foo_" + ser
@pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub])
def test_objarr_add_invalid(self, op, box):
# invalid ops
obj_ser = tm.makeObjectSeries()
obj_ser.name = "objects"
obj_ser = tm.box_expected(obj_ser, box)
with pytest.raises(Exception):
op(obj_ser, 1)
with pytest.raises(Exception):
op(obj_ser, np.array(1, dtype=np.int64))
# TODO: Moved from tests.series.test_operators; needs cleanup
def test_operators_na_handling(self):
ser = Series(["foo", "bar", "baz", np.nan])
result = "prefix_" + ser
expected = pd.Series(["prefix_foo", "prefix_bar", "prefix_baz", np.nan])
tm.assert_series_equal(result, expected)
result = ser + "_suffix"
expected = pd.Series(["foo_suffix", "bar_suffix", "baz_suffix", np.nan])
tm.assert_series_equal(result, expected)
# TODO: parametrize over box
@pytest.mark.parametrize("dtype", [None, object])
def test_series_with_dtype_radd_timedelta(self, dtype):
# note this test is _not_ aimed at timedelta64-dtyped Series
ser = pd.Series(
[pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
dtype=dtype,
)
expected = pd.Series(
[pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")]
)
result = pd.Timedelta("3 days") + ser
tm.assert_series_equal(result, expected)
result = ser + pd.Timedelta("3 days")
tm.assert_series_equal(result, expected)
# TODO: cleanup & parametrize over box
def test_mixed_timezone_series_ops_object(self):
# GH#13043
ser = pd.Series(
[
pd.Timestamp("2015-01-01", tz="US/Eastern"),
pd.Timestamp("2015-01-01", tz="Asia/Tokyo"),
],
name="xxx",
)
assert ser.dtype == object
exp = pd.Series(
[
pd.Timestamp("2015-01-02", tz="US/Eastern"),
pd.Timestamp("2015-01-02", tz="Asia/Tokyo"),
],
name="xxx",
)
tm.assert_series_equal(ser + pd.Timedelta("1 days"), exp)
tm.assert_series_equal(pd.Timedelta("1 days") + ser, exp)
# object series & object series
ser2 = pd.Series(
[
pd.Timestamp("2015-01-03", tz="US/Eastern"),
pd.Timestamp("2015-01-05", tz="Asia/Tokyo"),
],
name="xxx",
)
assert ser2.dtype == object
exp = pd.Series([pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx")
tm.assert_series_equal(ser2 - ser, exp)
tm.assert_series_equal(ser - ser2, -exp)
ser = pd.Series(
[pd.Timedelta("01:00:00"), pd.Timedelta("02:00:00")],
name="xxx",
dtype=object,
)
assert ser.dtype == object
exp = pd.Series(
[pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")], name="xxx"
)
tm.assert_series_equal(ser + pd.Timedelta("00:30:00"), exp)
tm.assert_series_equal(pd.Timedelta("00:30:00") + ser, exp)
# TODO: cleanup & parametrize over box
def test_iadd_preserves_name(self):
# GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name
ser = pd.Series([1, 2, 3])
ser.index.name = "foo"
ser.index += 1
assert ser.index.name == "foo"
ser.index -= 1
assert ser.index.name == "foo"
def test_add_string(self):
# from bug report
index = pd.Index(["a", "b", "c"])
index2 = index + "foo"
assert "a" not in index2
assert "afoo" in index2
def test_iadd_string(self):
index = pd.Index(["a", "b", "c"])
# doesn't fail test unless there is a check before `+=`
assert "a" in index
index += "_x"
assert "a_x" in index
def test_add(self):
index = tm.makeStringIndex(100)
expected = pd.Index(index.values * 2)
tm.assert_index_equal(index + index, expected)
tm.assert_index_equal(index + index.tolist(), expected)
tm.assert_index_equal(index.tolist() + index, expected)
# test add and radd
index = pd.Index(list("abc"))
expected = pd.Index(["a1", "b1", "c1"])
tm.assert_index_equal(index + "1", expected)
expected = pd.Index(["1a", "1b", "1c"])
tm.assert_index_equal("1" + index, expected)
def test_sub_fail(self):
index = tm.makeStringIndex(100)
with pytest.raises(TypeError):
index - "a"
with pytest.raises(TypeError):
index - index
with pytest.raises(TypeError):
index - index.tolist()
with pytest.raises(TypeError):
index.tolist() - index
def test_sub_object(self):
# GH#19369
index = pd.Index([Decimal(1), Decimal(2)])
expected = pd.Index([Decimal(0), Decimal(1)])
result = index - Decimal(1)
tm.assert_index_equal(result, expected)
result = index - pd.Index([Decimal(1), Decimal(1)])
tm.assert_index_equal(result, expected)
with pytest.raises(TypeError):
index - "foo"
with pytest.raises(TypeError):
index - np.array([2, "foo"])
def test_rsub_object(self):
# GH#19369
index = pd.Index([Decimal(1), Decimal(2)])
expected = pd.Index([Decimal(1), Decimal(0)])
result = Decimal(2) - index
tm.assert_index_equal(result, expected)
result = np.array([Decimal(2), Decimal(2)]) - index
tm.assert_index_equal(result, expected)
with pytest.raises(TypeError):
"foo" - index
with pytest.raises(TypeError):
np.array([True, pd.Timestamp.now()]) - index

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff