8th day of python challenges 111-117
This commit is contained in:
@@ -0,0 +1,119 @@
|
||||
import string
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSeriesAccessor:
    """Tests for the ``Series.sparse`` accessor."""

    # TODO: collect other Series accessor tests
    def test_to_dense(self):
        """``Series.sparse.to_dense`` returns the plain Series holding the same values."""
        sparse_ser = pd.Series([0, 1, 0, 10], dtype="Sparse[int64]")
        dense_ser = pd.Series([0, 1, 0, 10])
        tm.assert_series_equal(sparse_ser.sparse.to_dense(), dense_ser)
|
||||
|
||||
|
||||
class TestFrameAccessor:
    """Tests for the ``DataFrame.sparse`` accessor and ``Series.sparse.from_coo``."""

    def test_accessor_raises(self):
        """Accessing ``.sparse`` on a frame with dense columns raises AttributeError."""
        df = pd.DataFrame({"A": [0, 1]})
        with pytest.raises(AttributeError, match="sparse"):
            df.sparse

    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
    @pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])])
    @pytest.mark.parametrize("dtype", ["float64", "int64"])
    @td.skip_if_no_scipy
    def test_from_spmatrix(self, format, labels, dtype):
        """``from_spmatrix`` on an identity matrix matches ``np.eye`` cast to sparse."""
        import scipy.sparse

        # fill value is a zero of the requested dtype, as a Python scalar
        sp_dtype = pd.SparseDtype(dtype, np.array(0, dtype=dtype).item())

        mat = scipy.sparse.eye(10, format=format, dtype=dtype)
        result = pd.DataFrame.sparse.from_spmatrix(mat, index=labels, columns=labels)
        expected = pd.DataFrame(
            np.eye(10, dtype=dtype), index=labels, columns=labels
        ).astype(sp_dtype)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "columns",
        [["a", "b"], pd.MultiIndex.from_product([["A"], ["a", "b"]]), ["a", "a"]],
    )
    @td.skip_if_no_scipy
    def test_from_spmatrix_columns(self, columns):
        """``from_spmatrix`` accepts flat, MultiIndex and duplicated column labels."""
        import scipy.sparse

        dtype = pd.SparseDtype("float64", 0.0)

        # Seeded so that a failure can be reproduced with the same matrix.
        mat = scipy.sparse.random(10, 2, density=0.5, random_state=0)
        result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns)
        expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype)
        tm.assert_frame_equal(result, expected)

    @td.skip_if_no_scipy
    def test_to_coo(self):
        """``DataFrame.sparse.to_coo`` matches a coo_matrix built from the dense values."""
        import scipy.sparse

        df = pd.DataFrame({"A": [0, 1, 0], "B": [1, 0, 0]}, dtype="Sparse[int64, 0]")
        result = df.sparse.to_coo()
        expected = scipy.sparse.coo_matrix(np.asarray(df))
        # element-wise inequality of two sparse matrices has no stored entries
        # exactly when the matrices are equal
        assert (result != expected).nnz == 0

    def test_to_dense(self):
        """``DataFrame.sparse.to_dense`` densifies columns with differing fill values."""
        # pd.arrays.SparseArray is the same class as the top-level pd.SparseArray
        # alias, which later pandas releases deprecated and removed.
        SparseArray = pd.arrays.SparseArray
        df = pd.DataFrame(
            {
                "A": SparseArray([1, 0], dtype=pd.SparseDtype("int64", 0)),
                "B": SparseArray([1, 0], dtype=pd.SparseDtype("int64", 1)),
                "C": SparseArray([1.0, 0.0], dtype=pd.SparseDtype("float64", 0.0)),
            },
            index=["b", "a"],
        )
        result = df.sparse.to_dense()
        expected = pd.DataFrame(
            {"A": [1, 0], "B": [1, 0], "C": [1.0, 0.0]}, index=["b", "a"]
        )
        tm.assert_frame_equal(result, expected)

    def test_density(self):
        """``DataFrame.sparse.density`` is 0.75 for 6 non-fill values out of 8."""
        SparseArray = pd.arrays.SparseArray
        df = pd.DataFrame(
            {
                "A": SparseArray([1, 0, 2, 1], fill_value=0),
                "B": SparseArray([0, 1, 1, 1], fill_value=0),
            }
        )
        res = df.sparse.density
        expected = 0.75
        assert res == expected

    @pytest.mark.parametrize("dtype", ["int64", "float64"])
    @pytest.mark.parametrize("dense_index", [True, False])
    @td.skip_if_no_scipy
    def test_series_from_coo(self, dtype, dense_index):
        """``Series.sparse.from_coo`` builds a MultiIndexed sparse Series."""
        import scipy.sparse

        A = scipy.sparse.eye(3, format="coo", dtype=dtype)
        result = pd.Series.sparse.from_coo(A, dense_index=dense_index)
        index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
        expected = pd.Series(
            pd.arrays.SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index
        )
        if dense_index:
            # a dense index covers the full product of row and column labels
            expected = expected.reindex(pd.MultiIndex.from_product(index.levels))

        tm.assert_series_equal(result, expected)

    @td.skip_if_no_scipy
    def test_series_from_coo_incorrect_format_raises(self):
        """Passing a csr_matrix to ``from_coo`` raises a TypeError (gh-26554)."""
        # gh-26554
        import scipy.sparse

        m = scipy.sparse.csr_matrix(np.array([[0, 1], [0, 0]]))
        with pytest.raises(
            TypeError, match="Expected coo_matrix. Got csr_matrix instead."
        ):
            pd.Series.sparse.from_coo(m)
|
@@ -0,0 +1,506 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core import ops
|
||||
from pandas.core.sparse.api import SparseDtype
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(params=["integer", "block"])
def kind(request):
    """Parametrized ``kind`` keyword ("integer" or "block") for SparseArray/SparseSeries."""
    sparse_kind = request.param
    return sparse_kind
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def mix(request):
    """Whether to operate op(sparse, dense) instead of op(sparse, sparse)."""
    use_dense_rhs = request.param
    return use_dense_rhs
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
class TestSparseArrayArithmetics:
    """Arithmetic, comparison and logical operator tests for sparse containers.

    ``_base`` builds the dense reference data and ``_klass`` builds the sparse
    object under test; subclasses override both (and ``_assert``) to reuse the
    same tests for another container type.

    The ``all_arithmetic_functions`` fixture is not defined in this module --
    presumably it comes from a shared conftest; confirm before moving the file.
    """

    _base = np.array
    # Same class as the top-level ``pd.SparseArray`` alias, which later pandas
    # releases deprecated and removed.
    _klass = pd.arrays.SparseArray

    def _assert(self, a, b):
        """Assert that the densified result ``a`` equals the dense expectation ``b``."""
        tm.assert_numpy_array_equal(a, b)

    def _check_numeric_ops(self, a, b, a_dense, b_dense, mix, op):
        """Check ``op`` on sparse operands against the same ``op`` on dense operands.

        Parameters
        ----------
        a, b : sparse operands (``b`` may also be a scalar)
        a_dense, b_dense : dense counterparts of ``a`` and ``b``
        mix : bool
            If True evaluate ``op(a, b_dense)``, otherwise ``op(a, b)``.
        op : callable
            Binary arithmetic function.
        """
        is_floordiv = op in [operator.floordiv, ops.rfloordiv]
        is_truediv = op in [operator.truediv, ops.rtruediv]

        with np.errstate(invalid="ignore", divide="ignore"):
            if is_floordiv:
                # FIXME: GH#13843
                if self._base is pd.Series and a.dtype.subtype == np.dtype("int64"):
                    pytest.xfail("Not defined/working. See GH#13843")

            if mix:
                result = op(a, b_dense).to_dense()
            else:
                result = op(a, b).to_dense()

            if is_truediv:
                # pandas uses future division
                expected = op(a_dense * 1.0, b_dense)
            else:
                expected = op(a_dense, b_dense)

            if is_floordiv:
                # Series sets 1//0 to np.inf, which SparseArray does not do (yet)
                mask = np.isinf(expected)
                if mask.any():
                    expected[mask] = np.nan

            self._assert(result, expected)

    def _check_bool_result(self, res):
        """Assert ``res`` is a ``_klass`` with a boolean SparseDtype and bool fill value."""
        assert isinstance(res, self._klass)
        assert isinstance(res.dtype, SparseDtype)
        # builtin ``bool`` instead of the ``np.bool`` alias, which NumPy removed
        assert res.dtype.subtype == np.dtype(bool)
        assert isinstance(res.fill_value, bool)

    def _check_comparison_ops(self, a, b, a_dense, b_dense):
        """Check all six comparison operators for sparse/sparse and sparse/dense."""
        comparisons = (
            operator.eq,
            operator.ne,
            operator.ge,
            operator.le,
            operator.gt,
            operator.lt,
        )
        # Wrapping the computation of each expected value in its own
        # np.errstate() would be too tedious, so one context covers them all.
        with np.errstate(invalid="ignore"):
            # first sparse & sparse, then sparse & dense
            for rhs in (b, b_dense):
                for compare in comparisons:
                    self._check_bool_result(compare(a, rhs))
                    self._assert(
                        compare(a, rhs).to_dense(), compare(a_dense, b_dense)
                    )

    def _check_logical_ops(self, a, b, a_dense, b_dense):
        """Check ``&`` and ``|`` for sparse/sparse and sparse/dense operands."""
        # first sparse & sparse, then sparse & dense
        for rhs in (b, b_dense):
            for logical in (operator.and_, operator.or_):
                self._check_bool_result(logical(a, rhs))
                self._assert(logical(a, rhs).to_dense(), logical(a_dense, b_dense))

    @pytest.mark.parametrize("scalar", [0, 1, 3])
    @pytest.mark.parametrize("fill_value", [None, 0, 2])
    def test_float_scalar(
        self, kind, mix, all_arithmetic_functions, fill_value, scalar
    ):
        """Arithmetic between a float sparse container and an integer scalar."""
        op = all_arithmetic_functions
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

        a = self._klass(values, kind=kind, fill_value=fill_value)
        self._check_numeric_ops(a, scalar, values, scalar, mix, op)

    def test_float_scalar_comparison(self, kind):
        """Comparisons against scalars for default, 0 and 2 fill values."""
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

        # default fill value first, then explicit fill values 0 and 2
        for fill_kwargs in ({}, {"fill_value": 0}, {"fill_value": 2}):
            a = self._klass(values, kind=kind, **fill_kwargs)
            for scalar in (1, 0, 3):
                self._check_comparison_ops(a, scalar, values, scalar)

    def test_float_same_index(self, kind, mix, all_arithmetic_functions):
        """Arithmetic when both operands have the same sp_index."""
        op = all_arithmetic_functions
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])

        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        values = self._base([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0])
        rvalues = self._base([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0])

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind, fill_value=0)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_same_index_comparison(self, kind):
        """Comparisons when both operands have the same sp_index."""
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])

        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)

        values = self._base([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0])
        rvalues = self._base([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0])

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind, fill_value=0)
        self._check_comparison_ops(a, b, values, rvalues)

    def test_float_array(self, kind, mix, all_arithmetic_functions):
        """Float arithmetic across several fill-value combinations."""
        op = all_arithmetic_functions

        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind, fill_value=0)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, kind=kind, fill_value=1)
        b = self._klass(rvalues, kind=kind, fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_array_different_kind(self, mix, all_arithmetic_functions):
        """Float arithmetic between an "integer"-kind and a "block"-kind operand."""
        op = all_arithmetic_functions

        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        a = self._klass(values, kind="integer")
        b = self._klass(rvalues, kind="block")
        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = self._klass(values, kind="integer", fill_value=0)
        b = self._klass(rvalues, kind="block")
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, kind="integer", fill_value=0)
        b = self._klass(rvalues, kind="block", fill_value=0)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, kind="integer", fill_value=1)
        b = self._klass(rvalues, kind="block", fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_array_comparison(self, kind):
        """Float comparisons across several fill-value combinations."""
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind, fill_value=0)
        self._check_comparison_ops(a, b, values, rvalues)

        a = self._klass(values, kind=kind, fill_value=1)
        b = self._klass(rvalues, kind=kind, fill_value=2)
        self._check_comparison_ops(a, b, values, rvalues)

    def test_int_array(self, kind, mix, all_arithmetic_functions):
        """int64 arithmetic across several fill-value combinations."""
        op = all_arithmetic_functions

        # have to specify dtype explicitly until fixing GH 667
        dtype = np.int64

        values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)

        a = self._klass(values, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype)
        b = self._klass(rvalues, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype)

        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype)
        b = self._klass(rvalues, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype)

        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype)
        b = self._klass(rvalues, fill_value=0, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, fill_value=1, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype, fill_value=1)
        b = self._klass(rvalues, fill_value=2, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype, fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_int_array_comparison(self, kind):
        """int64 comparisons across several fill-value combinations."""
        dtype = "int64"
        # int32 NI ATM

        values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)

        a = self._klass(values, dtype=dtype, kind=kind)
        b = self._klass(rvalues, dtype=dtype, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        a = self._klass(values, dtype=dtype, kind=kind, fill_value=0)
        b = self._klass(rvalues, dtype=dtype, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)

        a = self._klass(values, dtype=dtype, kind=kind, fill_value=0)
        b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=0)
        self._check_comparison_ops(a, b, values, rvalues)

        a = self._klass(values, dtype=dtype, kind=kind, fill_value=1)
        b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=2)
        self._check_comparison_ops(a, b, values, rvalues)

    @pytest.mark.parametrize("fill_value", [True, False, np.nan])
    def test_bool_same_index(self, kind, fill_value):
        """Logical ops on boolean operands holding identical values (GH 14000)."""
        # GH 14000
        # when sp_index are the same
        values = self._base([True, False, True, True], dtype=bool)
        rvalues = self._base([True, False, True, True], dtype=bool)

        a = self._klass(values, kind=kind, dtype=bool, fill_value=fill_value)
        b = self._klass(rvalues, kind=kind, dtype=bool, fill_value=fill_value)
        self._check_logical_ops(a, b, values, rvalues)

    @pytest.mark.parametrize("fill_value", [True, False, np.nan])
    def test_bool_array_logical(self, kind, fill_value):
        """Logical ops on boolean operands holding different values (GH 14000)."""
        # GH 14000
        # the operands differ element-wise, so for a True/False fill value the
        # positions equal to the fill value differ between them
        values = self._base([True, False, True, False, True, True], dtype=bool)
        rvalues = self._base([True, False, False, True, False, True], dtype=bool)

        a = self._klass(values, kind=kind, dtype=bool, fill_value=fill_value)
        b = self._klass(rvalues, kind=kind, dtype=bool, fill_value=fill_value)
        self._check_logical_ops(a, b, values, rvalues)

    def test_mixed_array_float_int(self, kind, mix, all_arithmetic_functions):
        """Arithmetic between a float operand and an int64 operand."""
        op = all_arithmetic_functions

        rdtype = "int64"

        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)

        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)

        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind, fill_value=0)
        assert b.dtype == SparseDtype(rdtype)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = self._klass(values, kind=kind, fill_value=1)
        b = self._klass(rvalues, kind=kind, fill_value=2)
        assert b.dtype == SparseDtype(rdtype, fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_mixed_array_comparison(self, kind):
        """Comparisons between a float operand and an int64 operand."""
        rdtype = "int64"
        # int32 NI ATM

        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)

        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)

        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)
        self._check_comparison_ops(a, b, values, rvalues)

        a = self._klass(values, kind=kind, fill_value=0)
        b = self._klass(rvalues, kind=kind, fill_value=0)
        assert b.dtype == SparseDtype(rdtype)
        self._check_comparison_ops(a, b, values, rvalues)

        a = self._klass(values, kind=kind, fill_value=1)
        b = self._klass(rvalues, kind=kind, fill_value=2)
        assert b.dtype == SparseDtype(rdtype, fill_value=2)
        self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
|
||||
class TestSparseSeriesArithmetic(TestSparseArrayArithmetics):
    """Re-run the inherited checks with ``pd.Series`` / ``pd.SparseSeries`` operands."""

    _base = pd.Series
    _klass = pd.SparseSeries

    def _assert(self, a, b):
        """Compare the densified result with the dense expectation as Series."""
        tm.assert_series_equal(a, b)

    def test_alignment(self, mix, all_arithmetic_functions):
        """Arithmetic between operands whose indexes partly or never overlap."""
        op = all_arithmetic_functions

        # [1, 2, 3, 4] overlaps the default 0..3 index; [10, 11, 12, 13] does not
        for other_index in ([1, 2, 3, 4], [10, 11, 12, 13]):
            da = pd.Series(np.arange(4))
            db = pd.Series(np.arange(4), index=other_index)

            for fill_value in (0, np.nan):
                sa = pd.SparseSeries(
                    np.arange(4), dtype=np.int64, fill_value=fill_value
                )
                sb = pd.SparseSeries(
                    np.arange(4),
                    index=other_index,
                    dtype=np.int64,
                    fill_value=fill_value,
                )
                self._check_numeric_ops(sa, sb, da, db, mix, op)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", [operator.eq, operator.add])
def test_with_list(op):
    """A plain list operand gives the same result as an equivalent SparseArray."""
    # pd.arrays.SparseArray is the same class as the top-level pd.SparseArray
    # alias, which later pandas releases deprecated and removed.
    arr = pd.arrays.SparseArray([0, 1], fill_value=0)
    result = op(arr, [0, 1])
    expected = op(arr, pd.arrays.SparseArray([0, 1]))
    tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
# pd.arrays.SparseArray is the same class as the top-level pd.SparseArray alias,
# which later pandas releases deprecated and removed.
@pytest.mark.parametrize("ufunc", [np.abs, np.exp])
@pytest.mark.parametrize(
    "arr",
    [
        pd.arrays.SparseArray([0, 0, -1, 1]),
        pd.arrays.SparseArray([None, None, -1, 1]),
    ],
)
def test_ufuncs(ufunc, arr):
    """A unary ufunc on a SparseArray matches the ufunc applied to its dense values.

    The expected fill value is the ufunc applied to the original fill value.
    """
    result = ufunc(arr)
    fill_value = ufunc(arr.fill_value)
    expected = pd.arrays.SparseArray(ufunc(np.asarray(arr)), fill_value=fill_value)
    tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
# The original parameter list repeated the ``fill_value=1`` case four times;
# the exact duplicates exercised nothing new and were collapsed into one.
@pytest.mark.parametrize(
    "a, b",
    [
        (pd.arrays.SparseArray([0, 0, 0]), np.array([0, 1, 2])),
        (pd.arrays.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
    ],
)
@pytest.mark.parametrize("ufunc", [np.add, np.greater])
def test_binary_ufuncs(ufunc, a, b):
    """A binary ufunc on (SparseArray, ndarray) returns a SparseArray with dense-equal values."""
    # can't say anything about fill value here.
    result = ufunc(a, b)
    expected = ufunc(np.asarray(a), np.asarray(b))
    assert isinstance(result, pd.arrays.SparseArray)
    tm.assert_numpy_array_equal(np.asarray(result), expected)
|
||||
|
||||
|
||||
def test_ndarray_inplace():
    """In-place ``ndarray += SparseArray`` updates the ndarray with the dense sum."""
    # pd.arrays.SparseArray is the same class as the top-level pd.SparseArray
    # alias, which later pandas releases deprecated and removed.
    sparray = pd.arrays.SparseArray([0, 2, 0, 0])
    ndarray = np.array([0, 1, 2, 3])
    ndarray += sparray
    expected = np.array([0, 3, 2, 3])
    tm.assert_numpy_array_equal(ndarray, expected)
|
||||
|
||||
|
||||
def test_sparray_inplace():
    """In-place ``SparseArray += ndarray`` yields a SparseArray holding the sum."""
    # pd.arrays.SparseArray is the same class as the top-level pd.SparseArray
    # alias, which later pandas releases deprecated and removed.
    sparray = pd.arrays.SparseArray([0, 2, 0, 0])
    ndarray = np.array([0, 1, 2, 3])
    sparray += ndarray
    expected = pd.arrays.SparseArray([0, 3, 2, 3], fill_value=0)
    tm.assert_sp_array_equal(sparray, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [True, False])
def test_invert(fill_value):
    """``~SparseArray`` inverts both the stored values and the fill value."""
    # pd.arrays.SparseArray is the same class as the top-level pd.SparseArray
    # alias, which later pandas releases deprecated and removed.
    arr = np.array([True, False, False, True])
    sparray = pd.arrays.SparseArray(arr, fill_value=fill_value)
    result = ~sparray
    expected = pd.arrays.SparseArray(~arr, fill_value=not fill_value)
    tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [0, np.nan])
@pytest.mark.parametrize("op", [operator.pos, operator.neg])
def test_unary_op(op, fill_value):
    """Unary ``+`` / ``-`` apply to both the stored values and the fill value."""
    # pd.arrays.SparseArray is the same class as the top-level pd.SparseArray
    # alias, which later pandas releases deprecated and removed.
    arr = np.array([0, 1, np.nan, 2])
    sparray = pd.arrays.SparseArray(arr, fill_value=fill_value)
    result = op(sparray)
    expected = pd.arrays.SparseArray(op(arr), fill_value=op(fill_value))
    tm.assert_sp_array_equal(result, expected)
|
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,183 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.sparse.api import SparseDtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, fill_value",
    [
        ("int", 0),
        ("float", np.nan),
        ("bool", False),
        ("object", np.nan),
        ("datetime64[ns]", pd.NaT),
        ("timedelta64[ns]", pd.NaT),
    ],
)
def test_inferred_dtype(dtype, fill_value):
    """``SparseDtype(dtype)`` infers the expected default fill value for ``dtype``."""
    sparse_dtype = SparseDtype(dtype)
    result = sparse_dtype.fill_value
    if pd.isna(fill_value):
        # NaN and NaT are both "na", so the exact type is what tells them apart.
        assert pd.isna(result)
        assert type(result) is type(fill_value)
    else:
        assert result == fill_value
|
||||
|
||||
|
||||
def test_from_sparse_dtype():
    """Building a SparseDtype from another SparseDtype keeps its fill value."""
    source = SparseDtype("float", 0)
    rebuilt = SparseDtype(source)
    assert rebuilt.fill_value == 0
|
||||
|
||||
|
||||
def test_from_sparse_dtype_fill_value():
    """An explicit ``fill_value`` overrides the one carried by the source SparseDtype."""
    source = SparseDtype("int", 1)
    overridden = SparseDtype(source, fill_value=2)
    assert overridden == SparseDtype("int", 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, fill_value",
    [
        ("int", None),
        ("float", None),
        ("bool", None),
        ("object", None),
        ("datetime64[ns]", None),
        ("timedelta64[ns]", None),
        ("int", np.nan),
        ("float", 0),
    ],
)
def test_equal(dtype, fill_value):
    """Two SparseDtypes built from identical arguments are equal in both directions."""
    left = SparseDtype(dtype, fill_value)
    right = SparseDtype(dtype, fill_value)
    assert left == right
    assert right == left
|
||||
|
||||
|
||||
def test_nans_equal():
    """``float("nan")`` and ``np.nan`` fill values give equal SparseDtypes."""
    python_nan = SparseDtype(float, float("nan"))
    numpy_nan = SparseDtype(float, np.nan)
    assert python_nan == numpy_nan
    assert numpy_nan == python_nan
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b",
    [
        (SparseDtype("float64"), SparseDtype("float32")),
        (SparseDtype("float64"), SparseDtype("float64", 0)),
        (SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)),
        (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)),
        (SparseDtype("float64"), np.dtype("float64")),
    ],
)
def test_not_equal(a, b):
    """Each parametrized pair of dtypes must compare unequal."""
    assert a != b
|
||||
|
||||
|
||||
def test_construct_from_string_raises():
    """``construct_from_string`` raises TypeError for a string that is not a dtype."""
    bad_string = "not a dtype"
    with pytest.raises(TypeError):
        SparseDtype.construct_from_string(bad_string)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, expected",
    [
        (SparseDtype(int), True),
        (SparseDtype(float), True),
        (SparseDtype(bool), True),
        (SparseDtype(object), False),
        (SparseDtype(str), False),
    ],
)
def test_is_numeric(dtype, expected):
    """``_is_numeric`` is exactly ``expected`` (identity, not just truthiness)."""
    is_numeric = dtype._is_numeric
    assert is_numeric is expected
|
||||
|
||||
|
||||
def test_str_uses_object():
    """``SparseDtype(str)`` has ``object`` as its subtype."""
    subtype = SparseDtype(str).subtype
    assert subtype == np.dtype("object")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "string, expected",
    [
        ("Sparse[float64]", SparseDtype(np.dtype("float64"))),
        ("Sparse[float32]", SparseDtype(np.dtype("float32"))),
        ("Sparse[int]", SparseDtype(np.dtype("int"))),
        ("Sparse[str]", SparseDtype(np.dtype("str"))),
        ("Sparse[datetime64[ns]]", SparseDtype(np.dtype("datetime64[ns]"))),
        ("Sparse", SparseDtype(np.dtype("float"), np.nan)),
    ],
)
def test_construct_from_string(string, expected):
    """``construct_from_string`` parses each string into the expected SparseDtype."""
    parsed = SparseDtype.construct_from_string(string)
    assert parsed == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b, expected",
    [
        (SparseDtype(float, 0.0), SparseDtype(np.dtype("float"), 0.0), True),
        (SparseDtype(int, 0), SparseDtype(int, 0), True),
        (SparseDtype(float, float("nan")), SparseDtype(float, np.nan), True),
        (SparseDtype(float, 0), SparseDtype(float, np.nan), False),
        (SparseDtype(int, 0.0), SparseDtype(float, 0.0), False),
    ],
)
def test_hash_equal(a, b, expected):
    """Equality and hash equality of the two dtypes both match ``expected``."""
    dtypes_equal = a == b
    assert dtypes_equal is expected

    hashes_equal = hash(a) == hash(b)
    assert hashes_equal is expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "string, expected",
    [
        ("Sparse[int]", "int"),
        ("Sparse[int, 0]", "int"),
        ("Sparse[int64]", "int64"),
        ("Sparse[int64, 0]", "int64"),
        ("Sparse[datetime64[ns], 0]", "datetime64[ns]"),
    ],
)
def test_parse_subtype(string, expected):
    """The first item returned by ``_parse_subtype`` is the subtype name."""
    parsed = SparseDtype._parse_subtype(string)
    # only the first of the two returned items is checked here
    subtype, _ = parsed
    assert subtype == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "string", ["Sparse[int, 1]", "Sparse[float, 0.0]", "Sparse[bool, True]"]
)
def test_construct_from_string_fill_value_raises(string):
    """A fill value embedded in the dtype string is rejected with a TypeError."""
    message = "fill_value in the string is not"
    with pytest.raises(TypeError, match=message):
        SparseDtype.construct_from_string(string)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "original, dtype, expected",
    [
        (SparseDtype(int, 0), float, SparseDtype(float, 0.0)),
        (SparseDtype(int, 1), float, SparseDtype(float, 1.0)),
        (SparseDtype(int, 1), str, SparseDtype(object, "1")),
        (SparseDtype(float, 1.5), int, SparseDtype(int, 1)),
    ],
)
def test_update_dtype(original, dtype, expected):
    """``update_dtype`` yields the expected SparseDtype for each target dtype."""
    updated = original.update_dtype(dtype)
    assert updated == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "original, dtype",
    [(SparseDtype(float, np.nan), int), (SparseDtype(str, "abc"), int)],
)
def test_update_dtype_raises(original, dtype):
    """``update_dtype`` raises ValueError for the NaN and "abc" fill values with int."""
    expected_error = ValueError
    with pytest.raises(expected_error):
        original.update_dtype(dtype)
|
@@ -0,0 +1,601 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._libs.sparse as splib
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import Series
|
||||
from pandas.core.arrays.sparse import BlockIndex, IntIndex, _make_index
|
||||
import pandas.util.testing as tm
|
||||
|
||||
# Total index length passed to BlockIndex in the tests of this module.
TEST_LENGTH = 20

# Each case holds block locations/lengths for two indexes (x and y) and the
# locations/lengths expected for their intersection.
plain_case = {
    "xloc": [0, 7, 15],
    "xlen": [3, 5, 5],
    "yloc": [2, 9, 14],
    "ylen": [2, 3, 5],
    "intersect_loc": [2, 9, 15],
    "intersect_len": [1, 3, 4],
}
delete_blocks = {
    "xloc": [0, 5],
    "xlen": [4, 4],
    "yloc": [1],
    "ylen": [4],
    "intersect_loc": [1],
    "intersect_len": [3],
}
split_blocks = {
    "xloc": [0],
    "xlen": [10],
    "yloc": [0, 5],
    "ylen": [3, 7],
    "intersect_loc": [0, 5],
    "intersect_len": [3, 5],
}
skip_block = {
    "xloc": [10],
    "xlen": [5],
    "yloc": [0, 12],
    "ylen": [5, 3],
    "intersect_loc": [12],
    "intersect_len": [3],
}

no_intersect = {
    "xloc": [0, 10],
    "xlen": [4, 6],
    "yloc": [5, 17],
    "ylen": [4, 2],
    "intersect_loc": [],
    "intersect_len": [],
}
|
||||
|
||||
|
||||
def check_cases(_check_case):
    """Run ``_check_case`` against every shared block-layout fixture.

    Parameters
    ----------
    _check_case : callable
        Invoked positionally as
        ``_check_case(xloc, xlen, yloc, ylen, intersect_loc, intersect_len)``,
        once per module-level case dict and then for two hand-written cases
        in which one or both layouts are empty.
    """
    # Values are passed positionally because callers name the last two
    # parameters differently (e.g. ``eloc``/``elen``).
    keys = ("xloc", "xlen", "yloc", "ylen", "intersect_loc", "intersect_len")
    cases = (plain_case, delete_blocks, split_blocks, skip_block, no_intersect)
    for case in cases:
        _check_case(*(case[key] for key in keys))

    # one or both is empty
    _check_case([0], [5], [], [], [], [])
    _check_case([], [], [], [], [], [])
|
||||
|
||||
|
||||
class TestSparseIndexUnion:
    """Tests for ``make_union`` on ``BlockIndex`` and ``IntIndex``."""

    def test_index_make_union(self):
        """Union of two block layouts yields the expected merged blocks.

        Every case is checked twice: once on ``BlockIndex`` (comparing
        ``blocs``/``blengths`` with the expectation) and once on the
        ``IntIndex`` conversions, whose union must list the same indices.
        The diagrams are drawn to scale, one character per position.
        """

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
            bresult = xindex.make_union(yindex)
            assert isinstance(bresult, BlockIndex)
            tm.assert_numpy_array_equal(bresult.blocs, np.array(eloc, dtype=np.int32))
            tm.assert_numpy_array_equal(
                bresult.blengths, np.array(elen, dtype=np.int32)
            )

            # The integer-index union must agree with the block-index union.
            ixindex = xindex.to_int_index()
            iyindex = yindex.to_int_index()
            iresult = ixindex.make_union(iyindex)
            assert isinstance(iresult, IntIndex)
            tm.assert_numpy_array_equal(iresult.indices, bresult.to_int_index().indices)

        # y starts exactly where x ends: the two blocks fuse into one.
        # x: -----
        # y:      ----
        # r: ---------
        xloc = [0]
        xlen = [5]
        yloc = [5]
        ylen = [4]
        eloc = [0]
        elen = [9]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)

        # First blocks overlap; the remaining blocks stay separate.
        # x: -----     -----
        # y:   -----          --
        # r: -------   -----  --
        xloc = [0, 10]
        xlen = [5, 5]
        yloc = [2, 17]
        ylen = [5, 2]
        eloc = [0, 10, 17]
        elen = [7, 5, 2]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)

        # Two partially overlapping blocks.
        # x:  -----
        # y:    -----
        # r:  -------
        xloc = [1]
        xlen = [5]
        yloc = [3]
        ylen = [5]
        eloc = [1]
        elen = [7]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)

        # One y block bridges the gap between two x blocks.
        # x:   ----    ----
        # y:     --------
        # r:   ------------
        xloc = [2, 10]
        xlen = [4, 4]
        yloc = [4]
        ylen = [8]
        eloc = [2]
        elen = [12]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)

        # y covers the first x block and reaches into the second.
        # x: ---  -----
        # y: -------
        # r: ----------
        xloc = [0, 5]
        xlen = [3, 5]
        yloc = [0]
        ylen = [7]
        eloc = [0]
        elen = [10]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)

        # Bridging y block plus a trailing y block that extends the result.
        # x:   ----    ----
        # y:     -------- ----
        # r:   ---------------
        xloc = [2, 10]
        xlen = [4, 4]
        yloc = [4, 13]
        ylen = [8, 4]
        eloc = [2]
        elen = [15]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)

        # Every y block lies inside the single x block.
        # x:   ---------------
        # y:     ---  --   --
        # r:   ---------------
        xloc = [2]
        xlen = [15]
        yloc = [4, 9, 14]
        ylen = [3, 2, 2]
        eloc = [2]
        elen = [15]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)

        # Interleaved blocks that never touch: nothing is merged.
        # x: ---       ---
        # y:      --        --
        # r: ---  --   ---  --
        xloc = [0, 10]
        xlen = [3, 3]
        yloc = [5, 15]
        ylen = [2, 2]
        eloc = [0, 5, 10, 15]
        elen = [3, 2, 3, 2]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)

    def test_int_index_make_union(self):
        """``IntIndex.make_union`` merges indices and rejects length mismatch."""
        # (indices of a, indices of b, expected union); all of length 5.
        cases = [
            ([0, 3, 4], [0, 2], [0, 2, 3, 4]),
            ([], [0, 2], [0, 2]),
            ([], [], []),
            ([0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]),
        ]
        for left, right, expected in cases:
            a = IntIndex(5, np.array(left, dtype=np.int32))
            b = IntIndex(5, np.array(right, dtype=np.int32))
            res = a.make_union(b)
            exp = IntIndex(5, np.array(expected, np.int32))
            assert res.equals(exp)

        # Indexes built with different lengths cannot be combined.
        a = IntIndex(5, np.array([0, 1], dtype=np.int32))
        b = IntIndex(4, np.array([0, 1], dtype=np.int32))

        msg = "Indices must reference same underlying length"
        with pytest.raises(ValueError, match=msg):
            a.make_union(b)
|
||||
|
||||
|
||||
class TestSparseIndexIntersect:
    """Tests for ``intersect`` on ``BlockIndex`` and ``IntIndex``."""

    @td.skip_if_windows
    def test_intersect(self):
        """Intersection matches each shared fixture; length mismatch raises."""

        def _check_correct(a, b, expected):
            result = a.intersect(b)
            assert result.equals(expected)

        def _check_length_exc(a, longer):
            msg = "Indices must reference same underlying length"
            # NOTE(review): ``Exception`` is very broad; ``make_union`` is
            # tested against ``ValueError`` with this same message.  Narrow
            # this once the type raised by ``intersect`` is confirmed.
            with pytest.raises(Exception, match=msg):
                a.intersect(longer)

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
            expected = BlockIndex(TEST_LENGTH, eloc, elen)
            # Same blocks as y but built with a different length.
            longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen)

            _check_correct(xindex, yindex, expected)
            _check_correct(
                xindex.to_int_index(), yindex.to_int_index(), expected.to_int_index()
            )

            _check_length_exc(xindex, longer_index)
            _check_length_exc(xindex.to_int_index(), longer_index.to_int_index())

        check_cases(_check_case)

    def test_intersect_empty(self):
        """Intersecting with an empty index gives the empty index, either way."""
        xindex = IntIndex(4, np.array([], dtype=np.int32))
        yindex = IntIndex(4, np.array([2, 3], dtype=np.int32))
        assert xindex.intersect(yindex).equals(xindex)
        assert yindex.intersect(xindex).equals(xindex)

        # Repeat the same checks on the block representation.
        xindex = xindex.to_block_index()
        yindex = yindex.to_block_index()
        assert xindex.intersect(yindex).equals(xindex)
        assert yindex.intersect(xindex).equals(xindex)

    def test_intersect_identical(self):
        """An index intersected with itself equals itself."""
        cases = [
            IntIndex(5, np.array([1, 2], dtype=np.int32)),
            IntIndex(5, np.array([0, 2, 4], dtype=np.int32)),
            IntIndex(0, np.array([], dtype=np.int32)),
            IntIndex(5, np.array([], dtype=np.int32)),
        ]

        for case in cases:
            assert case.intersect(case).equals(case)
            case = case.to_block_index()
            assert case.intersect(case).equals(case)
|
||||
|
||||
|
||||
class TestSparseIndexCommon:
    """Checks shared by the integer and block flavours of ``_make_index``."""

    def test_int_internal(self):
        """``kind="integer"`` yields an ``IntIndex`` holding the given indices."""
        for indices in ([2, 3], [], [0, 1, 2, 3]):
            idx = _make_index(4, np.array(indices, dtype=np.int32), kind="integer")
            assert isinstance(idx, IntIndex)
            assert idx.npoints == len(indices)
            tm.assert_numpy_array_equal(idx.indices, np.array(indices, dtype=np.int32))

    def test_block_internal(self):
        """``kind="block"`` groups consecutive indices into blocks."""
        # (indices, expected npoints, expected blocs, expected blengths)
        cases = [
            ([2, 3], 2, [2], [2]),
            ([], 0, [], []),
            ([0, 1, 2, 3], 4, [0], [4]),
            ([0, 2, 3], 3, [0, 2], [1, 2]),
        ]
        for indices, npoints, blocs, blengths in cases:
            idx = _make_index(4, np.array(indices, dtype=np.int32), kind="block")
            assert isinstance(idx, BlockIndex)
            assert idx.npoints == npoints
            tm.assert_numpy_array_equal(idx.blocs, np.array(blocs, dtype=np.int32))
            tm.assert_numpy_array_equal(
                idx.blengths, np.array(blengths, dtype=np.int32)
            )

    def test_lookup(self):
        """``lookup`` returns the stored position of an index, or -1."""
        # (indices, expected lookup results for positions -1, 0, 1, 2, 3, 4)
        cases = [
            ([2, 3], [-1, -1, -1, 0, 1, -1]),
            ([], [-1, -1, -1, -1, -1, -1]),
            ([0, 1, 2, 3], [-1, 0, 1, 2, 3, -1]),
            ([0, 2, 3], [-1, 0, -1, 1, 2, -1]),
        ]
        for kind in ["integer", "block"]:
            for indices, expected in cases:
                idx = _make_index(4, np.array(indices, dtype=np.int32), kind=kind)
                for position, location in zip(range(-1, 5), expected):
                    assert idx.lookup(position) == location

    def test_lookup_array(self):
        """``lookup_array`` is the element-wise counterpart of ``lookup``."""
        for kind in ["integer", "block"]:
            idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind)

            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
            exp = np.array([-1, -1, 0], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
            exp = np.array([-1, 0, -1, 1], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            idx = _make_index(4, np.array([], dtype=np.int32), kind=kind)
            res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32))
            exp = np.array([-1, -1, -1, -1], dtype=np.int32)
            # This comparison was missing, so the empty-index case above was
            # computed but never actually checked.
            tm.assert_numpy_array_equal(res, exp)

            idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind)
            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
            exp = np.array([-1, 0, 2], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
            exp = np.array([-1, 2, 1, 3], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind)
            res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32))
            exp = np.array([1, -1, 2, 0], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32))
            exp = np.array([-1, -1, 1, -1], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

    def test_lookup_basics(self):
        """``lookup`` agrees between a ``BlockIndex`` and its ``IntIndex``."""

        def _check(index):
            # Blocks are [5, 8) and [12, 18); stored positions run 0..8.
            assert index.lookup(0) == -1
            assert index.lookup(5) == 0
            assert index.lookup(7) == 2
            assert index.lookup(8) == -1
            assert index.lookup(9) == -1
            assert index.lookup(10) == -1
            assert index.lookup(11) == -1
            assert index.lookup(12) == 3
            assert index.lookup(17) == 8
            assert index.lookup(18) == -1

        bindex = BlockIndex(20, [5, 12], [3, 6])
        iindex = bindex.to_int_index()

        _check(bindex)
        _check(iindex)

        # TODO: corner cases are not covered yet
|
||||
|
||||
|
||||
class TestBlockIndex:
    """Tests specific to ``BlockIndex``."""

    def test_block_internal(self):
        """``kind="block"`` groups consecutive indices into blocks."""
        # (indices, expected npoints, expected blocs, expected blengths)
        cases = [
            ([2, 3], 2, [2], [2]),
            ([], 0, [], []),
            ([0, 1, 2, 3], 4, [0], [4]),
            ([0, 2, 3], 3, [0, 2], [1, 2]),
        ]
        for indices, npoints, blocs, blengths in cases:
            idx = _make_index(4, np.array(indices, dtype=np.int32), kind="block")
            assert isinstance(idx, BlockIndex)
            assert idx.npoints == npoints
            tm.assert_numpy_array_equal(idx.blocs, np.array(blocs, dtype=np.int32))
            tm.assert_numpy_array_equal(
                idx.blengths, np.array(blengths, dtype=np.int32)
            )

    def test_make_block_boundary(self):
        """Every second index gives one length-1 block per index."""
        for i in [5, 10, 100, 101]:
            idx = _make_index(i, np.arange(0, i, 2, dtype=np.int32), kind="block")

            exp = np.arange(0, i, 2, dtype=np.int32)
            tm.assert_numpy_array_equal(idx.blocs, exp)
            tm.assert_numpy_array_equal(idx.blengths, np.ones(len(exp), dtype=np.int32))

    def test_equals(self):
        """An index equals itself but not one with a different block length."""
        index = BlockIndex(10, [0, 4], [2, 5])

        assert index.equals(index)
        assert not index.equals(BlockIndex(10, [0, 4], [2, 6]))

    def test_check_integrity(self):
        """Construction accepts empty layouts and rejects invalid blocks."""
        locs = []
        lengths = []

        # The two constructions below are the check: they must not raise, so
        # the resulting objects are deliberately not bound to a name.
        # 0-length OK
        BlockIndex(0, locs, lengths)

        # also OK even though empty
        BlockIndex(1, locs, lengths)

        msg = "Block 0 extends beyond end"
        with pytest.raises(ValueError, match=msg):
            BlockIndex(10, [5], [10])

        msg = "Block 0 overlaps"
        with pytest.raises(ValueError, match=msg):
            BlockIndex(10, [2, 5], [5, 3])

    def test_to_int_index(self):
        """``to_int_index`` expands every block into its individual indices."""
        locs = [0, 10]
        lengths = [4, 6]
        exp_inds = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15]

        block = BlockIndex(20, locs, lengths)
        dense = block.to_int_index()

        tm.assert_numpy_array_equal(dense.indices, np.array(exp_inds, dtype=np.int32))

    def test_to_block_index(self):
        """``to_block_index`` on a ``BlockIndex`` returns the same object."""
        index = BlockIndex(10, [0, 5], [4, 5])
        assert index.to_block_index() is index
|
||||
|
||||
|
||||
class TestIntIndex:
    """Tests specific to ``IntIndex``."""

    def test_check_integrity(self):
        """Construction rejects index lists that violate the invariants."""
        # More indices than the declared length allows.
        msg = "Too many indices"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=1, indices=[1, 2, 3])

        # No index can be negative.  (A second, verbatim copy of this check
        # was removed; it exercised nothing new.)
        msg = "No index can be less than zero"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, -2, 3])

        # All indices must be less than the length.
        msg = "All indices must be less than the length"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 2, 5])

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 2, 6])

        # Indices must be strictly ascending: neither out of order nor repeated.
        msg = "Indices must be strictly increasing"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 3, 2])

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 3, 3])

    def test_int_internal(self):
        """``kind="integer"`` yields an ``IntIndex`` holding the given indices."""
        for indices in ([2, 3], [], [0, 1, 2, 3]):
            idx = _make_index(4, np.array(indices, dtype=np.int32), kind="integer")
            assert isinstance(idx, IntIndex)
            assert idx.npoints == len(indices)
            tm.assert_numpy_array_equal(idx.indices, np.array(indices, dtype=np.int32))

    def test_equals(self):
        """An index equals itself but not one with fewer indices."""
        index = IntIndex(10, [0, 1, 2, 3, 4])
        assert index.equals(index)
        assert not index.equals(IntIndex(10, [0, 1, 2, 3]))

    def test_to_block_index(self):
        """Block -> int -> block conversion reproduces the original index."""

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            # ``eloc``/``elen`` are unused here; they are accepted only so the
            # callback matches the signature ``check_cases`` calls with.
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

            # see if survive the round trip
            xbindex = xindex.to_int_index().to_block_index()
            ybindex = yindex.to_int_index().to_block_index()
            assert isinstance(xbindex, BlockIndex)
            assert isinstance(ybindex, BlockIndex)
            assert xbindex.equals(xindex)
            assert ybindex.equals(yindex)

        check_cases(_check_case)

    def test_to_int_index(self):
        """``to_int_index`` on an ``IntIndex`` returns the same object."""
        index = IntIndex(10, [2, 3, 4, 5, 6])
        assert index.to_int_index() is index
|
||||
|
||||
|
||||
class TestSparseOperators:
    """Compare the ``splib`` sparse arithmetic kernels with ``Series`` maths."""

    def _op_tests(self, sparse_op, python_op):
        """Check ``sparse_op`` against ``python_op`` on every shared fixture.

        Parameters
        ----------
        sparse_op : callable
            Called as ``sparse_op(x, xindex, xfill, y, yindex, yfill)`` and
            expected to return ``(values, index, fill)``.
        python_op : callable
            Binary operator applied to two dense ``Series`` as the reference.
        """

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            # ``eloc``/``elen`` are unused; they only keep the signature that
            # ``check_cases`` calls with.
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

            xdindex = xindex.to_int_index()
            ydindex = yindex.to_int_index()

            # One value per stored point.
            x = np.arange(xindex.npoints) * 10.0 + 1
            y = np.arange(yindex.npoints) * 100.0 + 1

            xfill = 0
            yfill = 2

            result_block_vals, rb_index, bfill = sparse_op(
                x, xindex, xfill, y, yindex, yfill
            )
            result_int_vals, ri_index, ifill = sparse_op(
                x, xdindex, xfill, y, ydindex, yfill
            )

            # Block and integer representations must give identical results.
            assert rb_index.to_int_index().equals(ri_index)
            tm.assert_numpy_array_equal(result_block_vals, result_int_vals)
            assert bfill == ifill

            # check versus Series...
            xseries = Series(x, xdindex.indices)
            xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill)

            yseries = Series(y, ydindex.indices)
            yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill)

            series_result = python_op(xseries, yseries)
            # Keep only the positions present in the sparse result's index.
            series_result = series_result.reindex(ri_index.indices)

            tm.assert_numpy_array_equal(result_block_vals, series_result.values)
            tm.assert_numpy_array_equal(result_int_vals, series_result.values)

        check_cases(_check_case)

    @pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"])
    def test_op(self, opname):
        """Run ``_op_tests`` for ``splib.sparse_<opname>_float64``."""
        sparse_op = getattr(splib, "sparse_{opname}_float64".format(opname=opname))
        python_op = getattr(operator, opname)
        self._op_tests(sparse_op, python_op)
|
Reference in New Issue
Block a user