8th day of python challenges 111-117
This commit is contained in:
@@ -0,0 +1,120 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, SparseArray, SparseDataFrame, bdate_range
|
||||
|
||||
# Column-oriented float data shared by the fixtures in this module.
# Columns "A", "B" and "D" each contain a run of NaN entries; "C" has none.
data = {
    "A": [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6],
    "B": [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6],
    "C": np.arange(10, dtype=np.float64),
    "D": [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan],
}
# Ten consecutive business days, used as the shared row index.
dates = bdate_range("1/1/2011", periods=10)
|
||||
|
||||
|
||||
# fixture names must be compatible with the tests in
|
||||
# tests/frame/test_api.SharedWithSparse
|
||||
|
||||
|
||||
@pytest.fixture
def float_frame_dense():
    """
    Fixture for dense DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D']; some entries are missing
    """
    # Built from the module-level ``data`` dict and ``dates`` index.
    return DataFrame(data, index=dates)
|
||||
|
||||
|
||||
@pytest.fixture
def float_frame():
    """
    Fixture for sparse DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D']; some entries are missing
    """
    # default_kind='block' is the default
    return SparseDataFrame(data, index=dates, default_kind="block")
|
||||
|
||||
|
||||
@pytest.fixture
def float_frame_int_kind():
    """
    Fixture for sparse DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D'] and default_kind='integer'.
    Some entries are missing.
    """
    return SparseDataFrame(data, index=dates, default_kind="integer")
|
||||
|
||||
|
||||
@pytest.fixture
def float_string_frame():
    """
    Fixture for sparse DataFrame of floats and strings with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D', 'foo']; some entries are missing
    """
    sdf = SparseDataFrame(data, index=dates)
    # Add a constant string column with one "bar" per row.
    sdf["foo"] = SparseArray(["bar"] * len(dates))
    return sdf
|
||||
|
||||
|
||||
@pytest.fixture
def float_frame_fill0_dense():
    """
    Fixture for dense DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 0
    """
    values = SparseDataFrame(data).values
    # Replace every NaN in the extracted values with the fill value.
    values[np.isnan(values)] = 0
    return DataFrame(values, columns=["A", "B", "C", "D"], index=dates)
|
||||
|
||||
|
||||
@pytest.fixture
def float_frame_fill0():
    """
    Fixture for sparse DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 0
    """
    values = SparseDataFrame(data).values
    # Replace every NaN in the extracted values with the fill value.
    values[np.isnan(values)] = 0
    return SparseDataFrame(
        values, columns=["A", "B", "C", "D"], default_fill_value=0, index=dates
    )
|
||||
|
||||
|
||||
@pytest.fixture
def float_frame_fill2_dense():
    """
    Fixture for dense DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 2
    """
    values = SparseDataFrame(data).values
    # Replace every NaN in the extracted values with the fill value.
    values[np.isnan(values)] = 2
    return DataFrame(values, columns=["A", "B", "C", "D"], index=dates)
|
||||
|
||||
|
||||
@pytest.fixture
def float_frame_fill2():
    """
    Fixture for sparse DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 2
    """
    values = SparseDataFrame(data).values
    # Replace every NaN in the extracted values with the fill value.
    values[np.isnan(values)] = 2
    return SparseDataFrame(
        values, columns=["A", "B", "C", "D"], default_fill_value=2, index=dates
    )
|
||||
|
||||
|
||||
@pytest.fixture
def empty_frame():
    """
    Fixture for empty SparseDataFrame
    """
    return SparseDataFrame()
|
||||
@@ -0,0 +1,41 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, SparseDataFrame, SparseSeries
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_quantile():
    """Compare SparseDataFrame.quantile for a scalar ``q`` with the dense result."""
    # GH 17386
    data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    q = 0.1

    sparse_df = SparseDataFrame(data)
    result = sparse_df.quantile(q)

    dense_df = DataFrame(data)
    dense_expected = dense_df.quantile(q)
    sparse_expected = SparseSeries(dense_expected)

    # The result is checked against both the dense and the sparse expectation.
    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_quantile_multi():
    """Compare SparseDataFrame.quantile for a list of ``q`` with the dense result."""
    # GH 17386
    data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    q = [0.1, 0.5]

    sparse_df = SparseDataFrame(data)
    result = sparse_df.quantile(q)

    dense_df = DataFrame(data)
    dense_expected = dense_df.quantile(q)
    sparse_expected = SparseDataFrame(dense_expected)

    # The result is checked against both the dense and the sparse expectation.
    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)
|
||||
@@ -0,0 +1,117 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Series, SparseDataFrame, bdate_range
|
||||
from pandas.core import nanops
|
||||
from pandas.core.sparse.api import SparseDtype
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def dates():
    """Fixture returning ten business days starting from 1/1/2011."""
    return bdate_range("1/1/2011", periods=10)
|
||||
|
||||
|
||||
@pytest.fixture
def empty():
    """Fixture returning a SparseDataFrame constructed with no arguments."""
    return SparseDataFrame()
|
||||
|
||||
|
||||
@pytest.fixture
def frame(dates):
    """
    Fixture returning a SparseDataFrame of floats indexed by ``dates``.

    Columns are ['A', 'B', 'C', 'D']; 'A', 'B' and 'D' contain NaN entries.
    """
    data = {
        "A": [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6],
        "B": [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6],
        "C": np.arange(10, dtype=np.float64),
        "D": [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan],
    }

    return SparseDataFrame(data, index=dates)
|
||||
|
||||
|
||||
@pytest.fixture
def fill_frame(frame):
    """
    Fixture returning a copy of ``frame`` with NaN replaced by 2.

    The result is a SparseDataFrame with default_fill_value=2 that reuses
    the index of ``frame``.
    """
    # Work on a copy so the values of the ``frame`` fixture are not mutated.
    values = frame.values.copy()
    values[np.isnan(values)] = 2

    return SparseDataFrame(
        values, columns=["A", "B", "C", "D"], default_fill_value=2, index=frame.index
    )
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
def test_apply(frame):
    """Exercise SparseDataFrame.apply element-wise, broadcast and as a reduction."""
    applied = frame.apply(np.sqrt)
    assert isinstance(applied, SparseDataFrame)
    tm.assert_almost_equal(applied.values, np.sqrt(frame.values))

    # agg / broadcast
    # two FutureWarnings, so we can't check stacklevel properly.
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        broadcasted = frame.apply(np.sum, broadcast=True)
    assert isinstance(broadcasted, SparseDataFrame)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        exp = frame.to_dense().apply(np.sum, broadcast=True)
    tm.assert_frame_equal(broadcasted.to_dense(), exp)

    # Reduction: compare against the dense frame reduced with nansum.
    applied = frame.apply(np.sum)
    tm.assert_series_equal(applied, frame.to_dense().apply(nanops.nansum).to_sparse())
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_apply_fill(fill_frame):
    """Check that applying np.sqrt also transforms the column fill value."""
    applied = fill_frame.apply(np.sqrt)
    assert applied["A"].fill_value == np.sqrt(2)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_apply_empty(empty):
    """Check that apply on an empty frame returns the very same object."""
    assert empty.apply(np.sqrt) is empty
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
def test_apply_nonuq():
    """Apply a row-wise function on a sparse frame with a non-unique index."""
    orig = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"])
    sparse = orig.to_sparse()
    res = sparse.apply(lambda s: s[0], axis=1)
    exp = orig.apply(lambda s: s[0], axis=1)

    # dtype must be kept
    assert res.dtype == SparseDtype(np.int64)

    # ToDo: apply must return subclassed dtype
    assert isinstance(res, Series)
    tm.assert_series_equal(res.to_dense(), exp)

    # df.T breaks
    sparse = orig.T.to_sparse()
    res = sparse.apply(lambda s: s[0], axis=0)  # noqa
    exp = orig.T.apply(lambda s: s[0], axis=0)

    # TODO: no non-unique columns supported in sparse yet
    # tm.assert_series_equal(res.to_dense(), exp)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_applymap(frame):
    """Check that applymap runs and returns a SparseDataFrame."""
    # just test that it works
    result = frame.applymap(lambda x: x * 2)
    assert isinstance(result, SparseDataFrame)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_apply_keep_sparse_dtype():
    """Check apply(np.exp) agrees between a SparseDataFrame and its DataFrame wrap."""
    # GH 23744
    sdf = SparseDataFrame(
        np.array([[0, 1, 0], [0, 0, 0], [0, 0, 1]]),
        columns=["b", "a", "c"],
        default_fill_value=1,
    )
    df = DataFrame(sdf)

    expected = sdf.apply(np.exp)
    result = df.apply(np.exp)
    tm.assert_frame_equal(expected, result)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,103 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, SparseDataFrame
|
||||
from pandas.util import testing as tm
|
||||
|
||||
# Module-level marker: skip every test in this module (see GH 17386).
pytestmark = pytest.mark.skip("Wrong SparseBlock initialization (GH 17386)")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data",
    [
        [[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]],
        [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]],
        [
            [1.0, 1.0 + 1.0j],
            [2.0 + 2.0j, 2.0],
            [3.0, 3.0 + 3.0j],
            [4.0 + 4.0j, 4.0],
            [np.nan, np.nan],
        ],
    ],
)
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_where_with_numeric_data(data):
    """Compare SparseDataFrame.where on numeric data with the dense result."""
    # GH 17386
    lower_bound = 1.5

    sparse = SparseDataFrame(data)
    result = sparse.where(sparse > lower_bound)

    dense = DataFrame(data)
    dense_expected = dense.where(dense > lower_bound)
    sparse_expected = SparseDataFrame(dense_expected)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data",
    [
        [[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]],
        [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]],
        [
            [1.0, 1.0 + 1.0j],
            [2.0 + 2.0j, 2.0],
            [3.0, 3.0 + 3.0j],
            [4.0 + 4.0j, 4.0],
            [np.nan, np.nan],
        ],
    ],
)
@pytest.mark.parametrize("other", [True, -100, 0.1, 100.0 + 100.0j])
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_where_with_numeric_data_and_other(data, other):
    """Compare SparseDataFrame.where(cond, other) on numeric data with dense."""
    # GH 17386
    lower_bound = 1.5

    sparse = SparseDataFrame(data)
    result = sparse.where(sparse > lower_bound, other)

    dense = DataFrame(data)
    dense_expected = dense.where(dense > lower_bound, other)
    # The sparse expectation uses ``other`` as its default fill value.
    sparse_expected = SparseDataFrame(dense_expected, default_fill_value=other)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_where_with_bool_data():
    """Compare SparseDataFrame.where on boolean data with the dense result."""
    # GH 17386
    data = [[False, False], [True, True], [False, False]]
    cond = True

    sparse = SparseDataFrame(data)
    result = sparse.where(sparse == cond)

    dense = DataFrame(data)
    dense_expected = dense.where(dense == cond)
    sparse_expected = SparseDataFrame(dense_expected)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("other", [True, 0, 0.1, 100.0 + 100.0j])
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_where_with_bool_data_and_other(other):
    """Compare SparseDataFrame.where(cond, other) on boolean data with dense."""
    # GH 17386
    data = [[False, False], [True, True], [False, False]]
    cond = True

    sparse = SparseDataFrame(data)
    result = sparse.where(sparse == cond, other)

    dense = DataFrame(data)
    dense_expected = dense.where(dense == cond, other)
    # The sparse expectation uses ``other`` as its default fill value.
    sparse_expected = SparseDataFrame(dense_expected, default_fill_value=other)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)
|
||||
@@ -0,0 +1,24 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import SparseDataFrame, read_csv
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
class TestSparseDataFrameToCsv:
    """Round-trip a SparseDataFrame through ``to_csv`` / ``read_csv``."""

    # Used both as the column contents and as the parametrized fill values.
    fill_values = [np.nan, 0, None, 1]

    @pytest.mark.parametrize("fill_value", fill_values)
    def test_to_csv_sparse_dataframe(self, fill_value):
        """Write the frame to CSV, read it back and compare after re-sparsifying."""
        # GH19384
        sdf = SparseDataFrame(
            {"a": type(self).fill_values}, default_fill_value=fill_value
        )

        with tm.ensure_clean("sparse_df.csv") as path:
            sdf.to_csv(path, index=False)
            # skip_blank_lines=False keeps rows that were written as empty lines.
            df = read_csv(path, skip_blank_lines=False)

            tm.assert_sp_frame_equal(df.to_sparse(fill_value=fill_value), sdf)
|
||||
@@ -0,0 +1,196 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_bool_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import SparseDataFrame, SparseSeries
|
||||
from pandas.core.sparse.api import SparseDtype
|
||||
from pandas.util import testing as tm
|
||||
|
||||
# Skip this whole module when SciPy cannot be imported.
scipy = pytest.importorskip("scipy")
# Reusable marker that ignores the "matrix subclass" PendingDeprecationWarning.
ignore_matrix_warning = pytest.mark.filterwarnings(
    "ignore:the matrix subclass:PendingDeprecationWarning"
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [None, list("abc")])  # noqa: F811
@pytest.mark.parametrize("columns", [None, list("def")])
@pytest.mark.parametrize("fill_value", [None, 0, np.nan])
@pytest.mark.parametrize("dtype", [bool, int, float, np.uint16])
@ignore_matrix_warning
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
    """Round-trip a scipy sparse matrix through SparseDataFrame and ``to_coo``."""
    # GH 4343
    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(3, dtype=dtype)
    # GH 16179
    arr[0, 1] = dtype(2)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(
        spm, index=index, columns=columns, default_fill_value=fill_value
    )

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan
    )

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    # XXX: verify this
    res_dtype = bool if is_bool_dtype(dtype) else dtype
    tm.assert_contains_all(
        sdf.dtypes.apply(lambda dtype: dtype.subtype), {np.dtype(res_dtype)}
    )
    assert sdf.to_coo().dtype == res_dtype

    # However, adding a str column results in an upcast to object
    sdf["strings"] = np.arange(len(sdf)).astype(str)
    assert sdf.to_coo().dtype == np.object_
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [None, 0, np.nan])  # noqa: F811
@ignore_matrix_warning
@pytest.mark.filterwarnings("ignore:object dtype is not supp:UserWarning")
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_from_to_scipy_object(spmatrix, fill_value):
    """Round-trip an object-dtype scipy sparse matrix through SparseDataFrame."""
    # GH 4343
    dtype = object
    columns = list("cd")
    index = list("ab")

    if spmatrix is scipy.sparse.dok_matrix:
        pytest.skip("dok_matrix from object does not work in SciPy")

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(
        spm, index=index, columns=columns, default_fill_value=fill_value
    )

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan
    )

    # Assert frame is as expected
    sdf_obj = sdf.astype(SparseDtype(object, fill_value))
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    res_dtype = object
    tm.assert_contains_all(
        sdf.dtypes.apply(lambda dtype: dtype.subtype), {np.dtype(res_dtype)}
    )
    assert sdf.to_coo().dtype == res_dtype
|
||||
|
||||
|
||||
@ignore_matrix_warning
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_from_scipy_correct_ordering(spmatrix):
    """Check a frame built from a sparse matrix equals one built from the array."""
    # GH 16179
    arr = np.arange(1, 5).reshape(2, 2)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm)
    expected = SparseDataFrame(arr)
    tm.assert_sp_frame_equal(sdf, expected)
    tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
|
||||
|
||||
|
||||
@ignore_matrix_warning
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_from_scipy_fillna(spmatrix):
    """Check ``fillna(-1.0)`` on a SparseDataFrame built from a sparse matrix."""
    # GH 16112
    arr = np.eye(3)
    arr[1:, 0] = np.nan

    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm).fillna(-1.0)

    # Returning frame should fill all nan values with -1.0
    expected = SparseDataFrame(
        {
            0: SparseSeries([1.0, -1, -1]),
            1: SparseSeries([np.nan, 1, np.nan]),
            2: SparseSeries([np.nan, np.nan, 1]),
        },
        default_fill_value=-1,
    )

    # fill_value is expected to be what .fillna() above was called with
    # We don't use -1 as initial fill_value in expected SparseSeries
    # construction because this way we obtain "compressed" SparseArrays,
    # avoiding having to construct them ourselves
    for col in expected:
        expected[col].fill_value = -1

    tm.assert_sp_frame_equal(sdf, expected)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
def test_index_names_multiple_nones():
    """Check ``to_coo`` on a sparse Series whose MultiIndex levels are unnamed."""
    # https://github.com/pandas-dev/pandas/pull/24092
    sparse = pytest.importorskip("scipy.sparse")

    s = pd.Series(1, index=pd.MultiIndex.from_product([["A", "B"], [0, 1]])).to_sparse()
    # to_coo returns a 3-tuple; only the matrix (first item) is checked here.
    result, _, _ = s.to_coo()
    assert isinstance(result, sparse.coo_matrix)
    result = result.toarray()
    expected = np.ones((2, 2), dtype="int64")
    tm.assert_numpy_array_equal(result, expected)
|
||||
@@ -0,0 +1,113 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Series, SparseSeries
|
||||
from pandas.util import testing as tm
|
||||
|
||||
# Module-level marker: skip every test in this module (see GH 17386).
pytestmark = pytest.mark.skip("Wrong SparseBlock initialization (GH 17386)")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data",
    [
        [1, 1, 2, 2, 3, 3, 4, 4, 0, 0],
        [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, np.nan, np.nan],
        [
            1.0,
            1.0 + 1.0j,
            2.0 + 2.0j,
            2.0,
            3.0,
            3.0 + 3.0j,
            4.0 + 4.0j,
            4.0,
            np.nan,
            np.nan,
        ],
    ],
)
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_where_with_numeric_data(data):
    """Compare SparseSeries.where on numeric data with the dense result."""
    # GH 17386
    lower_bound = 1.5

    sparse = SparseSeries(data)
    result = sparse.where(sparse > lower_bound)

    dense = Series(data)
    dense_expected = dense.where(dense > lower_bound)
    sparse_expected = SparseSeries(dense_expected)

    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data",
    [
        [1, 1, 2, 2, 3, 3, 4, 4, 0, 0],
        [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, np.nan, np.nan],
        [
            1.0,
            1.0 + 1.0j,
            2.0 + 2.0j,
            2.0,
            3.0,
            3.0 + 3.0j,
            4.0 + 4.0j,
            4.0,
            np.nan,
            np.nan,
        ],
    ],
)
@pytest.mark.parametrize("other", [True, -100, 0.1, 100.0 + 100.0j])
@pytest.mark.skip(reason="Wrong SparseBlock initialization (Segfault) (GH 17386)")
def test_where_with_numeric_data_and_other(data, other):
    """Compare SparseSeries.where(cond, other) on numeric data with dense."""
    # GH 17386
    lower_bound = 1.5

    sparse = SparseSeries(data)
    result = sparse.where(sparse > lower_bound, other)

    dense = Series(data)
    dense_expected = dense.where(dense > lower_bound, other)
    # The sparse expectation uses ``other`` as its fill value.
    sparse_expected = SparseSeries(dense_expected, fill_value=other)

    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_where_with_bool_data():
    """Compare SparseSeries.where on boolean data with the dense result."""
    # GH 17386
    data = [False, False, True, True, False, False]
    cond = True

    sparse = SparseSeries(data)
    result = sparse.where(sparse == cond)

    dense = Series(data)
    dense_expected = dense.where(dense == cond)
    sparse_expected = SparseSeries(dense_expected)

    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("other", [True, 0, 0.1, 100.0 + 100.0j])
@pytest.mark.skip(reason="Wrong SparseBlock initialization (Segfault) (GH 17386)")
def test_where_with_bool_data_and_other(other):
    """Compare SparseSeries.where(cond, other) on boolean data with dense."""
    # GH 17386
    data = [False, False, True, True, False, False]
    cond = True

    sparse = SparseSeries(data)
    result = sparse.where(sparse == cond, other)

    dense = Series(data)
    dense_expected = dense.where(dense == cond, other)
    # The sparse expectation uses ``other`` as its fill value.
    sparse_expected = SparseSeries(dense_expected, fill_value=other)

    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,474 @@
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSparseArrayConcat:
    """Tests for ``SparseArray._concat_same_type``."""

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_basic(self, kind):
        """Concatenate two arrays of the same kind; check sp_values and kind."""
        a = pd.SparseArray([1, 0, 0, 2], kind=kind)
        b = pd.SparseArray([1, 0, 2, 2], kind=kind)

        result = pd.SparseArray._concat_same_type([a, b])
        # Can't make any assertions about the sparse index itself
        # since we don't merge sparse blocks across arrays
        # in to_concat
        expected = np.array([1, 2, 1, 2, 2], dtype="int64")
        tm.assert_numpy_array_equal(result.sp_values, expected)
        assert result.kind == kind

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_uses_first_kind(self, kind):
        """With mixed kinds, the result takes the kind of the first array."""
        other = "integer" if kind == "block" else "block"
        a = pd.SparseArray([1, 0, 0, 2], kind=kind)
        b = pd.SparseArray([1, 0, 2, 2], kind=other)

        result = pd.SparseArray._concat_same_type([a, b])
        expected = np.array([1, 2, 1, 2, 2], dtype="int64")
        tm.assert_numpy_array_equal(result.sp_values, expected)
        assert result.kind == kind
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
class TestSparseSeriesConcat:
    """Tests for ``pd.concat`` with SparseSeries inputs."""

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_concat(self, kind):
        """Concatenate two SparseSeries of the same kind and fill value."""
        val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        val2 = np.array([3, np.nan, 4, 0, 0])

        sparse1 = pd.SparseSeries(val1, name="x", kind=kind)
        sparse2 = pd.SparseSeries(val2, name="y", kind=kind)

        res = pd.concat([sparse1, sparse2])
        exp = pd.concat([pd.Series(val1), pd.Series(val2)])
        exp = pd.SparseSeries(exp, kind=kind)
        tm.assert_sp_series_equal(res, exp, consolidate_block_indices=True)

        # Same check again with an explicit fill_value of 0.
        sparse1 = pd.SparseSeries(val1, fill_value=0, name="x", kind=kind)
        sparse2 = pd.SparseSeries(val2, fill_value=0, name="y", kind=kind)

        res = pd.concat([sparse1, sparse2])
        exp = pd.concat([pd.Series(val1), pd.Series(val2)])
        exp = pd.SparseSeries(exp, fill_value=0, kind=kind)
        tm.assert_sp_series_equal(res, exp, consolidate_block_indices=True)

    def test_concat_axis1(self):
        """Concatenate two SparseSeries column-wise into a SparseDataFrame."""
        val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        val2 = np.array([3, np.nan, 4, 0, 0])

        sparse1 = pd.SparseSeries(val1, name="x")
        sparse2 = pd.SparseSeries(val2, name="y")

        res = pd.concat([sparse1, sparse2], axis=1)
        exp = pd.concat([pd.Series(val1, name="x"), pd.Series(val2, name="y")], axis=1)
        exp = pd.SparseDataFrame(exp)
        tm.assert_sp_frame_equal(res, exp, consolidate_block_indices=True)

    def test_concat_different_fill(self):
        """Concatenate SparseSeries with different fill values, in both orders."""
        val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        val2 = np.array([3, np.nan, 4, 0, 0])

        for kind in ["integer", "block"]:
            sparse1 = pd.SparseSeries(val1, name="x", kind=kind)
            sparse2 = pd.SparseSeries(val2, name="y", kind=kind, fill_value=0)

            with tm.assert_produces_warning(
                PerformanceWarning, raise_on_extra_warnings=False
            ):
                res = pd.concat([sparse1, sparse2])

            # Expected result carries the first input's (default) fill value.
            exp = pd.concat([pd.Series(val1), pd.Series(val2)])
            exp = pd.SparseSeries(exp, kind=kind)
            tm.assert_sp_series_equal(res, exp)

            with tm.assert_produces_warning(
                PerformanceWarning, raise_on_extra_warnings=False
            ):
                res = pd.concat([sparse2, sparse1])

            # Reversed order: expected result is built with fill_value=0.
            exp = pd.concat([pd.Series(val2), pd.Series(val1)])
            exp = pd.SparseSeries(exp, kind=kind, fill_value=0)
            tm.assert_sp_series_equal(res, exp)

    def test_concat_axis1_different_fill(self):
        """Column-wise concat of SparseSeries with different fill values."""
        val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        val2 = np.array([3, np.nan, 4, 0, 0])

        sparse1 = pd.SparseSeries(val1, name="x")
        sparse2 = pd.SparseSeries(val2, name="y", fill_value=0)

        res = pd.concat([sparse1, sparse2], axis=1)
        exp = pd.concat([pd.Series(val1, name="x"), pd.Series(val2, name="y")], axis=1)
        assert isinstance(res, pd.SparseDataFrame)
        tm.assert_frame_equal(res.to_dense(), exp)

    def test_concat_different_kind(self):
        """Concatenate SparseSeries of different kinds; first input's kind is used."""
        val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        val2 = np.array([3, np.nan, 4, 0, 0])

        sparse1 = pd.SparseSeries(val1, name="x", kind="integer")
        sparse2 = pd.SparseSeries(val2, name="y", kind="block")

        res = pd.concat([sparse1, sparse2])
        exp = pd.concat([pd.Series(val1), pd.Series(val2)])
        exp = pd.SparseSeries(exp, kind=sparse1.kind)
        tm.assert_sp_series_equal(res, exp)

        res = pd.concat([sparse2, sparse1])
        exp = pd.concat([pd.Series(val2), pd.Series(val1)])
        exp = pd.SparseSeries(exp, kind=sparse2.kind)
        tm.assert_sp_series_equal(res, exp, consolidate_block_indices=True)

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_concat_sparse_dense(self, kind):
        """Concatenate a SparseSeries with dense Series in several orders."""
        # use first input's fill_value
        val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        val2 = np.array([3, np.nan, 4, 0, 0])

        sparse = pd.SparseSeries(val1, name="x", kind=kind)
        dense = pd.Series(val2, name="y")

        res = pd.concat([sparse, dense])
        exp = pd.SparseSeries(pd.concat([pd.Series(val1), dense]), kind=kind)
        tm.assert_sp_series_equal(res, exp)

        res = pd.concat([dense, sparse, dense])
        exp = pd.concat([dense, pd.Series(val1), dense])
        # XXX: changed from SparseSeries to Series[sparse]
        exp = pd.Series(pd.SparseArray(exp, kind=kind), index=exp.index, name=exp.name)
        tm.assert_series_equal(res, exp)

        sparse = pd.SparseSeries(val1, name="x", kind=kind, fill_value=0)
        dense = pd.Series(val2, name="y")

        res = pd.concat([sparse, dense])
        # XXX: changed from SparseSeries to Series[sparse]
        exp = pd.concat([pd.Series(val1), dense])
        exp = pd.Series(
            pd.SparseArray(exp, kind=kind, fill_value=0), index=exp.index, name=exp.name
        )
        tm.assert_series_equal(res, exp)

        res = pd.concat([dense, sparse, dense])
        exp = pd.concat([dense, pd.Series(val1), dense])
        # XXX: changed from SparseSeries to Series[sparse]
        exp = pd.Series(
            pd.SparseArray(exp, kind=kind, fill_value=0), index=exp.index, name=exp.name
        )
        tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
class TestSparseDataFrameConcat:
    """Tests for ``pd.concat`` on sparse frames, checked against dense results."""

    def setup_method(self, method):
        """Create three dense float frames used as the source of every test.

        ``dense1`` and ``dense2`` share columns A-D; ``dense3`` has columns E-H.
        """
        nan = np.nan

        self.dense1 = pd.DataFrame(
            {
                "A": [0.0, 1.0, 2.0, nan],
                "B": [0.0, 0.0, 0.0, 0.0],
                "C": [nan, nan, nan, nan],
                "D": [1.0, 2.0, 3.0, 4.0],
            }
        )

        self.dense2 = pd.DataFrame(
            {
                "A": [5.0, 6.0, 7.0, 8.0],
                "B": [nan, 0.0, 7.0, 8.0],
                "C": [5.0, 6.0, nan, nan],
                "D": [nan, nan, nan, nan],
            }
        )

        self.dense3 = pd.DataFrame(
            {
                "E": [5.0, 6.0, 7.0, 8.0],
                "F": [nan, 0.0, 7.0, 8.0],
                "G": [5.0, 6.0, nan, nan],
                "H": [nan, nan, nan, nan],
            }
        )

    def test_concat(self):
        """Row-wise concat of same-column sparse frames, in every ordering."""
        dense = (self.dense1, self.dense2)
        # (first, second) positions into ``dense`` / ``sparse``
        orderings = [(0, 0), (1, 1), (0, 1), (1, 0)]

        # fill_value = np.nan
        sparse = (self.dense1.to_sparse(), self.dense2.to_sparse())
        for first, second in orderings:
            result = pd.concat([sparse[first], sparse[second]])
            expected = pd.concat([dense[first], dense[second]]).to_sparse()
            tm.assert_sp_frame_equal(result, expected, consolidate_block_indices=True)

        # fill_value = 0
        sparse = (
            self.dense1.to_sparse(fill_value=0),
            self.dense2.to_sparse(fill_value=0),
        )
        for first, second in orderings:
            result = pd.concat([sparse[first], sparse[second]])
            expected = pd.concat([dense[first], dense[second]]).to_sparse(
                fill_value=0
            )
            expected._default_fill_value = np.nan
            tm.assert_sp_frame_equal(result, expected, consolidate_block_indices=True)

    def test_concat_different_fill_value(self):
        """Concat frames whose fill values differ; a PerformanceWarning is expected."""
        # 1st fill_value will be used
        sparse_nan = self.dense1.to_sparse()
        sparse_zero = self.dense2.to_sparse(fill_value=0)

        with tm.assert_produces_warning(
            PerformanceWarning, raise_on_extra_warnings=False
        ):
            result = pd.concat([sparse_nan, sparse_zero])
        expected = pd.concat([self.dense1, self.dense2]).to_sparse()
        tm.assert_sp_frame_equal(result, expected, consolidate_block_indices=True)

        with tm.assert_produces_warning(
            PerformanceWarning, raise_on_extra_warnings=False
        ):
            result = pd.concat([sparse_zero, sparse_nan])
        expected = pd.concat([self.dense2, self.dense1]).to_sparse(fill_value=0)
        expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(result, expected, consolidate_block_indices=True)

    def test_concat_different_columns_sort_warns(self):
        """Concat of frames with disjoint columns and no ``sort`` emits FutureWarning."""
        sparse = self.dense1.to_sparse()
        sparse3 = self.dense3.to_sparse()

        # stacklevel is wrong since we have two FutureWarnings,
        # one for depr, one for sorting.
        with tm.assert_produces_warning(
            FutureWarning, check_stacklevel=False, raise_on_extra_warnings=False
        ):
            result = pd.concat([sparse, sparse3])
        with tm.assert_produces_warning(
            FutureWarning, check_stacklevel=False, raise_on_extra_warnings=False
        ):
            expected = pd.concat([self.dense1, self.dense3])

        expected = expected.to_sparse()
        tm.assert_sp_frame_equal(result, expected, check_kind=False)

    def test_concat_different_columns(self):
        """Concat frames with disjoint columns using ``sort=True`` (NaN fill)."""
        # fill_value = np.nan
        sparse = self.dense1.to_sparse()
        sparse3 = self.dense3.to_sparse()

        result = pd.concat([sparse, sparse3], sort=True)
        expected = pd.concat([self.dense1, self.dense3], sort=True).to_sparse()
        tm.assert_sp_frame_equal(result, expected, check_kind=False)

        result = pd.concat([sparse3, sparse], sort=True)
        expected = pd.concat([self.dense3, self.dense1], sort=True).to_sparse()
        expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(result, expected, check_kind=False)

    def test_concat_bug(self):
        """Concat with an empty frame keeps column A as Sparse[float, 0]."""
        from pandas.core.sparse.api import SparseDtype

        left = pd.SparseDataFrame(
            {"A": pd.SparseArray([np.nan, np.nan], fill_value=0)}
        )
        right = pd.SparseDataFrame({"B": []})
        result = pd.concat([left, right], sort=False)[["A"]]
        expected = pd.DataFrame(
            {"A": pd.SparseArray([np.nan, np.nan], dtype=SparseDtype(float, 0))}
        )
        tm.assert_frame_equal(result, expected)

    def test_concat_different_columns_buggy(self):
        """Concat frames with disjoint columns for fill_value=0 and mixed fills."""
        sparse = self.dense1.to_sparse(fill_value=0)
        sparse3 = self.dense3.to_sparse(fill_value=0)

        result = pd.concat([sparse, sparse3], sort=True)
        expected = pd.concat([self.dense1, self.dense3], sort=True).to_sparse(
            fill_value=0
        )
        expected._default_fill_value = np.nan

        tm.assert_sp_frame_equal(
            result, expected, check_kind=False, consolidate_block_indices=True
        )

        result = pd.concat([sparse3, sparse], sort=True)
        expected = pd.concat([self.dense3, self.dense1], sort=True).to_sparse(
            fill_value=0
        )
        expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(
            result, expected, check_kind=False, consolidate_block_indices=True
        )

        # different fill values
        sparse = self.dense1.to_sparse()
        sparse3 = self.dense3.to_sparse(fill_value=0)
        # each columns keeps its fill_value, thus compare in dense
        result = pd.concat([sparse, sparse3], sort=True)
        expected = pd.concat([self.dense1, self.dense3], sort=True)
        assert isinstance(result, pd.SparseDataFrame)
        tm.assert_frame_equal(result.to_dense(), expected)

        result = pd.concat([sparse3, sparse], sort=True)
        expected = pd.concat([self.dense3, self.dense1], sort=True)
        assert isinstance(result, pd.SparseDataFrame)
        tm.assert_frame_equal(result.to_dense(), expected)

    def test_concat_series(self):
        """Concat a sparse frame with a single sparse column, in both orders."""
        # fill_value = np.nan
        sparse = self.dense1.to_sparse()
        sparse2 = self.dense2.to_sparse()

        for name in ["A", "D"]:
            result = pd.concat([sparse, sparse2[name]])
            expected = pd.concat([self.dense1, self.dense2[name]]).to_sparse()
            tm.assert_sp_frame_equal(result, expected, check_kind=False)

            result = pd.concat([sparse2[name], sparse])
            expected = pd.concat([self.dense2[name], self.dense1]).to_sparse()
            tm.assert_sp_frame_equal(result, expected, check_kind=False)

        # fill_value = 0
        sparse = self.dense1.to_sparse(fill_value=0)
        sparse2 = self.dense2.to_sparse(fill_value=0)

        for name in ["C", "D"]:
            result = pd.concat([sparse, sparse2[name]])
            expected = pd.concat([self.dense1, self.dense2[name]]).to_sparse(
                fill_value=0
            )
            expected._default_fill_value = np.nan
            tm.assert_sp_frame_equal(
                result, expected, check_kind=False, consolidate_block_indices=True
            )

            result = pd.concat([sparse2[name], sparse])
            expected = pd.concat([self.dense2[name], self.dense1]).to_sparse(
                fill_value=0
            )
            # column C of the expectation is taken from the result itself
            expected["C"] = result["C"]
            expected._default_fill_value = np.nan
            tm.assert_sp_frame_equal(
                result, expected, consolidate_block_indices=True, check_kind=False
            )

    def test_concat_axis1(self):
        """Column-wise concat (``axis=1``) for NaN, 0 and mixed fill values."""
        # fill_value = np.nan
        sparse = self.dense1.to_sparse()
        sparse3 = self.dense3.to_sparse()

        result = pd.concat([sparse, sparse3], axis=1)
        expected = pd.concat([self.dense1, self.dense3], axis=1).to_sparse()
        tm.assert_sp_frame_equal(result, expected)

        result = pd.concat([sparse3, sparse], axis=1)
        expected = pd.concat([self.dense3, self.dense1], axis=1).to_sparse()
        expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(result, expected)

        # fill_value = 0
        sparse = self.dense1.to_sparse(fill_value=0)
        sparse3 = self.dense3.to_sparse(fill_value=0)

        result = pd.concat([sparse, sparse3], axis=1)
        expected = pd.concat([self.dense1, self.dense3], axis=1).to_sparse(
            fill_value=0
        )
        expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(result, expected)

        result = pd.concat([sparse3, sparse], axis=1)
        expected = pd.concat([self.dense3, self.dense1], axis=1).to_sparse(
            fill_value=0
        )
        expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(result, expected)

        # different fill values
        sparse = self.dense1.to_sparse()
        sparse3 = self.dense3.to_sparse(fill_value=0)
        # each columns keeps its fill_value, thus compare in dense
        result = pd.concat([sparse, sparse3], axis=1)
        expected = pd.concat([self.dense1, self.dense3], axis=1)
        assert isinstance(result, pd.SparseDataFrame)
        tm.assert_frame_equal(result.to_dense(), expected)

        result = pd.concat([sparse3, sparse], axis=1)
        expected = pd.concat([self.dense3, self.dense1], axis=1)
        assert isinstance(result, pd.SparseDataFrame)
        tm.assert_frame_equal(result.to_dense(), expected)

    @pytest.mark.parametrize(
        "fill_value,sparse_idx,dense_idx",
        itertools.product([None, 0, 1, np.nan], [0, 1], [1, 0]),
    )
    def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx):
        """Row-wise concat of one dense and one sparse frame, in both orders."""
        frames = [self.dense1, self.dense2]
        mixed_inputs = [
            frames[dense_idx],
            frames[sparse_idx].to_sparse(fill_value=fill_value),
        ]
        dense_inputs = [frames[dense_idx], frames[sparse_idx]]

        # This will try both directions sparse + dense and dense + sparse
        for step in (1, -1):
            result = pd.concat(mixed_inputs[::step])
            expected = pd.concat(dense_inputs[::step])

            assert isinstance(result, pd.SparseDataFrame)
            tm.assert_frame_equal(result.to_dense(), expected)

    @pytest.mark.parametrize(
        "fill_value,sparse_idx,dense_idx",
        itertools.product([None, 0, 1, np.nan], [0, 1], [1, 0]),
    )
    @pytest.mark.xfail(reason="The iloc fails and I can't make expected", strict=False)
    def test_concat_sparse_dense_cols(self, fill_value, sparse_idx, dense_idx):
        """Column-wise concat of one dense and one sparse frame, in both orders."""
        # See GH16874, GH18914 and #18686 for why this should be a DataFrame
        from pandas.core.dtypes.common import is_sparse

        frames = [self.dense1, self.dense3]

        mixed_inputs = [
            frames[dense_idx],
            frames[sparse_idx].to_sparse(fill_value=fill_value),
        ]
        dense_inputs = [frames[dense_idx], frames[sparse_idx]]

        # This will try both directions sparse + dense and dense + sparse
        for step in (1, -1):
            result = pd.concat(mixed_inputs[::step], axis=1)
            expected = pd.concat(dense_inputs[::step], axis=1)
            sparse_positions = [
                pos for (pos, dtype) in enumerate(result.dtypes) if is_sparse(dtype)
            ]

            # make the expectation sparse wherever the result is sparse
            for pos in sparse_positions:
                expected.iloc[:, pos] = expected.iloc[:, pos].astype("Sparse")

            for column in frames[dense_idx].columns:
                if dense_idx == sparse_idx:
                    tm.assert_frame_equal(result[column], expected[column])
                else:
                    tm.assert_series_equal(result[column], expected[column])

            tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,165 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import is_platform_32bit, is_platform_windows
|
||||
|
||||
import pandas as pd
|
||||
from pandas import option_context
|
||||
import pandas.util.testing as tm
|
||||
|
||||
use_32bit_repr = is_platform_windows() or is_platform_32bit()
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
class TestSparseSeriesFormatting:
    """Check the ``repr`` of sparse Series, full and row-truncated.

    The expected strings spell out the column padding of the Series repr
    exactly, so whitespace inside the literals is significant.
    """

    @property
    def dtype_format_for_platform(self):
        """Suffix for the BlockIndex array repr: empty when ``use_32bit_repr``."""
        return "" if use_32bit_repr else ", dtype=int32"

    def test_sparse_max_row(self):
        """Full repr of a float sparse Series."""
        s = pd.Series([1, np.nan, np.nan, 3, np.nan]).to_sparse()
        result = repr(s)
        dfm = self.dtype_format_for_platform
        exp = (
            "0    1.0\n1    NaN\n2    NaN\n3    3.0\n"
            "4    NaN\ndtype: Sparse[float64, nan]\nBlockIndex\n"
            "Block locations: array([0, 3]{0})\n"
            "Block lengths: array([1, 1]{0})".format(dfm)
        )
        assert result == exp

    def test_sparsea_max_row_truncated(self):
        """Truncated repr (``display.max_rows=3``) of a float sparse Series."""
        s = pd.Series([1, np.nan, np.nan, 3, np.nan]).to_sparse()
        dfm = self.dtype_format_for_platform

        with option_context("display.max_rows", 3):
            # GH 10560
            result = repr(s)
            exp = (
                "0    1.0\n    ... \n4    NaN\n"
                "Length: 5, dtype: Sparse[float64, nan]\nBlockIndex\n"
                "Block locations: array([0, 3]{0})\n"
                "Block lengths: array([1, 1]{0})".format(dfm)
            )
            assert result == exp

    def test_sparse_mi_max_row(self):
        """Full and truncated repr of a sparse Series with a MultiIndex."""
        idx = pd.MultiIndex.from_tuples(
            [("A", 0), ("A", 1), ("B", 0), ("C", 0), ("C", 1), ("C", 2)]
        )
        s = pd.Series([1, np.nan, np.nan, 3, np.nan, np.nan], index=idx).to_sparse()
        result = repr(s)
        dfm = self.dtype_format_for_platform
        exp = (
            "A  0    1.0\n   1    NaN\nB  0    NaN\n"
            "C  0    3.0\n   1    NaN\n   2    NaN\n"
            "dtype: Sparse[float64, nan]\nBlockIndex\n"
            "Block locations: array([0, 3]{0})\n"
            "Block lengths: array([1, 1]{0})".format(dfm)
        )
        assert result == exp

        with option_context("display.max_rows", 3, "display.show_dimensions", False):
            # GH 13144
            result = repr(s)
            exp = (
                "A  0    1.0\n       ... \nC  2    NaN\n"
                "dtype: Sparse[float64, nan]\nBlockIndex\n"
                "Block locations: array([0, 3]{0})\n"
                "Block lengths: array([1, 1]{0})".format(dfm)
            )
            assert result == exp

    def test_sparse_bool(self):
        """Full and truncated repr of a boolean SparseSeries."""
        # GH 13110
        s = pd.SparseSeries([True, False, False, True, False, False], fill_value=False)
        result = repr(s)
        # same platform suffix as the other tests in this class
        dfm = self.dtype_format_for_platform
        exp = (
            "0     True\n1    False\n2    False\n"
            "3     True\n4    False\n5    False\n"
            "dtype: Sparse[bool, False]\nBlockIndex\n"
            "Block locations: array([0, 3]{0})\n"
            "Block lengths: array([1, 1]{0})".format(dfm)
        )
        assert result == exp

        with option_context("display.max_rows", 3):
            result = repr(s)
            exp = (
                "0     True\n     ...  \n5    False\n"
                "Length: 6, dtype: Sparse[bool, False]\nBlockIndex\n"
                "Block locations: array([0, 3]{0})\n"
                "Block lengths: array([1, 1]{0})".format(dfm)
            )
            assert result == exp

    def test_sparse_int(self):
        """Full and truncated repr of an integer SparseSeries."""
        # GH 13110
        s = pd.SparseSeries([0, 1, 0, 0, 1, 0], fill_value=False)

        result = repr(s)
        # same platform suffix as the other tests in this class
        dfm = self.dtype_format_for_platform
        exp = (
            "0    0\n1    1\n2    0\n3    0\n4    1\n"
            "5    0\ndtype: Sparse[int64, False]\nBlockIndex\n"
            "Block locations: array([1, 4]{0})\n"
            "Block lengths: array([1, 1]{0})".format(dfm)
        )
        assert result == exp

        with option_context("display.max_rows", 3, "display.show_dimensions", False):
            result = repr(s)
            exp = (
                "0    0\n    ..\n5    0\n"
                "dtype: Sparse[int64, False]\nBlockIndex\n"
                "Block locations: array([1, 4]{0})\n"
                "Block lengths: array([1, 1]{0})".format(dfm)
            )
            assert result == exp
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
class TestSparseDataFrameFormatting:
    """Check the ``repr`` of sparse DataFrames."""

    def test_sparse_frame(self):
        """A sparse frame has the same repr as its dense source."""
        # GH 13110
        columns = {
            "A": [True, False, True, False, True],
            "B": [True, False, True, False, True],
            "C": [0, 0, 3, 0, 5],
            "D": [np.nan, np.nan, np.nan, 1, 2],
        }
        dense = pd.DataFrame(columns)
        sparse = dense.to_sparse()
        assert repr(sparse) == repr(dense)

        with option_context("display.max_rows", 3):
            assert repr(sparse) == repr(dense)

    def test_sparse_repr_after_set(self):
        """``repr`` works after a chained assignment and the frame is unchanged."""
        # GH 15488
        sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]])
        snapshot = sdf.copy()

        # Ignore the warning
        with pd.option_context("mode.chained_assignment", None):
            sdf[0][1] = 2  # This line triggers the bug

        repr(sdf)
        tm.assert_sp_frame_equal(sdf, snapshot)
|
||||
|
||||
|
||||
def test_repr_no_warning():
    """``repr`` of a sparse frame and of its column emits no warning."""
    # Construction itself warns (FutureWarning), so silence it here only.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        frame = pd.SparseDataFrame({"A": [1, 2]})
        column = frame["A"]

    with tm.assert_produces_warning(None):
        repr(frame)
        repr(column)
|
||||
@@ -0,0 +1,73 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
class TestSparseGroupBy:
    """Compare groupby results on a sparse frame with those on its dense source."""

    def setup_method(self, method):
        """Build the dense frame and its sparse counterpart.

        Columns C and D are random; a fixed seed keeps the data identical
        from run to run so a failure can be reproduced.
        """
        rs = np.random.RandomState(0)
        self.dense = pd.DataFrame(
            {
                "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
                "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
                "C": rs.randn(8),
                "D": rs.randn(8),
                "E": [np.nan, np.nan, 1, 2, np.nan, 1, np.nan, np.nan],
            }
        )
        self.sparse = self.dense.to_sparse()

    def test_first_last_nth(self):
        """``first`` / ``last`` / ``nth(1)`` agree between sparse and dense groups."""
        # tests for first / last / nth
        sparse_grouped = self.sparse.groupby("A")
        dense_grouped = self.dense.groupby("A")

        sparse_grouped_first = sparse_grouped.first()
        sparse_grouped_last = sparse_grouped.last()
        sparse_grouped_nth = sparse_grouped.nth(1)

        dense_grouped_first = pd.DataFrame(dense_grouped.first().to_sparse())
        dense_grouped_last = pd.DataFrame(dense_grouped.last().to_sparse())
        dense_grouped_nth = pd.DataFrame(dense_grouped.nth(1).to_sparse())

        tm.assert_frame_equal(sparse_grouped_first, dense_grouped_first)
        tm.assert_frame_equal(sparse_grouped_last, dense_grouped_last)
        tm.assert_frame_equal(sparse_grouped_nth, dense_grouped_nth)

    def test_aggfuncs(self):
        """``mean`` and ``count`` agree between sparse and dense groups."""
        sparse_grouped = self.sparse.groupby("A")
        dense_grouped = self.dense.groupby("A")

        result = sparse_grouped.mean().to_sparse()
        expected = dense_grouped.mean().to_sparse()

        tm.assert_frame_equal(result, expected)

        # ToDo: sparse sum includes str column
        # tm.assert_frame_equal(sparse_grouped.sum(),
        #                       dense_grouped.sum())

        result = sparse_grouped.count().to_sparse()
        expected = dense_grouped.count().to_sparse()

        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [0, np.nan])
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
def test_groupby_includes_fill_value(fill_value):
    """Grouped sum of a sparse frame matches the dense grouped sum."""
    # https://github.com/pandas-dev/pandas/issues/5078
    column = [fill_value, 1, fill_value, fill_value]
    dense = pd.DataFrame({"a": list(column), "b": list(column)})
    sparse = dense.to_sparse(fill_value=fill_value)

    result = sparse.groupby("a").sum()
    dense_sum = dense.groupby("a").sum()
    expected = pd.DataFrame(dense_sum.to_sparse(fill_value=fill_value))
    tm.assert_frame_equal(result, expected, check_index_type=False)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,65 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import _np_version_under1p17
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
class TestPivotTable:
    """Compare ``pd.pivot_table`` on a sparse frame with the dense equivalent."""

    def setup_method(self, method):
        """Build a seeded dense frame and its sparse counterpart."""
        rng = np.random.RandomState(0)
        columns = {
            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
            "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
            "C": rng.randn(8),
            "D": rng.randn(8),
            "E": [np.nan, np.nan, 1, 2, np.nan, 1, np.nan, np.nan],
        }
        self.dense = pd.DataFrame(columns)
        self.sparse = self.dense.to_sparse()

    def test_pivot_table(self):
        """Pivot on a single value column, default aggregation and ``mean``."""
        cases = [
            {"values": "C"},
            {"values": "E"},
            {"values": "E", "aggfunc": "mean"},
        ]
        for kwargs in cases:
            from_sparse = pd.pivot_table(
                self.sparse, index="A", columns="B", **kwargs
            )
            from_dense = pd.pivot_table(self.dense, index="A", columns="B", **kwargs)
            tm.assert_frame_equal(from_sparse, from_dense)

    def test_pivot_table_with_nans(self):
        """Pivot with ``sum`` aggregation on column E, which contains NaNs."""
        pivot_kwargs = dict(index="A", columns="B", values="E", aggfunc="sum")
        from_sparse = pd.pivot_table(self.sparse, **pivot_kwargs)
        from_dense = pd.pivot_table(self.dense, **pivot_kwargs)
        tm.assert_frame_equal(from_sparse, from_dense)

    @pytest.mark.xfail(
        not _np_version_under1p17,
        reason="failing occasionally on numpy > 1.17",
        strict=False,
    )
    def test_pivot_table_multi(self):
        """Pivot on two value columns; the dense result is cast to sparse first."""
        pivot_kwargs = dict(index="A", columns="B", values=["D", "E"])
        from_sparse = pd.pivot_table(self.sparse, **pivot_kwargs)
        from_dense = pd.pivot_table(self.dense, **pivot_kwargs)
        from_dense = from_dense.apply(lambda col: col.astype("Sparse[float64]"))
        tm.assert_frame_equal(from_sparse, from_dense)
|
||||
@@ -0,0 +1,43 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def sparse_df():
    """Fixture: 3x3 SparseDataFrame with 1 on the diagonal (eye)."""
    diagonal = {i: {i: 1} for i in range(3)}
    return pd.SparseDataFrame(diagonal)
|
||||
|
||||
|
||||
@pytest.fixture
def multi_index3():
    """Fixture: MultiIndex with the three entries (0, 0), (1, 1), (2, 2)."""
    pairs = [(i, i) for i in range(3)]
    return pd.MultiIndex.from_tuples(pairs)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_sparse_frame_stack(sparse_df, multi_index3):
    """Stacking the diagonal sparse frame gives a SparseSeries of three ones."""
    stacked = sparse_df.stack()
    ones = np.ones(3)
    expected = pd.SparseSeries(ones, index=multi_index3)
    tm.assert_sp_series_equal(stacked, expected)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_sparse_frame_unstack(sparse_df):
    """Unstacking a sparse frame yields the same values as the dense frame."""
    index = pd.MultiIndex.from_tuples([(0, 0), (1, 0), (1, 2)])
    sparse_df.index = index

    nan = np.nan
    dense_values = np.array([[1, nan, nan], [nan, 1, nan], [nan, nan, 1]])
    dense_unstacked = pd.DataFrame(dense_values, index=index).unstack()
    sparse_unstacked = sparse_df.unstack()

    tm.assert_numpy_array_equal(dense_unstacked.values, sparse_unstacked.values)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_sparse_series_unstack(sparse_df, multi_index3):
    """Unstacking a SparseSeries of ones gives a frame of sparse columns."""
    series = pd.SparseSeries(np.ones(3), index=multi_index3)
    frame = series.unstack()

    # column i holds the base row rolled by i positions
    base = np.array([1, np.nan, np.nan])
    columns = {}
    for shift in range(3):
        columns[shift] = pd.SparseArray(np.roll(base, shift))
    expected = pd.DataFrame(columns)
    tm.assert_frame_equal(frame, expected)
|
||||
Reference in New Issue
Block a user