8th day of Python challenges 111-117

abd.shallal
2019-08-04 15:26:35 +03:00
parent b04c1b055f
commit 627802c383
3215 changed files with 760227 additions and 491 deletions


@@ -0,0 +1,120 @@
import numpy as np
import pytest
from pandas import DataFrame, SparseArray, SparseDataFrame, bdate_range
data = {
"A": [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6],
"B": [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6],
"C": np.arange(10, dtype=np.float64),
"D": [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan],
}
dates = bdate_range("1/1/2011", periods=10)
# fixture names must be compatible with the tests in
# tests/frame/test_api.SharedWithSparse
@pytest.fixture
def float_frame_dense():
"""
Fixture for dense DataFrame of floats with DatetimeIndex
Columns are ['A', 'B', 'C', 'D']; some entries are missing
"""
return DataFrame(data, index=dates)
@pytest.fixture
def float_frame():
"""
Fixture for sparse DataFrame of floats with DatetimeIndex
Columns are ['A', 'B', 'C', 'D']; some entries are missing
"""
# default_kind='block' is the default
return SparseDataFrame(data, index=dates, default_kind="block")
@pytest.fixture
def float_frame_int_kind():
"""
Fixture for sparse DataFrame of floats with DatetimeIndex
Columns are ['A', 'B', 'C', 'D'] and default_kind='integer'.
Some entries are missing.
"""
return SparseDataFrame(data, index=dates, default_kind="integer")
@pytest.fixture
def float_string_frame():
"""
Fixture for sparse DataFrame of floats and strings with DatetimeIndex
Columns are ['A', 'B', 'C', 'D', 'foo']; some entries are missing
"""
sdf = SparseDataFrame(data, index=dates)
sdf["foo"] = SparseArray(["bar"] * len(dates))
return sdf
@pytest.fixture
def float_frame_fill0_dense():
"""
Fixture for dense DataFrame of floats with DatetimeIndex
Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 0
"""
values = SparseDataFrame(data).values
values[np.isnan(values)] = 0
return DataFrame(values, columns=["A", "B", "C", "D"], index=dates)
@pytest.fixture
def float_frame_fill0():
"""
Fixture for sparse DataFrame of floats with DatetimeIndex
Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 0
"""
values = SparseDataFrame(data).values
values[np.isnan(values)] = 0
return SparseDataFrame(
values, columns=["A", "B", "C", "D"], default_fill_value=0, index=dates
)
@pytest.fixture
def float_frame_fill2_dense():
"""
Fixture for dense DataFrame of floats with DatetimeIndex
Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 2
"""
values = SparseDataFrame(data).values
values[np.isnan(values)] = 2
return DataFrame(values, columns=["A", "B", "C", "D"], index=dates)
@pytest.fixture
def float_frame_fill2():
"""
Fixture for sparse DataFrame of floats with DatetimeIndex
Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 2
"""
values = SparseDataFrame(data).values
values[np.isnan(values)] = 2
return SparseDataFrame(
values, columns=["A", "B", "C", "D"], default_fill_value=2, index=dates
)
@pytest.fixture
def empty_frame():
"""
Fixture for empty SparseDataFrame
"""
return SparseDataFrame()
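
How a test might consume these fixtures, as a rough sketch (pytest injects them by name; this assumes a pandas version before 1.0, where SparseDataFrame still exists, and the test name here is hypothetical):

from pandas.util import testing as tm

def test_float_frame_roundtrip(float_frame, float_frame_dense):
    # densifying the sparse fixture should recover the dense fixture exactly
    tm.assert_frame_equal(float_frame.to_dense(), float_frame_dense)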


@@ -0,0 +1,41 @@
import numpy as np
import pytest
from pandas import DataFrame, SparseDataFrame, SparseSeries
from pandas.util import testing as tm
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_quantile():
# GH 17386
data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
q = 0.1
sparse_df = SparseDataFrame(data)
result = sparse_df.quantile(q)
dense_df = DataFrame(data)
dense_expected = dense_df.quantile(q)
sparse_expected = SparseSeries(dense_expected)
tm.assert_series_equal(result, dense_expected)
tm.assert_sp_series_equal(result, sparse_expected)
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_quantile_multi():
# GH 17386
data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
q = [0.1, 0.5]
sparse_df = SparseDataFrame(data)
result = sparse_df.quantile(q)
dense_df = DataFrame(data)
dense_expected = dense_df.quantile(q)
sparse_expected = SparseDataFrame(dense_expected)
tm.assert_frame_equal(result, dense_expected)
tm.assert_sp_frame_equal(result, sparse_expected)
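
For reference, the dense quantile behaviour these xfailed tests compare against, shown as a standalone sketch (plain numpy/pandas, nothing sparse-specific):

import numpy as np
from pandas import DataFrame

data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
df = DataFrame(data)
# quantile interpolates per column and skips the NaN row:
# for q=0.1 this gives 1.2 in column 0 and 2.8 in column 1
print(df.quantile(0.1))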


@@ -0,0 +1,117 @@
import numpy as np
import pytest
from pandas import DataFrame, Series, SparseDataFrame, bdate_range
from pandas.core import nanops
from pandas.core.sparse.api import SparseDtype
from pandas.util import testing as tm
@pytest.fixture
def dates():
return bdate_range("1/1/2011", periods=10)
@pytest.fixture
def empty():
return SparseDataFrame()
@pytest.fixture
def frame(dates):
data = {
"A": [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6],
"B": [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6],
"C": np.arange(10, dtype=np.float64),
"D": [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan],
}
return SparseDataFrame(data, index=dates)
@pytest.fixture
def fill_frame(frame):
values = frame.values.copy()
values[np.isnan(values)] = 2
return SparseDataFrame(
values, columns=["A", "B", "C", "D"], default_fill_value=2, index=frame.index
)
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
def test_apply(frame):
applied = frame.apply(np.sqrt)
assert isinstance(applied, SparseDataFrame)
tm.assert_almost_equal(applied.values, np.sqrt(frame.values))
# agg / broadcast
# two FutureWarnings, so we can't check stacklevel properly.
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
broadcasted = frame.apply(np.sum, broadcast=True)
assert isinstance(broadcasted, SparseDataFrame)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
exp = frame.to_dense().apply(np.sum, broadcast=True)
tm.assert_frame_equal(broadcasted.to_dense(), exp)
applied = frame.apply(np.sum)
tm.assert_series_equal(applied, frame.to_dense().apply(nanops.nansum).to_sparse())
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_apply_fill(fill_frame):
applied = fill_frame.apply(np.sqrt)
assert applied["A"].fill_value == np.sqrt(2)
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_apply_empty(empty):
assert empty.apply(np.sqrt) is empty
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
def test_apply_nonuq():
orig = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"])
sparse = orig.to_sparse()
res = sparse.apply(lambda s: s[0], axis=1)
exp = orig.apply(lambda s: s[0], axis=1)
# dtype must be kept
assert res.dtype == SparseDtype(np.int64)
# ToDo: apply must return subclassed dtype
assert isinstance(res, Series)
tm.assert_series_equal(res.to_dense(), exp)
# df.T breaks
sparse = orig.T.to_sparse()
res = sparse.apply(lambda s: s[0], axis=0) # noqa
exp = orig.T.apply(lambda s: s[0], axis=0)
# TODO: no non-unique columns supported in sparse yet
# tm.assert_series_equal(res.to_dense(), exp)
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_applymap(frame):
# just test that it works
result = frame.applymap(lambda x: x * 2)
assert isinstance(result, SparseDataFrame)
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_apply_keep_sparse_dtype():
# GH 23744
sdf = SparseDataFrame(
np.array([[0, 1, 0], [0, 0, 0], [0, 0, 1]]),
columns=["b", "a", "c"],
default_fill_value=1,
)
df = DataFrame(sdf)
expected = sdf.apply(np.exp)
result = df.apply(np.exp)
tm.assert_frame_equal(expected, result)
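
A compact illustration of the fill_value propagation that test_apply_fill checks, as a sketch only (SparseDataFrame was removed in pandas 1.0, so this assumes an older release such as 0.25):

import numpy as np
from pandas import SparseDataFrame

sdf = SparseDataFrame({"A": [2.0, 4.0, 9.0]}, default_fill_value=2)
applied = sdf.apply(np.sqrt)
# the ufunc is applied to the fill value as well, so it becomes sqrt(2)
assert applied["A"].fill_value == np.sqrt(2)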

File diff suppressed because it is too large


@@ -0,0 +1,103 @@
import numpy as np
import pytest
from pandas import DataFrame, SparseDataFrame
from pandas.util import testing as tm
pytestmark = pytest.mark.skip("Wrong SparseBlock initialization (GH 17386)")
@pytest.mark.parametrize(
"data",
[
[[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]],
[[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]],
[
[1.0, 1.0 + 1.0j],
[2.0 + 2.0j, 2.0],
[3.0, 3.0 + 3.0j],
[4.0 + 4.0j, 4.0],
[np.nan, np.nan],
],
],
)
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_where_with_numeric_data(data):
# GH 17386
lower_bound = 1.5
sparse = SparseDataFrame(data)
result = sparse.where(sparse > lower_bound)
dense = DataFrame(data)
dense_expected = dense.where(dense > lower_bound)
sparse_expected = SparseDataFrame(dense_expected)
tm.assert_frame_equal(result, dense_expected)
tm.assert_sp_frame_equal(result, sparse_expected)
@pytest.mark.parametrize(
"data",
[
[[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]],
[[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]],
[
[1.0, 1.0 + 1.0j],
[2.0 + 2.0j, 2.0],
[3.0, 3.0 + 3.0j],
[4.0 + 4.0j, 4.0],
[np.nan, np.nan],
],
],
)
@pytest.mark.parametrize("other", [True, -100, 0.1, 100.0 + 100.0j])
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_where_with_numeric_data_and_other(data, other):
# GH 17386
lower_bound = 1.5
sparse = SparseDataFrame(data)
result = sparse.where(sparse > lower_bound, other)
dense = DataFrame(data)
dense_expected = dense.where(dense > lower_bound, other)
sparse_expected = SparseDataFrame(dense_expected, default_fill_value=other)
tm.assert_frame_equal(result, dense_expected)
tm.assert_sp_frame_equal(result, sparse_expected)
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_where_with_bool_data():
# GH 17386
data = [[False, False], [True, True], [False, False]]
cond = True
sparse = SparseDataFrame(data)
result = sparse.where(sparse == cond)
dense = DataFrame(data)
dense_expected = dense.where(dense == cond)
sparse_expected = SparseDataFrame(dense_expected)
tm.assert_frame_equal(result, dense_expected)
tm.assert_sp_frame_equal(result, sparse_expected)
@pytest.mark.parametrize("other", [True, 0, 0.1, 100.0 + 100.0j])
@pytest.mark.xfail(reason="Wrong SparseBlock initialization (GH#17386)")
def test_where_with_bool_data_and_other(other):
# GH 17386
data = [[False, False], [True, True], [False, False]]
cond = True
sparse = SparseDataFrame(data)
result = sparse.where(sparse == cond, other)
dense = DataFrame(data)
dense_expected = dense.where(dense == cond, other)
sparse_expected = SparseDataFrame(dense_expected, default_fill_value=other)
tm.assert_frame_equal(result, dense_expected)
tm.assert_sp_frame_equal(result, sparse_expected)
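
The dense where() semantics these tests are written against, shown standalone (ordinary DataFrame, no sparse types involved):

import numpy as np
from pandas import DataFrame

df = DataFrame([[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]])
# entries failing the condition become NaN, or `other` when one is supplied
masked = df.where(df > 1.5)
filled = df.where(df > 1.5, -100)
print(masked)
print(filled)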


@@ -0,0 +1,24 @@
import numpy as np
import pytest
from pandas import SparseDataFrame, read_csv
from pandas.util import testing as tm
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
class TestSparseDataFrameToCsv:
fill_values = [np.nan, 0, None, 1]
@pytest.mark.parametrize("fill_value", fill_values)
def test_to_csv_sparse_dataframe(self, fill_value):
# GH19384
sdf = SparseDataFrame(
{"a": type(self).fill_values}, default_fill_value=fill_value
)
with tm.ensure_clean("sparse_df.csv") as path:
sdf.to_csv(path, index=False)
df = read_csv(path, skip_blank_lines=False)
tm.assert_sp_frame_equal(df.to_sparse(fill_value=fill_value), sdf)
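
The dense CSV round trip behind that test, as a small sketch (tm.ensure_clean is the pandas testing helper used above; it yields a temporary file path):

import numpy as np
from pandas import DataFrame, read_csv
from pandas.util import testing as tm

df = DataFrame({"a": [np.nan, 0, None, 1]})
with tm.ensure_clean("dense_df.csv") as path:
    df.to_csv(path, index=False)
    # NaN rows serialise as blank lines; skip_blank_lines=False keeps them
    roundtripped = read_csv(path, skip_blank_lines=False)
tm.assert_frame_equal(roundtripped, df)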


@@ -0,0 +1,196 @@
import numpy as np
import pytest
from pandas.core.dtypes.common import is_bool_dtype
import pandas as pd
from pandas import SparseDataFrame, SparseSeries
from pandas.core.sparse.api import SparseDtype
from pandas.util import testing as tm
scipy = pytest.importorskip("scipy")
ignore_matrix_warning = pytest.mark.filterwarnings(
"ignore:the matrix subclass:PendingDeprecationWarning"
)
@pytest.mark.parametrize("index", [None, list("abc")]) # noqa: F811
@pytest.mark.parametrize("columns", [None, list("def")])
@pytest.mark.parametrize("fill_value", [None, 0, np.nan])
@pytest.mark.parametrize("dtype", [bool, int, float, np.uint16])
@ignore_matrix_warning
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
# GH 4343
# Make one ndarray and from it one sparse matrix, both to be used for
# constructing frames and comparing results
arr = np.eye(3, dtype=dtype)
# GH 16179
arr[0, 1] = dtype(2)
try:
spm = spmatrix(arr)
assert spm.dtype == arr.dtype
except (TypeError, AssertionError):
# If conversion to sparse fails for this spmatrix type and arr.dtype,
# then the combination is not currently supported in NumPy, so we
# can just skip testing it thoroughly
return
sdf = SparseDataFrame(
spm, index=index, columns=columns, default_fill_value=fill_value
)
# Expected result construction is kind of tricky for all
# dtype-fill_value combinations; easiest to cast to something generic
# and except later on
rarr = arr.astype(object)
rarr[arr == 0] = np.nan
expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
fill_value if fill_value is not None else np.nan
)
# Assert frame is as expected
sdf_obj = sdf.astype(object)
tm.assert_sp_frame_equal(sdf_obj, expected)
tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())
# Assert spmatrices equal
assert dict(sdf.to_coo().todok()) == dict(spm.todok())
# Ensure dtype is preserved if possible
# XXX: verify this
res_dtype = bool if is_bool_dtype(dtype) else dtype
tm.assert_contains_all(
sdf.dtypes.apply(lambda dtype: dtype.subtype), {np.dtype(res_dtype)}
)
assert sdf.to_coo().dtype == res_dtype
# However, adding a str column results in an upcast to object
sdf["strings"] = np.arange(len(sdf)).astype(str)
assert sdf.to_coo().dtype == np.object_
@pytest.mark.parametrize("fill_value", [None, 0, np.nan]) # noqa: F811
@ignore_matrix_warning
@pytest.mark.filterwarnings("ignore:object dtype is not supp:UserWarning")
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_from_to_scipy_object(spmatrix, fill_value):
# GH 4343
dtype = object
columns = list("cd")
index = list("ab")
if spmatrix is scipy.sparse.dok_matrix:
pytest.skip("dok_matrix from object does not work in SciPy")
# Make one ndarray and from it one sparse matrix, both to be used for
# constructing frames and comparing results
arr = np.eye(2, dtype=dtype)
try:
spm = spmatrix(arr)
assert spm.dtype == arr.dtype
except (TypeError, AssertionError):
# If conversion to sparse fails for this spmatrix type and arr.dtype,
# then the combination is not currently supported in NumPy, so we
# can just skip testing it thoroughly
return
sdf = SparseDataFrame(
spm, index=index, columns=columns, default_fill_value=fill_value
)
# Expected result construction is kind of tricky for all
# dtype-fill_value combinations; easiest to cast to something generic
# and except later on
rarr = arr.astype(object)
rarr[arr == 0] = np.nan
expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
fill_value if fill_value is not None else np.nan
)
# Assert frame is as expected
sdf_obj = sdf.astype(SparseDtype(object, fill_value))
tm.assert_sp_frame_equal(sdf_obj, expected)
tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())
# Assert spmatrices equal
assert dict(sdf.to_coo().todok()) == dict(spm.todok())
# Ensure dtype is preserved if possible
res_dtype = object
tm.assert_contains_all(
sdf.dtypes.apply(lambda dtype: dtype.subtype), {np.dtype(res_dtype)}
)
assert sdf.to_coo().dtype == res_dtype
@ignore_matrix_warning
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_from_scipy_correct_ordering(spmatrix):
# GH 16179
arr = np.arange(1, 5).reshape(2, 2)
try:
spm = spmatrix(arr)
assert spm.dtype == arr.dtype
except (TypeError, AssertionError):
# If conversion to sparse fails for this spmatrix type and arr.dtype,
# then the combination is not currently supported in NumPy, so we
# can just skip testing it thoroughly
return
sdf = SparseDataFrame(spm)
expected = SparseDataFrame(arr)
tm.assert_sp_frame_equal(sdf, expected)
tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
@ignore_matrix_warning
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
def test_from_scipy_fillna(spmatrix):
# GH 16112
arr = np.eye(3)
arr[1:, 0] = np.nan
try:
spm = spmatrix(arr)
assert spm.dtype == arr.dtype
except (TypeError, AssertionError):
# If conversion to sparse fails for this spmatrix type and arr.dtype,
# then the combination is not currently supported in NumPy, so we
# can just skip testing it thoroughly
return
sdf = SparseDataFrame(spm).fillna(-1.0)
# Returning frame should fill all nan values with -1.0
expected = SparseDataFrame(
{
0: SparseSeries([1.0, -1, -1]),
1: SparseSeries([np.nan, 1, np.nan]),
2: SparseSeries([np.nan, np.nan, 1]),
},
default_fill_value=-1,
)
# fill_value is expected to be what .fillna() above was called with
# We don't use -1 as initial fill_value in expected SparseSeries
# construction because this way we obtain "compressed" SparseArrays,
# avoiding having to construct them ourselves
for col in expected:
expected[col].fill_value = -1
tm.assert_sp_frame_equal(sdf, expected)
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
def test_index_names_multiple_nones():
# https://github.com/pandas-dev/pandas/pull/24092
sparse = pytest.importorskip("scipy.sparse")
s = pd.Series(1, index=pd.MultiIndex.from_product([["A", "B"], [0, 1]])).to_sparse()
result, _, _ = s.to_coo()
assert isinstance(result, sparse.coo_matrix)
result = result.toarray()
expected = np.ones((2, 2), dtype="int64")
tm.assert_numpy_array_equal(result, expected)
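
For completeness, the non-deprecated path for the same scipy round trip, sketched with the sparse accessor API that pandas 0.25 introduced alongside these tests (DataFrame.sparse.from_spmatrix and .sparse.to_coo):

import numpy as np
import pandas as pd
from scipy import sparse

arr = np.eye(3)
arr[0, 1] = 2.0
spm = sparse.csr_matrix(arr)
# a regular DataFrame whose columns are SparseArrays, rather than a SparseDataFrame
df = pd.DataFrame.sparse.from_spmatrix(spm, columns=list("abc"))
coo = df.sparse.to_coo()  # back to a scipy COO matrix
assert (coo.toarray() == arr).all()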