from datetime import datetime import numpy as np import pytest import pandas as pd from pandas import NaT, Series, Timestamp from pandas.core.internals.blocks import IntBlock import pandas.util.testing as tm from pandas.util.testing import assert_series_equal class TestSeriesInternals: # GH 10265 def test_convert(self): # Tests: All to nans, coerce, true # Test coercion returns correct type s = Series(["a", "b", "c"]) results = s._convert(datetime=True, coerce=True) expected = Series([NaT] * 3) assert_series_equal(results, expected) results = s._convert(numeric=True, coerce=True) expected = Series([np.nan] * 3) assert_series_equal(results, expected) expected = Series([NaT] * 3, dtype=np.dtype("m8[ns]")) results = s._convert(timedelta=True, coerce=True) assert_series_equal(results, expected) dt = datetime(2001, 1, 1, 0, 0) td = dt - datetime(2000, 1, 1, 0, 0) # Test coercion with mixed types s = Series(["a", "3.1415", dt, td]) results = s._convert(datetime=True, coerce=True) expected = Series([NaT, NaT, dt, NaT]) assert_series_equal(results, expected) results = s._convert(numeric=True, coerce=True) expected = Series([np.nan, 3.1415, np.nan, np.nan]) assert_series_equal(results, expected) results = s._convert(timedelta=True, coerce=True) expected = Series([NaT, NaT, NaT, td], dtype=np.dtype("m8[ns]")) assert_series_equal(results, expected) # Test standard conversion returns original results = s._convert(datetime=True) assert_series_equal(results, s) results = s._convert(numeric=True) expected = Series([np.nan, 3.1415, np.nan, np.nan]) assert_series_equal(results, expected) results = s._convert(timedelta=True) assert_series_equal(results, s) # test pass-through and non-conversion when other types selected s = Series(["1.0", "2.0", "3.0"]) results = s._convert(datetime=True, numeric=True, timedelta=True) expected = Series([1.0, 2.0, 3.0]) assert_series_equal(results, expected) results = s._convert(True, False, True) assert_series_equal(results, s) s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype="O") results = s._convert(datetime=True, numeric=True, timedelta=True) expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)]) assert_series_equal(results, expected) results = s._convert(datetime=False, numeric=True, timedelta=True) assert_series_equal(results, s) td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0) s = Series([td, td], dtype="O") results = s._convert(datetime=True, numeric=True, timedelta=True) expected = Series([td, td]) assert_series_equal(results, expected) results = s._convert(True, True, False) assert_series_equal(results, s) s = Series([1.0, 2, 3], index=["a", "b", "c"]) result = s._convert(numeric=True) assert_series_equal(result, s) # force numeric conversion r = s.copy().astype("O") r["a"] = "1" result = r._convert(numeric=True) assert_series_equal(result, s) r = s.copy().astype("O") r["a"] = "1." result = r._convert(numeric=True) assert_series_equal(result, s) r = s.copy().astype("O") r["a"] = "garbled" result = r._convert(numeric=True) expected = s.copy() expected["a"] = np.nan assert_series_equal(result, expected) # GH 4119, not converting a mixed type (e.g.floats and object) s = Series([1, "na", 3, 4]) result = s._convert(datetime=True, numeric=True) expected = Series([1, np.nan, 3, 4]) assert_series_equal(result, expected) s = Series([1, "", 3, 4]) result = s._convert(datetime=True, numeric=True) assert_series_equal(result, expected) # dates s = Series( [ datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), datetime(2001, 1, 3, 0, 0), ] ) s2 = Series( [ datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), datetime(2001, 1, 3, 0, 0), "foo", 1.0, 1, Timestamp("20010104"), "20010105", ], dtype="O", ) result = s._convert(datetime=True) expected = Series( [Timestamp("20010101"), Timestamp("20010102"), Timestamp("20010103")], dtype="M8[ns]", ) assert_series_equal(result, expected) result = s._convert(datetime=True, coerce=True) assert_series_equal(result, expected) expected = Series( [ Timestamp("20010101"), Timestamp("20010102"), Timestamp("20010103"), NaT, NaT, NaT, Timestamp("20010104"), Timestamp("20010105"), ], dtype="M8[ns]", ) result = s2._convert(datetime=True, numeric=False, timedelta=False, coerce=True) assert_series_equal(result, expected) result = s2._convert(datetime=True, coerce=True) assert_series_equal(result, expected) s = Series(["foo", "bar", 1, 1.0], dtype="O") result = s._convert(datetime=True, coerce=True) expected = Series([NaT] * 2 + [Timestamp(1)] * 2) assert_series_equal(result, expected) # preserver if non-object s = Series([1], dtype="float32") result = s._convert(datetime=True, coerce=True) assert_series_equal(result, s) # r = s.copy() # r[0] = np.nan # result = r._convert(convert_dates=True,convert_numeric=False) # assert result.dtype == 'M8[ns]' # dateutil parses some single letters into today's value as a date expected = Series([NaT]) for x in "abcdefghijklmnopqrstuvwxyz": s = Series([x]) result = s._convert(datetime=True, coerce=True) assert_series_equal(result, expected) s = Series([x.upper()]) result = s._convert(datetime=True, coerce=True) assert_series_equal(result, expected) def test_convert_no_arg_error(self): s = Series(["1.0", "2"]) msg = r"At least one of datetime, numeric or timedelta must be True\." with pytest.raises(ValueError, match=msg): s._convert() def test_convert_preserve_bool(self): s = Series([1, True, 3, 5], dtype=object) r = s._convert(datetime=True, numeric=True) e = Series([1, 1, 3, 5], dtype="i8") tm.assert_series_equal(r, e) def test_convert_preserve_all_bool(self): s = Series([False, True, False, False], dtype=object) r = s._convert(datetime=True, numeric=True) e = Series([False, True, False, False], dtype=bool) tm.assert_series_equal(r, e) def test_constructor_no_pandas_array(self): ser = pd.Series([1, 2, 3]) result = pd.Series(ser.array) tm.assert_series_equal(ser, result) assert isinstance(result._data.blocks[0], IntBlock) def test_astype_no_pandas_dtype(self): # https://github.com/pandas-dev/pandas/pull/24866 ser = pd.Series([1, 2], dtype="int64") # Don't have PandasDtype in the public API, so we use `.array.dtype`, # which is a PandasDtype. result = ser.astype(ser.array.dtype) tm.assert_series_equal(result, ser) def test_from_array(self): result = pd.Series(pd.array(["1H", "2H"], dtype="timedelta64[ns]")) assert result._data.blocks[0].is_extension is False result = pd.Series(pd.array(["2015"], dtype="datetime64[ns]")) assert result._data.blocks[0].is_extension is False def test_from_list_dtype(self): result = pd.Series(["1H", "2H"], dtype="timedelta64[ns]") assert result._data.blocks[0].is_extension is False result = pd.Series(["2015"], dtype="datetime64[ns]") assert result._data.blocks[0].is_extension is False def test_hasnans_unchached_for_series(): # GH#19700 idx = pd.Index([0, 1]) assert idx.hasnans is False assert "hasnans" in idx._cache ser = idx.to_series() assert ser.hasnans is False assert not hasattr(ser, "_cache") ser.iloc[-1] = np.nan assert ser.hasnans is True assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__ def test_put_deprecated(): # GH 18262 s = pd.Series([1]) with tm.assert_produces_warning(FutureWarning): s.put(0, 0)