import numpy as np from pandas.util._decorators import cache_readonly import pandas as pd import pandas.util.testing as tm _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() _frame = pd.DataFrame(_seriesd) _frame2 = pd.DataFrame(_seriesd, columns=["D", "C", "B", "A"]) _intframe = pd.DataFrame({k: v.astype(int) for k, v in _seriesd.items()}) _tsframe = pd.DataFrame(_tsd) _mixed_frame = _frame.copy() _mixed_frame["foo"] = "bar" class TestData: @cache_readonly def frame(self): return _frame.copy() @cache_readonly def frame2(self): return _frame2.copy() @cache_readonly def intframe(self): # force these all to int64 to avoid platform testing issues return pd.DataFrame({c: s for c, s in _intframe.items()}, dtype=np.int64) @cache_readonly def tsframe(self): return _tsframe.copy() @cache_readonly def mixed_frame(self): return _mixed_frame.copy() @cache_readonly def mixed_float(self): return pd.DataFrame( { "A": _frame["A"].copy().astype("float32"), "B": _frame["B"].copy().astype("float32"), "C": _frame["C"].copy().astype("float16"), "D": _frame["D"].copy().astype("float64"), } ) @cache_readonly def mixed_float2(self): return pd.DataFrame( { "A": _frame2["A"].copy().astype("float32"), "B": _frame2["B"].copy().astype("float32"), "C": _frame2["C"].copy().astype("float16"), "D": _frame2["D"].copy().astype("float64"), } ) @cache_readonly def mixed_int(self): return pd.DataFrame( { "A": _intframe["A"].copy().astype("int32"), "B": np.ones(len(_intframe["B"]), dtype="uint64"), "C": _intframe["C"].copy().astype("uint8"), "D": _intframe["D"].copy().astype("int64"), } ) @cache_readonly def all_mixed(self): return pd.DataFrame( { "a": 1.0, "b": 2, "c": "foo", "float32": np.array([1.0] * 10, dtype="float32"), "int32": np.array([1] * 10, dtype="int32"), }, index=np.arange(10), ) @cache_readonly def tzframe(self): result = pd.DataFrame( { "A": pd.date_range("20130101", periods=3), "B": pd.date_range("20130101", periods=3, tz="US/Eastern"), "C": pd.date_range("20130101", periods=3, tz="CET"), } ) result.iloc[1, 1] = pd.NaT result.iloc[1, 2] = pd.NaT return result @cache_readonly def empty(self): return pd.DataFrame() @cache_readonly def ts1(self): return tm.makeTimeSeries(nper=30) @cache_readonly def ts2(self): return tm.makeTimeSeries(nper=30)[5:] @cache_readonly def simple(self): arr = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]) return pd.DataFrame(arr, columns=["one", "two", "three"], index=["a", "b", "c"]) # self.ts3 = tm.makeTimeSeries()[-5:] # self.ts4 = tm.makeTimeSeries()[1:-1] def _check_mixed_float(df, dtype=None): # float16 are most likely to be upcasted to float32 dtypes = dict(A="float32", B="float32", C="float16", D="float64") if isinstance(dtype, str): dtypes = {k: dtype for k, v in dtypes.items()} elif isinstance(dtype, dict): dtypes.update(dtype) if dtypes.get("A"): assert df.dtypes["A"] == dtypes["A"] if dtypes.get("B"): assert df.dtypes["B"] == dtypes["B"] if dtypes.get("C"): assert df.dtypes["C"] == dtypes["C"] if dtypes.get("D"): assert df.dtypes["D"] == dtypes["D"] def _check_mixed_int(df, dtype=None): dtypes = dict(A="int32", B="uint64", C="uint8", D="int64") if isinstance(dtype, str): dtypes = {k: dtype for k, v in dtypes.items()} elif isinstance(dtype, dict): dtypes.update(dtype) if dtypes.get("A"): assert df.dtypes["A"] == dtypes["A"] if dtypes.get("B"): assert df.dtypes["B"] == dtypes["B"] if dtypes.get("C"): assert df.dtypes["C"] == dtypes["C"] if dtypes.get("D"): assert df.dtypes["D"] == dtypes["D"]