import numpy as np
import pytest

from pandas.core.dtypes.common import is_integer

import pandas as pd
from pandas import Index, Series
from pandas.core.indexes.datetimes import Timestamp
import pandas.util.testing as tm

from .common import TestData


class TestSeriesQuantile(TestData):
    """Tests for ``Series.quantile``.

    Covers scalar and list-like quantiles on numeric, object, datetime-like
    and sparse data, as well as NaN/NaT-only and empty inputs.

    ``self.ts`` is supplied by ``TestData`` (defined outside this module);
    presumably a float time series with a DatetimeIndex -- confirm against
    ``.common`` if the expected timestamps below ever need to change.
    """

    def test_quantile(self):
        """Scalar quantiles agree with ``np.percentile``; bad ``q`` raises."""
        valid = self.ts.dropna()

        q = self.ts.quantile(0.1)
        assert q == np.percentile(valid, 10)

        q = self.ts.quantile(0.9)
        assert q == np.percentile(valid, 90)

        # object dtype
        q = Series(self.ts, dtype=object).quantile(0.9)
        assert q == np.percentile(valid, 90)

        # datetime64[ns] dtype
        dts = self.ts.index.to_series()
        q = dts.quantile(0.2)
        assert q == Timestamp("2000-01-10 19:12:00")

        # timedelta64[ns] dtype
        tds = dts.diff()
        q = tds.quantile(0.25)
        assert q == pd.to_timedelta("24:00:00")

        # GH7661
        result = Series([np.timedelta64("NaT")]).sum()
        assert result == pd.Timedelta(0)

        # Every quantile (scalar or any element of a list) must lie in [0, 1].
        msg = "percentiles should all be in the interval \\[0, 1\\]"
        for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
            with pytest.raises(ValueError, match=msg):
                self.ts.quantile(invalid)

    def test_quantile_multi(self):
        """List-like ``q`` returns a Series indexed by the quantiles."""
        valid = self.ts.dropna()

        qs = [0.1, 0.9]
        result = self.ts.quantile(qs)
        expected = pd.Series(
            [np.percentile(valid, 10), np.percentile(valid, 90)],
            index=qs,
            name=self.ts.name,
        )
        tm.assert_series_equal(result, expected)

        # Duplicate quantiles are kept, and the Series name is propagated.
        dts = self.ts.index.to_series()
        dts.name = "xxx"
        result = dts.quantile((0.2, 0.2))
        expected = Series(
            [Timestamp("2000-01-10 19:12:00"), Timestamp("2000-01-10 19:12:00")],
            index=[0.2, 0.2],
            name="xxx",
        )
        tm.assert_series_equal(result, expected)

        # Empty list of quantiles. The dtype is spelled out rather than
        # relying on the implicit default dtype of an empty Series.
        result = self.ts.quantile([])
        expected = pd.Series(
            [], name=self.ts.name, index=Index([], dtype=float), dtype="float64"
        )
        tm.assert_series_equal(result, expected)

    def test_quantile_interpolation(self):
        """Explicit ``interpolation="linear"`` matches the default."""
        # see gh-10174
        valid = self.ts.dropna()

        # interpolation = linear (default case)
        q = self.ts.quantile(0.1, interpolation="linear")
        assert q == np.percentile(valid, 10)
        q1 = self.ts.quantile(0.1)
        assert q1 == np.percentile(valid, 10)

        # test with and without interpolation keyword
        assert q == q1

    def test_quantile_interpolation_dtype(self):
        """``lower``/``higher`` interpolation yields an integer result."""
        # GH #10174
        expected = np.percentile(np.array([1, 3, 4]), 50)

        # interpolation = lower
        q = pd.Series([1, 3, 4]).quantile(0.5, interpolation="lower")
        assert q == expected
        assert is_integer(q)

        # interpolation = higher
        q = pd.Series([1, 3, 4]).quantile(0.5, interpolation="higher")
        assert q == expected
        assert is_integer(q)

    def test_quantile_nan(self):
        """NaN is skipped; all-NaN or empty input gives NaN quantiles."""
        # GH 13098
        s = pd.Series([1, 2, 3, 4, np.nan])
        result = s.quantile(0.5)
        expected = 2.5
        assert result == expected

        # all nan/empty
        # The empty case pins its dtype explicitly instead of depending on
        # the implicit default dtype of an empty Series.
        cases = [Series([], dtype=float), Series([np.nan, np.nan])]

        for ser in cases:
            res = ser.quantile(0.5)
            assert np.isnan(res)

            res = ser.quantile([0.5])
            tm.assert_series_equal(res, pd.Series([np.nan], index=[0.5]))

            res = ser.quantile([0.2, 0.3])
            tm.assert_series_equal(
                res, pd.Series([np.nan, np.nan], index=[0.2, 0.3])
            )

    @pytest.mark.parametrize(
        "case",
        [
            [
                pd.Timestamp("2011-01-01"),
                pd.Timestamp("2011-01-02"),
                pd.Timestamp("2011-01-03"),
            ],
            [
                pd.Timestamp("2011-01-01", tz="US/Eastern"),
                pd.Timestamp("2011-01-02", tz="US/Eastern"),
                pd.Timestamp("2011-01-03", tz="US/Eastern"),
            ],
            [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
            # NaT
            [
                pd.Timestamp("2011-01-01"),
                pd.Timestamp("2011-01-02"),
                pd.Timestamp("2011-01-03"),
                pd.NaT,
            ],
            [
                pd.Timestamp("2011-01-01", tz="US/Eastern"),
                pd.Timestamp("2011-01-02", tz="US/Eastern"),
                pd.Timestamp("2011-01-03", tz="US/Eastern"),
                pd.NaT,
            ],
            [
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
                pd.Timedelta("3 days"),
                pd.NaT,
            ],
        ],
    )
    def test_quantile_box(self, case):
        """The median of datetime-like data equals the middle element.

        Each ``case`` holds three ordered values (optionally followed by
        ``NaT``), so the expected 0.5 quantile is always ``case[1]``.
        """
        s = pd.Series(case, name="XXX")
        res = s.quantile(0.5)
        assert res == case[1]

        res = s.quantile([0.5])
        exp = pd.Series([case[1]], index=[0.5], name="XXX")
        tm.assert_series_equal(res, exp)

    def test_datetime_timedelta_quantiles(self):
        """Empty datetime64/timedelta64 Series give a missing quantile."""
        # covers #9694
        assert pd.isna(Series([], dtype="M8[ns]").quantile(0.5))
        assert pd.isna(Series([], dtype="m8[ns]").quantile(0.5))

    def test_quantile_nat(self):
        """An all-NaT Series yields NaT for scalar and list-like ``q``."""
        res = Series([pd.NaT, pd.NaT]).quantile(0.5)
        assert res is pd.NaT

        res = Series([pd.NaT, pd.NaT]).quantile([0.5])
        tm.assert_series_equal(res, pd.Series([pd.NaT], index=[0.5]))

    @pytest.mark.parametrize(
        "values, dtype",
        [([0, 0, 0, 1, 2, 3], "Sparse[int]"), ([0.0, None, 1.0, 2.0], "Sparse[float]")],
    )
    def test_quantile_sparse(self, values, dtype):
        """Sparse-backed Series match the quantile of their dense values."""
        ser = pd.Series(values, dtype=dtype)
        result = ser.quantile([0.5])
        expected = pd.Series(np.asarray(ser)).quantile([0.5])
        tm.assert_series_equal(result, expected)

    def test_quantile_empty(self):
        """Empty Series give NaN (numeric) or NaT (datetime) quantiles."""
        # floats and ints share the same expectations
        for numeric_dtype in ["float64", "int64"]:
            s = Series([], dtype=numeric_dtype)

            res = s.quantile(0.5)
            assert np.isnan(res)

            res = s.quantile([0.5])
            exp = Series([np.nan], index=[0.5])
            tm.assert_series_equal(res, exp)

        # datetime
        s = Series([], dtype="datetime64[ns]")

        res = s.quantile(0.5)
        assert res is pd.NaT

        res = s.quantile([0.5])
        exp = Series([pd.NaT], index=[0.5])
        tm.assert_series_equal(res, exp)