8th day of Python challenges 111-117

abd.shallal
2019-08-04 15:26:35 +03:00
parent b04c1b055f
commit 627802c383
3215 changed files with 760227 additions and 491 deletions


@@ -0,0 +1,123 @@
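"""Tests for DatetimeIndex.shift: timezone-aware indexes, explicit frequencies,
deprecated integer addition, and the error raised when the index has no frequency."""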
from datetime import datetime
import pytest
import pytz
from pandas.errors import NullFrequencyError
import pandas as pd
from pandas import DatetimeIndex, Series, date_range
import pandas.util.testing as tm
class TestDatetimeIndexArithmetic:
# -------------------------------------------------------------
# DatetimeIndex.shift is used in integer addition
def test_dti_shift_tzaware(self, tz_naive_fixture):
# GH#9903
tz = tz_naive_fixture
idx = pd.DatetimeIndex([], name="xxx", tz=tz)
tm.assert_index_equal(idx.shift(0, freq="H"), idx)
tm.assert_index_equal(idx.shift(3, freq="H"), idx)
idx = pd.DatetimeIndex(
["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"],
name="xxx",
tz=tz,
)
tm.assert_index_equal(idx.shift(0, freq="H"), idx)
exp = pd.DatetimeIndex(
["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"],
name="xxx",
tz=tz,
)
tm.assert_index_equal(idx.shift(3, freq="H"), exp)
exp = pd.DatetimeIndex(
["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"],
name="xxx",
tz=tz,
)
tm.assert_index_equal(idx.shift(-3, freq="H"), exp)
def test_dti_shift_freqs(self):
# test shift for DatetimeIndex and non-DatetimeIndex
# GH#8083
drange = pd.date_range("20130101", periods=5)
result = drange.shift(1)
expected = pd.DatetimeIndex(
["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"],
freq="D",
)
tm.assert_index_equal(result, expected)
result = drange.shift(-1)
expected = pd.DatetimeIndex(
["2012-12-31", "2013-01-01", "2013-01-02", "2013-01-03", "2013-01-04"],
freq="D",
)
tm.assert_index_equal(result, expected)
result = drange.shift(3, freq="2D")
expected = pd.DatetimeIndex(
["2013-01-07", "2013-01-08", "2013-01-09", "2013-01-10", "2013-01-11"],
freq="D",
)
tm.assert_index_equal(result, expected)
def test_dti_shift_int(self):
rng = date_range("1/1/2000", periods=20)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# GH#22535
result = rng + 5
expected = rng.shift(5)
tm.assert_index_equal(result, expected)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# GH#22535
result = rng - 5
expected = rng.shift(-5)
tm.assert_index_equal(result, expected)
def test_dti_shift_no_freq(self):
# GH#19147
dti = pd.DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None)
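# without a set freq, shift(periods) has no step size and should raise NullFrequencyError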
with pytest.raises(NullFrequencyError):
dti.shift(2)
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_shift_localized(self, tzstr):
dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
dr_tz = dr.tz_localize(tzstr)
result = dr_tz.shift(1, "10T")
assert result.tz == dr_tz.tz
def test_dti_shift_across_dst(self):
# GH 8616
idx = date_range("2013-11-03", tz="America/Chicago", periods=7, freq="H")
s = Series(index=idx[:-1])
result = s.shift(freq="H")
expected = Series(index=idx[1:])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"shift, result_time",
[
[0, "2014-11-14 00:00:00"],
[-1, "2014-11-13 23:00:00"],
[1, "2014-11-14 01:00:00"],
],
)
def test_dti_shift_near_midnight(self, shift, result_time):
# GH 8616
dt = datetime(2014, 11, 14, 0)
dt_est = pytz.timezone("EST").localize(dt)
s = Series(data=[1], index=[dt_est])
result = s.shift(shift, freq="H")
expected = Series(1, index=DatetimeIndex([result_time], tz="EST"))
tm.assert_series_equal(result, expected)


@@ -0,0 +1,378 @@
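"""Tests for DatetimeIndex dtype conversions: astype to object, integer, string,
categorical and tz-aware dtypes, conversion to arrays of python datetimes, and to_period."""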
from datetime import datetime
import dateutil
from dateutil.tz import tzlocal
import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import (
DatetimeIndex,
Index,
Int64Index,
NaT,
Period,
Series,
Timestamp,
date_range,
)
import pandas.util.testing as tm
class TestDatetimeIndex:
def test_astype(self):
# GH 13149, GH 13209
idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN])
result = idx.astype(object)
expected = Index([Timestamp("2016-05-16")] + [NaT] * 3, dtype=object)
tm.assert_index_equal(result, expected)
result = idx.astype(int)
expected = Int64Index(
[1463356800000000000] + [-9223372036854775808] * 3, dtype=np.int64
)
tm.assert_index_equal(result, expected)
rng = date_range("1/1/2000", periods=10)
result = rng.astype("i8")
tm.assert_index_equal(result, Index(rng.asi8))
tm.assert_numpy_array_equal(result.values, rng.asi8)
def test_astype_uint(self):
arr = date_range("2000", periods=2)
expected = pd.UInt64Index(
np.array([946684800000000000, 946771200000000000], dtype="uint64")
)
tm.assert_index_equal(arr.astype("uint64"), expected)
tm.assert_index_equal(arr.astype("uint32"), expected)
def test_astype_with_tz(self):
# with tz
rng = date_range("1/1/2000", periods=10, tz="US/Eastern")
result = rng.astype("datetime64[ns]")
expected = (
date_range("1/1/2000", periods=10, tz="US/Eastern")
.tz_convert("UTC")
.tz_localize(None)
)
tm.assert_index_equal(result, expected)
# GH#10442: testing that astype(str) is correct for Series/DatetimeIndex
result = pd.Series(pd.date_range("2012-01-01", periods=3)).astype(str)
expected = pd.Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object)
tm.assert_series_equal(result, expected)
result = Series(pd.date_range("2012-01-01", periods=3, tz="US/Eastern")).astype(
str
)
expected = Series(
[
"2012-01-01 00:00:00-05:00",
"2012-01-02 00:00:00-05:00",
"2012-01-03 00:00:00-05:00",
],
dtype=object,
)
tm.assert_series_equal(result, expected)
# GH 18951: tz-aware to tz-aware
idx = date_range("20170101", periods=4, tz="US/Pacific")
result = idx.astype("datetime64[ns, US/Eastern]")
expected = date_range("20170101 03:00:00", periods=4, tz="US/Eastern")
tm.assert_index_equal(result, expected)
# GH 18951: tz-naive to tz-aware
idx = date_range("20170101", periods=4)
result = idx.astype("datetime64[ns, US/Eastern]")
expected = date_range("20170101", periods=4, tz="US/Eastern")
tm.assert_index_equal(result, expected)
def test_astype_str_compat(self):
# GH 13149, GH 13209
# verify that we are returning NaT as a string (and not unicode)
idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN])
result = idx.astype(str)
expected = Index(["2016-05-16", "NaT", "NaT", "NaT"], dtype=object)
tm.assert_index_equal(result, expected)
def test_astype_str(self):
# test astype string - #10442
result = date_range("2012-01-01", periods=4, name="test_name").astype(str)
expected = Index(
["2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04"],
name="test_name",
dtype=object,
)
tm.assert_index_equal(result, expected)
# test astype string with tz and name
result = date_range(
"2012-01-01", periods=3, name="test_name", tz="US/Eastern"
).astype(str)
expected = Index(
[
"2012-01-01 00:00:00-05:00",
"2012-01-02 00:00:00-05:00",
"2012-01-03 00:00:00-05:00",
],
name="test_name",
dtype=object,
)
tm.assert_index_equal(result, expected)
# test astype string with freq='H' and name
result = date_range("1/1/2011", periods=3, freq="H", name="test_name").astype(
str
)
expected = Index(
["2011-01-01 00:00:00", "2011-01-01 01:00:00", "2011-01-01 02:00:00"],
name="test_name",
dtype=object,
)
tm.assert_index_equal(result, expected)
# test astype string with freq='H' and timezone
result = date_range(
"3/6/2012 00:00", periods=2, freq="H", tz="Europe/London", name="test_name"
).astype(str)
expected = Index(
["2012-03-06 00:00:00+00:00", "2012-03-06 01:00:00+00:00"],
dtype=object,
name="test_name",
)
tm.assert_index_equal(result, expected)
def test_astype_datetime64(self):
# GH 13149, GH 13209
idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN])
result = idx.astype("datetime64[ns]")
tm.assert_index_equal(result, idx)
assert result is not idx
result = idx.astype("datetime64[ns]", copy=False)
tm.assert_index_equal(result, idx)
assert result is idx
idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], tz="EST")
result = idx_tz.astype("datetime64[ns]")
expected = DatetimeIndex(
["2016-05-16 05:00:00", "NaT", "NaT", "NaT"], dtype="datetime64[ns]"
)
tm.assert_index_equal(result, expected)
def test_astype_object(self):
rng = date_range("1/1/2000", periods=20)
casted = rng.astype("O")
exp_values = list(rng)
tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_))
assert casted.tolist() == exp_values
@pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
def test_astype_object_tz(self, tz):
idx = pd.date_range(start="2013-01-01", periods=4, freq="M", name="idx", tz=tz)
expected_list = [
Timestamp("2013-01-31", tz=tz),
Timestamp("2013-02-28", tz=tz),
Timestamp("2013-03-31", tz=tz),
Timestamp("2013-04-30", tz=tz),
]
expected = pd.Index(expected_list, dtype=object, name="idx")
result = idx.astype(object)
tm.assert_index_equal(result, expected)
assert idx.tolist() == expected_list
def test_astype_object_with_nat(self):
idx = DatetimeIndex(
[datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, datetime(2013, 1, 4)],
name="idx",
)
expected_list = [
Timestamp("2013-01-01"),
Timestamp("2013-01-02"),
pd.NaT,
Timestamp("2013-01-04"),
]
expected = pd.Index(expected_list, dtype=object, name="idx")
result = idx.astype(object)
tm.assert_index_equal(result, expected)
assert idx.tolist() == expected_list
@pytest.mark.parametrize(
"dtype",
[float, "timedelta64", "timedelta64[ns]", "datetime64", "datetime64[D]"],
)
def test_astype_raises(self, dtype):
# GH 13149, GH 13209
idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN])
msg = "Cannot cast DatetimeArray to dtype"
with pytest.raises(TypeError, match=msg):
idx.astype(dtype)
def test_index_convert_to_datetime_array(self):
def _check_rng(rng):
converted = rng.to_pydatetime()
assert isinstance(converted, np.ndarray)
for x, stamp in zip(converted, rng):
assert isinstance(x, datetime)
assert x == stamp.to_pydatetime()
assert x.tzinfo == stamp.tzinfo
rng = date_range("20090415", "20090519")
rng_eastern = date_range("20090415", "20090519", tz="US/Eastern")
rng_utc = date_range("20090415", "20090519", tz="utc")
_check_rng(rng)
_check_rng(rng_eastern)
_check_rng(rng_utc)
def test_index_convert_to_datetime_array_explicit_pytz(self):
def _check_rng(rng):
converted = rng.to_pydatetime()
assert isinstance(converted, np.ndarray)
for x, stamp in zip(converted, rng):
assert isinstance(x, datetime)
assert x == stamp.to_pydatetime()
assert x.tzinfo == stamp.tzinfo
rng = date_range("20090415", "20090519")
rng_eastern = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern"))
rng_utc = date_range("20090415", "20090519", tz=pytz.utc)
_check_rng(rng)
_check_rng(rng_eastern)
_check_rng(rng_utc)
def test_index_convert_to_datetime_array_dateutil(self):
def _check_rng(rng):
converted = rng.to_pydatetime()
assert isinstance(converted, np.ndarray)
for x, stamp in zip(converted, rng):
assert isinstance(x, datetime)
assert x == stamp.to_pydatetime()
assert x.tzinfo == stamp.tzinfo
rng = date_range("20090415", "20090519")
rng_eastern = date_range("20090415", "20090519", tz="dateutil/US/Eastern")
rng_utc = date_range("20090415", "20090519", tz=dateutil.tz.tzutc())
_check_rng(rng)
_check_rng(rng_eastern)
_check_rng(rng_utc)
@pytest.mark.parametrize(
"tz, dtype",
[["US/Pacific", "datetime64[ns, US/Pacific]"], [None, "datetime64[ns]"]],
)
def test_integer_index_astype_datetime(self, tz, dtype):
# GH 20997, 20964, 24559
val = [pd.Timestamp("2018-01-01", tz=tz).value]
result = pd.Index(val).astype(dtype)
expected = pd.DatetimeIndex(["2018-01-01"], tz=tz)
tm.assert_index_equal(result, expected)
class TestToPeriod:
def setup_method(self, method):
data = [
Timestamp("2007-01-01 10:11:12.123456Z"),
Timestamp("2007-01-01 10:11:13.789123Z"),
]
self.index = DatetimeIndex(data)
def test_to_period_millisecond(self):
index = self.index
with tm.assert_produces_warning(UserWarning):
# warning that timezone info will be lost
period = index.to_period(freq="L")
assert 2 == len(period)
assert period[0] == Period("2007-01-01 10:11:12.123Z", "L")
assert period[1] == Period("2007-01-01 10:11:13.789Z", "L")
def test_to_period_microsecond(self):
index = self.index
with tm.assert_produces_warning(UserWarning):
# warning that timezone info will be lost
period = index.to_period(freq="U")
assert 2 == len(period)
assert period[0] == Period("2007-01-01 10:11:12.123456Z", "U")
assert period[1] == Period("2007-01-01 10:11:13.789123Z", "U")
@pytest.mark.parametrize(
"tz",
["US/Eastern", pytz.utc, tzlocal(), "dateutil/US/Eastern", dateutil.tz.tzutc()],
)
def test_to_period_tz(self, tz):
ts = date_range("1/1/2000", "2/1/2000", tz=tz)
with tm.assert_produces_warning(UserWarning):
# GH#21333 warning that timezone info will be lost
result = ts.to_period()[0]
expected = ts[0].to_period()
assert result == expected
expected = date_range("1/1/2000", "2/1/2000").to_period()
with tm.assert_produces_warning(UserWarning):
# GH#21333 warning that timezone info will be lost
result = ts.to_period()
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("tz", ["Etc/GMT-1", "Etc/GMT+1"])
def test_to_period_tz_utc_offset_consistency(self, tz):
# GH 22905
ts = pd.date_range("1/1/2000", "2/1/2000", tz=tz)
with tm.assert_produces_warning(UserWarning):
result = ts.to_period()[0]
expected = ts[0].to_period()
assert result == expected
def test_to_period_nofreq(self):
idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"])
with pytest.raises(ValueError):
idx.to_period()
idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="infer")
assert idx.freqstr == "D"
expected = pd.PeriodIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="D")
tm.assert_index_equal(idx.to_period(), expected)
# GH 7606
idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])
assert idx.freqstr is None
tm.assert_index_equal(idx.to_period(), expected)
@pytest.mark.parametrize("tz", [None, "US/Central"])
def test_astype_category(self, tz):
obj = pd.date_range("2000", periods=2, tz=tz)
result = obj.astype("category")
expected = pd.CategoricalIndex(
[pd.Timestamp("2000-01-01", tz=tz), pd.Timestamp("2000-01-02", tz=tz)]
)
tm.assert_index_equal(result, expected)
result = obj._data.astype("category")
expected = expected.values
tm.assert_categorical_equal(result, expected)
@pytest.mark.parametrize("tz", [None, "US/Central"])
def test_astype_array_fallback(self, tz):
obj = pd.date_range("2000", periods=2, tz=tz)
result = obj.astype(bool)
expected = pd.Index(np.array([True, True]))
tm.assert_index_equal(result, expected)
result = obj._data.astype(bool)
expected = np.array([True, True])
tm.assert_numpy_array_equal(result, expected)


@@ -0,0 +1,975 @@
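"""Tests for DatetimeIndex/DatetimeArray construction: frequency validation, mixed
timezones, out-of-bounds dates, dtype/tz handling, and deprecated constructor arguments."""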
from datetime import timedelta
from functools import partial
from operator import attrgetter
import dateutil
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs import OutOfBoundsDatetime, conversion
import pandas as pd
from pandas import (
DatetimeIndex,
Index,
Timestamp,
date_range,
datetime,
offsets,
to_datetime,
)
from pandas.core.arrays import DatetimeArray, period_array
import pandas.util.testing as tm
class TestDatetimeIndex:
@pytest.mark.parametrize("dt_cls", [DatetimeIndex, DatetimeArray._from_sequence])
def test_freq_validation_with_nat(self, dt_cls):
# GH#11587 make sure we get a useful error message when generate_range
# raises
msg = (
"Inferred frequency None from passed values does not conform "
"to passed frequency D"
)
with pytest.raises(ValueError, match=msg):
dt_cls([pd.NaT, pd.Timestamp("2011-01-01")], freq="D")
with pytest.raises(ValueError, match=msg):
dt_cls([pd.NaT, pd.Timestamp("2011-01-01").value], freq="D")
def test_categorical_preserves_tz(self):
# GH#18664 retain tz when going DTI-->Categorical-->DTI
# TODO: parametrize over DatetimeIndex/DatetimeArray
# once CategoricalIndex(DTA) works
dti = pd.DatetimeIndex(
[pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"], tz="US/Eastern"
)
ci = pd.CategoricalIndex(dti)
carr = pd.Categorical(dti)
cser = pd.Series(ci)
for obj in [ci, carr, cser]:
result = pd.DatetimeIndex(obj)
tm.assert_index_equal(result, dti)
def test_dti_with_period_data_raises(self):
# GH#23675
data = pd.PeriodIndex(["2016Q1", "2016Q2"], freq="Q")
with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
DatetimeIndex(data)
with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
to_datetime(data)
with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
DatetimeIndex(period_array(data))
with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
to_datetime(period_array(data))
def test_dti_with_timedelta64_data_deprecation(self):
# GH#23675
data = np.array([0], dtype="m8[ns]")
with tm.assert_produces_warning(FutureWarning):
result = DatetimeIndex(data)
assert result[0] == Timestamp("1970-01-01")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = to_datetime(data)
assert result[0] == Timestamp("1970-01-01")
with tm.assert_produces_warning(FutureWarning):
result = DatetimeIndex(pd.TimedeltaIndex(data))
assert result[0] == Timestamp("1970-01-01")
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = to_datetime(pd.TimedeltaIndex(data))
assert result[0] == Timestamp("1970-01-01")
def test_construction_caching(self):
df = pd.DataFrame(
{
"dt": pd.date_range("20130101", periods=3),
"dttz": pd.date_range("20130101", periods=3, tz="US/Eastern"),
"dt_with_null": [
pd.Timestamp("20130101"),
pd.NaT,
pd.Timestamp("20130103"),
],
"dtns": pd.date_range("20130101", periods=3, freq="ns"),
}
)
assert df.dttz.dtype.tz.zone == "US/Eastern"
@pytest.mark.parametrize(
"kwargs",
[{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
)
def test_construction_with_alt(self, kwargs, tz_aware_fixture):
tz = tz_aware_fixture
i = pd.date_range("20130101", periods=5, freq="H", tz=tz)
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
result = DatetimeIndex(i, **kwargs)
tm.assert_index_equal(i, result)
@pytest.mark.parametrize(
"kwargs",
[{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
)
def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
tz = tz_aware_fixture
i = pd.date_range("20130101", periods=5, freq="H", tz=tz)
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
if str(tz) in ("UTC", "tzutc()", "UTC+00:00"):
warn = None
else:
warn = FutureWarning
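# interpreting integer (i8) values under a non-UTC tz is deprecated, so expect a FutureWarning; UTC is exempt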
with tm.assert_produces_warning(warn, check_stacklevel=False):
result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs)
expected = DatetimeIndex(i, **kwargs)
tm.assert_index_equal(result, expected)
# localize into the provided tz
i2 = DatetimeIndex(i.tz_localize(None).asi8, tz="UTC")
expected = i.tz_localize(None).tz_localize("UTC")
tm.assert_index_equal(i2, expected)
# incompat tz/dtype
msg = "cannot supply both a tz and a dtype with a tz"
with pytest.raises(ValueError, match=msg):
DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype, tz="US/Pacific")
def test_construction_index_with_mixed_timezones(self):
# gh-11488: no tz results in DatetimeIndex
result = Index([Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx")
exp = DatetimeIndex(
[Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
)
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is None
# same tz results in DatetimeIndex
result = Index(
[
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
],
name="idx",
)
exp = DatetimeIndex(
[Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
tz="Asia/Tokyo",
name="idx",
)
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is not None
assert result.tz == exp.tz
# same tz results in DatetimeIndex (DST)
result = Index(
[
Timestamp("2011-01-01 10:00", tz="US/Eastern"),
Timestamp("2011-08-01 10:00", tz="US/Eastern"),
],
name="idx",
)
exp = DatetimeIndex(
[Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
tz="US/Eastern",
name="idx",
)
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is not None
assert result.tz == exp.tz
# Different tz results in Index(dtype=object)
result = Index(
[
Timestamp("2011-01-01 10:00"),
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
name="idx",
)
exp = Index(
[
Timestamp("2011-01-01 10:00"),
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
dtype="object",
name="idx",
)
tm.assert_index_equal(result, exp, exact=True)
assert not isinstance(result, DatetimeIndex)
result = Index(
[
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
name="idx",
)
exp = Index(
[
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
dtype="object",
name="idx",
)
tm.assert_index_equal(result, exp, exact=True)
assert not isinstance(result, DatetimeIndex)
# length = 1
result = Index([Timestamp("2011-01-01")], name="idx")
exp = DatetimeIndex([Timestamp("2011-01-01")], name="idx")
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is None
# length = 1 with tz
result = Index([Timestamp("2011-01-01 10:00", tz="Asia/Tokyo")], name="idx")
exp = DatetimeIndex(
[Timestamp("2011-01-01 10:00")], tz="Asia/Tokyo", name="idx"
)
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is not None
assert result.tz == exp.tz
def test_construction_index_with_mixed_timezones_with_NaT(self):
# see gh-11488
result = Index(
[pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
name="idx",
)
exp = DatetimeIndex(
[pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
name="idx",
)
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is None
# Same tz results in DatetimeIndex
result = Index(
[
pd.NaT,
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
pd.NaT,
Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
],
name="idx",
)
exp = DatetimeIndex(
[
pd.NaT,
Timestamp("2011-01-01 10:00"),
pd.NaT,
Timestamp("2011-01-02 10:00"),
],
tz="Asia/Tokyo",
name="idx",
)
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is not None
assert result.tz == exp.tz
# same tz results in DatetimeIndex (DST)
result = Index(
[
Timestamp("2011-01-01 10:00", tz="US/Eastern"),
pd.NaT,
Timestamp("2011-08-01 10:00", tz="US/Eastern"),
],
name="idx",
)
exp = DatetimeIndex(
[Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")],
tz="US/Eastern",
name="idx",
)
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is not None
assert result.tz == exp.tz
# different tz results in Index(dtype=object)
result = Index(
[
pd.NaT,
Timestamp("2011-01-01 10:00"),
pd.NaT,
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
name="idx",
)
exp = Index(
[
pd.NaT,
Timestamp("2011-01-01 10:00"),
pd.NaT,
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
dtype="object",
name="idx",
)
tm.assert_index_equal(result, exp, exact=True)
assert not isinstance(result, DatetimeIndex)
result = Index(
[
pd.NaT,
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
pd.NaT,
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
name="idx",
)
exp = Index(
[
pd.NaT,
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
pd.NaT,
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
dtype="object",
name="idx",
)
tm.assert_index_equal(result, exp, exact=True)
assert not isinstance(result, DatetimeIndex)
# all NaT
result = Index([pd.NaT, pd.NaT], name="idx")
exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx")
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is None
# all NaT with tz
result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is not None
assert result.tz == exp.tz
def test_construction_dti_with_mixed_timezones(self):
# GH 11488 (not changed, added explicit tests)
# no tz results in DatetimeIndex
result = DatetimeIndex(
[Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
)
exp = DatetimeIndex(
[Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
)
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
# same tz results in DatetimeIndex
result = DatetimeIndex(
[
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
],
name="idx",
)
exp = DatetimeIndex(
[Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
tz="Asia/Tokyo",
name="idx",
)
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
# same tz results in DatetimeIndex (DST)
result = DatetimeIndex(
[
Timestamp("2011-01-01 10:00", tz="US/Eastern"),
Timestamp("2011-08-01 10:00", tz="US/Eastern"),
],
name="idx",
)
exp = DatetimeIndex(
[Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
tz="US/Eastern",
name="idx",
)
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
# tz mismatch when converting to tz-aware raises TypeError/ValueError
with pytest.raises(ValueError):
DatetimeIndex(
[
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
name="idx",
)
msg = "cannot be converted to datetime64"
with pytest.raises(ValueError, match=msg):
DatetimeIndex(
[
Timestamp("2011-01-01 10:00"),
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
tz="Asia/Tokyo",
name="idx",
)
with pytest.raises(ValueError):
DatetimeIndex(
[
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
tz="US/Eastern",
name="idx",
)
with pytest.raises(ValueError, match=msg):
# passing tz should result in a DatetimeIndex, then the mismatch raises
# ValueError
Index(
[
pd.NaT,
Timestamp("2011-01-01 10:00"),
pd.NaT,
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
tz="Asia/Tokyo",
name="idx",
)
def test_construction_base_constructor(self):
arr = [pd.Timestamp("2011-01-01"), pd.NaT, pd.Timestamp("2011-01-03")]
tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr))
tm.assert_index_equal(pd.Index(np.array(arr)), pd.DatetimeIndex(np.array(arr)))
arr = [np.nan, pd.NaT, pd.Timestamp("2011-01-03")]
tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr))
tm.assert_index_equal(pd.Index(np.array(arr)), pd.DatetimeIndex(np.array(arr)))
def test_construction_outofbounds(self):
# GH 13663
dates = [
datetime(3000, 1, 1),
datetime(4000, 1, 1),
datetime(5000, 1, 1),
datetime(6000, 1, 1),
]
exp = Index(dates, dtype=object)
# coerces to object
tm.assert_index_equal(Index(dates), exp)
with pytest.raises(OutOfBoundsDatetime):
# can't create DatetimeIndex
DatetimeIndex(dates)
def test_construction_with_ndarray(self):
# GH 5152
dates = [datetime(2013, 10, 7), datetime(2013, 10, 8), datetime(2013, 10, 9)]
data = DatetimeIndex(dates, freq=pd.offsets.BDay()).values
result = DatetimeIndex(data, freq=pd.offsets.BDay())
expected = DatetimeIndex(["2013-10-07", "2013-10-08", "2013-10-09"], freq="B")
tm.assert_index_equal(result, expected)
def test_verify_integrity_deprecated(self):
# GH#23919
with tm.assert_produces_warning(FutureWarning):
DatetimeIndex(["1/1/2000"], verify_integrity=False)
def test_range_kwargs_deprecated(self):
# GH#23919
with tm.assert_produces_warning(FutureWarning):
DatetimeIndex(start="1/1/2000", end="1/10/2000", freq="D")
def test_integer_values_and_tz_deprecated(self):
# GH-24559
values = np.array([946684800000000000])
with tm.assert_produces_warning(FutureWarning):
result = DatetimeIndex(values, tz="US/Central")
expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central")
tm.assert_index_equal(result, expected)
# but UTC is *not* deprecated.
with tm.assert_produces_warning(None):
result = DatetimeIndex(values, tz="UTC")
expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="UTC")
tm.assert_index_equal(result, expected)
def test_constructor_coverage(self):
rng = date_range("1/1/2000", periods=10.5)
exp = date_range("1/1/2000", periods=10)
tm.assert_index_equal(rng, exp)
msg = "periods must be a number, got foo"
with pytest.raises(TypeError, match=msg):
date_range(start="1/1/2000", periods="foo", freq="D")
with pytest.raises(ValueError):
with tm.assert_produces_warning(FutureWarning):
DatetimeIndex(start="1/1/2000", end="1/10/2000")
with pytest.raises(TypeError):
DatetimeIndex("1/1/2000")
# generator expression
gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10))
result = DatetimeIndex(gen)
expected = DatetimeIndex(
[datetime(2000, 1, 1) + timedelta(i) for i in range(10)]
)
tm.assert_index_equal(result, expected)
# NumPy string array
strings = np.array(["2000-01-01", "2000-01-02", "2000-01-03"])
result = DatetimeIndex(strings)
expected = DatetimeIndex(strings.astype("O"))
tm.assert_index_equal(result, expected)
from_ints = DatetimeIndex(expected.asi8)
tm.assert_index_equal(from_ints, expected)
# string with NaT
strings = np.array(["2000-01-01", "2000-01-02", "NaT"])
result = DatetimeIndex(strings)
expected = DatetimeIndex(strings.astype("O"))
tm.assert_index_equal(result, expected)
from_ints = DatetimeIndex(expected.asi8)
tm.assert_index_equal(from_ints, expected)
# non-conforming
msg = (
"Inferred frequency None from passed values does not conform"
" to passed frequency D"
)
with pytest.raises(ValueError, match=msg):
DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"], freq="D")
msg = (
"Of the four parameters: start, end, periods, and freq, exactly"
" three must be specified"
)
with pytest.raises(ValueError, match=msg):
date_range(start="2011-01-01", freq="b")
with pytest.raises(ValueError, match=msg):
date_range(end="2011-01-01", freq="B")
with pytest.raises(ValueError, match=msg):
date_range(periods=10, freq="D")
@pytest.mark.parametrize("freq", ["AS", "W-SUN"])
def test_constructor_datetime64_tzformat(self, freq):
# see GH#6572: ISO 8601 format results in pytz.FixedOffset
idx = date_range(
"2013-01-01T00:00:00-05:00", "2016-01-01T23:59:59-05:00", freq=freq
)
expected = date_range(
"2013-01-01T00:00:00",
"2016-01-01T23:59:59",
freq=freq,
tz=pytz.FixedOffset(-300),
)
tm.assert_index_equal(idx, expected)
# Unable to use `US/Eastern` because of DST
expected_i8 = date_range(
"2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima"
)
tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
idx = date_range(
"2013-01-01T00:00:00+09:00", "2016-01-01T23:59:59+09:00", freq=freq
)
expected = date_range(
"2013-01-01T00:00:00",
"2016-01-01T23:59:59",
freq=freq,
tz=pytz.FixedOffset(540),
)
tm.assert_index_equal(idx, expected)
expected_i8 = date_range(
"2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo"
)
tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
# Non-ISO 8601 format results in dateutil.tz.tzoffset
idx = date_range("2013/1/1 0:00:00-5:00", "2016/1/1 23:59:59-5:00", freq=freq)
expected = date_range(
"2013-01-01T00:00:00",
"2016-01-01T23:59:59",
freq=freq,
tz=pytz.FixedOffset(-300),
)
tm.assert_index_equal(idx, expected)
# Unable to use `US/Eastern` because of DST
expected_i8 = date_range(
"2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima"
)
tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
idx = date_range("2013/1/1 0:00:00+9:00", "2016/1/1 23:59:59+09:00", freq=freq)
expected = date_range(
"2013-01-01T00:00:00",
"2016-01-01T23:59:59",
freq=freq,
tz=pytz.FixedOffset(540),
)
tm.assert_index_equal(idx, expected)
expected_i8 = date_range(
"2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo"
)
tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
def test_constructor_dtype(self):
# passing a dtype with a tz should localize
idx = DatetimeIndex(
["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
)
expected = DatetimeIndex(["2013-01-01", "2013-01-02"]).tz_localize("US/Eastern")
tm.assert_index_equal(idx, expected)
idx = DatetimeIndex(["2013-01-01", "2013-01-02"], tz="US/Eastern")
tm.assert_index_equal(idx, expected)
# if we already have a tz and it's not the same, then raise
idx = DatetimeIndex(
["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
)
msg = (
"cannot supply both a tz and a timezone-naive dtype"
r" \(i\.e\. datetime64\[ns\]\)"
)
with pytest.raises(ValueError, match=msg):
DatetimeIndex(idx, dtype="datetime64[ns]")
# this is effectively trying to convert tz's
msg = "data is already tz-aware US/Eastern, unable to set specified tz: CET"
with pytest.raises(TypeError, match=msg):
DatetimeIndex(idx, dtype="datetime64[ns, CET]")
msg = "cannot supply both a tz and a dtype with a tz"
with pytest.raises(ValueError, match=msg):
DatetimeIndex(idx, tz="CET", dtype="datetime64[ns, US/Eastern]")
result = DatetimeIndex(idx, dtype="datetime64[ns, US/Eastern]")
tm.assert_index_equal(idx, result)
@pytest.mark.parametrize("dtype", [object, np.int32, np.int64])
def test_constructor_invalid_dtype_raises(self, dtype):
# GH 23986
with pytest.raises(ValueError):
DatetimeIndex([1, 2], dtype=dtype)
def test_constructor_name(self):
idx = date_range(start="2000-01-01", periods=1, freq="A", name="TEST")
assert idx.name == "TEST"
def test_000constructor_resolution(self):
# GH#2252
t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1)
idx = DatetimeIndex([t1])
assert idx.nanosecond[0] == t1.nanosecond
def test_disallow_setting_tz(self):
# GH 3746
dti = DatetimeIndex(["2010"], tz="UTC")
with pytest.raises(AttributeError):
dti.tz = pytz.timezone("US/Pacific")
@pytest.mark.parametrize(
"tz",
[
None,
"America/Los_Angeles",
pytz.timezone("America/Los_Angeles"),
Timestamp("2000", tz="America/Los_Angeles").tz,
],
)
def test_constructor_start_end_with_tz(self, tz):
# GH 18595
start = Timestamp("2013-01-01 06:00:00", tz="America/Los_Angeles")
end = Timestamp("2013-01-02 06:00:00", tz="America/Los_Angeles")
result = date_range(freq="D", start=start, end=end, tz=tz)
expected = DatetimeIndex(
["2013-01-01 06:00:00", "2013-01-02 06:00:00"], tz="America/Los_Angeles"
)
tm.assert_index_equal(result, expected)
# Especially assert that the timezone is consistent for pytz
assert pytz.timezone("America/Los_Angeles") is result.tz
@pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"])
def test_constructor_with_non_normalized_pytz(self, tz):
# GH 18595
non_norm_tz = Timestamp("2010", tz=tz).tz
result = DatetimeIndex(["2010"], tz=non_norm_tz)
assert pytz.timezone(tz) is result.tz
def test_constructor_timestamp_near_dst(self):
# GH 20854
ts = [
Timestamp("2016-10-30 03:00:00+0300", tz="Europe/Helsinki"),
Timestamp("2016-10-30 03:00:00+0200", tz="Europe/Helsinki"),
]
result = DatetimeIndex(ts)
expected = DatetimeIndex([ts[0].to_pydatetime(), ts[1].to_pydatetime()])
tm.assert_index_equal(result, expected)
# TODO(GH-24559): Remove the xfail for the tz-aware case.
@pytest.mark.parametrize("klass", [Index, DatetimeIndex])
@pytest.mark.parametrize("box", [np.array, partial(np.array, dtype=object), list])
@pytest.mark.parametrize(
"tz, dtype",
[
pytest.param(
"US/Pacific",
"datetime64[ns, US/Pacific]",
marks=[
pytest.mark.xfail(),
pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning"),
],
),
[None, "datetime64[ns]"],
],
)
def test_constructor_with_int_tz(self, klass, box, tz, dtype):
# GH 20997, 20964
ts = Timestamp("2018-01-01", tz=tz)
result = klass(box([ts.value]), dtype=dtype)
expected = klass([ts])
assert result == expected
# This is the desired future behavior
@pytest.mark.xfail(reason="Future behavior", strict=False)
@pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning")
def test_construction_int_roundtrip(self, tz_naive_fixture):
# GH 12619
# TODO(GH-24559): Remove xfail
tz = tz_naive_fixture
result = 1293858000000000000
expected = DatetimeIndex([1293858000000000000], tz=tz).asi8[0]
assert result == expected
def test_construction_from_replaced_timestamps_with_dst(self):
# GH 18785
index = pd.date_range(
pd.Timestamp(2000, 1, 1),
pd.Timestamp(2005, 1, 1),
freq="MS",
tz="Australia/Melbourne",
)
test = pd.DataFrame({"data": range(len(index))}, index=index)
test = test.resample("Y").mean()
result = pd.DatetimeIndex([x.replace(month=6, day=1) for x in test.index])
expected = pd.DatetimeIndex(
[
"2000-06-01 00:00:00",
"2001-06-01 00:00:00",
"2002-06-01 00:00:00",
"2003-06-01 00:00:00",
"2004-06-01 00:00:00",
"2005-06-01 00:00:00",
],
tz="Australia/Melbourne",
)
tm.assert_index_equal(result, expected)
def test_construction_with_tz_and_tz_aware_dti(self):
# GH 23579
dti = date_range("2016-01-01", periods=3, tz="US/Central")
with pytest.raises(TypeError):
DatetimeIndex(dti, tz="Asia/Tokyo")
def test_construction_with_nat_and_tzlocal(self):
tz = dateutil.tz.tzlocal()
result = DatetimeIndex(["2018", "NaT"], tz=tz)
expected = DatetimeIndex([Timestamp("2018", tz=tz), pd.NaT])
tm.assert_index_equal(result, expected)
def test_constructor_no_precision_warns(self):
# GH-24753, GH-24739
expected = pd.DatetimeIndex(["2000"], dtype="datetime64[ns]")
# we set the stacklevel for DatetimeIndex
with tm.assert_produces_warning(FutureWarning):
result = pd.DatetimeIndex(["2000"], dtype="datetime64")
tm.assert_index_equal(result, expected)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = pd.Index(["2000"], dtype="datetime64")
tm.assert_index_equal(result, expected)
def test_constructor_wrong_precision_raises(self):
with pytest.raises(ValueError):
pd.DatetimeIndex(["2000"], dtype="datetime64[us]")
class TestTimeSeries:
def test_dti_constructor_preserve_dti_freq(self):
rng = date_range("1/1/2000", "1/2/2000", freq="5min")
rng2 = DatetimeIndex(rng)
assert rng.freq == rng2.freq
def test_dti_constructor_years_only(self, tz_naive_fixture):
tz = tz_naive_fixture
# GH 6961
rng1 = date_range("2014", "2015", freq="M", tz=tz)
expected1 = date_range("2014-01-31", "2014-12-31", freq="M", tz=tz)
rng2 = date_range("2014", "2015", freq="MS", tz=tz)
expected2 = date_range("2014-01-01", "2015-01-01", freq="MS", tz=tz)
rng3 = date_range("2014", "2020", freq="A", tz=tz)
expected3 = date_range("2014-12-31", "2019-12-31", freq="A", tz=tz)
rng4 = date_range("2014", "2020", freq="AS", tz=tz)
expected4 = date_range("2014-01-01", "2020-01-01", freq="AS", tz=tz)
for rng, expected in [
(rng1, expected1),
(rng2, expected2),
(rng3, expected3),
(rng4, expected4),
]:
tm.assert_index_equal(rng, expected)
def test_dti_constructor_small_int(self, any_int_dtype):
# see gh-13721
exp = DatetimeIndex(
[
"1970-01-01 00:00:00.00000000",
"1970-01-01 00:00:00.00000001",
"1970-01-01 00:00:00.00000002",
]
)
arr = np.array([0, 10, 20], dtype=any_int_dtype)
tm.assert_index_equal(DatetimeIndex(arr), exp)
def test_ctor_str_intraday(self):
rng = DatetimeIndex(["1-1-2000 00:00:01"])
assert rng[0].second == 1
def test_is_(self):
dti = date_range(start="1/1/2005", end="12/1/2005", freq="M")
assert dti.is_(dti)
assert dti.is_(dti.view())
assert not dti.is_(dti.copy())
def test_index_cast_datetime64_other_units(self):
arr = np.arange(0, 100, 10, dtype=np.int64).view("M8[D]")
idx = Index(arr)
assert (idx.values == conversion.ensure_datetime64ns(arr)).all()
def test_constructor_int64_nocopy(self):
# GH#1624
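# by default the int64 buffer is reused (mutations to arr show up in the index); with copy=True below it is not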
arr = np.arange(1000, dtype=np.int64)
index = DatetimeIndex(arr)
arr[50:100] = -1
assert (index.asi8[50:100] == -1).all()
arr = np.arange(1000, dtype=np.int64)
index = DatetimeIndex(arr, copy=True)
arr[50:100] = -1
assert (index.asi8[50:100] != -1).all()
@pytest.mark.parametrize(
"freq", ["M", "Q", "A", "D", "B", "BH", "T", "S", "L", "U", "H", "N", "C"]
)
def test_from_freq_recreate_from_data(self, freq):
org = date_range(start="2001/02/01 09:00", freq=freq, periods=1)
idx = DatetimeIndex(org, freq=freq)
tm.assert_index_equal(idx, org)
org = date_range(
start="2001/02/01 09:00", freq=freq, tz="US/Pacific", periods=1
)
idx = DatetimeIndex(org, freq=freq, tz="US/Pacific")
tm.assert_index_equal(idx, org)
def test_datetimeindex_constructor_misc(self):
arr = ["1/1/2005", "1/2/2005", "Jn 3, 2005", "2005-01-04"]
msg = r"(\(')?Unknown string format(:', 'Jn 3, 2005'\))?"
with pytest.raises(ValueError, match=msg):
DatetimeIndex(arr)
arr = ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]
idx1 = DatetimeIndex(arr)
arr = [datetime(2005, 1, 1), "1/2/2005", "1/3/2005", "2005-01-04"]
idx2 = DatetimeIndex(arr)
arr = [Timestamp(datetime(2005, 1, 1)), "1/2/2005", "1/3/2005", "2005-01-04"]
idx3 = DatetimeIndex(arr)
arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
idx4 = DatetimeIndex(arr)
arr = to_datetime(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"])
idx5 = DatetimeIndex(arr)
arr = to_datetime(["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"])
idx6 = DatetimeIndex(arr)
idx7 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
idx8 = DatetimeIndex(
["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True
)
tm.assert_index_equal(idx7, idx8)
for other in [idx2, idx3, idx4, idx5, idx6]:
assert (idx1.values == other.values).all()
sdate = datetime(1999, 12, 25)
edate = datetime(2000, 1, 1)
idx = date_range(start=sdate, freq="1B", periods=20)
assert len(idx) == 20
assert idx[0] == sdate + 0 * offsets.BDay()
assert idx.freq == "B"
idx = date_range(end=edate, freq=("D", 5), periods=20)
assert len(idx) == 20
assert idx[-1] == edate
assert idx.freq == "5D"
idx1 = date_range(start=sdate, end=edate, freq="W-SUN")
idx2 = date_range(start=sdate, end=edate, freq=offsets.Week(weekday=6))
assert len(idx1) == len(idx2)
assert idx1.freq == idx2.freq
idx1 = date_range(start=sdate, end=edate, freq="QS")
idx2 = date_range(
start=sdate, end=edate, freq=offsets.QuarterBegin(startingMonth=1)
)
assert len(idx1) == len(idx2)
assert idx1.freq == idx2.freq
idx1 = date_range(start=sdate, end=edate, freq="BQ")
idx2 = date_range(
start=sdate, end=edate, freq=offsets.BQuarterEnd(startingMonth=12)
)
assert len(idx1) == len(idx2)
assert idx1.freq == idx2.freq


@@ -0,0 +1,948 @@
"""
test date_range, bdate_range construction from the convenience range functions
"""
from datetime import datetime, time, timedelta
import numpy as np
import pytest
import pytz
from pytz import timezone
from pandas.errors import OutOfBoundsDatetime
import pandas.util._test_decorators as td
import pandas as pd
from pandas import DatetimeIndex, Timestamp, bdate_range, date_range, offsets
from pandas.tests.series.common import TestData
import pandas.util.testing as tm
from pandas.tseries.offsets import (
BDay,
CDay,
DateOffset,
MonthEnd,
generate_range,
prefix_mapping,
)
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestTimestampEquivDateRange:
# Older tests in TestTimeSeries constructed their `stamp` objects
# using `date_range` instead of the `Timestamp` constructor.
# TestTimestampEquivDateRange checks that these are equivalent in the
# pertinent cases.
def test_date_range_timestamp_equiv(self):
rng = date_range("20090415", "20090519", tz="US/Eastern")
stamp = rng[0]
ts = Timestamp("20090415", tz="US/Eastern", freq="D")
assert ts == stamp
def test_date_range_timestamp_equiv_dateutil(self):
rng = date_range("20090415", "20090519", tz="dateutil/US/Eastern")
stamp = rng[0]
ts = Timestamp("20090415", tz="dateutil/US/Eastern", freq="D")
assert ts == stamp
def test_date_range_timestamp_equiv_explicit_pytz(self):
rng = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern"))
stamp = rng[0]
ts = Timestamp("20090415", tz=pytz.timezone("US/Eastern"), freq="D")
assert ts == stamp
@td.skip_if_windows_python_3
def test_date_range_timestamp_equiv_explicit_dateutil(self):
from pandas._libs.tslibs.timezones import dateutil_gettz as gettz
rng = date_range("20090415", "20090519", tz=gettz("US/Eastern"))
stamp = rng[0]
ts = Timestamp("20090415", tz=gettz("US/Eastern"), freq="D")
assert ts == stamp
def test_date_range_timestamp_equiv_from_datetime_instance(self):
datetime_instance = datetime(2014, 3, 4)
# build a timestamp with a frequency, since then it supports
# addition/subtraction of integers
timestamp_instance = date_range(datetime_instance, periods=1, freq="D")[0]
ts = Timestamp(datetime_instance, freq="D")
assert ts == timestamp_instance
def test_date_range_timestamp_equiv_preserve_frequency(self):
timestamp_instance = date_range("2014-03-05", periods=1, freq="D")[0]
ts = Timestamp("2014-03-05", freq="D")
assert timestamp_instance == ts
class TestDateRanges(TestData):
def test_date_range_nat(self):
# GH#11587
msg = "Neither `start` nor `end` can be NaT"
with pytest.raises(ValueError, match=msg):
date_range(start="2016-01-01", end=pd.NaT, freq="D")
with pytest.raises(ValueError, match=msg):
date_range(start=pd.NaT, end="2016-01-01", freq="D")
def test_date_range_multiplication_overflow(self):
# GH#24255
# check that overflows in calculating `addend = periods * stride`
# are caught
with tm.assert_produces_warning(None):
# we should _not_ be seeing an overflow RuntimeWarning
dti = date_range(start="1677-09-22", periods=213503, freq="D")
assert dti[0] == Timestamp("1677-09-22")
assert len(dti) == 213503
msg = "Cannot generate range with"
with pytest.raises(OutOfBoundsDatetime, match=msg):
date_range("1969-05-04", periods=200000000, freq="30000D")
def test_date_range_unsigned_overflow_handling(self):
# GH#24255
# case where `addend = periods * stride` overflows int64 bounds
# but not uint64 bounds
dti = date_range(start="1677-09-22", end="2262-04-11", freq="D")
dti2 = date_range(start=dti[0], periods=len(dti), freq="D")
assert dti2.equals(dti)
dti3 = date_range(end=dti[-1], periods=len(dti), freq="D")
assert dti3.equals(dti)
def test_date_range_int64_overflow_non_recoverable(self):
# GH#24255
# case with start later than 1970-01-01, overflow int64 but not uint64
msg = "Cannot generate range with"
with pytest.raises(OutOfBoundsDatetime, match=msg):
date_range(start="1970-02-01", periods=106752 * 24, freq="H")
# case with end before 1970-01-01, overflow int64 but not uint64
with pytest.raises(OutOfBoundsDatetime, match=msg):
date_range(end="1969-11-14", periods=106752 * 24, freq="H")
def test_date_range_int64_overflow_stride_endpoint_different_signs(self):
# cases where stride * periods overflow int64 and stride/endpoint
# have different signs
start = Timestamp("2262-02-23")
end = Timestamp("1969-11-14")
expected = date_range(start=start, end=end, freq="-1H")
assert expected[0] == start
assert expected[-1] == end
dti = date_range(end=end, periods=len(expected), freq="-1H")
tm.assert_index_equal(dti, expected)
start2 = Timestamp("1970-02-01")
end2 = Timestamp("1677-10-22")
expected2 = date_range(start=start2, end=end2, freq="-1H")
assert expected2[0] == start2
assert expected2[-1] == end2
dti2 = date_range(start=start2, periods=len(expected2), freq="-1H")
tm.assert_index_equal(dti2, expected2)
def test_date_range_out_of_bounds(self):
# GH#14187
with pytest.raises(OutOfBoundsDatetime):
date_range("2016-01-01", periods=100000, freq="D")
with pytest.raises(OutOfBoundsDatetime):
date_range(end="1763-10-12", periods=100000, freq="D")
def test_date_range_gen_error(self):
rng = date_range("1/1/2000 00:00", "1/1/2000 00:18", freq="5min")
assert len(rng) == 4
@pytest.mark.parametrize("freq", ["AS", "YS"])
def test_begin_year_alias(self, freq):
# see gh-9313
rng = date_range("1/1/2013", "7/1/2017", freq=freq)
exp = pd.DatetimeIndex(
["2013-01-01", "2014-01-01", "2015-01-01", "2016-01-01", "2017-01-01"],
freq=freq,
)
tm.assert_index_equal(rng, exp)
@pytest.mark.parametrize("freq", ["A", "Y"])
def test_end_year_alias(self, freq):
# see gh-9313
rng = date_range("1/1/2013", "7/1/2017", freq=freq)
exp = pd.DatetimeIndex(
["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-31"], freq=freq
)
tm.assert_index_equal(rng, exp)
@pytest.mark.parametrize("freq", ["BA", "BY"])
def test_business_end_year_alias(self, freq):
# see gh-9313
rng = date_range("1/1/2013", "7/1/2017", freq=freq)
exp = pd.DatetimeIndex(
["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"], freq=freq
)
tm.assert_index_equal(rng, exp)
def test_date_range_negative_freq(self):
# GH 11018
rng = date_range("2011-12-31", freq="-2A", periods=3)
exp = pd.DatetimeIndex(["2011-12-31", "2009-12-31", "2007-12-31"], freq="-2A")
tm.assert_index_equal(rng, exp)
assert rng.freq == "-2A"
rng = date_range("2011-01-31", freq="-2M", periods=3)
exp = pd.DatetimeIndex(["2011-01-31", "2010-11-30", "2010-09-30"], freq="-2M")
tm.assert_index_equal(rng, exp)
assert rng.freq == "-2M"
def test_date_range_bms_bug(self):
# GH#1645
rng = date_range("1/1/2000", periods=10, freq="BMS")
ex_first = Timestamp("2000-01-03")
assert rng[0] == ex_first
def test_date_range_normalize(self):
snap = datetime.today()
n = 50
rng = date_range(snap, periods=n, normalize=False, freq="2D")
offset = timedelta(2)
values = DatetimeIndex([snap + i * offset for i in range(n)])
tm.assert_index_equal(rng, values)
rng = date_range("1/1/2000 08:15", periods=n, normalize=False, freq="B")
the_time = time(8, 15)
for val in rng:
assert val.time() == the_time
def test_date_range_fy5252(self):
dr = date_range(
start="2013-01-01",
periods=2,
freq=offsets.FY5253(startingMonth=1, weekday=3, variation="nearest"),
)
assert dr[0] == Timestamp("2013-01-31")
assert dr[1] == Timestamp("2014-01-30")
def test_date_range_ambiguous_arguments(self):
# GH#2538
start = datetime(2011, 1, 1, 5, 3, 40)
end = datetime(2011, 1, 1, 8, 9, 40)
msg = (
"Of the four parameters: start, end, periods, and "
"freq, exactly three must be specified"
)
with pytest.raises(ValueError, match=msg):
date_range(start, end, periods=10, freq="s")
def test_date_range_convenience_periods(self):
# GH 20808
result = date_range("2018-04-24", "2018-04-27", periods=3)
expected = DatetimeIndex(
["2018-04-24 00:00:00", "2018-04-25 12:00:00", "2018-04-27 00:00:00"],
freq=None,
)
tm.assert_index_equal(result, expected)
# Test if spacing remains linear if tz changes to dst in range
result = date_range(
"2018-04-01 01:00:00",
"2018-04-01 04:00:00",
tz="Australia/Sydney",
periods=3,
)
expected = DatetimeIndex(
[
Timestamp("2018-04-01 01:00:00+1100", tz="Australia/Sydney"),
Timestamp("2018-04-01 02:00:00+1000", tz="Australia/Sydney"),
Timestamp("2018-04-01 04:00:00+1000", tz="Australia/Sydney"),
]
)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"start,end,result_tz",
[
["20180101", "20180103", "US/Eastern"],
[datetime(2018, 1, 1), datetime(2018, 1, 3), "US/Eastern"],
[Timestamp("20180101"), Timestamp("20180103"), "US/Eastern"],
[
Timestamp("20180101", tz="US/Eastern"),
Timestamp("20180103", tz="US/Eastern"),
"US/Eastern",
],
[
Timestamp("20180101", tz="US/Eastern"),
Timestamp("20180103", tz="US/Eastern"),
None,
],
],
)
def test_date_range_linspacing_tz(self, start, end, result_tz):
# GH 20983
result = date_range(start, end, periods=3, tz=result_tz)
expected = date_range("20180101", periods=3, freq="D", tz="US/Eastern")
tm.assert_index_equal(result, expected)
def test_date_range_businesshour(self):
idx = DatetimeIndex(
[
"2014-07-04 09:00",
"2014-07-04 10:00",
"2014-07-04 11:00",
"2014-07-04 12:00",
"2014-07-04 13:00",
"2014-07-04 14:00",
"2014-07-04 15:00",
"2014-07-04 16:00",
],
freq="BH",
)
rng = date_range("2014-07-04 09:00", "2014-07-04 16:00", freq="BH")
tm.assert_index_equal(idx, rng)
idx = DatetimeIndex(["2014-07-04 16:00", "2014-07-07 09:00"], freq="BH")
rng = date_range("2014-07-04 16:00", "2014-07-07 09:00", freq="BH")
tm.assert_index_equal(idx, rng)
idx = DatetimeIndex(
[
"2014-07-04 09:00",
"2014-07-04 10:00",
"2014-07-04 11:00",
"2014-07-04 12:00",
"2014-07-04 13:00",
"2014-07-04 14:00",
"2014-07-04 15:00",
"2014-07-04 16:00",
"2014-07-07 09:00",
"2014-07-07 10:00",
"2014-07-07 11:00",
"2014-07-07 12:00",
"2014-07-07 13:00",
"2014-07-07 14:00",
"2014-07-07 15:00",
"2014-07-07 16:00",
"2014-07-08 09:00",
"2014-07-08 10:00",
"2014-07-08 11:00",
"2014-07-08 12:00",
"2014-07-08 13:00",
"2014-07-08 14:00",
"2014-07-08 15:00",
"2014-07-08 16:00",
],
freq="BH",
)
rng = date_range("2014-07-04 09:00", "2014-07-08 16:00", freq="BH")
tm.assert_index_equal(idx, rng)
def test_range_misspecified(self):
# GH #1095
msg = (
"Of the four parameters: start, end, periods, and "
"freq, exactly three must be specified"
)
with pytest.raises(ValueError, match=msg):
date_range(start="1/1/2000")
with pytest.raises(ValueError, match=msg):
date_range(end="1/1/2000")
with pytest.raises(ValueError, match=msg):
date_range(periods=10)
with pytest.raises(ValueError, match=msg):
date_range(start="1/1/2000", freq="H")
with pytest.raises(ValueError, match=msg):
date_range(end="1/1/2000", freq="H")
with pytest.raises(ValueError, match=msg):
date_range(periods=10, freq="H")
with pytest.raises(ValueError, match=msg):
date_range()
def test_compat_replace(self):
# https://github.com/statsmodels/statsmodels/issues/3349
# replace should take ints/longs for compat
result = date_range(
Timestamp("1960-04-01 00:00:00", freq="QS-JAN"), periods=76, freq="QS-JAN"
)
assert len(result) == 76
def test_catch_infinite_loop(self):
offset = offsets.DateOffset(minute=5)
# blow up, don't loop forever
msg = "Offset <DateOffset: minute=5> did not increment date"
with pytest.raises(ValueError, match=msg):
date_range(datetime(2011, 11, 11), datetime(2011, 11, 12), freq=offset)
@pytest.mark.parametrize("periods", (1, 2))
def test_wom_len(self, periods):
# https://github.com/pandas-dev/pandas/issues/20517
res = date_range(start="20110101", periods=periods, freq="WOM-1MON")
assert len(res) == periods
def test_construct_over_dst(self):
# GH 20854
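# 01:00 occurs twice on this fall-back date; ambiguous=True selects the first (DST) wall time, ambiguous=False the second (standard time)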
pre_dst = Timestamp("2010-11-07 01:00:00").tz_localize(
"US/Pacific", ambiguous=True
)
pst_dst = Timestamp("2010-11-07 01:00:00").tz_localize(
"US/Pacific", ambiguous=False
)
expect_data = [
Timestamp("2010-11-07 00:00:00", tz="US/Pacific"),
pre_dst,
pst_dst,
]
expected = DatetimeIndex(expect_data)
result = date_range(start="2010-11-7", periods=3, freq="H", tz="US/Pacific")
tm.assert_index_equal(result, expected)
def test_construct_with_different_start_end_string_format(self):
# GH 12064
result = date_range(
"2013-01-01 00:00:00+09:00", "2013/01/01 02:00:00+09:00", freq="H"
)
expected = DatetimeIndex(
[
Timestamp("2013-01-01 00:00:00+09:00"),
Timestamp("2013-01-01 01:00:00+09:00"),
Timestamp("2013-01-01 02:00:00+09:00"),
]
)
tm.assert_index_equal(result, expected)
def test_error_with_zero_monthends(self):
msg = r"Offset <0 \* MonthEnds> did not increment date"
with pytest.raises(ValueError, match=msg):
date_range("1/1/2000", "1/1/2001", freq=MonthEnd(0))
def test_range_bug(self):
# GH #770
offset = DateOffset(months=3)
result = date_range("2011-1-1", "2012-1-31", freq=offset)
start = datetime(2011, 1, 1)
expected = DatetimeIndex([start + i * offset for i in range(5)])
tm.assert_index_equal(result, expected)
def test_range_tz_pytz(self):
# see gh-2906
tz = timezone("US/Eastern")
start = tz.localize(datetime(2011, 1, 1))
end = tz.localize(datetime(2011, 1, 3))
dr = date_range(start=start, periods=3)
assert dr.tz.zone == tz.zone
assert dr[0] == start
assert dr[2] == end
dr = date_range(end=end, periods=3)
assert dr.tz.zone == tz.zone
assert dr[0] == start
assert dr[2] == end
dr = date_range(start=start, end=end)
assert dr.tz.zone == tz.zone
assert dr[0] == start
assert dr[2] == end
@pytest.mark.parametrize(
"start, end",
[
[
Timestamp(datetime(2014, 3, 6), tz="US/Eastern"),
Timestamp(datetime(2014, 3, 12), tz="US/Eastern"),
],
[
Timestamp(datetime(2013, 11, 1), tz="US/Eastern"),
Timestamp(datetime(2013, 11, 6), tz="US/Eastern"),
],
],
)
def test_range_tz_dst_straddle_pytz(self, start, end):
dr = date_range(start, end, freq="D")
assert dr[0] == start
assert dr[-1] == end
assert np.all(dr.hour == 0)
dr = date_range(start, end, freq="D", tz="US/Eastern")
assert dr[0] == start
assert dr[-1] == end
assert np.all(dr.hour == 0)
dr = date_range(
start.replace(tzinfo=None),
end.replace(tzinfo=None),
freq="D",
tz="US/Eastern",
)
assert dr[0] == start
assert dr[-1] == end
assert np.all(dr.hour == 0)
def test_range_tz_dateutil(self):
# see gh-2906
# Use maybe_get_tz to fix filename in tz under dateutil.
from pandas._libs.tslibs.timezones import maybe_get_tz
tz = lambda x: maybe_get_tz("dateutil/" + x)
start = datetime(2011, 1, 1, tzinfo=tz("US/Eastern"))
end = datetime(2011, 1, 3, tzinfo=tz("US/Eastern"))
dr = date_range(start=start, periods=3)
assert dr.tz == tz("US/Eastern")
assert dr[0] == start
assert dr[2] == end
dr = date_range(end=end, periods=3)
assert dr.tz == tz("US/Eastern")
assert dr[0] == start
assert dr[2] == end
dr = date_range(start=start, end=end)
assert dr.tz == tz("US/Eastern")
assert dr[0] == start
assert dr[2] == end
@pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"])
def test_range_closed(self, freq):
begin = datetime(2011, 1, 1)
end = datetime(2014, 1, 1)
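# when an endpoint falls exactly on the frequency grid, closed="left"/"right" should drop the end/start point respectively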
closed = date_range(begin, end, closed=None, freq=freq)
left = date_range(begin, end, closed="left", freq=freq)
right = date_range(begin, end, closed="right", freq=freq)
expected_left = left
expected_right = right
if end == closed[-1]:
expected_left = closed[:-1]
if begin == closed[0]:
expected_right = closed[1:]
tm.assert_index_equal(expected_left, left)
tm.assert_index_equal(expected_right, right)
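        # For reference (not part of the original test): with closed=None both
        # endpoints are included whenever they fall on the frequency;
        # closed="left" drops a matching right endpoint and closed="right"
        # drops a matching left endpoint, which is exactly what the slicing
        # above reconstructs.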
def test_range_closed_with_tz_aware_start_end(self):
# GH12409, GH12684
begin = Timestamp("2011/1/1", tz="US/Eastern")
end = Timestamp("2014/1/1", tz="US/Eastern")
for freq in ["1D", "3D", "2M", "7W", "3H", "A"]:
closed = date_range(begin, end, closed=None, freq=freq)
left = date_range(begin, end, closed="left", freq=freq)
right = date_range(begin, end, closed="right", freq=freq)
expected_left = left
expected_right = right
if end == closed[-1]:
expected_left = closed[:-1]
if begin == closed[0]:
expected_right = closed[1:]
tm.assert_index_equal(expected_left, left)
tm.assert_index_equal(expected_right, right)
begin = Timestamp("2011/1/1")
end = Timestamp("2014/1/1")
begintz = Timestamp("2011/1/1", tz="US/Eastern")
endtz = Timestamp("2014/1/1", tz="US/Eastern")
for freq in ["1D", "3D", "2M", "7W", "3H", "A"]:
closed = date_range(begin, end, closed=None, freq=freq, tz="US/Eastern")
left = date_range(begin, end, closed="left", freq=freq, tz="US/Eastern")
right = date_range(begin, end, closed="right", freq=freq, tz="US/Eastern")
expected_left = left
expected_right = right
if endtz == closed[-1]:
expected_left = closed[:-1]
if begintz == closed[0]:
expected_right = closed[1:]
tm.assert_index_equal(expected_left, left)
tm.assert_index_equal(expected_right, right)
@pytest.mark.parametrize("closed", ["right", "left", None])
def test_range_closed_boundary(self, closed):
# GH#11804
right_boundary = date_range(
"2015-09-12", "2015-12-01", freq="QS-MAR", closed=closed
)
left_boundary = date_range(
"2015-09-01", "2015-09-12", freq="QS-MAR", closed=closed
)
both_boundary = date_range(
"2015-09-01", "2015-12-01", freq="QS-MAR", closed=closed
)
expected_right = expected_left = expected_both = both_boundary
if closed == "right":
expected_left = both_boundary[1:]
if closed == "left":
expected_right = both_boundary[:-1]
if closed is None:
expected_right = both_boundary[1:]
expected_left = both_boundary[:-1]
tm.assert_index_equal(right_boundary, expected_right)
tm.assert_index_equal(left_boundary, expected_left)
tm.assert_index_equal(both_boundary, expected_both)
def test_years_only(self):
# GH 6961
dr = date_range("2014", "2015", freq="M")
assert dr[0] == datetime(2014, 1, 31)
assert dr[-1] == datetime(2014, 12, 31)
def test_freq_divides_end_in_nanos(self):
# GH 10885
result_1 = date_range("2005-01-12 10:00", "2005-01-12 16:00", freq="345min")
result_2 = date_range("2005-01-13 10:00", "2005-01-13 16:00", freq="345min")
expected_1 = DatetimeIndex(
["2005-01-12 10:00:00", "2005-01-12 15:45:00"],
dtype="datetime64[ns]",
freq="345T",
tz=None,
)
expected_2 = DatetimeIndex(
["2005-01-13 10:00:00", "2005-01-13 15:45:00"],
dtype="datetime64[ns]",
freq="345T",
tz=None,
)
tm.assert_index_equal(result_1, expected_1)
tm.assert_index_equal(result_2, expected_2)
def test_cached_range_bug(self):
rng = date_range("2010-09-01 05:00:00", periods=50, freq=DateOffset(hours=6))
assert len(rng) == 50
assert rng[0] == datetime(2010, 9, 1, 5)
    def test_timezone_comparison_bug(self):
# smoke test
start = Timestamp("20130220 10:00", tz="US/Eastern")
result = date_range(start, periods=2, tz="US/Eastern")
assert len(result) == 2
    def test_timezone_comparison_assert(self):
start = Timestamp("20130220 10:00", tz="US/Eastern")
msg = "Inferred time zone not equal to passed time zone"
with pytest.raises(AssertionError, match=msg):
date_range(start, periods=2, tz="Europe/Berlin")
def test_negative_non_tick_frequency_descending_dates(self, tz_aware_fixture):
# GH 23270
tz = tz_aware_fixture
result = pd.date_range(start="2011-06-01", end="2011-01-01", freq="-1MS", tz=tz)
expected = pd.date_range(
end="2011-06-01", start="2011-01-01", freq="1MS", tz=tz
)[::-1]
tm.assert_index_equal(result, expected)
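        # Illustration: a negative month-start frequency walks backwards from
        # 2011-06-01 to 2011-01-01, visiting the same month starts as the
        # ascending range, so the reversed expected index matches element-wise.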
class TestGenRangeGeneration:
def test_generate(self):
rng1 = list(generate_range(START, END, offset=BDay()))
rng2 = list(generate_range(START, END, offset="B"))
assert rng1 == rng2
def test_generate_cday(self):
rng1 = list(generate_range(START, END, offset=CDay()))
rng2 = list(generate_range(START, END, offset="C"))
assert rng1 == rng2
def test_1(self):
rng = list(generate_range(start=datetime(2009, 3, 25), periods=2))
expected = [datetime(2009, 3, 25), datetime(2009, 3, 26)]
assert rng == expected
def test_2(self):
rng = list(generate_range(start=datetime(2008, 1, 1), end=datetime(2008, 1, 3)))
expected = [datetime(2008, 1, 1), datetime(2008, 1, 2), datetime(2008, 1, 3)]
assert rng == expected
def test_3(self):
rng = list(generate_range(start=datetime(2008, 1, 5), end=datetime(2008, 1, 6)))
expected = []
assert rng == expected
def test_precision_finer_than_offset(self):
# GH#9907
result1 = pd.date_range(
start="2015-04-15 00:00:03", end="2016-04-22 00:00:00", freq="Q"
)
result2 = pd.date_range(
start="2015-04-15 00:00:03", end="2015-06-22 00:00:04", freq="W"
)
expected1_list = [
"2015-06-30 00:00:03",
"2015-09-30 00:00:03",
"2015-12-31 00:00:03",
"2016-03-31 00:00:03",
]
expected2_list = [
"2015-04-19 00:00:03",
"2015-04-26 00:00:03",
"2015-05-03 00:00:03",
"2015-05-10 00:00:03",
"2015-05-17 00:00:03",
"2015-05-24 00:00:03",
"2015-05-31 00:00:03",
"2015-06-07 00:00:03",
"2015-06-14 00:00:03",
"2015-06-21 00:00:03",
]
expected1 = DatetimeIndex(
expected1_list, dtype="datetime64[ns]", freq="Q-DEC", tz=None
)
expected2 = DatetimeIndex(
expected2_list, dtype="datetime64[ns]", freq="W-SUN", tz=None
)
tm.assert_index_equal(result1, expected1)
tm.assert_index_equal(result2, expected2)
dt1, dt2 = "2017-01-01", "2017-01-01"
tz1, tz2 = "US/Eastern", "Europe/London"
@pytest.mark.parametrize(
"start,end",
[
(pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2)),
(pd.Timestamp(dt1), pd.Timestamp(dt2, tz=tz2)),
(pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2, tz=tz2)),
(pd.Timestamp(dt1, tz=tz2), pd.Timestamp(dt2, tz=tz1)),
],
)
def test_mismatching_tz_raises_err(self, start, end):
# issue 18488
with pytest.raises(TypeError):
pd.date_range(start, end)
with pytest.raises(TypeError):
pd.date_range(start, end, freq=BDay())
class TestBusinessDateRange:
def test_constructor(self):
bdate_range(START, END, freq=BDay())
bdate_range(START, periods=20, freq=BDay())
bdate_range(end=START, periods=20, freq=BDay())
msg = "periods must be a number, got B"
with pytest.raises(TypeError, match=msg):
date_range("2011-1-1", "2012-1-1", "B")
with pytest.raises(TypeError, match=msg):
bdate_range("2011-1-1", "2012-1-1", "B")
msg = "freq must be specified for bdate_range; use date_range instead"
with pytest.raises(TypeError, match=msg):
bdate_range(START, END, periods=10, freq=None)
def test_naive_aware_conflicts(self):
naive = bdate_range(START, END, freq=BDay(), tz=None)
aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong")
msg = "tz-naive.*tz-aware"
with pytest.raises(TypeError, match=msg):
naive.join(aware)
with pytest.raises(TypeError, match=msg):
aware.join(naive)
def test_misc(self):
end = datetime(2009, 5, 13)
dr = bdate_range(end=end, periods=20)
firstDate = end - 19 * BDay()
assert len(dr) == 20
assert dr[0] == firstDate
assert dr[-1] == end
def test_date_parse_failure(self):
badly_formed_date = "2007/100/1"
with pytest.raises(ValueError):
Timestamp(badly_formed_date)
with pytest.raises(ValueError):
bdate_range(start=badly_formed_date, periods=10)
with pytest.raises(ValueError):
bdate_range(end=badly_formed_date, periods=10)
with pytest.raises(ValueError):
bdate_range(badly_formed_date, badly_formed_date)
def test_daterange_bug_456(self):
# GH #456
rng1 = bdate_range("12/5/2011", "12/5/2011")
rng2 = bdate_range("12/2/2011", "12/5/2011")
rng2.freq = BDay()
result = rng1.union(rng2)
assert isinstance(result, DatetimeIndex)
@pytest.mark.parametrize("closed", ["left", "right"])
def test_bdays_and_open_boundaries(self, closed):
# GH 6673
start = "2018-07-21" # Saturday
end = "2018-07-29" # Sunday
result = pd.date_range(start, end, freq="B", closed=closed)
bday_start = "2018-07-23" # Monday
bday_end = "2018-07-27" # Friday
expected = pd.date_range(bday_start, bday_end, freq="D")
tm.assert_index_equal(result, expected)
def test_bday_near_overflow(self):
# GH#24252 avoid doing unnecessary addition that _would_ overflow
start = pd.Timestamp.max.floor("D").to_pydatetime()
rng = pd.date_range(start, end=None, periods=1, freq="B")
expected = pd.DatetimeIndex([start], freq="B")
tm.assert_index_equal(rng, expected)
def test_bday_overflow_error(self):
# GH#24252 check that we get OutOfBoundsDatetime and not OverflowError
start = pd.Timestamp.max.floor("D").to_pydatetime()
with pytest.raises(OutOfBoundsDatetime):
pd.date_range(start, periods=2, freq="B")
class TestCustomDateRange:
def test_constructor(self):
bdate_range(START, END, freq=CDay())
bdate_range(START, periods=20, freq=CDay())
bdate_range(end=START, periods=20, freq=CDay())
msg = "periods must be a number, got C"
with pytest.raises(TypeError, match=msg):
date_range("2011-1-1", "2012-1-1", "C")
with pytest.raises(TypeError, match=msg):
bdate_range("2011-1-1", "2012-1-1", "C")
def test_misc(self):
end = datetime(2009, 5, 13)
dr = bdate_range(end=end, periods=20, freq="C")
firstDate = end - 19 * CDay()
assert len(dr) == 20
assert dr[0] == firstDate
assert dr[-1] == end
def test_daterange_bug_456(self):
# GH #456
rng1 = bdate_range("12/5/2011", "12/5/2011", freq="C")
rng2 = bdate_range("12/2/2011", "12/5/2011", freq="C")
rng2.freq = CDay()
result = rng1.union(rng2)
assert isinstance(result, DatetimeIndex)
def test_cdaterange(self):
result = bdate_range("2013-05-01", periods=3, freq="C")
expected = DatetimeIndex(["2013-05-01", "2013-05-02", "2013-05-03"])
tm.assert_index_equal(result, expected)
def test_cdaterange_weekmask(self):
result = bdate_range(
"2013-05-01", periods=3, freq="C", weekmask="Sun Mon Tue Wed Thu"
)
expected = DatetimeIndex(["2013-05-01", "2013-05-02", "2013-05-05"])
tm.assert_index_equal(result, expected)
# raise with non-custom freq
msg = (
"a custom frequency string is required when holidays or "
"weekmask are passed, got frequency B"
)
with pytest.raises(ValueError, match=msg):
bdate_range("2013-05-01", periods=3, weekmask="Sun Mon Tue Wed Thu")
def test_cdaterange_holidays(self):
result = bdate_range("2013-05-01", periods=3, freq="C", holidays=["2013-05-01"])
expected = DatetimeIndex(["2013-05-02", "2013-05-03", "2013-05-06"])
tm.assert_index_equal(result, expected)
# raise with non-custom freq
msg = (
"a custom frequency string is required when holidays or "
"weekmask are passed, got frequency B"
)
with pytest.raises(ValueError, match=msg):
bdate_range("2013-05-01", periods=3, holidays=["2013-05-01"])
def test_cdaterange_weekmask_and_holidays(self):
result = bdate_range(
"2013-05-01",
periods=3,
freq="C",
weekmask="Sun Mon Tue Wed Thu",
holidays=["2013-05-01"],
)
expected = DatetimeIndex(["2013-05-02", "2013-05-05", "2013-05-06"])
tm.assert_index_equal(result, expected)
# raise with non-custom freq
msg = (
"a custom frequency string is required when holidays or "
"weekmask are passed, got frequency B"
)
with pytest.raises(ValueError, match=msg):
bdate_range(
"2013-05-01",
periods=3,
weekmask="Sun Mon Tue Wed Thu",
holidays=["2013-05-01"],
)
@pytest.mark.parametrize(
"freq", [freq for freq in prefix_mapping if freq.startswith("C")]
)
def test_all_custom_freq(self, freq):
# should not raise
bdate_range(
START, END, freq=freq, weekmask="Mon Wed Fri", holidays=["2009-03-14"]
)
bad_freq = freq + "FOO"
msg = "invalid custom frequency string: {freq}"
with pytest.raises(ValueError, match=msg.format(freq=bad_freq)):
bdate_range(START, END, freq=bad_freq)
@pytest.mark.parametrize(
"start_end",
[
("2018-01-01T00:00:01.000Z", "2018-01-03T00:00:01.000Z"),
("2018-01-01T00:00:00.010Z", "2018-01-03T00:00:00.010Z"),
("2001-01-01T00:00:00.010Z", "2001-01-03T00:00:00.010Z"),
],
)
def test_range_with_millisecond_resolution(self, start_end):
# https://github.com/pandas-dev/pandas/issues/24110
start, end = start_end
result = pd.date_range(start=start, end=end, periods=2, closed="left")
expected = DatetimeIndex([start])
tm.assert_index_equal(result, expected)


@@ -0,0 +1,452 @@
from datetime import date
import dateutil
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, DatetimeIndex, Index, Timestamp, date_range, offsets
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal
randn = np.random.randn
class TestDatetimeIndex:
def test_roundtrip_pickle_with_tz(self):
# GH 8367
# round-trip of timezone
index = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
unpickled = tm.round_trip_pickle(index)
tm.assert_index_equal(index, unpickled)
def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
# GH7774
index = date_range("20130101", periods=3, tz="US/Eastern")
assert str(index.reindex([])[0].tz) == "US/Eastern"
assert str(index.reindex(np.array([]))[0].tz) == "US/Eastern"
def test_time_loc(self): # GH8667
from datetime import time
from pandas._libs.index import _SIZE_CUTOFF
ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64)
key = time(15, 11, 30)
start = key.hour * 3600 + key.minute * 60 + key.second
step = 24 * 3600
for n in ns:
idx = pd.date_range("2014-11-26", periods=n, freq="S")
ts = pd.Series(np.random.randn(n), index=idx)
i = np.arange(start, n, step)
tm.assert_numpy_array_equal(ts.index.get_loc(key), i, check_dtype=False)
tm.assert_series_equal(ts[key], ts.iloc[i])
left, right = ts.copy(), ts.copy()
left[key] *= -10
right.iloc[i] *= -10
tm.assert_series_equal(left, right)
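        # Illustration (not part of the original test): indexing with a
        # datetime.time returns an integer array of every position whose
        # wall-clock time matches the key, i.e. one hit per 86400-second day,
        # which is what np.arange(start, n, step) reconstructs above.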
def test_time_overflow_for_32bit_machines(self):
        # GH8943. On some machines NumPy defaults to np.int32 (for example,
        # 32-bit Linux machines). In _generate_regular_range (tseries/index.py)
        # `periods` gets multiplied by `strides` (which has value 1e9); because
        # the maximum np.int32 value is ~2e9 and those machines will not
        # promote np.int32 to np.int64, the multiplication overflows.
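        # Illustration (not in the original comment): 1000 * 1_000_000_000 is
        # 1e12, far beyond np.int32's maximum of 2_147_483_647, so the product
        # must be computed in 64 bits to stay correct.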
periods = np.int_(1000)
idx1 = pd.date_range(start="2000", periods=periods, freq="S")
assert len(idx1) == periods
idx2 = pd.date_range(end="2000", periods=periods, freq="S")
assert len(idx2) == periods
def test_nat(self):
assert DatetimeIndex([np.nan])[0] is pd.NaT
def test_week_of_month_frequency(self):
# GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
d1 = date(2002, 9, 1)
d2 = date(2013, 10, 27)
d3 = date(2012, 9, 30)
idx1 = DatetimeIndex([d1, d2])
idx2 = DatetimeIndex([d3])
result_append = idx1.append(idx2)
expected = DatetimeIndex([d1, d2, d3])
tm.assert_index_equal(result_append, expected)
result_union = idx1.union(idx2)
expected = DatetimeIndex([d1, d3, d2])
tm.assert_index_equal(result_union, expected)
# GH 5115
result = date_range("2013-1-1", periods=4, freq="WOM-1SAT")
dates = ["2013-01-05", "2013-02-02", "2013-03-02", "2013-04-06"]
expected = DatetimeIndex(dates, freq="WOM-1SAT")
tm.assert_index_equal(result, expected)
def test_hash_error(self):
index = date_range("20010101", periods=10)
with pytest.raises(
TypeError, match=("unhashable type: {0.__name__!r}".format(type(index)))
):
hash(index)
def test_stringified_slice_with_tz(self):
# GH#2658
start = "2013-01-07"
idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern")
df = DataFrame(np.arange(10), index=idx)
df["2013-01-14 23:44:34.437768-05:00":] # no exception here
def test_append_join_nondatetimeindex(self):
rng = date_range("1/1/2000", periods=10)
idx = Index(["a", "b", "c", "d"])
result = rng.append(idx)
assert isinstance(result[0], Timestamp)
# it works
rng.join(idx, how="outer")
def test_map(self):
rng = date_range("1/1/2000", periods=10)
f = lambda x: x.strftime("%Y%m%d")
result = rng.map(f)
exp = Index([f(x) for x in rng], dtype="<U8")
tm.assert_index_equal(result, exp)
def test_map_fallthrough(self, capsys):
# GH#22067, check we don't get warnings about silently ignored errors
dti = date_range("2017-01-01", "2018-01-01", freq="B")
dti.map(lambda x: pd.Period(year=x.year, month=x.month, freq="M"))
captured = capsys.readouterr()
assert captured.err == ""
def test_iteration_preserves_tz(self):
# see gh-8890
index = date_range("2012-01-01", periods=3, freq="H", tz="US/Eastern")
for i, ts in enumerate(index):
result = ts
expected = index[i]
assert result == expected
index = date_range(
"2012-01-01", periods=3, freq="H", tz=dateutil.tz.tzoffset(None, -28800)
)
for i, ts in enumerate(index):
result = ts
expected = index[i]
assert result._repr_base == expected._repr_base
assert result == expected
# 9100
index = pd.DatetimeIndex(
["2014-12-01 03:32:39.987000-08:00", "2014-12-01 04:12:34.987000-08:00"]
)
for i, ts in enumerate(index):
result = ts
expected = index[i]
assert result._repr_base == expected._repr_base
assert result == expected
@pytest.mark.parametrize("periods", [0, 9999, 10000, 10001])
def test_iteration_over_chunksize(self, periods):
# GH21012
index = date_range("2000-01-01 00:00:00", periods=periods, freq="min")
num = 0
for stamp in index:
assert index[num] == stamp
num += 1
assert num == len(index)
def test_misc_coverage(self):
rng = date_range("1/1/2000", periods=5)
result = rng.groupby(rng.day)
assert isinstance(list(result.values())[0][0], Timestamp)
idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"])
assert not idx.equals(list(idx))
non_datetime = Index(list("abc"))
assert not idx.equals(list(non_datetime))
def test_string_index_series_name_converted(self):
# #1644
df = DataFrame(np.random.randn(10, 4), index=date_range("1/1/2000", periods=10))
result = df.loc["1/3/2000"]
assert result.name == df.index[2]
result = df.T["1/3/2000"]
assert result.name == df.index[2]
def test_get_duplicates(self):
idx = DatetimeIndex(
[
"2000-01-01",
"2000-01-02",
"2000-01-02",
"2000-01-03",
"2000-01-03",
"2000-01-04",
]
)
with tm.assert_produces_warning(FutureWarning):
# Deprecated - see GH20239
result = idx.get_duplicates()
ex = DatetimeIndex(["2000-01-02", "2000-01-03"])
tm.assert_index_equal(result, ex)
def test_argmin_argmax(self):
idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
assert idx.argmin() == 1
assert idx.argmax() == 0
def test_sort_values(self):
idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
ordered = idx.sort_values()
assert ordered.is_monotonic
ordered = idx.sort_values(ascending=False)
assert ordered[::-1].is_monotonic
ordered, dexer = idx.sort_values(return_indexer=True)
assert ordered.is_monotonic
tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp))
ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
assert ordered[::-1].is_monotonic
tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp))
def test_map_bug_1677(self):
index = DatetimeIndex(["2012-04-25 09:30:00.393000"])
f = index.asof
result = index.map(f)
expected = Index([f(index[0])])
tm.assert_index_equal(result, expected)
def test_groupby_function_tuple_1677(self):
df = DataFrame(np.random.rand(100), index=date_range("1/1/2000", periods=100))
monthly_group = df.groupby(lambda x: (x.year, x.month))
result = monthly_group.mean()
assert isinstance(result.index[0], tuple)
def test_append_numpy_bug_1681(self):
# another datetime64 bug
dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
a = DataFrame()
c = DataFrame({"A": "foo", "B": dr}, index=dr)
result = a.append(c)
assert (result["B"] == dr).all()
def test_isin(self):
index = tm.makeDateIndex(4)
result = index.isin(index)
assert result.all()
result = index.isin(list(index))
assert result.all()
assert_almost_equal(
index.isin([index[2], 5]), np.array([False, False, True, False])
)
def test_does_not_convert_mixed_integer(self):
df = tm.makeCustomDataframe(
10,
10,
data_gen_f=lambda *args, **kwargs: randn(),
r_idx_type="i",
c_idx_type="dt",
)
cols = df.columns.join(df.index, how="outer")
joined = cols.join(df.columns)
assert cols.dtype == np.dtype("O")
assert cols.dtype == joined.dtype
tm.assert_numpy_array_equal(cols.values, joined.values)
def test_join_self(self, join_type):
index = date_range("1/1/2000", periods=10)
joined = index.join(index, how=join_type)
assert index is joined
def assert_index_parameters(self, index):
assert index.freq == "40960N"
assert index.inferred_freq == "40960N"
def test_ns_index(self):
nsamples = 400
ns = int(1e9 / 24414)
dtstart = np.datetime64("2012-09-20T00:00:00")
dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, "ns")
freq = ns * offsets.Nano()
index = pd.DatetimeIndex(dt, freq=freq, name="time")
self.assert_index_parameters(index)
new_index = pd.date_range(start=index[0], end=index[-1], freq=index.freq)
self.assert_index_parameters(new_index)
def test_join_with_period_index(self, join_type):
df = tm.makeCustomDataframe(
10,
10,
data_gen_f=lambda *args: np.random.randint(2),
c_idx_type="p",
r_idx_type="dt",
)
s = df.iloc[:5, 0]
expected = df.columns.astype("O").join(s.index, how=join_type)
result = df.columns.join(s.index, how=join_type)
tm.assert_index_equal(expected, result)
def test_factorize(self):
idx1 = DatetimeIndex(
["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"]
)
exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
arr, idx = idx1.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
arr, idx = idx1.factorize(sort=True)
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
# tz must be preserved
idx1 = idx1.tz_localize("Asia/Tokyo")
exp_idx = exp_idx.tz_localize("Asia/Tokyo")
arr, idx = idx1.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
idx2 = pd.DatetimeIndex(
["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"]
)
exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
arr, idx = idx2.factorize(sort=True)
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"])
arr, idx = idx2.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
# freq must be preserved
idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo")
exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
arr, idx = idx3.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, idx3)
def test_factorize_tz(self, tz_naive_fixture):
tz = tz_naive_fixture
# GH#13750
base = pd.date_range("2016-11-05", freq="H", periods=100, tz=tz)
idx = base.repeat(5)
exp_arr = np.arange(100, dtype=np.intp).repeat(5)
for obj in [idx, pd.Series(idx)]:
arr, res = obj.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(res, base)
def test_factorize_dst(self):
# GH 13750
idx = pd.date_range("2016-11-06", freq="H", periods=12, tz="US/Eastern")
for obj in [idx, pd.Series(idx)]:
arr, res = obj.factorize()
tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
tm.assert_index_equal(res, idx)
idx = pd.date_range("2016-06-13", freq="H", periods=12, tz="US/Eastern")
for obj in [idx, pd.Series(idx)]:
arr, res = obj.factorize()
tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
tm.assert_index_equal(res, idx)
@pytest.mark.parametrize(
"arr, expected",
[
(pd.DatetimeIndex(["2017", "2017"]), pd.DatetimeIndex(["2017"])),
(
pd.DatetimeIndex(["2017", "2017"], tz="US/Eastern"),
pd.DatetimeIndex(["2017"], tz="US/Eastern"),
),
],
)
def test_unique(self, arr, expected):
result = arr.unique()
tm.assert_index_equal(result, expected)
# GH 21737
# Ensure the underlying data is consistent
assert result[0] == expected[0]
def test_asarray_tz_naive(self):
# This shouldn't produce a warning.
idx = pd.date_range("2000", periods=2)
# M8[ns] by default
with tm.assert_produces_warning(None):
result = np.asarray(idx)
expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
tm.assert_numpy_array_equal(result, expected)
# optionally, object
with tm.assert_produces_warning(None):
result = np.asarray(idx, dtype=object)
expected = np.array([pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-02")])
tm.assert_numpy_array_equal(result, expected)
def test_asarray_tz_aware(self):
tz = "US/Central"
idx = pd.date_range("2000", periods=2, tz=tz)
expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]")
# We warn by default and return an ndarray[M8[ns]]
with tm.assert_produces_warning(FutureWarning):
result = np.asarray(idx)
tm.assert_numpy_array_equal(result, expected)
# Old behavior with no warning
with tm.assert_produces_warning(None):
result = np.asarray(idx, dtype="M8[ns]")
tm.assert_numpy_array_equal(result, expected)
# Future behavior with no warning
expected = np.array(
[pd.Timestamp("2000-01-01", tz=tz), pd.Timestamp("2000-01-02", tz=tz)]
)
with tm.assert_produces_warning(None):
result = np.asarray(idx, dtype=object)
tm.assert_numpy_array_equal(result, expected)
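        # Summary for reference: at this point np.asarray on a tz-aware index
        # warns and returns tz-naive datetime64[ns] values expressed in UTC;
        # asking for dtype="M8[ns]" keeps that result silently, while
        # dtype=object yields tz-aware Timestamp objects (the behaviour the
        # comments above describe as the future default).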


@@ -0,0 +1,32 @@
""" generic tests from the Datetimelike class """
from pandas import DatetimeIndex, date_range
from pandas.util import testing as tm
from ..datetimelike import DatetimeLike
class TestDatetimeIndex(DatetimeLike):
_holder = DatetimeIndex
def setup_method(self, method):
self.indices = dict(
index=tm.makeDateIndex(10),
index_dec=date_range("20130110", periods=10, freq="-1D"),
)
self.setup_indices()
def create_index(self):
return date_range("20130101", periods=5)
def test_shift(self):
pass # handled in test_ops
def test_pickle_compat_construction(self):
pass
def test_intersection(self):
pass # handled in test_setops
def test_union(self):
pass # handled in test_setops


@@ -0,0 +1,241 @@
from datetime import datetime
import dateutil.tz
import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import DatetimeIndex, Series
import pandas.util.testing as tm
def test_to_native_types():
index = pd.date_range(freq="1D", periods=3, start="2017-01-01")
# First, with no arguments.
expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object)
result = index.to_native_types()
tm.assert_numpy_array_equal(result, expected)
# No NaN values, so na_rep has no effect
result = index.to_native_types(na_rep="pandas")
tm.assert_numpy_array_equal(result, expected)
# Make sure slicing works
expected = np.array(["2017-01-01", "2017-01-03"], dtype=object)
result = index.to_native_types([0, 2])
tm.assert_numpy_array_equal(result, expected)
# Make sure date formatting works
expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object)
result = index.to_native_types(date_format="%m-%Y-%d")
tm.assert_numpy_array_equal(result, expected)
# NULL object handling should work
index = DatetimeIndex(["2017-01-01", pd.NaT, "2017-01-03"])
expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object)
result = index.to_native_types()
tm.assert_numpy_array_equal(result, expected)
expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object)
result = index.to_native_types(na_rep="pandas")
tm.assert_numpy_array_equal(result, expected)
class TestDatetimeIndexRendering:
def test_dti_repr_short(self):
dr = pd.date_range(start="1/1/2012", periods=1)
repr(dr)
dr = pd.date_range(start="1/1/2012", periods=2)
repr(dr)
dr = pd.date_range(start="1/1/2012", periods=3)
repr(dr)
@pytest.mark.parametrize("method", ["__repr__", "__str__"])
def test_dti_representation(self, method):
idxs = []
idxs.append(DatetimeIndex([], freq="D"))
idxs.append(DatetimeIndex(["2011-01-01"], freq="D"))
idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D"))
idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D"))
idxs.append(
DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
freq="H",
tz="Asia/Tokyo",
)
)
idxs.append(
DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="US/Eastern"
)
)
idxs.append(
DatetimeIndex(["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="UTC")
)
exp = []
exp.append("DatetimeIndex([], dtype='datetime64[ns]', freq='D')")
exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D')")
exp.append(
"DatetimeIndex(['2011-01-01', '2011-01-02'], "
"dtype='datetime64[ns]', freq='D')"
)
exp.append(
"DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
"dtype='datetime64[ns]', freq='D')"
)
exp.append(
"DatetimeIndex(['2011-01-01 09:00:00+09:00', "
"'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']"
", dtype='datetime64[ns, Asia/Tokyo]', freq='H')"
)
exp.append(
"DatetimeIndex(['2011-01-01 09:00:00-05:00', "
"'2011-01-01 10:00:00-05:00', 'NaT'], "
"dtype='datetime64[ns, US/Eastern]', freq=None)"
)
exp.append(
"DatetimeIndex(['2011-01-01 09:00:00+00:00', "
"'2011-01-01 10:00:00+00:00', 'NaT'], "
"dtype='datetime64[ns, UTC]', freq=None)"
""
)
with pd.option_context("display.width", 300):
for indx, expected in zip(idxs, exp):
result = getattr(indx, method)()
assert result == expected
def test_dti_representation_to_series(self):
idx1 = DatetimeIndex([], freq="D")
idx2 = DatetimeIndex(["2011-01-01"], freq="D")
idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
idx5 = DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
freq="H",
tz="Asia/Tokyo",
)
idx6 = DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="US/Eastern"
)
idx7 = DatetimeIndex(["2011-01-01 09:00", "2011-01-02 10:15"])
exp1 = """Series([], dtype: datetime64[ns])"""
exp2 = "0 2011-01-01\ndtype: datetime64[ns]"
exp3 = "0 2011-01-01\n1 2011-01-02\ndtype: datetime64[ns]"
exp4 = (
"0 2011-01-01\n"
"1 2011-01-02\n"
"2 2011-01-03\n"
"dtype: datetime64[ns]"
)
exp5 = (
"0 2011-01-01 09:00:00+09:00\n"
"1 2011-01-01 10:00:00+09:00\n"
"2 2011-01-01 11:00:00+09:00\n"
"dtype: datetime64[ns, Asia/Tokyo]"
)
exp6 = (
"0 2011-01-01 09:00:00-05:00\n"
"1 2011-01-01 10:00:00-05:00\n"
"2 NaT\n"
"dtype: datetime64[ns, US/Eastern]"
)
exp7 = (
"0 2011-01-01 09:00:00\n"
"1 2011-01-02 10:15:00\n"
"dtype: datetime64[ns]"
)
with pd.option_context("display.width", 300):
for idx, expected in zip(
[idx1, idx2, idx3, idx4, idx5, idx6, idx7],
[exp1, exp2, exp3, exp4, exp5, exp6, exp7],
):
result = repr(Series(idx))
assert result == expected
def test_dti_summary(self):
# GH#9116
idx1 = DatetimeIndex([], freq="D")
idx2 = DatetimeIndex(["2011-01-01"], freq="D")
idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
idx5 = DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
freq="H",
tz="Asia/Tokyo",
)
idx6 = DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", pd.NaT], tz="US/Eastern"
)
exp1 = "DatetimeIndex: 0 entries\nFreq: D"
exp2 = "DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\nFreq: D"
exp3 = "DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\nFreq: D"
exp4 = "DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\nFreq: D"
exp5 = (
"DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 "
"to 2011-01-01 11:00:00+09:00\n"
"Freq: H"
)
exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT"""
for idx, expected in zip(
[idx1, idx2, idx3, idx4, idx5, idx6], [exp1, exp2, exp3, exp4, exp5, exp6]
):
result = idx._summary()
assert result == expected
def test_dti_business_repr(self):
# only really care that it works
repr(pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1)))
def test_dti_business_summary(self):
rng = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1))
rng._summary()
rng[2:2]._summary()
def test_dti_business_summary_pytz(self):
pd.bdate_range("1/1/2005", "1/1/2009", tz=pytz.utc)._summary()
def test_dti_business_summary_dateutil(self):
pd.bdate_range("1/1/2005", "1/1/2009", tz=dateutil.tz.tzutc())._summary()
def test_dti_custom_business_repr(self):
# only really care that it works
repr(pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1), freq="C"))
def test_dti_custom_business_summary(self):
rng = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1), freq="C")
rng._summary()
rng[2:2]._summary()
def test_dti_custom_business_summary_pytz(self):
pd.bdate_range("1/1/2005", "1/1/2009", freq="C", tz=pytz.utc)._summary()
def test_dti_custom_business_summary_dateutil(self):
pd.bdate_range(
"1/1/2005", "1/1/2009", freq="C", tz=dateutil.tz.tzutc()
)._summary()


@@ -0,0 +1,719 @@
from datetime import datetime, time, timedelta
import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import DatetimeIndex, Index, Timestamp, date_range, notna
import pandas.util.testing as tm
from pandas.tseries.offsets import BDay, CDay
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestGetItem:
def test_ellipsis(self):
# GH#21282
idx = pd.date_range(
"2011-01-01", "2011-01-31", freq="D", tz="Asia/Tokyo", name="idx"
)
result = idx[...]
assert result.equals(idx)
assert result is not idx
def test_getitem(self):
idx1 = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
idx2 = pd.date_range(
"2011-01-01", "2011-01-31", freq="D", tz="Asia/Tokyo", name="idx"
)
for idx in [idx1, idx2]:
result = idx[0]
assert result == Timestamp("2011-01-01", tz=idx.tz)
result = idx[0:5]
expected = pd.date_range(
"2011-01-01", "2011-01-05", freq="D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx[0:10:2]
expected = pd.date_range(
"2011-01-01", "2011-01-09", freq="2D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx[-20:-5:3]
expected = pd.date_range(
"2011-01-12", "2011-01-24", freq="3D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx[4::-1]
expected = DatetimeIndex(
["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"],
freq="-1D",
tz=idx.tz,
name="idx",
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
def test_dti_business_getitem(self):
rng = pd.bdate_range(START, END)
smaller = rng[:5]
exp = DatetimeIndex(rng.view(np.ndarray)[:5])
tm.assert_index_equal(smaller, exp)
assert smaller.freq == rng.freq
sliced = rng[::5]
assert sliced.freq == BDay() * 5
fancy_indexed = rng[[4, 3, 2, 1, 0]]
assert len(fancy_indexed) == 5
assert isinstance(fancy_indexed, DatetimeIndex)
assert fancy_indexed.freq is None
# 32-bit vs. 64-bit platforms
assert rng[4] == rng[np.int_(4)]
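        # Note for reference: slicing with a regular step keeps a derived
        # frequency (BDay() * 5 above), while list/fancy indexing cannot
        # guarantee evenly spaced values, so the frequency is reset to None.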
def test_dti_business_getitem_matplotlib_hackaround(self):
rng = pd.bdate_range(START, END)
values = rng[:, None]
expected = rng.values[:, None]
tm.assert_numpy_array_equal(values, expected)
def test_dti_custom_getitem(self):
rng = pd.bdate_range(START, END, freq="C")
smaller = rng[:5]
exp = DatetimeIndex(rng.view(np.ndarray)[:5])
tm.assert_index_equal(smaller, exp)
assert smaller.freq == rng.freq
sliced = rng[::5]
assert sliced.freq == CDay() * 5
fancy_indexed = rng[[4, 3, 2, 1, 0]]
assert len(fancy_indexed) == 5
assert isinstance(fancy_indexed, DatetimeIndex)
assert fancy_indexed.freq is None
# 32-bit vs. 64-bit platforms
assert rng[4] == rng[np.int_(4)]
def test_dti_custom_getitem_matplotlib_hackaround(self):
rng = pd.bdate_range(START, END, freq="C")
values = rng[:, None]
expected = rng.values[:, None]
tm.assert_numpy_array_equal(values, expected)
class TestWhere:
def test_where_other(self):
# other is ndarray or Index
i = pd.date_range("20130101", periods=3, tz="US/Eastern")
for arr in [np.nan, pd.NaT]:
result = i.where(notna(i), other=np.nan)
expected = i
tm.assert_index_equal(result, expected)
i2 = i.copy()
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
result = i.where(notna(i2), i2)
tm.assert_index_equal(result, i2)
i2 = i.copy()
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
result = i.where(notna(i2), i2.values)
tm.assert_index_equal(result, i2)
def test_where_tz(self):
i = pd.date_range("20130101", periods=3, tz="US/Eastern")
result = i.where(notna(i))
expected = i
tm.assert_index_equal(result, expected)
i2 = i.copy()
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
result = i.where(notna(i2))
expected = i2
tm.assert_index_equal(result, expected)
class TestTake:
def test_take(self):
# GH#10295
idx1 = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
idx2 = pd.date_range(
"2011-01-01", "2011-01-31", freq="D", tz="Asia/Tokyo", name="idx"
)
for idx in [idx1, idx2]:
result = idx.take([0])
assert result == Timestamp("2011-01-01", tz=idx.tz)
result = idx.take([0, 1, 2])
expected = pd.date_range(
"2011-01-01", "2011-01-03", freq="D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx.take([0, 2, 4])
expected = pd.date_range(
"2011-01-01", "2011-01-05", freq="2D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx.take([7, 4, 1])
expected = pd.date_range(
"2011-01-08", "2011-01-02", freq="-3D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx.take([3, 2, 5])
expected = DatetimeIndex(
["2011-01-04", "2011-01-03", "2011-01-06"],
freq=None,
tz=idx.tz,
name="idx",
)
tm.assert_index_equal(result, expected)
assert result.freq is None
result = idx.take([-3, 2, 5])
expected = DatetimeIndex(
["2011-01-29", "2011-01-03", "2011-01-06"],
freq=None,
tz=idx.tz,
name="idx",
)
tm.assert_index_equal(result, expected)
assert result.freq is None
def test_take_invalid_kwargs(self):
idx = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
indices = [1, 6, 5, 9, 10, 13, 15, 3]
msg = r"take\(\) got an unexpected keyword argument 'foo'"
with pytest.raises(TypeError, match=msg):
idx.take(indices, foo=2)
msg = "the 'out' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, out=indices)
msg = "the 'mode' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, mode="clip")
# TODO: This method came from test_datetime; de-dup with version above
@pytest.mark.parametrize("tz", [None, "US/Eastern", "Asia/Tokyo"])
def test_take2(self, tz):
dates = [
datetime(2010, 1, 1, 14),
datetime(2010, 1, 1, 15),
datetime(2010, 1, 1, 17),
datetime(2010, 1, 1, 21),
]
idx = pd.date_range(
start="2010-01-01 09:00",
end="2010-02-01 09:00",
freq="H",
tz=tz,
name="idx",
)
expected = DatetimeIndex(dates, freq=None, name="idx", tz=tz)
taken1 = idx.take([5, 6, 8, 12])
taken2 = idx[[5, 6, 8, 12]]
for taken in [taken1, taken2]:
tm.assert_index_equal(taken, expected)
assert isinstance(taken, DatetimeIndex)
assert taken.freq is None
assert taken.tz == expected.tz
assert taken.name == expected.name
def test_take_fill_value(self):
# GH#12631
idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
result = idx.take(np.array([1, 0, -1]))
expected = pd.DatetimeIndex(
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
)
tm.assert_index_equal(result, expected)
# fill_value
result = idx.take(np.array([1, 0, -1]), fill_value=True)
expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
tm.assert_index_equal(result, expected)
# allow_fill=False
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
expected = pd.DatetimeIndex(
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
)
tm.assert_index_equal(result, expected)
msg = (
"When allow_fill=True and fill_value is not None, "
"all indices must be >= -1"
)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -2]), fill_value=True)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -5]), fill_value=True)
with pytest.raises(IndexError):
idx.take(np.array([1, -5]))
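        # For reference: with fill_value=True (and the default allow_fill=True)
        # an index of -1 marks a missing slot and becomes NaT; with
        # allow_fill=False it is ordinary negative indexing (the last element).
        # Indices below -1 are rejected when a fill value is given, and
        # out-of-bounds indices raise IndexError as exercised above.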
def test_take_fill_value_with_timezone(self):
idx = pd.DatetimeIndex(
["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", tz="US/Eastern"
)
result = idx.take(np.array([1, 0, -1]))
expected = pd.DatetimeIndex(
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
)
tm.assert_index_equal(result, expected)
# fill_value
result = idx.take(np.array([1, 0, -1]), fill_value=True)
expected = pd.DatetimeIndex(
["2011-02-01", "2011-01-01", "NaT"], name="xxx", tz="US/Eastern"
)
tm.assert_index_equal(result, expected)
# allow_fill=False
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
expected = pd.DatetimeIndex(
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
)
tm.assert_index_equal(result, expected)
msg = (
"When allow_fill=True and fill_value is not None, "
"all indices must be >= -1"
)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -2]), fill_value=True)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -5]), fill_value=True)
with pytest.raises(IndexError):
idx.take(np.array([1, -5]))
class TestDatetimeIndex:
@pytest.mark.parametrize("null", [None, np.nan, pd.NaT])
@pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"])
def test_insert_nat(self, tz, null):
# GH#16537, GH#18295 (test missing)
idx = pd.DatetimeIndex(["2017-01-01"], tz=tz)
expected = pd.DatetimeIndex(["NaT", "2017-01-01"], tz=tz)
res = idx.insert(0, null)
tm.assert_index_equal(res, expected)
def test_insert(self):
idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"], name="idx")
result = idx.insert(2, datetime(2000, 1, 5))
exp = DatetimeIndex(
["2000-01-04", "2000-01-01", "2000-01-05", "2000-01-02"], name="idx"
)
tm.assert_index_equal(result, exp)
# insertion of non-datetime should coerce to object index
result = idx.insert(1, "inserted")
expected = Index(
[
datetime(2000, 1, 4),
"inserted",
datetime(2000, 1, 1),
datetime(2000, 1, 2),
],
name="idx",
)
assert not isinstance(result, DatetimeIndex)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
idx = date_range("1/1/2000", periods=3, freq="M", name="idx")
# preserve freq
expected_0 = DatetimeIndex(
["1999-12-31", "2000-01-31", "2000-02-29", "2000-03-31"],
name="idx",
freq="M",
)
expected_3 = DatetimeIndex(
["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30"],
name="idx",
freq="M",
)
# reset freq to None
expected_1_nofreq = DatetimeIndex(
["2000-01-31", "2000-01-31", "2000-02-29", "2000-03-31"],
name="idx",
freq=None,
)
expected_3_nofreq = DatetimeIndex(
["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
name="idx",
freq=None,
)
cases = [
(0, datetime(1999, 12, 31), expected_0),
(-3, datetime(1999, 12, 31), expected_0),
(3, datetime(2000, 4, 30), expected_3),
(1, datetime(2000, 1, 31), expected_1_nofreq),
(3, datetime(2000, 1, 2), expected_3_nofreq),
]
for n, d, expected in cases:
result = idx.insert(n, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
# reset freq to None
result = idx.insert(3, datetime(2000, 1, 2))
expected = DatetimeIndex(
["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
name="idx",
freq=None,
)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq is None
# see gh-7299
idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")
with pytest.raises(ValueError):
idx.insert(3, pd.Timestamp("2000-01-04"))
with pytest.raises(ValueError):
idx.insert(3, datetime(2000, 1, 4))
with pytest.raises(ValueError):
idx.insert(3, pd.Timestamp("2000-01-04", tz="US/Eastern"))
with pytest.raises(ValueError):
idx.insert(3, datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern")))
for tz in ["US/Pacific", "Asia/Singapore"]:
idx = date_range("1/1/2000 09:00", periods=6, freq="H", tz=tz, name="idx")
# preserve freq
expected = date_range(
"1/1/2000 09:00", periods=7, freq="H", tz=tz, name="idx"
)
for d in [
pd.Timestamp("2000-01-01 15:00", tz=tz),
pytz.timezone(tz).localize(datetime(2000, 1, 1, 15)),
]:
result = idx.insert(6, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
assert result.tz == expected.tz
expected = DatetimeIndex(
[
"2000-01-01 09:00",
"2000-01-01 10:00",
"2000-01-01 11:00",
"2000-01-01 12:00",
"2000-01-01 13:00",
"2000-01-01 14:00",
"2000-01-01 10:00",
],
name="idx",
tz=tz,
freq=None,
)
# reset freq to None
for d in [
pd.Timestamp("2000-01-01 10:00", tz=tz),
pytz.timezone(tz).localize(datetime(2000, 1, 1, 10)),
]:
result = idx.insert(6, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.tz == expected.tz
assert result.freq is None
def test_delete(self):
idx = date_range(start="2000-01-01", periods=5, freq="M", name="idx")
        # preserve freq
expected_0 = date_range(start="2000-02-01", periods=4, freq="M", name="idx")
expected_4 = date_range(start="2000-01-01", periods=4, freq="M", name="idx")
# reset freq to None
expected_1 = DatetimeIndex(
["2000-01-31", "2000-03-31", "2000-04-30", "2000-05-31"],
freq=None,
name="idx",
)
cases = {
0: expected_0,
-5: expected_0,
-1: expected_4,
4: expected_4,
1: expected_1,
}
for n, expected in cases.items():
result = idx.delete(n)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
with pytest.raises((IndexError, ValueError)):
# either depending on numpy version
idx.delete(5)
for tz in [None, "Asia/Tokyo", "US/Pacific"]:
idx = date_range(
start="2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz
)
expected = date_range(
start="2000-01-01 10:00", periods=9, freq="H", name="idx", tz=tz
)
result = idx.delete(0)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freqstr == "H"
assert result.tz == expected.tz
expected = date_range(
start="2000-01-01 09:00", periods=9, freq="H", name="idx", tz=tz
)
result = idx.delete(-1)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freqstr == "H"
assert result.tz == expected.tz
def test_delete_slice(self):
idx = date_range(start="2000-01-01", periods=10, freq="D", name="idx")
        # preserve freq
expected_0_2 = date_range(start="2000-01-04", periods=7, freq="D", name="idx")
expected_7_9 = date_range(start="2000-01-01", periods=7, freq="D", name="idx")
# reset freq to None
expected_3_5 = DatetimeIndex(
[
"2000-01-01",
"2000-01-02",
"2000-01-03",
"2000-01-07",
"2000-01-08",
"2000-01-09",
"2000-01-10",
],
freq=None,
name="idx",
)
cases = {
(0, 1, 2): expected_0_2,
(7, 8, 9): expected_7_9,
(3, 4, 5): expected_3_5,
}
for n, expected in cases.items():
result = idx.delete(n)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
result = idx.delete(slice(n[0], n[-1] + 1))
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
for tz in [None, "Asia/Tokyo", "US/Pacific"]:
ts = pd.Series(
1,
index=pd.date_range(
"2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz
),
)
# preserve freq
result = ts.drop(ts.index[:5]).index
expected = pd.date_range(
"2000-01-01 14:00", periods=5, freq="H", name="idx", tz=tz
)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
assert result.tz == expected.tz
# reset freq to None
result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index
expected = DatetimeIndex(
[
"2000-01-01 09:00",
"2000-01-01 11:00",
"2000-01-01 13:00",
"2000-01-01 15:00",
"2000-01-01 17:00",
],
freq=None,
name="idx",
tz=tz,
)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
assert result.tz == expected.tz
def test_get_loc(self):
idx = pd.date_range("2000-01-01", periods=3)
for method in [None, "pad", "backfill", "nearest"]:
assert idx.get_loc(idx[1], method) == 1
assert idx.get_loc(idx[1].to_pydatetime(), method) == 1
assert idx.get_loc(str(idx[1]), method) == 1
if method is not None:
assert (
idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1
)
assert idx.get_loc("2000-01-01", method="nearest") == 0
assert idx.get_loc("2000-01-01T12", method="nearest") == 1
assert idx.get_loc("2000-01-01T12", method="nearest", tolerance="1 day") == 1
assert (
idx.get_loc("2000-01-01T12", method="nearest", tolerance=pd.Timedelta("1D"))
== 1
)
assert (
idx.get_loc(
"2000-01-01T12", method="nearest", tolerance=np.timedelta64(1, "D")
)
== 1
)
assert (
idx.get_loc("2000-01-01T12", method="nearest", tolerance=timedelta(1)) == 1
)
with pytest.raises(ValueError, match="unit abbreviation w/o a number"):
idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo")
with pytest.raises(KeyError, match="'2000-01-01T03'"):
idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours")
with pytest.raises(
ValueError, match="tolerance size must match target index size"
):
idx.get_loc(
"2000-01-01",
method="nearest",
tolerance=[
pd.Timedelta("1day").to_timedelta64(),
pd.Timedelta("1day").to_timedelta64(),
],
)
assert idx.get_loc("2000", method="nearest") == slice(0, 3)
assert idx.get_loc("2000-01", method="nearest") == slice(0, 3)
assert idx.get_loc("1999", method="nearest") == 0
assert idx.get_loc("2001", method="nearest") == 2
with pytest.raises(KeyError, match="'1999'"):
idx.get_loc("1999", method="pad")
with pytest.raises(KeyError, match="'2001'"):
idx.get_loc("2001", method="backfill")
with pytest.raises(KeyError, match="'foobar'"):
idx.get_loc("foobar")
with pytest.raises(TypeError):
idx.get_loc(slice(2))
idx = pd.to_datetime(["2000-01-01", "2000-01-04"])
assert idx.get_loc("2000-01-02", method="nearest") == 0
assert idx.get_loc("2000-01-03", method="nearest") == 1
assert idx.get_loc("2000-01", method="nearest") == slice(0, 2)
# time indexing
idx = pd.date_range("2000-01-01", periods=24, freq="H")
tm.assert_numpy_array_equal(
idx.get_loc(time(12)), np.array([12]), check_dtype=False
)
tm.assert_numpy_array_equal(
idx.get_loc(time(12, 30)), np.array([]), check_dtype=False
)
with pytest.raises(NotImplementedError):
idx.get_loc(time(12, 30), method="pad")
def test_get_indexer(self):
idx = pd.date_range("2000-01-01", periods=3)
exp = np.array([0, 1, 2], dtype=np.intp)
tm.assert_numpy_array_equal(idx.get_indexer(idx), exp)
target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
tm.assert_numpy_array_equal(
idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
)
tm.assert_numpy_array_equal(
idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
)
tm.assert_numpy_array_equal(
idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
)
tm.assert_numpy_array_equal(
idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")),
np.array([0, -1, 1], dtype=np.intp),
)
tol_raw = [
pd.Timedelta("1 hour"),
pd.Timedelta("1 hour"),
pd.Timedelta("1 hour").to_timedelta64(),
]
tm.assert_numpy_array_equal(
idx.get_indexer(
target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw]
),
np.array([0, -1, 1], dtype=np.intp),
)
tol_bad = [
pd.Timedelta("2 hour").to_timedelta64(),
pd.Timedelta("1 hour").to_timedelta64(),
"foo",
]
with pytest.raises(ValueError, match="abbreviation w/o a number"):
idx.get_indexer(target, "nearest", tolerance=tol_bad)
with pytest.raises(ValueError):
idx.get_indexer(idx[[0]], method="nearest", tolerance="foo")
def test_reasonable_key_error(self):
# GH#1062
index = DatetimeIndex(["1/3/2000"])
with pytest.raises(KeyError, match="2000"):
index.get_loc("1/1/2000")
@pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)])
def test_timedelta_invalid_key(self, key):
# GH#20464
dti = pd.date_range("1970-01-01", periods=10)
with pytest.raises(TypeError):
dti.get_loc(key)
def test_get_loc_nat(self):
# GH#20464
index = DatetimeIndex(["1/3/2000", "NaT"])
assert index.get_loc(pd.NaT) == 1


@@ -0,0 +1,379 @@
import calendar
import locale
import unicodedata
import numpy as np
import pytest
import pandas as pd
from pandas import DatetimeIndex, Index, Timestamp, date_range, datetime, offsets
import pandas.util.testing as tm
class TestTimeSeries:
def test_pass_datetimeindex_to_index(self):
# Bugs in #1396
rng = date_range("1/1/2000", "3/1/2000")
idx = Index(rng, dtype=object)
expected = Index(rng.to_pydatetime(), dtype=object)
tm.assert_numpy_array_equal(idx.values, expected.values)
def test_range_edges(self):
# GH#13672
idx = pd.date_range(
start=Timestamp("1970-01-01 00:00:00.000000001"),
end=Timestamp("1970-01-01 00:00:00.000000004"),
freq="N",
)
exp = DatetimeIndex(
[
"1970-01-01 00:00:00.000000001",
"1970-01-01 00:00:00.000000002",
"1970-01-01 00:00:00.000000003",
"1970-01-01 00:00:00.000000004",
]
)
tm.assert_index_equal(idx, exp)
idx = pd.date_range(
start=Timestamp("1970-01-01 00:00:00.000000004"),
end=Timestamp("1970-01-01 00:00:00.000000001"),
freq="N",
)
exp = DatetimeIndex([])
tm.assert_index_equal(idx, exp)
idx = pd.date_range(
start=Timestamp("1970-01-01 00:00:00.000000001"),
end=Timestamp("1970-01-01 00:00:00.000000001"),
freq="N",
)
exp = DatetimeIndex(["1970-01-01 00:00:00.000000001"])
tm.assert_index_equal(idx, exp)
idx = pd.date_range(
start=Timestamp("1970-01-01 00:00:00.000001"),
end=Timestamp("1970-01-01 00:00:00.000004"),
freq="U",
)
exp = DatetimeIndex(
[
"1970-01-01 00:00:00.000001",
"1970-01-01 00:00:00.000002",
"1970-01-01 00:00:00.000003",
"1970-01-01 00:00:00.000004",
]
)
tm.assert_index_equal(idx, exp)
idx = pd.date_range(
start=Timestamp("1970-01-01 00:00:00.001"),
end=Timestamp("1970-01-01 00:00:00.004"),
freq="L",
)
exp = DatetimeIndex(
[
"1970-01-01 00:00:00.001",
"1970-01-01 00:00:00.002",
"1970-01-01 00:00:00.003",
"1970-01-01 00:00:00.004",
]
)
tm.assert_index_equal(idx, exp)
idx = pd.date_range(
start=Timestamp("1970-01-01 00:00:01"),
end=Timestamp("1970-01-01 00:00:04"),
freq="S",
)
exp = DatetimeIndex(
[
"1970-01-01 00:00:01",
"1970-01-01 00:00:02",
"1970-01-01 00:00:03",
"1970-01-01 00:00:04",
]
)
tm.assert_index_equal(idx, exp)
idx = pd.date_range(
start=Timestamp("1970-01-01 00:01"),
end=Timestamp("1970-01-01 00:04"),
freq="T",
)
exp = DatetimeIndex(
[
"1970-01-01 00:01",
"1970-01-01 00:02",
"1970-01-01 00:03",
"1970-01-01 00:04",
]
)
tm.assert_index_equal(idx, exp)
idx = pd.date_range(
start=Timestamp("1970-01-01 01:00"),
end=Timestamp("1970-01-01 04:00"),
freq="H",
)
exp = DatetimeIndex(
[
"1970-01-01 01:00",
"1970-01-01 02:00",
"1970-01-01 03:00",
"1970-01-01 04:00",
]
)
tm.assert_index_equal(idx, exp)
idx = pd.date_range(
start=Timestamp("1970-01-01"), end=Timestamp("1970-01-04"), freq="D"
)
exp = DatetimeIndex(["1970-01-01", "1970-01-02", "1970-01-03", "1970-01-04"])
tm.assert_index_equal(idx, exp)
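        # For reference, the offset aliases exercised above: N = nanoseconds,
        # U = microseconds, L = milliseconds, S = seconds, T = minutes,
        # H = hours, D = calendar days.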
class TestDatetime64:
def test_datetimeindex_accessors(self):
dti_naive = pd.date_range(freq="D", start=datetime(1998, 1, 1), periods=365)
# GH#13303
dti_tz = pd.date_range(
freq="D", start=datetime(1998, 1, 1), periods=365, tz="US/Eastern"
)
for dti in [dti_naive, dti_tz]:
assert dti.year[0] == 1998
assert dti.month[0] == 1
assert dti.day[0] == 1
assert dti.hour[0] == 0
assert dti.minute[0] == 0
assert dti.second[0] == 0
assert dti.microsecond[0] == 0
assert dti.dayofweek[0] == 3
assert dti.dayofyear[0] == 1
assert dti.dayofyear[120] == 121
assert dti.weekofyear[0] == 1
assert dti.weekofyear[120] == 18
assert dti.quarter[0] == 1
assert dti.quarter[120] == 2
assert dti.days_in_month[0] == 31
assert dti.days_in_month[90] == 30
assert dti.is_month_start[0]
assert not dti.is_month_start[1]
assert dti.is_month_start[31]
assert dti.is_quarter_start[0]
assert dti.is_quarter_start[90]
assert dti.is_year_start[0]
assert not dti.is_year_start[364]
assert not dti.is_month_end[0]
assert dti.is_month_end[30]
assert not dti.is_month_end[31]
assert dti.is_month_end[364]
assert not dti.is_quarter_end[0]
assert not dti.is_quarter_end[30]
assert dti.is_quarter_end[89]
assert dti.is_quarter_end[364]
assert not dti.is_year_end[0]
assert dti.is_year_end[364]
assert len(dti.year) == 365
assert len(dti.month) == 365
assert len(dti.day) == 365
assert len(dti.hour) == 365
assert len(dti.minute) == 365
assert len(dti.second) == 365
assert len(dti.microsecond) == 365
assert len(dti.dayofweek) == 365
assert len(dti.dayofyear) == 365
assert len(dti.weekofyear) == 365
assert len(dti.quarter) == 365
assert len(dti.is_month_start) == 365
assert len(dti.is_month_end) == 365
assert len(dti.is_quarter_start) == 365
assert len(dti.is_quarter_end) == 365
assert len(dti.is_year_start) == 365
assert len(dti.is_year_end) == 365
assert len(dti.weekday_name) == 365
dti.name = "name"
# non boolean accessors -> return Index
for accessor in DatetimeIndex._field_ops:
res = getattr(dti, accessor)
assert len(res) == 365
assert isinstance(res, Index)
assert res.name == "name"
# boolean accessors -> return array
for accessor in DatetimeIndex._bool_ops:
res = getattr(dti, accessor)
assert len(res) == 365
assert isinstance(res, np.ndarray)
# test boolean indexing
res = dti[dti.is_quarter_start]
exp = dti[[0, 90, 181, 273]]
tm.assert_index_equal(res, exp)
res = dti[dti.is_leap_year]
exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name")
tm.assert_index_equal(res, exp)
dti = pd.date_range(freq="BQ-FEB", start=datetime(1998, 1, 1), periods=4)
assert sum(dti.is_quarter_start) == 0
assert sum(dti.is_quarter_end) == 4
assert sum(dti.is_year_start) == 0
assert sum(dti.is_year_end) == 1
        # Ensure is_start/end accessors throw ValueError for CustomBusinessDay frequencies
bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu")
dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
msg = "Custom business days is not supported by is_month_start"
with pytest.raises(ValueError, match=msg):
dti.is_month_start
dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])
assert dti.is_month_start[0] == 1
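        # the freq attached to a Timestamp changes how is_*_start / is_*_end are
        # evaluated; e.g. under a business-month freq, June 2013 "starts" on Monday the 3rd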
tests = [
(Timestamp("2013-06-01", freq="M").is_month_start, 1),
(Timestamp("2013-06-01", freq="BM").is_month_start, 0),
(Timestamp("2013-06-03", freq="M").is_month_start, 0),
(Timestamp("2013-06-03", freq="BM").is_month_start, 1),
(Timestamp("2013-02-28", freq="Q-FEB").is_month_end, 1),
(Timestamp("2013-02-28", freq="Q-FEB").is_quarter_end, 1),
(Timestamp("2013-02-28", freq="Q-FEB").is_year_end, 1),
(Timestamp("2013-03-01", freq="Q-FEB").is_month_start, 1),
(Timestamp("2013-03-01", freq="Q-FEB").is_quarter_start, 1),
(Timestamp("2013-03-01", freq="Q-FEB").is_year_start, 1),
(Timestamp("2013-03-31", freq="QS-FEB").is_month_end, 1),
(Timestamp("2013-03-31", freq="QS-FEB").is_quarter_end, 0),
(Timestamp("2013-03-31", freq="QS-FEB").is_year_end, 0),
(Timestamp("2013-02-01", freq="QS-FEB").is_month_start, 1),
(Timestamp("2013-02-01", freq="QS-FEB").is_quarter_start, 1),
(Timestamp("2013-02-01", freq="QS-FEB").is_year_start, 1),
(Timestamp("2013-06-30", freq="BQ").is_month_end, 0),
(Timestamp("2013-06-30", freq="BQ").is_quarter_end, 0),
(Timestamp("2013-06-30", freq="BQ").is_year_end, 0),
(Timestamp("2013-06-28", freq="BQ").is_month_end, 1),
(Timestamp("2013-06-28", freq="BQ").is_quarter_end, 1),
(Timestamp("2013-06-28", freq="BQ").is_year_end, 0),
(Timestamp("2013-06-30", freq="BQS-APR").is_month_end, 0),
(Timestamp("2013-06-30", freq="BQS-APR").is_quarter_end, 0),
(Timestamp("2013-06-30", freq="BQS-APR").is_year_end, 0),
(Timestamp("2013-06-28", freq="BQS-APR").is_month_end, 1),
(Timestamp("2013-06-28", freq="BQS-APR").is_quarter_end, 1),
(Timestamp("2013-03-29", freq="BQS-APR").is_year_end, 1),
(Timestamp("2013-11-01", freq="AS-NOV").is_year_start, 1),
(Timestamp("2013-10-31", freq="AS-NOV").is_year_end, 1),
(Timestamp("2012-02-01").days_in_month, 29),
(Timestamp("2013-02-01").days_in_month, 28),
]
for ts, value in tests:
assert ts == value
        # GH 6538: Check that DatetimeIndex and its Timestamp elements
        # return the same weekofyear accessor close to new year w/ tz
dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
dates = DatetimeIndex(dates, tz="Europe/Brussels")
expected = [52, 1, 1]
assert dates.weekofyear.tolist() == expected
assert [d.weekofyear for d in dates] == expected
# GH 12806
@pytest.mark.parametrize(
"time_locale", [None] if tm.get_locales() is None else [None] + tm.get_locales()
)
def test_datetime_name_accessors(self, time_locale):
# Test Monday -> Sunday and January -> December, in that sequence
if time_locale is None:
            # If time_locale is None, day_name and month_name should
            # return the English names
expected_days = [
"Monday",
"Tuesday",
"Wednesday",
"Thursday",
"Friday",
"Saturday",
"Sunday",
]
expected_months = [
"January",
"February",
"March",
"April",
"May",
"June",
"July",
"August",
"September",
"October",
"November",
"December",
]
else:
with tm.set_locale(time_locale, locale.LC_TIME):
expected_days = calendar.day_name[:]
expected_months = calendar.month_name[1:]
# GH#11128
dti = pd.date_range(freq="D", start=datetime(1998, 1, 1), periods=365)
english_days = [
"Monday",
"Tuesday",
"Wednesday",
"Thursday",
"Friday",
"Saturday",
"Sunday",
]
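        # positions 4-10 of the 1998 range and April 4-10 of 2016 both run Monday through Sunday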
for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
name = name.capitalize()
assert dti.weekday_name[day] == eng_name
assert dti.day_name(locale=time_locale)[day] == name
ts = Timestamp(datetime(2016, 4, day))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert ts.weekday_name == eng_name
assert ts.day_name(locale=time_locale) == name
dti = dti.append(DatetimeIndex([pd.NaT]))
assert np.isnan(dti.day_name(locale=time_locale)[-1])
ts = Timestamp(pd.NaT)
assert np.isnan(ts.day_name(locale=time_locale))
# GH#12805
dti = pd.date_range(freq="M", start="2012", end="2013")
result = dti.month_name(locale=time_locale)
expected = Index([month.capitalize() for month in expected_months])
# work around different normalization schemes
# https://github.com/pandas-dev/pandas/issues/22342
result = result.str.normalize("NFD")
expected = expected.str.normalize("NFD")
tm.assert_index_equal(result, expected)
for date, expected in zip(dti, expected_months):
result = date.month_name(locale=time_locale)
expected = expected.capitalize()
result = unicodedata.normalize("NFD", result)
            expected = unicodedata.normalize("NFD", expected)
assert result == expected
dti = dti.append(DatetimeIndex([pd.NaT]))
assert np.isnan(dti.month_name(locale=time_locale)[-1])
def test_nanosecond_field(self):
dti = DatetimeIndex(np.arange(10))
tm.assert_index_equal(dti.nanosecond, pd.Index(np.arange(10, dtype=np.int64)))

View File

@@ -0,0 +1,62 @@
import pytest
import pandas as pd
import pandas.util.testing as tm
class TestDatetimeIndex:
@pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"])
def test_fillna_datetime64(self, tz):
# GH 11343
idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"])
exp = pd.DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"]
)
tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00")), exp)
# tz mismatch
exp = pd.Index(
[
pd.Timestamp("2011-01-01 09:00"),
pd.Timestamp("2011-01-01 10:00", tz=tz),
pd.Timestamp("2011-01-01 11:00"),
],
dtype=object,
)
tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp)
# object
exp = pd.Index(
[pd.Timestamp("2011-01-01 09:00"), "x", pd.Timestamp("2011-01-01 11:00")],
dtype=object,
)
tm.assert_index_equal(idx.fillna("x"), exp)
idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], tz=tz)
exp = pd.DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], tz=tz
)
tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp)
exp = pd.Index(
[
pd.Timestamp("2011-01-01 09:00", tz=tz),
pd.Timestamp("2011-01-01 10:00"),
pd.Timestamp("2011-01-01 11:00", tz=tz),
],
dtype=object,
)
tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00")), exp)
# object
exp = pd.Index(
[
pd.Timestamp("2011-01-01 09:00", tz=tz),
"x",
pd.Timestamp("2011-01-01 11:00", tz=tz),
],
dtype=object,
)
tm.assert_index_equal(idx.fillna("x"), exp)

View File

@@ -0,0 +1,548 @@
from datetime import datetime
import warnings
import numpy as np
import pytest
from pandas.core.dtypes.generic import ABCDateOffset
import pandas as pd
from pandas import (
DatetimeIndex,
Index,
PeriodIndex,
Series,
Timestamp,
bdate_range,
date_range,
)
from pandas.tests.test_base import Ops
import pandas.util.testing as tm
from pandas.tseries.offsets import BDay, BMonthEnd, CDay, Day, Hour
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestDatetimeIndexOps(Ops):
def setup_method(self, method):
super().setup_method(method)
mask = lambda x: (isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex))
self.is_valid_objs = [o for o in self.objs if mask(o)]
self.not_valid_objs = [o for o in self.objs if not mask(o)]
def test_ops_properties(self):
f = lambda x: isinstance(x, DatetimeIndex)
self.check_ops_properties(DatetimeIndex._field_ops, f)
self.check_ops_properties(DatetimeIndex._object_ops, f)
self.check_ops_properties(DatetimeIndex._bool_ops, f)
def test_ops_properties_basic(self):
# sanity check that the behavior didn't change
# GH#7206
msg = "'Series' object has no attribute '{}'"
for op in ["year", "day", "second", "weekday"]:
with pytest.raises(AttributeError, match=msg.format(op)):
getattr(self.dt_series, op)
# attribute access should still work!
s = Series(dict(year=2000, month=1, day=10))
assert s.year == 2000
assert s.month == 1
assert s.day == 10
msg = "'Series' object has no attribute 'weekday'"
with pytest.raises(AttributeError, match=msg):
s.weekday
def test_repeat_range(self, tz_naive_fixture):
tz = tz_naive_fixture
rng = date_range("1/1/2000", "1/1/2001")
result = rng.repeat(5)
assert result.freq is None
assert len(result) == 5 * len(rng)
index = pd.date_range("2001-01-01", periods=2, freq="D", tz=tz)
exp = pd.DatetimeIndex(
["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz
)
for res in [index.repeat(2), np.repeat(index, 2)]:
tm.assert_index_equal(res, exp)
assert res.freq is None
index = pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz)
exp = pd.DatetimeIndex(
["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz
)
for res in [index.repeat(2), np.repeat(index, 2)]:
tm.assert_index_equal(res, exp)
assert res.freq is None
index = pd.DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
exp = pd.DatetimeIndex(
[
"2001-01-01",
"2001-01-01",
"2001-01-01",
"NaT",
"NaT",
"NaT",
"2003-01-01",
"2003-01-01",
"2003-01-01",
],
tz=tz,
)
for res in [index.repeat(3), np.repeat(index, 3)]:
tm.assert_index_equal(res, exp)
assert res.freq is None
def test_repeat(self, tz_naive_fixture):
tz = tz_naive_fixture
reps = 2
msg = "the 'axis' parameter is not supported"
rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)
expected_rng = DatetimeIndex(
[
Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
]
)
res = rng.repeat(reps)
tm.assert_index_equal(res, expected_rng)
assert res.freq is None
tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
with pytest.raises(ValueError, match=msg):
np.repeat(rng, reps, axis=1)
def test_resolution(self, tz_naive_fixture):
tz = tz_naive_fixture
for freq, expected in zip(
["A", "Q", "M", "D", "H", "T", "S", "L", "U"],
[
"day",
"day",
"day",
"day",
"hour",
"minute",
"second",
"millisecond",
"microsecond",
],
):
idx = pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
assert idx.resolution == expected
def test_value_counts_unique(self, tz_naive_fixture):
tz = tz_naive_fixture
# GH 7735
idx = pd.date_range("2011-01-01 09:00", freq="H", periods=10)
# create repeated values, 'n'th element is repeated by n+1 times
idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz)
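        # value_counts sorts by count descending, so the most-repeated (latest) hour comes first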
exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
for obj in [idx, Series(idx)]:
tm.assert_series_equal(obj.value_counts(), expected)
expected = pd.date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
tm.assert_index_equal(idx.unique(), expected)
idx = DatetimeIndex(
[
"2013-01-01 09:00",
"2013-01-01 09:00",
"2013-01-01 09:00",
"2013-01-01 08:00",
"2013-01-01 08:00",
pd.NaT,
],
tz=tz,
)
exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz)
expected = Series([3, 2], index=exp_idx)
for obj in [idx, Series(idx)]:
tm.assert_series_equal(obj.value_counts(), expected)
exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz)
expected = Series([3, 2, 1], index=exp_idx)
for obj in [idx, Series(idx)]:
tm.assert_series_equal(obj.value_counts(dropna=False), expected)
tm.assert_index_equal(idx.unique(), exp_idx)
def test_nonunique_contains(self):
# GH 9512
for idx in map(
DatetimeIndex,
(
[0, 1, 0],
[0, 0, -1],
[0, -1, -1],
["2015", "2015", "2016"],
["2015", "2015", "2014"],
),
):
assert idx[0] in idx
@pytest.mark.parametrize(
"idx",
[
DatetimeIndex(
["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
),
DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
freq="H",
name="tzidx",
tz="Asia/Tokyo",
),
],
)
def test_order_with_freq(self, idx):
ordered = idx.sort_values()
tm.assert_index_equal(ordered, idx)
assert ordered.freq == idx.freq
ordered = idx.sort_values(ascending=False)
expected = idx[::-1]
tm.assert_index_equal(ordered, expected)
assert ordered.freq == expected.freq
assert ordered.freq.n == -1
ordered, indexer = idx.sort_values(return_indexer=True)
tm.assert_index_equal(ordered, idx)
tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
assert ordered.freq == idx.freq
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
expected = idx[::-1]
tm.assert_index_equal(ordered, expected)
tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
assert ordered.freq == expected.freq
assert ordered.freq.n == -1
@pytest.mark.parametrize(
"index_dates,expected_dates",
[
(
["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
),
(
["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
),
(
[pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
[pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
),
],
)
def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture):
tz = tz_naive_fixture
# without freq
index = DatetimeIndex(index_dates, tz=tz, name="idx")
expected = DatetimeIndex(expected_dates, tz=tz, name="idx")
ordered = index.sort_values()
tm.assert_index_equal(ordered, expected)
assert ordered.freq is None
ordered = index.sort_values(ascending=False)
tm.assert_index_equal(ordered, expected[::-1])
assert ordered.freq is None
ordered, indexer = index.sort_values(return_indexer=True)
tm.assert_index_equal(ordered, expected)
exp = np.array([0, 4, 3, 1, 2])
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
assert ordered.freq is None
ordered, indexer = index.sort_values(return_indexer=True, ascending=False)
tm.assert_index_equal(ordered, expected[::-1])
exp = np.array([2, 1, 3, 4, 0])
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
assert ordered.freq is None
def test_drop_duplicates_metadata(self):
# GH 10115
idx = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
result = idx.drop_duplicates()
tm.assert_index_equal(idx, result)
assert idx.freq == result.freq
idx_dup = idx.append(idx)
assert idx_dup.freq is None # freq is reset
result = idx_dup.drop_duplicates()
tm.assert_index_equal(idx, result)
assert result.freq is None
def test_drop_duplicates(self):
# to check Index/Series compat
base = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
idx = base.append(base[:5])
res = idx.drop_duplicates()
tm.assert_index_equal(res, base)
res = Series(idx).drop_duplicates()
tm.assert_series_equal(res, Series(base))
res = idx.drop_duplicates(keep="last")
exp = base[5:].append(base[:5])
tm.assert_index_equal(res, exp)
res = Series(idx).drop_duplicates(keep="last")
tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
res = idx.drop_duplicates(keep=False)
tm.assert_index_equal(res, base[5:])
res = Series(idx).drop_duplicates(keep=False)
tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
@pytest.mark.parametrize(
"freq",
[
"A",
"2A",
"-2A",
"Q",
"-1Q",
"M",
"-1M",
"D",
"3D",
"-3D",
"W",
"-1W",
"H",
"2H",
"-2H",
"T",
"2T",
"S",
"-3S",
],
)
def test_infer_freq(self, freq):
# GH 11018
idx = pd.date_range("2011-01-01 09:00:00", freq=freq, periods=10)
result = pd.DatetimeIndex(idx.asi8, freq="infer")
tm.assert_index_equal(idx, result)
assert result.freq == freq
def test_nat(self, tz_naive_fixture):
tz = tz_naive_fixture
assert pd.DatetimeIndex._na_value is pd.NaT
assert pd.DatetimeIndex([])._na_value is pd.NaT
idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
assert idx._can_hold_na
tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
assert idx.hasnans is False
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))
idx = pd.DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
assert idx._can_hold_na
tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
assert idx.hasnans is True
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))
def test_equals(self):
# GH 13107
idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
assert idx.equals(idx)
assert idx.equals(idx.copy())
assert idx.equals(idx.astype(object))
assert idx.astype(object).equals(idx)
assert idx.astype(object).equals(idx.astype(object))
assert not idx.equals(list(idx))
assert not idx.equals(pd.Series(idx))
idx2 = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
assert not idx.equals(idx2)
assert not idx.equals(idx2.copy())
assert not idx.equals(idx2.astype(object))
assert not idx.astype(object).equals(idx2)
assert not idx.equals(list(idx2))
assert not idx.equals(pd.Series(idx2))
# same internal, different tz
idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz="US/Pacific")
tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
assert not idx.equals(idx3)
assert not idx.equals(idx3.copy())
assert not idx.equals(idx3.astype(object))
assert not idx.astype(object).equals(idx3)
assert not idx.equals(list(idx3))
assert not idx.equals(pd.Series(idx3))
@pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
@pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
def test_freq_setter(self, values, freq, tz):
# GH 20678
idx = DatetimeIndex(values, tz=tz)
# can set to an offset, converting from string if necessary
idx.freq = freq
assert idx.freq == freq
assert isinstance(idx.freq, ABCDateOffset)
# can reset to None
idx.freq = None
assert idx.freq is None
def test_freq_setter_errors(self):
# GH 20678
idx = DatetimeIndex(["20180101", "20180103", "20180105"])
# setting with an incompatible freq
msg = (
"Inferred frequency 2D from passed values does not conform to "
"passed frequency 5D"
)
with pytest.raises(ValueError, match=msg):
idx.freq = "5D"
# setting with non-freq string
with pytest.raises(ValueError, match="Invalid frequency"):
idx.freq = "foo"
def test_offset_deprecated(self):
# GH 20716
idx = pd.DatetimeIndex(["20180101", "20180102"])
# getter deprecated
with tm.assert_produces_warning(FutureWarning):
idx.offset
# setter deprecated
with tm.assert_produces_warning(FutureWarning):
idx.offset = BDay()
class TestBusinessDatetimeIndex:
def setup_method(self, method):
self.rng = bdate_range(START, END)
def test_comparison(self):
d = self.rng[10]
comp = self.rng > d
assert comp[11]
assert not comp[9]
def test_pickle_unpickle(self):
unpickled = tm.round_trip_pickle(self.rng)
assert unpickled.freq is not None
def test_copy(self):
cp = self.rng.copy()
repr(cp)
tm.assert_index_equal(cp, self.rng)
def test_shift(self):
shifted = self.rng.shift(5)
assert shifted[0] == self.rng[5]
assert shifted.freq == self.rng.freq
shifted = self.rng.shift(-5)
assert shifted[5] == self.rng[0]
assert shifted.freq == self.rng.freq
shifted = self.rng.shift(0)
assert shifted[0] == self.rng[0]
assert shifted.freq == self.rng.freq
rng = date_range(START, END, freq=BMonthEnd())
shifted = rng.shift(1, freq=BDay())
assert shifted[0] == rng[0] + BDay()
def test_equals(self):
assert not self.rng.equals(list(self.rng))
def test_identical(self):
t1 = self.rng.copy()
t2 = self.rng.copy()
assert t1.identical(t2)
# name
t1 = t1.rename("foo")
assert t1.equals(t2)
assert not t1.identical(t2)
t2 = t2.rename("foo")
assert t1.identical(t2)
# freq
t2v = Index(t2.values)
assert t1.equals(t2v)
assert not t1.identical(t2v)
class TestCustomDatetimeIndex:
def setup_method(self, method):
self.rng = bdate_range(START, END, freq="C")
def test_comparison(self):
d = self.rng[10]
comp = self.rng > d
assert comp[11]
assert not comp[9]
def test_copy(self):
cp = self.rng.copy()
repr(cp)
tm.assert_index_equal(cp, self.rng)
def test_shift(self):
shifted = self.rng.shift(5)
assert shifted[0] == self.rng[5]
assert shifted.freq == self.rng.freq
shifted = self.rng.shift(-5)
assert shifted[5] == self.rng[0]
assert shifted.freq == self.rng.freq
shifted = self.rng.shift(0)
assert shifted[0] == self.rng[0]
assert shifted.freq == self.rng.freq
with warnings.catch_warnings(record=True):
warnings.simplefilter("ignore", pd.errors.PerformanceWarning)
rng = date_range(START, END, freq=BMonthEnd())
shifted = rng.shift(1, freq=CDay())
assert shifted[0] == rng[0] + CDay()
def test_shift_periods(self):
# GH#22458 : argument 'n' was deprecated in favor of 'periods'
idx = pd.date_range(start=START, end=END, periods=3)
tm.assert_index_equal(idx.shift(periods=0), idx)
tm.assert_index_equal(idx.shift(0), idx)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=True):
tm.assert_index_equal(idx.shift(n=0), idx)
def test_pickle_unpickle(self):
unpickled = tm.round_trip_pickle(self.rng)
assert unpickled.freq is not None
def test_equals(self):
assert not self.rng.equals(list(self.rng))

View File

@@ -0,0 +1,470 @@
""" test partial slicing on Series/Frame """
from datetime import datetime
import operator as op
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
Series,
Timedelta,
Timestamp,
date_range,
)
from pandas.core.indexing import IndexingError
from pandas.util import testing as tm
class TestSlicing:
def test_dti_slicing(self):
dti = date_range(start="1/1/2005", end="12/1/2005", freq="M")
dti2 = dti[[1, 3, 5]]
v1 = dti2[0]
v2 = dti2[1]
v3 = dti2[2]
assert v1 == Timestamp("2/28/2005")
assert v2 == Timestamp("4/30/2005")
assert v3 == Timestamp("6/30/2005")
# don't carry freq through irregular slicing
assert dti2.freq is None
def test_slice_keeps_name(self):
# GH4226
st = pd.Timestamp("2013-07-01 00:00:00", tz="America/Los_Angeles")
et = pd.Timestamp("2013-07-02 00:00:00", tz="America/Los_Angeles")
dr = pd.date_range(st, et, freq="H", name="timebucket")
assert dr[1:].name == dr.name
def test_slice_with_negative_step(self):
ts = Series(np.arange(20), date_range("2014-01-01", periods=20, freq="MS"))
SLC = pd.IndexSlice
def assert_slices_equivalent(l_slc, i_slc):
tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc])
tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])
tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])
assert_slices_equivalent(SLC[Timestamp("2014-10-01") :: -1], SLC[9::-1])
assert_slices_equivalent(SLC["2014-10-01"::-1], SLC[9::-1])
assert_slices_equivalent(SLC[: Timestamp("2014-10-01") : -1], SLC[:8:-1])
assert_slices_equivalent(SLC[:"2014-10-01":-1], SLC[:8:-1])
assert_slices_equivalent(SLC["2015-02-01":"2014-10-01":-1], SLC[13:8:-1])
assert_slices_equivalent(
SLC[Timestamp("2015-02-01") : Timestamp("2014-10-01") : -1], SLC[13:8:-1]
)
assert_slices_equivalent(
SLC["2015-02-01" : Timestamp("2014-10-01") : -1], SLC[13:8:-1]
)
assert_slices_equivalent(
SLC[Timestamp("2015-02-01") : "2014-10-01" : -1], SLC[13:8:-1]
)
assert_slices_equivalent(SLC["2014-10-01":"2015-02-01":-1], SLC[:0])
def test_slice_with_zero_step_raises(self):
ts = Series(np.arange(20), date_range("2014-01-01", periods=20, freq="MS"))
with pytest.raises(ValueError, match="slice step cannot be zero"):
ts[::0]
with pytest.raises(ValueError, match="slice step cannot be zero"):
ts.loc[::0]
with pytest.raises(ValueError, match="slice step cannot be zero"):
ts.loc[::0]
def test_slice_bounds_empty(self):
# GH#14354
empty_idx = date_range(freq="1H", periods=0, end="2015")
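        # slice bounds are cast even on an empty index: "right" snaps to the
        # end of the day, "left" to its start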
right = empty_idx._maybe_cast_slice_bound("2015-01-02", "right", "loc")
exp = Timestamp("2015-01-02 23:59:59.999999999")
assert right == exp
left = empty_idx._maybe_cast_slice_bound("2015-01-02", "left", "loc")
exp = Timestamp("2015-01-02 00:00:00")
assert left == exp
def test_slice_duplicate_monotonic(self):
# https://github.com/pandas-dev/pandas/issues/16515
idx = pd.DatetimeIndex(["2017", "2017"])
result = idx._maybe_cast_slice_bound("2017-01-01", "left", "loc")
expected = Timestamp("2017-01-01")
assert result == expected
def test_monotone_DTI_indexing_bug(self):
# GH 19362
# Testing accessing the first element in a monotonic descending
# partial string indexing.
df = pd.DataFrame(list(range(5)))
date_list = [
"2018-01-02",
"2017-02-10",
"2016-03-10",
"2015-03-15",
"2014-03-16",
]
date_index = pd.to_datetime(date_list)
df["date"] = date_index
expected = pd.DataFrame({0: list(range(5)), "date": date_index})
tm.assert_frame_equal(df, expected)
df = pd.DataFrame(
{"A": [1, 2, 3]}, index=pd.date_range("20170101", periods=3)[::-1]
)
expected = pd.DataFrame({"A": 1}, index=pd.date_range("20170103", periods=1))
tm.assert_frame_equal(df.loc["2017-01-03"], expected)
def test_slice_year(self):
dti = date_range(freq="B", start=datetime(2005, 1, 1), periods=500)
s = Series(np.arange(len(dti)), index=dti)
result = s["2005"]
expected = s[s.index.year == 2005]
tm.assert_series_equal(result, expected)
df = DataFrame(np.random.rand(len(dti), 5), index=dti)
result = df.loc["2005"]
expected = df[df.index.year == 2005]
tm.assert_frame_equal(result, expected)
rng = date_range("1/1/2000", "1/1/2010")
result = rng.get_loc("2009")
expected = slice(3288, 3653)
assert result == expected
def test_slice_quarter(self):
dti = date_range(freq="D", start=datetime(2000, 6, 1), periods=500)
s = Series(np.arange(len(dti)), index=dti)
assert len(s["2001Q1"]) == 90
df = DataFrame(np.random.rand(len(dti), 5), index=dti)
assert len(df.loc["1Q01"]) == 90
def test_slice_month(self):
dti = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
s = Series(np.arange(len(dti)), index=dti)
assert len(s["2005-11"]) == 30
df = DataFrame(np.random.rand(len(dti), 5), index=dti)
assert len(df.loc["2005-11"]) == 30
tm.assert_series_equal(s["2005-11"], s["11-2005"])
def test_partial_slice(self):
rng = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
s = Series(np.arange(len(rng)), index=rng)
result = s["2005-05":"2006-02"]
expected = s["20050501":"20060228"]
tm.assert_series_equal(result, expected)
result = s["2005-05":]
expected = s["20050501":]
tm.assert_series_equal(result, expected)
result = s[:"2006-02"]
expected = s[:"20060228"]
tm.assert_series_equal(result, expected)
result = s["2005-1-1"]
assert result == s.iloc[0]
with pytest.raises(KeyError, match=r"^'2004-12-31'$"):
s["2004-12-31"]
def test_partial_slice_daily(self):
rng = date_range(freq="H", start=datetime(2005, 1, 31), periods=500)
s = Series(np.arange(len(rng)), index=rng)
result = s["2005-1-31"]
tm.assert_series_equal(result, s.iloc[:24])
with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"):
s["2004-12-31 00"]
def test_partial_slice_hourly(self):
rng = date_range(freq="T", start=datetime(2005, 1, 1, 20, 0, 0), periods=500)
s = Series(np.arange(len(rng)), index=rng)
result = s["2005-1-1"]
tm.assert_series_equal(result, s.iloc[: 60 * 4])
result = s["2005-1-1 20"]
tm.assert_series_equal(result, s.iloc[:60])
assert s["2005-1-1 20:00"] == s.iloc[0]
with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"):
s["2004-12-31 00:15"]
def test_partial_slice_minutely(self):
rng = date_range(freq="S", start=datetime(2005, 1, 1, 23, 59, 0), periods=500)
s = Series(np.arange(len(rng)), index=rng)
result = s["2005-1-1 23:59"]
tm.assert_series_equal(result, s.iloc[:60])
result = s["2005-1-1"]
tm.assert_series_equal(result, s.iloc[:60])
assert s[Timestamp("2005-1-1 23:59:00")] == s.iloc[0]
with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"):
s["2004-12-31 00:00:00"]
def test_partial_slice_second_precision(self):
rng = date_range(
start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990),
periods=20,
freq="US",
)
s = Series(np.arange(20), rng)
tm.assert_series_equal(s["2005-1-1 00:00"], s.iloc[:10])
tm.assert_series_equal(s["2005-1-1 00:00:59"], s.iloc[:10])
tm.assert_series_equal(s["2005-1-1 00:01"], s.iloc[10:])
tm.assert_series_equal(s["2005-1-1 00:01:00"], s.iloc[10:])
assert s[Timestamp("2005-1-1 00:00:59.999990")] == s.iloc[0]
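        # no values fall within second 00:00:00, so the sliced lookup raises KeyError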
with pytest.raises(KeyError, match="2005-1-1 00:00:00"):
s["2005-1-1 00:00:00"]
def test_partial_slicing_dataframe(self):
# GH14856
# Test various combinations of string slicing resolution vs.
# index resolution
# - If string resolution is less precise than index resolution,
# string is considered a slice
# - If string resolution is equal to or more precise than index
# resolution, string is considered an exact match
formats = [
"%Y",
"%Y-%m",
"%Y-%m-%d",
"%Y-%m-%d %H",
"%Y-%m-%d %H:%M",
"%Y-%m-%d %H:%M:%S",
]
resolutions = ["year", "month", "day", "hour", "minute", "second"]
for rnum, resolution in enumerate(resolutions[2:], 2):
# we check only 'day', 'hour', 'minute' and 'second'
unit = Timedelta("1 " + resolution)
middate = datetime(2012, 1, 1, 0, 0, 0)
index = DatetimeIndex([middate - unit, middate, middate + unit])
values = [1, 2, 3]
df = DataFrame({"a": values}, index, dtype=np.int64)
assert df.index.resolution == resolution
# Timestamp with the same resolution as index
# Should be exact match for Series (return scalar)
# and raise KeyError for Frame
for timestamp, expected in zip(index, values):
ts_string = timestamp.strftime(formats[rnum])
# make ts_string as precise as index
result = df["a"][ts_string]
assert isinstance(result, np.int64)
assert result == expected
msg = r"^'{}'$".format(ts_string)
with pytest.raises(KeyError, match=msg):
df[ts_string]
# Timestamp with resolution less precise than index
for fmt in formats[:rnum]:
for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]:
ts_string = index[element].strftime(fmt)
# Series should return slice
result = df["a"][ts_string]
expected = df["a"][theslice]
tm.assert_series_equal(result, expected)
# Frame should return slice as well
result = df[ts_string]
expected = df[theslice]
tm.assert_frame_equal(result, expected)
# Timestamp with resolution more precise than index
# Compatible with existing key
# Should return scalar for Series
# and raise KeyError for Frame
for fmt in formats[rnum + 1 :]:
ts_string = index[1].strftime(fmt)
result = df["a"][ts_string]
assert isinstance(result, np.int64)
assert result == 2
msg = r"^'{}'$".format(ts_string)
with pytest.raises(KeyError, match=msg):
df[ts_string]
# Not compatible with existing key
# Should raise KeyError
for fmt, res in list(zip(formats, resolutions))[rnum + 1 :]:
ts = index[1] + Timedelta("1 " + res)
ts_string = ts.strftime(fmt)
msg = r"^'{}'$".format(ts_string)
with pytest.raises(KeyError, match=msg):
df["a"][ts_string]
with pytest.raises(KeyError, match=msg):
df[ts_string]
def test_partial_slicing_with_multiindex(self):
# GH 4758
# partial string indexing with a multi-index buggy
df = DataFrame(
{
"ACCOUNT": ["ACCT1", "ACCT1", "ACCT1", "ACCT2"],
"TICKER": ["ABC", "MNP", "XYZ", "XYZ"],
"val": [1, 2, 3, 4],
},
index=date_range("2013-06-19 09:30:00", periods=4, freq="5T"),
)
df_multi = df.set_index(["ACCOUNT", "TICKER"], append=True)
expected = DataFrame(
[[1]], index=Index(["ABC"], name="TICKER"), columns=["val"]
)
result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1")]
tm.assert_frame_equal(result, expected)
expected = df_multi.loc[
(pd.Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC")
]
result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")]
tm.assert_series_equal(result, expected)
# this is an IndexingError as we don't do partial string selection on
# multi-levels.
msg = "Too many indexers"
with pytest.raises(IndexingError, match=msg):
df_multi.loc[("2013-06-19", "ACCT1", "ABC")]
# GH 4294
# partial slice on a series mi
s = pd.DataFrame(
np.random.rand(1000, 1000), index=pd.date_range("2000-1-1", periods=1000)
).stack()
s2 = s[:-1].copy()
expected = s2["2000-1-4"]
result = s2[pd.Timestamp("2000-1-4")]
tm.assert_series_equal(result, expected)
result = s[pd.Timestamp("2000-1-4")]
expected = s["2000-1-4"]
tm.assert_series_equal(result, expected)
df2 = pd.DataFrame(s)
expected = df2.xs("2000-1-4")
result = df2.loc[pd.Timestamp("2000-1-4")]
tm.assert_frame_equal(result, expected)
def test_partial_slice_doesnt_require_monotonicity(self):
# For historical reasons.
s = pd.Series(np.arange(10), pd.date_range("2014-01-01", periods=10))
nonmonotonic = s[[3, 5, 4]]
expected = nonmonotonic.iloc[:0]
timestamp = pd.Timestamp("2014-01-10")
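        # on a non-monotonic index, a string slice past the data returns empty,
        # while the equivalent Timestamp slice raises KeyError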
tm.assert_series_equal(nonmonotonic["2014-01-10":], expected)
with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
nonmonotonic[timestamp:]
tm.assert_series_equal(nonmonotonic.loc["2014-01-10":], expected)
with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
nonmonotonic.loc[timestamp:]
def test_loc_datetime_length_one(self):
# GH16071
df = pd.DataFrame(
columns=["1"],
index=pd.date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"),
)
result = df.loc[datetime(2016, 10, 1) :]
tm.assert_frame_equal(result, df)
result = df.loc["2016-10-01T00:00:00":]
tm.assert_frame_equal(result, df)
@pytest.mark.parametrize(
"datetimelike",
[
Timestamp("20130101"),
datetime(2013, 1, 1),
np.datetime64("2013-01-01T00:00", "ns"),
],
)
@pytest.mark.parametrize(
"op,expected",
[
(op.lt, [True, False, False, False]),
(op.le, [True, True, False, False]),
(op.eq, [False, True, False, False]),
(op.gt, [False, False, False, True]),
],
)
def test_selection_by_datetimelike(self, datetimelike, op, expected):
# GH issue #17965, test for ability to compare datetime64[ns] columns
# to datetimelike
df = DataFrame(
{
"A": [
pd.Timestamp("20120101"),
pd.Timestamp("20130101"),
np.nan,
pd.Timestamp("20130103"),
]
}
)
result = op(df.A, datetimelike)
expected = Series(expected, name="A")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"start",
[
"2018-12-02 21:50:00+00:00",
pd.Timestamp("2018-12-02 21:50:00+00:00"),
pd.Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(),
],
)
@pytest.mark.parametrize(
"end",
[
"2018-12-02 21:52:00+00:00",
pd.Timestamp("2018-12-02 21:52:00+00:00"),
pd.Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(),
],
)
def test_getitem_with_datestring_with_UTC_offset(self, start, end):
# GH 24076
idx = pd.date_range(
start="2018-12-02 14:50:00-07:00",
end="2018-12-02 14:50:00-07:00",
freq="1min",
)
df = pd.DataFrame(1, index=idx, columns=["A"])
result = df[start:end]
expected = df.iloc[0:3, :]
tm.assert_frame_equal(result, expected)
# GH 16785
start = str(start)
end = str(end)
with pytest.raises(ValueError, match="Both dates must"):
df[start : end[:-4] + "1:00"]
with pytest.raises(ValueError, match="The index must be timezone"):
df = df.tz_localize(None)
df[start:end]

View File

@@ -0,0 +1,343 @@
"""
Tests for DatetimeIndex methods behaving like their Timestamp counterparts
"""
from datetime import datetime
import numpy as np
import pytest
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
import pandas as pd
from pandas import DatetimeIndex, Timestamp, date_range
import pandas.util.testing as tm
from pandas.tseries.frequencies import to_offset
class TestDatetimeIndexOps:
def test_dti_time(self):
rng = date_range("1/1/2000", freq="12min", periods=10)
result = pd.Index(rng).time
expected = [t.time() for t in rng]
assert (result == expected).all()
def test_dti_date(self):
rng = date_range("1/1/2000", freq="12H", periods=10)
result = pd.Index(rng).date
expected = [t.date() for t in rng]
assert (result == expected).all()
@pytest.mark.parametrize("data", [["1400-01-01"], [datetime(1400, 1, 1)]])
def test_dti_date_out_of_range(self, data):
# GH#1475
msg = "Out of bounds nanosecond timestamp: 1400-01-01 00:00:00"
with pytest.raises(OutOfBoundsDatetime, match=msg):
DatetimeIndex(data)
@pytest.mark.parametrize(
"field",
[
"dayofweek",
"dayofyear",
"week",
"weekofyear",
"quarter",
"days_in_month",
"is_month_start",
"is_month_end",
"is_quarter_start",
"is_quarter_end",
"is_year_start",
"is_year_end",
"weekday_name",
],
)
def test_dti_timestamp_fields(self, field):
# extra fields from DatetimeIndex like quarter and week
idx = tm.makeDateIndex(100)
expected = getattr(idx, field)[-1]
if field == "weekday_name":
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = getattr(Timestamp(idx[-1]), field)
else:
result = getattr(Timestamp(idx[-1]), field)
assert result == expected
def test_dti_timestamp_freq_fields(self):
# extra fields from DatetimeIndex like quarter and week
idx = tm.makeDateIndex(100)
assert idx.freq == Timestamp(idx[-1], idx.freq).freq
assert idx.freqstr == Timestamp(idx[-1], idx.freq).freqstr
# ----------------------------------------------------------------
# DatetimeIndex.round
def test_round_daily(self):
dti = date_range("20130101 09:10:11", periods=5)
result = dti.round("D")
expected = date_range("20130101", periods=5)
tm.assert_index_equal(result, expected)
dti = dti.tz_localize("UTC").tz_convert("US/Eastern")
result = dti.round("D")
expected = date_range("20130101", periods=5).tz_localize("US/Eastern")
tm.assert_index_equal(result, expected)
result = dti.round("s")
tm.assert_index_equal(result, dti)
@pytest.mark.parametrize(
"freq, error_msg",
[
("Y", "<YearEnd: month=12> is a non-fixed frequency"),
("M", "<MonthEnd> is a non-fixed frequency"),
("foobar", "Invalid frequency: foobar"),
],
)
def test_round_invalid(self, freq, error_msg):
dti = date_range("20130101 09:10:11", periods=5)
dti = dti.tz_localize("UTC").tz_convert("US/Eastern")
with pytest.raises(ValueError, match=error_msg):
dti.round(freq)
def test_round(self, tz_naive_fixture):
tz = tz_naive_fixture
rng = date_range(start="2016-01-01", periods=5, freq="30Min", tz=tz)
elt = rng[1]
expected_rng = DatetimeIndex(
[
Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
Timestamp("2016-01-01 01:00:00", tz=tz, freq="30T"),
Timestamp("2016-01-01 02:00:00", tz=tz, freq="30T"),
Timestamp("2016-01-01 02:00:00", tz=tz, freq="30T"),
]
)
expected_elt = expected_rng[1]
tm.assert_index_equal(rng.round(freq="H"), expected_rng)
assert elt.round(freq="H") == expected_elt
msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG
with pytest.raises(ValueError, match=msg):
rng.round(freq="foo")
with pytest.raises(ValueError, match=msg):
elt.round(freq="foo")
msg = "<MonthEnd> is a non-fixed frequency"
with pytest.raises(ValueError, match=msg):
rng.round(freq="M")
with pytest.raises(ValueError, match=msg):
elt.round(freq="M")
# GH#14440 & GH#15578
index = DatetimeIndex(["2016-10-17 12:00:00.0015"], tz=tz)
result = index.round("ms")
expected = DatetimeIndex(["2016-10-17 12:00:00.002000"], tz=tz)
tm.assert_index_equal(result, expected)
for freq in ["us", "ns"]:
tm.assert_index_equal(index, index.round(freq))
index = DatetimeIndex(["2016-10-17 12:00:00.00149"], tz=tz)
result = index.round("ms")
expected = DatetimeIndex(["2016-10-17 12:00:00.001000"], tz=tz)
tm.assert_index_equal(result, expected)
index = DatetimeIndex(["2016-10-17 12:00:00.001501031"])
result = index.round("10ns")
expected = DatetimeIndex(["2016-10-17 12:00:00.001501030"])
tm.assert_index_equal(result, expected)
with tm.assert_produces_warning(False):
ts = "2016-10-17 12:00:00.001501031"
DatetimeIndex([ts]).round("1010ns")
def test_no_rounding_occurs(self, tz_naive_fixture):
# GH 21262
tz = tz_naive_fixture
rng = date_range(start="2016-01-01", periods=5, freq="2Min", tz=tz)
expected_rng = DatetimeIndex(
[
Timestamp("2016-01-01 00:00:00", tz=tz, freq="2T"),
Timestamp("2016-01-01 00:02:00", tz=tz, freq="2T"),
Timestamp("2016-01-01 00:04:00", tz=tz, freq="2T"),
Timestamp("2016-01-01 00:06:00", tz=tz, freq="2T"),
Timestamp("2016-01-01 00:08:00", tz=tz, freq="2T"),
]
)
tm.assert_index_equal(rng.round(freq="2T"), expected_rng)
@pytest.mark.parametrize(
"test_input, rounder, freq, expected",
[
(["2117-01-01 00:00:45"], "floor", "15s", ["2117-01-01 00:00:45"]),
(["2117-01-01 00:00:45"], "ceil", "15s", ["2117-01-01 00:00:45"]),
(
["2117-01-01 00:00:45.000000012"],
"floor",
"10ns",
["2117-01-01 00:00:45.000000010"],
),
(
["1823-01-01 00:00:01.000000012"],
"ceil",
"10ns",
["1823-01-01 00:00:01.000000020"],
),
(["1823-01-01 00:00:01"], "floor", "1s", ["1823-01-01 00:00:01"]),
(["1823-01-01 00:00:01"], "ceil", "1s", ["1823-01-01 00:00:01"]),
(["2018-01-01 00:15:00"], "ceil", "15T", ["2018-01-01 00:15:00"]),
(["2018-01-01 00:15:00"], "floor", "15T", ["2018-01-01 00:15:00"]),
(["1823-01-01 03:00:00"], "ceil", "3H", ["1823-01-01 03:00:00"]),
(["1823-01-01 03:00:00"], "floor", "3H", ["1823-01-01 03:00:00"]),
(
("NaT", "1823-01-01 00:00:01"),
"floor",
"1s",
("NaT", "1823-01-01 00:00:01"),
),
(
("NaT", "1823-01-01 00:00:01"),
"ceil",
"1s",
("NaT", "1823-01-01 00:00:01"),
),
],
)
def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
dt = DatetimeIndex(list(test_input))
func = getattr(dt, rounder)
result = func(freq)
expected = DatetimeIndex(list(expected))
assert expected.equals(result)
@pytest.mark.parametrize(
"start, index_freq, periods",
[("2018-01-01", "12H", 25), ("2018-01-01 0:0:0.124999", "1ns", 1000)],
)
@pytest.mark.parametrize(
"round_freq",
[
"2ns",
"3ns",
"4ns",
"5ns",
"6ns",
"7ns",
"250ns",
"500ns",
"750ns",
"1us",
"19us",
"250us",
"500us",
"750us",
"1s",
"2s",
"3s",
"12H",
"1D",
],
)
def test_round_int64(self, start, index_freq, periods, round_freq):
dt = date_range(start=start, freq=index_freq, periods=periods)
unit = to_offset(round_freq).nanos
# test floor
result = dt.floor(round_freq)
diff = dt.asi8 - result.asi8
mod = result.asi8 % unit
assert (mod == 0).all(), "floor not a {} multiple".format(round_freq)
assert (0 <= diff).all() and (diff < unit).all(), "floor error"
# test ceil
result = dt.ceil(round_freq)
diff = result.asi8 - dt.asi8
mod = result.asi8 % unit
assert (mod == 0).all(), "ceil not a {} multiple".format(round_freq)
assert (0 <= diff).all() and (diff < unit).all(), "ceil error"
# test round
result = dt.round(round_freq)
diff = abs(result.asi8 - dt.asi8)
mod = result.asi8 % unit
assert (mod == 0).all(), "round not a {} multiple".format(round_freq)
assert (diff <= unit // 2).all(), "round error"
if unit % 2 == 0:
assert (
result.asi8[diff == unit // 2] % 2 == 0
).all(), "round half to even error"
# ----------------------------------------------------------------
# DatetimeIndex.normalize
def test_normalize(self):
rng = date_range("1/1/2000 9:30", periods=10, freq="D")
result = rng.normalize()
expected = date_range("1/1/2000", periods=10, freq="D")
tm.assert_index_equal(result, expected)
arr_ns = np.array([1380585623454345752, 1380585612343234312]).astype(
"datetime64[ns]"
)
rng_ns = DatetimeIndex(arr_ns)
rng_ns_normalized = rng_ns.normalize()
arr_ns = np.array([1380585600000000000, 1380585600000000000]).astype(
"datetime64[ns]"
)
expected = DatetimeIndex(arr_ns)
tm.assert_index_equal(rng_ns_normalized, expected)
assert result.is_normalized
assert not rng.is_normalized
def test_normalize_nat(self):
dti = DatetimeIndex([pd.NaT, Timestamp("2018-01-01 01:00:00")])
result = dti.normalize()
expected = DatetimeIndex([pd.NaT, Timestamp("2018-01-01")])
tm.assert_index_equal(result, expected)
class TestDateTimeIndexToJulianDate:
def test_1700(self):
dr = date_range(start=Timestamp("1710-10-01"), periods=5, freq="D")
r1 = pd.Index([x.to_julian_date() for x in dr])
r2 = dr.to_julian_date()
assert isinstance(r2, pd.Float64Index)
tm.assert_index_equal(r1, r2)
def test_2000(self):
dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="D")
r1 = pd.Index([x.to_julian_date() for x in dr])
r2 = dr.to_julian_date()
assert isinstance(r2, pd.Float64Index)
tm.assert_index_equal(r1, r2)
def test_hour(self):
dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="H")
r1 = pd.Index([x.to_julian_date() for x in dr])
r2 = dr.to_julian_date()
assert isinstance(r2, pd.Float64Index)
tm.assert_index_equal(r1, r2)
def test_minute(self):
dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="T")
r1 = pd.Index([x.to_julian_date() for x in dr])
r2 = dr.to_julian_date()
assert isinstance(r2, pd.Float64Index)
tm.assert_index_equal(r1, r2)
def test_second(self):
dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="S")
r1 = pd.Index([x.to_julian_date() for x in dr])
r2 = dr.to_julian_date()
assert isinstance(r2, pd.Float64Index)
tm.assert_index_equal(r1, r2)

View File

@@ -0,0 +1,581 @@
from datetime import datetime
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
Int64Index,
Series,
bdate_range,
date_range,
to_datetime,
)
import pandas.util.testing as tm
from pandas.tseries.offsets import BMonthEnd, Minute, MonthEnd
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestDatetimeIndexSetOps:
tz = [
None,
"UTC",
"Asia/Tokyo",
"US/Eastern",
"dateutil/Asia/Singapore",
"dateutil/US/Pacific",
]
# TODO: moved from test_datetimelike; dedup with version below
@pytest.mark.parametrize("sort", [None, False])
def test_union2(self, sort):
everything = tm.makeDateIndex(10)
first = everything[:5]
second = everything[5:]
union = first.union(second, sort=sort)
tm.assert_index_equal(union, everything)
@pytest.mark.parametrize("box", [np.array, Series, list])
@pytest.mark.parametrize("sort", [None, False])
def test_union3(self, sort, box):
everything = tm.makeDateIndex(10)
first = everything[:5]
second = everything[5:]
# GH 10149
expected = (
first.astype("O").union(pd.Index(second.values, dtype="O")).astype("O")
)
case = box(second.values)
result = first.union(case, sort=sort)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("tz", tz)
@pytest.mark.parametrize("sort", [None, False])
def test_union(self, tz, sort):
rng1 = pd.date_range("1/1/2000", freq="D", periods=5, tz=tz)
other1 = pd.date_range("1/6/2000", freq="D", periods=5, tz=tz)
expected1 = pd.date_range("1/1/2000", freq="D", periods=10, tz=tz)
expected1_notsorted = pd.DatetimeIndex(list(other1) + list(rng1))
rng2 = pd.date_range("1/1/2000", freq="D", periods=5, tz=tz)
other2 = pd.date_range("1/4/2000", freq="D", periods=5, tz=tz)
expected2 = pd.date_range("1/1/2000", freq="D", periods=8, tz=tz)
expected2_notsorted = pd.DatetimeIndex(list(other2) + list(rng2[:3]))
rng3 = pd.date_range("1/1/2000", freq="D", periods=5, tz=tz)
other3 = pd.DatetimeIndex([], tz=tz)
expected3 = pd.date_range("1/1/2000", freq="D", periods=5, tz=tz)
expected3_notsorted = rng3
for rng, other, exp, exp_notsorted in [
(rng1, other1, expected1, expected1_notsorted),
(rng2, other2, expected2, expected2_notsorted),
(rng3, other3, expected3, expected3_notsorted),
]:
result_union = rng.union(other, sort=sort)
tm.assert_index_equal(result_union, exp)
result_union = other.union(rng, sort=sort)
if sort is None:
tm.assert_index_equal(result_union, exp)
else:
tm.assert_index_equal(result_union, exp_notsorted)
@pytest.mark.parametrize("sort", [None, False])
def test_union_coverage(self, sort):
idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"])
ordered = DatetimeIndex(idx.sort_values(), freq="infer")
result = ordered.union(idx, sort=sort)
tm.assert_index_equal(result, ordered)
result = ordered[:0].union(ordered, sort=sort)
tm.assert_index_equal(result, ordered)
assert result.freq == ordered.freq
@pytest.mark.parametrize("sort", [None, False])
def test_union_bug_1730(self, sort):
rng_a = date_range("1/1/2012", periods=4, freq="3H")
rng_b = date_range("1/1/2012", periods=4, freq="4H")
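        # both ranges share their first timestamp, so it appears only once in the union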
result = rng_a.union(rng_b, sort=sort)
exp = list(rng_a) + list(rng_b[1:])
if sort is None:
exp = DatetimeIndex(sorted(exp))
else:
exp = DatetimeIndex(exp)
tm.assert_index_equal(result, exp)
@pytest.mark.parametrize("sort", [None, False])
def test_union_bug_1745(self, sort):
left = DatetimeIndex(["2012-05-11 15:19:49.695000"])
right = DatetimeIndex(
[
"2012-05-29 13:04:21.322000",
"2012-05-11 15:27:24.873000",
"2012-05-11 15:31:05.350000",
]
)
result = left.union(right, sort=sort)
exp = DatetimeIndex(
[
"2012-05-11 15:19:49.695000",
"2012-05-29 13:04:21.322000",
"2012-05-11 15:27:24.873000",
"2012-05-11 15:31:05.350000",
]
)
if sort is None:
exp = exp.sort_values()
tm.assert_index_equal(result, exp)
@pytest.mark.parametrize("sort", [None, False])
def test_union_bug_4564(self, sort):
from pandas import DateOffset
left = date_range("2013-01-01", "2013-02-01")
right = left + DateOffset(minutes=15)
result = left.union(right, sort=sort)
exp = list(left) + list(right)
if sort is None:
exp = DatetimeIndex(sorted(exp))
else:
exp = DatetimeIndex(exp)
tm.assert_index_equal(result, exp)
@pytest.mark.parametrize("sort", [None, False])
def test_union_freq_both_none(self, sort):
# GH11086
expected = bdate_range("20150101", periods=10)
expected.freq = None
result = expected.union(expected, sort=sort)
tm.assert_index_equal(result, expected)
assert result.freq is None
def test_union_dataframe_index(self):
rng1 = date_range("1/1/1999", "1/1/2012", freq="MS")
s1 = Series(np.random.randn(len(rng1)), rng1)
rng2 = date_range("1/1/1980", "12/1/2001", freq="MS")
s2 = Series(np.random.randn(len(rng2)), rng2)
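        # constructing the DataFrame aligns the two Series, unioning their monthly indexes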
df = DataFrame({"s1": s1, "s2": s2})
exp = pd.date_range("1/1/1980", "1/1/2012", freq="MS")
tm.assert_index_equal(df.index, exp)
@pytest.mark.parametrize("sort", [None, False])
def test_union_with_DatetimeIndex(self, sort):
i1 = Int64Index(np.arange(0, 20, 2))
i2 = date_range(start="2012-01-03 00:00:00", periods=10, freq="D")
# Works
i1.union(i2, sort=sort)
# Fails with "AttributeError: can't set attribute"
i2.union(i1, sort=sort)
# TODO: moved from test_datetimelike; de-duplicate with version below
def test_intersection2(self):
first = tm.makeDateIndex(10)
second = first[5:]
intersect = first.intersection(second)
assert tm.equalContents(intersect, second)
# GH 10149
cases = [klass(second.values) for klass in [np.array, Series, list]]
for case in cases:
result = first.intersection(case)
assert tm.equalContents(result, second)
third = Index(["a", "b", "c"])
result = first.intersection(third)
expected = pd.Index([], dtype=object)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"]
)
@pytest.mark.parametrize("sort", [None, False])
def test_intersection(self, tz, sort):
# GH 4690 (with tz)
base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx")
# if target has the same name, it is preserved
rng2 = date_range("5/15/2000", "6/20/2000", freq="D", name="idx")
expected2 = date_range("6/1/2000", "6/20/2000", freq="D", name="idx")
# if target name is different, it will be reset
rng3 = date_range("5/15/2000", "6/20/2000", freq="D", name="other")
expected3 = date_range("6/1/2000", "6/20/2000", freq="D", name=None)
rng4 = date_range("7/1/2000", "7/31/2000", freq="D", name="idx")
expected4 = DatetimeIndex([], name="idx")
for (rng, expected) in [
(rng2, expected2),
(rng3, expected3),
(rng4, expected4),
]:
result = base.intersection(rng)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
assert result.tz == expected.tz
# non-monotonic
base = DatetimeIndex(
["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], tz=tz, name="idx"
)
rng2 = DatetimeIndex(
["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], tz=tz, name="idx"
)
expected2 = DatetimeIndex(["2011-01-04", "2011-01-02"], tz=tz, name="idx")
rng3 = DatetimeIndex(
["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
tz=tz,
name="other",
)
expected3 = DatetimeIndex(["2011-01-04", "2011-01-02"], tz=tz, name=None)
# GH 7880
rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx")
expected4 = DatetimeIndex([], tz=tz, name="idx")
for (rng, expected) in [
(rng2, expected2),
(rng3, expected3),
(rng4, expected4),
]:
result = base.intersection(rng, sort=sort)
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq is None
assert result.tz == expected.tz
def test_intersection_empty(self):
# empty same freq GH2129
rng = date_range("6/1/2000", "6/15/2000", freq="T")
result = rng[0:0].intersection(rng)
assert len(result) == 0
result = rng.intersection(rng[0:0])
assert len(result) == 0
def test_intersection_bug_1708(self):
from pandas import DateOffset
index_1 = date_range("1/1/2012", periods=4, freq="12H")
index_2 = index_1 + DateOffset(hours=1)
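        # for Index objects, & performs set intersection; offset by an hour,
        # the two ranges share no timestamps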
result = index_1 & index_2
assert len(result) == 0
@pytest.mark.parametrize("tz", tz)
@pytest.mark.parametrize("sort", [None, False])
def test_difference(self, tz, sort):
rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"]
rng1 = pd.DatetimeIndex(rng_dates, tz=tz)
other1 = pd.date_range("1/6/2000", freq="D", periods=5, tz=tz)
expected1 = pd.DatetimeIndex(rng_dates, tz=tz)
rng2 = pd.DatetimeIndex(rng_dates, tz=tz)
other2 = pd.date_range("1/4/2000", freq="D", periods=5, tz=tz)
expected2 = pd.DatetimeIndex(rng_dates[:3], tz=tz)
rng3 = pd.DatetimeIndex(rng_dates, tz=tz)
other3 = pd.DatetimeIndex([], tz=tz)
expected3 = pd.DatetimeIndex(rng_dates, tz=tz)
for rng, other, expected in [
(rng1, other1, expected1),
(rng2, other2, expected2),
(rng3, other3, expected3),
]:
result_diff = rng.difference(other, sort)
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result_diff, expected)
@pytest.mark.parametrize("sort", [None, False])
def test_difference_freq(self, sort):
# GH14323: difference of DatetimeIndex should not preserve frequency
index = date_range("20160920", "20160925", freq="D")
other = date_range("20160921", "20160924", freq="D")
expected = DatetimeIndex(["20160920", "20160925"], freq=None)
idx_diff = index.difference(other, sort)
tm.assert_index_equal(idx_diff, expected)
tm.assert_attr_equal("freq", idx_diff, expected)
other = date_range("20160922", "20160925", freq="D")
idx_diff = index.difference(other, sort)
expected = DatetimeIndex(["20160920", "20160921"], freq=None)
tm.assert_index_equal(idx_diff, expected)
tm.assert_attr_equal("freq", idx_diff, expected)
@pytest.mark.parametrize("sort", [None, False])
def test_datetimeindex_diff(self, sort):
dti1 = date_range(freq="Q-JAN", start=datetime(1997, 12, 31), periods=100)
dti2 = date_range(freq="Q-JAN", start=datetime(1997, 12, 31), periods=98)
assert len(dti1.difference(dti2, sort)) == 2
@pytest.mark.parametrize("sort", [None, False])
def test_datetimeindex_union_join_empty(self, sort):
dti = date_range(start="1/1/2001", end="2/1/2001", freq="D")
empty = Index([])
result = dti.union(empty, sort=sort)
expected = dti.astype("O")
tm.assert_index_equal(result, expected)
result = dti.join(empty)
assert isinstance(result, DatetimeIndex)
tm.assert_index_equal(result, dti)
def test_join_nonunique(self):
idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"])
idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"])
rs = idx1.join(idx2, how="outer")
assert rs.is_monotonic
class TestBusinessDatetimeIndex:
def setup_method(self, method):
self.rng = bdate_range(START, END)
@pytest.mark.parametrize("sort", [None, False])
def test_union(self, sort):
# overlapping
left = self.rng[:10]
right = self.rng[5:10]
the_union = left.union(right, sort=sort)
assert isinstance(the_union, DatetimeIndex)
# non-overlapping, gap in middle
left = self.rng[:5]
right = self.rng[10:]
the_union = left.union(right, sort=sort)
assert isinstance(the_union, Index)
# non-overlapping, no gap
left = self.rng[:5]
right = self.rng[5:10]
the_union = left.union(right, sort=sort)
assert isinstance(the_union, DatetimeIndex)
# order does not matter
if sort is None:
tm.assert_index_equal(right.union(left, sort=sort), the_union)
else:
expected = pd.DatetimeIndex(list(right) + list(left))
tm.assert_index_equal(right.union(left, sort=sort), expected)
# overlapping, but different offset
rng = date_range(START, END, freq=BMonthEnd())
the_union = self.rng.union(rng, sort=sort)
assert isinstance(the_union, DatetimeIndex)
def test_outer_join(self):
# should just behave as union
# overlapping
left = self.rng[:10]
right = self.rng[5:10]
the_join = left.join(right, how="outer")
assert isinstance(the_join, DatetimeIndex)
# non-overlapping, gap in middle
left = self.rng[:5]
right = self.rng[10:]
the_join = left.join(right, how="outer")
assert isinstance(the_join, DatetimeIndex)
assert the_join.freq is None
# non-overlapping, no gap
left = self.rng[:5]
right = self.rng[5:10]
the_join = left.join(right, how="outer")
assert isinstance(the_join, DatetimeIndex)
# overlapping, but different offset
rng = date_range(START, END, freq=BMonthEnd())
the_join = self.rng.join(rng, how="outer")
assert isinstance(the_join, DatetimeIndex)
assert the_join.freq is None
@pytest.mark.parametrize("sort", [None, False])
def test_union_not_cacheable(self, sort):
rng = date_range("1/1/2000", periods=50, freq=Minute())
rng1 = rng[10:]
rng2 = rng[:25]
the_union = rng1.union(rng2, sort=sort)
if sort is None:
tm.assert_index_equal(the_union, rng)
else:
expected = pd.DatetimeIndex(list(rng[10:]) + list(rng[:10]))
tm.assert_index_equal(the_union, expected)
rng1 = rng[10:]
rng2 = rng[15:35]
the_union = rng1.union(rng2, sort=sort)
expected = rng[10:]
tm.assert_index_equal(the_union, expected)
def test_intersection(self):
rng = date_range("1/1/2000", periods=50, freq=Minute())
rng1 = rng[10:]
rng2 = rng[:25]
the_int = rng1.intersection(rng2)
expected = rng[10:25]
tm.assert_index_equal(the_int, expected)
assert isinstance(the_int, DatetimeIndex)
assert the_int.freq == rng.freq
the_int = rng1.intersection(rng2.view(DatetimeIndex))
tm.assert_index_equal(the_int, expected)
# non-overlapping
the_int = rng[:10].intersection(rng[10:])
expected = DatetimeIndex([])
tm.assert_index_equal(the_int, expected)
def test_intersection_bug(self):
# GH #771
a = bdate_range("11/30/2011", "12/31/2011")
b = bdate_range("12/10/2011", "12/20/2011")
result = a.intersection(b)
tm.assert_index_equal(result, b)
@pytest.mark.parametrize("sort", [None, False])
def test_month_range_union_tz_pytz(self, sort):
from pytz import timezone
tz = timezone("US/Eastern")
early_start = datetime(2011, 1, 1)
early_end = datetime(2011, 3, 1)
late_start = datetime(2011, 3, 1)
late_end = datetime(2011, 5, 1)
early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())
early_dr.union(late_dr, sort=sort)
@td.skip_if_windows_python_3
@pytest.mark.parametrize("sort", [None, False])
def test_month_range_union_tz_dateutil(self, sort):
from pandas._libs.tslibs.timezones import dateutil_gettz
tz = dateutil_gettz("US/Eastern")
early_start = datetime(2011, 1, 1)
early_end = datetime(2011, 3, 1)
late_start = datetime(2011, 3, 1)
late_end = datetime(2011, 5, 1)
early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())
early_dr.union(late_dr, sort=sort)
class TestCustomDatetimeIndex:
def setup_method(self, method):
self.rng = bdate_range(START, END, freq="C")
@pytest.mark.parametrize("sort", [None, False])
def test_union(self, sort):
# overlapping
left = self.rng[:10]
right = self.rng[5:10]
the_union = left.union(right, sort=sort)
assert isinstance(the_union, DatetimeIndex)
# non-overlapping, gap in middle
left = self.rng[:5]
right = self.rng[10:]
the_union = left.union(right, sort)
assert isinstance(the_union, Index)
# non-overlapping, no gap
left = self.rng[:5]
right = self.rng[5:10]
the_union = left.union(right, sort=sort)
assert isinstance(the_union, DatetimeIndex)
# order does not matter
if sort is None:
tm.assert_index_equal(right.union(left, sort=sort), the_union)
# overlapping, but different offset
rng = date_range(START, END, freq=BMonthEnd())
the_union = self.rng.union(rng, sort=sort)
assert isinstance(the_union, DatetimeIndex)
def test_outer_join(self):
# should just behave as union
# overlapping
left = self.rng[:10]
right = self.rng[5:10]
the_join = left.join(right, how="outer")
assert isinstance(the_join, DatetimeIndex)
# non-overlapping, gap in middle
left = self.rng[:5]
right = self.rng[10:]
the_join = left.join(right, how="outer")
assert isinstance(the_join, DatetimeIndex)
assert the_join.freq is None
# non-overlapping, no gap
left = self.rng[:5]
right = self.rng[5:10]
the_join = left.join(right, how="outer")
assert isinstance(the_join, DatetimeIndex)
# overlapping, but different offset
rng = date_range(START, END, freq=BMonthEnd())
the_join = self.rng.join(rng, how="outer")
assert isinstance(the_join, DatetimeIndex)
assert the_join.freq is None
def test_intersection_bug(self):
# GH #771
a = bdate_range("11/30/2011", "12/31/2011", freq="C")
b = bdate_range("12/10/2011", "12/20/2011", freq="C")
result = a.intersection(b)
tm.assert_index_equal(result, b)

File diff suppressed because it is too large

File diff suppressed because it is too large