""" test indexing with ix """ from warnings import catch_warnings import numpy as np import pytest from pandas.core.dtypes.common import is_scalar import pandas as pd from pandas import DataFrame, Series, option_context from pandas.util import testing as tm def test_ix_deprecation(): # GH 15114 df = DataFrame({"A": [1, 2, 3]}) with tm.assert_produces_warning(FutureWarning, check_stacklevel=True): df.ix[1, "A"] @pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning") class TestIX: def test_ix_loc_setitem_consistency(self): # GH 5771 # loc with slice and series s = Series(0, index=[4, 5, 6]) s.loc[4:5] += 1 expected = Series([1, 1, 0], index=[4, 5, 6]) tm.assert_series_equal(s, expected) # GH 5928 # chained indexing assignment df = DataFrame({"a": [0, 1, 2]}) expected = df.copy() with catch_warnings(record=True): expected.ix[[0, 1, 2], "a"] = -expected.ix[[0, 1, 2], "a"] with catch_warnings(record=True): df["a"].ix[[0, 1, 2]] = -df["a"].ix[[0, 1, 2]] tm.assert_frame_equal(df, expected) df = DataFrame({"a": [0, 1, 2], "b": [0, 1, 2]}) with catch_warnings(record=True): df["a"].ix[[0, 1, 2]] = -df["a"].ix[[0, 1, 2]].astype("float64") + 0.5 expected = DataFrame({"a": [0.5, -0.5, -1.5], "b": [0, 1, 2]}) tm.assert_frame_equal(df, expected) # GH 8607 # ix setitem consistency df = DataFrame( { "delta": [1174, 904, 161], "elapsed": [7673, 9277, 1470], "timestamp": [1413840976, 1413842580, 1413760580], } ) expected = DataFrame( { "delta": [1174, 904, 161], "elapsed": [7673, 9277, 1470], "timestamp": pd.to_datetime( [1413840976, 1413842580, 1413760580], unit="s" ), } ) df2 = df.copy() df2["timestamp"] = pd.to_datetime(df["timestamp"], unit="s") tm.assert_frame_equal(df2, expected) df2 = df.copy() df2.loc[:, "timestamp"] = pd.to_datetime(df["timestamp"], unit="s") tm.assert_frame_equal(df2, expected) df2 = df.copy() with catch_warnings(record=True): df2.ix[:, 2] = pd.to_datetime(df["timestamp"], unit="s") tm.assert_frame_equal(df2, expected) def test_ix_loc_consistency(self): # GH 8613 # some edge cases where ix/loc should return the same # this is not an exhaustive case def compare(result, expected): if is_scalar(expected): assert result == expected else: assert expected.equals(result) # failure cases for .loc, but these work for .ix df = DataFrame(np.random.randn(5, 4), columns=list("ABCD")) for key in [ slice(1, 3), tuple([slice(0, 2), slice(0, 2)]), tuple([slice(0, 2), df.columns[0:2]]), ]: for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeDateIndex, tm.makePeriodIndex, tm.makeTimedeltaIndex, ]: df.index = index(len(df.index)) with catch_warnings(record=True): df.ix[key] msg = ( r"cannot do slice indexing" r" on {klass} with these indexers \[(0|1)\] of" r" {kind}".format(klass=type(df.index), kind=str(int)) ) with pytest.raises(TypeError, match=msg): df.loc[key] df = DataFrame( np.random.randn(5, 4), columns=list("ABCD"), index=pd.date_range("2012-01-01", periods=5), ) for key in [ "2012-01-03", "2012-01-31", slice("2012-01-03", "2012-01-03"), slice("2012-01-03", "2012-01-04"), slice("2012-01-03", "2012-01-06", 2), slice("2012-01-03", "2012-01-31"), tuple([[True, True, True, False, True]]), ]: # getitem # if the expected raises, then compare the exceptions try: with catch_warnings(record=True): expected = df.ix[key] except KeyError: with pytest.raises(KeyError, match=r"^'2012-01-31'$"): df.loc[key] continue result = df.loc[key] compare(result, expected) # setitem df1 = df.copy() df2 = df.copy() with catch_warnings(record=True): df1.ix[key] = 10 df2.loc[key] = 10 compare(df2, df1) # edge cases s = Series([1, 2, 3, 4], index=list("abde")) result1 = s["a":"c"] with catch_warnings(record=True): result2 = s.ix["a":"c"] result3 = s.loc["a":"c"] tm.assert_series_equal(result1, result2) tm.assert_series_equal(result1, result3) # now work rather than raising KeyError s = Series(range(5), [-2, -1, 1, 2, 3]) with catch_warnings(record=True): result1 = s.ix[-10:3] result2 = s.loc[-10:3] tm.assert_series_equal(result1, result2) with catch_warnings(record=True): result1 = s.ix[0:3] result2 = s.loc[0:3] tm.assert_series_equal(result1, result2) def test_ix_weird_slicing(self): # http://stackoverflow.com/q/17056560/1240268 df = DataFrame({"one": [1, 2, 3, np.nan, np.nan], "two": [1, 2, 3, 4, 5]}) df.loc[df["one"] > 1, "two"] = -df["two"] expected = DataFrame( { "one": {0: 1.0, 1: 2.0, 2: 3.0, 3: np.nan, 4: np.nan}, "two": {0: 1, 1: -2, 2: -3, 3: 4, 4: 5}, } ) tm.assert_frame_equal(df, expected) def test_ix_assign_column_mixed(self, float_frame): # GH #1142 df = float_frame df["foo"] = "bar" orig = df.loc[:, "B"].copy() df.loc[:, "B"] = df.loc[:, "B"] + 1 tm.assert_series_equal(df.B, orig + 1) # GH 3668, mixed frame with series value df = DataFrame({"x": np.arange(10), "y": np.arange(10, 20), "z": "bar"}) expected = df.copy() for i in range(5): indexer = i * 2 v = 1000 + i * 200 expected.loc[indexer, "y"] = v assert expected.loc[indexer, "y"] == v df.loc[df.x % 2 == 0, "y"] = df.loc[df.x % 2 == 0, "y"] * 100 tm.assert_frame_equal(df, expected) # GH 4508, making sure consistency of assignments df = DataFrame({"a": [1, 2, 3], "b": [0, 1, 2]}) df.loc[[0, 2], "b"] = [100, -100] expected = DataFrame({"a": [1, 2, 3], "b": [100, 1, -100]}) tm.assert_frame_equal(df, expected) df = DataFrame({"a": list(range(4))}) df["b"] = np.nan df.loc[[1, 3], "b"] = [100, -100] expected = DataFrame({"a": [0, 1, 2, 3], "b": [np.nan, 100, np.nan, -100]}) tm.assert_frame_equal(df, expected) # ok, but chained assignments are dangerous # if we turn off chained assignment it will work with option_context("chained_assignment", None): df = DataFrame({"a": list(range(4))}) df["b"] = np.nan df["b"].loc[[1, 3]] = [100, -100] tm.assert_frame_equal(df, expected) def test_ix_get_set_consistency(self): # GH 4544 # ix/loc get/set not consistent when # a mixed int/string index df = DataFrame( np.arange(16).reshape((4, 4)), columns=["a", "b", 8, "c"], index=["e", 7, "f", "g"], ) with catch_warnings(record=True): assert df.ix["e", 8] == 2 assert df.loc["e", 8] == 2 with catch_warnings(record=True): df.ix["e", 8] = 42 assert df.ix["e", 8] == 42 assert df.loc["e", 8] == 42 df.loc["e", 8] = 45 with catch_warnings(record=True): assert df.ix["e", 8] == 45 assert df.loc["e", 8] == 45 def test_ix_slicing_strings(self): # see gh-3836 data = { "Classification": ["SA EQUITY CFD", "bbb", "SA EQUITY", "SA SSF", "aaa"], "Random": [1, 2, 3, 4, 5], "X": ["correct", "wrong", "correct", "correct", "wrong"], } df = DataFrame(data) x = df[~df.Classification.isin(["SA EQUITY CFD", "SA EQUITY", "SA SSF"])] with catch_warnings(record=True): df.ix[x.index, "X"] = df["Classification"] expected = DataFrame( { "Classification": { 0: "SA EQUITY CFD", 1: "bbb", 2: "SA EQUITY", 3: "SA SSF", 4: "aaa", }, "Random": {0: 1, 1: 2, 2: 3, 3: 4, 4: 5}, "X": {0: "correct", 1: "bbb", 2: "correct", 3: "correct", 4: "aaa"}, } ) # bug was 4: 'bbb' tm.assert_frame_equal(df, expected) def test_ix_setitem_out_of_bounds_axis_0(self): df = DataFrame( np.random.randn(2, 5), index=["row{i}".format(i=i) for i in range(2)], columns=["col{i}".format(i=i) for i in range(5)], ) with catch_warnings(record=True): msg = "cannot set by positional indexing with enlargement" with pytest.raises(ValueError, match=msg): df.ix[2, 0] = 100 def test_ix_setitem_out_of_bounds_axis_1(self): df = DataFrame( np.random.randn(5, 2), index=["row{i}".format(i=i) for i in range(5)], columns=["col{i}".format(i=i) for i in range(2)], ) with catch_warnings(record=True): msg = "cannot set by positional indexing with enlargement" with pytest.raises(ValueError, match=msg): df.ix[0, 2] = 100 def test_ix_empty_list_indexer_is_ok(self): with catch_warnings(record=True): from pandas.util.testing import makeCustomDataframe as mkdf df = mkdf(5, 2) # vertical empty tm.assert_frame_equal( df.ix[:, []], df.iloc[:, :0], check_index_type=True, check_column_type=True, ) # horizontal empty tm.assert_frame_equal( df.ix[[], :], df.iloc[:0, :], check_index_type=True, check_column_type=True, ) # horizontal empty tm.assert_frame_equal( df.ix[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True ) def test_ix_duplicate_returns_series(self): df = DataFrame( np.random.randn(3, 3), index=[0.1, 0.2, 0.2], columns=list("abc") ) with catch_warnings(record=True): r = df.ix[0.2, "a"] e = df.loc[0.2, "a"] tm.assert_series_equal(r, e)