1293 lines
46 KiB
Python
1293 lines
46 KiB
Python
from datetime import datetime
|
|
from io import StringIO
|
|
import re
|
|
from typing import Dict
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
import pandas as pd
|
|
from pandas import DataFrame, Index, Series, Timestamp, date_range
|
|
from pandas.tests.frame.common import TestData
|
|
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
|
|
|
|
|
@pytest.fixture
def mix_ab() -> Dict[str, list]:
    """Return column data for a two-column mixed frame.

    Column "a" holds the integers 0-3; column "b" holds the strings
    "a", "b", ".", ".".
    """
    ints = [0, 1, 2, 3]
    strs = ["a", "b", ".", "."]
    return {"a": ints, "b": strs}
|
|
|
|
|
|
@pytest.fixture
def mix_abc() -> Dict[str, list]:
    """Return column data for a three-column mixed frame.

    Column "a" holds the integers 0-3, column "b" the strings
    "a", "b", ".", "." and column "c" strings with one NaN entry.
    """
    return {
        "a": [0, 1, 2, 3],
        "b": ["a", "b", ".", "."],
        "c": ["a", "b", np.nan, "d"],
    }
|
|
|
|
|
|
class TestDataFrameReplace(TestData):
|
|
def test_replace_inplace(self):
|
|
self.tsframe["A"][:5] = np.nan
|
|
self.tsframe["A"][-5:] = np.nan
|
|
|
|
tsframe = self.tsframe.copy()
|
|
tsframe.replace(np.nan, 0, inplace=True)
|
|
assert_frame_equal(tsframe, self.tsframe.fillna(0))
|
|
|
|
# mixed type
|
|
mf = self.mixed_frame
|
|
mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan
|
|
mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan
|
|
|
|
result = self.mixed_frame.replace(np.nan, 0)
|
|
expected = self.mixed_frame.fillna(value=0)
|
|
assert_frame_equal(result, expected)
|
|
|
|
tsframe = self.tsframe.copy()
|
|
tsframe.replace([np.nan], [0], inplace=True)
|
|
assert_frame_equal(tsframe, self.tsframe.fillna(0))
|
|
|
|
def test_regex_replace_scalar(self, mix_ab):
|
|
obj = {"a": list("ab.."), "b": list("efgh")}
|
|
dfobj = DataFrame(obj)
|
|
dfmix = DataFrame(mix_ab)
|
|
|
|
# simplest cases
|
|
# regex -> value
|
|
# obj frame
|
|
res = dfobj.replace(r"\s*\.\s*", np.nan, regex=True)
|
|
assert_frame_equal(dfobj, res.fillna("."))
|
|
|
|
# mixed
|
|
res = dfmix.replace(r"\s*\.\s*", np.nan, regex=True)
|
|
assert_frame_equal(dfmix, res.fillna("."))
|
|
|
|
# regex -> regex
|
|
# obj frame
|
|
res = dfobj.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True)
|
|
objc = obj.copy()
|
|
objc["a"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(objc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# with mixed
|
|
res = dfmix.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True)
|
|
mixc = mix_ab.copy()
|
|
mixc["b"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(mixc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# everything with compiled regexs as well
|
|
res = dfobj.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True)
|
|
assert_frame_equal(dfobj, res.fillna("."))
|
|
|
|
# mixed
|
|
res = dfmix.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True)
|
|
assert_frame_equal(dfmix, res.fillna("."))
|
|
|
|
# regex -> regex
|
|
# obj frame
|
|
res = dfobj.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1")
|
|
objc = obj.copy()
|
|
objc["a"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(objc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# with mixed
|
|
res = dfmix.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1")
|
|
mixc = mix_ab.copy()
|
|
mixc["b"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(mixc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
res = dfmix.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1")
|
|
mixc = mix_ab.copy()
|
|
mixc["b"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(mixc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
res = dfmix.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1")
|
|
mixc = mix_ab.copy()
|
|
mixc["b"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(mixc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
def test_regex_replace_scalar_inplace(self, mix_ab):
|
|
obj = {"a": list("ab.."), "b": list("efgh")}
|
|
dfobj = DataFrame(obj)
|
|
dfmix = DataFrame(mix_ab)
|
|
|
|
# simplest cases
|
|
# regex -> value
|
|
# obj frame
|
|
res = dfobj.copy()
|
|
res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True)
|
|
assert_frame_equal(dfobj, res.fillna("."))
|
|
|
|
# mixed
|
|
res = dfmix.copy()
|
|
res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True)
|
|
assert_frame_equal(dfmix, res.fillna("."))
|
|
|
|
# regex -> regex
|
|
# obj frame
|
|
res = dfobj.copy()
|
|
res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True)
|
|
objc = obj.copy()
|
|
objc["a"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(objc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# with mixed
|
|
res = dfmix.copy()
|
|
res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True)
|
|
mixc = mix_ab.copy()
|
|
mixc["b"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(mixc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# everything with compiled regexs as well
|
|
res = dfobj.copy()
|
|
res.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True)
|
|
assert_frame_equal(dfobj, res.fillna("."))
|
|
|
|
# mixed
|
|
res = dfmix.copy()
|
|
res.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True)
|
|
assert_frame_equal(dfmix, res.fillna("."))
|
|
|
|
# regex -> regex
|
|
# obj frame
|
|
res = dfobj.copy()
|
|
res.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True)
|
|
objc = obj.copy()
|
|
objc["a"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(objc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# with mixed
|
|
res = dfmix.copy()
|
|
res.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True)
|
|
mixc = mix_ab.copy()
|
|
mixc["b"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(mixc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
res = dfobj.copy()
|
|
res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True)
|
|
assert_frame_equal(dfobj, res.fillna("."))
|
|
|
|
# mixed
|
|
res = dfmix.copy()
|
|
res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True)
|
|
assert_frame_equal(dfmix, res.fillna("."))
|
|
|
|
# regex -> regex
|
|
# obj frame
|
|
res = dfobj.copy()
|
|
res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True)
|
|
objc = obj.copy()
|
|
objc["a"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(objc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# with mixed
|
|
res = dfmix.copy()
|
|
res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True)
|
|
mixc = mix_ab.copy()
|
|
mixc["b"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(mixc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# everything with compiled regexs as well
|
|
res = dfobj.copy()
|
|
res.replace(regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True)
|
|
assert_frame_equal(dfobj, res.fillna("."))
|
|
|
|
# mixed
|
|
res = dfmix.copy()
|
|
res.replace(regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True)
|
|
assert_frame_equal(dfmix, res.fillna("."))
|
|
|
|
# regex -> regex
|
|
# obj frame
|
|
res = dfobj.copy()
|
|
res.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True)
|
|
objc = obj.copy()
|
|
objc["a"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(objc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# with mixed
|
|
res = dfmix.copy()
|
|
res.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True)
|
|
mixc = mix_ab.copy()
|
|
mixc["b"] = ["a", "b", "...", "..."]
|
|
expec = DataFrame(mixc)
|
|
assert_frame_equal(res, expec)
|
|
|
|
def test_regex_replace_list_obj(self):
|
|
obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")}
|
|
dfobj = DataFrame(obj)
|
|
|
|
# lists of regexes and values
|
|
# list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
|
|
to_replace_res = [r"\s*\.\s*", r"e|f|g"]
|
|
values = [np.nan, "crap"]
|
|
res = dfobj.replace(to_replace_res, values, regex=True)
|
|
expec = DataFrame(
|
|
{
|
|
"a": ["a", "b", np.nan, np.nan],
|
|
"b": ["crap"] * 3 + ["h"],
|
|
"c": ["h", "crap", "l", "o"],
|
|
}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
|
|
to_replace_res = [r"\s*(\.)\s*", r"(e|f|g)"]
|
|
values = [r"\1\1", r"\1_crap"]
|
|
res = dfobj.replace(to_replace_res, values, regex=True)
|
|
expec = DataFrame(
|
|
{
|
|
"a": ["a", "b", "..", ".."],
|
|
"b": ["e_crap", "f_crap", "g_crap", "h"],
|
|
"c": ["h", "e_crap", "l", "o"],
|
|
}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
|
|
# or vN)]
|
|
to_replace_res = [r"\s*(\.)\s*", r"e"]
|
|
values = [r"\1\1", r"crap"]
|
|
res = dfobj.replace(to_replace_res, values, regex=True)
|
|
expec = DataFrame(
|
|
{
|
|
"a": ["a", "b", "..", ".."],
|
|
"b": ["crap", "f", "g", "h"],
|
|
"c": ["h", "crap", "l", "o"],
|
|
}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
|
|
to_replace_res = [r"\s*(\.)\s*", r"e"]
|
|
values = [r"\1\1", r"crap"]
|
|
res = dfobj.replace(value=values, regex=to_replace_res)
|
|
expec = DataFrame(
|
|
{
|
|
"a": ["a", "b", "..", ".."],
|
|
"b": ["crap", "f", "g", "h"],
|
|
"c": ["h", "crap", "l", "o"],
|
|
}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
|
|
def test_regex_replace_list_obj_inplace(self):
|
|
# same as above with inplace=True
|
|
# lists of regexes and values
|
|
obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")}
|
|
dfobj = DataFrame(obj)
|
|
|
|
# lists of regexes and values
|
|
# list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
|
|
to_replace_res = [r"\s*\.\s*", r"e|f|g"]
|
|
values = [np.nan, "crap"]
|
|
res = dfobj.copy()
|
|
res.replace(to_replace_res, values, inplace=True, regex=True)
|
|
expec = DataFrame(
|
|
{
|
|
"a": ["a", "b", np.nan, np.nan],
|
|
"b": ["crap"] * 3 + ["h"],
|
|
"c": ["h", "crap", "l", "o"],
|
|
}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
|
|
to_replace_res = [r"\s*(\.)\s*", r"(e|f|g)"]
|
|
values = [r"\1\1", r"\1_crap"]
|
|
res = dfobj.copy()
|
|
res.replace(to_replace_res, values, inplace=True, regex=True)
|
|
expec = DataFrame(
|
|
{
|
|
"a": ["a", "b", "..", ".."],
|
|
"b": ["e_crap", "f_crap", "g_crap", "h"],
|
|
"c": ["h", "e_crap", "l", "o"],
|
|
}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
|
|
# or vN)]
|
|
to_replace_res = [r"\s*(\.)\s*", r"e"]
|
|
values = [r"\1\1", r"crap"]
|
|
res = dfobj.copy()
|
|
res.replace(to_replace_res, values, inplace=True, regex=True)
|
|
expec = DataFrame(
|
|
{
|
|
"a": ["a", "b", "..", ".."],
|
|
"b": ["crap", "f", "g", "h"],
|
|
"c": ["h", "crap", "l", "o"],
|
|
}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
|
|
to_replace_res = [r"\s*(\.)\s*", r"e"]
|
|
values = [r"\1\1", r"crap"]
|
|
res = dfobj.copy()
|
|
res.replace(value=values, regex=to_replace_res, inplace=True)
|
|
expec = DataFrame(
|
|
{
|
|
"a": ["a", "b", "..", ".."],
|
|
"b": ["crap", "f", "g", "h"],
|
|
"c": ["h", "crap", "l", "o"],
|
|
}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
|
|
def test_regex_replace_list_mixed(self, mix_ab):
|
|
# mixed frame to make sure this doesn't break things
|
|
dfmix = DataFrame(mix_ab)
|
|
|
|
# lists of regexes and values
|
|
# list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
|
|
to_replace_res = [r"\s*\.\s*", r"a"]
|
|
values = [np.nan, "crap"]
|
|
mix2 = {"a": list(range(4)), "b": list("ab.."), "c": list("halo")}
|
|
dfmix2 = DataFrame(mix2)
|
|
res = dfmix2.replace(to_replace_res, values, regex=True)
|
|
expec = DataFrame(
|
|
{
|
|
"a": mix2["a"],
|
|
"b": ["crap", "b", np.nan, np.nan],
|
|
"c": ["h", "crap", "l", "o"],
|
|
}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
|
|
to_replace_res = [r"\s*(\.)\s*", r"(a|b)"]
|
|
values = [r"\1\1", r"\1_crap"]
|
|
res = dfmix.replace(to_replace_res, values, regex=True)
|
|
expec = DataFrame({"a": mix_ab["a"], "b": ["a_crap", "b_crap", "..", ".."]})
|
|
assert_frame_equal(res, expec)
|
|
|
|
# list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
|
|
# or vN)]
|
|
to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"]
|
|
values = [r"\1\1", r"crap", r"\1_crap"]
|
|
res = dfmix.replace(to_replace_res, values, regex=True)
|
|
expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]})
|
|
assert_frame_equal(res, expec)
|
|
|
|
to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"]
|
|
values = [r"\1\1", r"crap", r"\1_crap"]
|
|
res = dfmix.replace(regex=to_replace_res, value=values)
|
|
expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]})
|
|
assert_frame_equal(res, expec)
|
|
|
|
def test_regex_replace_list_mixed_inplace(self, mix_ab):
|
|
dfmix = DataFrame(mix_ab)
|
|
# the same inplace
|
|
# lists of regexes and values
|
|
# list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
|
|
to_replace_res = [r"\s*\.\s*", r"a"]
|
|
values = [np.nan, "crap"]
|
|
res = dfmix.copy()
|
|
res.replace(to_replace_res, values, inplace=True, regex=True)
|
|
expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b", np.nan, np.nan]})
|
|
assert_frame_equal(res, expec)
|
|
|
|
# list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
|
|
to_replace_res = [r"\s*(\.)\s*", r"(a|b)"]
|
|
values = [r"\1\1", r"\1_crap"]
|
|
res = dfmix.copy()
|
|
res.replace(to_replace_res, values, inplace=True, regex=True)
|
|
expec = DataFrame({"a": mix_ab["a"], "b": ["a_crap", "b_crap", "..", ".."]})
|
|
assert_frame_equal(res, expec)
|
|
|
|
# list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
|
|
# or vN)]
|
|
to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"]
|
|
values = [r"\1\1", r"crap", r"\1_crap"]
|
|
res = dfmix.copy()
|
|
res.replace(to_replace_res, values, inplace=True, regex=True)
|
|
expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]})
|
|
assert_frame_equal(res, expec)
|
|
|
|
to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"]
|
|
values = [r"\1\1", r"crap", r"\1_crap"]
|
|
res = dfmix.copy()
|
|
res.replace(regex=to_replace_res, value=values, inplace=True)
|
|
expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]})
|
|
assert_frame_equal(res, expec)
|
|
|
|
def test_regex_replace_dict_mixed(self, mix_abc):
|
|
dfmix = DataFrame(mix_abc)
|
|
|
|
# dicts
|
|
# single dict {re1: v1}, search the whole frame
|
|
# need test for this...
|
|
|
|
# list of dicts {re1: v1, re2: v2, ..., re3: v3}, search the whole
|
|
# frame
|
|
res = dfmix.replace({"b": r"\s*\.\s*"}, {"b": np.nan}, regex=True)
|
|
res2 = dfmix.copy()
|
|
res2.replace({"b": r"\s*\.\s*"}, {"b": np.nan}, inplace=True, regex=True)
|
|
expec = DataFrame(
|
|
{"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
assert_frame_equal(res2, expec)
|
|
|
|
# list of dicts {re1: re11, re2: re12, ..., reN: re1N}, search the
|
|
# whole frame
|
|
res = dfmix.replace({"b": r"\s*(\.)\s*"}, {"b": r"\1ty"}, regex=True)
|
|
res2 = dfmix.copy()
|
|
res2.replace({"b": r"\s*(\.)\s*"}, {"b": r"\1ty"}, inplace=True, regex=True)
|
|
expec = DataFrame(
|
|
{"a": mix_abc["a"], "b": ["a", "b", ".ty", ".ty"], "c": mix_abc["c"]}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
assert_frame_equal(res2, expec)
|
|
|
|
res = dfmix.replace(regex={"b": r"\s*(\.)\s*"}, value={"b": r"\1ty"})
|
|
res2 = dfmix.copy()
|
|
res2.replace(regex={"b": r"\s*(\.)\s*"}, value={"b": r"\1ty"}, inplace=True)
|
|
expec = DataFrame(
|
|
{"a": mix_abc["a"], "b": ["a", "b", ".ty", ".ty"], "c": mix_abc["c"]}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
assert_frame_equal(res2, expec)
|
|
|
|
# scalar -> dict
|
|
# to_replace regex, {value: value}
|
|
expec = DataFrame(
|
|
{"a": mix_abc["a"], "b": [np.nan, "b", ".", "."], "c": mix_abc["c"]}
|
|
)
|
|
res = dfmix.replace("a", {"b": np.nan}, regex=True)
|
|
res2 = dfmix.copy()
|
|
res2.replace("a", {"b": np.nan}, regex=True, inplace=True)
|
|
assert_frame_equal(res, expec)
|
|
assert_frame_equal(res2, expec)
|
|
|
|
res = dfmix.replace("a", {"b": np.nan}, regex=True)
|
|
res2 = dfmix.copy()
|
|
res2.replace(regex="a", value={"b": np.nan}, inplace=True)
|
|
expec = DataFrame(
|
|
{"a": mix_abc["a"], "b": [np.nan, "b", ".", "."], "c": mix_abc["c"]}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
assert_frame_equal(res2, expec)
|
|
|
|
def test_regex_replace_dict_nested(self, mix_abc):
|
|
# nested dicts will not work until this is implemented for Series
|
|
dfmix = DataFrame(mix_abc)
|
|
res = dfmix.replace({"b": {r"\s*\.\s*": np.nan}}, regex=True)
|
|
res2 = dfmix.copy()
|
|
res4 = dfmix.copy()
|
|
res2.replace({"b": {r"\s*\.\s*": np.nan}}, inplace=True, regex=True)
|
|
res3 = dfmix.replace(regex={"b": {r"\s*\.\s*": np.nan}})
|
|
res4.replace(regex={"b": {r"\s*\.\s*": np.nan}}, inplace=True)
|
|
expec = DataFrame(
|
|
{"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
assert_frame_equal(res2, expec)
|
|
assert_frame_equal(res3, expec)
|
|
assert_frame_equal(res4, expec)
|
|
|
|
def test_regex_replace_dict_nested_non_first_character(self):
|
|
# GH 25259
|
|
df = pd.DataFrame({"first": ["abc", "bca", "cab"]})
|
|
expected = pd.DataFrame({"first": [".bc", "bc.", "c.b"]})
|
|
result = df.replace({"a": "."}, regex=True)
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_regex_replace_dict_nested_gh4115(self):
|
|
df = pd.DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2})
|
|
expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2})
|
|
result = df.replace({"Type": {"Q": 0, "T": 1}})
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_regex_replace_list_to_scalar(self, mix_abc):
|
|
df = DataFrame(mix_abc)
|
|
expec = DataFrame(
|
|
{
|
|
"a": mix_abc["a"],
|
|
"b": np.array([np.nan] * 4),
|
|
"c": [np.nan, np.nan, np.nan, "d"],
|
|
}
|
|
)
|
|
res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True)
|
|
res2 = df.copy()
|
|
res3 = df.copy()
|
|
res2.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True)
|
|
res3.replace(regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True)
|
|
assert_frame_equal(res, expec)
|
|
assert_frame_equal(res2, expec)
|
|
assert_frame_equal(res3, expec)
|
|
|
|
def test_regex_replace_str_to_numeric(self, mix_abc):
|
|
# what happens when you try to replace a numeric value with a regex?
|
|
df = DataFrame(mix_abc)
|
|
res = df.replace(r"\s*\.\s*", 0, regex=True)
|
|
res2 = df.copy()
|
|
res2.replace(r"\s*\.\s*", 0, inplace=True, regex=True)
|
|
res3 = df.copy()
|
|
res3.replace(regex=r"\s*\.\s*", value=0, inplace=True)
|
|
expec = DataFrame({"a": mix_abc["a"], "b": ["a", "b", 0, 0], "c": mix_abc["c"]})
|
|
assert_frame_equal(res, expec)
|
|
assert_frame_equal(res2, expec)
|
|
assert_frame_equal(res3, expec)
|
|
|
|
def test_regex_replace_regex_list_to_numeric(self, mix_abc):
|
|
df = DataFrame(mix_abc)
|
|
res = df.replace([r"\s*\.\s*", "b"], 0, regex=True)
|
|
res2 = df.copy()
|
|
res2.replace([r"\s*\.\s*", "b"], 0, regex=True, inplace=True)
|
|
res3 = df.copy()
|
|
res3.replace(regex=[r"\s*\.\s*", "b"], value=0, inplace=True)
|
|
expec = DataFrame(
|
|
{"a": mix_abc["a"], "b": ["a", 0, 0, 0], "c": ["a", 0, np.nan, "d"]}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
assert_frame_equal(res2, expec)
|
|
assert_frame_equal(res3, expec)
|
|
|
|
def test_regex_replace_series_of_regexes(self, mix_abc):
|
|
df = DataFrame(mix_abc)
|
|
s1 = Series({"b": r"\s*\.\s*"})
|
|
s2 = Series({"b": np.nan})
|
|
res = df.replace(s1, s2, regex=True)
|
|
res2 = df.copy()
|
|
res2.replace(s1, s2, inplace=True, regex=True)
|
|
res3 = df.copy()
|
|
res3.replace(regex=s1, value=s2, inplace=True)
|
|
expec = DataFrame(
|
|
{"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
assert_frame_equal(res2, expec)
|
|
assert_frame_equal(res3, expec)
|
|
|
|
def test_regex_replace_numeric_to_object_conversion(self, mix_abc):
|
|
df = DataFrame(mix_abc)
|
|
expec = DataFrame({"a": ["a", 1, 2, 3], "b": mix_abc["b"], "c": mix_abc["c"]})
|
|
res = df.replace(0, "a")
|
|
assert_frame_equal(res, expec)
|
|
assert res.a.dtype == np.object_
|
|
|
|
@pytest.mark.parametrize("metachar", ["[]", "()", r"\d", r"\w", r"\s"])
def test_replace_regex_metachar(self, metachar):
    """Without ``regex=True`` a regex metacharacter string is matched literally."""
    frame = DataFrame({"a": [metachar, "else"]})
    mapping = {"a": {metachar: "paren"}}

    got = frame.replace(mapping)

    assert_frame_equal(got, DataFrame({"a": ["paren", "else"]}))
|
|
|
|
def test_replace(self):
|
|
self.tsframe["A"][:5] = np.nan
|
|
self.tsframe["A"][-5:] = np.nan
|
|
|
|
zero_filled = self.tsframe.replace(np.nan, -1e8)
|
|
assert_frame_equal(zero_filled, self.tsframe.fillna(-1e8))
|
|
assert_frame_equal(zero_filled.replace(-1e8, np.nan), self.tsframe)
|
|
|
|
self.tsframe["A"][:5] = np.nan
|
|
self.tsframe["A"][-5:] = np.nan
|
|
self.tsframe["B"][:5] = -1e8
|
|
|
|
# empty
|
|
df = DataFrame(index=["a", "b"])
|
|
assert_frame_equal(df, df.replace(5, 7))
|
|
|
|
# GH 11698
|
|
# test for mixed data types.
|
|
df = pd.DataFrame(
|
|
[("-", pd.to_datetime("20150101")), ("a", pd.to_datetime("20150102"))]
|
|
)
|
|
df1 = df.replace("-", np.nan)
|
|
expected_df = pd.DataFrame(
|
|
[(np.nan, pd.to_datetime("20150101")), ("a", pd.to_datetime("20150102"))]
|
|
)
|
|
assert_frame_equal(df1, expected_df)
|
|
|
|
def test_replace_list(self):
|
|
obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")}
|
|
dfobj = DataFrame(obj)
|
|
|
|
# lists of regexes and values
|
|
# list of [v1, v2, ..., vN] -> [v1, v2, ..., vN]
|
|
to_replace_res = [r".", r"e"]
|
|
values = [np.nan, "crap"]
|
|
res = dfobj.replace(to_replace_res, values)
|
|
expec = DataFrame(
|
|
{
|
|
"a": ["a", "b", np.nan, np.nan],
|
|
"b": ["crap", "f", "g", "h"],
|
|
"c": ["h", "crap", "l", "o"],
|
|
}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
|
|
# list of [v1, v2, ..., vN] -> [v1, v2, .., vN]
|
|
to_replace_res = [r".", r"f"]
|
|
values = [r"..", r"crap"]
|
|
res = dfobj.replace(to_replace_res, values)
|
|
expec = DataFrame(
|
|
{
|
|
"a": ["a", "b", "..", ".."],
|
|
"b": ["e", "crap", "g", "h"],
|
|
"c": ["h", "e", "l", "o"],
|
|
}
|
|
)
|
|
assert_frame_equal(res, expec)
|
|
|
|
def test_replace_with_empty_list(self):
    """Empty-list arguments to ``replace`` on a column of lists.

    GH 21977: an empty ``to_replace`` list leaves the frame unchanged.
    GH 19266: a list used as the replacement value for NaN raises.
    """
    # GH 21977
    column = pd.Series([["a", "b"], [], np.nan, [1]])
    frame = pd.DataFrame({"col": column})
    wanted = frame
    got = frame.replace([], np.nan)
    assert_frame_equal(got, wanted)

    # GH 19266
    for bad_value in ([], ["dummy", "alt"]):
        with pytest.raises(ValueError, match="cannot assign mismatch"):
            frame.replace({np.nan: bad_value})
|
|
|
|
def test_replace_series_dict(self):
|
|
# from GH 3064
|
|
df = DataFrame({"zero": {"a": 0.0, "b": 1}, "one": {"a": 2.0, "b": 0}})
|
|
result = df.replace(0, {"zero": 0.5, "one": 1.0})
|
|
expected = DataFrame({"zero": {"a": 0.5, "b": 1}, "one": {"a": 2.0, "b": 1.0}})
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = df.replace(0, df.mean())
|
|
assert_frame_equal(result, expected)
|
|
|
|
# series to series/dict
|
|
df = DataFrame({"zero": {"a": 0.0, "b": 1}, "one": {"a": 2.0, "b": 0}})
|
|
s = Series({"zero": 0.0, "one": 2.0})
|
|
result = df.replace(s, {"zero": 0.5, "one": 1.0})
|
|
expected = DataFrame({"zero": {"a": 0.5, "b": 1}, "one": {"a": 1.0, "b": 0.0}})
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = df.replace(s, df.mean())
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_replace_convert(self):
|
|
# gh 3907
|
|
df = DataFrame([["foo", "bar", "bah"], ["bar", "foo", "bah"]])
|
|
m = {"foo": 1, "bar": 2, "bah": 3}
|
|
rep = df.replace(m)
|
|
expec = Series([np.int64] * 3)
|
|
res = rep.dtypes
|
|
assert_series_equal(expec, res)
|
|
|
|
def test_replace_mixed(self):
|
|
mf = self.mixed_frame
|
|
mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan
|
|
mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan
|
|
|
|
result = self.mixed_frame.replace(np.nan, -18)
|
|
expected = self.mixed_frame.fillna(value=-18)
|
|
assert_frame_equal(result, expected)
|
|
assert_frame_equal(result.replace(-18, np.nan), self.mixed_frame)
|
|
|
|
result = self.mixed_frame.replace(np.nan, -1e8)
|
|
expected = self.mixed_frame.fillna(value=-1e8)
|
|
assert_frame_equal(result, expected)
|
|
assert_frame_equal(result.replace(-1e8, np.nan), self.mixed_frame)
|
|
|
|
# int block upcasting
|
|
df = DataFrame(
|
|
{
|
|
"A": Series([1.0, 2.0], dtype="float64"),
|
|
"B": Series([0, 1], dtype="int64"),
|
|
}
|
|
)
|
|
expected = DataFrame(
|
|
{
|
|
"A": Series([1.0, 2.0], dtype="float64"),
|
|
"B": Series([0.5, 1], dtype="float64"),
|
|
}
|
|
)
|
|
result = df.replace(0, 0.5)
|
|
assert_frame_equal(result, expected)
|
|
|
|
df.replace(0, 0.5, inplace=True)
|
|
assert_frame_equal(df, expected)
|
|
|
|
# int block splitting
|
|
df = DataFrame(
|
|
{
|
|
"A": Series([1.0, 2.0], dtype="float64"),
|
|
"B": Series([0, 1], dtype="int64"),
|
|
"C": Series([1, 2], dtype="int64"),
|
|
}
|
|
)
|
|
expected = DataFrame(
|
|
{
|
|
"A": Series([1.0, 2.0], dtype="float64"),
|
|
"B": Series([0.5, 1], dtype="float64"),
|
|
"C": Series([1, 2], dtype="int64"),
|
|
}
|
|
)
|
|
result = df.replace(0, 0.5)
|
|
assert_frame_equal(result, expected)
|
|
|
|
# to object block upcasting
|
|
df = DataFrame(
|
|
{
|
|
"A": Series([1.0, 2.0], dtype="float64"),
|
|
"B": Series([0, 1], dtype="int64"),
|
|
}
|
|
)
|
|
expected = DataFrame(
|
|
{
|
|
"A": Series([1, "foo"], dtype="object"),
|
|
"B": Series([0, 1], dtype="int64"),
|
|
}
|
|
)
|
|
result = df.replace(2, "foo")
|
|
assert_frame_equal(result, expected)
|
|
|
|
expected = DataFrame(
|
|
{
|
|
"A": Series(["foo", "bar"], dtype="object"),
|
|
"B": Series([0, "foo"], dtype="object"),
|
|
}
|
|
)
|
|
result = df.replace([1, 2], ["foo", "bar"])
|
|
assert_frame_equal(result, expected)
|
|
|
|
# test case from
|
|
df = DataFrame(
|
|
{"A": Series([3, 0], dtype="int64"), "B": Series([0, 3], dtype="int64")}
|
|
)
|
|
result = df.replace(3, df.mean().to_dict())
|
|
expected = df.copy().astype("float64")
|
|
m = df.mean()
|
|
expected.iloc[0, 0] = m[0]
|
|
expected.iloc[1, 1] = m[1]
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_replace_simple_nested_dict(self):
|
|
df = DataFrame({"col": range(1, 5)})
|
|
expected = DataFrame({"col": ["a", 2, 3, "b"]})
|
|
|
|
result = df.replace({"col": {1: "a", 4: "b"}})
|
|
assert_frame_equal(expected, result)
|
|
|
|
# in this case, should be the same as the not nested version
|
|
result = df.replace({1: "a", 4: "b"})
|
|
assert_frame_equal(expected, result)
|
|
|
|
def test_replace_simple_nested_dict_with_nonexistent_value(self):
|
|
df = DataFrame({"col": range(1, 5)})
|
|
expected = DataFrame({"col": ["a", 2, 3, "b"]})
|
|
|
|
result = df.replace({-1: "-", 1: "a", 4: "b"})
|
|
assert_frame_equal(expected, result)
|
|
|
|
result = df.replace({"col": {-1: "-", 1: "a", 4: "b"}})
|
|
assert_frame_equal(expected, result)
|
|
|
|
def test_replace_value_is_none(self):
|
|
orig_value = self.tsframe.iloc[0, 0]
|
|
orig2 = self.tsframe.iloc[1, 0]
|
|
|
|
self.tsframe.iloc[0, 0] = np.nan
|
|
self.tsframe.iloc[1, 0] = 1
|
|
|
|
result = self.tsframe.replace(to_replace={np.nan: 0})
|
|
expected = self.tsframe.T.replace(to_replace={np.nan: 0}).T
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = self.tsframe.replace(to_replace={np.nan: 0, 1: -1e8})
|
|
tsframe = self.tsframe.copy()
|
|
tsframe.iloc[0, 0] = 0
|
|
tsframe.iloc[1, 0] = -1e8
|
|
expected = tsframe
|
|
assert_frame_equal(expected, result)
|
|
self.tsframe.iloc[0, 0] = orig_value
|
|
self.tsframe.iloc[1, 0] = orig2
|
|
|
|
def test_replace_for_new_dtypes(self):
|
|
|
|
# dtypes
|
|
tsframe = self.tsframe.copy().astype(np.float32)
|
|
tsframe["A"][:5] = np.nan
|
|
tsframe["A"][-5:] = np.nan
|
|
|
|
zero_filled = tsframe.replace(np.nan, -1e8)
|
|
assert_frame_equal(zero_filled, tsframe.fillna(-1e8))
|
|
assert_frame_equal(zero_filled.replace(-1e8, np.nan), tsframe)
|
|
|
|
tsframe["A"][:5] = np.nan
|
|
tsframe["A"][-5:] = np.nan
|
|
tsframe["B"][:5] = -1e8
|
|
|
|
b = tsframe["B"]
|
|
b[b == -1e8] = np.nan
|
|
tsframe["B"] = b
|
|
result = tsframe.fillna(method="bfill")
|
|
assert_frame_equal(result, tsframe.fillna(method="bfill"))
|
|
|
|
@pytest.mark.parametrize(
    "frame, to_replace, value, expected",
    [
        (DataFrame({"ints": [1, 2, 3]}), 1, 0, DataFrame({"ints": [0, 2, 3]})),
        (
            DataFrame({"ints": [1, 2, 3]}, dtype=np.int32),
            1,
            0,
            DataFrame({"ints": [0, 2, 3]}, dtype=np.int32),
        ),
        (
            DataFrame({"ints": [1, 2, 3]}, dtype=np.int16),
            1,
            0,
            DataFrame({"ints": [0, 2, 3]}, dtype=np.int16),
        ),
        (
            DataFrame({"bools": [True, False, True]}),
            False,
            True,
            DataFrame({"bools": [True, True, True]}),
        ),
        (
            DataFrame({"complex": [1j, 2j, 3j]}),
            1j,
            0,
            DataFrame({"complex": [0j, 2j, 3j]}),
        ),
        (
            DataFrame(
                {
                    "datetime64": Index(
                        [
                            datetime(2018, 5, 28),
                            datetime(2018, 7, 28),
                            datetime(2018, 5, 28),
                        ]
                    )
                }
            ),
            datetime(2018, 5, 28),
            datetime(2018, 7, 28),
            DataFrame({"datetime64": Index([datetime(2018, 7, 28)] * 3)}),
        ),
        # GH 20380
        (
            DataFrame({"dt": [datetime(3017, 12, 20)], "str": ["foo"]}),
            "foo",
            "bar",
            DataFrame({"dt": [datetime(3017, 12, 20)], "str": ["bar"]}),
        ),
        (
            DataFrame(
                {
                    "A": date_range("20130101", periods=3, tz="US/Eastern"),
                    "B": [0, np.nan, 2],
                }
            ),
            Timestamp("20130102", tz="US/Eastern"),
            Timestamp("20130104", tz="US/Eastern"),
            DataFrame(
                {
                    "A": [
                        Timestamp("20130101", tz="US/Eastern"),
                        Timestamp("20130104", tz="US/Eastern"),
                        Timestamp("20130103", tz="US/Eastern"),
                    ],
                    "B": [0, np.nan, 2],
                }
            ),
        ),
    ],
)
def test_replace_dtypes(self, frame, to_replace, value, expected):
    """Scalar replacement gives the expected frame for each parametrized dtype.

    The cases cover default, int32 and int16 integers, bool, complex,
    naive datetimes (including the GH 20380 frame with a year-3017 date)
    and tz-aware timestamps.
    """
    # Plain attribute call; getattr() with a constant name adds nothing.
    result = frame.replace(to_replace, value)
    assert_frame_equal(result, expected)
|
|
|
|
def test_replace_input_formats_listlike(self):
    """Dict/dict, list/list and scalar/dict argument combinations.

    Frame-level results are checked against the equivalent per-column or
    per-pair replacements. List arguments of different lengths must raise
    ``ValueError``.
    """
    # both dicts
    to_rep = {"A": np.nan, "B": 0, "C": ""}
    values = {"A": 0, "B": -1, "C": "missing"}
    df = DataFrame(
        {"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]}
    )
    filled = df.replace(to_rep, values)
    expected = {k: v.replace(to_rep[k], values[k]) for k, v in df.items()}
    assert_frame_equal(filled, DataFrame(expected))

    result = df.replace([0, 2, 5], [5, 2, 0])
    expected = DataFrame(
        {"A": [np.nan, 5, np.inf], "B": [5, 2, 0], "C": ["", "asdf", "fd"]}
    )
    assert_frame_equal(result, expected)

    # scalar to dict
    values = {"A": 0, "B": -1, "C": "missing"}
    df = DataFrame(
        {"A": [np.nan, 0, np.nan], "B": [0, 2, 5], "C": ["", "asdf", "fd"]}
    )
    filled = df.replace(np.nan, values)
    expected = {k: v.replace(np.nan, values[k]) for k, v in df.items()}
    assert_frame_equal(filled, DataFrame(expected))

    # list to list
    to_rep = [np.nan, 0, ""]
    values = [-2, -1, "missing"]
    result = df.replace(to_rep, values)
    expected = df.copy()
    # apply the pairs one at a time, in list order
    for old, new in zip(to_rep, values):
        expected.replace(old, new, inplace=True)
    assert_frame_equal(result, expected)

    msg = r"Replacement lists must match in length\. Expecting 3 got 2"
    with pytest.raises(ValueError, match=msg):
        df.replace(to_rep, values[1:])
|
|
|
|
def test_replace_input_formats_scalar(self):
    """Dict -> scalar and list -> scalar argument combinations.

    A dict ``to_replace`` combined with a list ``value`` must raise
    ``TypeError``.
    """
    df = DataFrame(
        {"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]}
    )

    # dict to scalar
    to_rep = {"A": np.nan, "B": 0, "C": ""}
    filled = df.replace(to_rep, 0)
    expected = {k: v.replace(to_rep[k], 0) for k, v in df.items()}
    assert_frame_equal(filled, DataFrame(expected))

    msg = "value argument must be scalar, dict, or Series"
    with pytest.raises(TypeError, match=msg):
        df.replace(to_rep, [np.nan, 0, ""])

    # list to scalar
    to_rep = [np.nan, 0, ""]
    result = df.replace(to_rep, -1)
    expected = df.copy()
    # apply the targets one at a time, in list order
    for old in to_rep:
        expected.replace(old, -1, inplace=True)
    assert_frame_equal(result, expected)
|
|
|
|
def test_replace_limit(self):
|
|
pass
|
|
|
|
def test_replace_dict_no_regex(self):
|
|
answer = Series(
|
|
{
|
|
0: "Strongly Agree",
|
|
1: "Agree",
|
|
2: "Neutral",
|
|
3: "Disagree",
|
|
4: "Strongly Disagree",
|
|
}
|
|
)
|
|
weights = {
|
|
"Agree": 4,
|
|
"Disagree": 2,
|
|
"Neutral": 3,
|
|
"Strongly Agree": 5,
|
|
"Strongly Disagree": 1,
|
|
}
|
|
expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1})
|
|
result = answer.replace(weights)
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_replace_series_no_regex(self):
|
|
answer = Series(
|
|
{
|
|
0: "Strongly Agree",
|
|
1: "Agree",
|
|
2: "Neutral",
|
|
3: "Disagree",
|
|
4: "Strongly Disagree",
|
|
}
|
|
)
|
|
weights = Series(
|
|
{
|
|
"Agree": 4,
|
|
"Disagree": 2,
|
|
"Neutral": 3,
|
|
"Strongly Agree": 5,
|
|
"Strongly Disagree": 1,
|
|
}
|
|
)
|
|
expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1})
|
|
result = answer.replace(weights)
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_replace_dict_tuple_list_ordering_remains_the_same(self):
|
|
df = DataFrame(dict(A=[np.nan, 1]))
|
|
res1 = df.replace(to_replace={np.nan: 0, 1: -1e8})
|
|
res2 = df.replace(to_replace=(1, np.nan), value=[-1e8, 0])
|
|
res3 = df.replace(to_replace=[1, np.nan], value=[-1e8, 0])
|
|
|
|
expected = DataFrame({"A": [0, -1e8]})
|
|
assert_frame_equal(res1, res2)
|
|
assert_frame_equal(res2, res3)
|
|
assert_frame_equal(res3, expected)
|
|
|
|
def test_replace_doesnt_replace_without_regex(self):
|
|
raw = """fol T_opp T_Dir T_Enh
|
|
0 1 0 0 vo
|
|
1 2 vr 0 0
|
|
2 2 0 0 0
|
|
3 3 0 bt 0"""
|
|
df = pd.read_csv(StringIO(raw), sep=r"\s+")
|
|
res = df.replace({r"\D": 1})
|
|
assert_frame_equal(df, res)
|
|
|
|
def test_replace_bool_with_string(self):
|
|
df = DataFrame({"a": [True, False], "b": list("ab")})
|
|
result = df.replace(True, "a")
|
|
expected = DataFrame({"a": ["a", False], "b": df.b})
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_replace_pure_bool_with_string_no_op(self):
|
|
df = DataFrame(np.random.rand(2, 2) > 0.5)
|
|
result = df.replace("asdf", "fdsa")
|
|
assert_frame_equal(df, result)
|
|
|
|
def test_replace_bool_with_bool(self):
|
|
df = DataFrame(np.random.rand(2, 2) > 0.5)
|
|
result = df.replace(False, True)
|
|
expected = DataFrame(np.ones((2, 2), dtype=bool))
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_replace_with_dict_with_bool_keys(self):
    """A mapping that mixes str and bool keys is expected to raise TypeError."""
    frame = DataFrame({0: [True, False], 1: [False, True]})
    mixed_keys = {"asdf": "asdb", True: "yes"}
    with pytest.raises(TypeError, match="Cannot compare types .+"):
        frame.replace(mixed_keys)
|
|
|
|
def test_replace_truthy(self):
|
|
df = DataFrame({"a": [True, True]})
|
|
r = df.replace([np.inf, -np.inf], np.nan)
|
|
e = df
|
|
assert_frame_equal(r, e)
|
|
|
|
def test_replace_int_to_int_chain(self):
    """An int mapping whose values are also keys is expected to raise."""
    frame = DataFrame({"a": [1, 2, 3, 4]})
    # 1 -> 2, 2 -> 3, 3 -> 4, 4 -> 5: each target is the next entry's key
    chained = {number: number + 1 for number in range(1, 5)}
    with pytest.raises(ValueError, match="Replacement not allowed .+"):
        frame.replace({"a": chained})
|
|
|
|
def test_replace_str_to_str_chain(self):
    """A str mapping whose values are also keys is expected to raise."""
    sources = np.arange(1, 5).astype(str)
    targets = np.arange(2, 6).astype(str)
    frame = DataFrame({"a": sources})
    # "1" -> "2", ..., "4" -> "5": each target is the next entry's key
    chained = {src: dst for src, dst in zip(sources, targets)}
    with pytest.raises(ValueError, match="Replacement not allowed .+"):
        frame.replace({"a": chained})
|
|
|
|
def test_replace_swapping_bug(self):
|
|
df = pd.DataFrame({"a": [True, False, True]})
|
|
res = df.replace({"a": {True: "Y", False: "N"}})
|
|
expect = pd.DataFrame({"a": ["Y", "N", "Y"]})
|
|
assert_frame_equal(res, expect)
|
|
|
|
df = pd.DataFrame({"a": [0, 1, 0]})
|
|
res = df.replace({"a": {0: "Y", 1: "N"}})
|
|
expect = pd.DataFrame({"a": ["Y", "N", "Y"]})
|
|
assert_frame_equal(res, expect)
|
|
|
|
def test_replace_period(self):
|
|
d = {
|
|
"fname": {
|
|
"out_augmented_AUG_2011.json": pd.Period(year=2011, month=8, freq="M"),
|
|
"out_augmented_JAN_2011.json": pd.Period(year=2011, month=1, freq="M"),
|
|
"out_augmented_MAY_2012.json": pd.Period(year=2012, month=5, freq="M"),
|
|
"out_augmented_SUBSIDY_WEEK.json": pd.Period(
|
|
year=2011, month=4, freq="M"
|
|
),
|
|
"out_augmented_AUG_2012.json": pd.Period(year=2012, month=8, freq="M"),
|
|
"out_augmented_MAY_2011.json": pd.Period(year=2011, month=5, freq="M"),
|
|
"out_augmented_SEP_2013.json": pd.Period(year=2013, month=9, freq="M"),
|
|
}
|
|
}
|
|
|
|
df = pd.DataFrame(
|
|
[
|
|
"out_augmented_AUG_2012.json",
|
|
"out_augmented_SEP_2013.json",
|
|
"out_augmented_SUBSIDY_WEEK.json",
|
|
"out_augmented_MAY_2012.json",
|
|
"out_augmented_MAY_2011.json",
|
|
"out_augmented_AUG_2011.json",
|
|
"out_augmented_JAN_2011.json",
|
|
],
|
|
columns=["fname"],
|
|
)
|
|
assert set(df.fname.values) == set(d["fname"].keys())
|
|
# We don't support converting object -> specialized EA in
|
|
# replace yet.
|
|
expected = DataFrame(
|
|
{"fname": [d["fname"][k] for k in df.fname.values]}, dtype=object
|
|
)
|
|
result = df.replace(d)
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_replace_datetime(self):
|
|
d = {
|
|
"fname": {
|
|
"out_augmented_AUG_2011.json": pd.Timestamp("2011-08"),
|
|
"out_augmented_JAN_2011.json": pd.Timestamp("2011-01"),
|
|
"out_augmented_MAY_2012.json": pd.Timestamp("2012-05"),
|
|
"out_augmented_SUBSIDY_WEEK.json": pd.Timestamp("2011-04"),
|
|
"out_augmented_AUG_2012.json": pd.Timestamp("2012-08"),
|
|
"out_augmented_MAY_2011.json": pd.Timestamp("2011-05"),
|
|
"out_augmented_SEP_2013.json": pd.Timestamp("2013-09"),
|
|
}
|
|
}
|
|
|
|
df = pd.DataFrame(
|
|
[
|
|
"out_augmented_AUG_2012.json",
|
|
"out_augmented_SEP_2013.json",
|
|
"out_augmented_SUBSIDY_WEEK.json",
|
|
"out_augmented_MAY_2012.json",
|
|
"out_augmented_MAY_2011.json",
|
|
"out_augmented_AUG_2011.json",
|
|
"out_augmented_JAN_2011.json",
|
|
],
|
|
columns=["fname"],
|
|
)
|
|
assert set(df.fname.values) == set(d["fname"].keys())
|
|
expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]})
|
|
result = df.replace(d)
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_replace_datetimetz(self):
|
|
|
|
# GH 11326
|
|
# behaving poorly when presented with a datetime64[ns, tz]
|
|
df = DataFrame(
|
|
{
|
|
"A": date_range("20130101", periods=3, tz="US/Eastern"),
|
|
"B": [0, np.nan, 2],
|
|
}
|
|
)
|
|
result = df.replace(np.nan, 1)
|
|
expected = DataFrame(
|
|
{
|
|
"A": date_range("20130101", periods=3, tz="US/Eastern"),
|
|
"B": Series([0, 1, 2], dtype="float64"),
|
|
}
|
|
)
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = df.fillna(1)
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = df.replace(0, np.nan)
|
|
expected = DataFrame(
|
|
{
|
|
"A": date_range("20130101", periods=3, tz="US/Eastern"),
|
|
"B": [np.nan, np.nan, 2],
|
|
}
|
|
)
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = df.replace(
|
|
Timestamp("20130102", tz="US/Eastern"),
|
|
Timestamp("20130104", tz="US/Eastern"),
|
|
)
|
|
expected = DataFrame(
|
|
{
|
|
"A": [
|
|
Timestamp("20130101", tz="US/Eastern"),
|
|
Timestamp("20130104", tz="US/Eastern"),
|
|
Timestamp("20130103", tz="US/Eastern"),
|
|
],
|
|
"B": [0, np.nan, 2],
|
|
}
|
|
)
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = df.copy()
|
|
result.iloc[1, 0] = np.nan
|
|
result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Eastern"))
|
|
assert_frame_equal(result, expected)
|
|
|
|
# coerce to object
|
|
result = df.copy()
|
|
result.iloc[1, 0] = np.nan
|
|
result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Pacific"))
|
|
expected = DataFrame(
|
|
{
|
|
"A": [
|
|
Timestamp("20130101", tz="US/Eastern"),
|
|
Timestamp("20130104", tz="US/Pacific"),
|
|
Timestamp("20130103", tz="US/Eastern"),
|
|
],
|
|
"B": [0, np.nan, 2],
|
|
}
|
|
)
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = df.copy()
|
|
result.iloc[1, 0] = np.nan
|
|
result = result.replace({"A": np.nan}, Timestamp("20130104"))
|
|
expected = DataFrame(
|
|
{
|
|
"A": [
|
|
Timestamp("20130101", tz="US/Eastern"),
|
|
Timestamp("20130104"),
|
|
Timestamp("20130103", tz="US/Eastern"),
|
|
],
|
|
"B": [0, np.nan, 2],
|
|
}
|
|
)
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_replace_with_empty_dictlike(self, mix_abc):
|
|
# GH 15289
|
|
df = DataFrame(mix_abc)
|
|
assert_frame_equal(df, df.replace({}))
|
|
assert_frame_equal(df, df.replace(Series([])))
|
|
|
|
assert_frame_equal(df, df.replace({"b": {}}))
|
|
assert_frame_equal(df, df.replace(Series({"b": {}})))
|
|
|
|
@pytest.mark.parametrize(
    "to_replace, method, expected",
    [
        (0, "bfill", dict(A=[1, 1, 2], B=[5, np.nan, 7], C=["a", "b", "c"])),
        (
            np.nan,
            "bfill",
            dict(A=[0, 1, 2], B=[5.0, 7.0, 7.0], C=["a", "b", "c"]),
        ),
        ("d", "ffill", dict(A=[0, 1, 2], B=[5, np.nan, 7], C=["a", "b", "c"])),
        (
            [0, 2],
            "bfill",
            dict(A=[1, 1, 2], B=[5, np.nan, 7], C=["a", "b", "c"]),
        ),
        (
            [1, 2],
            "pad",
            dict(A=[0, 0, 0], B=[5, np.nan, 7], C=["a", "b", "c"]),
        ),
        (
            (1, 2),
            "bfill",
            dict(A=[0, 2, 2], B=[5, np.nan, 7], C=["a", "b", "c"]),
        ),
        (
            ["b", "c"],
            "ffill",
            dict(A=[0, 1, 2], B=[5, np.nan, 7], C=["a", "a", "a"]),
        ),
    ],
)
def test_replace_method(self, to_replace, method, expected):
    """Call replace with value=None and a fill method for each case.

    Each parametrized case supplies the targets, the fill method name and
    the column data the resulting frame is compared against.
    """
    # GH 19632
    frame = DataFrame({"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]})

    filled = frame.replace(to_replace=to_replace, value=None, method=method)
    assert_frame_equal(filled, DataFrame(expected))
|