8th day of python challenges 111-117

This commit is contained in:
abd.shallal
2019-08-04 15:26:35 +03:00
parent b04c1b055f
commit 627802c383
3215 changed files with 760227 additions and 491 deletions

View File

@@ -0,0 +1,70 @@
import pytest
from pandas._config import detect_console_encoding
class MockEncoding: # TODO(py27): replace with mock
"""
Used to add a side effect when accessing the 'encoding' property. If the
side effect is a str in nature, the value will be returned. Otherwise, the
side effect should be an exception that will be raised.
"""
def __init__(self, encoding):
super().__init__()
self.val = encoding
@property
def encoding(self):
return self.raise_or_return(self.val)
@staticmethod
def raise_or_return(val):
if isinstance(val, str):
return val
else:
raise val
@pytest.mark.parametrize("empty,filled", [["stdin", "stdout"], ["stdout", "stdin"]])
def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled):
# Ensures that when sys.stdout.encoding or sys.stdin.encoding is used when
# they have values filled.
# GH 21552
with monkeypatch.context() as context:
context.setattr("sys.{}".format(empty), MockEncoding(""))
context.setattr("sys.{}".format(filled), MockEncoding(filled))
assert detect_console_encoding() == filled
@pytest.mark.parametrize("encoding", [AttributeError, IOError, "ascii"])
def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding):
# GH 21552
with monkeypatch.context() as context:
context.setattr("locale.getpreferredencoding", lambda: "foo")
context.setattr("sys.stdout", MockEncoding(encoding))
assert detect_console_encoding() == "foo"
@pytest.mark.parametrize(
"std,locale",
[
["ascii", "ascii"],
["ascii", Exception],
[AttributeError, "ascii"],
[AttributeError, Exception],
[IOError, "ascii"],
[IOError, Exception],
],
)
def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale):
# When both the stdout/stdin encoding and locale preferred encoding checks
# fail (or return 'ascii', we should default to the sys default encoding.
# GH 21552
with monkeypatch.context() as context:
context.setattr(
"locale.getpreferredencoding", lambda: MockEncoding.raise_or_return(locale)
)
context.setattr("sys.stdout", MockEncoding(std))
context.setattr("sys.getdefaultencoding", lambda: "sysDefaultEncoding")
assert detect_console_encoding() == "sysDefaultEncoding"

View File

@@ -0,0 +1,240 @@
import pytest
from pandas.util import testing as tm
from pandas.io.formats.css import CSSResolver, CSSWarning
def assert_resolves(css, props, inherited=None):
resolve = CSSResolver()
actual = resolve(css, inherited=inherited)
assert props == actual
def assert_same_resolution(css1, css2, inherited=None):
resolve = CSSResolver()
resolved1 = resolve(css1, inherited=inherited)
resolved2 = resolve(css2, inherited=inherited)
assert resolved1 == resolved2
@pytest.mark.parametrize(
"name,norm,abnorm",
[
(
"whitespace",
"hello: world; foo: bar",
" \t hello \t :\n world \n ; \n foo: \tbar\n\n",
),
("case", "hello: world; foo: bar", "Hello: WORLD; foO: bar"),
("empty-decl", "hello: world; foo: bar", "; hello: world;; foo: bar;\n; ;"),
("empty-list", "", ";"),
],
)
def test_css_parse_normalisation(name, norm, abnorm):
assert_same_resolution(norm, abnorm)
@pytest.mark.parametrize(
"invalid_css,remainder",
[
# No colon
("hello-world", ""),
("border-style: solid; hello-world", "border-style: solid"),
(
"border-style: solid; hello-world; font-weight: bold",
"border-style: solid; font-weight: bold",
),
# Unclosed string fail
# Invalid size
("font-size: blah", "font-size: 1em"),
("font-size: 1a2b", "font-size: 1em"),
("font-size: 1e5pt", "font-size: 1em"),
("font-size: 1+6pt", "font-size: 1em"),
("font-size: 1unknownunit", "font-size: 1em"),
("font-size: 10", "font-size: 1em"),
("font-size: 10 pt", "font-size: 1em"),
],
)
def test_css_parse_invalid(invalid_css, remainder):
with tm.assert_produces_warning(CSSWarning):
assert_same_resolution(invalid_css, remainder)
# TODO: we should be checking that in other cases no warnings are raised
@pytest.mark.parametrize(
"shorthand,expansions",
[
("margin", ["margin-top", "margin-right", "margin-bottom", "margin-left"]),
("padding", ["padding-top", "padding-right", "padding-bottom", "padding-left"]),
(
"border-width",
[
"border-top-width",
"border-right-width",
"border-bottom-width",
"border-left-width",
],
),
(
"border-color",
[
"border-top-color",
"border-right-color",
"border-bottom-color",
"border-left-color",
],
),
(
"border-style",
[
"border-top-style",
"border-right-style",
"border-bottom-style",
"border-left-style",
],
),
],
)
def test_css_side_shorthands(shorthand, expansions):
top, right, bottom, left = expansions
assert_resolves(
"{shorthand}: 1pt".format(shorthand=shorthand),
{top: "1pt", right: "1pt", bottom: "1pt", left: "1pt"},
)
assert_resolves(
"{shorthand}: 1pt 4pt".format(shorthand=shorthand),
{top: "1pt", right: "4pt", bottom: "1pt", left: "4pt"},
)
assert_resolves(
"{shorthand}: 1pt 4pt 2pt".format(shorthand=shorthand),
{top: "1pt", right: "4pt", bottom: "2pt", left: "4pt"},
)
assert_resolves(
"{shorthand}: 1pt 4pt 2pt 0pt".format(shorthand=shorthand),
{top: "1pt", right: "4pt", bottom: "2pt", left: "0pt"},
)
with tm.assert_produces_warning(CSSWarning):
assert_resolves(
"{shorthand}: 1pt 1pt 1pt 1pt 1pt".format(shorthand=shorthand), {}
)
@pytest.mark.parametrize(
"style,inherited,equiv",
[
("margin: 1px; margin: 2px", "", "margin: 2px"),
("margin: 1px", "margin: 2px", "margin: 1px"),
("margin: 1px; margin: inherit", "margin: 2px", "margin: 2px"),
(
"margin: 1px; margin-top: 2px",
"",
"margin-left: 1px; margin-right: 1px; "
+ "margin-bottom: 1px; margin-top: 2px",
),
("margin-top: 2px", "margin: 1px", "margin: 1px; margin-top: 2px"),
("margin: 1px", "margin-top: 2px", "margin: 1px"),
(
"margin: 1px; margin-top: inherit",
"margin: 2px",
"margin: 1px; margin-top: 2px",
),
],
)
def test_css_precedence(style, inherited, equiv):
resolve = CSSResolver()
inherited_props = resolve(inherited)
style_props = resolve(style, inherited=inherited_props)
equiv_props = resolve(equiv)
assert style_props == equiv_props
@pytest.mark.parametrize(
"style,equiv",
[
(
"margin: 1px; margin-top: inherit",
"margin-bottom: 1px; margin-right: 1px; margin-left: 1px",
),
("margin-top: inherit", ""),
("margin-top: initial", ""),
],
)
def test_css_none_absent(style, equiv):
assert_same_resolution(style, equiv)
@pytest.mark.parametrize(
"size,resolved",
[
("xx-small", "6pt"),
("x-small", "{pt:f}pt".format(pt=7.5)),
("small", "{pt:f}pt".format(pt=9.6)),
("medium", "12pt"),
("large", "{pt:f}pt".format(pt=13.5)),
("x-large", "18pt"),
("xx-large", "24pt"),
("8px", "6pt"),
("1.25pc", "15pt"),
(".25in", "18pt"),
("02.54cm", "72pt"),
("25.4mm", "72pt"),
("101.6q", "72pt"),
("101.6q", "72pt"),
],
)
@pytest.mark.parametrize("relative_to", [None, "16pt"]) # invariant to inherited size
def test_css_absolute_font_size(size, relative_to, resolved):
if relative_to is None:
inherited = None
else:
inherited = {"font-size": relative_to}
assert_resolves(
"font-size: {size}".format(size=size),
{"font-size": resolved},
inherited=inherited,
)
@pytest.mark.parametrize(
"size,relative_to,resolved",
[
("1em", None, "12pt"),
("1.0em", None, "12pt"),
("1.25em", None, "15pt"),
("1em", "16pt", "16pt"),
("1.0em", "16pt", "16pt"),
("1.25em", "16pt", "20pt"),
("1rem", "16pt", "12pt"),
("1.0rem", "16pt", "12pt"),
("1.25rem", "16pt", "15pt"),
("100%", None, "12pt"),
("125%", None, "15pt"),
("100%", "16pt", "16pt"),
("125%", "16pt", "20pt"),
("2ex", None, "12pt"),
("2.0ex", None, "12pt"),
("2.50ex", None, "15pt"),
("inherit", "16pt", "16pt"),
("smaller", None, "10pt"),
("smaller", "18pt", "15pt"),
("larger", None, "{pt:f}pt".format(pt=14.4)),
("larger", "15pt", "18pt"),
],
)
def test_css_relative_font_size(size, relative_to, resolved):
if relative_to is None:
inherited = None
else:
inherited = {"font-size": relative_to}
assert_resolves(
"font-size: {size}".format(size=size),
{"font-size": resolved},
inherited=inherited,
)

View File

@@ -0,0 +1,235 @@
import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas.util import testing as tm
import pandas.io.formats.format as fmt
class TestEngFormatter:
def test_eng_float_formatter(self):
df = DataFrame({"A": [1.41, 141.0, 14100, 1410000.0]})
fmt.set_eng_float_format()
result = df.to_string()
expected = (
" A\n"
"0 1.410E+00\n"
"1 141.000E+00\n"
"2 14.100E+03\n"
"3 1.410E+06"
)
assert result == expected
fmt.set_eng_float_format(use_eng_prefix=True)
result = df.to_string()
expected = " A\n0 1.410\n1 141.000\n2 14.100k\n3 1.410M"
assert result == expected
fmt.set_eng_float_format(accuracy=0)
result = df.to_string()
expected = " A\n0 1E+00\n1 141E+00\n2 14E+03\n3 1E+06"
assert result == expected
tm.reset_display_options()
def compare(self, formatter, input, output):
formatted_input = formatter(input)
assert formatted_input == output
def compare_all(self, formatter, in_out):
"""
Parameters:
-----------
formatter: EngFormatter under test
in_out: list of tuples. Each tuple = (number, expected_formatting)
It is tested if 'formatter(number) == expected_formatting'.
*number* should be >= 0 because formatter(-number) == fmt is also
tested. *fmt* is derived from *expected_formatting*
"""
for input, output in in_out:
self.compare(formatter, input, output)
self.compare(formatter, -input, "-" + output[1:])
def test_exponents_with_eng_prefix(self):
formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True)
f = np.sqrt(2)
in_out = [
(f * 10 ** -24, " 1.414y"),
(f * 10 ** -23, " 14.142y"),
(f * 10 ** -22, " 141.421y"),
(f * 10 ** -21, " 1.414z"),
(f * 10 ** -20, " 14.142z"),
(f * 10 ** -19, " 141.421z"),
(f * 10 ** -18, " 1.414a"),
(f * 10 ** -17, " 14.142a"),
(f * 10 ** -16, " 141.421a"),
(f * 10 ** -15, " 1.414f"),
(f * 10 ** -14, " 14.142f"),
(f * 10 ** -13, " 141.421f"),
(f * 10 ** -12, " 1.414p"),
(f * 10 ** -11, " 14.142p"),
(f * 10 ** -10, " 141.421p"),
(f * 10 ** -9, " 1.414n"),
(f * 10 ** -8, " 14.142n"),
(f * 10 ** -7, " 141.421n"),
(f * 10 ** -6, " 1.414u"),
(f * 10 ** -5, " 14.142u"),
(f * 10 ** -4, " 141.421u"),
(f * 10 ** -3, " 1.414m"),
(f * 10 ** -2, " 14.142m"),
(f * 10 ** -1, " 141.421m"),
(f * 10 ** 0, " 1.414"),
(f * 10 ** 1, " 14.142"),
(f * 10 ** 2, " 141.421"),
(f * 10 ** 3, " 1.414k"),
(f * 10 ** 4, " 14.142k"),
(f * 10 ** 5, " 141.421k"),
(f * 10 ** 6, " 1.414M"),
(f * 10 ** 7, " 14.142M"),
(f * 10 ** 8, " 141.421M"),
(f * 10 ** 9, " 1.414G"),
(f * 10 ** 10, " 14.142G"),
(f * 10 ** 11, " 141.421G"),
(f * 10 ** 12, " 1.414T"),
(f * 10 ** 13, " 14.142T"),
(f * 10 ** 14, " 141.421T"),
(f * 10 ** 15, " 1.414P"),
(f * 10 ** 16, " 14.142P"),
(f * 10 ** 17, " 141.421P"),
(f * 10 ** 18, " 1.414E"),
(f * 10 ** 19, " 14.142E"),
(f * 10 ** 20, " 141.421E"),
(f * 10 ** 21, " 1.414Z"),
(f * 10 ** 22, " 14.142Z"),
(f * 10 ** 23, " 141.421Z"),
(f * 10 ** 24, " 1.414Y"),
(f * 10 ** 25, " 14.142Y"),
(f * 10 ** 26, " 141.421Y"),
]
self.compare_all(formatter, in_out)
def test_exponents_without_eng_prefix(self):
formatter = fmt.EngFormatter(accuracy=4, use_eng_prefix=False)
f = np.pi
in_out = [
(f * 10 ** -24, " 3.1416E-24"),
(f * 10 ** -23, " 31.4159E-24"),
(f * 10 ** -22, " 314.1593E-24"),
(f * 10 ** -21, " 3.1416E-21"),
(f * 10 ** -20, " 31.4159E-21"),
(f * 10 ** -19, " 314.1593E-21"),
(f * 10 ** -18, " 3.1416E-18"),
(f * 10 ** -17, " 31.4159E-18"),
(f * 10 ** -16, " 314.1593E-18"),
(f * 10 ** -15, " 3.1416E-15"),
(f * 10 ** -14, " 31.4159E-15"),
(f * 10 ** -13, " 314.1593E-15"),
(f * 10 ** -12, " 3.1416E-12"),
(f * 10 ** -11, " 31.4159E-12"),
(f * 10 ** -10, " 314.1593E-12"),
(f * 10 ** -9, " 3.1416E-09"),
(f * 10 ** -8, " 31.4159E-09"),
(f * 10 ** -7, " 314.1593E-09"),
(f * 10 ** -6, " 3.1416E-06"),
(f * 10 ** -5, " 31.4159E-06"),
(f * 10 ** -4, " 314.1593E-06"),
(f * 10 ** -3, " 3.1416E-03"),
(f * 10 ** -2, " 31.4159E-03"),
(f * 10 ** -1, " 314.1593E-03"),
(f * 10 ** 0, " 3.1416E+00"),
(f * 10 ** 1, " 31.4159E+00"),
(f * 10 ** 2, " 314.1593E+00"),
(f * 10 ** 3, " 3.1416E+03"),
(f * 10 ** 4, " 31.4159E+03"),
(f * 10 ** 5, " 314.1593E+03"),
(f * 10 ** 6, " 3.1416E+06"),
(f * 10 ** 7, " 31.4159E+06"),
(f * 10 ** 8, " 314.1593E+06"),
(f * 10 ** 9, " 3.1416E+09"),
(f * 10 ** 10, " 31.4159E+09"),
(f * 10 ** 11, " 314.1593E+09"),
(f * 10 ** 12, " 3.1416E+12"),
(f * 10 ** 13, " 31.4159E+12"),
(f * 10 ** 14, " 314.1593E+12"),
(f * 10 ** 15, " 3.1416E+15"),
(f * 10 ** 16, " 31.4159E+15"),
(f * 10 ** 17, " 314.1593E+15"),
(f * 10 ** 18, " 3.1416E+18"),
(f * 10 ** 19, " 31.4159E+18"),
(f * 10 ** 20, " 314.1593E+18"),
(f * 10 ** 21, " 3.1416E+21"),
(f * 10 ** 22, " 31.4159E+21"),
(f * 10 ** 23, " 314.1593E+21"),
(f * 10 ** 24, " 3.1416E+24"),
(f * 10 ** 25, " 31.4159E+24"),
(f * 10 ** 26, " 314.1593E+24"),
]
self.compare_all(formatter, in_out)
def test_rounding(self):
formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True)
in_out = [
(5.55555, " 5.556"),
(55.5555, " 55.556"),
(555.555, " 555.555"),
(5555.55, " 5.556k"),
(55555.5, " 55.556k"),
(555555, " 555.555k"),
]
self.compare_all(formatter, in_out)
formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True)
in_out = [
(5.55555, " 5.6"),
(55.5555, " 55.6"),
(555.555, " 555.6"),
(5555.55, " 5.6k"),
(55555.5, " 55.6k"),
(555555, " 555.6k"),
]
self.compare_all(formatter, in_out)
formatter = fmt.EngFormatter(accuracy=0, use_eng_prefix=True)
in_out = [
(5.55555, " 6"),
(55.5555, " 56"),
(555.555, " 556"),
(5555.55, " 6k"),
(55555.5, " 56k"),
(555555, " 556k"),
]
self.compare_all(formatter, in_out)
formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True)
result = formatter(0)
assert result == " 0.000"
def test_nan(self):
# Issue #11981
formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True)
result = formatter(np.nan)
assert result == "NaN"
df = pd.DataFrame(
{
"a": [1.5, 10.3, 20.5],
"b": [50.3, 60.67, 70.12],
"c": [100.2, 101.33, 120.33],
}
)
pt = df.pivot_table(values="a", index="b", columns="c")
fmt.set_eng_float_format(accuracy=1)
result = pt.to_string()
assert "NaN" in result
tm.reset_display_options()
def test_inf(self):
# Issue #11981
formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True)
result = formatter(np.inf)
assert result == "inf"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,205 @@
import numpy as np
import pytest
import pandas._config.config as cf
import pandas as pd
import pandas.io.formats.format as fmt
import pandas.io.formats.printing as printing
def test_adjoin():
data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
expected = "a dd ggg\nb ee hhh\nc ff iii"
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
def test_repr_binary_type():
import string
letters = string.ascii_letters
try:
raw = bytes(letters, encoding=cf.get_option("display.encoding"))
except TypeError:
raw = bytes(letters)
b = str(raw.decode("utf-8"))
res = printing.pprint_thing(b, quote_strings=True)
assert res == repr(b)
res = printing.pprint_thing(b, quote_strings=False)
assert res == b
class TestFormattBase:
def test_adjoin(self):
data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
expected = "a dd ggg\nb ee hhh\nc ff iii"
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
def test_adjoin_unicode(self):
data = [["", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "hhh", "いいい"]]
expected = "あ dd ggg\nb ええ hhh\nc ff いいい"
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
adj = fmt.EastAsianTextAdjustment()
expected = """あ dd ggg
b ええ hhh
c ff いいい"""
adjoined = adj.adjoin(2, *data)
assert adjoined == expected
cols = adjoined.split("\n")
assert adj.len(cols[0]) == 13
assert adj.len(cols[1]) == 13
assert adj.len(cols[2]) == 16
expected = """あ dd ggg
b ええ hhh
c ff いいい"""
adjoined = adj.adjoin(7, *data)
assert adjoined == expected
cols = adjoined.split("\n")
assert adj.len(cols[0]) == 23
assert adj.len(cols[1]) == 23
assert adj.len(cols[2]) == 26
def test_justify(self):
adj = fmt.EastAsianTextAdjustment()
def just(x, *args, **kwargs):
# wrapper to test single str
return adj.justify([x], *args, **kwargs)[0]
assert just("abc", 5, mode="left") == "abc "
assert just("abc", 5, mode="center") == " abc "
assert just("abc", 5, mode="right") == " abc"
assert just("abc", 5, mode="left") == "abc "
assert just("abc", 5, mode="center") == " abc "
assert just("abc", 5, mode="right") == " abc"
assert just("パンダ", 5, mode="left") == "パンダ"
assert just("パンダ", 5, mode="center") == "パンダ"
assert just("パンダ", 5, mode="right") == "パンダ"
assert just("パンダ", 10, mode="left") == "パンダ "
assert just("パンダ", 10, mode="center") == " パンダ "
assert just("パンダ", 10, mode="right") == " パンダ"
def test_east_asian_len(self):
adj = fmt.EastAsianTextAdjustment()
assert adj.len("abc") == 3
assert adj.len("abc") == 3
assert adj.len("パンダ") == 6
assert adj.len("パンダ") == 5
assert adj.len("パンダpanda") == 11
assert adj.len("パンダpanda") == 10
def test_ambiguous_width(self):
adj = fmt.EastAsianTextAdjustment()
assert adj.len("¡¡ab") == 4
with cf.option_context("display.unicode.ambiguous_as_wide", True):
adj = fmt.EastAsianTextAdjustment()
assert adj.len("¡¡ab") == 6
data = [["", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "¡¡ab", "いいい"]]
expected = "あ dd ggg \nb ええ ¡¡ab\nc ff いいい"
adjoined = adj.adjoin(2, *data)
assert adjoined == expected
class TestTableSchemaRepr:
@classmethod
def setup_class(cls):
pytest.importorskip("IPython")
from IPython.core.interactiveshell import InteractiveShell
cls.display_formatter = InteractiveShell.instance().display_formatter
def test_publishes(self):
df = pd.DataFrame({"A": [1, 2]})
objects = [df["A"], df, df] # dataframe / series
expected_keys = [
{"text/plain", "application/vnd.dataresource+json"},
{"text/plain", "text/html", "application/vnd.dataresource+json"},
]
opt = pd.option_context("display.html.table_schema", True)
for obj, expected in zip(objects, expected_keys):
with opt:
formatted = self.display_formatter.format(obj)
assert set(formatted[0].keys()) == expected
with_latex = pd.option_context("display.latex.repr", True)
with opt, with_latex:
formatted = self.display_formatter.format(obj)
expected = {
"text/plain",
"text/html",
"text/latex",
"application/vnd.dataresource+json",
}
assert set(formatted[0].keys()) == expected
def test_publishes_not_implemented(self):
# column MultiIndex
# GH 15996
midx = pd.MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
df = pd.DataFrame(np.random.randn(5, len(midx)), columns=midx)
opt = pd.option_context("display.html.table_schema", True)
with opt:
formatted = self.display_formatter.format(df)
expected = {"text/plain", "text/html"}
assert set(formatted[0].keys()) == expected
def test_config_on(self):
df = pd.DataFrame({"A": [1, 2]})
with pd.option_context("display.html.table_schema", True):
result = df._repr_data_resource_()
assert result is not None
def test_config_default_off(self):
df = pd.DataFrame({"A": [1, 2]})
with pd.option_context("display.html.table_schema", False):
result = df._repr_data_resource_()
assert result is None
def test_enable_data_resource_formatter(self):
# GH 10491
formatters = self.display_formatter.formatters
mimetype = "application/vnd.dataresource+json"
with pd.option_context("display.html.table_schema", True):
assert "application/vnd.dataresource+json" in formatters
assert formatters[mimetype].enabled
# still there, just disabled
assert "application/vnd.dataresource+json" in formatters
assert not formatters[mimetype].enabled
# able to re-set
with pd.option_context("display.html.table_schema", True):
assert "application/vnd.dataresource+json" in formatters
assert formatters[mimetype].enabled
# smoke test that it works
self.display_formatter.format(cf)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,517 @@
import os
import sys
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, compat
from pandas.util import testing as tm
class TestToCSV:
@pytest.mark.xfail(
(3, 6, 5) > sys.version_info >= (3, 5),
reason=("Python csv library bug (see https://bugs.python.org/issue32255)"),
)
def test_to_csv_with_single_column(self):
# see gh-18676, https://bugs.python.org/issue32255
#
# Python's CSV library adds an extraneous '""'
# before the newline when the NaN-value is in
# the first row. Otherwise, only the newline
# character is added. This behavior is inconsistent
# and was patched in https://bugs.python.org/pull_request4672.
df1 = DataFrame([None, 1])
expected1 = """\
""
1.0
"""
with tm.ensure_clean("test.csv") as path:
df1.to_csv(path, header=None, index=None)
with open(path, "r") as f:
assert f.read() == expected1
df2 = DataFrame([1, None])
expected2 = """\
1.0
""
"""
with tm.ensure_clean("test.csv") as path:
df2.to_csv(path, header=None, index=None)
with open(path, "r") as f:
assert f.read() == expected2
def test_to_csv_defualt_encoding(self):
# GH17097
df = DataFrame({"col": ["AAAAA", "ÄÄÄÄÄ", "ßßßßß", "聞聞聞聞聞"]})
with tm.ensure_clean("test.csv") as path:
# the default to_csv encoding is uft-8.
df.to_csv(path)
tm.assert_frame_equal(pd.read_csv(path, index_col=0), df)
def test_to_csv_quotechar(self):
df = DataFrame({"col": [1, 2]})
expected = """\
"","col"
"0","1"
"1","2"
"""
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1) # 1=QUOTE_ALL
with open(path, "r") as f:
assert f.read() == expected
expected = """\
$$,$col$
$0$,$1$
$1$,$2$
"""
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1, quotechar="$")
with open(path, "r") as f:
assert f.read() == expected
with tm.ensure_clean("test.csv") as path:
with pytest.raises(TypeError, match="quotechar"):
df.to_csv(path, quoting=1, quotechar=None)
def test_to_csv_doublequote(self):
df = DataFrame({"col": ['a"a', '"bb"']})
expected = '''\
"","col"
"0","a""a"
"1","""bb"""
'''
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL
with open(path, "r") as f:
assert f.read() == expected
from _csv import Error
with tm.ensure_clean("test.csv") as path:
with pytest.raises(Error, match="escapechar"):
df.to_csv(path, doublequote=False) # no escapechar set
def test_to_csv_escapechar(self):
df = DataFrame({"col": ['a"a', '"bb"']})
expected = """\
"","col"
"0","a\\"a"
"1","\\"bb\\""
"""
with tm.ensure_clean("test.csv") as path: # QUOTE_ALL
df.to_csv(path, quoting=1, doublequote=False, escapechar="\\")
with open(path, "r") as f:
assert f.read() == expected
df = DataFrame({"col": ["a,a", ",bb,"]})
expected = """\
,col
0,a\\,a
1,\\,bb\\,
"""
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE
with open(path, "r") as f:
assert f.read() == expected
def test_csv_to_string(self):
df = DataFrame({"col": [1, 2]})
expected_rows = [",col", "0,1", "1,2"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv() == expected
def test_to_csv_decimal(self):
# see gh-781
df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]})
expected_rows = [",col1,col2,col3", "0,1,a,10.1"]
expected_default = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv() == expected_default
expected_rows = [";col1;col2;col3", "0;1;a;10,1"]
expected_european_excel = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(decimal=",", sep=";") == expected_european_excel
expected_rows = [",col1,col2,col3", "0,1,a,10.10"]
expected_float_format_default = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(float_format="%.2f") == expected_float_format_default
expected_rows = [";col1;col2;col3", "0;1;a;10,10"]
expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows)
assert (
df.to_csv(decimal=",", sep=";", float_format="%.2f")
== expected_float_format
)
# see gh-11553: testing if decimal is taken into account for '0.0'
df = pd.DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1})
expected_rows = ["a,b,c", "0^0,2^2,1", "1^1,3^3,1"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(index=False, decimal="^") == expected
# same but for an index
assert df.set_index("a").to_csv(decimal="^") == expected
# same for a multi-index
assert df.set_index(["a", "b"]).to_csv(decimal="^") == expected
def test_to_csv_float_format(self):
# testing if float_format is taken into account for the index
# GH 11553
df = pd.DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1})
expected_rows = ["a,b,c", "0,2.20,1", "1,3.30,1"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(float_format="%.2f") == expected
# same for a multi-index
assert df.set_index(["a", "b"]).to_csv(float_format="%.2f") == expected
def test_to_csv_na_rep(self):
# see gh-11553
#
# Testing if NaN values are correctly represented in the index.
df = DataFrame({"a": [0, np.NaN], "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "0.0,0,2", "_,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
# now with an index containing only NaNs
df = DataFrame({"a": np.NaN, "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "_,0,2", "_,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
# check if na_rep parameter does not break anything when no NaN
df = DataFrame({"a": 0, "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "0,0,2", "0,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
def test_to_csv_date_format(self):
# GH 10209
df_sec = DataFrame({"A": pd.date_range("20130101", periods=5, freq="s")})
df_day = DataFrame({"A": pd.date_range("20130101", periods=5, freq="d")})
expected_rows = [
",A",
"0,2013-01-01 00:00:00",
"1,2013-01-01 00:00:01",
"2,2013-01-01 00:00:02",
"3,2013-01-01 00:00:03",
"4,2013-01-01 00:00:04",
]
expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_sec.to_csv() == expected_default_sec
expected_rows = [
",A",
"0,2013-01-01 00:00:00",
"1,2013-01-02 00:00:00",
"2,2013-01-03 00:00:00",
"3,2013-01-04 00:00:00",
"4,2013-01-05 00:00:00",
]
expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_day.to_csv(date_format="%Y-%m-%d %H:%M:%S") == expected_ymdhms_day
expected_rows = [
",A",
"0,2013-01-01",
"1,2013-01-01",
"2,2013-01-01",
"3,2013-01-01",
"4,2013-01-01",
]
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_sec.to_csv(date_format="%Y-%m-%d") == expected_ymd_sec
expected_rows = [
",A",
"0,2013-01-01",
"1,2013-01-02",
"2,2013-01-03",
"3,2013-01-04",
"4,2013-01-05",
]
expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_day.to_csv() == expected_default_day
assert df_day.to_csv(date_format="%Y-%m-%d") == expected_default_day
# see gh-7791
#
# Testing if date_format parameter is taken into account
# for multi-indexed DataFrames.
df_sec["B"] = 0
df_sec["C"] = 1
expected_rows = ["A,B,C", "2013-01-01,0,1"]
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"])
assert df_sec_grouped.mean().to_csv(date_format="%Y-%m-%d") == expected_ymd_sec
def test_to_csv_multi_index(self):
# see gh-6618
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]))
exp_rows = [",1", ",2", "0,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["1", "2", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
df = DataFrame(
[1],
columns=pd.MultiIndex.from_arrays([[1], [2]]),
index=pd.MultiIndex.from_arrays([[1], [2]]),
)
exp_rows = [",,1", ",,2", "1,2,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["1", "2", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([["foo"], ["bar"]]))
exp_rows = [",foo", ",bar", "0,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["foo", "bar", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
@pytest.mark.parametrize(
"ind,expected",
[
(
pd.MultiIndex(levels=[[1.0]], codes=[[0]], names=["x"]),
"x,data\n1.0,1\n",
),
(
pd.MultiIndex(
levels=[[1.0], [2.0]], codes=[[0], [0]], names=["x", "y"]
),
"x,y,data\n1.0,2.0,1\n",
),
],
)
@pytest.mark.parametrize("klass", [pd.DataFrame, pd.Series])
def test_to_csv_single_level_multi_index(self, ind, expected, klass):
# see gh-19589
result = klass(pd.Series([1], ind, name="data")).to_csv(
line_terminator="\n", header=True
)
assert result == expected
def test_to_csv_string_array_ascii(self):
# GH 10813
str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}]
df = pd.DataFrame(str_array)
expected_ascii = """\
,names
0,"['foo', 'bar']"
1,"['baz', 'qux']"
"""
with tm.ensure_clean("str_test.csv") as path:
df.to_csv(path, encoding="ascii")
with open(path, "r") as f:
assert f.read() == expected_ascii
@pytest.mark.xfail(strict=False)
def test_to_csv_string_array_utf8(self):
# GH 10813
str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}]
df = pd.DataFrame(str_array)
expected_utf8 = """\
,names
0,"['foo', 'bar']"
1,"['baz', 'qux']"
"""
with tm.ensure_clean("unicode_test.csv") as path:
df.to_csv(path, encoding="utf-8")
with open(path, "r") as f:
assert f.read() == expected_utf8
def test_to_csv_string_with_lf(self):
# GH 20353
data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]}
df = pd.DataFrame(data)
with tm.ensure_clean("lf_test.csv") as path:
# case 1: The default line terminator(=os.linesep)(PR 21406)
os_linesep = os.linesep.encode("utf-8")
expected_noarg = (
b"int,str_lf"
+ os_linesep
+ b"1,abc"
+ os_linesep
+ b'2,"d\nef"'
+ os_linesep
+ b'3,"g\nh\n\ni"'
+ os_linesep
)
df.to_csv(path, index=False)
with open(path, "rb") as f:
assert f.read() == expected_noarg
with tm.ensure_clean("lf_test.csv") as path:
# case 2: LF as line terminator
expected_lf = b"int,str_lf\n" b"1,abc\n" b'2,"d\nef"\n' b'3,"g\nh\n\ni"\n'
df.to_csv(path, line_terminator="\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_lf
with tm.ensure_clean("lf_test.csv") as path:
# case 3: CRLF as line terminator
# 'line_terminator' should not change inner element
expected_crlf = (
b"int,str_lf\r\n" b"1,abc\r\n" b'2,"d\nef"\r\n' b'3,"g\nh\n\ni"\r\n'
)
df.to_csv(path, line_terminator="\r\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_crlf
def test_to_csv_string_with_crlf(self):
# GH 20353
data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]}
df = pd.DataFrame(data)
with tm.ensure_clean("crlf_test.csv") as path:
# case 1: The default line terminator(=os.linesep)(PR 21406)
os_linesep = os.linesep.encode("utf-8")
expected_noarg = (
b"int,str_crlf"
+ os_linesep
+ b"1,abc"
+ os_linesep
+ b'2,"d\r\nef"'
+ os_linesep
+ b'3,"g\r\nh\r\n\r\ni"'
+ os_linesep
)
df.to_csv(path, index=False)
with open(path, "rb") as f:
assert f.read() == expected_noarg
with tm.ensure_clean("crlf_test.csv") as path:
# case 2: LF as line terminator
expected_lf = (
b"int,str_crlf\n" b"1,abc\n" b'2,"d\r\nef"\n' b'3,"g\r\nh\r\n\r\ni"\n'
)
df.to_csv(path, line_terminator="\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_lf
with tm.ensure_clean("crlf_test.csv") as path:
# case 3: CRLF as line terminator
# 'line_terminator' should not change inner element
expected_crlf = (
b"int,str_crlf\r\n"
b"1,abc\r\n"
b'2,"d\r\nef"\r\n'
b'3,"g\r\nh\r\n\r\ni"\r\n'
)
df.to_csv(path, line_terminator="\r\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_crlf
def test_to_csv_stdout_file(self, capsys):
# GH 21561
df = pd.DataFrame(
[["foo", "bar"], ["baz", "qux"]], columns=["name_1", "name_2"]
)
expected_rows = [",name_1,name_2", "0,foo,bar", "1,baz,qux"]
expected_ascii = tm.convert_rows_list_to_csv_str(expected_rows)
df.to_csv(sys.stdout, encoding="ascii")
captured = capsys.readouterr()
assert captured.out == expected_ascii
assert not sys.stdout.closed
@pytest.mark.xfail(
compat.is_platform_windows(),
reason=(
"Especially in Windows, file stream should not be passed"
"to csv writer without newline='' option."
"(https://docs.python.org/3.6/library/csv.html#csv.writer)"
),
)
def test_to_csv_write_to_open_file(self):
# GH 21696
df = pd.DataFrame({"a": ["x", "y", "z"]})
expected = """\
manual header
x
y
z
"""
with tm.ensure_clean("test.txt") as path:
with open(path, "w") as f:
f.write("manual header\n")
df.to_csv(f, header=None, index=None)
with open(path, "r") as f:
assert f.read() == expected
def test_to_csv_write_to_open_file_with_newline_py3(self):
# see gh-21696
# see gh-20353
df = pd.DataFrame({"a": ["x", "y", "z"]})
expected_rows = ["x", "y", "z"]
expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows)
with tm.ensure_clean("test.txt") as path:
with open(path, "w", newline="") as f:
f.write("manual header\n")
df.to_csv(f, header=None, index=None)
with open(path, "rb") as f:
assert f.read() == bytes(expected, "utf-8")
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_csv_compression(self, compression_only, read_infer, to_infer):
# see gh-15008
compression = compression_only
if compression == "zip":
pytest.skip(
"{compression} is not supported "
"for to_csv".format(compression=compression)
)
# We'll complete file extension subsequently.
filename = "test."
if compression == "gzip":
filename += "gz"
else:
# xz --> .xz
# bz2 --> .bz2
filename += compression
df = DataFrame({"A": [1]})
to_compression = "infer" if to_infer else compression
read_compression = "infer" if read_infer else compression
with tm.ensure_clean(filename) as path:
df.to_csv(path, compression=to_compression)
result = pd.read_csv(path, index_col=0, compression=read_compression)
tm.assert_frame_equal(result, df)

View File

@@ -0,0 +1,315 @@
"""Tests formatting as writer-agnostic ExcelCells
ExcelFormatter is tested implicitly in pandas/tests/io/test_excel.py
"""
import pytest
import pandas.util.testing as tm
from pandas.io.formats.css import CSSWarning
from pandas.io.formats.excel import CSSToExcelConverter
@pytest.mark.parametrize(
"css,expected",
[
# FONT
# - name
("font-family: foo,bar", {"font": {"name": "foo"}}),
('font-family: "foo bar",baz', {"font": {"name": "foo bar"}}),
("font-family: foo,\nbar", {"font": {"name": "foo"}}),
("font-family: foo, bar, baz", {"font": {"name": "foo"}}),
("font-family: bar, foo", {"font": {"name": "bar"}}),
("font-family: 'foo bar', baz", {"font": {"name": "foo bar"}}),
("font-family: 'foo \\'bar', baz", {"font": {"name": "foo 'bar"}}),
('font-family: "foo \\"bar", baz', {"font": {"name": 'foo "bar'}}),
('font-family: "foo ,bar", baz', {"font": {"name": "foo ,bar"}}),
# - family
("font-family: serif", {"font": {"name": "serif", "family": 1}}),
("font-family: Serif", {"font": {"name": "serif", "family": 1}}),
("font-family: roman, serif", {"font": {"name": "roman", "family": 1}}),
("font-family: roman, sans-serif", {"font": {"name": "roman", "family": 2}}),
("font-family: roman, sans serif", {"font": {"name": "roman"}}),
("font-family: roman, sansserif", {"font": {"name": "roman"}}),
("font-family: roman, cursive", {"font": {"name": "roman", "family": 4}}),
("font-family: roman, fantasy", {"font": {"name": "roman", "family": 5}}),
# - size
("font-size: 1em", {"font": {"size": 12}}),
("font-size: xx-small", {"font": {"size": 6}}),
("font-size: x-small", {"font": {"size": 7.5}}),
("font-size: small", {"font": {"size": 9.6}}),
("font-size: medium", {"font": {"size": 12}}),
("font-size: large", {"font": {"size": 13.5}}),
("font-size: x-large", {"font": {"size": 18}}),
("font-size: xx-large", {"font": {"size": 24}}),
("font-size: 50%", {"font": {"size": 6}}),
# - bold
("font-weight: 100", {"font": {"bold": False}}),
("font-weight: 200", {"font": {"bold": False}}),
("font-weight: 300", {"font": {"bold": False}}),
("font-weight: 400", {"font": {"bold": False}}),
("font-weight: normal", {"font": {"bold": False}}),
("font-weight: lighter", {"font": {"bold": False}}),
("font-weight: bold", {"font": {"bold": True}}),
("font-weight: bolder", {"font": {"bold": True}}),
("font-weight: 700", {"font": {"bold": True}}),
("font-weight: 800", {"font": {"bold": True}}),
("font-weight: 900", {"font": {"bold": True}}),
# - italic
("font-style: italic", {"font": {"italic": True}}),
("font-style: oblique", {"font": {"italic": True}}),
# - underline
("text-decoration: underline", {"font": {"underline": "single"}}),
("text-decoration: overline", {}),
("text-decoration: none", {}),
# - strike
("text-decoration: line-through", {"font": {"strike": True}}),
(
"text-decoration: underline line-through",
{"font": {"strike": True, "underline": "single"}},
),
(
"text-decoration: underline; text-decoration: line-through",
{"font": {"strike": True}},
),
# - color
("color: red", {"font": {"color": "FF0000"}}),
("color: #ff0000", {"font": {"color": "FF0000"}}),
("color: #f0a", {"font": {"color": "FF00AA"}}),
# - shadow
("text-shadow: none", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px #CCC", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px #999", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px", {"font": {"shadow": False}}),
("text-shadow: 2px -0em 0px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -2em 0px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -0em 2px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -0em 2px", {"font": {"shadow": True}}),
("text-shadow: 0px -2em", {"font": {"shadow": True}}),
# FILL
# - color, fillType
(
"background-color: red",
{"fill": {"fgColor": "FF0000", "patternType": "solid"}},
),
(
"background-color: #ff0000",
{"fill": {"fgColor": "FF0000", "patternType": "solid"}},
),
(
"background-color: #f0a",
{"fill": {"fgColor": "FF00AA", "patternType": "solid"}},
),
# BORDER
# - style
(
"border-style: solid",
{
"border": {
"top": {"style": "medium"},
"bottom": {"style": "medium"},
"left": {"style": "medium"},
"right": {"style": "medium"},
}
},
),
(
"border-style: solid; border-width: thin",
{
"border": {
"top": {"style": "thin"},
"bottom": {"style": "thin"},
"left": {"style": "thin"},
"right": {"style": "thin"},
}
},
),
(
"border-top-style: solid; border-top-width: thin",
{"border": {"top": {"style": "thin"}}},
),
(
"border-top-style: solid; border-top-width: 1pt",
{"border": {"top": {"style": "thin"}}},
),
("border-top-style: solid", {"border": {"top": {"style": "medium"}}}),
(
"border-top-style: solid; border-top-width: medium",
{"border": {"top": {"style": "medium"}}},
),
(
"border-top-style: solid; border-top-width: 2pt",
{"border": {"top": {"style": "medium"}}},
),
(
"border-top-style: solid; border-top-width: thick",
{"border": {"top": {"style": "thick"}}},
),
(
"border-top-style: solid; border-top-width: 4pt",
{"border": {"top": {"style": "thick"}}},
),
(
"border-top-style: dotted",
{"border": {"top": {"style": "mediumDashDotDot"}}},
),
(
"border-top-style: dotted; border-top-width: thin",
{"border": {"top": {"style": "dotted"}}},
),
("border-top-style: dashed", {"border": {"top": {"style": "mediumDashed"}}}),
(
"border-top-style: dashed; border-top-width: thin",
{"border": {"top": {"style": "dashed"}}},
),
("border-top-style: double", {"border": {"top": {"style": "double"}}}),
# - color
(
"border-style: solid; border-color: #0000ff",
{
"border": {
"top": {"style": "medium", "color": "0000FF"},
"right": {"style": "medium", "color": "0000FF"},
"bottom": {"style": "medium", "color": "0000FF"},
"left": {"style": "medium", "color": "0000FF"},
}
},
),
(
"border-top-style: double; border-top-color: blue",
{"border": {"top": {"style": "double", "color": "0000FF"}}},
),
(
"border-top-style: solid; border-top-color: #06c",
{"border": {"top": {"style": "medium", "color": "0066CC"}}},
),
# ALIGNMENT
# - horizontal
("text-align: center", {"alignment": {"horizontal": "center"}}),
("text-align: left", {"alignment": {"horizontal": "left"}}),
("text-align: right", {"alignment": {"horizontal": "right"}}),
("text-align: justify", {"alignment": {"horizontal": "justify"}}),
# - vertical
("vertical-align: top", {"alignment": {"vertical": "top"}}),
("vertical-align: text-top", {"alignment": {"vertical": "top"}}),
("vertical-align: middle", {"alignment": {"vertical": "center"}}),
("vertical-align: bottom", {"alignment": {"vertical": "bottom"}}),
("vertical-align: text-bottom", {"alignment": {"vertical": "bottom"}}),
# - wrap_text
("white-space: nowrap", {"alignment": {"wrap_text": False}}),
("white-space: pre", {"alignment": {"wrap_text": False}}),
("white-space: pre-line", {"alignment": {"wrap_text": False}}),
("white-space: normal", {"alignment": {"wrap_text": True}}),
# NUMBER FORMAT
("number-format: 0%", {"number_format": {"format_code": "0%"}}),
],
)
def test_css_to_excel(css, expected):
convert = CSSToExcelConverter()
assert expected == convert(css)
def test_css_to_excel_multiple():
convert = CSSToExcelConverter()
actual = convert(
"""
font-weight: bold;
text-decoration: underline;
color: red;
border-width: thin;
text-align: center;
vertical-align: top;
unused: something;
"""
)
assert {
"font": {"bold": True, "underline": "single", "color": "FF0000"},
"border": {
"top": {"style": "thin"},
"right": {"style": "thin"},
"bottom": {"style": "thin"},
"left": {"style": "thin"},
},
"alignment": {"horizontal": "center", "vertical": "top"},
} == actual
@pytest.mark.parametrize(
"css,inherited,expected",
[
("font-weight: bold", "", {"font": {"bold": True}}),
("", "font-weight: bold", {"font": {"bold": True}}),
(
"font-weight: bold",
"font-style: italic",
{"font": {"bold": True, "italic": True}},
),
("font-style: normal", "font-style: italic", {"font": {"italic": False}}),
("font-style: inherit", "", {}),
(
"font-style: normal; font-style: inherit",
"font-style: italic",
{"font": {"italic": True}},
),
],
)
def test_css_to_excel_inherited(css, inherited, expected):
convert = CSSToExcelConverter(inherited)
assert expected == convert(css)
@pytest.mark.parametrize(
"input_color,output_color",
(
[(name, rgb) for name, rgb in CSSToExcelConverter.NAMED_COLORS.items()]
+ [("#" + rgb, rgb) for rgb in CSSToExcelConverter.NAMED_COLORS.values()]
+ [("#F0F", "FF00FF"), ("#ABC", "AABBCC")]
),
)
def test_css_to_excel_good_colors(input_color, output_color):
# see gh-18392
css = (
"border-top-color: {color}; "
"border-right-color: {color}; "
"border-bottom-color: {color}; "
"border-left-color: {color}; "
"background-color: {color}; "
"color: {color}"
).format(color=input_color)
expected = dict()
expected["fill"] = {"patternType": "solid", "fgColor": output_color}
expected["font"] = {"color": output_color}
expected["border"] = {
k: {"color": output_color} for k in ("top", "right", "bottom", "left")
}
with tm.assert_produces_warning(None):
convert = CSSToExcelConverter()
assert expected == convert(css)
@pytest.mark.parametrize("input_color", [None, "not-a-color"])
def test_css_to_excel_bad_colors(input_color):
# see gh-18392
css = (
"border-top-color: {color}; "
"border-right-color: {color}; "
"border-bottom-color: {color}; "
"border-left-color: {color}; "
"background-color: {color}; "
"color: {color}"
).format(color=input_color)
expected = dict()
if input_color is not None:
expected["fill"] = {"patternType": "solid"}
with tm.assert_produces_warning(CSSWarning):
convert = CSSToExcelConverter()
assert expected == convert(css)

View File

@@ -0,0 +1,715 @@
from datetime import datetime
from io import StringIO
import re
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Index, MultiIndex, option_context
from pandas.util import testing as tm
import pandas.io.formats.format as fmt
lorem_ipsum = (
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod"
" tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim"
" veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex"
" ea commodo consequat. Duis aute irure dolor in reprehenderit in"
" voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur"
" sint occaecat cupidatat non proident, sunt in culpa qui officia"
" deserunt mollit anim id est laborum."
)
def expected_html(datapath, name):
"""
Read HTML file from formats data directory.
Parameters
----------
datapath : pytest fixture
The datapath fixture injected into a test by pytest.
name : str
The name of the HTML file without the suffix.
Returns
-------
str : contents of HTML file.
"""
filename = ".".join([name, "html"])
filepath = datapath("io", "formats", "data", "html", filename)
with open(filepath, encoding="utf-8") as f:
html = f.read()
return html.rstrip()
@pytest.fixture(params=["mixed", "empty"])
def biggie_df_fixture(request):
"""Fixture for a big mixed Dataframe and an empty Dataframe"""
if request.param == "mixed":
df = DataFrame(
{"A": np.random.randn(200), "B": tm.makeStringIndex(200)},
index=np.arange(200),
)
df.loc[:20, "A"] = np.nan
df.loc[:20, "B"] = np.nan
return df
elif request.param == "empty":
df = DataFrame(index=np.arange(200))
return df
@pytest.fixture(params=fmt._VALID_JUSTIFY_PARAMETERS)
def justify(request):
return request.param
@pytest.mark.parametrize("col_space", [30, 50])
def test_to_html_with_col_space(col_space):
df = DataFrame(np.random.random(size=(1, 3)))
# check that col_space affects HTML generation
# and be very brittle about it.
result = df.to_html(col_space=col_space)
hdrs = [x for x in result.split(r"\n") if re.search(r"<th[>\s]", x)]
assert len(hdrs) > 0
for h in hdrs:
assert "min-width" in h
assert str(col_space) in h
def test_to_html_with_empty_string_label():
# GH 3547, to_html regards empty string labels as repeated labels
data = {"c1": ["a", "b"], "c2": ["a", ""], "data": [1, 2]}
df = DataFrame(data).set_index(["c1", "c2"])
result = df.to_html()
assert "rowspan" not in result
@pytest.mark.parametrize(
"df,expected",
[
(DataFrame({"\u03c3": np.arange(10.0)}), "unicode_1"),
(DataFrame({"A": ["\u03c3"]}), "unicode_2"),
],
)
def test_to_html_unicode(df, expected, datapath):
expected = expected_html(datapath, expected)
result = df.to_html()
assert result == expected
def test_to_html_decimal(datapath):
# GH 12031
df = DataFrame({"A": [6.0, 3.1, 2.2]})
result = df.to_html(decimal=",")
expected = expected_html(datapath, "gh12031_expected_output")
assert result == expected
@pytest.mark.parametrize(
"kwargs,string,expected",
[
(dict(), "<type 'str'>", "escaped"),
(dict(escape=False), "<b>bold</b>", "escape_disabled"),
],
)
def test_to_html_escaped(kwargs, string, expected, datapath):
a = "str<ing1 &amp;"
b = "stri>ng2 &amp;"
test_dict = {"co<l1": {a: string, b: string}, "co>l2": {a: string, b: string}}
result = DataFrame(test_dict).to_html(**kwargs)
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize("index_is_named", [True, False])
def test_to_html_multiindex_index_false(index_is_named, datapath):
# GH 8452
df = DataFrame(
{"a": range(2), "b": range(3, 5), "c": range(5, 7), "d": range(3, 5)}
)
df.columns = MultiIndex.from_product([["a", "b"], ["c", "d"]])
if index_is_named:
df.index = Index(df.index.values, name="idx")
result = df.to_html(index=False)
expected = expected_html(datapath, "gh8452_expected_output")
assert result == expected
@pytest.mark.parametrize(
"multi_sparse,expected",
[
(False, "multiindex_sparsify_false_multi_sparse_1"),
(False, "multiindex_sparsify_false_multi_sparse_2"),
(True, "multiindex_sparsify_1"),
(True, "multiindex_sparsify_2"),
],
)
def test_to_html_multiindex_sparsify(multi_sparse, expected, datapath):
index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], names=["foo", None])
df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index)
if expected.endswith("2"):
df.columns = index[::2]
with option_context("display.multi_sparse", multi_sparse):
result = df.to_html()
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize(
"max_rows,expected",
[
(60, "gh14882_expected_output_1"),
# Test that ... appears in a middle level
(56, "gh14882_expected_output_2"),
],
)
def test_to_html_multiindex_odd_even_truncate(max_rows, expected, datapath):
# GH 14882 - Issue on truncation with odd length DataFrame
index = MultiIndex.from_product(
[[100, 200, 300], [10, 20, 30], [1, 2, 3, 4, 5, 6, 7]], names=["a", "b", "c"]
)
df = DataFrame({"n": range(len(index))}, index=index)
result = df.to_html(max_rows=max_rows)
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize(
"df,formatters,expected",
[
(
DataFrame(
[[0, 1], [2, 3], [4, 5], [6, 7]],
columns=["foo", None],
index=np.arange(4),
),
{"__index__": lambda x: "abcd"[x]},
"index_formatter",
),
(
DataFrame({"months": [datetime(2016, 1, 1), datetime(2016, 2, 2)]}),
{"months": lambda x: x.strftime("%Y-%m")},
"datetime64_monthformatter",
),
(
DataFrame(
{
"hod": pd.to_datetime(
["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f"
)
}
),
{"hod": lambda x: x.strftime("%H:%M")},
"datetime64_hourformatter",
),
],
)
def test_to_html_formatters(df, formatters, expected, datapath):
expected = expected_html(datapath, expected)
result = df.to_html(formatters=formatters)
assert result == expected
def test_to_html_regression_GH6098():
df = DataFrame(
{
"clé1": ["a", "a", "b", "b", "a"],
"clé2": ["1er", "2ème", "1er", "2ème", "1er"],
"données1": np.random.randn(5),
"données2": np.random.randn(5),
}
)
# it works
df.pivot_table(index=["clé1"], columns=["clé2"])._repr_html_()
def test_to_html_truncate(datapath):
index = pd.date_range(start="20010101", freq="D", periods=20)
df = DataFrame(index=index, columns=range(20))
result = df.to_html(max_rows=8, max_cols=4)
expected = expected_html(datapath, "truncate")
assert result == expected
@pytest.mark.parametrize(
"sparsify,expected",
[(True, "truncate_multi_index"), (False, "truncate_multi_index_sparse_off")],
)
def test_to_html_truncate_multi_index(sparsify, expected, datapath):
arrays = [
["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
["one", "two", "one", "two", "one", "two", "one", "two"],
]
df = DataFrame(index=arrays, columns=arrays)
result = df.to_html(max_rows=7, max_cols=7, sparsify=sparsify)
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize(
"option,result,expected",
[
(None, lambda df: df.to_html(), "1"),
(None, lambda df: df.to_html(border=0), "0"),
(0, lambda df: df.to_html(), "0"),
(0, lambda df: df._repr_html_(), "0"),
],
)
def test_to_html_border(option, result, expected):
df = DataFrame({"A": [1, 2]})
if option is None:
result = result(df)
else:
with option_context("display.html.border", option):
result = result(df)
expected = 'border="{}"'.format(expected)
assert expected in result
@pytest.mark.parametrize("biggie_df_fixture", ["mixed"], indirect=True)
def test_to_html(biggie_df_fixture):
# TODO: split this test
df = biggie_df_fixture
s = df.to_html()
buf = StringIO()
retval = df.to_html(buf=buf)
assert retval is None
assert buf.getvalue() == s
assert isinstance(s, str)
df.to_html(columns=["B", "A"], col_space=17)
df.to_html(columns=["B", "A"], formatters={"A": lambda x: "{x:.1f}".format(x=x)})
df.to_html(columns=["B", "A"], float_format=str)
df.to_html(columns=["B", "A"], col_space=12, float_format=str)
@pytest.mark.parametrize("biggie_df_fixture", ["empty"], indirect=True)
def test_to_html_empty_dataframe(biggie_df_fixture):
df = biggie_df_fixture
df.to_html()
def test_to_html_filename(biggie_df_fixture, tmpdir):
df = biggie_df_fixture
expected = df.to_html()
path = tmpdir.join("test.html")
df.to_html(path)
result = path.read()
assert result == expected
def test_to_html_with_no_bold():
df = DataFrame({"x": np.random.randn(5)})
html = df.to_html(bold_rows=False)
result = html[html.find("</thead>")]
assert "<strong" not in result
def test_to_html_columns_arg(float_frame):
result = float_frame.to_html(columns=["A"])
assert "<th>B</th>" not in result
@pytest.mark.parametrize(
"columns,justify,expected",
[
(
MultiIndex.from_tuples(
list(zip(np.arange(2).repeat(2), np.mod(range(4), 2))),
names=["CL0", "CL1"],
),
"left",
"multiindex_1",
),
(
MultiIndex.from_tuples(list(zip(range(4), np.mod(range(4), 2)))),
"right",
"multiindex_2",
),
],
)
def test_to_html_multiindex(columns, justify, expected, datapath):
df = DataFrame([list("abcd"), list("efgh")], columns=columns)
result = df.to_html(justify=justify)
expected = expected_html(datapath, expected)
assert result == expected
def test_to_html_justify(justify, datapath):
df = DataFrame(
{"A": [6, 30000, 2], "B": [1, 2, 70000], "C": [223442, 0, 1]},
columns=["A", "B", "C"],
)
result = df.to_html(justify=justify)
expected = expected_html(datapath, "justify").format(justify=justify)
assert result == expected
@pytest.mark.parametrize(
"justify", ["super-right", "small-left", "noinherit", "tiny", "pandas"]
)
def test_to_html_invalid_justify(justify):
# GH 17527
df = DataFrame()
msg = "Invalid value for justify parameter"
with pytest.raises(ValueError, match=msg):
df.to_html(justify=justify)
def test_to_html_index(datapath):
# TODO: split this test
index = ["foo", "bar", "baz"]
df = DataFrame(
{"A": [1, 2, 3], "B": [1.2, 3.4, 5.6], "C": ["one", "two", np.nan]},
columns=["A", "B", "C"],
index=index,
)
expected_with_index = expected_html(datapath, "index_1")
assert df.to_html() == expected_with_index
expected_without_index = expected_html(datapath, "index_2")
result = df.to_html(index=False)
for i in index:
assert i not in result
assert result == expected_without_index
df.index = Index(["foo", "bar", "baz"], name="idx")
expected_with_index = expected_html(datapath, "index_3")
assert df.to_html() == expected_with_index
assert df.to_html(index=False) == expected_without_index
tuples = [("foo", "car"), ("foo", "bike"), ("bar", "car")]
df.index = MultiIndex.from_tuples(tuples)
expected_with_index = expected_html(datapath, "index_4")
assert df.to_html() == expected_with_index
result = df.to_html(index=False)
for i in ["foo", "bar", "car", "bike"]:
assert i not in result
# must be the same result as normal index
assert result == expected_without_index
df.index = MultiIndex.from_tuples(tuples, names=["idx1", "idx2"])
expected_with_index = expected_html(datapath, "index_5")
assert df.to_html() == expected_with_index
assert df.to_html(index=False) == expected_without_index
@pytest.mark.parametrize("classes", ["sortable draggable", ["sortable", "draggable"]])
def test_to_html_with_classes(classes, datapath):
df = DataFrame()
expected = expected_html(datapath, "with_classes")
result = df.to_html(classes=classes)
assert result == expected
def test_to_html_no_index_max_rows(datapath):
# GH 14998
df = DataFrame({"A": [1, 2, 3, 4]})
result = df.to_html(index=False, max_rows=1)
expected = expected_html(datapath, "gh14998_expected_output")
assert result == expected
def test_to_html_multiindex_max_cols(datapath):
# GH 6131
index = MultiIndex(
levels=[["ba", "bb", "bc"], ["ca", "cb", "cc"]],
codes=[[0, 1, 2], [0, 1, 2]],
names=["b", "c"],
)
columns = MultiIndex(
levels=[["d"], ["aa", "ab", "ac"]],
codes=[[0, 0, 0], [0, 1, 2]],
names=[None, "a"],
)
data = np.array(
[[1.0, np.nan, np.nan], [np.nan, 2.0, np.nan], [np.nan, np.nan, 3.0]]
)
df = DataFrame(data, index, columns)
result = df.to_html(max_cols=2)
expected = expected_html(datapath, "gh6131_expected_output")
assert result == expected
def test_to_html_multi_indexes_index_false(datapath):
# GH 22579
df = DataFrame(
{"a": range(10), "b": range(10, 20), "c": range(10, 20), "d": range(10, 20)}
)
df.columns = MultiIndex.from_product([["a", "b"], ["c", "d"]])
df.index = MultiIndex.from_product([["a", "b"], ["c", "d", "e", "f", "g"]])
result = df.to_html(index=False)
expected = expected_html(datapath, "gh22579_expected_output")
assert result == expected
@pytest.mark.parametrize("index_names", [True, False])
@pytest.mark.parametrize("header", [True, False])
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize(
"column_index, column_type",
[
(Index([0, 1]), "unnamed_standard"),
(Index([0, 1], name="columns.name"), "named_standard"),
(MultiIndex.from_product([["a"], ["b", "c"]]), "unnamed_multi"),
(
MultiIndex.from_product(
[["a"], ["b", "c"]], names=["columns.name.0", "columns.name.1"]
),
"named_multi",
),
],
)
@pytest.mark.parametrize(
"row_index, row_type",
[
(Index([0, 1]), "unnamed_standard"),
(Index([0, 1], name="index.name"), "named_standard"),
(MultiIndex.from_product([["a"], ["b", "c"]]), "unnamed_multi"),
(
MultiIndex.from_product(
[["a"], ["b", "c"]], names=["index.name.0", "index.name.1"]
),
"named_multi",
),
],
)
def test_to_html_basic_alignment(
datapath, row_index, row_type, column_index, column_type, index, header, index_names
):
# GH 22747, GH 22579
df = DataFrame(np.zeros((2, 2), dtype=int), index=row_index, columns=column_index)
result = df.to_html(index=index, header=header, index_names=index_names)
if not index:
row_type = "none"
elif not index_names and row_type.startswith("named"):
row_type = "un" + row_type
if not header:
column_type = "none"
elif not index_names and column_type.startswith("named"):
column_type = "un" + column_type
filename = "index_" + row_type + "_columns_" + column_type
expected = expected_html(datapath, filename)
assert result == expected
@pytest.mark.parametrize("index_names", [True, False])
@pytest.mark.parametrize("header", [True, False])
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize(
"column_index, column_type",
[
(Index(np.arange(8)), "unnamed_standard"),
(Index(np.arange(8), name="columns.name"), "named_standard"),
(
MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
"unnamed_multi",
),
(
MultiIndex.from_product(
[["a", "b"], ["c", "d"], ["e", "f"]], names=["foo", None, "baz"]
),
"named_multi",
),
],
)
@pytest.mark.parametrize(
"row_index, row_type",
[
(Index(np.arange(8)), "unnamed_standard"),
(Index(np.arange(8), name="index.name"), "named_standard"),
(
MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
"unnamed_multi",
),
(
MultiIndex.from_product(
[["a", "b"], ["c", "d"], ["e", "f"]], names=["foo", None, "baz"]
),
"named_multi",
),
],
)
def test_to_html_alignment_with_truncation(
datapath, row_index, row_type, column_index, column_type, index, header, index_names
):
# GH 22747, GH 22579
df = DataFrame(np.arange(64).reshape(8, 8), index=row_index, columns=column_index)
result = df.to_html(
max_rows=4, max_cols=4, index=index, header=header, index_names=index_names
)
if not index:
row_type = "none"
elif not index_names and row_type.startswith("named"):
row_type = "un" + row_type
if not header:
column_type = "none"
elif not index_names and column_type.startswith("named"):
column_type = "un" + column_type
filename = "trunc_df_index_" + row_type + "_columns_" + column_type
expected = expected_html(datapath, filename)
assert result == expected
@pytest.mark.parametrize("index", [False, 0])
def test_to_html_truncation_index_false_max_rows(datapath, index):
# GH 15019
data = [
[1.764052, 0.400157],
[0.978738, 2.240893],
[1.867558, -0.977278],
[0.950088, -0.151357],
[-0.103219, 0.410599],
]
df = DataFrame(data)
result = df.to_html(max_rows=4, index=index)
expected = expected_html(datapath, "gh15019_expected_output")
assert result == expected
@pytest.mark.parametrize("index", [False, 0])
@pytest.mark.parametrize(
"col_index_named, expected_output",
[(False, "gh22783_expected_output"), (True, "gh22783_named_columns_index")],
)
def test_to_html_truncation_index_false_max_cols(
datapath, index, col_index_named, expected_output
):
# GH 22783
data = [
[1.764052, 0.400157, 0.978738, 2.240893, 1.867558],
[-0.977278, 0.950088, -0.151357, -0.103219, 0.410599],
]
df = DataFrame(data)
if col_index_named:
df.columns.rename("columns.name", inplace=True)
result = df.to_html(max_cols=4, index=index)
expected = expected_html(datapath, expected_output)
assert result == expected
@pytest.mark.parametrize("notebook", [True, False])
def test_to_html_notebook_has_style(notebook):
df = DataFrame({"A": [1, 2, 3]})
result = df.to_html(notebook=notebook)
if notebook:
assert "tbody tr th:only-of-type" in result
assert "vertical-align: middle;" in result
assert "thead th" in result
else:
assert "tbody tr th:only-of-type" not in result
assert "vertical-align: middle;" not in result
assert "thead th" not in result
def test_to_html_with_index_names_false():
# GH 16493
df = DataFrame({"A": [1, 2]}, index=Index(["a", "b"], name="myindexname"))
result = df.to_html(index_names=False)
assert "myindexname" not in result
def test_to_html_with_id():
# GH 8496
df = DataFrame({"A": [1, 2]}, index=Index(["a", "b"], name="myindexname"))
result = df.to_html(index_names=False, table_id="TEST_ID")
assert ' id="TEST_ID"' in result
@pytest.mark.parametrize(
"value,float_format,expected",
[
(0.19999, "%.3f", "gh21625_expected_output"),
(100.0, "%.0f", "gh22270_expected_output"),
],
)
def test_to_html_float_format_no_fixed_width(value, float_format, expected, datapath):
# GH 21625, GH 22270
df = DataFrame({"x": [value]})
expected = expected_html(datapath, expected)
result = df.to_html(float_format=float_format)
assert result == expected
@pytest.mark.parametrize(
"render_links,expected",
[(True, "render_links_true"), (False, "render_links_false")],
)
def test_to_html_render_links(render_links, expected, datapath):
# GH 2679
data = [
[0, "http://pandas.pydata.org/?q1=a&q2=b", "pydata.org"],
[0, "www.pydata.org", "pydata.org"],
]
df = DataFrame(data, columns=["foo", "bar", None])
result = df.to_html(render_links=render_links)
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize(
"method,expected",
[
("to_html", lambda x: lorem_ipsum),
("_repr_html_", lambda x: lorem_ipsum[: x - 4] + "..."), # regression case
],
)
@pytest.mark.parametrize("max_colwidth", [10, 20, 50, 100])
def test_ignore_display_max_colwidth(method, expected, max_colwidth):
# see gh-17004
df = DataFrame([lorem_ipsum])
with pd.option_context("display.max_colwidth", max_colwidth):
result = getattr(df, method)()
expected = expected(max_colwidth)
assert expected in result
@pytest.mark.parametrize("classes", [True, 0])
def test_to_html_invalid_classes_type(classes):
# GH 25608
df = DataFrame()
msg = "classes must be a string, list, or tuple"
with pytest.raises(TypeError, match=msg):
df.to_html(classes=classes)
def test_to_html_round_column_headers():
# GH 17280
df = DataFrame([1], columns=[0.55555])
with pd.option_context("display.precision", 3):
html = df.to_html(notebook=False)
notebook = df.to_html(notebook=True)
assert "0.55555" in html
assert "0.556" in notebook
@pytest.mark.parametrize("unit", ["100px", "10%", "5em", 150])
def test_to_html_with_col_space_units(unit):
# GH 25941
df = DataFrame(np.random.random(size=(1, 3)))
result = df.to_html(col_space=unit)
result = result.split("tbody")[0]
hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)]
if isinstance(unit, int):
unit = str(unit) + "px"
for h in hdrs:
expected = '<th style="min-width: {unit};">'.format(unit=unit)
assert expected in h

View File

@@ -0,0 +1,750 @@
import codecs
from datetime import datetime
import pytest
import pandas as pd
from pandas import DataFrame, Series
from pandas.util import testing as tm
class TestToLatex:
def test_to_latex_filename(self, float_frame):
with tm.ensure_clean("test.tex") as path:
float_frame.to_latex(path)
with open(path, "r") as f:
assert float_frame.to_latex() == f.read()
# test with utf-8 and encoding option (GH 7061)
df = DataFrame([["au\xdfgangen"]])
with tm.ensure_clean("test.tex") as path:
df.to_latex(path, encoding="utf-8")
with codecs.open(path, "r", encoding="utf-8") as f:
assert df.to_latex() == f.read()
# test with utf-8 without encoding option
with tm.ensure_clean("test.tex") as path:
df.to_latex(path)
with codecs.open(path, "r", encoding="utf-8") as f:
assert df.to_latex() == f.read()
def test_to_latex(self, float_frame):
# it works!
float_frame.to_latex()
df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
withindex_result = df.to_latex()
withindex_expected = r"""\begin{tabular}{lrl}
\toprule
{} & a & b \\
\midrule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withindex_result == withindex_expected
withoutindex_result = df.to_latex(index=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
a & b \\
\midrule
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withoutindex_result == withoutindex_expected
def test_to_latex_format(self, float_frame):
# GH Bug #9402
float_frame.to_latex(column_format="ccc")
df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
withindex_result = df.to_latex(column_format="ccc")
withindex_expected = r"""\begin{tabular}{ccc}
\toprule
{} & a & b \\
\midrule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withindex_result == withindex_expected
def test_to_latex_empty(self):
df = DataFrame()
result = df.to_latex()
expected = r"""\begin{tabular}{l}
\toprule
Empty DataFrame
Columns: Index([], dtype='object')
Index: Index([], dtype='object') \\
\bottomrule
\end{tabular}
"""
assert result == expected
result = df.to_latex(longtable=True)
expected = r"""\begin{longtable}{l}
\toprule
Empty DataFrame
Columns: Index([], dtype='object')
Index: Index([], dtype='object') \\
\end{longtable}
"""
assert result == expected
def test_to_latex_with_formatters(self):
df = DataFrame(
{
"datetime64": [
datetime(2016, 1, 1),
datetime(2016, 2, 5),
datetime(2016, 3, 3),
],
"float": [1.0, 2.0, 3.0],
"int": [1, 2, 3],
"object": [(1, 2), True, False],
}
)
formatters = {
"datetime64": lambda x: x.strftime("%Y-%m"),
"float": lambda x: "[{x: 4.1f}]".format(x=x),
"int": lambda x: "0x{x:x}".format(x=x),
"object": lambda x: "-{x!s}-".format(x=x),
"__index__": lambda x: "index: {x}".format(x=x),
}
result = df.to_latex(formatters=dict(formatters))
expected = r"""\begin{tabular}{llrrl}
\toprule
{} & datetime64 & float & int & object \\
\midrule
index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\
index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\
index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\
\bottomrule
\end{tabular}
"""
assert result == expected
def test_to_latex_multiindex(self):
df = DataFrame({("x", "y"): ["a"]})
result = df.to_latex()
expected = r"""\begin{tabular}{ll}
\toprule
{} & x \\
{} & y \\
\midrule
0 & a \\
\bottomrule
\end{tabular}
"""
assert result == expected
result = df.T.to_latex()
expected = r"""\begin{tabular}{lll}
\toprule
& & 0 \\
\midrule
x & y & a \\
\bottomrule
\end{tabular}
"""
assert result == expected
df = DataFrame.from_dict(
{
("c1", 0): pd.Series({x: x for x in range(4)}),
("c1", 1): pd.Series({x: x + 4 for x in range(4)}),
("c2", 0): pd.Series({x: x for x in range(4)}),
("c2", 1): pd.Series({x: x + 4 for x in range(4)}),
("c3", 0): pd.Series({x: x for x in range(4)}),
}
).T
result = df.to_latex()
expected = r"""\begin{tabular}{llrrrr}
\toprule
& & 0 & 1 & 2 & 3 \\
\midrule
c1 & 0 & 0 & 1 & 2 & 3 \\
& 1 & 4 & 5 & 6 & 7 \\
c2 & 0 & 0 & 1 & 2 & 3 \\
& 1 & 4 & 5 & 6 & 7 \\
c3 & 0 & 0 & 1 & 2 & 3 \\
\bottomrule
\end{tabular}
"""
assert result == expected
# GH 14184
df = df.T
df.columns.names = ["a", "b"]
result = df.to_latex()
expected = r"""\begin{tabular}{lrrrrr}
\toprule
a & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\
b & 0 & 1 & 0 & 1 & 0 \\
\midrule
0 & 0 & 4 & 0 & 4 & 0 \\
1 & 1 & 5 & 1 & 5 & 1 \\
2 & 2 & 6 & 2 & 6 & 2 \\
3 & 3 & 7 & 3 & 7 & 3 \\
\bottomrule
\end{tabular}
"""
assert result == expected
# GH 10660
df = pd.DataFrame({"a": [0, 0, 1, 1], "b": list("abab"), "c": [1, 2, 3, 4]})
result = df.set_index(["a", "b"]).to_latex()
expected = r"""\begin{tabular}{llr}
\toprule
& & c \\
a & b & \\
\midrule
0 & a & 1 \\
& b & 2 \\
1 & a & 3 \\
& b & 4 \\
\bottomrule
\end{tabular}
"""
assert result == expected
result = df.groupby("a").describe().to_latex()
expected = r"""\begin{tabular}{lrrrrrrrr}
\toprule
{} & \multicolumn{8}{l}{c} \\
{} & count & mean & std & min & 25\% & 50\% & 75\% & max \\
a & & & & & & & & \\
\midrule
0 & 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 & 2.0 \\
1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 & 3.5 & 3.75 & 4.0 \\
\bottomrule
\end{tabular}
"""
assert result == expected
def test_to_latex_multiindex_dupe_level(self):
# see gh-14484
#
# If an index is repeated in subsequent rows, it should be
# replaced with a blank in the created table. This should
# ONLY happen if all higher order indices (to the left) are
# equal too. In this test, 'c' has to be printed both times
# because the higher order index 'A' != 'B'.
df = pd.DataFrame(
index=pd.MultiIndex.from_tuples([("A", "c"), ("B", "c")]), columns=["col"]
)
result = df.to_latex()
expected = r"""\begin{tabular}{lll}
\toprule
& & col \\
\midrule
A & c & NaN \\
B & c & NaN \\
\bottomrule
\end{tabular}
"""
assert result == expected
def test_to_latex_multicolumnrow(self):
df = pd.DataFrame(
{
("c1", 0): {x: x for x in range(5)},
("c1", 1): {x: x + 5 for x in range(5)},
("c2", 0): {x: x for x in range(5)},
("c2", 1): {x: x + 5 for x in range(5)},
("c3", 0): {x: x for x in range(5)},
}
)
result = df.to_latex()
expected = r"""\begin{tabular}{lrrrrr}
\toprule
{} & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\
{} & 0 & 1 & 0 & 1 & 0 \\
\midrule
0 & 0 & 5 & 0 & 5 & 0 \\
1 & 1 & 6 & 1 & 6 & 1 \\
2 & 2 & 7 & 2 & 7 & 2 \\
3 & 3 & 8 & 3 & 8 & 3 \\
4 & 4 & 9 & 4 & 9 & 4 \\
\bottomrule
\end{tabular}
"""
assert result == expected
result = df.to_latex(multicolumn=False)
expected = r"""\begin{tabular}{lrrrrr}
\toprule
{} & c1 & & c2 & & c3 \\
{} & 0 & 1 & 0 & 1 & 0 \\
\midrule
0 & 0 & 5 & 0 & 5 & 0 \\
1 & 1 & 6 & 1 & 6 & 1 \\
2 & 2 & 7 & 2 & 7 & 2 \\
3 & 3 & 8 & 3 & 8 & 3 \\
4 & 4 & 9 & 4 & 9 & 4 \\
\bottomrule
\end{tabular}
"""
assert result == expected
result = df.T.to_latex(multirow=True)
expected = r"""\begin{tabular}{llrrrrr}
\toprule
& & 0 & 1 & 2 & 3 & 4 \\
\midrule
\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\
& 1 & 5 & 6 & 7 & 8 & 9 \\
\cline{1-7}
\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\
& 1 & 5 & 6 & 7 & 8 & 9 \\
\cline{1-7}
c3 & 0 & 0 & 1 & 2 & 3 & 4 \\
\bottomrule
\end{tabular}
"""
assert result == expected
df.index = df.T.index
result = df.T.to_latex(multirow=True, multicolumn=True, multicolumn_format="c")
expected = r"""\begin{tabular}{llrrrrr}
\toprule
& & \multicolumn{2}{c}{c1} & \multicolumn{2}{c}{c2} & c3 \\
& & 0 & 1 & 0 & 1 & 0 \\
\midrule
\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\
& 1 & 5 & 6 & 7 & 8 & 9 \\
\cline{1-7}
\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\
& 1 & 5 & 6 & 7 & 8 & 9 \\
\cline{1-7}
c3 & 0 & 0 & 1 & 2 & 3 & 4 \\
\bottomrule
\end{tabular}
"""
assert result == expected
def test_to_latex_escape(self):
a = "a"
b = "b"
test_dict = {"co$e^x$": {a: "a", b: "b"}, "co^l1": {a: "a", b: "b"}}
unescaped_result = DataFrame(test_dict).to_latex(escape=False)
escaped_result = DataFrame(test_dict).to_latex() # default: escape=True
unescaped_expected = r"""\begin{tabular}{lll}
\toprule
{} & co$e^x$ & co^l1 \\
\midrule
a & a & a \\
b & b & b \\
\bottomrule
\end{tabular}
"""
escaped_expected = r"""\begin{tabular}{lll}
\toprule
{} & co\$e\textasciicircum x\$ & co\textasciicircum l1 \\
\midrule
a & a & a \\
b & b & b \\
\bottomrule
\end{tabular}
"""
assert unescaped_result == unescaped_expected
assert escaped_result == escaped_expected
def test_to_latex_special_escape(self):
df = DataFrame([r"a\b\c", r"^a^b^c", r"~a~b~c"])
escaped_result = df.to_latex()
escaped_expected = r"""\begin{tabular}{ll}
\toprule
{} & 0 \\
\midrule
0 & a\textbackslash b\textbackslash c \\
1 & \textasciicircum a\textasciicircum b\textasciicircum c \\
2 & \textasciitilde a\textasciitilde b\textasciitilde c \\
\bottomrule
\end{tabular}
"""
assert escaped_result == escaped_expected
def test_to_latex_longtable(self, float_frame):
float_frame.to_latex(longtable=True)
df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
withindex_result = df.to_latex(longtable=True)
withindex_expected = r"""\begin{longtable}{lrl}
\toprule
{} & a & b \\
\midrule
\endhead
\midrule
\multicolumn{3}{r}{{Continued on next page}} \\
\midrule
\endfoot
\bottomrule
\endlastfoot
0 & 1 & b1 \\
1 & 2 & b2 \\
\end{longtable}
"""
assert withindex_result == withindex_expected
withoutindex_result = df.to_latex(index=False, longtable=True)
withoutindex_expected = r"""\begin{longtable}{rl}
\toprule
a & b \\
\midrule
\endhead
\midrule
\multicolumn{2}{r}{{Continued on next page}} \\
\midrule
\endfoot
\bottomrule
\endlastfoot
1 & b1 \\
2 & b2 \\
\end{longtable}
"""
assert withoutindex_result == withoutindex_expected
df = DataFrame({"a": [1, 2]})
with1column_result = df.to_latex(index=False, longtable=True)
assert r"\multicolumn{1}" in with1column_result
df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
with3columns_result = df.to_latex(index=False, longtable=True)
assert r"\multicolumn{3}" in with3columns_result
def test_to_latex_escape_special_chars(self):
special_characters = ["&", "%", "$", "#", "_", "{", "}", "~", "^", "\\"]
df = DataFrame(data=special_characters)
observed = df.to_latex()
expected = r"""\begin{tabular}{ll}
\toprule
{} & 0 \\
\midrule
0 & \& \\
1 & \% \\
2 & \$ \\
3 & \# \\
4 & \_ \\
5 & \{ \\
6 & \} \\
7 & \textasciitilde \\
8 & \textasciicircum \\
9 & \textbackslash \\
\bottomrule
\end{tabular}
"""
assert observed == expected
def test_to_latex_no_header(self):
# GH 7124
df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
withindex_result = df.to_latex(header=False)
withindex_expected = r"""\begin{tabular}{lrl}
\toprule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withindex_result == withindex_expected
withoutindex_result = df.to_latex(index=False, header=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withoutindex_result == withoutindex_expected
def test_to_latex_specified_header(self):
# GH 7124
df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
withindex_result = df.to_latex(header=["AA", "BB"])
withindex_expected = r"""\begin{tabular}{lrl}
\toprule
{} & AA & BB \\
\midrule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withindex_result == withindex_expected
withoutindex_result = df.to_latex(header=["AA", "BB"], index=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
AA & BB \\
\midrule
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withoutindex_result == withoutindex_expected
withoutescape_result = df.to_latex(header=["$A$", "$B$"], escape=False)
withoutescape_expected = r"""\begin{tabular}{lrl}
\toprule
{} & $A$ & $B$ \\
\midrule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withoutescape_result == withoutescape_expected
with pytest.raises(ValueError):
df.to_latex(header=["A"])
def test_to_latex_decimal(self, float_frame):
# GH 12031
float_frame.to_latex()
df = DataFrame({"a": [1.0, 2.1], "b": ["b1", "b2"]})
withindex_result = df.to_latex(decimal=",")
withindex_expected = r"""\begin{tabular}{lrl}
\toprule
{} & a & b \\
\midrule
0 & 1,0 & b1 \\
1 & 2,1 & b2 \\
\bottomrule
\end{tabular}
"""
assert withindex_result == withindex_expected
def test_to_latex_series(self):
s = Series(["a", "b", "c"])
withindex_result = s.to_latex()
withindex_expected = r"""\begin{tabular}{ll}
\toprule
{} & 0 \\
\midrule
0 & a \\
1 & b \\
2 & c \\
\bottomrule
\end{tabular}
"""
assert withindex_result == withindex_expected
def test_to_latex_bold_rows(self):
# GH 16707
df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
observed = df.to_latex(bold_rows=True)
expected = r"""\begin{tabular}{lrl}
\toprule
{} & a & b \\
\midrule
\textbf{0} & 1 & b1 \\
\textbf{1} & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert observed == expected
def test_to_latex_no_bold_rows(self):
# GH 16707
df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
observed = df.to_latex(bold_rows=False)
expected = r"""\begin{tabular}{lrl}
\toprule
{} & a & b \\
\midrule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert observed == expected
@pytest.mark.parametrize("name0", [None, "named0"])
@pytest.mark.parametrize("name1", [None, "named1"])
@pytest.mark.parametrize("axes", [[0], [1], [0, 1]])
def test_to_latex_multiindex_names(self, name0, name1, axes):
# GH 18667
names = [name0, name1]
mi = pd.MultiIndex.from_product([[1, 2], [3, 4]])
df = pd.DataFrame(-1, index=mi.copy(), columns=mi.copy())
for idx in axes:
df.axes[idx].names = names
idx_names = tuple(n or "{}" for n in names)
idx_names_row = (
"{idx_names[0]} & {idx_names[1]} & & & & \\\\\n".format(
idx_names=idx_names
)
if (0 in axes and any(names))
else ""
)
placeholder = "{}" if any(names) and 1 in axes else " "
col_names = [n if (bool(n) and 1 in axes) else placeholder for n in names]
observed = df.to_latex()
expected = r"""\begin{tabular}{llrrrr}
\toprule
& %s & \multicolumn{2}{l}{1} & \multicolumn{2}{l}{2} \\
& %s & 3 & 4 & 3 & 4 \\
%s\midrule
1 & 3 & -1 & -1 & -1 & -1 \\
& 4 & -1 & -1 & -1 & -1 \\
2 & 3 & -1 & -1 & -1 & -1 \\
& 4 & -1 & -1 & -1 & -1 \\
\bottomrule
\end{tabular}
""" % tuple(
list(col_names) + [idx_names_row]
)
assert observed == expected
@pytest.mark.parametrize("one_row", [True, False])
def test_to_latex_multiindex_nans(self, one_row):
# GH 14249
df = pd.DataFrame({"a": [None, 1], "b": [2, 3], "c": [4, 5]})
if one_row:
df = df.iloc[[0]]
observed = df.set_index(["a", "b"]).to_latex()
expected = r"""\begin{tabular}{llr}
\toprule
& & c \\
a & b & \\
\midrule
NaN & 2 & 4 \\
"""
if not one_row:
expected += r"""1.0 & 3 & 5 \\
"""
expected += r"""\bottomrule
\end{tabular}
"""
assert observed == expected
def test_to_latex_non_string_index(self):
# GH 19981
observed = pd.DataFrame([[1, 2, 3]] * 2).set_index([0, 1]).to_latex()
expected = r"""\begin{tabular}{llr}
\toprule
& & 2 \\
0 & 1 & \\
\midrule
1 & 2 & 3 \\
& 2 & 3 \\
\bottomrule
\end{tabular}
"""
assert observed == expected
def test_to_latex_midrule_location(self):
# GH 18326
df = pd.DataFrame({"a": [1, 2]})
df.index.name = "foo"
observed = df.to_latex(index_names=False)
expected = r"""\begin{tabular}{lr}
\toprule
{} & a \\
\midrule
0 & 1 \\
1 & 2 \\
\bottomrule
\end{tabular}
"""
assert observed == expected
def test_to_latex_multiindex_empty_name(self):
# GH 18669
mi = pd.MultiIndex.from_product([[1, 2]], names=[""])
df = pd.DataFrame(-1, index=mi, columns=range(4))
observed = df.to_latex()
expected = r"""\begin{tabular}{lrrrr}
\toprule
& 0 & 1 & 2 & 3 \\
{} & & & & \\
\midrule
1 & -1 & -1 & -1 & -1 \\
2 & -1 & -1 & -1 & -1 \\
\bottomrule
\end{tabular}
"""
assert observed == expected
def test_to_latex_float_format_no_fixed_width(self):
# GH 21625
df = DataFrame({"x": [0.19999]})
expected = r"""\begin{tabular}{lr}
\toprule
{} & x \\
\midrule
0 & 0.200 \\
\bottomrule
\end{tabular}
"""
assert df.to_latex(float_format="%.3f") == expected
# GH 22270
df = DataFrame({"x": [100.0]})
expected = r"""\begin{tabular}{lr}
\toprule
{} & x \\
\midrule
0 & 100 \\
\bottomrule
\end{tabular}
"""
assert df.to_latex(float_format="%.0f") == expected
def test_to_latex_multindex_header(self):
# GH 16718
df = pd.DataFrame({"a": [0], "b": [1], "c": [2], "d": [3]}).set_index(
["a", "b"]
)
observed = df.to_latex(header=["r1", "r2"])
expected = r"""\begin{tabular}{llrr}
\toprule
& & r1 & r2 \\
a & b & & \\
\midrule
0 & 1 & 2 & 3 \\
\bottomrule
\end{tabular}
"""
assert observed == expected