8th day of python challenges 111-117

2019-08-04 15:26:35 +03:00
parent b04c1b055f
commit 627802c383
3215 changed files with 760227 additions and 491 deletions
--- a/venv/lib/python3.6/site-packages/pandas/plotting/_matplotlib/boxplot.py
+++ b/venv/lib/python3.6/site-packages/pandas/plotting/_matplotlib/boxplot.py
@@ -0,0 +1,416 @@
+from collections import namedtuple
+import warnings
+
+from matplotlib.artist import setp
+import numpy as np
+
+from pandas.core.dtypes.generic import ABCSeries
+from pandas.core.dtypes.missing import remove_na_arraylike
+
+import pandas as pd
+
+from pandas.io.formats.printing import pprint_thing
+from pandas.plotting._matplotlib import converter
+from pandas.plotting._matplotlib.core import LinePlot, MPLPlot
+from pandas.plotting._matplotlib.style import _get_standard_colors
+from pandas.plotting._matplotlib.tools import _flatten, _subplots
+
+
+class BoxPlot(LinePlot):
+    _kind = "box"
+    _layout_type = "horizontal"
+
+    _valid_return_types = (None, "axes", "dict", "both")
+    # namedtuple to hold results
+    BP = namedtuple("Boxplot", ["ax", "lines"])
+
+    def __init__(self, data, return_type="axes", **kwargs):
+        # Do not call LinePlot.__init__ which may fill nan
+        if return_type not in self._valid_return_types:
+            raise ValueError("return_type must be {None, 'axes', 'dict', 'both'}")
+
+        self.return_type = return_type
+        MPLPlot.__init__(self, data, **kwargs)
+
+    def _args_adjust(self):
+        if self.subplots:
+            # Disable label ax sharing. Otherwise, all subplots shows last
+            # column label
+            if self.orientation == "vertical":
+                self.sharex = False
+            else:
+                self.sharey = False
+
+    @classmethod
+    def _plot(cls, ax, y, column_num=None, return_type="axes", **kwds):
+        if y.ndim == 2:
+            y = [remove_na_arraylike(v) for v in y]
+            # Boxplot fails with empty arrays, so need to add a NaN
+            #   if any cols are empty
+            # GH 8181
+            y = [v if v.size > 0 else np.array([np.nan]) for v in y]
+        else:
+            y = remove_na_arraylike(y)
+        bp = ax.boxplot(y, **kwds)
+
+        if return_type == "dict":
+            return bp, bp
+        elif return_type == "both":
+            return cls.BP(ax=ax, lines=bp), bp
+        else:
+            return ax, bp
+
+    def _validate_color_args(self):
+        if "color" in self.kwds:
+            if self.colormap is not None:
+                warnings.warn(
+                    "'color' and 'colormap' cannot be used "
+                    "simultaneously. Using 'color'"
+                )
+            self.color = self.kwds.pop("color")
+
+            if isinstance(self.color, dict):
+                valid_keys = ["boxes", "whiskers", "medians", "caps"]
+                for key, values in self.color.items():
+                    if key not in valid_keys:
+                        raise ValueError(
+                            "color dict contains invalid "
+                            "key '{0}' "
+                            "The key must be either {1}".format(key, valid_keys)
+                        )
+        else:
+            self.color = None
+
+        # get standard colors for default
+        colors = _get_standard_colors(num_colors=3, colormap=self.colormap, color=None)
+        # use 2 colors by default, for box/whisker and median
+        # flier colors isn't needed here
+        # because it can be specified by ``sym`` kw
+        self._boxes_c = colors[0]
+        self._whiskers_c = colors[0]
+        self._medians_c = colors[2]
+        self._caps_c = "k"  # mpl default
+
+    def _get_colors(self, num_colors=None, color_kwds="color"):
+        pass
+
+    def maybe_color_bp(self, bp):
+        if isinstance(self.color, dict):
+            boxes = self.color.get("boxes", self._boxes_c)
+            whiskers = self.color.get("whiskers", self._whiskers_c)
+            medians = self.color.get("medians", self._medians_c)
+            caps = self.color.get("caps", self._caps_c)
+        else:
+            # Other types are forwarded to matplotlib
+            # If None, use default colors
+            boxes = self.color or self._boxes_c
+            whiskers = self.color or self._whiskers_c
+            medians = self.color or self._medians_c
+            caps = self.color or self._caps_c
+
+        setp(bp["boxes"], color=boxes, alpha=1)
+        setp(bp["whiskers"], color=whiskers, alpha=1)
+        setp(bp["medians"], color=medians, alpha=1)
+        setp(bp["caps"], color=caps, alpha=1)
+
+    def _make_plot(self):
+        if self.subplots:
+            self._return_obj = pd.Series()
+
+            for i, (label, y) in enumerate(self._iter_data()):
+                ax = self._get_ax(i)
+                kwds = self.kwds.copy()
+
+                ret, bp = self._plot(
+                    ax, y, column_num=i, return_type=self.return_type, **kwds
+                )
+                self.maybe_color_bp(bp)
+                self._return_obj[label] = ret
+
+                label = [pprint_thing(label)]
+                self._set_ticklabels(ax, label)
+        else:
+            y = self.data.values.T
+            ax = self._get_ax(0)
+            kwds = self.kwds.copy()
+
+            ret, bp = self._plot(
+                ax, y, column_num=0, return_type=self.return_type, **kwds
+            )
+            self.maybe_color_bp(bp)
+            self._return_obj = ret
+
+            labels = [l for l, _ in self._iter_data()]
+            labels = [pprint_thing(l) for l in labels]
+            if not self.use_index:
+                labels = [pprint_thing(key) for key in range(len(labels))]
+            self._set_ticklabels(ax, labels)
+
+    def _set_ticklabels(self, ax, labels):
+        if self.orientation == "vertical":
+            ax.set_xticklabels(labels)
+        else:
+            ax.set_yticklabels(labels)
+
+    def _make_legend(self):
+        pass
+
+    def _post_plot_logic(self, ax, data):
+        pass
+
+    @property
+    def orientation(self):
+        if self.kwds.get("vert", True):
+            return "vertical"
+        else:
+            return "horizontal"
+
+    @property
+    def result(self):
+        if self.return_type is None:
+            return super().result
+        else:
+            return self._return_obj
+
+
+def _grouped_plot_by_column(
+    plotf,
+    data,
+    columns=None,
+    by=None,
+    numeric_only=True,
+    grid=False,
+    figsize=None,
+    ax=None,
+    layout=None,
+    return_type=None,
+    **kwargs
+):
+    grouped = data.groupby(by)
+    if columns is None:
+        if not isinstance(by, (list, tuple)):
+            by = [by]
+        columns = data._get_numeric_data().columns.difference(by)
+    naxes = len(columns)
+    fig, axes = _subplots(
+        naxes=naxes, sharex=True, sharey=True, figsize=figsize, ax=ax, layout=layout
+    )
+
+    _axes = _flatten(axes)
+
+    ax_values = []
+
+    for i, col in enumerate(columns):
+        ax = _axes[i]
+        gp_col = grouped[col]
+        keys, values = zip(*gp_col)
+        re_plotf = plotf(keys, values, ax, **kwargs)
+        ax.set_title(col)
+        ax.set_xlabel(pprint_thing(by))
+        ax_values.append(re_plotf)
+        ax.grid(grid)
+
+    result = pd.Series(ax_values, index=columns)
+
+    # Return axes in multiplot case, maybe revisit later # 985
+    if return_type is None:
+        result = axes
+
+    byline = by[0] if len(by) == 1 else by
+    fig.suptitle("Boxplot grouped by {byline}".format(byline=byline))
+    fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)
+
+    return result
+
+
+def boxplot(
+    data,
+    column=None,
+    by=None,
+    ax=None,
+    fontsize=None,
+    rot=0,
+    grid=True,
+    figsize=None,
+    layout=None,
+    return_type=None,
+    **kwds
+):
+
+    import matplotlib.pyplot as plt
+
+    # validate return_type:
+    if return_type not in BoxPlot._valid_return_types:
+        raise ValueError("return_type must be {'axes', 'dict', 'both'}")
+
+    if isinstance(data, ABCSeries):
+        data = data.to_frame("x")
+        column = "x"
+
+    def _get_colors():
+        #  num_colors=3 is required as method maybe_color_bp takes the colors
+        #  in positions 0 and 2.
+        return _get_standard_colors(color=kwds.get("color"), num_colors=3)
+
+    def maybe_color_bp(bp):
+        if "color" not in kwds:
+            setp(bp["boxes"], color=colors[0], alpha=1)
+            setp(bp["whiskers"], color=colors[0], alpha=1)
+            setp(bp["medians"], color=colors[2], alpha=1)
+
+    def plot_group(keys, values, ax):
+        keys = [pprint_thing(x) for x in keys]
+        values = [np.asarray(remove_na_arraylike(v)) for v in values]
+        bp = ax.boxplot(values, **kwds)
+        if fontsize is not None:
+            ax.tick_params(axis="both", labelsize=fontsize)
+        if kwds.get("vert", 1):
+            ax.set_xticklabels(keys, rotation=rot)
+        else:
+            ax.set_yticklabels(keys, rotation=rot)
+        maybe_color_bp(bp)
+
+        # Return axes in multiplot case, maybe revisit later # 985
+        if return_type == "dict":
+            return bp
+        elif return_type == "both":
+            return BoxPlot.BP(ax=ax, lines=bp)
+        else:
+            return ax
+
+    colors = _get_colors()
+    if column is None:
+        columns = None
+    else:
+        if isinstance(column, (list, tuple)):
+            columns = column
+        else:
+            columns = [column]
+
+    if by is not None:
+        # Prefer array return type for 2-D plots to match the subplot layout
+        # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580
+        result = _grouped_plot_by_column(
+            plot_group,
+            data,
+            columns=columns,
+            by=by,
+            grid=grid,
+            figsize=figsize,
+            ax=ax,
+            layout=layout,
+            return_type=return_type,
+        )
+    else:
+        if return_type is None:
+            return_type = "axes"
+        if layout is not None:
+            raise ValueError(
+                "The 'layout' keyword is not supported when " "'by' is None"
+            )
+
+        if ax is None:
+            rc = {"figure.figsize": figsize} if figsize is not None else {}
+            with plt.rc_context(rc):
+                ax = plt.gca()
+        data = data._get_numeric_data()
+        if columns is None:
+            columns = data.columns
+        else:
+            data = data[columns]
+
+        result = plot_group(columns, data.values.T, ax)
+        ax.grid(grid)
+
+    return result
+
+
+def boxplot_frame(
+    self,
+    column=None,
+    by=None,
+    ax=None,
+    fontsize=None,
+    rot=0,
+    grid=True,
+    figsize=None,
+    layout=None,
+    return_type=None,
+    **kwds
+):
+    import matplotlib.pyplot as plt
+
+    converter._WARN = False  # no warning for pandas plots
+    ax = boxplot(
+        self,
+        column=column,
+        by=by,
+        ax=ax,
+        fontsize=fontsize,
+        grid=grid,
+        rot=rot,
+        figsize=figsize,
+        layout=layout,
+        return_type=return_type,
+        **kwds
+    )
+    plt.draw_if_interactive()
+    return ax
+
+
+def boxplot_frame_groupby(
+    grouped,
+    subplots=True,
+    column=None,
+    fontsize=None,
+    rot=0,
+    grid=True,
+    ax=None,
+    figsize=None,
+    layout=None,
+    sharex=False,
+    sharey=True,
+    **kwds
+):
+    converter._WARN = False  # no warning for pandas plots
+    if subplots is True:
+        naxes = len(grouped)
+        fig, axes = _subplots(
+            naxes=naxes,
+            squeeze=False,
+            ax=ax,
+            sharex=sharex,
+            sharey=sharey,
+            figsize=figsize,
+            layout=layout,
+        )
+        axes = _flatten(axes)
+
+        ret = pd.Series()
+        for (key, group), ax in zip(grouped, axes):
+            d = group.boxplot(
+                ax=ax, column=column, fontsize=fontsize, rot=rot, grid=grid, **kwds
+            )
+            ax.set_title(pprint_thing(key))
+            ret.loc[key] = d
+        fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)
+    else:
+        keys, frames = zip(*grouped)
+        if grouped.axis == 0:
+            df = pd.concat(frames, keys=keys, axis=1)
+        else:
+            if len(frames) > 1:
+                df = frames[0].join(frames[1::])
+            else:
+                df = frames[0]
+        ret = df.boxplot(
+            column=column,
+            fontsize=fontsize,
+            rot=rot,
+            grid=grid,
+            ax=ax,
+            figsize=figsize,
+            layout=layout,
+            **kwds
+        )
+    return ret