8th day of python challenges 111-117
This commit is contained in:
263
venv/lib/python3.6/site-packages/pandas/__init__.py
Normal file
263
venv/lib/python3.6/site-packages/pandas/__init__.py
Normal file
@@ -0,0 +1,263 @@
|
||||
# flake8: noqa
|
||||
|
||||
__docformat__ = "restructuredtext"
|
||||
|
||||
# Let users know if they're missing any of our hard dependencies
hard_dependencies = ("numpy", "pytz", "dateutil")
missing_dependencies = []

for dependency in hard_dependencies:
    try:
        __import__(dependency)
    except ImportError as e:
        # collect every failure so the final error lists all missing
        # dependencies at once instead of failing one at a time
        missing_dependencies.append("{0}: {1}".format(dependency, str(e)))

if missing_dependencies:
    raise ImportError(
        "Unable to import required dependencies:\n" + "\n".join(missing_dependencies)
    )
# clean up the module namespace; these names are not part of the public API
del hard_dependencies, dependency, missing_dependencies
|
||||
|
||||
# numpy compat
|
||||
from pandas.compat.numpy import (
|
||||
_np_version_under1p14,
|
||||
_np_version_under1p15,
|
||||
_np_version_under1p16,
|
||||
_np_version_under1p17,
|
||||
)
|
||||
|
||||
try:
|
||||
from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib
|
||||
except ImportError as e: # pragma: no cover
|
||||
# hack but overkill to use re
|
||||
module = str(e).replace("cannot import name ", "")
|
||||
raise ImportError(
|
||||
"C extension: {0} not built. If you want to import "
|
||||
"pandas from the source directory, you may need to run "
|
||||
"'python setup.py build_ext --inplace --force' to build "
|
||||
"the C extensions first.".format(module)
|
||||
)
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from pandas._config import (
|
||||
get_option,
|
||||
set_option,
|
||||
reset_option,
|
||||
describe_option,
|
||||
option_context,
|
||||
options,
|
||||
)
|
||||
|
||||
# let init-time option registration happen
|
||||
import pandas.core.config_init
|
||||
|
||||
from pandas.core.api import (
|
||||
# dtype
|
||||
Int8Dtype,
|
||||
Int16Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
UInt8Dtype,
|
||||
UInt16Dtype,
|
||||
UInt32Dtype,
|
||||
UInt64Dtype,
|
||||
CategoricalDtype,
|
||||
PeriodDtype,
|
||||
IntervalDtype,
|
||||
DatetimeTZDtype,
|
||||
# missing
|
||||
isna,
|
||||
isnull,
|
||||
notna,
|
||||
notnull,
|
||||
# indexes
|
||||
Index,
|
||||
CategoricalIndex,
|
||||
Int64Index,
|
||||
UInt64Index,
|
||||
RangeIndex,
|
||||
Float64Index,
|
||||
MultiIndex,
|
||||
IntervalIndex,
|
||||
TimedeltaIndex,
|
||||
DatetimeIndex,
|
||||
PeriodIndex,
|
||||
IndexSlice,
|
||||
# tseries
|
||||
NaT,
|
||||
Period,
|
||||
period_range,
|
||||
Timedelta,
|
||||
timedelta_range,
|
||||
Timestamp,
|
||||
date_range,
|
||||
bdate_range,
|
||||
Interval,
|
||||
interval_range,
|
||||
DateOffset,
|
||||
# conversion
|
||||
to_numeric,
|
||||
to_datetime,
|
||||
to_timedelta,
|
||||
# misc
|
||||
np,
|
||||
Grouper,
|
||||
factorize,
|
||||
unique,
|
||||
value_counts,
|
||||
NamedAgg,
|
||||
array,
|
||||
Categorical,
|
||||
set_eng_float_format,
|
||||
Series,
|
||||
DataFrame,
|
||||
)
|
||||
|
||||
from pandas.core.sparse.api import (
|
||||
SparseArray,
|
||||
SparseDataFrame,
|
||||
SparseSeries,
|
||||
SparseDtype,
|
||||
)
|
||||
|
||||
from pandas.tseries.api import infer_freq
|
||||
from pandas.tseries import offsets
|
||||
|
||||
from pandas.core.computation.api import eval
|
||||
|
||||
from pandas.core.reshape.api import (
|
||||
concat,
|
||||
lreshape,
|
||||
melt,
|
||||
wide_to_long,
|
||||
merge,
|
||||
merge_asof,
|
||||
merge_ordered,
|
||||
crosstab,
|
||||
pivot,
|
||||
pivot_table,
|
||||
get_dummies,
|
||||
cut,
|
||||
qcut,
|
||||
)
|
||||
|
||||
from pandas.util._print_versions import show_versions
|
||||
|
||||
from pandas.io.api import (
|
||||
# excel
|
||||
ExcelFile,
|
||||
ExcelWriter,
|
||||
read_excel,
|
||||
# packers
|
||||
read_msgpack,
|
||||
to_msgpack,
|
||||
# parsers
|
||||
read_csv,
|
||||
read_fwf,
|
||||
read_table,
|
||||
# pickle
|
||||
read_pickle,
|
||||
to_pickle,
|
||||
# pytables
|
||||
HDFStore,
|
||||
read_hdf,
|
||||
# sql
|
||||
read_sql,
|
||||
read_sql_query,
|
||||
read_sql_table,
|
||||
# misc
|
||||
read_clipboard,
|
||||
read_parquet,
|
||||
read_feather,
|
||||
read_gbq,
|
||||
read_html,
|
||||
read_json,
|
||||
read_stata,
|
||||
read_sas,
|
||||
read_spss,
|
||||
)
|
||||
|
||||
from pandas.util._tester import test
|
||||
import pandas.testing
|
||||
import pandas.arrays
|
||||
|
||||
# use the closest tagged version if possible
|
||||
from ._version import get_versions
|
||||
|
||||
v = get_versions()
|
||||
__version__ = v.get("closest-tag", v["version"])
|
||||
__git_version__ = v.get("full-revisionid")
|
||||
del get_versions, v
|
||||
|
||||
|
||||
# GH 27101
# TODO: remove Panel compat in 1.0
if pandas.compat.PY37:
    # Python 3.7+ supports module-level __getattr__ (PEP 562), so the
    # dummy Panel can be created lazily, with a deprecation warning on
    # first access.

    def __getattr__(name):
        if name == "Panel":
            import warnings

            warnings.warn(
                "The Panel class is removed from pandas. Accessing it "
                "from the top-level namespace will also be removed in "
                "the next version",
                FutureWarning,
                stacklevel=2,
            )

            # empty placeholder standing in for the removed class
            class Panel:
                pass

            return Panel
        raise AttributeError("module 'pandas' has no attribute '{}'".format(name))


else:
    # Pre-3.7 has no module __getattr__: expose the placeholder eagerly
    # (no deprecation warning can be emitted on access here).

    class Panel:
        pass
|
||||
|
||||
|
||||
# module level doc-string
|
||||
__doc__ = """
|
||||
pandas - a powerful data analysis and manipulation library for Python
|
||||
=====================================================================
|
||||
|
||||
**pandas** is a Python package providing fast, flexible, and expressive data
|
||||
structures designed to make working with "relational" or "labeled" data both
|
||||
easy and intuitive. It aims to be the fundamental high-level building block for
|
||||
doing practical, **real world** data analysis in Python. Additionally, it has
|
||||
the broader goal of becoming **the most powerful and flexible open source data
|
||||
analysis / manipulation tool available in any language**. It is already well on
|
||||
its way toward this goal.
|
||||
|
||||
Main Features
|
||||
-------------
|
||||
Here are just a few of the things that pandas does well:
|
||||
|
||||
- Easy handling of missing data in floating point as well as non-floating
|
||||
point data.
|
||||
- Size mutability: columns can be inserted and deleted from DataFrame and
|
||||
higher dimensional objects
|
||||
- Automatic and explicit data alignment: objects can be explicitly aligned
|
||||
to a set of labels, or the user can simply ignore the labels and let
|
||||
`Series`, `DataFrame`, etc. automatically align the data for you in
|
||||
computations.
|
||||
- Powerful, flexible group by functionality to perform split-apply-combine
|
||||
operations on data sets, for both aggregating and transforming data.
|
||||
- Make it easy to convert ragged, differently-indexed data in other Python
|
||||
and NumPy data structures into DataFrame objects.
|
||||
- Intelligent label-based slicing, fancy indexing, and subsetting of large
|
||||
data sets.
|
||||
- Intuitive merging and joining data sets.
|
||||
- Flexible reshaping and pivoting of data sets.
|
||||
- Hierarchical labeling of axes (possible to have multiple labels per tick).
|
||||
- Robust IO tools for loading data from flat files (CSV and delimited),
|
||||
Excel files, databases, and saving/loading data from the ultrafast HDF5
|
||||
format.
|
||||
- Time series-specific functionality: date range generation and frequency
|
||||
conversion, moving window statistics, moving window linear regressions,
|
||||
date shifting and lagging, etc.
|
||||
"""
|
||||
28
venv/lib/python3.6/site-packages/pandas/_config/__init__.py
Normal file
28
venv/lib/python3.6/site-packages/pandas/_config/__init__.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""
|
||||
pandas._config is considered explicitly upstream of everything else in pandas,
|
||||
should have no intra-pandas dependencies.
|
||||
|
||||
importing `dates` and `display` ensures that keys needed by _libs
|
||||
are initialized.
|
||||
"""
|
||||
__all__ = [
|
||||
"config",
|
||||
"detect_console_encoding",
|
||||
"get_option",
|
||||
"set_option",
|
||||
"reset_option",
|
||||
"describe_option",
|
||||
"option_context",
|
||||
"options",
|
||||
]
|
||||
from pandas._config import config
|
||||
from pandas._config import dates # noqa:F401
|
||||
from pandas._config.config import (
|
||||
describe_option,
|
||||
get_option,
|
||||
option_context,
|
||||
options,
|
||||
reset_option,
|
||||
set_option,
|
||||
)
|
||||
from pandas._config.display import detect_console_encoding
|
||||
847
venv/lib/python3.6/site-packages/pandas/_config/config.py
Normal file
847
venv/lib/python3.6/site-packages/pandas/_config/config.py
Normal file
@@ -0,0 +1,847 @@
|
||||
"""
|
||||
The config module holds package-wide configurables and provides
|
||||
a uniform API for working with them.
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
This module supports the following requirements:
|
||||
- options are referenced using keys in dot.notation, e.g. "x.y.option - z".
|
||||
- keys are case-insensitive.
|
||||
- functions should accept partial/regex keys, when unambiguous.
|
||||
- options can be registered by modules at import time.
|
||||
- options can be registered at init-time (via core.config_init)
|
||||
- options have a default value, and (optionally) a description and
|
||||
validation function associated with them.
|
||||
- options can be deprecated, in which case referencing them
|
||||
should produce a warning.
|
||||
- deprecated options can optionally be rerouted to a replacement
|
||||
so that accessing a deprecated option reroutes to a differently
|
||||
named option.
|
||||
- options can be reset to their default value.
|
||||
- all option can be reset to their default value at once.
|
||||
- all options in a certain sub - namespace can be reset at once.
|
||||
- the user can set / get / reset or ask for the description of an option.
|
||||
- a developer can register and mark an option as deprecated.
|
||||
- you can register a callback to be invoked when the option value
|
||||
is set or reset. Changing the stored value is considered misuse, but
|
||||
is not verboten.
|
||||
|
||||
Implementation
|
||||
==============
|
||||
|
||||
- Data is stored using nested dictionaries, and should be accessed
|
||||
through the provided API.
|
||||
|
||||
- "Registered options" and "Deprecated options" have metadata associated
|
||||
with them, which are stored in auxiliary dictionaries keyed on the
|
||||
fully-qualified key, e.g. "x.y.z.option".
|
||||
|
||||
- the config_init module is imported by the package's __init__.py file.
|
||||
placing any register_option() calls there will ensure those options
|
||||
are available as soon as pandas is loaded. If you use register_option
|
||||
in a module, it will only be available after that module is imported,
|
||||
which you should be aware of.
|
||||
|
||||
- `config_prefix` is a context_manager (for use with the `with` keyword)
|
||||
which can save developers some typing, see the docstring.
|
||||
|
||||
"""
|
||||
|
||||
from collections import namedtuple
|
||||
from contextlib import contextmanager
|
||||
import re
|
||||
from typing import Dict, List
|
||||
import warnings
|
||||
|
||||
# metadata record for a deprecated option: replacement key, message, version
DeprecatedOption = namedtuple("DeprecatedOption", "key msg rkey removal_ver")
# metadata record for a registered option: default, doc, validator, callback
RegisteredOption = namedtuple("RegisteredOption", "key defval doc validator cb")

# holds deprecated option metadata
_deprecated_options = {}  # type: Dict[str, DeprecatedOption]

# holds registered option metadata
_registered_options = {}  # type: Dict[str, RegisteredOption]

# holds the current values for registered options (nested dict keyed by
# the dotted path components)
_global_config = {}  # type: Dict[str, str]

# keys which have a special meaning
_reserved_keys = ["all"]  # type: List[str]
|
||||
|
||||
|
||||
class OptionError(AttributeError, KeyError):
    """
    Exception raised for pandas.options.

    Subclasses both ``AttributeError`` and ``KeyError`` so that existing
    ``except KeyError`` checks stay backwards compatible.
    """
|
||||
|
||||
|
||||
#
|
||||
# User API
|
||||
|
||||
|
||||
def _get_single_key(pat, silent):
    """
    Resolve *pat* to exactly one registered key.

    Raises OptionError when no key or more than one key matches.  Unless
    *silent*, a deprecation warning is emitted for deprecated keys, and
    the key is translated through any deprecation rerouting.
    """
    keys = _select_options(pat)
    if not keys:
        if not silent:
            _warn_if_deprecated(pat)
        raise OptionError("No such keys(s): {pat!r}".format(pat=pat))
    if len(keys) > 1:
        raise OptionError("Pattern matched multiple keys")

    key = keys[0]
    if not silent:
        _warn_if_deprecated(key)

    # reroute to the replacement key if this one is deprecated
    return _translate_key(key)
|
||||
|
||||
|
||||
def _get_option(pat, silent=False):
    """Return the current value of the single option matching *pat*."""
    full_key = _get_single_key(pat, silent)

    # walk the nested config dict down to the leaf holding the value
    parent, leaf = _get_root(full_key)
    return parent[leaf]
|
||||
|
||||
|
||||
def _set_option(*args, **kwargs):
    """
    Set one or more option values.

    Positional arguments are (key, value) pairs; the only accepted keyword
    is ``silent`` (default False), which suppresses deprecation warnings
    while resolving keys and muffles warnings raised by option callbacks.

    Raises
    ------
    ValueError
        If the number of positional arguments is zero or odd.
    TypeError
        If an unexpected keyword argument is passed.
    """
    # must have at least one complete (key, value) pair
    nargs = len(args)
    if not nargs or nargs % 2 != 0:
        raise ValueError("Must provide an even number of non-keyword arguments")

    # default to false
    silent = kwargs.pop("silent", False)

    if kwargs:
        msg = '_set_option() got an unexpected keyword argument "{kwarg}"'
        # BUG FIX: the template uses a *named* placeholder, so format()
        # must receive the value as a keyword; passing it positionally
        # raised KeyError('kwarg') instead of the intended TypeError.
        raise TypeError(msg.format(kwarg=list(kwargs.keys())[0]))

    for k, v in zip(args[::2], args[1::2]):
        key = _get_single_key(k, silent)

        o = _get_registered_option(key)
        if o and o.validator:
            o.validator(v)

        # walk the nested dict
        root, k = _get_root(key)
        root[k] = v

        # fire the option's callback, muffling warnings when silent
        # (guard on `o` for consistency with the validator check above)
        if o and o.cb:
            if silent:
                with warnings.catch_warnings(record=True):
                    o.cb(key)
            else:
                o.cb(key)
|
||||
|
||||
|
||||
def _describe_option(pat="", _print_desc=True):
    """
    Print (or, when ``_print_desc`` is False, return) the formatted
    descriptions of every registered option matching *pat*.
    """
    keys = _select_options(pat)
    if not keys:
        raise OptionError("No such keys(s)")

    # concatenate one formatted description per matching key
    description = "".join(_build_option_description(k) for k in keys)

    if _print_desc:
        print(description)
    else:
        return description
|
||||
|
||||
|
||||
def _reset_option(pat, silent=False):
    """
    Reset every option matching *pat* to its registered default value.

    To guard against accidental mass resets, patterns shorter than four
    characters that match several keys are rejected unless the special
    keyword "all" is used.
    """
    keys = _select_options(pat)
    if not keys:
        raise OptionError("No such keys(s)")

    if len(keys) > 1 and len(pat) < 4 and pat != "all":
        raise ValueError(
            "You must specify at least 4 characters when "
            "resetting multiple keys, use the special keyword "
            '"all" to reset all the options to their default '
            "value"
        )

    for key in keys:
        _set_option(key, _registered_options[key].defval, silent=silent)
|
||||
|
||||
|
||||
def get_default_val(pat):
    """Return the default value registered for the option matching *pat*."""
    full_key = _get_single_key(pat, silent=True)
    return _get_registered_option(full_key).defval
|
||||
|
||||
|
||||
class DictWrapper:
    """Provide attribute-style access to a nested dict of option values."""

    def __init__(self, d, prefix=""):
        # write through object.__setattr__ to bypass our own __setattr__,
        # which routes assignments to _set_option
        object.__setattr__(self, "d", d)
        object.__setattr__(self, "prefix", prefix)

    def _full_key(self, key):
        # join the accumulated dotted prefix with the attribute name
        prefix = object.__getattribute__(self, "prefix")
        return prefix + "." + key if prefix else key

    def __setattr__(self, key, val):
        # new keys cannot be created, and sub-trees cannot be overwritten;
        # only existing leaf options are assignable
        if key in self.d and not isinstance(self.d[key], dict):
            _set_option(self._full_key(key), val)
        else:
            raise OptionError("You can only set the value of existing options")

    def __getattr__(self, key):
        try:
            v = object.__getattribute__(self, "d")[key]
        except KeyError:
            raise OptionError("No such option")
        if isinstance(v, dict):
            # intermediate namespace: wrap it so chained access keeps working
            return DictWrapper(v, self._full_key(key))
        return _get_option(self._full_key(key))

    def __dir__(self):
        return list(self.d.keys())
|
||||
|
||||
|
||||
# For user convenience, we'd like to have the available options described
|
||||
# in the docstring. For dev convenience we'd like to generate the docstrings
|
||||
# dynamically instead of maintaining them by hand. To this, we use the
|
||||
# class below which wraps functions inside a callable, and converts
|
||||
# __doc__ into a property function. The doctsrings below are templates
|
||||
# using the py2.6+ advanced formatting syntax to plug in a concise list
|
||||
# of options, and option descriptions.
|
||||
|
||||
|
||||
class CallableDynamicDoc:
    """
    Callable wrapper whose ``__doc__`` is rendered on demand from a
    template, so the docstring always reflects the currently registered
    options.
    """

    def __init__(self, func, doc_tmpl):
        self.__doc_tmpl__ = doc_tmpl
        self.__func__ = func

    def __call__(self, *args, **kwds):
        return self.__func__(*args, **kwds)

    @property
    def __doc__(self):
        # regenerate the option listing and descriptions at access time
        current_desc = _describe_option("all", _print_desc=False)
        current_list = pp_options_list(list(_registered_options.keys()))
        return self.__doc_tmpl__.format(
            opts_desc=current_desc, opts_list=current_list
        )
|
||||
|
||||
|
||||
_get_option_tmpl = """
|
||||
get_option(pat)
|
||||
|
||||
Retrieves the value of the specified option.
|
||||
|
||||
Available options:
|
||||
|
||||
{opts_list}
|
||||
|
||||
Parameters
|
||||
----------
|
||||
pat : str
|
||||
Regexp which should match a single option.
|
||||
Note: partial matches are supported for convenience, but unless you use the
|
||||
full option name (e.g. x.y.z.option_name), your code may break in future
|
||||
versions if new options with similar names are introduced.
|
||||
|
||||
Returns
|
||||
-------
|
||||
result : the value of the option
|
||||
|
||||
Raises
|
||||
------
|
||||
OptionError : if no such option exists
|
||||
|
||||
Notes
|
||||
-----
|
||||
The available options with its descriptions:
|
||||
|
||||
{opts_desc}
|
||||
"""
|
||||
|
||||
_set_option_tmpl = """
|
||||
set_option(pat, value)
|
||||
|
||||
Sets the value of the specified option.
|
||||
|
||||
Available options:
|
||||
|
||||
{opts_list}
|
||||
|
||||
Parameters
|
||||
----------
|
||||
pat : str
|
||||
Regexp which should match a single option.
|
||||
Note: partial matches are supported for convenience, but unless you use the
|
||||
full option name (e.g. x.y.z.option_name), your code may break in future
|
||||
versions if new options with similar names are introduced.
|
||||
value : object
|
||||
New value of option.
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
|
||||
Raises
|
||||
------
|
||||
OptionError if no such option exists
|
||||
|
||||
Notes
|
||||
-----
|
||||
The available options with its descriptions:
|
||||
|
||||
{opts_desc}
|
||||
"""
|
||||
|
||||
_describe_option_tmpl = """
|
||||
describe_option(pat, _print_desc=False)
|
||||
|
||||
Prints the description for one or more registered options.
|
||||
|
||||
Call with not arguments to get a listing for all registered options.
|
||||
|
||||
Available options:
|
||||
|
||||
{opts_list}
|
||||
|
||||
Parameters
|
||||
----------
|
||||
pat : str
|
||||
Regexp pattern. All matching keys will have their description displayed.
|
||||
_print_desc : bool, default True
|
||||
If True (default) the description(s) will be printed to stdout.
|
||||
Otherwise, the description(s) will be returned as a unicode string
|
||||
(for testing).
|
||||
|
||||
Returns
|
||||
-------
|
||||
None by default, the description(s) as a unicode string if _print_desc
|
||||
is False
|
||||
|
||||
Notes
|
||||
-----
|
||||
The available options with its descriptions:
|
||||
|
||||
{opts_desc}
|
||||
"""
|
||||
|
||||
_reset_option_tmpl = """
|
||||
reset_option(pat)
|
||||
|
||||
Reset one or more options to their default value.
|
||||
|
||||
Pass "all" as argument to reset all options.
|
||||
|
||||
Available options:
|
||||
|
||||
{opts_list}
|
||||
|
||||
Parameters
|
||||
----------
|
||||
pat : str/regex
|
||||
If specified only options matching `prefix*` will be reset.
|
||||
Note: partial matches are supported for convenience, but unless you
|
||||
use the full option name (e.g. x.y.z.option_name), your code may break
|
||||
in future versions if new options with similar names are introduced.
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
|
||||
Notes
|
||||
-----
|
||||
The available options with its descriptions:
|
||||
|
||||
{opts_desc}
|
||||
"""
|
||||
|
||||
# bind the functions with their docstrings into a Callable
|
||||
# and use that as the functions exposed in pd.api
|
||||
get_option = CallableDynamicDoc(_get_option, _get_option_tmpl)
|
||||
set_option = CallableDynamicDoc(_set_option, _set_option_tmpl)
|
||||
reset_option = CallableDynamicDoc(_reset_option, _reset_option_tmpl)
|
||||
describe_option = CallableDynamicDoc(_describe_option, _describe_option_tmpl)
|
||||
options = DictWrapper(_global_config)
|
||||
|
||||
#
|
||||
# Functions for use by pandas developers, in addition to User - api
|
||||
|
||||
|
||||
class option_context:
    """
    Context manager that temporarily sets options inside a ``with`` block
    and restores the previous values on exit.

    You need to invoke as ``option_context(pat, val, [(pat, val), ...])``.

    Examples
    --------

    >>> with option_context('display.max_rows', 10, 'display.max_columns', 5):
    ...     ...
    """

    def __init__(self, *args):
        # require one or more complete (pat, val) pairs
        if len(args) < 2 or len(args) % 2 != 0:
            raise ValueError(
                "Need to invoke as option_context(pat, val, [(pat, val), ...])."
            )
        self.ops = list(zip(args[::2], args[1::2]))

    def __enter__(self):
        # snapshot current values so __exit__ can restore them
        self.undo = [(pat, _get_option(pat, silent=True)) for pat, val in self.ops]

        for pat, val in self.ops:
            _set_option(pat, val, silent=True)

    def __exit__(self, *args):
        if self.undo:
            for pat, val in self.undo:
                _set_option(pat, val, silent=True)
|
||||
|
||||
|
||||
def register_option(key, defval, doc="", validator=None, cb=None):
    """Register an option in the package-wide pandas config object

    Parameters
    ----------
    key : str
        Fully-qualified key, e.g. "x.y.option - z".
    defval : object
        Default value of the option.
    doc : str
        Description of the option.
    validator : callable, optional
        Single-argument function; should raise ``ValueError`` when called
        with a value that is not legal for this option.
    cb : callable, optional
        Single-argument function called with the full option name
        immediately after the option value is set or reset.

    Raises
    ------
    ValueError if `validator` is specified and `defval` is not a valid value.
    """
    import tokenize
    import keyword

    key = key.lower()

    if key in _registered_options:
        raise OptionError(
            "Option '{key}' has already been registered".format(key=key)
        )
    if key in _reserved_keys:
        raise OptionError("Option '{key}' is a reserved key".format(key=key))

    # the default value itself must pass validation
    if validator:
        validator(defval)

    path = key.split(".")

    # every path component must be a legal, non-keyword identifier
    for k in path:
        if not bool(re.match("^" + tokenize.Name + "$", k)):
            raise ValueError("{k} is not a valid identifier".format(k=k))
        if keyword.iskeyword(k):
            raise ValueError("{k} is a python keyword".format(k=k))

    # walk the nested dict, creating intermediate dicts as needed; a
    # non-dict along the way means a prefix is itself already an option
    cursor = _global_config
    msg = "Path prefix to option '{option}' is already an option"
    for i, p in enumerate(path[:-1]):
        if not isinstance(cursor, dict):
            raise OptionError(msg.format(option=".".join(path[:i])))
        cursor = cursor.setdefault(p, {})

    if not isinstance(cursor, dict):
        raise OptionError(msg.format(option=".".join(path[:-1])))

    cursor[path[-1]] = defval  # seed the current value with the default

    # save the option metadata
    _registered_options[key] = RegisteredOption(
        key=key, defval=defval, doc=doc, validator=validator, cb=cb
    )
|
||||
|
||||
|
||||
def deprecate_option(key, msg=None, rkey=None, removal_ver=None):
    """
    Mark option `key` as deprecated.

    Any subsequent access to the key produces a warning, using `msg` if
    given or a default message otherwise.  If `rkey` is given, access to
    `key` (set/get/reset) is re-routed to `rkey`.

    Neither the existence of `key` nor of `rkey` is checked here; if they
    do not exist, later access fails as usual after the deprecation
    warning is emitted.

    Parameters
    ----------
    key : str
        Fully-qualified name of the option to deprecate (e.g. "x.y.z.rkey").
    msg : str, optional
        Warning message emitted when the key is referenced; a default
        message is used when omitted.
    rkey : str, optional
        Fully-qualified option name to reroute access to; also used by
        the default message.
    removal_ver : optional
        Version in which this option will be removed; used by the default
        message when no `msg` is given.

    Raises
    ------
    OptionError - if key has already been deprecated.
    """
    key = key.lower()

    if key in _deprecated_options:
        raise OptionError(
            "Option '{key}' has already been defined as deprecated.".format(key=key)
        )

    _deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver)
|
||||
|
||||
|
||||
#
|
||||
# functions internal to the module
|
||||
|
||||
|
||||
def _select_options(pat):
    """
    Return a list of registered keys matching *pat*.

    The reserved pattern "all" returns every registered option.
    """
    # exact key: short-circuit the regex scan
    if pat in _registered_options:
        return [pat]

    keys = sorted(_registered_options.keys())
    if pat == "all":  # reserved key
        return keys

    # otherwise treat the pattern as a case-insensitive regex
    return [k for k in keys if re.search(pat, k, re.I)]
|
||||
|
||||
|
||||
def _get_root(key):
    """Walk the nested config dict; return (parent_dict, leaf_key)."""
    *parents, leaf = key.split(".")
    cursor = _global_config
    for part in parents:
        cursor = cursor[part]
    return cursor, leaf
|
||||
|
||||
|
||||
def _is_deprecated(key):
    """Return True if the given option has been deprecated."""
    return key.lower() in _deprecated_options
|
||||
|
||||
|
||||
def _get_deprecated_option(key):
    """
    Retrieve the metadata for a deprecated option, if `key` is deprecated.

    Returns
    -------
    DeprecatedOption (namedtuple) if key is deprecated, None otherwise
    """
    return _deprecated_options.get(key)
|
||||
|
||||
|
||||
def _get_registered_option(key):
    """
    Retrieves the option metadata if `key` is a registered option.

    Returns
    -------
    RegisteredOption (namedtuple) if key is registered, None otherwise
    """
    return _registered_options.get(key)
|
||||
|
||||
|
||||
def _translate_key(key):
    """
    Return the replacement key when *key* is deprecated and rerouted;
    otherwise return *key* as-is.
    """
    d = _get_deprecated_option(key)
    return (d.rkey or key) if d else key
|
||||
|
||||
|
||||
def _warn_if_deprecated(key):
    """
    Check whether *key* is a deprecated option; if so, emit a warning.

    Returns
    -------
    bool - True if `key` is deprecated, False otherwise.
    """
    d = _get_deprecated_option(key)
    if not d:
        return False

    if d.msg:
        # a custom message was supplied: print it and warn with it verbatim
        print(d.msg)
        warnings.warn(d.msg, FutureWarning)
    else:
        # build the default message from the available metadata
        msg = "'{key}' is deprecated".format(key=key)
        if d.removal_ver:
            msg += " and will be removed in {version}".format(version=d.removal_ver)
        if d.rkey:
            msg += ", please use '{rkey}' instead.".format(rkey=d.rkey)
        else:
            msg += ", please refrain from using it."

        warnings.warn(msg, FutureWarning)
    return True
|
||||
|
||||
|
||||
def _build_option_description(k):
    """
    Build and return a formatted description of the registered option *k*,
    including its default/current value and any deprecation notice.
    """
    o = _get_registered_option(k)
    d = _get_deprecated_option(k)

    s = "{k} ".format(k=k)

    # BUG FIX: the original dereferenced o.doc before the `if o:` guard
    # below, so an unregistered (None) option crashed with AttributeError
    # instead of rendering a description.
    if o and o.doc:
        s += "\n".join(o.doc.strip().split("\n"))
    else:
        s += "No description available."

    if o:
        s += "\n    [default: {default}] [currently: {current}]".format(
            default=o.defval, current=_get_option(k, True)
        )

    if d:
        s += "\n    (Deprecated"
        s += ", use `{rkey}` instead.".format(rkey=d.rkey if d.rkey else "")
        s += ")"

    return s
|
||||
|
||||
|
||||
def pp_options_list(keys, width=80, _print=False):
    """Build a concise listing of available options, grouped by prefix."""
    from textwrap import wrap
    from itertools import groupby

    def render(name, ks):
        # options sharing a prefix render as "- prefix.[a, b, ...]"
        pfx = "- " + name + ".[" if name else ""
        wrapped = wrap(
            ", ".join(ks),
            width,
            initial_indent=pfx,
            subsequent_indent=" ",
            break_long_words=False,
        )
        if wrapped and wrapped[-1] and name:
            wrapped[-1] += "]"
        return wrapped

    lines = []
    singles = [k for k in sorted(keys) if "." not in k]
    if singles:
        lines += render("", singles)
    dotted = [k for k in keys if "." in k]

    for prefix, grp in groupby(sorted(dotted), lambda x: x[: x.rfind(".")]):
        lines += render(prefix, [x[len(prefix) + 1 :] for x in grp])

    out = "\n".join(lines)
    if _print:
        print(out)
    else:
        return out
|
||||
|
||||
|
||||
#
|
||||
# helpers
|
||||
|
||||
|
||||
@contextmanager
def config_prefix(prefix):
    """contextmanager for multiple invocations of API with a common prefix

    supported API functions: (register / get / set )_option

    Warning: This is not thread-safe, and won't work properly if you import
    the API functions into your module using the "from x import y" construct.

    Example:

    import pandas._config.config as cf
    with cf.config_prefix("display.font"):
        cf.register_option("color", "red")
        cf.register_option("size", " 5 pt")
        cf.set_option(size, " 6 pt")
        cf.get_option(size)
        ...

    etc'

    will register options "display.font.color", "display.font.size", set the
    value of "display.font.size"... and so on.
    """

    # Note: reset_option relies on set_option, and on key directly
    # it does not fit in to this monkey-patching scheme

    global register_option, get_option, set_option, reset_option

    def wrap(func):
        # wrap an API function so that the bare key is expanded to
        # "<prefix>.<key>" before the real function is called
        def inner(key, *args, **kwds):
            pkey = "{prefix}.{key}".format(prefix=prefix, key=key)
            return func(pkey, *args, **kwds)

        return inner

    # stash the unwrapped module-level functions so they can be restored
    _register_option = register_option
    _get_option = get_option
    _set_option = set_option
    set_option = wrap(set_option)
    get_option = wrap(get_option)
    register_option = wrap(register_option)
    yield None
    # restore the original (unprefixed) API functions on exit
    set_option = _set_option
    get_option = _get_option
    register_option = _register_option
|
||||
|
||||
|
||||
# These factories and methods are handy for use as the validator
|
||||
# arg in register_option
|
||||
|
||||
|
||||
def is_type_factory(_type):
    """
    Parameters
    ----------
    `_type` - a type to be compared against (e.g. type(x) == `_type`)

    Returns
    -------
    validator - a function of a single argument x , which raises
        ValueError if type(x) is not equal to `_type`

    """

    def inner(x):
        # Exact type comparison on purpose: isinstance would let subclasses
        # through (e.g. bool satisfying an int validator).
        if type(x) != _type:
            raise ValueError("Value must have type '{typ!s}'".format(typ=_type))

    return inner
|
||||
|
||||
|
||||
def is_instance_factory(_type):
    """
    Parameters
    ----------
    `_type` - the type to be checked against

    Returns
    -------
    validator - a function of a single argument x , which raises
        ValueError if x is not an instance of `_type`

    """
    if isinstance(_type, (tuple, list)):
        # normalize to a tuple so isinstance() accepts it
        _type = tuple(_type)
        type_repr = "|".join(str(t) for t in _type)
    else:
        type_repr = "'{typ}'".format(typ=_type)

    def inner(x):
        if isinstance(x, _type):
            return
        raise ValueError(
            "Value must be an instance of {type_repr}".format(type_repr=type_repr)
        )

    return inner
|
||||
|
||||
|
||||
def is_one_of_factory(legal_values):
    """Build a validator that accepts members of ``legal_values``; callables
    in ``legal_values`` act as extra predicate checks on the candidate."""
    predicates = [c for c in legal_values if callable(c)]
    allowed = [c for c in legal_values if not callable(c)]

    def inner(x):
        if x in allowed:
            return
        if any(check(x) for check in predicates):
            return
        pp_values = "|".join(str(lval) for lval in allowed)
        msg = "Value must be one of {pp_values}"
        if len(predicates):
            msg += " or a callable"
        raise ValueError(msg.format(pp_values=pp_values))

    return inner
|
||||
|
||||
|
||||
# common type validators, for convenience
# usage: register_option(... , validator = is_int)
# NOTE: these use exact type matching (is_type_factory), so e.g. numpy
# scalar subclasses and bool-for-int do NOT pass.
is_int = is_type_factory(int)
is_bool = is_type_factory(bool)
is_float = is_type_factory(float)
is_str = is_type_factory(str)
# isinstance-based: accepts str or bytes, including subclasses
is_text = is_instance_factory((str, bytes))
|
||||
|
||||
|
||||
def is_callable(obj):
    """
    Parameters
    ----------
    `obj` - the object to be checked

    Returns
    -------
    validator - returns True if object is callable
        raises ValueError otherwise.

    """
    if callable(obj):
        return True
    raise ValueError("Value must be a callable")
|
||||
23
venv/lib/python3.6/site-packages/pandas/_config/dates.py
Normal file
23
venv/lib/python3.6/site-packages/pandas/_config/dates.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""
|
||||
config for datetime formatting
|
||||
"""
|
||||
from pandas._config import config as cf
|
||||
|
||||
pc_date_dayfirst_doc = """
|
||||
: boolean
|
||||
When True, prints and parses dates with the day first, eg 20/01/2005
|
||||
"""
|
||||
|
||||
pc_date_yearfirst_doc = """
|
||||
: boolean
|
||||
When True, prints and parses dates with the year first, eg 2005/01/20
|
||||
"""
|
||||
|
||||
with cf.config_prefix("display"):
|
||||
# Needed upstream of `_libs` because these are used in tslibs.parsing
|
||||
cf.register_option(
|
||||
"date_dayfirst", False, pc_date_dayfirst_doc, validator=cf.is_bool
|
||||
)
|
||||
cf.register_option(
|
||||
"date_yearfirst", False, pc_date_yearfirst_doc, validator=cf.is_bool
|
||||
)
|
||||
56
venv/lib/python3.6/site-packages/pandas/_config/display.py
Normal file
56
venv/lib/python3.6/site-packages/pandas/_config/display.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""
|
||||
Unopinionated display configuration.
|
||||
"""
|
||||
import locale
|
||||
import sys
|
||||
|
||||
from pandas._config import config as cf
|
||||
|
||||
# -----------------------------------------------------------------------------
# Global formatting options

# Interpreter default encoding captured on first call; kept for debugging
# because MPL backends may change it later (GH#3360).
_initial_defencoding = None


def detect_console_encoding():
    """
    Try to find the most capable encoding supported by the console.
    slightly modified from the way IPython handles the same issue.
    """
    global _initial_defencoding

    def _usable(enc):
        # missing or ascii-ish encodings are not good enough
        return bool(enc) and "ascii" not in enc.lower()

    try:
        encoding = sys.stdout.encoding or sys.stdin.encoding
    except (AttributeError, IOError):
        encoding = None

    # try again for something better
    if not _usable(encoding):
        try:
            encoding = locale.getpreferredencoding()
        except Exception:
            pass

    # when all else fails. this will usually be "ascii"
    if not _usable(encoding):
        encoding = sys.getdefaultencoding()

    # GH#3360, save the reported defencoding at import time
    # MPL backends may change it. Make available for debugging.
    if not _initial_defencoding:
        _initial_defencoding = sys.getdefaultencoding()

    return encoding
|
||||
|
||||
|
||||
pc_encoding_doc = """
|
||||
: str/unicode
|
||||
Defaults to the detected encoding of the console.
|
||||
Specifies the encoding to be used for strings returned by to_string,
|
||||
these are generally strings meant to be displayed on the console.
|
||||
"""
|
||||
|
||||
with cf.config_prefix("display"):
|
||||
cf.register_option(
|
||||
"encoding", detect_console_encoding(), pc_encoding_doc, validator=cf.is_text
|
||||
)
|
||||
162
venv/lib/python3.6/site-packages/pandas/_config/localization.py
Normal file
162
venv/lib/python3.6/site-packages/pandas/_config/localization.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
Helpers for configuring locale settings.
|
||||
|
||||
Name `localization` is chosen to avoid overlap with builtin `locale` module.
|
||||
"""
|
||||
from contextlib import contextmanager
|
||||
import locale
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from pandas._config.config import options
|
||||
|
||||
|
||||
@contextmanager
|
||||
def set_locale(new_locale, lc_var=locale.LC_ALL):
|
||||
"""
|
||||
Context manager for temporarily setting a locale.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
new_locale : str or tuple
|
||||
A string of the form <language_country>.<encoding>. For example to set
|
||||
the current locale to US English with a UTF8 encoding, you would pass
|
||||
"en_US.UTF-8".
|
||||
lc_var : int, default `locale.LC_ALL`
|
||||
The category of the locale being set.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This is useful when you want to run a particular block of code under a
|
||||
particular locale, without globally setting the locale. This probably isn't
|
||||
thread-safe.
|
||||
"""
|
||||
current_locale = locale.getlocale()
|
||||
|
||||
try:
|
||||
locale.setlocale(lc_var, new_locale)
|
||||
normalized_locale = locale.getlocale()
|
||||
if all(x is not None for x in normalized_locale):
|
||||
yield ".".join(normalized_locale)
|
||||
else:
|
||||
yield new_locale
|
||||
finally:
|
||||
locale.setlocale(lc_var, current_locale)
|
||||
|
||||
|
||||
def can_set_locale(lc, lc_var=locale.LC_ALL):
    """
    Check to see if we can set a locale, and subsequently get the locale,
    without raising an Exception.

    Parameters
    ----------
    lc : str
        The locale to attempt to set.
    lc_var : int, default `locale.LC_ALL`
        The category of the locale being set.

    Returns
    -------
    is_valid : bool
        Whether the passed locale can be set
    """
    try:
        # entering and leaving the context performs the full set/restore
        # round-trip we want to probe
        with set_locale(lc, lc_var=lc_var):
            pass
    except (ValueError, locale.Error):
        # horrible name for a Exception subclass
        return False
    return True
|
||||
|
||||
|
||||
def _valid_locales(locales, normalize):
    """
    Return a list of normalized locales that do not throw an ``Exception``
    when set.

    Parameters
    ----------
    locales : str
        A string where each locale is separated by a newline.
    normalize : bool
        Whether to call ``locale.normalize`` on each locale.

    Returns
    -------
    valid_locales : list
        A list of valid locales.
    """
    if normalize:

        def _clean(raw):
            return locale.normalize(raw.strip())

    else:

        def _clean(raw):
            return raw.strip()

    return [loc for loc in map(_clean, locales) if can_set_locale(loc)]
|
||||
|
||||
|
||||
def _default_locale_getter():
    """Return the raw bytes output of ``locale -a`` (one locale per line).

    Re-raises the CalledProcessError with a friendlier message when the
    command fails.
    """
    try:
        # NOTE(review): a one-element list with shell=True means only the
        # first element is used as the shell command string. This works, but
        # a plain string would express the intent more directly.
        raw_locales = subprocess.check_output(["locale -a"], shell=True)
    except subprocess.CalledProcessError as e:
        raise type(e)(
            "{exception}, the 'locale -a' command cannot be found "
            "on your system".format(exception=e)
        )
    return raw_locales
|
||||
|
||||
|
||||
def get_locales(prefix=None, normalize=True, locale_getter=_default_locale_getter):
    """
    Get all the locales that are available on the system.

    Parameters
    ----------
    prefix : str
        If not ``None`` then return only those locales with the prefix
        provided. For example to get all English language locales (those that
        start with ``"en"``), pass ``prefix="en"``.
    normalize : bool
        Call ``locale.normalize`` on the resulting list of available locales.
        If ``True``, only locales that can be set without throwing an
        ``Exception`` are returned.
    locale_getter : callable
        The function to use to retrieve the current locales. This should return
        a string with each locale separated by a newline character.

    Returns
    -------
    locales : list of strings
        A list of locale strings that can be set with ``locale.setlocale()``.
        For example::

            locale.setlocale(locale.LC_ALL, locale_string)

        On error will return None (no locale available, e.g. Windows)

    """
    try:
        raw_locales = locale_getter()
    except Exception:
        return None

    # raw_locales is "\n" separated list of locales
    # it may contain non-decodable parts, so split
    # extract what we can and then rejoin.
    #
    # Bug fix: out_locales is initialized before the try so that a TypeError
    # below (e.g. locale_getter returned str, not bytes) no longer leaves the
    # name unbound and triggers a NameError further down.
    out_locales = []
    try:
        for x in raw_locales.split(b"\n"):
            out_locales.append(str(x, encoding=options.display.encoding))

    except TypeError:
        pass

    if prefix is None:
        return _valid_locales(out_locales, normalize)

    pattern = re.compile("{prefix}.*".format(prefix=prefix))
    found = pattern.findall("\n".join(out_locales))
    return _valid_locales(found, normalize)
|
||||
11
venv/lib/python3.6/site-packages/pandas/_libs/__init__.py
Normal file
11
venv/lib/python3.6/site-packages/pandas/_libs/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
# flake8: noqa
|
||||
|
||||
from .tslibs import (
|
||||
NaT,
|
||||
NaTType,
|
||||
OutOfBoundsDatetime,
|
||||
Period,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
iNaT,
|
||||
)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,9 @@
|
||||
# flake8: noqa
|
||||
|
||||
from .conversion import localize_pydatetime, normalize_date
|
||||
from .nattype import NaT, NaTType, iNaT, is_null_datetimelike
|
||||
from .np_datetime import OutOfBoundsDatetime
|
||||
from .period import IncompatibleFrequency, Period
|
||||
from .timedeltas import Timedelta, delta_to_nanoseconds, ints_to_pytimedelta
|
||||
from .timestamps import Timestamp
|
||||
from .tzconversion import tz_convert_single
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
34
venv/lib/python3.6/site-packages/pandas/_typing.py
Normal file
34
venv/lib/python3.6/site-packages/pandas/_typing.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from pathlib import Path
|
||||
from typing import IO, AnyStr, TypeVar, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import Timestamp
|
||||
from pandas._libs.tslibs.period import Period
|
||||
from pandas._libs.tslibs.timedeltas import Timedelta
|
||||
|
||||
from pandas.core.dtypes.dtypes import ExtensionDtype
|
||||
from pandas.core.dtypes.generic import (
|
||||
ABCDataFrame,
|
||||
ABCExtensionArray,
|
||||
ABCIndexClass,
|
||||
ABCSeries,
|
||||
ABCSparseSeries,
|
||||
)
|
||||
|
||||
# Any pandas array-ish container, including raw numpy arrays.
AnyArrayLike = TypeVar(
    "AnyArrayLike",
    ABCExtensionArray,
    ABCIndexClass,
    ABCSeries,
    ABCSparseSeries,
    np.ndarray,
)
# Raw array storage: numpy ndarray or an ExtensionArray.
ArrayLike = TypeVar("ArrayLike", ABCExtensionArray, np.ndarray)
# Scalar datetime-like values.
DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", Period, Timestamp, Timedelta)
# Anything accepted where a dtype is expected.
Dtype = Union[str, np.dtype, ExtensionDtype]
# Path-like or open file handle accepted by the IO routines.
FilePathOrBuffer = Union[str, Path, IO[AnyStr]]

# Bound for functions that accept and return the same pandas container type.
FrameOrSeries = TypeVar("FrameOrSeries", ABCSeries, ABCDataFrame)
Scalar = Union[str, int, float]
Axis = Union[str, int]
||||
23
venv/lib/python3.6/site-packages/pandas/_version.py
Normal file
23
venv/lib/python3.6/site-packages/pandas/_version.py
Normal file
@@ -0,0 +1,23 @@
|
||||
|
||||
# This file was generated by 'versioneer.py' (0.15) from
# revision-control system data, or from the parent directory name of an
# unpacked source archive. Distribution tarballs contain a pre-generated copy
# of this file.

from warnings import catch_warnings

with catch_warnings(record=True):
    import json
    import sys

# Pre-rendered version metadata baked in at build time by versioneer.
version_json = '''
{
 "dirty": false,
 "error": null,
 "full-revisionid": "d1accd032b648c9affd6dce1f81feb9c99422483",
 "version": "0.25.0"
}
'''  # END VERSION_JSON


def get_versions():
    """Return the baked-in version metadata as a dict."""
    return json.loads(version_json)
|
||||
2
venv/lib/python3.6/site-packages/pandas/api/__init__.py
Normal file
2
venv/lib/python3.6/site-packages/pandas/api/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
""" public toolkit API """
|
||||
from . import extensions, types # noqa
|
||||
@@ -0,0 +1,13 @@
|
||||
"""Public API for extending pandas objects."""
|
||||
from pandas.core.dtypes.dtypes import ( # noqa: F401
|
||||
ExtensionDtype,
|
||||
register_extension_dtype,
|
||||
)
|
||||
|
||||
from pandas.core.accessor import ( # noqa: F401
|
||||
register_dataframe_accessor,
|
||||
register_index_accessor,
|
||||
register_series_accessor,
|
||||
)
|
||||
from pandas.core.algorithms import take # noqa: F401
|
||||
from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin # noqa: F401
|
||||
@@ -0,0 +1,12 @@
|
||||
""" public toolkit API """
|
||||
|
||||
from pandas._libs.lib import infer_dtype # noqa: F401
|
||||
|
||||
from pandas.core.dtypes.api import * # noqa: F403, F401
|
||||
from pandas.core.dtypes.concat import union_categoricals # noqa: F401
|
||||
from pandas.core.dtypes.dtypes import ( # noqa: F401
|
||||
CategoricalDtype,
|
||||
DatetimeTZDtype,
|
||||
IntervalDtype,
|
||||
PeriodDtype,
|
||||
)
|
||||
26
venv/lib/python3.6/site-packages/pandas/arrays/__init__.py
Normal file
26
venv/lib/python3.6/site-packages/pandas/arrays/__init__.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""
|
||||
All of pandas' ExtensionArrays.
|
||||
|
||||
See :ref:`extending.extension-types` for more.
|
||||
"""
|
||||
from pandas.core.arrays import (
|
||||
Categorical,
|
||||
DatetimeArray,
|
||||
IntegerArray,
|
||||
IntervalArray,
|
||||
PandasArray,
|
||||
PeriodArray,
|
||||
SparseArray,
|
||||
TimedeltaArray,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"Categorical",
|
||||
"DatetimeArray",
|
||||
"IntegerArray",
|
||||
"IntervalArray",
|
||||
"PandasArray",
|
||||
"PeriodArray",
|
||||
"SparseArray",
|
||||
"TimedeltaArray",
|
||||
]
|
||||
66
venv/lib/python3.6/site-packages/pandas/compat/__init__.py
Normal file
66
venv/lib/python3.6/site-packages/pandas/compat/__init__.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""
|
||||
compat
|
||||
======
|
||||
|
||||
Cross-compatible functions for different versions of Python.
|
||||
|
||||
Other items:
|
||||
* platform checker
|
||||
"""
|
||||
import platform
|
||||
import struct
|
||||
import sys
|
||||
|
||||
# Python/interpreter feature flags used for version gating elsewhere.
PY36 = sys.version_info >= (3, 6)
PY37 = sys.version_info >= (3, 7)
# True when running under PyPy rather than CPython.
PYPY = platform.python_implementation() == "PyPy"
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# functions largely based / taken from the six module
|
||||
|
||||
# Much of the code in this module comes from Benjamin Peterson's six library.
|
||||
# The license for this library can be found in LICENSES/SIX and the code can be
|
||||
# found at https://bitbucket.org/gutworth/six
|
||||
|
||||
|
||||
def set_function_name(f, name, cls):
    """
    Bind the name/qualname attributes of the function

    so that ``f`` reports itself as the method ``name`` of ``cls``.
    Returns ``f`` (mutated in place) for convenience.
    """
    qualname = "{klass}.{name}".format(klass=cls.__name__, name=name)
    f.__module__ = cls.__module__
    f.__name__ = name
    f.__qualname__ = qualname
    return f
|
||||
|
||||
|
||||
def raise_with_traceback(exc, traceback=Ellipsis):
    """
    Raise exception with existing traceback.
    If traceback is not passed, uses sys.exc_info() to get traceback.
    """
    if traceback == Ellipsis:
        # Ellipsis is the "not provided" sentinel (None is a valid traceback)
        traceback = sys.exc_info()[2]
    raise exc.with_traceback(traceback)
|
||||
|
||||
|
||||
# https://github.com/pandas-dev/pandas/pull/9123
|
||||
def is_platform_little_endian():
    """Return True when the native byte order is little-endian."""
    return sys.byteorder == "little"
|
||||
|
||||
|
||||
def is_platform_windows():
    """Return True on native Windows or Cygwin."""
    return sys.platform in ("win32", "cygwin")
|
||||
|
||||
|
||||
def is_platform_linux():
    """Return True when running on Linux.

    Bug fix: Python 3 reports ``sys.platform == "linux"``; the historical
    ``"linux2"`` value is Python 2 only, so the previous exact comparison was
    always False on Python 3. Match by prefix to cover both.
    """
    return sys.platform.startswith("linux")
|
||||
|
||||
|
||||
def is_platform_mac():
    """Return True when running on macOS (Darwin)."""
    return sys.platform == "darwin"
|
||||
|
||||
|
||||
def is_platform_32bit():
    """Return True when pointers are narrower than 64 bits."""
    # calcsize("P") is the pointer width in bytes
    return struct.calcsize("P") < 8
|
||||
111
venv/lib/python3.6/site-packages/pandas/compat/_optional.py
Normal file
111
venv/lib/python3.6/site-packages/pandas/compat/_optional.py
Normal file
@@ -0,0 +1,111 @@
|
||||
import distutils.version
|
||||
import importlib
|
||||
import types
|
||||
import warnings
|
||||
|
||||
# Update install.rst when updating versions!

# Minimum supported versions of optional dependencies, keyed by the
# importable module name (consumed by import_optional_dependency below).
VERSIONS = {
    "bs4": "4.6.0",
    "bottleneck": "1.2.1",
    "fastparquet": "0.2.1",
    "gcsfs": "0.2.2",
    "lxml.etree": "3.8.0",
    "matplotlib": "2.2.2",
    "numexpr": "2.6.2",
    "odfpy": "1.3.0",
    "openpyxl": "2.4.8",
    "pandas_gbq": "0.8.0",
    "pyarrow": "0.9.0",
    "pytables": "3.4.2",
    "s3fs": "0.0.8",
    "scipy": "0.19.0",
    "sqlalchemy": "1.1.4",
    "tables": "3.4.2",
    "xarray": "0.8.2",
    "xlrd": "1.1.0",
    "xlwt": "1.2.0",
    "xlsxwriter": "0.9.8",
}

# Template for the ImportError raised when an optional dependency is missing.
message = (
    "Missing optional dependency '{name}'. {extra} "
    "Use pip or conda to install {name}."
)
# Template for the error/warning emitted when a dependency is too old.
version_message = (
    "Pandas requires version '{minimum_version}' or newer of '{name}' "
    "(version '{actual_version}' currently installed)."
)
|
||||
|
||||
|
||||
def _get_version(module: types.ModuleType) -> str:
|
||||
version = getattr(module, "__version__", None)
|
||||
if version is None:
|
||||
# xlrd uses a capitalized attribute name
|
||||
version = getattr(module, "__VERSION__", None)
|
||||
|
||||
if version is None:
|
||||
raise ImportError("Can't determine version for {}".format(module.__name__))
|
||||
return version
|
||||
|
||||
|
||||
def import_optional_dependency(
    name: str, extra: str = "", raise_on_missing: bool = True, on_version: str = "raise"
):
    """
    Import an optional dependency.

    By default, if a dependency is missing an ImportError with a nice
    message will be raised. If a dependency is present, but too old,
    we raise.

    Parameters
    ----------
    name : str
        The module name. This should be top-level only, so that the
        version may be checked.
    extra : str
        Additional text to include in the ImportError message.
    raise_on_missing : bool, default True
        Whether to raise if the optional dependency is not found.
        When False and the module is not present, None is returned.
    on_version : str {'raise', 'warn'}
        What to do when a dependency's version is too old.

        * raise : Raise an ImportError
        * warn : Warn that the version is too old. Returns None
        * ignore: Return the module, even if the version is too old.
          It's expected that users validate the version locally when
          using ``on_version="ignore"`` (see. ``io/html.py``)

    Returns
    -------
    maybe_module : Optional[ModuleType]
        The imported module, when found and the version is correct.
        None is returned when the package is not found and `raise_on_missing`
        is False, or when the package's version is too old and `on_version`
        is ``'warn'``.

    Raises
    ------
    ValueError
        If ``on_version`` is not one of 'warn', 'raise', 'ignore'.
    """
    try:
        module = importlib.import_module(name)
    except ImportError:
        if raise_on_missing:
            raise ImportError(message.format(name=name, extra=extra)) from None
        else:
            return None

    minimum_version = VERSIONS.get(name)
    if minimum_version:
        version = _get_version(module)
        if distutils.version.LooseVersion(version) < minimum_version:
            # Validate explicitly instead of with `assert`, which is stripped
            # when Python runs with -O and would let a typo fall through
            # silently.
            if on_version not in {"warn", "raise", "ignore"}:
                raise ValueError(
                    "on_version must be one of 'warn', 'raise', 'ignore', "
                    "got '{}'".format(on_version)
                )
            msg = version_message.format(
                minimum_version=minimum_version, name=name, actual_version=version
            )
            if on_version == "warn":
                warnings.warn(msg, UserWarning)
                return None
            elif on_version == "raise":
                raise ImportError(msg)

    return module
|
||||
23
venv/lib/python3.6/site-packages/pandas/compat/chainmap.py
Normal file
23
venv/lib/python3.6/site-packages/pandas/compat/chainmap.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from collections import ChainMap
|
||||
|
||||
|
||||
class DeepChainMap(ChainMap):
    """ChainMap variant that writes through to the first mapping already
    containing the key (falling back to the front map) and deletes in place."""

    def __setitem__(self, key, value):
        # update the first mapping that already holds the key, else insert
        # into the front mapping
        target = next((m for m in self.maps if key in m), self.maps[0])
        target[key] = value

    def __delitem__(self, key):
        for candidate in self.maps:
            if key in candidate:
                del candidate[key]
                return
        raise KeyError(key)

    # override because the m parameter is introduced in Python 3.4
    def new_child(self, m=None):
        return self.__class__(m if m is not None else {}, *self.maps)
|
||||
@@ -0,0 +1,74 @@
|
||||
""" support numpy compatibility across versions """
|
||||
|
||||
from distutils.version import LooseVersion
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
|
||||
# numpy versioning
_np_version = np.__version__
_nlv = LooseVersion(_np_version)
# Feature flags consumed elsewhere in pandas to gate numpy-version-specific
# behavior.
_np_version_under1p14 = _nlv < LooseVersion("1.14")
_np_version_under1p15 = _nlv < LooseVersion("1.15")
_np_version_under1p16 = _nlv < LooseVersion("1.16")
_np_version_under1p17 = _nlv < LooseVersion("1.17")
_is_numpy_dev = ".dev" in str(_nlv)


# Fail fast at import time rather than with obscure errors later.
if _nlv < "1.13.3":
    raise ImportError(
        "this version of pandas is incompatible with "
        "numpy < 1.13.3\n"
        "your numpy version is {0}.\n"
        "Please upgrade numpy to >= 1.13.3 to use "
        "this pandas version".format(_np_version)
    )
|
||||
|
||||
|
||||
# matches a trailing UTC offset of exactly +0000 / -0000
_tz_regex = re.compile("[+-]0000$")


def tz_replacer(s):
    """Strip a trailing 'Z' or '+0000'/'-0000' UTC suffix from a datetime
    string. Non-string inputs pass through unchanged."""
    if not isinstance(s, str):
        return s
    if s.endswith("Z"):
        return s[:-1]
    if _tz_regex.search(s):
        return s[:-5]
    return s
|
||||
|
||||
|
||||
def np_datetime64_compat(s, *args, **kwargs):
    """
    provide compat for construction of strings to numpy datetime64's with
    tz-changes in 1.11 that make '2015-01-01 09:00:00Z' show a deprecation
    warning, when need to pass '2015-01-01 09:00:00'
    """
    return np.datetime64(tz_replacer(s), *args, **kwargs)
|
||||
|
||||
|
||||
def np_array_datetime64_compat(arr, *args, **kwargs):
    """
    provide compat for construction of an array of strings to a
    np.array(..., dtype=np.datetime64(..))
    tz-changes in 1.11 that make '2015-01-01 09:00:00Z' show a deprecation
    warning, when need to pass '2015-01-01 09:00:00'
    """
    # is_list_like (str/bytes iterate characters and must not be treated
    # as sequences here)
    listlike = hasattr(arr, "__iter__") and not isinstance(arr, (str, bytes))
    cleaned = [tz_replacer(s) for s in arr] if listlike else tz_replacer(arr)
    return np.array(cleaned, *args, **kwargs)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"np",
|
||||
"_np_version",
|
||||
"_np_version_under1p14",
|
||||
"_np_version_under1p15",
|
||||
"_np_version_under1p16",
|
||||
"_np_version_under1p17",
|
||||
"_is_numpy_dev",
|
||||
]
|
||||
424
venv/lib/python3.6/site-packages/pandas/compat/numpy/function.py
Normal file
424
venv/lib/python3.6/site-packages/pandas/compat/numpy/function.py
Normal file
@@ -0,0 +1,424 @@
|
||||
"""
|
||||
For compatibility with numpy libraries, pandas functions or
|
||||
methods have to accept '*args' and '**kwargs' parameters to
|
||||
accommodate numpy arguments that are not actually used or
|
||||
respected in the pandas implementation.
|
||||
|
||||
To ensure that users do not abuse these parameters, validation
|
||||
is performed in 'validators.py' to make sure that any extra
|
||||
parameters passed correspond ONLY to those in the numpy signature.
|
||||
Part of that validation includes whether or not the user attempted
|
||||
to pass in non-default values for these extraneous parameters. As we
|
||||
want to discourage users from relying on these parameters when calling
|
||||
the pandas implementation, we want them only to pass in the default values
|
||||
for these parameters.
|
||||
|
||||
This module provides a set of commonly used default arguments for functions
|
||||
and methods that are spread throughout the codebase. This module will make it
|
||||
easier to adjust to future upstream changes in the analogous numpy signatures.
|
||||
"""
|
||||
from collections import OrderedDict
|
||||
from distutils.version import LooseVersion
|
||||
from typing import Any, Dict, Optional, Union
|
||||
|
||||
from numpy import __version__ as _np_version, ndarray
|
||||
|
||||
from pandas._libs.lib import is_bool, is_integer
|
||||
from pandas.errors import UnsupportedFunctionCall
|
||||
from pandas.util._validators import (
|
||||
validate_args,
|
||||
validate_args_and_kwargs,
|
||||
validate_kwargs,
|
||||
)
|
||||
|
||||
|
||||
class CompatValidator:
    """Callable validating numpy-compat ``*args``/``**kwargs`` against a
    mapping of allowed defaults, delegating to the pandas.util validators."""

    def __init__(self, defaults, fname=None, method=None, max_fname_arg_count=None):
        # defaults: mapping of parameter name -> required default value
        # fname / method / max_fname_arg_count: fallbacks used when the
        # corresponding call-time argument is None
        self.fname = fname
        self.method = method
        self.defaults = defaults
        self.max_fname_arg_count = max_fname_arg_count

    def __call__(self, args, kwargs, fname=None, max_fname_arg_count=None, method=None):
        # No-op when nothing extraneous was passed.
        if args or kwargs:
            # call-time arguments override the instance-level defaults
            fname = self.fname if fname is None else fname
            max_fname_arg_count = (
                self.max_fname_arg_count
                if max_fname_arg_count is None
                else max_fname_arg_count
            )
            method = self.method if method is None else method

            # dispatch to the matching pandas.util validator
            if method == "args":
                validate_args(fname, args, max_fname_arg_count, self.defaults)
            elif method == "kwargs":
                validate_kwargs(fname, kwargs, self.defaults)
            elif method == "both":
                validate_args_and_kwargs(
                    fname, args, kwargs, max_fname_arg_count, self.defaults
                )
            else:
                raise ValueError(
                    "invalid validation method " "'{method}'".format(method=method)
                )
|
||||
|
||||
|
||||
# numpy's argmin/argmax signatures accept only `out`, which must stay None.
ARGMINMAX_DEFAULTS = dict(out=None)
validate_argmin = CompatValidator(
    ARGMINMAX_DEFAULTS, fname="argmin", method="both", max_fname_arg_count=1
)
validate_argmax = CompatValidator(
    ARGMINMAX_DEFAULTS, fname="argmax", method="both", max_fname_arg_count=1
)
|
||||
|
||||
|
||||
def process_skipna(skipna, args):
    """Normalize a ``skipna`` argument that may actually carry numpy's ``out``.

    When numpy routes a call through pandas, its ``out`` value (an ndarray or
    None) can land in the ``skipna`` slot; push it back onto ``args`` and
    restore the boolean default.
    """
    if skipna is None or isinstance(skipna, ndarray):
        return True, (skipna,) + args
    return skipna, args
|
||||
|
||||
|
||||
def validate_argmin_with_skipna(skipna, args, kwargs):
    """
    If 'Series.argmin' is called via the 'numpy' library,
    the third parameter in its signature is 'out', which
    takes either an ndarray or 'None', so check if the
    'skipna' parameter is either an instance of ndarray or
    is None, since 'skipna' itself should be a boolean
    """
    cleaned_skipna, cleaned_args = process_skipna(skipna, args)
    validate_argmin(cleaned_args, kwargs)
    return cleaned_skipna
|
||||
|
||||
|
||||
def validate_argmax_with_skipna(skipna, args, kwargs):
    """
    If 'Series.argmax' is called via the 'numpy' library,
    the third parameter in its signature is 'out', which
    takes either an ndarray or 'None', so check if the
    'skipna' parameter is either an instance of ndarray or
    is None, since 'skipna' itself should be a boolean
    """
    cleaned_skipna, cleaned_args = process_skipna(skipna, args)
    validate_argmax(cleaned_args, kwargs)
    return cleaned_skipna
|
||||
|
||||
|
||||
# Allowed numpy argsort keyword defaults.
ARGSORT_DEFAULTS = OrderedDict()  # type: OrderedDict[str, Optional[Union[int, str]]]
ARGSORT_DEFAULTS["axis"] = -1
ARGSORT_DEFAULTS["kind"] = "quicksort"
ARGSORT_DEFAULTS["order"] = None

if LooseVersion(_np_version) >= LooseVersion("1.17.0"):
    # GH-26361. NumPy added radix sort and changed default to None.
    ARGSORT_DEFAULTS["kind"] = None


validate_argsort = CompatValidator(
    ARGSORT_DEFAULTS, fname="argsort", max_fname_arg_count=0, method="both"
)

# two different signatures of argsort, this second validation
# for when the `kind` param is supported
ARGSORT_DEFAULTS_KIND = OrderedDict()  # type: OrderedDict[str, Optional[int]]
ARGSORT_DEFAULTS_KIND["axis"] = -1
ARGSORT_DEFAULTS_KIND["order"] = None
validate_argsort_kind = CompatValidator(
    ARGSORT_DEFAULTS_KIND, fname="argsort", max_fname_arg_count=0, method="both"
)
|
||||
|
||||
|
||||
def validate_argsort_with_ascending(ascending, args, kwargs):
    """
    If 'Categorical.argsort' is called via the 'numpy' library, the
    first parameter in its signature is 'axis', which takes either
    an integer or 'None', so check if the 'ascending' parameter has
    either integer type or is None, since 'ascending' itself should
    be a boolean
    """
    if ascending is None or is_integer(ascending):
        # `ascending` actually carried numpy's `axis`; shift it into args
        args = (ascending,) + args
        ascending = True

    validate_argsort_kind(args, kwargs, max_fname_arg_count=3)
    return ascending
|
||||
|
||||
|
||||
# numpy's clip signature allows `out`, which must stay None.
CLIP_DEFAULTS = dict(out=None)  # type: Dict[str, Any]
validate_clip = CompatValidator(
    CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3
)
|
||||
|
||||
|
||||
def validate_clip_with_axis(axis, args, kwargs):
    """
    Validate a ``clip`` call made through the numpy layer.

    numpy's signature puts 'out' (an ndarray) in the slot where pandas
    expects 'axis'; when an ndarray arrives there, shift it back into
    ``args`` and reset 'axis' to None before validating.
    """
    out_in_axis_slot = isinstance(axis, ndarray)
    if out_in_axis_slot:
        # 'axis' actually holds numpy's 'out' argument.
        args = (axis,) + args
        axis = None

    validate_clip(args, kwargs)
    return axis
|
||||
|
||||
|
||||
# Defaults for ``compress`` when called via numpy; neither 'axis' nor 'out'
# is supported by the pandas implementations.
COMPRESS_DEFAULTS = OrderedDict()  # type: OrderedDict[str, Any]
COMPRESS_DEFAULTS["axis"] = None
COMPRESS_DEFAULTS["out"] = None
validate_compress = CompatValidator(
    COMPRESS_DEFAULTS, fname="compress", method="both", max_fname_arg_count=1
)

# Shared defaults for the cumulative functions (cumsum/cumprod/...);
# validate_cum_func receives the concrete fname at call time.
CUM_FUNC_DEFAULTS = OrderedDict()  # type: OrderedDict[str, Any]
CUM_FUNC_DEFAULTS["dtype"] = None
CUM_FUNC_DEFAULTS["out"] = None
validate_cum_func = CompatValidator(
    CUM_FUNC_DEFAULTS, method="both", max_fname_arg_count=1
)
validate_cumsum = CompatValidator(
    CUM_FUNC_DEFAULTS, fname="cumsum", method="both", max_fname_arg_count=1
)
|
||||
|
||||
|
||||
def validate_cum_func_with_skipna(skipna, args, kwargs, name):
    """
    Validate a cumulative-function call made through the numpy layer.

    numpy's signature puts 'dtype' in the slot where pandas expects the
    boolean 'skipna'; when a non-bool arrives, treat it as 'dtype', move
    it into ``args`` and restore the default ``skipna=True``.
    """
    skipna_is_dtype = not is_bool(skipna)
    if skipna_is_dtype:
        # 'skipna' actually holds numpy's 'dtype' argument.
        args = (skipna,) + args
        skipna = True

    validate_cum_func(args, kwargs, fname=name)
    return skipna
|
||||
|
||||
|
||||
# Defaults shared by ``all`` and ``any`` when reached via numpy.
ALLANY_DEFAULTS = OrderedDict()  # type: OrderedDict[str, Optional[bool]]
ALLANY_DEFAULTS["dtype"] = None
ALLANY_DEFAULTS["out"] = None
ALLANY_DEFAULTS["keepdims"] = False
validate_all = CompatValidator(
    ALLANY_DEFAULTS, fname="all", method="both", max_fname_arg_count=1
)
validate_any = CompatValidator(
    ALLANY_DEFAULTS, fname="any", method="both", max_fname_arg_count=1
)

# Generic logical reductions: keyword-only validation.
LOGICAL_FUNC_DEFAULTS = dict(out=None, keepdims=False)
validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs")

MINMAX_DEFAULTS = dict(out=None, keepdims=False)
validate_min = CompatValidator(
    MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1
)
validate_max = CompatValidator(
    MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1
)

RESHAPE_DEFAULTS = dict(order="C")  # type: Dict[str, str]
validate_reshape = CompatValidator(
    RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1
)

REPEAT_DEFAULTS = dict(axis=None)  # type: Dict[str, Any]
validate_repeat = CompatValidator(
    REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1
)

ROUND_DEFAULTS = dict(out=None)  # type: Dict[str, Any]
validate_round = CompatValidator(
    ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1
)

SORT_DEFAULTS = OrderedDict()  # type: OrderedDict[str, Optional[Union[int, str]]]
SORT_DEFAULTS["axis"] = -1
SORT_DEFAULTS["kind"] = "quicksort"
SORT_DEFAULTS["order"] = None
validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs")
|
||||
|
||||
# Base defaults for the statistical reductions (sum/prod/mean/median/...).
STAT_FUNC_DEFAULTS = OrderedDict()  # type: OrderedDict[str, Optional[Any]]
STAT_FUNC_DEFAULTS["dtype"] = None
STAT_FUNC_DEFAULTS["out"] = None

# NOTE: PROD_DEFAULTS and SUM_DEFAULTS are bound to the *same* dict object,
# so the 'keepdims'/'initial' entries added below apply to ``prod`` as well.
PROD_DEFAULTS = SUM_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
SUM_DEFAULTS["keepdims"] = False
SUM_DEFAULTS["initial"] = None

MEDIAN_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
MEDIAN_DEFAULTS["overwrite_input"] = False
MEDIAN_DEFAULTS["keepdims"] = False

# Added only after the copies above were taken, so SUM/PROD/MEDIAN keep their
# own 'keepdims' entries set explicitly.
STAT_FUNC_DEFAULTS["keepdims"] = False

validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS, method="kwargs")
validate_sum = CompatValidator(
    SUM_DEFAULTS, fname="sum", method="both", max_fname_arg_count=1
)
validate_prod = CompatValidator(
    PROD_DEFAULTS, fname="prod", method="both", max_fname_arg_count=1
)
validate_mean = CompatValidator(
    STAT_FUNC_DEFAULTS, fname="mean", method="both", max_fname_arg_count=1
)
validate_median = CompatValidator(
    MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1
)
|
||||
|
||||
# Reductions that take a 'ddof' argument (std/var/sem); keyword-only checks.
STAT_DDOF_FUNC_DEFAULTS = OrderedDict()  # type: OrderedDict[str, Optional[bool]]
STAT_DDOF_FUNC_DEFAULTS["dtype"] = None
STAT_DDOF_FUNC_DEFAULTS["out"] = None
STAT_DDOF_FUNC_DEFAULTS["keepdims"] = False
validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs")

# ``take``: only numpy's defaults for 'out'/'mode' are accepted.
TAKE_DEFAULTS = OrderedDict()  # type: OrderedDict[str, Optional[str]]
TAKE_DEFAULTS["out"] = None
TAKE_DEFAULTS["mode"] = "raise"
validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs")
|
||||
|
||||
|
||||
def validate_take_with_convert(convert, args, kwargs):
    """
    Validate a ``take`` call made through the numpy layer.

    numpy's signature has 'axis' (an ndarray or None) in the slot where
    pandas expects 'convert'; when such a value arrives, shift it into
    ``args`` and restore the default ``convert=True``.
    """
    looks_like_axis = convert is None or isinstance(convert, ndarray)
    if looks_like_axis:
        # 'convert' actually holds numpy's 'axis' argument.
        args = (convert,) + args
        convert = True

    validate_take(args, kwargs, max_fname_arg_count=3, method="both")
    return convert
|
||||
|
||||
|
||||
# ``transpose`` accepts no positional arguments and only the default 'axes'.
TRANSPOSE_DEFAULTS = dict(axes=None)
validate_transpose = CompatValidator(
    TRANSPOSE_DEFAULTS, fname="transpose", method="both", max_fname_arg_count=0
)
|
||||
|
||||
|
||||
def validate_window_func(name, args, kwargs):
    """
    Reject numpy-style arguments on window-object aggregations.

    Window objects implement the reduction themselves, so any positional
    argument, or any of numpy's 'axis'/'dtype'/'out' keywords, is an error.

    Raises
    ------
    UnsupportedFunctionCall
        If positional args were given or a numpy keyword is present.
    """
    msg = (
        "numpy operations are not "
        "valid with window objects. "
        "Use .{func}() directly instead ".format(func=name)
    )

    if args:
        raise UnsupportedFunctionCall(msg)
    if any(key in kwargs for key in ("axis", "dtype", "out")):
        raise UnsupportedFunctionCall(msg)
|
||||
|
||||
|
||||
def validate_rolling_func(name, args, kwargs):
    """
    Reject numpy-style arguments on rolling-window aggregations.

    Raises
    ------
    UnsupportedFunctionCall
        If positional args were given or a numpy 'axis'/'dtype'/'out'
        keyword is present.
    """
    msg = (
        "numpy operations are not "
        "valid with window objects. "
        "Use .rolling(...).{func}() instead ".format(func=name)
    )

    if args:
        raise UnsupportedFunctionCall(msg)
    if any(key in kwargs for key in ("axis", "dtype", "out")):
        raise UnsupportedFunctionCall(msg)
|
||||
|
||||
|
||||
def validate_expanding_func(name, args, kwargs):
    """
    Reject numpy-style arguments on expanding-window aggregations.

    Raises
    ------
    UnsupportedFunctionCall
        If positional args were given or a numpy 'axis'/'dtype'/'out'
        keyword is present.
    """
    msg = (
        "numpy operations are not "
        "valid with window objects. "
        "Use .expanding(...).{func}() instead ".format(func=name)
    )

    if args:
        raise UnsupportedFunctionCall(msg)
    if any(key in kwargs for key in ("axis", "dtype", "out")):
        raise UnsupportedFunctionCall(msg)
|
||||
|
||||
|
||||
def validate_groupby_func(name, args, kwargs, allowed=None):
    """
    Reject numpy-style arguments on groupby aggregations.

    'args' and 'kwargs' should be empty, apart from keywords explicitly
    whitelisted via ``allowed``, because the groupby methods list all of
    their necessary parameters in their own signatures.

    Raises
    ------
    UnsupportedFunctionCall
        If positional args or non-whitelisted keywords were passed.
    """
    if allowed is None:
        allowed = []

    extra_kwargs = set(kwargs).difference(allowed)

    if args or extra_kwargs:
        raise UnsupportedFunctionCall(
            (
                "numpy operations are not valid "
                "with groupby. Use .groupby(...)."
                "{func}() instead".format(func=name)
            )
        )
|
||||
|
||||
|
||||
# Reductions whose numpy spellings may be routed through a resampler.
RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var")


def validate_resampler_func(method, args, kwargs):
    """
    Reject extra arguments on resampler aggregations.

    'args' and 'kwargs' should be empty because all of the necessary
    parameters are explicitly listed in the method signatures.

    Raises
    ------
    UnsupportedFunctionCall
        If extra arguments were passed to a numpy-named reduction.
    TypeError
        If extra arguments were passed to any other method.
    """
    if not args and not kwargs:
        return
    if method not in RESAMPLER_NUMPY_OPS:
        raise TypeError("too many arguments passed in")
    raise UnsupportedFunctionCall(
        (
            "numpy operations are not valid "
            "with resample. Use .resample(...)."
            "{func}() instead".format(func=method)
        )
    )
|
||||
|
||||
|
||||
def validate_minmax_axis(axis):
    """
    Ensure the 'axis' given to min, max, argmin or argmax is usable.

    The caller is one-dimensional (Index), so only ``None`` or an axis
    that normalizes to 0 is acceptable; any other value would otherwise
    be silently ignored, hence the explicit error.

    Parameters
    ----------
    axis : int or None

    Raises
    ------
    ValueError
        If axis is out of bounds for a 1-dimensional object.
    """
    if axis is None:
        return
    ndim = 1  # hard-coded for Index
    out_of_bounds = axis >= ndim or (axis < 0 and ndim + axis < 0)
    if out_of_bounds:
        raise ValueError(
            "`axis` must be fewer than the number of "
            "dimensions ({ndim})".format(ndim=ndim)
        )
|
||||
221
venv/lib/python3.6/site-packages/pandas/compat/pickle_compat.py
Normal file
221
venv/lib/python3.6/site-packages/pandas/compat/pickle_compat.py
Normal file
@@ -0,0 +1,221 @@
|
||||
"""
|
||||
Support pre-0.12 series pickle compatibility.
|
||||
"""
|
||||
|
||||
import copy
|
||||
import pickle as pkl
|
||||
import sys
|
||||
|
||||
import pandas # noqa
|
||||
from pandas import Index
|
||||
|
||||
|
||||
def load_reduce(self):
    """
    Replacement for Unpickler.load_reduce (the REDUCE opcode).

    Pops the argument tuple off the pickle stack and calls the function
    beneath it.  If the call fails, two legacy-pickle recovery paths are
    tried before re-raising: allocating the object bare when numpy's
    "_reconstruct" error is seen, and retrying with string arguments
    re-encoded when an encoding was configured on the unpickler.
    """
    stack = self.stack
    args = stack.pop()
    func = stack[-1]

    if len(args) and type(args[0]) is type:
        n = args[0].__name__  # noqa

    try:
        stack[-1] = func(*args)
        return
    except Exception as e:

        # If we have a deprecated function,
        # try to replace and try again.

        msg = "_reconstruct: First argument must be a sub-type of ndarray"

        if msg in str(e):
            try:
                # bypass the failing reconstructor and allocate directly
                cls = args[0]
                stack[-1] = object.__new__(cls)
                return
            except TypeError:
                pass

        # try to re-encode the arguments
        if getattr(self, "encoding", None) is not None:
            args = tuple(
                arg.encode(self.encoding) if isinstance(arg, str) else arg
                for arg in args
            )
            try:
                stack[-1] = func(*args)
                return
            except TypeError:
                pass

        # unknown exception, re-raise
        if getattr(self, "is_verbose", None):
            print(sys.exc_info())
            print(func, args)
        raise
|
||||
|
||||
|
||||
# If classes are moved, provide compat here.
# Maps the (module, name) recorded in an old pickle to the class's current
# location; consulted by Unpickler.find_class below.
_class_locations_map = {
    ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"),
    # 15477
    #
    # TODO: When FrozenNDArray is removed, add
    # the following lines for compat:
    #
    # ('pandas.core.base', 'FrozenNDArray'):
    # ('numpy', 'ndarray'),
    # ('pandas.core.indexes.frozen', 'FrozenNDArray'):
    # ('numpy', 'ndarray'),
    #
    # Afterwards, remove the current entry
    # for `pandas.core.base.FrozenNDArray`.
    ("pandas.core.base", "FrozenNDArray"): (
        "pandas.core.indexes.frozen",
        "FrozenNDArray",
    ),
    ("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"),
    # 10890
    ("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"),
    ("pandas.sparse.series", "SparseTimeSeries"): (
        "pandas.core.sparse.series",
        "SparseSeries",
    ),
    # 12588, extensions moving
    ("pandas._sparse", "BlockIndex"): ("pandas._libs.sparse", "BlockIndex"),
    ("pandas.tslib", "Timestamp"): ("pandas._libs.tslib", "Timestamp"),
    # 18543 moving period
    ("pandas._period", "Period"): ("pandas._libs.tslibs.period", "Period"),
    ("pandas._libs.period", "Period"): ("pandas._libs.tslibs.period", "Period"),
    # 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype
    ("pandas.tslib", "__nat_unpickle"): (
        "pandas._libs.tslibs.nattype",
        "__nat_unpickle",
    ),
    ("pandas._libs.tslib", "__nat_unpickle"): (
        "pandas._libs.tslibs.nattype",
        "__nat_unpickle",
    ),
    # 15998 top-level dirs moving
    ("pandas.sparse.array", "SparseArray"): (
        "pandas.core.arrays.sparse",
        "SparseArray",
    ),
    ("pandas.sparse.series", "SparseSeries"): (
        "pandas.core.sparse.series",
        "SparseSeries",
    ),
    ("pandas.sparse.frame", "SparseDataFrame"): (
        "pandas.core.sparse.frame",
        "SparseDataFrame",
    ),
    ("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"),
    ("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"),
    ("pandas.indexes.numeric", "Int64Index"): (
        "pandas.core.indexes.numeric",
        "Int64Index",
    ),
    ("pandas.indexes.range", "RangeIndex"): ("pandas.core.indexes.range", "RangeIndex"),
    ("pandas.indexes.multi", "MultiIndex"): ("pandas.core.indexes.multi", "MultiIndex"),
    ("pandas.tseries.index", "_new_DatetimeIndex"): (
        "pandas.core.indexes.datetimes",
        "_new_DatetimeIndex",
    ),
    ("pandas.tseries.index", "DatetimeIndex"): (
        "pandas.core.indexes.datetimes",
        "DatetimeIndex",
    ),
    ("pandas.tseries.period", "PeriodIndex"): (
        "pandas.core.indexes.period",
        "PeriodIndex",
    ),
    # 19269, arrays moving
    ("pandas.core.categorical", "Categorical"): ("pandas.core.arrays", "Categorical"),
    # 19939, add timedeltaindex, float64index compat from 15998 move
    ("pandas.tseries.tdi", "TimedeltaIndex"): (
        "pandas.core.indexes.timedeltas",
        "TimedeltaIndex",
    ),
    ("pandas.indexes.numeric", "Float64Index"): (
        "pandas.core.indexes.numeric",
        "Float64Index",
    ),
}
|
||||
|
||||
|
||||
# our Unpickler sub-class to override methods and some dispatcher
|
||||
# functions for compat
|
||||
|
||||
|
||||
class Unpickler(pkl._Unpickler):  # type: ignore
    """Unpickler that remaps moved/renamed classes via _class_locations_map."""

    def find_class(self, module, name):
        # override superclass: redirect (module, name) when the class has
        # been relocated; unknown keys pass through unchanged
        key = (module, name)
        module, name = _class_locations_map.get(key, key)
        return super().find_class(module, name)


# Install the compat load_reduce on a private copy of the dispatch table so
# the stdlib Unpickler class is left untouched.
Unpickler.dispatch = copy.copy(Unpickler.dispatch)
Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce
|
||||
|
||||
|
||||
def load_newobj(self):
    """
    Replacement for Unpickler.load_newobj (the NEWOBJ opcode).

    Pops the argument tuple, reads the class beneath it, and replaces the
    class on the stack with a freshly built instance.  Index subclasses
    are allocated bare with object.__new__ for backwards compatibility.
    """
    stack = self.stack
    args = stack.pop()
    cls = stack[-1]

    # compat: legacy Index pickles cannot go through cls.__new__
    if issubclass(cls, Index):
        obj = object.__new__(cls)
    else:
        obj = cls.__new__(cls, *args)

    stack[-1] = obj
|
||||
|
||||
|
||||
Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj
|
||||
|
||||
|
||||
def load_newobj_ex(self):
    """
    Replacement for Unpickler.load_newobj_ex (the NEWOBJ_EX opcode).

    Pops kwargs, args and the class from the stack, builds the instance
    and pushes it.  Index subclasses are allocated bare with
    object.__new__ for backwards compatibility.
    """
    stack = self.stack
    kwargs = stack.pop()
    args = stack.pop()
    cls = stack.pop()

    # compat: legacy Index pickles cannot go through cls.__new__
    if issubclass(cls, Index):
        obj = object.__new__(cls)
    else:
        obj = cls.__new__(cls, *args, **kwargs)
    self.append(obj)
|
||||
|
||||
|
||||
try:
|
||||
Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex
|
||||
except (AttributeError, KeyError):
|
||||
pass
|
||||
|
||||
|
||||
def load(fh, encoding=None, is_verbose=False):
    """
    Load a pickle with compatibility remapping for relocated pandas classes.

    The original wrapped the body in ``except (ValueError, TypeError):
    raise``, a handler that only re-raised; it has been removed with no
    change in behavior.

    Parameters
    ----------
    fh : a filelike object
    encoding : an optional encoding passed through to the Unpickler
    is_verbose : bool, default False
        Show exception output when a REDUCE replay fails (see load_reduce).

    Returns
    -------
    The unpickled object.
    """
    # always read the stream from the beginning
    fh.seek(0)
    if encoding is not None:
        up = Unpickler(fh, encoding=encoding)
    else:
        up = Unpickler(fh)
    up.is_verbose = is_verbose

    return up.load()
|
||||
843
venv/lib/python3.6/site-packages/pandas/conftest.py
Normal file
843
venv/lib/python3.6/site-packages/pandas/conftest.py
Normal file
@@ -0,0 +1,843 @@
|
||||
from datetime import date, time, timedelta, timezone
|
||||
from decimal import Decimal
|
||||
import operator
|
||||
import os
|
||||
|
||||
from dateutil.tz import tzlocal, tzutc
|
||||
import hypothesis
|
||||
from hypothesis import strategies as st
|
||||
import numpy as np
|
||||
import pytest
|
||||
from pytz import FixedOffset, utc
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from pandas.core import ops
|
||||
import pandas.util.testing as tm
|
||||
|
||||
hypothesis.settings.register_profile(
|
||||
"ci",
|
||||
# Hypothesis timing checks are tuned for scalars by default, so we bump
|
||||
# them from 200ms to 500ms per test case as the global default. If this
|
||||
# is too short for a specific test, (a) try to make it faster, and (b)
|
||||
# if it really is slow add `@settings(deadline=...)` with a working value,
|
||||
# or `deadline=None` to entirely disable timeouts for that test.
|
||||
deadline=500,
|
||||
suppress_health_check=(hypothesis.HealthCheck.too_slow,),
|
||||
)
|
||||
hypothesis.settings.load_profile("ci")
|
||||
|
||||
|
||||
def pytest_addoption(parser):
    # Register the pandas-specific command line flags consumed by
    # pytest_runtest_setup below (skip/run marked tests) and by the
    # strict_data_files fixture.
    parser.addoption("--skip-slow", action="store_true", help="skip slow tests")
    parser.addoption("--skip-network", action="store_true", help="skip network tests")
    parser.addoption("--skip-db", action="store_true", help="skip db tests")
    parser.addoption(
        "--run-high-memory", action="store_true", help="run high memory tests"
    )
    parser.addoption("--only-slow", action="store_true", help="run only slow tests")
    parser.addoption(
        "--strict-data-files",
        action="store_true",
        help="Fail if a test is skipped for missing data file.",
    )
|
||||
|
||||
|
||||
def pytest_runtest_setup(item):
    # Skip tests whose markers conflict with the command line flags
    # registered in pytest_addoption.
    if "slow" in item.keywords and item.config.getoption("--skip-slow"):
        pytest.skip("skipping due to --skip-slow")

    if "slow" not in item.keywords and item.config.getoption("--only-slow"):
        pytest.skip("skipping due to --only-slow")

    if "network" in item.keywords and item.config.getoption("--skip-network"):
        pytest.skip("skipping due to --skip-network")

    if "db" in item.keywords and item.config.getoption("--skip-db"):
        pytest.skip("skipping due to --skip-db")

    # high-memory tests are opt-in rather than opt-out
    if "high_memory" in item.keywords and not item.config.getoption(
        "--run-high-memory"
    ):
        pytest.skip("skipping high memory test since --run-high-memory was not set")
|
||||
|
||||
|
||||
# Configurations for all tests and all test modules
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure_tests():
|
||||
pd.set_option("chained_assignment", "raise")
|
||||
|
||||
|
||||
# For running doctests: make np and pd names available
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def add_imports(doctest_namespace):
|
||||
doctest_namespace["np"] = np
|
||||
doctest_namespace["pd"] = pd
|
||||
|
||||
|
||||
@pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"])
|
||||
def spmatrix(request):
|
||||
from scipy import sparse
|
||||
|
||||
return getattr(sparse, request.param + "_matrix")
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: "axis {!r}".format(x))
|
||||
def axis(request):
|
||||
"""
|
||||
Fixture for returning the axis numbers of a DataFrame.
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
axis_frame = axis
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, "index"], ids=lambda x: "axis {!r}".format(x))
|
||||
def axis_series(request):
|
||||
"""
|
||||
Fixture for returning the axis numbers of a Series.
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def ip():
|
||||
"""
|
||||
Get an instance of IPython.InteractiveShell.
|
||||
|
||||
Will raise a skip if IPython is not installed.
|
||||
"""
|
||||
|
||||
pytest.importorskip("IPython", minversion="6.0.0")
|
||||
from IPython.core.interactiveshell import InteractiveShell
|
||||
|
||||
return InteractiveShell()
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False, None])
|
||||
def observed(request):
|
||||
""" pass in the observed keyword to groupby for [True, False]
|
||||
This indicates whether categoricals should return values for
|
||||
values which are not in the grouper [False / None], or only values which
|
||||
appear in the grouper [True]. [None] is supported for future compatibility
|
||||
if we decide to change the default (and would need to warn if this
|
||||
parameter is not passed)"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False, None])
|
||||
def ordered_fixture(request):
|
||||
"""Boolean 'ordered' parameter for Categorical."""
|
||||
return request.param
|
||||
|
||||
|
||||
_all_arithmetic_operators = [
|
||||
"__add__",
|
||||
"__radd__",
|
||||
"__sub__",
|
||||
"__rsub__",
|
||||
"__mul__",
|
||||
"__rmul__",
|
||||
"__floordiv__",
|
||||
"__rfloordiv__",
|
||||
"__truediv__",
|
||||
"__rtruediv__",
|
||||
"__pow__",
|
||||
"__rpow__",
|
||||
"__mod__",
|
||||
"__rmod__",
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture(params=_all_arithmetic_operators)
|
||||
def all_arithmetic_operators(request):
|
||||
"""
|
||||
Fixture for dunder names for common arithmetic operations
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
operator.add,
|
||||
ops.radd,
|
||||
operator.sub,
|
||||
ops.rsub,
|
||||
operator.mul,
|
||||
ops.rmul,
|
||||
operator.truediv,
|
||||
ops.rtruediv,
|
||||
operator.floordiv,
|
||||
ops.rfloordiv,
|
||||
operator.mod,
|
||||
ops.rmod,
|
||||
operator.pow,
|
||||
ops.rpow,
|
||||
]
|
||||
)
|
||||
def all_arithmetic_functions(request):
|
||||
"""
|
||||
Fixture for operator and roperator arithmetic functions.
|
||||
|
||||
Note: This includes divmod and rdivmod, whereas all_arithmetic_operators
|
||||
does not.
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
_all_numeric_reductions = [
|
||||
"sum",
|
||||
"max",
|
||||
"min",
|
||||
"mean",
|
||||
"prod",
|
||||
"std",
|
||||
"var",
|
||||
"median",
|
||||
"kurt",
|
||||
"skew",
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture(params=_all_numeric_reductions)
|
||||
def all_numeric_reductions(request):
|
||||
"""
|
||||
Fixture for numeric reduction names
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
_all_boolean_reductions = ["all", "any"]
|
||||
|
||||
|
||||
@pytest.fixture(params=_all_boolean_reductions)
|
||||
def all_boolean_reductions(request):
|
||||
"""
|
||||
Fixture for boolean reduction names
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
_cython_table = pd.core.base.SelectionMixin._cython_table.items()
|
||||
|
||||
|
||||
@pytest.fixture(params=list(_cython_table))
|
||||
def cython_table_items(request):
|
||||
return request.param
|
||||
|
||||
|
||||
def _get_cython_table_params(ndframe, func_names_and_expected):
    """
    Combine a frame with functions from SelectionMixin._cython_table.

    For every (name, expected) pair, emit the string name itself plus
    every callable in ``_cython_table`` whose alias matches that name,
    each paired with the frame and the expected result.

    Parameters
    ----------
    ndframe : DataFrame or Series
    func_names_and_expected : Sequence of two items
        The first item is a name of a NDFrame method ('sum', 'prod') etc.
        The second item is the expected return value.

    Returns
    -------
    list
        List of three-item tuples (ndframe, function, expected result).
    """
    results = []
    for func_name, expected in func_names_and_expected:
        results.append((ndframe, func_name, expected))
        matching = [func for func, alias in _cython_table if alias == func_name]
        results.extend((ndframe, func, expected) for func in matching)
    return results
|
||||
|
||||
|
||||
@pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"])
|
||||
def all_compare_operators(request):
|
||||
"""
|
||||
Fixture for dunder names for common compare operations
|
||||
|
||||
* >=
|
||||
* >
|
||||
* ==
|
||||
* !=
|
||||
* <
|
||||
* <=
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=["__le__", "__lt__", "__ge__", "__gt__"])
|
||||
def compare_operators_no_eq_ne(request):
|
||||
"""
|
||||
Fixture for dunder names for compare operations except == and !=
|
||||
|
||||
* >=
|
||||
* >
|
||||
* <
|
||||
* <=
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, "gzip", "bz2", "zip", "xz"])
|
||||
def compression(request):
|
||||
"""
|
||||
Fixture for trying common compression types in compression tests
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=["gzip", "bz2", "zip", "xz"])
|
||||
def compression_only(request):
|
||||
"""
|
||||
Fixture for trying common compression types in compression tests excluding
|
||||
uncompressed case
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
|
||||
def writable(request):
|
||||
"""
|
||||
Fixture that an array is writable
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def datetime_tz_utc():
|
||||
return timezone.utc
|
||||
|
||||
|
||||
@pytest.fixture(params=["utc", "dateutil/UTC", utc, tzutc(), timezone.utc])
|
||||
def utc_fixture(request):
|
||||
"""
|
||||
Fixture to provide variants of UTC timezone strings and tzinfo objects
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=["inner", "outer", "left", "right"])
|
||||
def join_type(request):
|
||||
"""
|
||||
Fixture for trying all types of join operations
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def strict_data_files(pytestconfig):
|
||||
return pytestconfig.getoption("--strict-data-files")
|
||||
|
||||
|
||||
@pytest.fixture
def datapath(strict_data_files):
    """Get the path to a data file.

    Parameters
    ----------
    path : str
        Path to the file, relative to ``pandas/tests/``

    Returns
    -------
    path : path including ``pandas/tests``.

    Raises
    ------
    ValueError
        If the path doesn't exist and the --strict-data-files option is set.
    """
    BASE_PATH = os.path.join(os.path.dirname(__file__), "tests")

    def deco(*args):
        # Join the relative segments under pandas/tests; missing files are
        # an error under --strict-data-files, otherwise a skip.
        path = os.path.join(BASE_PATH, *args)
        if not os.path.exists(path):
            if strict_data_files:
                msg = "Could not find file {} and --strict-data-files is set."
                raise ValueError(msg.format(path))
            else:
                msg = "Could not find {}."
                pytest.skip(msg.format(path))
        return path

    return deco
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def iris(datapath):
|
||||
"""The iris dataset as a DataFrame."""
|
||||
return pd.read_csv(datapath("data", "iris.csv"))
|
||||
|
||||
|
||||
@pytest.fixture(params=["nlargest", "nsmallest"])
|
||||
def nselect_method(request):
|
||||
"""
|
||||
Fixture for trying all nselect methods
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=["left", "right", "both", "neither"])
|
||||
def closed(request):
|
||||
"""
|
||||
Fixture for trying all interval closed parameters
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=["left", "right", "both", "neither"])
|
||||
def other_closed(request):
|
||||
"""
|
||||
Secondary closed fixture to allow parametrizing over all pairs of closed
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, np.nan, pd.NaT, float("nan"), np.float("NaN")])
|
||||
def nulls_fixture(request):
|
||||
"""
|
||||
Fixture for each null type in pandas
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
nulls_fixture2 = nulls_fixture # Generate cartesian product of nulls_fixture
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, np.nan, pd.NaT])
|
||||
def unique_nulls_fixture(request):
|
||||
"""
|
||||
Fixture for each null type in pandas, each null type exactly once
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
# Generate cartesian product of unique_nulls_fixture:
|
||||
unique_nulls_fixture2 = unique_nulls_fixture
|
||||
|
||||
|
||||
TIMEZONES = [
|
||||
None,
|
||||
"UTC",
|
||||
"US/Eastern",
|
||||
"Asia/Tokyo",
|
||||
"dateutil/US/Pacific",
|
||||
"dateutil/Asia/Singapore",
|
||||
tzutc(),
|
||||
tzlocal(),
|
||||
FixedOffset(300),
|
||||
FixedOffset(0),
|
||||
FixedOffset(-300),
|
||||
timezone.utc,
|
||||
timezone(timedelta(hours=1)),
|
||||
timezone(timedelta(hours=-1), name="foo"),
|
||||
]
|
||||
TIMEZONE_IDS = [repr(i) for i in TIMEZONES]
|
||||
|
||||
|
||||
@td.parametrize_fixture_doc(str(TIMEZONE_IDS))
|
||||
@pytest.fixture(params=TIMEZONES, ids=TIMEZONE_IDS)
|
||||
def tz_naive_fixture(request):
|
||||
"""
|
||||
Fixture for trying timezones including default (None): {0}
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@td.parametrize_fixture_doc(str(TIMEZONE_IDS[1:]))
|
||||
@pytest.fixture(params=TIMEZONES[1:], ids=TIMEZONE_IDS[1:])
|
||||
def tz_aware_fixture(request):
|
||||
"""
|
||||
Fixture for trying explicit timezones: {0}
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
# Generate cartesian product of tz_aware_fixture:
|
||||
tz_aware_fixture2 = tz_aware_fixture
|
||||
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Dtypes
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
|
||||
UNSIGNED_EA_INT_DTYPES = ["UInt8", "UInt16", "UInt32", "UInt64"]
|
||||
SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"]
|
||||
SIGNED_EA_INT_DTYPES = ["Int8", "Int16", "Int32", "Int64"]
|
||||
ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
|
||||
ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES
|
||||
|
||||
FLOAT_DTYPES = [float, "float32", "float64"]
|
||||
COMPLEX_DTYPES = [complex, "complex64", "complex128"]
|
||||
STRING_DTYPES = [str, "str", "U"]
|
||||
|
||||
DATETIME64_DTYPES = ["datetime64[ns]", "M8[ns]"]
|
||||
TIMEDELTA64_DTYPES = ["timedelta64[ns]", "m8[ns]"]
|
||||
|
||||
BOOL_DTYPES = [bool, "bool"]
|
||||
BYTES_DTYPES = [bytes, "bytes"]
|
||||
OBJECT_DTYPES = [object, "object"]
|
||||
|
||||
ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES
|
||||
ALL_NUMPY_DTYPES = (
|
||||
ALL_REAL_DTYPES
|
||||
+ COMPLEX_DTYPES
|
||||
+ STRING_DTYPES
|
||||
+ DATETIME64_DTYPES
|
||||
+ TIMEDELTA64_DTYPES
|
||||
+ BOOL_DTYPES
|
||||
+ OBJECT_DTYPES
|
||||
+ BYTES_DTYPES
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(params=STRING_DTYPES)
|
||||
def string_dtype(request):
|
||||
"""Parametrized fixture for string dtypes.
|
||||
|
||||
* str
|
||||
* 'str'
|
||||
* 'U'
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=BYTES_DTYPES)
|
||||
def bytes_dtype(request):
|
||||
"""Parametrized fixture for bytes dtypes.
|
||||
|
||||
* bytes
|
||||
* 'bytes'
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=OBJECT_DTYPES)
|
||||
def object_dtype(request):
|
||||
"""Parametrized fixture for object dtypes.
|
||||
|
||||
* object
|
||||
* 'object'
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=DATETIME64_DTYPES)
|
||||
def datetime64_dtype(request):
|
||||
"""Parametrized fixture for datetime64 dtypes.
|
||||
|
||||
* 'datetime64[ns]'
|
||||
* 'M8[ns]'
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=TIMEDELTA64_DTYPES)
|
||||
def timedelta64_dtype(request):
|
||||
"""Parametrized fixture for timedelta64 dtypes.
|
||||
|
||||
* 'timedelta64[ns]'
|
||||
* 'm8[ns]'
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=FLOAT_DTYPES)
|
||||
def float_dtype(request):
|
||||
"""
|
||||
Parameterized fixture for float dtypes.
|
||||
|
||||
* float
|
||||
* 'float32'
|
||||
* 'float64'
|
||||
"""
|
||||
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=COMPLEX_DTYPES)
|
||||
def complex_dtype(request):
|
||||
"""
|
||||
Parameterized fixture for complex dtypes.
|
||||
|
||||
* complex
|
||||
* 'complex64'
|
||||
* 'complex128'
|
||||
"""
|
||||
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=SIGNED_INT_DTYPES)
|
||||
def sint_dtype(request):
|
||||
"""
|
||||
Parameterized fixture for signed integer dtypes.
|
||||
|
||||
* int
|
||||
* 'int8'
|
||||
* 'int16'
|
||||
* 'int32'
|
||||
* 'int64'
|
||||
"""
|
||||
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=UNSIGNED_INT_DTYPES)
|
||||
def uint_dtype(request):
|
||||
"""
|
||||
Parameterized fixture for unsigned integer dtypes.
|
||||
|
||||
* 'uint8'
|
||||
* 'uint16'
|
||||
* 'uint32'
|
||||
* 'uint64'
|
||||
"""
|
||||
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=ALL_INT_DTYPES)
|
||||
def any_int_dtype(request):
|
||||
"""
|
||||
Parameterized fixture for any integer dtype.
|
||||
|
||||
* int
|
||||
* 'int8'
|
||||
* 'uint8'
|
||||
* 'int16'
|
||||
* 'uint16'
|
||||
* 'int32'
|
||||
* 'uint32'
|
||||
* 'int64'
|
||||
* 'uint64'
|
||||
"""
|
||||
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=ALL_REAL_DTYPES)
|
||||
def any_real_dtype(request):
|
||||
"""
|
||||
Parameterized fixture for any (purely) real numeric dtype.
|
||||
|
||||
* int
|
||||
* 'int8'
|
||||
* 'uint8'
|
||||
* 'int16'
|
||||
* 'uint16'
|
||||
* 'int32'
|
||||
* 'uint32'
|
||||
* 'int64'
|
||||
* 'uint64'
|
||||
* float
|
||||
* 'float32'
|
||||
* 'float64'
|
||||
"""
|
||||
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=ALL_NUMPY_DTYPES)
|
||||
def any_numpy_dtype(request):
|
||||
"""
|
||||
Parameterized fixture for all numpy dtypes.
|
||||
|
||||
* bool
|
||||
* 'bool'
|
||||
* int
|
||||
* 'int8'
|
||||
* 'uint8'
|
||||
* 'int16'
|
||||
* 'uint16'
|
||||
* 'int32'
|
||||
* 'uint32'
|
||||
* 'int64'
|
||||
* 'uint64'
|
||||
* float
|
||||
* 'float32'
|
||||
* 'float64'
|
||||
* complex
|
||||
* 'complex64'
|
||||
* 'complex128'
|
||||
* str
|
||||
* 'str'
|
||||
* 'U'
|
||||
* bytes
|
||||
* 'bytes'
|
||||
* 'datetime64[ns]'
|
||||
* 'M8[ns]'
|
||||
* 'timedelta64[ns]'
|
||||
* 'm8[ns]'
|
||||
* object
|
||||
* 'object'
|
||||
"""
|
||||
|
||||
return request.param
|
||||
|
||||
|
||||
# categoricals are handled separately
|
||||
_any_skipna_inferred_dtype = [
|
||||
("string", ["a", np.nan, "c"]),
|
||||
("bytes", [b"a", np.nan, b"c"]),
|
||||
("empty", [np.nan, np.nan, np.nan]),
|
||||
("empty", []),
|
||||
("mixed-integer", ["a", np.nan, 2]),
|
||||
("mixed", ["a", np.nan, 2.0]),
|
||||
("floating", [1.0, np.nan, 2.0]),
|
||||
("integer", [1, np.nan, 2]),
|
||||
("mixed-integer-float", [1, np.nan, 2.0]),
|
||||
("decimal", [Decimal(1), np.nan, Decimal(2)]),
|
||||
("boolean", [True, np.nan, False]),
|
||||
("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]),
|
||||
("datetime", [pd.Timestamp("20130101"), np.nan, pd.Timestamp("20180101")]),
|
||||
("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
|
||||
# The following two dtypes are commented out due to GH 23554
|
||||
# ('complex', [1 + 1j, np.nan, 2 + 2j]),
|
||||
# ('timedelta64', [np.timedelta64(1, 'D'),
|
||||
# np.nan, np.timedelta64(2, 'D')]),
|
||||
("timedelta", [timedelta(1), np.nan, timedelta(2)]),
|
||||
("time", [time(1), np.nan, time(2)]),
|
||||
("period", [pd.Period(2013), pd.NaT, pd.Period(2018)]),
|
||||
("interval", [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]),
|
||||
]
|
||||
ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id
|
||||
|
||||
|
||||
@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids)
|
||||
def any_skipna_inferred_dtype(request):
|
||||
"""
|
||||
Fixture for all inferred dtypes from _libs.lib.infer_dtype
|
||||
|
||||
The covered (inferred) types are:
|
||||
* 'string'
|
||||
* 'empty'
|
||||
* 'bytes'
|
||||
* 'mixed'
|
||||
* 'mixed-integer'
|
||||
* 'mixed-integer-float'
|
||||
* 'floating'
|
||||
* 'integer'
|
||||
* 'decimal'
|
||||
* 'boolean'
|
||||
* 'datetime64'
|
||||
* 'datetime'
|
||||
* 'date'
|
||||
* 'timedelta'
|
||||
* 'time'
|
||||
* 'period'
|
||||
* 'interval'
|
||||
|
||||
Returns
|
||||
-------
|
||||
inferred_dtype : str
|
||||
The string for the inferred dtype from _libs.lib.infer_dtype
|
||||
values : np.ndarray
|
||||
An array of object dtype that will be inferred to have
|
||||
`inferred_dtype`
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import pandas._libs.lib as lib
|
||||
>>>
|
||||
>>> def test_something(any_skipna_inferred_dtype):
|
||||
... inferred_dtype, values = any_skipna_inferred_dtype
|
||||
... # will pass
|
||||
... assert lib.infer_dtype(values, skipna=True) == inferred_dtype
|
||||
"""
|
||||
inferred_dtype, values = request.param
|
||||
values = np.array(values, dtype=object) # object dtype to avoid casting
|
||||
|
||||
# correctness of inference tested in tests/dtypes/test_inference.py
|
||||
return inferred_dtype, values
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
getattr(pd.offsets, o)
|
||||
for o in pd.offsets.__all__
|
||||
if issubclass(getattr(pd.offsets, o), pd.offsets.Tick)
|
||||
]
|
||||
)
|
||||
def tick_classes(request):
|
||||
"""
|
||||
Fixture for Tick based datetime offsets available for a time series.
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Global setup for tests using Hypothesis
|
||||
|
||||
|
||||
# Registering these strategies makes them globally available via st.from_type,
|
||||
# which is use for offsets in tests/tseries/offsets/test_offsets_properties.py
|
||||
for name in "MonthBegin MonthEnd BMonthBegin BMonthEnd".split():
|
||||
cls = getattr(pd.tseries.offsets, name)
|
||||
st.register_type_strategy(
|
||||
cls, st.builds(cls, n=st.integers(-99, 99), normalize=st.booleans())
|
||||
)
|
||||
|
||||
for name in "YearBegin YearEnd BYearBegin BYearEnd".split():
|
||||
cls = getattr(pd.tseries.offsets, name)
|
||||
st.register_type_strategy(
|
||||
cls,
|
||||
st.builds(
|
||||
cls,
|
||||
n=st.integers(-5, 5),
|
||||
normalize=st.booleans(),
|
||||
month=st.integers(min_value=1, max_value=12),
|
||||
),
|
||||
)
|
||||
|
||||
for name in "QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd".split():
|
||||
cls = getattr(pd.tseries.offsets, name)
|
||||
st.register_type_strategy(
|
||||
cls,
|
||||
st.builds(
|
||||
cls,
|
||||
n=st.integers(-24, 24),
|
||||
normalize=st.booleans(),
|
||||
startingMonth=st.integers(min_value=1, max_value=12),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def float_frame():
|
||||
"""
|
||||
Fixture for DataFrame of floats with index of unique strings
|
||||
|
||||
Columns are ['A', 'B', 'C', 'D'].
|
||||
|
||||
A B C D
|
||||
P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465
|
||||
qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901
|
||||
tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433
|
||||
wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651
|
||||
M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938
|
||||
QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053
|
||||
r78Jwns6dn -0.653707 0.883127 0.682199 0.206159
|
||||
... ... ... ... ...
|
||||
IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316
|
||||
lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999
|
||||
qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121
|
||||
yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962
|
||||
65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987
|
||||
eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871
|
||||
xSucinXxuV -1.263557 0.252799 -0.552247 0.400426
|
||||
|
||||
[30 rows x 4 columns]
|
||||
"""
|
||||
return DataFrame(tm.getSeriesData())
|
||||
307
venv/lib/python3.6/site-packages/pandas/core/accessor.py
Normal file
307
venv/lib/python3.6/site-packages/pandas/core/accessor.py
Normal file
@@ -0,0 +1,307 @@
|
||||
"""
|
||||
|
||||
accessor.py contains base classes for implementing accessor properties
|
||||
that can be mixed into or pinned onto other pandas classes.
|
||||
|
||||
"""
|
||||
from typing import Set
|
||||
import warnings
|
||||
|
||||
from pandas.util._decorators import Appender
|
||||
|
||||
|
||||
class DirNamesMixin:
|
||||
_accessors = set() # type: Set[str]
|
||||
_deprecations = frozenset(
|
||||
["asobject", "base", "data", "flags", "itemsize", "strides"]
|
||||
)
|
||||
|
||||
def _dir_deletions(self):
|
||||
"""
|
||||
Delete unwanted __dir__ for this object.
|
||||
"""
|
||||
return self._accessors | self._deprecations
|
||||
|
||||
def _dir_additions(self):
|
||||
"""
|
||||
Add additional __dir__ for this object.
|
||||
"""
|
||||
rv = set()
|
||||
for accessor in self._accessors:
|
||||
try:
|
||||
getattr(self, accessor)
|
||||
rv.add(accessor)
|
||||
except AttributeError:
|
||||
pass
|
||||
return rv
|
||||
|
||||
def __dir__(self):
|
||||
"""
|
||||
Provide method name lookup and completion
|
||||
Only provide 'public' methods.
|
||||
"""
|
||||
rv = set(dir(type(self)))
|
||||
rv = (rv - self._dir_deletions()) | self._dir_additions()
|
||||
return sorted(rv)
|
||||
|
||||
|
||||
class PandasDelegate:
|
||||
"""
|
||||
An abstract base class for delegating methods/properties.
|
||||
"""
|
||||
|
||||
def _delegate_property_get(self, name, *args, **kwargs):
|
||||
raise TypeError("You cannot access the " "property {name}".format(name=name))
|
||||
|
||||
def _delegate_property_set(self, name, value, *args, **kwargs):
|
||||
raise TypeError("The property {name} cannot be set".format(name=name))
|
||||
|
||||
def _delegate_method(self, name, *args, **kwargs):
|
||||
raise TypeError("You cannot call method {name}".format(name=name))
|
||||
|
||||
@classmethod
|
||||
def _add_delegate_accessors(cls, delegate, accessors, typ, overwrite=False):
|
||||
"""
|
||||
Add accessors to cls from the delegate class.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cls : the class to add the methods/properties to
|
||||
delegate : the class to get methods/properties & doc-strings
|
||||
accessors : string list of accessors to add
|
||||
typ : 'property' or 'method'
|
||||
overwrite : boolean, default False
|
||||
overwrite the method/property in the target class if it exists.
|
||||
"""
|
||||
|
||||
def _create_delegator_property(name):
|
||||
def _getter(self):
|
||||
return self._delegate_property_get(name)
|
||||
|
||||
def _setter(self, new_values):
|
||||
return self._delegate_property_set(name, new_values)
|
||||
|
||||
_getter.__name__ = name
|
||||
_setter.__name__ = name
|
||||
|
||||
return property(
|
||||
fget=_getter, fset=_setter, doc=getattr(delegate, name).__doc__
|
||||
)
|
||||
|
||||
def _create_delegator_method(name):
|
||||
def f(self, *args, **kwargs):
|
||||
return self._delegate_method(name, *args, **kwargs)
|
||||
|
||||
f.__name__ = name
|
||||
f.__doc__ = getattr(delegate, name).__doc__
|
||||
|
||||
return f
|
||||
|
||||
for name in accessors:
|
||||
|
||||
if typ == "property":
|
||||
f = _create_delegator_property(name)
|
||||
else:
|
||||
f = _create_delegator_method(name)
|
||||
|
||||
# don't overwrite existing methods/properties
|
||||
if overwrite or not hasattr(cls, name):
|
||||
setattr(cls, name, f)
|
||||
|
||||
|
||||
def delegate_names(delegate, accessors, typ, overwrite=False):
|
||||
"""
|
||||
Add delegated names to a class using a class decorator. This provides
|
||||
an alternative usage to directly calling `_add_delegate_accessors`
|
||||
below a class definition.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
delegate : object
|
||||
the class to get methods/properties & doc-strings
|
||||
accessors : Sequence[str]
|
||||
List of accessor to add
|
||||
typ : {'property', 'method'}
|
||||
overwrite : boolean, default False
|
||||
overwrite the method/property in the target class if it exists
|
||||
|
||||
Returns
|
||||
-------
|
||||
callable
|
||||
A class decorator.
|
||||
|
||||
Examples
|
||||
--------
|
||||
@delegate_names(Categorical, ["categories", "ordered"], "property")
|
||||
class CategoricalAccessor(PandasDelegate):
|
||||
[...]
|
||||
"""
|
||||
|
||||
def add_delegate_accessors(cls):
|
||||
cls._add_delegate_accessors(delegate, accessors, typ, overwrite=overwrite)
|
||||
return cls
|
||||
|
||||
return add_delegate_accessors
|
||||
|
||||
|
||||
# Ported with modifications from xarray
|
||||
# https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py
|
||||
# 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors
|
||||
# 2. We use a UserWarning instead of a custom Warning
|
||||
|
||||
|
||||
class CachedAccessor:
|
||||
"""
|
||||
Custom property-like object (descriptor) for caching accessors.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name : str
|
||||
The namespace this will be accessed under, e.g. ``df.foo``
|
||||
accessor : cls
|
||||
The class with the extension methods. The class' __init__ method
|
||||
should expect one of a ``Series``, ``DataFrame`` or ``Index`` as
|
||||
the single argument ``data``
|
||||
"""
|
||||
|
||||
def __init__(self, name, accessor):
|
||||
self._name = name
|
||||
self._accessor = accessor
|
||||
|
||||
def __get__(self, obj, cls):
|
||||
if obj is None:
|
||||
# we're accessing the attribute of the class, i.e., Dataset.geo
|
||||
return self._accessor
|
||||
accessor_obj = self._accessor(obj)
|
||||
# Replace the property with the accessor object. Inspired by:
|
||||
# http://www.pydanny.com/cached-property.html
|
||||
# We need to use object.__setattr__ because we overwrite __setattr__ on
|
||||
# NDFrame
|
||||
object.__setattr__(obj, self._name, accessor_obj)
|
||||
return accessor_obj
|
||||
|
||||
|
||||
def _register_accessor(name, cls):
|
||||
def decorator(accessor):
|
||||
if hasattr(cls, name):
|
||||
warnings.warn(
|
||||
"registration of accessor {!r} under name {!r} for type "
|
||||
"{!r} is overriding a preexisting attribute with the same "
|
||||
"name.".format(accessor, name, cls),
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
setattr(cls, name, CachedAccessor(name, accessor))
|
||||
cls._accessors.add(name)
|
||||
return accessor
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
_doc = """
|
||||
Register a custom accessor on %(klass)s objects.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name : str
|
||||
Name under which the accessor should be registered. A warning is issued
|
||||
if this name conflicts with a preexisting attribute.
|
||||
|
||||
Returns
|
||||
-------
|
||||
callable
|
||||
A class decorator.
|
||||
|
||||
See Also
|
||||
--------
|
||||
%(others)s
|
||||
|
||||
Notes
|
||||
-----
|
||||
When accessed, your accessor will be initialized with the pandas object
|
||||
the user is interacting with. So the signature must be
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
def __init__(self, pandas_object): # noqa: E999
|
||||
...
|
||||
|
||||
For consistency with pandas methods, you should raise an ``AttributeError``
|
||||
if the data passed to your accessor has an incorrect dtype.
|
||||
|
||||
>>> pd.Series(['a', 'b']).dt
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
AttributeError: Can only use .dt accessor with datetimelike values
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
In your library code::
|
||||
|
||||
import pandas as pd
|
||||
|
||||
@pd.api.extensions.register_dataframe_accessor("geo")
|
||||
class GeoAccessor:
|
||||
def __init__(self, pandas_obj):
|
||||
self._obj = pandas_obj
|
||||
|
||||
@property
|
||||
def center(self):
|
||||
# return the geographic center point of this DataFrame
|
||||
lat = self._obj.latitude
|
||||
lon = self._obj.longitude
|
||||
return (float(lon.mean()), float(lat.mean()))
|
||||
|
||||
def plot(self):
|
||||
# plot this array's data on a map, e.g., using Cartopy
|
||||
pass
|
||||
|
||||
Back in an interactive IPython session:
|
||||
|
||||
>>> ds = pd.DataFrame({'longitude': np.linspace(0, 10),
|
||||
... 'latitude': np.linspace(0, 20)})
|
||||
>>> ds.geo.center
|
||||
(5.0, 10.0)
|
||||
>>> ds.geo.plot()
|
||||
# plots data on a map
|
||||
"""
|
||||
|
||||
|
||||
@Appender(
|
||||
_doc
|
||||
% dict(
|
||||
klass="DataFrame",
|
||||
others=("register_series_accessor, " "register_index_accessor"),
|
||||
)
|
||||
)
|
||||
def register_dataframe_accessor(name):
|
||||
from pandas import DataFrame
|
||||
|
||||
return _register_accessor(name, DataFrame)
|
||||
|
||||
|
||||
@Appender(
|
||||
_doc
|
||||
% dict(
|
||||
klass="Series",
|
||||
others=("register_dataframe_accessor, " "register_index_accessor"),
|
||||
)
|
||||
)
|
||||
def register_series_accessor(name):
|
||||
from pandas import Series
|
||||
|
||||
return _register_accessor(name, Series)
|
||||
|
||||
|
||||
@Appender(
|
||||
_doc
|
||||
% dict(
|
||||
klass="Index",
|
||||
others=("register_dataframe_accessor, " "register_series_accessor"),
|
||||
)
|
||||
)
|
||||
def register_index_accessor(name):
|
||||
from pandas import Index
|
||||
|
||||
return _register_accessor(name, Index)
|
||||
1988
venv/lib/python3.6/site-packages/pandas/core/algorithms.py
Normal file
1988
venv/lib/python3.6/site-packages/pandas/core/algorithms.py
Normal file
File diff suppressed because it is too large
Load Diff
55
venv/lib/python3.6/site-packages/pandas/core/api.py
Normal file
55
venv/lib/python3.6/site-packages/pandas/core/api.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# flake8: noqa
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
Int16Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
UInt8Dtype,
|
||||
UInt16Dtype,
|
||||
UInt32Dtype,
|
||||
UInt64Dtype,
|
||||
)
|
||||
from pandas.core.algorithms import factorize, unique, value_counts
|
||||
from pandas.core.dtypes.missing import isna, isnull, notna, notnull
|
||||
from pandas.core.dtypes.dtypes import (
|
||||
CategoricalDtype,
|
||||
PeriodDtype,
|
||||
IntervalDtype,
|
||||
DatetimeTZDtype,
|
||||
)
|
||||
from pandas.core.arrays import Categorical, array
|
||||
from pandas.core.groupby import Grouper, NamedAgg
|
||||
from pandas.io.formats.format import set_eng_float_format
|
||||
from pandas.core.index import (
|
||||
Index,
|
||||
CategoricalIndex,
|
||||
Int64Index,
|
||||
UInt64Index,
|
||||
RangeIndex,
|
||||
Float64Index,
|
||||
MultiIndex,
|
||||
IntervalIndex,
|
||||
TimedeltaIndex,
|
||||
DatetimeIndex,
|
||||
PeriodIndex,
|
||||
NaT,
|
||||
)
|
||||
from pandas.core.indexes.period import Period, period_range
|
||||
from pandas.core.indexes.timedeltas import Timedelta, timedelta_range
|
||||
from pandas.core.indexes.datetimes import Timestamp, date_range, bdate_range
|
||||
from pandas.core.indexes.interval import Interval, interval_range
|
||||
|
||||
from pandas.core.series import Series
|
||||
from pandas.core.frame import DataFrame
|
||||
|
||||
# TODO: Remove import when statsmodels updates #18264
|
||||
from pandas.core.reshape.reshape import get_dummies
|
||||
|
||||
from pandas.core.indexing import IndexSlice
|
||||
from pandas.core.tools.numeric import to_numeric
|
||||
from pandas.tseries.offsets import DateOffset
|
||||
from pandas.core.tools.datetimes import to_datetime
|
||||
from pandas.core.tools.timedeltas import to_timedelta
|
||||
446
venv/lib/python3.6/site-packages/pandas/core/apply.py
Normal file
446
venv/lib/python3.6/site-packages/pandas/core/apply.py
Normal file
@@ -0,0 +1,446 @@
|
||||
import inspect
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import reduction
|
||||
from pandas.util._decorators import cache_readonly
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_dict_like,
|
||||
is_extension_type,
|
||||
is_list_like,
|
||||
is_sequence,
|
||||
)
|
||||
from pandas.core.dtypes.generic import ABCSeries
|
||||
|
||||
from pandas.io.formats.printing import pprint_thing
|
||||
|
||||
|
||||
def frame_apply(
|
||||
obj,
|
||||
func,
|
||||
axis=0,
|
||||
broadcast=None,
|
||||
raw=False,
|
||||
reduce=None,
|
||||
result_type=None,
|
||||
ignore_failures=False,
|
||||
args=None,
|
||||
kwds=None,
|
||||
):
|
||||
""" construct and return a row or column based frame apply object """
|
||||
|
||||
axis = obj._get_axis_number(axis)
|
||||
if axis == 0:
|
||||
klass = FrameRowApply
|
||||
elif axis == 1:
|
||||
klass = FrameColumnApply
|
||||
|
||||
return klass(
|
||||
obj,
|
||||
func,
|
||||
broadcast=broadcast,
|
||||
raw=raw,
|
||||
reduce=reduce,
|
||||
result_type=result_type,
|
||||
ignore_failures=ignore_failures,
|
||||
args=args,
|
||||
kwds=kwds,
|
||||
)
|
||||
|
||||
|
||||
class FrameApply:
|
||||
def __init__(
|
||||
self,
|
||||
obj,
|
||||
func,
|
||||
broadcast,
|
||||
raw,
|
||||
reduce,
|
||||
result_type,
|
||||
ignore_failures,
|
||||
args,
|
||||
kwds,
|
||||
):
|
||||
self.obj = obj
|
||||
self.raw = raw
|
||||
self.ignore_failures = ignore_failures
|
||||
self.args = args or ()
|
||||
self.kwds = kwds or {}
|
||||
|
||||
if result_type not in [None, "reduce", "broadcast", "expand"]:
|
||||
raise ValueError(
|
||||
"invalid value for result_type, must be one "
|
||||
"of {None, 'reduce', 'broadcast', 'expand'}"
|
||||
)
|
||||
|
||||
if broadcast is not None:
|
||||
warnings.warn(
|
||||
"The broadcast argument is deprecated and will "
|
||||
"be removed in a future version. You can specify "
|
||||
"result_type='broadcast' to broadcast the result "
|
||||
"to the original dimensions",
|
||||
FutureWarning,
|
||||
stacklevel=4,
|
||||
)
|
||||
if broadcast:
|
||||
result_type = "broadcast"
|
||||
|
||||
if reduce is not None:
|
||||
warnings.warn(
|
||||
"The reduce argument is deprecated and will "
|
||||
"be removed in a future version. You can specify "
|
||||
"result_type='reduce' to try to reduce the result "
|
||||
"to the original dimensions",
|
||||
FutureWarning,
|
||||
stacklevel=4,
|
||||
)
|
||||
if reduce:
|
||||
|
||||
if result_type is not None:
|
||||
raise ValueError("cannot pass both reduce=True and result_type")
|
||||
|
||||
result_type = "reduce"
|
||||
|
||||
self.result_type = result_type
|
||||
|
||||
# curry if needed
|
||||
if (kwds or args) and not isinstance(func, (np.ufunc, str)):
|
||||
|
||||
def f(x):
|
||||
return func(x, *args, **kwds)
|
||||
|
||||
else:
|
||||
f = func
|
||||
|
||||
self.f = f
|
||||
|
||||
# results
|
||||
self.result = None
|
||||
self.res_index = None
|
||||
self.res_columns = None
|
||||
|
||||
@property
|
||||
def columns(self):
|
||||
return self.obj.columns
|
||||
|
||||
@property
|
||||
def index(self):
|
||||
return self.obj.index
|
||||
|
||||
@cache_readonly
|
||||
def values(self):
|
||||
return self.obj.values
|
||||
|
||||
@cache_readonly
|
||||
def dtypes(self):
|
||||
return self.obj.dtypes
|
||||
|
||||
@property
|
||||
def agg_axis(self):
|
||||
return self.obj._get_agg_axis(self.axis)
|
||||
|
||||
def get_result(self):
|
||||
""" compute the results """
|
||||
|
||||
# dispatch to agg
|
||||
if is_list_like(self.f) or is_dict_like(self.f):
|
||||
return self.obj.aggregate(self.f, axis=self.axis, *self.args, **self.kwds)
|
||||
|
||||
# all empty
|
||||
if len(self.columns) == 0 and len(self.index) == 0:
|
||||
return self.apply_empty_result()
|
||||
|
||||
# string dispatch
|
||||
if isinstance(self.f, str):
|
||||
# Support for `frame.transform('method')`
|
||||
# Some methods (shift, etc.) require the axis argument, others
|
||||
# don't, so inspect and insert if necessary.
|
||||
func = getattr(self.obj, self.f)
|
||||
sig = inspect.getfullargspec(func)
|
||||
if "axis" in sig.args:
|
||||
self.kwds["axis"] = self.axis
|
||||
return func(*self.args, **self.kwds)
|
||||
|
||||
# ufunc
|
||||
elif isinstance(self.f, np.ufunc):
|
||||
with np.errstate(all="ignore"):
|
||||
results = self.obj._data.apply("apply", func=self.f)
|
||||
return self.obj._constructor(
|
||||
data=results, index=self.index, columns=self.columns, copy=False
|
||||
)
|
||||
|
||||
# broadcasting
|
||||
if self.result_type == "broadcast":
|
||||
return self.apply_broadcast()
|
||||
|
||||
# one axis empty
|
||||
elif not all(self.obj.shape):
|
||||
return self.apply_empty_result()
|
||||
|
||||
# raw
|
||||
elif self.raw and not self.obj._is_mixed_type:
|
||||
return self.apply_raw()
|
||||
|
||||
return self.apply_standard()
|
||||
|
||||
def apply_empty_result(self):
|
||||
"""
|
||||
we have an empty result; at least 1 axis is 0
|
||||
|
||||
we will try to apply the function to an empty
|
||||
series in order to see if this is a reduction function
|
||||
"""
|
||||
|
||||
# we are not asked to reduce or infer reduction
|
||||
# so just return a copy of the existing object
|
||||
if self.result_type not in ["reduce", None]:
|
||||
return self.obj.copy()
|
||||
|
||||
# we may need to infer
|
||||
reduce = self.result_type == "reduce"
|
||||
|
||||
from pandas import Series
|
||||
|
||||
if not reduce:
|
||||
|
||||
EMPTY_SERIES = Series([])
|
||||
try:
|
||||
r = self.f(EMPTY_SERIES, *self.args, **self.kwds)
|
||||
reduce = not isinstance(r, Series)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if reduce:
|
||||
return self.obj._constructor_sliced(np.nan, index=self.agg_axis)
|
||||
else:
|
||||
return self.obj.copy()
|
||||
|
||||
def apply_raw(self):
|
||||
""" apply to the values as a numpy array """
|
||||
|
||||
try:
|
||||
result = reduction.reduce(self.values, self.f, axis=self.axis)
|
||||
except Exception:
|
||||
result = np.apply_along_axis(self.f, self.axis, self.values)
|
||||
|
||||
# TODO: mixed type case
|
||||
if result.ndim == 2:
|
||||
return self.obj._constructor(result, index=self.index, columns=self.columns)
|
||||
else:
|
||||
return self.obj._constructor_sliced(result, index=self.agg_axis)
|
||||
|
||||
def apply_broadcast(self, target):
|
||||
result_values = np.empty_like(target.values)
|
||||
|
||||
# axis which we want to compare compliance
|
||||
result_compare = target.shape[0]
|
||||
|
||||
for i, col in enumerate(target.columns):
|
||||
res = self.f(target[col])
|
||||
ares = np.asarray(res).ndim
|
||||
|
||||
# must be a scalar or 1d
|
||||
if ares > 1:
|
||||
raise ValueError("too many dims to broadcast")
|
||||
elif ares == 1:
|
||||
|
||||
# must match return dim
|
||||
if result_compare != len(res):
|
||||
raise ValueError("cannot broadcast result")
|
||||
|
||||
result_values[:, i] = res
|
||||
|
||||
# we *always* preserve the original index / columns
|
||||
result = self.obj._constructor(
|
||||
result_values, index=target.index, columns=target.columns
|
||||
)
|
||||
return result
|
||||
|
||||
def apply_standard(self):
|
||||
|
||||
# try to reduce first (by default)
|
||||
# this only matters if the reduction in values is of different dtype
|
||||
# e.g. if we want to apply to a SparseFrame, then can't directly reduce
|
||||
|
||||
# we cannot reduce using non-numpy dtypes,
|
||||
# as demonstrated in gh-12244
|
||||
if (
|
||||
self.result_type in ["reduce", None]
|
||||
and not self.dtypes.apply(is_extension_type).any()
|
||||
):
|
||||
|
||||
# Create a dummy Series from an empty array
|
||||
from pandas import Series
|
||||
|
||||
values = self.values
|
||||
index = self.obj._get_axis(self.axis)
|
||||
labels = self.agg_axis
|
||||
empty_arr = np.empty(len(index), dtype=values.dtype)
|
||||
dummy = Series(empty_arr, index=index, dtype=values.dtype)
|
||||
|
||||
try:
|
||||
result = reduction.reduce(
|
||||
values, self.f, axis=self.axis, dummy=dummy, labels=labels
|
||||
)
|
||||
return self.obj._constructor_sliced(result, index=labels)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# compute the result using the series generator
|
||||
self.apply_series_generator()
|
||||
|
||||
# wrap results
|
||||
return self.wrap_results()
|
||||
|
||||
def apply_series_generator(self):
|
||||
series_gen = self.series_generator
|
||||
res_index = self.result_index
|
||||
|
||||
i = None
|
||||
keys = []
|
||||
results = {}
|
||||
if self.ignore_failures:
|
||||
successes = []
|
||||
for i, v in enumerate(series_gen):
|
||||
try:
|
||||
results[i] = self.f(v)
|
||||
keys.append(v.name)
|
||||
successes.append(i)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# so will work with MultiIndex
|
||||
if len(successes) < len(res_index):
|
||||
res_index = res_index.take(successes)
|
||||
|
||||
else:
|
||||
try:
|
||||
for i, v in enumerate(series_gen):
|
||||
results[i] = self.f(v)
|
||||
keys.append(v.name)
|
||||
except Exception as e:
|
||||
if hasattr(e, "args"):
|
||||
|
||||
# make sure i is defined
|
||||
if i is not None:
|
||||
k = res_index[i]
|
||||
e.args = e.args + ("occurred at index %s" % pprint_thing(k),)
|
||||
raise
|
||||
|
||||
self.results = results
|
||||
self.res_index = res_index
|
||||
self.res_columns = self.result_columns
|
||||
|
||||
def wrap_results(self):
|
||||
results = self.results
|
||||
|
||||
# see if we can infer the results
|
||||
if len(results) > 0 and is_sequence(results[0]):
|
||||
|
||||
return self.wrap_results_for_axis()
|
||||
|
||||
# dict of scalars
|
||||
result = self.obj._constructor_sliced(results)
|
||||
result.index = self.res_index
|
||||
|
||||
return result
|
||||
|
||||
|
||||
class FrameRowApply(FrameApply):
|
||||
axis = 0
|
||||
|
||||
def apply_broadcast(self):
|
||||
return super().apply_broadcast(self.obj)
|
||||
|
||||
@property
|
||||
def series_generator(self):
|
||||
return (self.obj._ixs(i, axis=1) for i in range(len(self.columns)))
|
||||
|
||||
@property
|
||||
def result_index(self):
|
||||
return self.columns
|
||||
|
||||
@property
|
||||
def result_columns(self):
|
||||
return self.index
|
||||
|
||||
def wrap_results_for_axis(self):
|
||||
""" return the results for the rows """
|
||||
|
||||
results = self.results
|
||||
result = self.obj._constructor(data=results)
|
||||
|
||||
if not isinstance(results[0], ABCSeries):
|
||||
try:
|
||||
result.index = self.res_columns
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
try:
|
||||
result.columns = self.res_index
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
return result
|
||||
|
||||
|
||||
class FrameColumnApply(FrameApply):
|
||||
axis = 1
|
||||
|
||||
def apply_broadcast(self):
|
||||
result = super().apply_broadcast(self.obj.T)
|
||||
return result.T
|
||||
|
||||
@property
|
||||
def series_generator(self):
|
||||
constructor = self.obj._constructor_sliced
|
||||
return (
|
||||
constructor(arr, index=self.columns, name=name)
|
||||
for i, (arr, name) in enumerate(zip(self.values, self.index))
|
||||
)
|
||||
|
||||
@property
|
||||
def result_index(self):
|
||||
return self.index
|
||||
|
||||
@property
|
||||
def result_columns(self):
|
||||
return self.columns
|
||||
|
||||
def wrap_results_for_axis(self):
|
||||
""" return the results for the columns """
|
||||
results = self.results
|
||||
|
||||
# we have requested to expand
|
||||
if self.result_type == "expand":
|
||||
result = self.infer_to_same_shape()
|
||||
|
||||
# we have a non-series and don't want inference
|
||||
elif not isinstance(results[0], ABCSeries):
|
||||
from pandas import Series
|
||||
|
||||
result = Series(results)
|
||||
result.index = self.res_index
|
||||
|
||||
# we may want to infer results
|
||||
else:
|
||||
result = self.infer_to_same_shape()
|
||||
|
||||
return result
|
||||
|
||||
def infer_to_same_shape(self):
|
||||
""" infer the results to the same shape as the input object """
|
||||
results = self.results
|
||||
|
||||
result = self.obj._constructor(data=results)
|
||||
result = result.T
|
||||
|
||||
# set the index
|
||||
result.index = self.res_index
|
||||
|
||||
# infer dtypes
|
||||
result = result.infer_objects()
|
||||
|
||||
return result
|
||||
@@ -0,0 +1,14 @@
|
||||
from .array_ import array # noqa: F401
|
||||
from .base import ( # noqa: F401
|
||||
ExtensionArray,
|
||||
ExtensionOpsMixin,
|
||||
ExtensionScalarOpsMixin,
|
||||
)
|
||||
from .categorical import Categorical # noqa: F401
|
||||
from .datetimes import DatetimeArray # noqa: F401
|
||||
from .integer import IntegerArray, integer_array # noqa: F401
|
||||
from .interval import IntervalArray # noqa: F401
|
||||
from .numpy_ import PandasArray, PandasDtype # noqa: F401
|
||||
from .period import PeriodArray, period_array # noqa: F401
|
||||
from .sparse import SparseArray # noqa: F401
|
||||
from .timedeltas import TimedeltaArray # noqa: F401
|
||||
196
venv/lib/python3.6/site-packages/pandas/core/arrays/_ranges.py
Normal file
196
venv/lib/python3.6/site-packages/pandas/core/arrays/_ranges.py
Normal file
@@ -0,0 +1,196 @@
|
||||
"""
|
||||
Helper functions to generate range-like data for DatetimeArray
|
||||
(and possibly TimedeltaArray/PeriodArray)
|
||||
"""
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp
|
||||
|
||||
from pandas.tseries.offsets import DateOffset, Tick, generate_range
|
||||
|
||||
|
||||
def generate_regular_range(
|
||||
start: Timestamp, end: Timestamp, periods: int, freq: DateOffset
|
||||
) -> Tuple[np.ndarray, str]:
|
||||
"""
|
||||
Generate a range of dates with the spans between dates described by
|
||||
the given `freq` DateOffset.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
start : Timestamp or None
|
||||
first point of produced date range
|
||||
end : Timestamp or None
|
||||
last point of produced date range
|
||||
periods : int
|
||||
number of periods in produced date range
|
||||
freq : DateOffset
|
||||
describes space between dates in produced date range
|
||||
|
||||
Returns
|
||||
-------
|
||||
ndarray[np.int64] representing nanosecond unix timestamps
|
||||
"""
|
||||
if isinstance(freq, Tick):
|
||||
stride = freq.nanos
|
||||
if periods is None:
|
||||
b = Timestamp(start).value
|
||||
# cannot just use e = Timestamp(end) + 1 because arange breaks when
|
||||
# stride is too large, see GH10887
|
||||
e = b + (Timestamp(end).value - b) // stride * stride + stride // 2 + 1
|
||||
# end.tz == start.tz by this point due to _generate implementation
|
||||
tz = start.tz
|
||||
elif start is not None:
|
||||
b = Timestamp(start).value
|
||||
e = _generate_range_overflow_safe(b, periods, stride, side="start")
|
||||
tz = start.tz
|
||||
elif end is not None:
|
||||
e = Timestamp(end).value + stride
|
||||
b = _generate_range_overflow_safe(e, periods, stride, side="end")
|
||||
tz = end.tz
|
||||
else:
|
||||
raise ValueError(
|
||||
"at least 'start' or 'end' should be specified "
|
||||
"if a 'period' is given."
|
||||
)
|
||||
|
||||
with np.errstate(over="raise"):
|
||||
# If the range is sufficiently large, np.arange may overflow
|
||||
# and incorrectly return an empty array if not caught.
|
||||
try:
|
||||
values = np.arange(b, e, stride, dtype=np.int64)
|
||||
except FloatingPointError:
|
||||
xdr = [b]
|
||||
while xdr[-1] != e:
|
||||
xdr.append(xdr[-1] + stride)
|
||||
values = np.array(xdr[:-1], dtype=np.int64)
|
||||
|
||||
else:
|
||||
tz = None
|
||||
# start and end should have the same timezone by this point
|
||||
if start is not None:
|
||||
tz = start.tz
|
||||
elif end is not None:
|
||||
tz = end.tz
|
||||
|
||||
xdr = generate_range(start=start, end=end, periods=periods, offset=freq)
|
||||
|
||||
values = np.array([x.value for x in xdr], dtype=np.int64)
|
||||
|
||||
return values, tz
|
||||
|
||||
|
||||
def _generate_range_overflow_safe(
|
||||
endpoint: int, periods: int, stride: int, side: str = "start"
|
||||
) -> int:
|
||||
"""
|
||||
Calculate the second endpoint for passing to np.arange, checking
|
||||
to avoid an integer overflow. Catch OverflowError and re-raise
|
||||
as OutOfBoundsDatetime.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
endpoint : int
|
||||
nanosecond timestamp of the known endpoint of the desired range
|
||||
periods : int
|
||||
number of periods in the desired range
|
||||
stride : int
|
||||
nanoseconds between periods in the desired range
|
||||
side : {'start', 'end'}
|
||||
which end of the range `endpoint` refers to
|
||||
|
||||
Returns
|
||||
-------
|
||||
other_end : int
|
||||
|
||||
Raises
|
||||
------
|
||||
OutOfBoundsDatetime
|
||||
"""
|
||||
# GH#14187 raise instead of incorrectly wrapping around
|
||||
assert side in ["start", "end"]
|
||||
|
||||
i64max = np.uint64(np.iinfo(np.int64).max)
|
||||
msg = (
|
||||
"Cannot generate range with {side}={endpoint} and "
|
||||
"periods={periods}".format(side=side, endpoint=endpoint, periods=periods)
|
||||
)
|
||||
|
||||
with np.errstate(over="raise"):
|
||||
# if periods * strides cannot be multiplied within the *uint64* bounds,
|
||||
# we cannot salvage the operation by recursing, so raise
|
||||
try:
|
||||
addend = np.uint64(periods) * np.uint64(np.abs(stride))
|
||||
except FloatingPointError:
|
||||
raise OutOfBoundsDatetime(msg)
|
||||
|
||||
if np.abs(addend) <= i64max:
|
||||
# relatively easy case without casting concerns
|
||||
return _generate_range_overflow_safe_signed(endpoint, periods, stride, side)
|
||||
|
||||
elif (endpoint > 0 and side == "start" and stride > 0) or (
|
||||
endpoint < 0 and side == "end" and stride > 0
|
||||
):
|
||||
# no chance of not-overflowing
|
||||
raise OutOfBoundsDatetime(msg)
|
||||
|
||||
elif side == "end" and endpoint > i64max and endpoint - stride <= i64max:
|
||||
# in _generate_regular_range we added `stride` thereby overflowing
|
||||
# the bounds. Adjust to fix this.
|
||||
return _generate_range_overflow_safe(
|
||||
endpoint - stride, periods - 1, stride, side
|
||||
)
|
||||
|
||||
# split into smaller pieces
|
||||
mid_periods = periods // 2
|
||||
remaining = periods - mid_periods
|
||||
assert 0 < remaining < periods, (remaining, periods, endpoint, stride)
|
||||
|
||||
midpoint = _generate_range_overflow_safe(endpoint, mid_periods, stride, side)
|
||||
return _generate_range_overflow_safe(midpoint, remaining, stride, side)
|
||||
|
||||
|
||||
def _generate_range_overflow_safe_signed(
|
||||
endpoint: int, periods: int, stride: int, side: str
|
||||
) -> int:
|
||||
"""
|
||||
A special case for _generate_range_overflow_safe where `periods * stride`
|
||||
can be calculated without overflowing int64 bounds.
|
||||
"""
|
||||
assert side in ["start", "end"]
|
||||
if side == "end":
|
||||
stride *= -1
|
||||
|
||||
with np.errstate(over="raise"):
|
||||
addend = np.int64(periods) * np.int64(stride)
|
||||
try:
|
||||
# easy case with no overflows
|
||||
return np.int64(endpoint) + addend
|
||||
except (FloatingPointError, OverflowError):
|
||||
# with endpoint negative and addend positive we risk
|
||||
# FloatingPointError; with reversed signed we risk OverflowError
|
||||
pass
|
||||
|
||||
# if stride and endpoint had opposite signs, then endpoint + addend
|
||||
# should never overflow. so they must have the same signs
|
||||
assert (stride > 0 and endpoint >= 0) or (stride < 0 and endpoint <= 0)
|
||||
|
||||
if stride > 0:
|
||||
# watch out for very special case in which we just slightly
|
||||
# exceed implementation bounds, but when passing the result to
|
||||
# np.arange will get a result slightly within the bounds
|
||||
assert endpoint >= 0
|
||||
result = np.uint64(endpoint) + np.uint64(addend)
|
||||
i64max = np.uint64(np.iinfo(np.int64).max)
|
||||
assert result > i64max
|
||||
if result <= i64max + np.uint64(stride):
|
||||
return result
|
||||
|
||||
raise OutOfBoundsDatetime(
|
||||
"Cannot generate range with "
|
||||
"{side}={endpoint} and "
|
||||
"periods={periods}".format(side=side, endpoint=endpoint, periods=periods)
|
||||
)
|
||||
281
venv/lib/python3.6/site-packages/pandas/core/arrays/array_.py
Normal file
281
venv/lib/python3.6/site-packages/pandas/core/arrays/array_.py
Normal file
@@ -0,0 +1,281 @@
|
||||
from typing import Optional, Sequence, Union, cast
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import lib, tslibs
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_datetime64_ns_dtype,
|
||||
is_extension_array_dtype,
|
||||
is_timedelta64_ns_dtype,
|
||||
)
|
||||
from pandas.core.dtypes.dtypes import ExtensionDtype, registry
|
||||
from pandas.core.dtypes.generic import ABCExtensionArray
|
||||
|
||||
|
||||
def array(
|
||||
data: Sequence[object],
|
||||
dtype: Optional[Union[str, np.dtype, ExtensionDtype]] = None,
|
||||
copy: bool = True,
|
||||
) -> ABCExtensionArray:
|
||||
"""
|
||||
Create an array.
|
||||
|
||||
.. versionadded:: 0.24.0
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : Sequence of objects
|
||||
The scalars inside `data` should be instances of the
|
||||
scalar type for `dtype`. It's expected that `data`
|
||||
represents a 1-dimensional array of data.
|
||||
|
||||
When `data` is an Index or Series, the underlying array
|
||||
will be extracted from `data`.
|
||||
|
||||
dtype : str, np.dtype, or ExtensionDtype, optional
|
||||
The dtype to use for the array. This may be a NumPy
|
||||
dtype or an extension type registered with pandas using
|
||||
:meth:`pandas.api.extensions.register_extension_dtype`.
|
||||
|
||||
If not specified, there are two possibilities:
|
||||
|
||||
1. When `data` is a :class:`Series`, :class:`Index`, or
|
||||
:class:`ExtensionArray`, the `dtype` will be taken
|
||||
from the data.
|
||||
2. Otherwise, pandas will attempt to infer the `dtype`
|
||||
from the data.
|
||||
|
||||
Note that when `data` is a NumPy array, ``data.dtype`` is
|
||||
*not* used for inferring the array type. This is because
|
||||
NumPy cannot represent all the types of data that can be
|
||||
held in extension arrays.
|
||||
|
||||
Currently, pandas will infer an extension dtype for sequences of
|
||||
|
||||
============================== =====================================
|
||||
Scalar Type Array Type
|
||||
============================== =====================================
|
||||
:class:`pandas.Interval` :class:`pandas.arrays.IntervalArray`
|
||||
:class:`pandas.Period` :class:`pandas.arrays.PeriodArray`
|
||||
:class:`datetime.datetime` :class:`pandas.arrays.DatetimeArray`
|
||||
:class:`datetime.timedelta` :class:`pandas.arrays.TimedeltaArray`
|
||||
============================== =====================================
|
||||
|
||||
For all other cases, NumPy's usual inference rules will be used.
|
||||
|
||||
copy : bool, default True
|
||||
Whether to copy the data, even if not necessary. Depending
|
||||
on the type of `data`, creating the new array may require
|
||||
copying data, even if ``copy=False``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ExtensionArray
|
||||
The newly created array.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
When `data` is not 1-dimensional.
|
||||
|
||||
See Also
|
||||
--------
|
||||
numpy.array : Construct a NumPy array.
|
||||
Series : Construct a pandas Series.
|
||||
Index : Construct a pandas Index.
|
||||
arrays.PandasArray : ExtensionArray wrapping a NumPy array.
|
||||
Series.array : Extract the array stored within a Series.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Omitting the `dtype` argument means pandas will attempt to infer the
|
||||
best array type from the values in the data. As new array types are
|
||||
added by pandas and 3rd party libraries, the "best" array type may
|
||||
change. We recommend specifying `dtype` to ensure that
|
||||
|
||||
1. the correct array type for the data is returned
|
||||
2. the returned array type doesn't change as new extension types
|
||||
are added by pandas and third-party libraries
|
||||
|
||||
Additionally, if the underlying memory representation of the returned
|
||||
array matters, we recommend specifying the `dtype` as a concrete object
|
||||
rather than a string alias or allowing it to be inferred. For example,
|
||||
a future version of pandas or a 3rd-party library may include a
|
||||
dedicated ExtensionArray for string data. In this event, the following
|
||||
would no longer return a :class:`arrays.PandasArray` backed by a NumPy
|
||||
array.
|
||||
|
||||
>>> pd.array(['a', 'b'], dtype=str)
|
||||
<PandasArray>
|
||||
['a', 'b']
|
||||
Length: 2, dtype: str32
|
||||
|
||||
This would instead return the new ExtensionArray dedicated for string
|
||||
data. If you really need the new array to be backed by a NumPy array,
|
||||
specify that in the dtype.
|
||||
|
||||
>>> pd.array(['a', 'b'], dtype=np.dtype("<U1"))
|
||||
<PandasArray>
|
||||
['a', 'b']
|
||||
Length: 2, dtype: str32
|
||||
|
||||
Or use the dedicated constructor for the array you're expecting, and
|
||||
wrap that in a PandasArray
|
||||
|
||||
>>> pd.array(np.array(['a', 'b'], dtype='<U1'))
|
||||
<PandasArray>
|
||||
['a', 'b']
|
||||
Length: 2, dtype: str32
|
||||
|
||||
Finally, Pandas has arrays that mostly overlap with NumPy
|
||||
|
||||
* :class:`arrays.DatetimeArray`
|
||||
* :class:`arrays.TimedeltaArray`
|
||||
|
||||
When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is
|
||||
passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray``
|
||||
rather than a ``PandasArray``. This is for symmetry with the case of
|
||||
timezone-aware data, which NumPy does not natively support.
|
||||
|
||||
>>> pd.array(['2015', '2016'], dtype='datetime64[ns]')
|
||||
<DatetimeArray>
|
||||
['2015-01-01 00:00:00', '2016-01-01 00:00:00']
|
||||
Length: 2, dtype: datetime64[ns]
|
||||
|
||||
>>> pd.array(["1H", "2H"], dtype='timedelta64[ns]')
|
||||
<TimedeltaArray>
|
||||
['01:00:00', '02:00:00']
|
||||
Length: 2, dtype: timedelta64[ns]
|
||||
|
||||
Examples
|
||||
--------
|
||||
If a dtype is not specified, `data` is passed through to
|
||||
:meth:`numpy.array`, and a :class:`arrays.PandasArray` is returned.
|
||||
|
||||
>>> pd.array([1, 2])
|
||||
<PandasArray>
|
||||
[1, 2]
|
||||
Length: 2, dtype: int64
|
||||
|
||||
Or the NumPy dtype can be specified
|
||||
|
||||
>>> pd.array([1, 2], dtype=np.dtype("int32"))
|
||||
<PandasArray>
|
||||
[1, 2]
|
||||
Length: 2, dtype: int32
|
||||
|
||||
You can use the string alias for `dtype`
|
||||
|
||||
>>> pd.array(['a', 'b', 'a'], dtype='category')
|
||||
[a, b, a]
|
||||
Categories (2, object): [a, b]
|
||||
|
||||
Or specify the actual dtype
|
||||
|
||||
>>> pd.array(['a', 'b', 'a'],
|
||||
... dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True))
|
||||
[a, b, a]
|
||||
Categories (3, object): [a < b < c]
|
||||
|
||||
Because omitting the `dtype` passes the data through to NumPy,
|
||||
a mixture of valid integers and NA will return a floating-point
|
||||
NumPy array.
|
||||
|
||||
>>> pd.array([1, 2, np.nan])
|
||||
<PandasArray>
|
||||
[1.0, 2.0, nan]
|
||||
Length: 3, dtype: float64
|
||||
|
||||
To use pandas' nullable :class:`pandas.arrays.IntegerArray`, specify
|
||||
the dtype:
|
||||
|
||||
>>> pd.array([1, 2, np.nan], dtype='Int64')
|
||||
<IntegerArray>
|
||||
[1, 2, NaN]
|
||||
Length: 3, dtype: Int64
|
||||
|
||||
Pandas will infer an ExtensionArray for some types of data:
|
||||
|
||||
>>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
|
||||
<PeriodArray>
|
||||
['2000-01-01', '2000-01-01']
|
||||
Length: 2, dtype: period[D]
|
||||
|
||||
`data` must be 1-dimensional. A ValueError is raised when the input
|
||||
has the wrong dimensionality.
|
||||
|
||||
>>> pd.array(1)
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: Cannot pass scalar '1' to 'pandas.array'.
|
||||
"""
|
||||
from pandas.core.arrays import (
|
||||
period_array,
|
||||
ExtensionArray,
|
||||
IntervalArray,
|
||||
PandasArray,
|
||||
DatetimeArray,
|
||||
TimedeltaArray,
|
||||
)
|
||||
from pandas.core.internals.arrays import extract_array
|
||||
|
||||
if lib.is_scalar(data):
|
||||
msg = "Cannot pass scalar '{}' to 'pandas.array'."
|
||||
raise ValueError(msg.format(data))
|
||||
|
||||
data = extract_array(data, extract_numpy=True)
|
||||
|
||||
if dtype is None and isinstance(data, ExtensionArray):
|
||||
dtype = data.dtype
|
||||
|
||||
# this returns None for not-found dtypes.
|
||||
if isinstance(dtype, str):
|
||||
dtype = registry.find(dtype) or dtype
|
||||
|
||||
if is_extension_array_dtype(dtype):
|
||||
cls = cast(ExtensionDtype, dtype).construct_array_type()
|
||||
return cls._from_sequence(data, dtype=dtype, copy=copy)
|
||||
|
||||
if dtype is None:
|
||||
inferred_dtype = lib.infer_dtype(data, skipna=False)
|
||||
if inferred_dtype == "period":
|
||||
try:
|
||||
return period_array(data, copy=copy)
|
||||
except tslibs.IncompatibleFrequency:
|
||||
# We may have a mixture of frequencies.
|
||||
# We choose to return an ndarray, rather than raising.
|
||||
pass
|
||||
elif inferred_dtype == "interval":
|
||||
try:
|
||||
return IntervalArray(data, copy=copy)
|
||||
except ValueError:
|
||||
# We may have a mixture of `closed` here.
|
||||
# We choose to return an ndarray, rather than raising.
|
||||
pass
|
||||
|
||||
elif inferred_dtype.startswith("datetime"):
|
||||
# datetime, datetime64
|
||||
try:
|
||||
return DatetimeArray._from_sequence(data, copy=copy)
|
||||
except ValueError:
|
||||
# Mixture of timezones, fall back to PandasArray
|
||||
pass
|
||||
|
||||
elif inferred_dtype.startswith("timedelta"):
|
||||
# timedelta, timedelta64
|
||||
return TimedeltaArray._from_sequence(data, copy=copy)
|
||||
|
||||
# TODO(BooleanArray): handle this type
|
||||
|
||||
# Pandas overrides NumPy for
|
||||
# 1. datetime64[ns]
|
||||
# 2. timedelta64[ns]
|
||||
# so that a DatetimeArray is returned.
|
||||
if is_datetime64_ns_dtype(dtype):
|
||||
return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
|
||||
elif is_timedelta64_ns_dtype(dtype):
|
||||
return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)
|
||||
|
||||
result = PandasArray._from_sequence(data, dtype=dtype, copy=copy)
|
||||
return result
|
||||
1179
venv/lib/python3.6/site-packages/pandas/core/arrays/base.py
Normal file
1179
venv/lib/python3.6/site-packages/pandas/core/arrays/base.py
Normal file
File diff suppressed because it is too large
Load Diff
2816
venv/lib/python3.6/site-packages/pandas/core/arrays/categorical.py
Normal file
2816
venv/lib/python3.6/site-packages/pandas/core/arrays/categorical.py
Normal file
File diff suppressed because it is too large
Load Diff
1684
venv/lib/python3.6/site-packages/pandas/core/arrays/datetimelike.py
Normal file
1684
venv/lib/python3.6/site-packages/pandas/core/arrays/datetimelike.py
Normal file
File diff suppressed because it is too large
Load Diff
2285
venv/lib/python3.6/site-packages/pandas/core/arrays/datetimes.py
Normal file
2285
venv/lib/python3.6/site-packages/pandas/core/arrays/datetimes.py
Normal file
File diff suppressed because it is too large
Load Diff
867
venv/lib/python3.6/site-packages/pandas/core/arrays/integer.py
Normal file
867
venv/lib/python3.6/site-packages/pandas/core/arrays/integer.py
Normal file
@@ -0,0 +1,867 @@
|
||||
import numbers
|
||||
from typing import Type
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import lib
|
||||
from pandas.compat import set_function_name
|
||||
from pandas.util._decorators import cache_readonly
|
||||
|
||||
from pandas.core.dtypes.base import ExtensionDtype
|
||||
from pandas.core.dtypes.cast import astype_nansafe
|
||||
from pandas.core.dtypes.common import (
|
||||
is_bool_dtype,
|
||||
is_float,
|
||||
is_float_dtype,
|
||||
is_integer,
|
||||
is_integer_dtype,
|
||||
is_list_like,
|
||||
is_object_dtype,
|
||||
is_scalar,
|
||||
)
|
||||
from pandas.core.dtypes.dtypes import register_extension_dtype
|
||||
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
|
||||
from pandas.core.dtypes.missing import isna, notna
|
||||
|
||||
from pandas.core import nanops, ops
|
||||
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
|
||||
from pandas.core.tools.numeric import to_numeric
|
||||
|
||||
|
||||
class _IntegerDtype(ExtensionDtype):
|
||||
"""
|
||||
An ExtensionDtype to hold a single size & kind of integer dtype.
|
||||
|
||||
These specific implementations are subclasses of the non-public
|
||||
_IntegerDtype. For example we have Int8Dtype to represent signed int 8s.
|
||||
|
||||
The attributes name & type are set when these subclasses are created.
|
||||
"""
|
||||
|
||||
name = None # type: str
|
||||
base = None
|
||||
type = None # type: Type
|
||||
na_value = np.nan
|
||||
|
||||
def __repr__(self):
|
||||
sign = "U" if self.is_unsigned_integer else ""
|
||||
return "{sign}Int{size}Dtype()".format(sign=sign, size=8 * self.itemsize)
|
||||
|
||||
@cache_readonly
|
||||
def is_signed_integer(self):
|
||||
return self.kind == "i"
|
||||
|
||||
@cache_readonly
|
||||
def is_unsigned_integer(self):
|
||||
return self.kind == "u"
|
||||
|
||||
@property
|
||||
def _is_numeric(self):
|
||||
return True
|
||||
|
||||
@cache_readonly
|
||||
def numpy_dtype(self):
|
||||
""" Return an instance of our numpy dtype """
|
||||
return np.dtype(self.type)
|
||||
|
||||
@cache_readonly
|
||||
def kind(self):
|
||||
return self.numpy_dtype.kind
|
||||
|
||||
@cache_readonly
|
||||
def itemsize(self):
|
||||
""" Return the number of bytes in this dtype """
|
||||
return self.numpy_dtype.itemsize
|
||||
|
||||
@classmethod
|
||||
def construct_array_type(cls):
|
||||
"""Return the array type associated with this dtype
|
||||
|
||||
Returns
|
||||
-------
|
||||
type
|
||||
"""
|
||||
return IntegerArray
|
||||
|
||||
|
||||
def integer_array(values, dtype=None, copy=False):
|
||||
"""
|
||||
Infer and return an integer array of the values.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
values : 1D list-like
|
||||
dtype : dtype, optional
|
||||
dtype to coerce
|
||||
copy : boolean, default False
|
||||
|
||||
Returns
|
||||
-------
|
||||
IntegerArray
|
||||
|
||||
Raises
|
||||
------
|
||||
TypeError if incompatible types
|
||||
"""
|
||||
values, mask = coerce_to_array(values, dtype=dtype, copy=copy)
|
||||
return IntegerArray(values, mask)
|
||||
|
||||
|
||||
def safe_cast(values, dtype, copy):
|
||||
"""
|
||||
Safely cast the values to the dtype if they
|
||||
are equivalent, meaning floats must be equivalent to the
|
||||
ints.
|
||||
|
||||
"""
|
||||
|
||||
try:
|
||||
return values.astype(dtype, casting="safe", copy=copy)
|
||||
except TypeError:
|
||||
|
||||
casted = values.astype(dtype, copy=copy)
|
||||
if (casted == values).all():
|
||||
return casted
|
||||
|
||||
raise TypeError(
|
||||
"cannot safely cast non-equivalent {} to {}".format(
|
||||
values.dtype, np.dtype(dtype)
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def coerce_to_array(values, dtype, mask=None, copy=False):
|
||||
"""
|
||||
Coerce the input values array to numpy arrays with a mask
|
||||
|
||||
Parameters
|
||||
----------
|
||||
values : 1D list-like
|
||||
dtype : integer dtype
|
||||
mask : boolean 1D array, optional
|
||||
copy : boolean, default False
|
||||
if True, copy the input
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple of (values, mask)
|
||||
"""
|
||||
# if values is integer numpy array, preserve it's dtype
|
||||
if dtype is None and hasattr(values, "dtype"):
|
||||
if is_integer_dtype(values.dtype):
|
||||
dtype = values.dtype
|
||||
|
||||
if dtype is not None:
|
||||
if isinstance(dtype, str) and (
|
||||
dtype.startswith("Int") or dtype.startswith("UInt")
|
||||
):
|
||||
# Avoid DeprecationWarning from NumPy about np.dtype("Int64")
|
||||
# https://github.com/numpy/numpy/pull/7476
|
||||
dtype = dtype.lower()
|
||||
|
||||
if not issubclass(type(dtype), _IntegerDtype):
|
||||
try:
|
||||
dtype = _dtypes[str(np.dtype(dtype))]
|
||||
except KeyError:
|
||||
raise ValueError("invalid dtype specified {}".format(dtype))
|
||||
|
||||
if isinstance(values, IntegerArray):
|
||||
values, mask = values._data, values._mask
|
||||
if dtype is not None:
|
||||
values = values.astype(dtype.numpy_dtype, copy=False)
|
||||
|
||||
if copy:
|
||||
values = values.copy()
|
||||
mask = mask.copy()
|
||||
return values, mask
|
||||
|
||||
values = np.array(values, copy=copy)
|
||||
if is_object_dtype(values):
|
||||
inferred_type = lib.infer_dtype(values, skipna=True)
|
||||
if inferred_type == "empty":
|
||||
values = np.empty(len(values))
|
||||
values.fill(np.nan)
|
||||
elif inferred_type not in [
|
||||
"floating",
|
||||
"integer",
|
||||
"mixed-integer",
|
||||
"mixed-integer-float",
|
||||
]:
|
||||
raise TypeError(
|
||||
"{} cannot be converted to an IntegerDtype".format(values.dtype)
|
||||
)
|
||||
|
||||
elif is_bool_dtype(values) and is_integer_dtype(dtype):
|
||||
values = np.array(values, dtype=int, copy=copy)
|
||||
|
||||
elif not (is_integer_dtype(values) or is_float_dtype(values)):
|
||||
raise TypeError(
|
||||
"{} cannot be converted to an IntegerDtype".format(values.dtype)
|
||||
)
|
||||
|
||||
if mask is None:
|
||||
mask = isna(values)
|
||||
else:
|
||||
assert len(mask) == len(values)
|
||||
|
||||
if not values.ndim == 1:
|
||||
raise TypeError("values must be a 1D list-like")
|
||||
if not mask.ndim == 1:
|
||||
raise TypeError("mask must be a 1D list-like")
|
||||
|
||||
# infer dtype if needed
|
||||
if dtype is None:
|
||||
dtype = np.dtype("int64")
|
||||
else:
|
||||
dtype = dtype.type
|
||||
|
||||
# if we are float, let's make sure that we can
|
||||
# safely cast
|
||||
|
||||
# we copy as need to coerce here
|
||||
if mask.any():
|
||||
values = values.copy()
|
||||
values[mask] = 1
|
||||
values = safe_cast(values, dtype, copy=False)
|
||||
else:
|
||||
values = safe_cast(values, dtype, copy=False)
|
||||
|
||||
return values, mask
|
||||
|
||||
|
||||
class IntegerArray(ExtensionArray, ExtensionOpsMixin):
|
||||
"""
|
||||
Array of integer (optional missing) values.
|
||||
|
||||
.. versionadded:: 0.24.0
|
||||
|
||||
.. warning::
|
||||
|
||||
IntegerArray is currently experimental, and its API or internal
|
||||
implementation may change without warning.
|
||||
|
||||
We represent an IntegerArray with 2 numpy arrays:
|
||||
|
||||
- data: contains a numpy integer array of the appropriate dtype
|
||||
- mask: a boolean array holding a mask on the data, True is missing
|
||||
|
||||
To construct an IntegerArray from generic array-like input, use
|
||||
:func:`pandas.array` with one of the integer dtypes (see examples).
|
||||
|
||||
See :ref:`integer_na` for more.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
values : numpy.ndarray
|
||||
A 1-d integer-dtype array.
|
||||
mask : numpy.ndarray
|
||||
A 1-d boolean-dtype array indicating missing values.
|
||||
copy : bool, default False
|
||||
Whether to copy the `values` and `mask`.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
None
|
||||
|
||||
Methods
|
||||
-------
|
||||
None
|
||||
|
||||
Returns
|
||||
-------
|
||||
IntegerArray
|
||||
|
||||
Examples
|
||||
--------
|
||||
Create an IntegerArray with :func:`pandas.array`.
|
||||
|
||||
>>> int_array = pd.array([1, None, 3], dtype=pd.Int32Dtype())
|
||||
>>> int_array
|
||||
<IntegerArray>
|
||||
[1, NaN, 3]
|
||||
Length: 3, dtype: Int32
|
||||
|
||||
String aliases for the dtypes are also available. They are capitalized.
|
||||
|
||||
>>> pd.array([1, None, 3], dtype='Int32')
|
||||
<IntegerArray>
|
||||
[1, NaN, 3]
|
||||
Length: 3, dtype: Int32
|
||||
|
||||
>>> pd.array([1, None, 3], dtype='UInt16')
|
||||
<IntegerArray>
|
||||
[1, NaN, 3]
|
||||
Length: 3, dtype: UInt16
|
||||
"""
|
||||
|
||||
@cache_readonly
|
||||
def dtype(self):
|
||||
return _dtypes[str(self._data.dtype)]
|
||||
|
||||
def __init__(self, values, mask, copy=False):
|
||||
if not (isinstance(values, np.ndarray) and is_integer_dtype(values.dtype)):
|
||||
raise TypeError(
|
||||
"values should be integer numpy array. Use "
|
||||
"the 'integer_array' function instead"
|
||||
)
|
||||
if not (isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype)):
|
||||
raise TypeError(
|
||||
"mask should be boolean numpy array. Use "
|
||||
"the 'integer_array' function instead"
|
||||
)
|
||||
|
||||
if copy:
|
||||
values = values.copy()
|
||||
mask = mask.copy()
|
||||
|
||||
self._data = values
|
||||
self._mask = mask
|
||||
|
||||
@classmethod
|
||||
def _from_sequence(cls, scalars, dtype=None, copy=False):
|
||||
return integer_array(scalars, dtype=dtype, copy=copy)
|
||||
|
||||
@classmethod
|
||||
def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
|
||||
scalars = to_numeric(strings, errors="raise")
|
||||
return cls._from_sequence(scalars, dtype, copy)
|
||||
|
||||
@classmethod
|
||||
def _from_factorized(cls, values, original):
|
||||
return integer_array(values, dtype=original.dtype)
|
||||
|
||||
def _formatter(self, boxed=False):
|
||||
def fmt(x):
|
||||
if isna(x):
|
||||
return "NaN"
|
||||
return str(x)
|
||||
|
||||
return fmt
|
||||
|
||||
def __getitem__(self, item):
|
||||
if is_integer(item):
|
||||
if self._mask[item]:
|
||||
return self.dtype.na_value
|
||||
return self._data[item]
|
||||
return type(self)(self._data[item], self._mask[item])
|
||||
|
||||
def _coerce_to_ndarray(self):
|
||||
"""
|
||||
coerce to an ndarary of object dtype
|
||||
"""
|
||||
|
||||
# TODO(jreback) make this better
|
||||
data = self._data.astype(object)
|
||||
data[self._mask] = self._na_value
|
||||
return data
|
||||
|
||||
__array_priority__ = 1000 # higher than ndarray so ops dispatch to us
|
||||
|
||||
def __array__(self, dtype=None):
|
||||
"""
|
||||
the array interface, return my values
|
||||
We return an object array here to preserve our scalar values
|
||||
"""
|
||||
return self._coerce_to_ndarray()
|
||||
|
||||
_HANDLED_TYPES = (np.ndarray, numbers.Number)
|
||||
|
||||
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
|
||||
# For IntegerArray inputs, we apply the ufunc to ._data
|
||||
# and mask the result.
|
||||
if method == "reduce":
|
||||
# Not clear how to handle missing values in reductions. Raise.
|
||||
raise NotImplementedError("The 'reduce' method is not supported.")
|
||||
out = kwargs.get("out", ())
|
||||
|
||||
for x in inputs + out:
|
||||
if not isinstance(x, self._HANDLED_TYPES + (IntegerArray,)):
|
||||
return NotImplemented
|
||||
|
||||
# for binary ops, use our custom dunder methods
|
||||
result = ops.maybe_dispatch_ufunc_to_dunder_op(
|
||||
self, ufunc, method, *inputs, **kwargs
|
||||
)
|
||||
if result is not NotImplemented:
|
||||
return result
|
||||
|
||||
mask = np.zeros(len(self), dtype=bool)
|
||||
inputs2 = []
|
||||
for x in inputs:
|
||||
if isinstance(x, IntegerArray):
|
||||
mask |= x._mask
|
||||
inputs2.append(x._data)
|
||||
else:
|
||||
inputs2.append(x)
|
||||
|
||||
def reconstruct(x):
|
||||
# we don't worry about scalar `x` here, since we
|
||||
# raise for reduce up above.
|
||||
|
||||
if is_integer_dtype(x.dtype):
|
||||
m = mask.copy()
|
||||
return IntegerArray(x, m)
|
||||
else:
|
||||
x[mask] = np.nan
|
||||
return x
|
||||
|
||||
result = getattr(ufunc, method)(*inputs2, **kwargs)
|
||||
if isinstance(result, tuple):
|
||||
tuple(reconstruct(x) for x in result)
|
||||
else:
|
||||
return reconstruct(result)
|
||||
|
||||
def __iter__(self):
|
||||
for i in range(len(self)):
|
||||
if self._mask[i]:
|
||||
yield self.dtype.na_value
|
||||
else:
|
||||
yield self._data[i]
|
||||
|
||||
def take(self, indexer, allow_fill=False, fill_value=None):
|
||||
from pandas.api.extensions import take
|
||||
|
||||
# we always fill with 1 internally
|
||||
# to avoid upcasting
|
||||
data_fill_value = 1 if isna(fill_value) else fill_value
|
||||
result = take(
|
||||
self._data, indexer, fill_value=data_fill_value, allow_fill=allow_fill
|
||||
)
|
||||
|
||||
mask = take(self._mask, indexer, fill_value=True, allow_fill=allow_fill)
|
||||
|
||||
# if we are filling
|
||||
# we only fill where the indexer is null
|
||||
# not existing missing values
|
||||
# TODO(jreback) what if we have a non-na float as a fill value?
|
||||
if allow_fill and notna(fill_value):
|
||||
fill_mask = np.asarray(indexer) == -1
|
||||
result[fill_mask] = fill_value
|
||||
mask = mask ^ fill_mask
|
||||
|
||||
return type(self)(result, mask, copy=False)
|
||||
|
||||
def copy(self):
    """Return a deep copy: both the data and the mask arrays are copied."""
    return type(self)(self._data.copy(), self._mask.copy(), copy=False)
|
||||
|
||||
def __setitem__(self, key, value):
    """Set ``self[key]``, updating the data and mask arrays in lockstep."""
    is_single = is_scalar(value)
    if is_single:
        # coerce_to_array operates on sequences; wrap, coerce, then unwrap
        value = [value]
    value, mask = coerce_to_array(value, dtype=self.dtype)

    if is_single:
        value = value[0]
        mask = mask[0]

    self._data[key] = value
    self._mask[key] = mask
|
||||
|
||||
def __len__(self):
    """Number of elements (valid or masked)."""
    return self._data.shape[0]
|
||||
|
||||
@property
def nbytes(self):
    """Total bytes consumed by the backing data and mask ndarrays."""
    return sum(arr.nbytes for arr in (self._data, self._mask))
|
||||
|
||||
def isna(self):
    # Boolean ndarray, True where the value is missing. Note this returns
    # the mask itself, not a copy -- callers should not mutate it in place.
    return self._mask
|
||||
|
||||
@property
def _na_value(self):
    # Scalar used to represent a missing value once the data has been
    # coerced to a nan-aware float dtype (see ``_reduce``).
    return np.nan
|
||||
|
||||
@classmethod
def _concat_same_type(cls, to_concat):
    """Concatenate several arrays of this type into a single one."""
    datas = [arr._data for arr in to_concat]
    masks = [arr._mask for arr in to_concat]
    return cls(np.concatenate(datas), np.concatenate(masks))
|
||||
|
||||
def astype(self, dtype, copy=True):
    """
    Cast to a NumPy array or IntegerArray with 'dtype'.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.
    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    array : ndarray or IntegerArray
        NumPy ndarray or IntegerArray with 'dtype' for its dtype.

    Raises
    ------
    TypeError
        if incompatible type with an IntegerDtype, equivalent of same_kind
        casting
    """

    # if we are astyping to an existing IntegerDtype we can fastpath
    if isinstance(dtype, _IntegerDtype):
        result = self._data.astype(dtype.numpy_dtype, copy=False)
        return type(self)(result, mask=self._mask, copy=False)

    # coerce
    data = self._coerce_to_ndarray()
    # NOTE(review): the ``copy`` parameter is not honored on this path --
    # ``copy=None`` is falsy, so astype_nansafe never forces a copy here.
    # Confirm whether that is intentional before changing it.
    return astype_nansafe(data, dtype, copy=None)
|
||||
|
||||
@property
def _ndarray_values(self) -> np.ndarray:
    """Internal pandas method for lossy conversion to a NumPy ndarray.

    This method is not part of the pandas interface.

    The expectation is that this is cheap to compute, and is primarily
    used for interacting with our indexers.
    """
    # "lossy": masked slots still hold raw placeholder integers, so the
    # missing-value information carried by self._mask is dropped here.
    return self._data
|
||||
|
||||
def value_counts(self, dropna=True):
    """
    Returns a Series containing counts of each category.

    Every category will have an entry, even those with a count of 0.

    Parameters
    ----------
    dropna : boolean, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series

    See Also
    --------
    Series.value_counts

    """

    from pandas import Index, Series

    # compute counts on the data with no nans
    data = self._data[~self._mask]
    value_counts = Index(data).value_counts()
    array = value_counts.values

    # TODO(extension)
    # if we have allow Index to hold an ExtensionArray
    # this is easier
    index = value_counts.index.astype(object)

    # if we want nans, count the mask
    if not dropna:

        # TODO(extension)
        # appending to an Index *always* infers
        # w/o passing the dtype
        # NaN gets a single trailing entry whose count is the mask total
        array = np.append(array, [self._mask.sum()])
        index = Index(
            np.concatenate([index.values, np.array([np.nan], dtype=object)]),
            dtype=object,
        )

    return Series(array, index=index)
|
||||
|
||||
def _values_for_argsort(self) -> np.ndarray:
    """Return values for sorting.

    Returns
    -------
    ndarray
        The transformed values should maintain the ordering between values
        within the array.

    See Also
    --------
    ExtensionArray.argsort
    """
    data = self._data.copy()
    if self._mask.any():
        # Missing values sort first: replace them with something smaller
        # than every present value. Guarding on ``mask.any()`` avoids
        # calling ``min()`` on a zero-length array (ValueError) and skips
        # needless work when nothing is masked.
        data[self._mask] = data.min() - 1
    return data
|
||||
|
||||
@classmethod
def _create_comparison_method(cls, op):
    # Factory producing the __eq__/__ne__/__lt__/... dunders; ``op`` is the
    # corresponding operator-module function.
    def cmp_method(self, other):

        op_name = op.__name__
        mask = None

        if isinstance(other, (ABCSeries, ABCIndexClass)):
            # Rely on pandas to unbox and dispatch to us.
            return NotImplemented

        if isinstance(other, IntegerArray):
            # compare against the raw data; combine masks below
            other, mask = other._data, other._mask

        elif is_list_like(other):
            other = np.asarray(other)
            if other.ndim > 0 and len(self) != len(other):
                raise ValueError("Lengths must match to compare")

        other = lib.item_from_zerodim(other)

        # numpy will show a DeprecationWarning on invalid elementwise
        # comparisons, this will raise in the future
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "elementwise", FutureWarning)
            with np.errstate(all="ignore"):
                result = op(self._data, other)

        # nans propagate
        if mask is None:
            mask = self._mask
        else:
            mask = self._mask | mask

        # comparisons involving NA are False, except for != which is True
        result[mask] = op_name == "ne"
        return result

    name = "__{name}__".format(name=op.__name__)
    return set_function_name(cmp_method, name, cls)
|
||||
|
||||
def _reduce(self, name, skipna=True, **kwargs):
    # Dispatch a named reduction ("sum", "min", "any", ...) to the matching
    # pandas.core.nanops implementation, handling the mask ourselves.
    data = self._data
    mask = self._mask

    # coerce to a nan-aware float if needed
    if mask.any():
        data = self._data.astype("float64")
        data[mask] = self._na_value

    op = getattr(nanops, "nan" + name)
    result = op(data, axis=0, skipna=skipna, mask=mask)

    # if we have a boolean op, don't coerce
    if name in ["any", "all"]:
        pass

    # if we have a preservable numeric op,
    # provide coercion back to an integer type if possible
    elif name in ["sum", "min", "max", "prod"] and notna(result):
        int_result = int(result)
        if int_result == result:
            result = int_result

    return result
|
||||
|
||||
def _maybe_mask_result(self, result, mask, other, op_name):
    """
    Wrap an arithmetic result, re-applying the missing-value mask.

    Parameters
    ----------
    result : array-like
    mask : array-like bool
    other : scalar or array-like
    op_name : str
    """

    # may need to fill infs
    # and mask wraparound
    if is_float_dtype(result):
        mask |= (result == np.inf) | (result == -np.inf)

    # if we have a float operand we are by-definition
    # a float result
    # or our op is a divide
    if (is_float_dtype(other) or is_float(other)) or (
        op_name in ["rtruediv", "truediv"]
    ):
        # float results stay plain ndarrays with NaN at masked slots
        result[mask] = np.nan
        return result

    # integer results are re-wrapped with the mask preserved
    return type(self)(result, mask, copy=False)
|
||||
|
||||
@classmethod
def _create_arithmetic_method(cls, op):
    # Factory producing the __add__/__mul__/__pow__/... dunders; ``op`` is
    # the corresponding operator-module function.
    def integer_arithmetic_method(self, other):

        op_name = op.__name__
        mask = None

        if isinstance(other, (ABCSeries, ABCIndexClass)):
            # Rely on pandas to unbox and dispatch to us.
            return NotImplemented

        if getattr(other, "ndim", 0) > 1:
            raise NotImplementedError("can only perform ops with 1-d structures")

        if isinstance(other, IntegerArray):
            # operate on the raw data; combine masks below
            other, mask = other._data, other._mask

        elif getattr(other, "ndim", None) == 0:
            # 0-d ndarray: unwrap to a plain scalar
            other = other.item()

        elif is_list_like(other):
            other = np.asarray(other)
            if not other.ndim:
                other = other.item()
            elif other.ndim == 1:
                if not (is_float_dtype(other) or is_integer_dtype(other)):
                    raise TypeError("can only perform ops with numeric values")
        else:
            if not (is_float(other) or is_integer(other)):
                raise TypeError("can only perform ops with numeric values")

        # nans propagate
        if mask is None:
            mask = self._mask
        else:
            mask = self._mask | mask

        # 1 ** np.nan is 1. So we have to unmask those.
        if op_name == "pow":
            mask = np.where(self == 1, False, mask)

        elif op_name == "rpow":
            mask = np.where(other == 1, False, mask)

        with np.errstate(all="ignore"):
            result = op(self._data, other)

        # divmod returns a tuple
        if op_name == "divmod":
            div, mod = result
            return (
                self._maybe_mask_result(div, mask, other, "floordiv"),
                self._maybe_mask_result(mod, mask, other, "mod"),
            )

        return self._maybe_mask_result(result, mask, other, op_name)

    name = "__{name}__".format(name=op.__name__)
    return set_function_name(integer_arithmetic_method, name, cls)
|
||||
|
||||
|
||||
# Generate and attach the operator dunders produced by the
# _create_arithmetic_method / _create_comparison_method factories above
# (the _add_* hooks themselves come from the ops mixin, not this file).
IntegerArray._add_arithmetic_ops()
IntegerArray._add_comparison_ops()
|
||||
|
||||
|
||||
_dtype_docstring = """
An ExtensionDtype for {dtype} integer data.

Attributes
----------
None

Methods
-------
None
"""


def _make_integer_dtype(class_name, numpy_type):
    """Create and register one concrete _IntegerDtype subclass.

    Produces exactly the same class attributes as writing out each
    ``register_extension_dtype(type(...))`` call by hand: ``name`` is the
    class name minus the trailing "Dtype", and the docstring uses its
    lowercase form.
    """
    short_name = class_name[: -len("Dtype")]
    return register_extension_dtype(
        type(
            class_name,
            (_IntegerDtype,),
            {
                "type": numpy_type,
                "name": short_name,
                "__doc__": _dtype_docstring.format(dtype=short_name.lower()),
            },
        )
    )


# create the Dtype
Int8Dtype = _make_integer_dtype("Int8Dtype", np.int8)
Int16Dtype = _make_integer_dtype("Int16Dtype", np.int16)
Int32Dtype = _make_integer_dtype("Int32Dtype", np.int32)
Int64Dtype = _make_integer_dtype("Int64Dtype", np.int64)
UInt8Dtype = _make_integer_dtype("UInt8Dtype", np.uint8)
UInt16Dtype = _make_integer_dtype("UInt16Dtype", np.uint16)
UInt32Dtype = _make_integer_dtype("UInt32Dtype", np.uint32)
UInt64Dtype = _make_integer_dtype("UInt64Dtype", np.uint64)

# mapping from the numpy dtype name to a singleton pandas dtype instance
_dtypes = {
    "int8": Int8Dtype(),
    "int16": Int16Dtype(),
    "int32": Int32Dtype(),
    "int64": Int64Dtype(),
    "uint8": UInt8Dtype(),
    "uint16": UInt16Dtype(),
    "uint32": UInt32Dtype(),
    "uint64": UInt64Dtype(),
}
|
||||
1214
venv/lib/python3.6/site-packages/pandas/core/arrays/interval.py
Normal file
1214
venv/lib/python3.6/site-packages/pandas/core/arrays/interval.py
Normal file
File diff suppressed because it is too large
Load Diff
469
venv/lib/python3.6/site-packages/pandas/core/arrays/numpy_.py
Normal file
469
venv/lib/python3.6/site-packages/pandas/core/arrays/numpy_.py
Normal file
@@ -0,0 +1,469 @@
|
||||
import numbers
|
||||
|
||||
import numpy as np
|
||||
from numpy.lib.mixins import NDArrayOperatorsMixin
|
||||
|
||||
from pandas._libs import lib
|
||||
from pandas.compat.numpy import function as nv
|
||||
from pandas.util._decorators import Appender
|
||||
from pandas.util._validators import validate_fillna_kwargs
|
||||
|
||||
from pandas.core.dtypes.dtypes import ExtensionDtype
|
||||
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
|
||||
from pandas.core.dtypes.inference import is_array_like, is_list_like
|
||||
|
||||
from pandas import compat
|
||||
from pandas.core import nanops
|
||||
from pandas.core.algorithms import searchsorted
|
||||
from pandas.core.missing import backfill_1d, pad_1d
|
||||
|
||||
from .base import ExtensionArray, ExtensionOpsMixin
|
||||
|
||||
|
||||
class PandasDtype(ExtensionDtype):
    """
    A Pandas ExtensionDtype for NumPy dtypes.

    .. versionadded:: 0.24.0

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    dtype : numpy.dtype
    """

    # only the wrapped numpy dtype participates in equality / hashing
    _metadata = ("_dtype",)

    def __init__(self, dtype):
        # normalize any dtype-like (string, scalar type, np.dtype) first
        dtype = np.dtype(dtype)
        self._dtype = dtype
        self._name = dtype.name
        self._type = dtype.type

    def __repr__(self):
        return "PandasDtype({!r})".format(self.name)

    @property
    def numpy_dtype(self):
        """The NumPy dtype this PandasDtype wraps."""
        return self._dtype

    @property
    def name(self):
        """String name of the wrapped dtype (e.g. "int64")."""
        return self._name

    @property
    def type(self):
        """Scalar type of the wrapped dtype (e.g. ``np.int64``)."""
        return self._type

    @property
    def _is_numeric(self):
        # exclude object, str, unicode, void.
        return self.kind in set("biufc")

    @property
    def _is_boolean(self):
        return self.kind == "b"

    @classmethod
    def construct_from_string(cls, string):
        """Build a PandasDtype from a dtype string such as "float32"."""
        return cls(np.dtype(string))

    @classmethod
    def construct_array_type(cls):
        """Return the array type associated with this dtype.

        BUG FIX: the ``@classmethod`` decorator was missing; the method
        only worked because it happened to be called on instances (so
        ``cls`` was silently bound to ``self``). As a classmethod it now
        works when invoked on either the class or an instance.
        """
        return PandasArray

    @property
    def kind(self):
        """One-character numpy kind code ('i', 'u', 'f', 'b', ...)."""
        return self._dtype.kind

    @property
    def itemsize(self):
        """The element size of this data-type object."""
        return self._dtype.itemsize
|
||||
|
||||
|
||||
class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin):
    """
    A pandas ExtensionArray for NumPy data.

    .. versionadded :: 0.24.0

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    values : ndarray
        The NumPy ndarray to wrap. Must be 1-dimensional.
    copy : bool, default False
        Whether to copy `values`.

    Attributes
    ----------
    None

    Methods
    -------
    None
    """

    # If you're wondering why pd.Series(cls) doesn't put the array in an
    # ExtensionBlock, search for `ABCPandasArray`. We check for
    # that _typ to ensure that that users don't unnecessarily use EAs inside
    # pandas internals, which turns off things like block consolidation.
    _typ = "npy_extension"
    # make our dunder ops win over plain ndarray's in mixed expressions
    __array_priority__ = 1000

    # ------------------------------------------------------------------------
    # Constructors

    def __init__(self, values, copy=False):
        # unwrap nested PandasArrays so we always hold a raw ndarray
        if isinstance(values, type(self)):
            values = values._ndarray
        if not isinstance(values, np.ndarray):
            raise ValueError("'values' must be a NumPy array.")

        if values.ndim != 1:
            raise ValueError("PandasArray must be 1-dimensional.")

        if copy:
            values = values.copy()

        self._ndarray = values
        self._dtype = PandasDtype(values.dtype)

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        # Construct from an iterable of scalars (ExtensionArray interface).
        if isinstance(dtype, PandasDtype):
            dtype = dtype._dtype

        result = np.asarray(scalars, dtype=dtype)
        # np.asarray can return its input unchanged; honor ``copy`` then
        if copy and result is scalars:
            result = result.copy()
        return cls(result)

    @classmethod
    def _from_factorized(cls, values, original):
        # codes from factorize() map directly onto the stored values
        return cls(values)

    @classmethod
    def _concat_same_type(cls, to_concat):
        return cls(np.concatenate(to_concat))

    # ------------------------------------------------------------------------
    # Data

    @property
    def dtype(self):
        # a PandasDtype wrapping the underlying numpy dtype
        return self._dtype

    # ------------------------------------------------------------------------
    # NumPy Array Interface

    def __array__(self, dtype=None):
        return np.asarray(self._ndarray, dtype=dtype)

    # types we are willing to operate on in __array_ufunc__
    _HANDLED_TYPES = (np.ndarray, numbers.Number)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        # Lightly modified version of
        # https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/\
        # numpy.lib.mixins.NDArrayOperatorsMixin.html
        # The primary modification is not boxing scalar return values
        # in PandasArray, since pandas' ExtensionArrays are 1-d.
        out = kwargs.get("out", ())
        for x in inputs + out:
            # Only support operations with instances of _HANDLED_TYPES.
            # Use PandasArray instead of type(self) for isinstance to
            # allow subclasses that don't override __array_ufunc__ to
            # handle PandasArray objects.
            if not isinstance(x, self._HANDLED_TYPES + (PandasArray,)):
                return NotImplemented

        # Defer to the implementation of the ufunc on unwrapped values.
        inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs)
        if out:
            kwargs["out"] = tuple(
                x._ndarray if isinstance(x, PandasArray) else x for x in out
            )
        result = getattr(ufunc, method)(*inputs, **kwargs)

        if type(result) is tuple and len(result):
            # multiple return values
            if not lib.is_scalar(result[0]):
                # re-box array-like results
                return tuple(type(self)(x) for x in result)
            else:
                # but not scalar reductions
                return result
        elif method == "at":
            # no return value
            return None
        else:
            # one return value
            if not lib.is_scalar(result):
                # re-box array-like results, but not scalar reductions
                result = type(self)(result)
            return result

    # ------------------------------------------------------------------------
    # Pandas ExtensionArray Interface

    def __getitem__(self, item):
        if isinstance(item, type(self)):
            item = item._ndarray

        result = self._ndarray[item]
        if not lib.is_scalar(item):
            # non-scalar indexers yield arrays; re-wrap them
            result = type(self)(result)
        return result

    def __setitem__(self, key, value):
        from pandas.core.internals.arrays import extract_array

        value = extract_array(value, extract_numpy=True)

        if not lib.is_scalar(key) and is_list_like(key):
            key = np.asarray(key)

        if not lib.is_scalar(value):
            value = np.asarray(value)

        values = self._ndarray
        t = np.result_type(value, values)
        if t != self._ndarray.dtype:
            # assignment requires a wider dtype: upcast the whole backing
            # array ("safe" casting), then swap it in
            values = values.astype(t, casting="safe")
            values[key] = value
            self._dtype = PandasDtype(t)
            self._ndarray = values
        else:
            self._ndarray[key] = value

    def __len__(self):
        return len(self._ndarray)

    @property
    def nbytes(self):
        return self._ndarray.nbytes

    def isna(self):
        from pandas import isna

        return isna(self._ndarray)

    def fillna(self, value=None, method=None, limit=None):
        # TODO(_values_for_fillna): remove this
        value, method = validate_fillna_kwargs(value, method)

        mask = self.isna()

        if is_array_like(value):
            if len(value) != len(self):
                raise ValueError(
                    "Length of 'value' does not match. Got ({}) "
                    " expected {}".format(len(value), len(self))
                )
            # keep only the fill values aligned with the missing slots
            value = value[mask]

        if mask.any():
            if method is not None:
                func = pad_1d if method == "pad" else backfill_1d
                new_values = func(self._ndarray, limit=limit, mask=mask)
                new_values = self._from_sequence(new_values, dtype=self.dtype)
            else:
                # fill with value
                new_values = self.copy()
                new_values[mask] = value
        else:
            # nothing missing: still return a copy, never self
            new_values = self.copy()
        return new_values

    def take(self, indices, allow_fill=False, fill_value=None):
        from pandas.core.algorithms import take

        result = take(
            self._ndarray, indices, allow_fill=allow_fill, fill_value=fill_value
        )
        return type(self)(result)

    def copy(self):
        return type(self)(self._ndarray.copy())

    def _values_for_argsort(self):
        return self._ndarray

    def _values_for_factorize(self):
        # -1 is the sentinel used for missing codes
        return self._ndarray, -1

    def unique(self):
        from pandas import unique

        return type(self)(unique(self._ndarray))

    # ------------------------------------------------------------------------
    # Reductions

    def _reduce(self, name, skipna=True, **kwargs):
        # dispatch to the same-named method defined below (any/all/.../skew)
        meth = getattr(self, name, None)
        if meth:
            return meth(skipna=skipna, **kwargs)
        else:
            msg = "'{}' does not implement reduction '{}'"
            raise TypeError(msg.format(type(self).__name__, name))

    # Each reduction validates the numpy-compat kwargs via nv.* and then
    # defers to the matching pandas.core.nanops implementation.

    def any(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_any((), dict(out=out, keepdims=keepdims))
        return nanops.nanany(self._ndarray, axis=axis, skipna=skipna)

    def all(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_all((), dict(out=out, keepdims=keepdims))
        return nanops.nanall(self._ndarray, axis=axis, skipna=skipna)

    def min(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_min((), dict(out=out, keepdims=keepdims))
        return nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)

    def max(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_max((), dict(out=out, keepdims=keepdims))
        return nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)

    def sum(
        self,
        axis=None,
        dtype=None,
        out=None,
        keepdims=False,
        initial=None,
        skipna=True,
        min_count=0,
    ):
        nv.validate_sum(
            (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial)
        )
        return nanops.nansum(
            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
        )

    def prod(
        self,
        axis=None,
        dtype=None,
        out=None,
        keepdims=False,
        initial=None,
        skipna=True,
        min_count=0,
    ):
        nv.validate_prod(
            (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial)
        )
        return nanops.nanprod(
            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
        )

    def mean(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
        nv.validate_mean((), dict(dtype=dtype, out=out, keepdims=keepdims))
        return nanops.nanmean(self._ndarray, axis=axis, skipna=skipna)

    def median(
        self, axis=None, out=None, overwrite_input=False, keepdims=False, skipna=True
    ):
        nv.validate_median(
            (), dict(out=out, overwrite_input=overwrite_input, keepdims=keepdims)
        )
        return nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)

    def std(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="std"
        )
        return nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)

    def var(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="var"
        )
        return nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)

    def sem(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="sem"
        )
        return nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)

    def kurt(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="kurt"
        )
        return nanops.nankurt(self._ndarray, axis=axis, skipna=skipna)

    def skew(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="skew"
        )
        return nanops.nanskew(self._ndarray, axis=axis, skipna=skipna)

    # ------------------------------------------------------------------------
    # Additional Methods
    def to_numpy(self, dtype=None, copy=False):
        """
        Convert the PandasArray to a :class:`numpy.ndarray`.

        By default, this requires no coercion or copying of data.

        Parameters
        ----------
        dtype : numpy.dtype
            The NumPy dtype to pass to :func:`numpy.asarray`.
        copy : bool, default False
            Whether to copy the underlying data.

        Returns
        -------
        ndarray
        """
        result = np.asarray(self._ndarray, dtype=dtype)
        if copy and result is self._ndarray:
            result = result.copy()

        return result

    @Appender(ExtensionArray.searchsorted.__doc__)
    def searchsorted(self, value, side="left", sorter=None):
        return searchsorted(self.to_numpy(), value, side=side, sorter=sorter)

    # ------------------------------------------------------------------------
    # Ops

    def __invert__(self):
        return type(self)(~self._ndarray)

    @classmethod
    def _create_arithmetic_method(cls, op):
        # Factory producing the operator dunders; comparisons reuse it too
        # (see the alias below), since both simply unwrap and re-box.
        def arithmetic_method(self, other):
            if isinstance(other, (ABCIndexClass, ABCSeries)):
                # Rely on pandas to unbox and dispatch to us.
                return NotImplemented

            elif isinstance(other, cls):
                other = other._ndarray

            with np.errstate(all="ignore"):
                result = op(self._ndarray, other)

            if op is divmod:
                a, b = result
                return cls(a), cls(b)

            return cls(result)

        return compat.set_function_name(
            arithmetic_method, "__{}__".format(op.__name__), cls
        )

    _create_comparison_method = _create_arithmetic_method
|
||||
|
||||
|
||||
# Generate and attach the operator dunders produced by the factory above
# (the _add_* hooks themselves come from ExtensionOpsMixin).
PandasArray._add_arithmetic_ops()
PandasArray._add_comparison_ops()
|
||||
1027
venv/lib/python3.6/site-packages/pandas/core/arrays/period.py
Normal file
1027
venv/lib/python3.6/site-packages/pandas/core/arrays/period.py
Normal file
File diff suppressed because it is too large
Load Diff
2272
venv/lib/python3.6/site-packages/pandas/core/arrays/sparse.py
Normal file
2272
venv/lib/python3.6/site-packages/pandas/core/arrays/sparse.py
Normal file
File diff suppressed because it is too large
Load Diff
1138
venv/lib/python3.6/site-packages/pandas/core/arrays/timedeltas.py
Normal file
1138
venv/lib/python3.6/site-packages/pandas/core/arrays/timedeltas.py
Normal file
File diff suppressed because it is too large
Load Diff
1649
venv/lib/python3.6/site-packages/pandas/core/base.py
Normal file
1649
venv/lib/python3.6/site-packages/pandas/core/base.py
Normal file
File diff suppressed because it is too large
Load Diff
502
venv/lib/python3.6/site-packages/pandas/core/common.py
Normal file
502
venv/lib/python3.6/site-packages/pandas/core/common.py
Normal file
@@ -0,0 +1,502 @@
|
||||
"""
|
||||
Misc tools for implementing data structures
|
||||
|
||||
Note: pandas.core.common is *not* part of the public API.
|
||||
"""
|
||||
|
||||
import collections
|
||||
from collections import OrderedDict, abc
|
||||
from datetime import datetime, timedelta
|
||||
from functools import partial
|
||||
import inspect
|
||||
from typing import Any, Iterable, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import lib, tslibs
|
||||
from pandas.compat import PY36
|
||||
|
||||
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||||
from pandas.core.dtypes.common import (
|
||||
is_array_like,
|
||||
is_bool_dtype,
|
||||
is_extension_array_dtype,
|
||||
is_integer,
|
||||
)
|
||||
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
|
||||
from pandas.core.dtypes.inference import _iterable_not_string
|
||||
from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
|
||||
|
||||
|
||||
class SettingWithCopyError(ValueError):
    # Error variant of the SettingWithCopy diagnostic; subclasses ValueError
    # so existing ``except ValueError`` handlers keep working.
    # NOTE(review): raise sites are not visible in this chunk.
    pass
|
||||
|
||||
|
||||
class SettingWithCopyWarning(Warning):
    # Warning variant of the SettingWithCopy diagnostic.
    # NOTE(review): emit sites are not visible in this chunk.
    pass
|
||||
|
||||
|
||||
def flatten(l):
    """
    Flatten an arbitrarily nested sequence.

    Parameters
    ----------
    l : sequence
        The non string sequence to flatten

    Notes
    -----
    This doesn't consider strings sequences.

    Returns
    -------
    flattened : generator
    """
    for el in l:
        if not _iterable_not_string(el):
            yield el
        else:
            # recurse into nested (non-string) iterables
            yield from flatten(el)
|
||||
|
||||
|
||||
def consensus_name_attr(objs):
    """Return the ``name`` shared by all *objs*, or None when they differ."""
    consensus = objs[0].name
    for other in objs[1:]:
        try:
            differs = other.name != consensus
        except ValueError:
            # array-valued comparison; treat as a disagreement
            differs = True
        if differs:
            consensus = None
    return consensus
|
||||
|
||||
|
||||
def maybe_box(indexer, values, obj, key):
    """Re-box array results through *obj*; pass scalars straight through."""
    if not isinstance(values, np.ndarray):
        # already a scalar value - nothing to box
        return values
    # multiples came back: re-index into the original object
    return obj[indexer.get_loc(key)]
|
||||
|
||||
|
||||
def maybe_box_datetimelike(value):
    """Wrap raw datetime/timedelta scalars in Timestamp/Timedelta."""
    if isinstance(value, (np.datetime64, datetime)):
        return tslibs.Timestamp(value)
    if isinstance(value, (np.timedelta64, timedelta)):
        return tslibs.Timedelta(value)
    # anything else passes through untouched
    return value
|
||||
|
||||
|
||||
# Re-export the C-implemented helper from pandas._libs.lib at module level.
values_from_object = lib.values_from_object
|
||||
|
||||
|
||||
def is_bool_indexer(key: Any) -> bool:
    """
    Check whether `key` is a valid boolean indexer.

    Parameters
    ----------
    key : Any
        Only list-likes may be considered boolean indexers.
        All other types are not considered a boolean indexer.
        For array-like input, boolean ndarrays or ExtensionArrays
        with ``_is_boolean`` set are considered boolean indexers.

    Returns
    -------
    bool

    Raises
    ------
    ValueError
        When the array is an object-dtype ndarray or ExtensionArray
        and contains missing values.
    """
    na_msg = "cannot index with vector containing NA / NaN values"
    if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (
        is_array_like(key) and is_extension_array_dtype(key.dtype)
    ):
        if key.dtype == np.object_:
            # object dtype may hide bools mixed with NAs; unwrap and check
            key = np.asarray(values_from_object(key))

            if not lib.is_bool_array(key):
                if isna(key).any():
                    raise ValueError(na_msg)
                return False
            return True
        elif is_bool_dtype(key.dtype):
            # an ndarray with bool-dtype by definition has no missing values.
            # So we only need to check for NAs in ExtensionArrays
            if is_extension_array_dtype(key.dtype):
                if np.any(key.isna()):
                    raise ValueError(na_msg)
            return True
    elif isinstance(key, list):
        try:
            arr = np.asarray(key)
            # a list counts only if it converts losslessly to a bool ndarray
            return arr.dtype == np.bool_ and len(arr) == len(key)
        except TypeError:  # pragma: no cover
            return False

    return False
|
||||
|
||||
|
||||
def cast_scalar_indexer(val):
    """
    To avoid numpy DeprecationWarnings, cast float to integer where valid.

    Parameters
    ----------
    val : scalar

    Returns
    -------
    outval : scalar
    """
    # assumes lib.is_scalar(val)
    if lib.is_float(val) and int(val) == val:
        val = int(val)
    return val
|
||||
|
||||
|
||||
def _not_none(*args):
|
||||
"""
|
||||
Returns a generator consisting of the arguments that are not None.
|
||||
"""
|
||||
return (arg for arg in args if arg is not None)
|
||||
|
||||
|
||||
def _any_none(*args):
|
||||
"""
|
||||
Returns a boolean indicating if any argument is None.
|
||||
"""
|
||||
for arg in args:
|
||||
if arg is None:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _all_none(*args):
|
||||
"""
|
||||
Returns a boolean indicating if all arguments are None.
|
||||
"""
|
||||
for arg in args:
|
||||
if arg is not None:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _any_not_none(*args):
|
||||
"""
|
||||
Returns a boolean indicating if any argument is not None.
|
||||
"""
|
||||
for arg in args:
|
||||
if arg is not None:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _all_not_none(*args):
|
||||
"""
|
||||
Returns a boolean indicating if all arguments are not None.
|
||||
"""
|
||||
for arg in args:
|
||||
if arg is None:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def count_not_none(*args):
    """Count how many of the given arguments are not None."""
    total = 0
    for arg in args:
        if arg is not None:
            total += 1
    return total
|
||||
|
||||
|
||||
def try_sort(iterable):
    """
    Return ``sorted(iterable)`` when its elements are mutually orderable,
    otherwise return them as a list in their original order.

    Parameters
    ----------
    iterable : iterable

    Returns
    -------
    list
    """
    listed = list(iterable)
    try:
        return sorted(listed)
    except TypeError:
        # elements are not comparable with each other (e.g. mixed str/int);
        # narrowed from a bare ``except Exception`` so genuine errors inside
        # a custom __lt__ are no longer silently swallowed
        return listed
|
||||
|
||||
|
||||
def dict_keys_to_ordered_list(mapping):
    """Return the keys of ``mapping`` in a deterministic order."""
    # when pandas drops support for Python < 3.6, this function
    # can be replaced by a simple list(mapping.keys())
    if not (PY36 or isinstance(mapping, OrderedDict)):
        return try_sort(mapping)
    return list(mapping.keys())
|
||||
|
||||
|
||||
def asarray_tuplesafe(values, dtype=None):
    """
    Convert ``values`` to a 1-D ndarray while keeping tuples as scalar
    elements.

    Index-like inputs short-circuit to their underlying ``.values``; lists
    headed for object dtype, and any input that numpy would turn into a 2-D
    array of tuple contents, are routed through
    ``construct_1d_object_array_from_listlike`` instead.
    """
    # materialize generic iterables (generators, sets, ...) into a list first
    if not (isinstance(values, (list, tuple)) or hasattr(values, "__array__")):
        values = list(values)
    elif isinstance(values, ABCIndexClass):
        return values.values

    if isinstance(values, list) and dtype in [np.object_, object]:
        return construct_1d_object_array_from_listlike(values)

    result = np.asarray(values, dtype=dtype)

    # a str-dtype result means numpy coerced the data to strings; redo the
    # conversion with object dtype to preserve the original elements
    if issubclass(result.dtype.type, str):
        result = np.asarray(values, dtype=object)

    if result.ndim == 2:
        # Avoid building an array of arrays:
        # TODO: verify whether any path hits this except #18819 (invalid)
        values = [tuple(x) for x in values]
        result = construct_1d_object_array_from_listlike(values)

    return result
|
||||
|
||||
|
||||
def index_labels_to_array(labels, dtype=None):
    """
    Transform label or iterable of labels to array, for use in Index.

    Parameters
    ----------
    dtype : dtype
        If specified, use as dtype of the resulting array, otherwise infer.

    Returns
    -------
    array
    """
    if isinstance(labels, (str, tuple)):
        # a single (possibly tuple) label -> one-element list
        labels = [labels]

    if not isinstance(labels, (list, np.ndarray)):
        try:
            labels = list(labels)
        except TypeError:  # non-iterable
            labels = [labels]

    return asarray_tuplesafe(labels, dtype=dtype)
|
||||
|
||||
|
||||
def maybe_make_list(obj):
    """Wrap a scalar in a one-element list; pass None, lists and tuples through."""
    if obj is None or isinstance(obj, (tuple, list)):
        return obj
    return [obj]
|
||||
|
||||
|
||||
def maybe_iterable_to_list(obj: Union[Iterable, Any]) -> Union[list, Any]:
    """
    If obj is Iterable but not list-like, consume into list.
    """
    unsized_iterable = isinstance(obj, abc.Iterable) and not isinstance(
        obj, abc.Sized
    )
    if unsized_iterable:
        return list(obj)
    return obj
|
||||
|
||||
|
||||
def is_null_slice(obj):
    """Return True if ``obj`` is ``slice(None, None, None)``, i.e. ``[:]``."""
    if not isinstance(obj, slice):
        return False
    return obj.start is None and obj.stop is None and obj.step is None
|
||||
|
||||
|
||||
def is_true_slices(l):
    """
    Find non-trivial slices in "l": return a list of booleans with same length.
    """
    return [
        isinstance(k, slice)
        and not (k.start is None and k.stop is None and k.step is None)
        for k in l
    ]
|
||||
|
||||
|
||||
# TODO: used only once in indexing; belongs elsewhere?
def is_full_slice(obj, l):
    """Return True if ``obj`` is a slice spanning exactly ``[0:l]`` with default step."""
    if not isinstance(obj, slice):
        return False
    return obj.start == 0 and obj.stop == l and obj.step is None
|
||||
|
||||
|
||||
def get_callable_name(obj):
    """
    Return the best available name for a callable, or None.

    Tries ``__name__`` first, unwraps ``functools.partial`` recursively,
    then falls back to the class name for other callables.  Non-callables
    yield None (not ''), so callers can tell "no name" from "empty name".
    """
    if hasattr(obj, "__name__"):
        return obj.__name__
    if isinstance(obj, partial):
        return get_callable_name(obj.func)
    if callable(obj):
        return type(obj).__name__
    return None
|
||||
|
||||
|
||||
def apply_if_callable(maybe_callable, obj, **kwargs):
    """
    Evaluate possibly callable input using obj and kwargs if it is callable,
    otherwise return as it is.

    Parameters
    ----------
    maybe_callable : possibly a callable
    obj : NDFrame
    **kwargs
    """
    if not callable(maybe_callable):
        return maybe_callable
    return maybe_callable(obj, **kwargs)
|
||||
|
||||
|
||||
def dict_compat(d):
    """
    Helper function to convert datetimelike-keyed dicts
    to Timestamp-keyed dict.

    Parameters
    ----------
    d: dict like object

    Returns
    -------
    dict
    """
    converted = {}
    for key, value in d.items():
        converted[maybe_box_datetimelike(key)] = value
    return converted
|
||||
|
||||
|
||||
def standardize_mapping(into):
    """
    Helper function to standardize a supplied mapping.

    .. versionadded:: 0.21.0

    Parameters
    ----------
    into : instance or subclass of collections.abc.Mapping
        Must be a class, an initialized collections.defaultdict,
        or an instance of a collections.abc.Mapping subclass.

    Returns
    -------
    mapping : a collections.abc.Mapping subclass or other constructor
        a callable object that can accept an iterator to create
        the desired Mapping.

    See Also
    --------
    DataFrame.to_dict
    Series.to_dict
    """
    if inspect.isclass(into):
        cls = into
    else:
        # an initialized defaultdict keeps its factory via partial
        if isinstance(into, collections.defaultdict):
            return partial(collections.defaultdict, into.default_factory)
        cls = type(into)
    if not issubclass(cls, abc.Mapping):
        raise TypeError("unsupported type: {into}".format(into=cls))
    if cls == collections.defaultdict:
        raise TypeError("to_dict() only accepts initialized defaultdicts")
    return cls
|
||||
|
||||
|
||||
def random_state(state=None):
    """
    Helper function for processing random_state arguments.

    Parameters
    ----------
    state : int, np.random.RandomState, None.
        If receives an int, passes to np.random.RandomState() as seed.
        If receives an np.random.RandomState object, just returns object.
        If receives `None`, returns np.random.
        If receives anything else, raises an informative ValueError.
        Default None.

    Returns
    -------
    np.random.RandomState
    """
    # the three accepted cases are mutually exclusive, so order is free
    if state is None:
        return np.random
    if isinstance(state, np.random.RandomState):
        return state
    if is_integer(state):
        return np.random.RandomState(state)
    raise ValueError(
        "random_state must be an integer, a numpy RandomState, or None"
    )
|
||||
|
||||
|
||||
def _pipe(obj, func, *args, **kwargs):
|
||||
"""
|
||||
Apply a function ``func`` to object ``obj`` either by passing obj as the
|
||||
first argument to the function or, in the case that the func is a tuple,
|
||||
interpret the first element of the tuple as a function and pass the obj to
|
||||
that function as a keyword argument whose key is the value of the second
|
||||
element of the tuple.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
func : callable or tuple of (callable, string)
|
||||
Function to apply to this object or, alternatively, a
|
||||
``(callable, data_keyword)`` tuple where ``data_keyword`` is a
|
||||
string indicating the keyword of `callable`` that expects the
|
||||
object.
|
||||
args : iterable, optional
|
||||
positional arguments passed into ``func``.
|
||||
kwargs : dict, optional
|
||||
a dictionary of keyword arguments passed into ``func``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
object : the return type of ``func``.
|
||||
"""
|
||||
if isinstance(func, tuple):
|
||||
func, target = func
|
||||
if target in kwargs:
|
||||
msg = "%s is both the pipe target and a keyword argument" % target
|
||||
raise ValueError(msg)
|
||||
kwargs[target] = obj
|
||||
return func(*args, **kwargs)
|
||||
else:
|
||||
return func(obj, *args, **kwargs)
|
||||
|
||||
|
||||
def _get_rename_function(mapper):
    """
    Returns a function that will map names/labels, dependent if mapper
    is a dict, Series or just a function.
    """
    if not isinstance(mapper, (abc.Mapping, ABCSeries)):
        # already a callable; use it directly
        return mapper

    def f(x):
        # labels absent from the mapping pass through unchanged
        return mapper[x] if x in mapper else x

    return f
|
||||
@@ -0,0 +1,175 @@
|
||||
"""Core eval alignment algorithms
|
||||
"""
|
||||
|
||||
from functools import partial, wraps
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
import pandas.core.common as com
|
||||
from pandas.core.computation.common import _result_type_many
|
||||
|
||||
|
||||
def _align_core_single_unary_op(term):
    """Return a (constructor-like, axes-or-None) pair for a lone operand."""
    value = term.value
    if isinstance(value, np.ndarray):
        typ = partial(np.asanyarray, dtype=value.dtype)
    else:
        typ = type(value)

    if hasattr(value, "axes"):
        axes = _zip_axes_from_type(typ, value.axes)
    else:
        axes = None
    return typ, axes
|
||||
|
||||
|
||||
def _zip_axes_from_type(typ, new_axes):
|
||||
axes = {ax_name: new_axes[ax_ind] for ax_ind, ax_name in typ._AXIS_NAMES.items()}
|
||||
return axes
|
||||
|
||||
|
||||
def _any_pandas_objects(terms):
|
||||
"""Check a sequence of terms for instances of PandasObject."""
|
||||
return any(isinstance(term.value, pd.core.generic.PandasObject) for term in terms)
|
||||
|
||||
|
||||
def _filter_special_cases(f):
    """Decorator that short-circuits alignment for trivial term sets:
    a single operand, or terms with no pandas objects at all."""

    @wraps(f)
    def wrapper(terms):
        # single unary operand
        if len(terms) == 1:
            return _align_core_single_unary_op(terms[0])

        term_values = (term.value for term in terms)

        # we don't have any pandas objects
        if not _any_pandas_objects(terms):
            return _result_type_many(*term_values), None

        return f(terms)

    return wrapper
|
||||
|
||||
|
||||
@_filter_special_cases
def _align_core(terms):
    """Outer-join the axes of every pandas term and reindex each term onto
    the combined axes, mutating ``terms`` in place via ``Term.update``.

    Returns a (constructor, axes-dict) pair describing the result object.
    """
    # only terms whose value carries pandas-style ``axes`` participate
    term_index = [i for i, term in enumerate(terms) if hasattr(term.value, "axes")]
    term_dims = [terms[i].value.ndim for i in term_index]
    ndims = pd.Series(dict(zip(term_index, term_dims)))

    # initial axes are the axes of the largest-axis'd term
    biggest = terms[ndims.idxmax()].value
    typ = biggest._constructor
    axes = biggest.axes
    naxes = len(axes)
    gt_than_one_axis = naxes > 1

    # grow each result axis to the outer join of all terms' matching axes;
    # a Series aligned against a higher-dim object contributes its index
    # to the last axis
    for value in (terms[i].value for i in term_index):
        is_series = isinstance(value, pd.Series)
        is_series_and_gt_one_axis = is_series and gt_than_one_axis

        for axis, items in enumerate(value.axes):
            if is_series_and_gt_one_axis:
                ax, itm = naxes - 1, value.index
            else:
                ax, itm = axis, items

            if not axes[ax].is_(itm):
                axes[ax] = axes[ax].join(itm, how="outer")

    for i, ndim in ndims.items():
        for axis, items in zip(range(ndim), axes):
            ti = terms[i].value

            if hasattr(ti, "reindex"):
                transpose = isinstance(ti, pd.Series) and naxes > 1
                reindexer = axes[naxes - 1] if transpose else items

                term_axis_size = len(ti.axes[axis])
                reindexer_size = len(reindexer)

                # warn when reindexing blows an axis up by >= an order of
                # magnitude on an already-large term
                ordm = np.log10(max(1, abs(reindexer_size - term_axis_size)))
                if ordm >= 1 and reindexer_size >= 10000:
                    w = (
                        "Alignment difference on axis {axis} is larger "
                        "than an order of magnitude on term {term!r}, by "
                        "more than {ordm:.4g}; performance may suffer"
                    ).format(axis=axis, term=terms[i].name, ordm=ordm)
                    warnings.warn(w, category=PerformanceWarning, stacklevel=6)

                f = partial(ti.reindex, reindexer, axis=axis, copy=False)

                terms[i].update(f())

        # finally strip the aligned term down to its raw ndarray values
        terms[i].update(terms[i].value.values)

    return typ, _zip_axes_from_type(typ, axes)
|
||||
|
||||
|
||||
def _align(terms):
    """Align a set of terms.

    Returns a ``(result type, axes-or-None)`` pair used later to
    reconstruct the evaluation result.
    """
    try:
        # flatten the parse tree (a nested list, really)
        terms = list(com.flatten(terms))
    except TypeError:
        # can't iterate so it must just be a constant or single variable
        if isinstance(terms.value, pd.core.generic.NDFrame):
            typ = type(terms.value)
            return typ, _zip_axes_from_type(typ, terms.value.axes)
        return np.result_type(terms.type), None

    # if all resolved variables are numeric scalars
    if all(term.is_scalar for term in terms):
        return _result_type_many(*(term.value for term in terms)).type, None

    # perform the main alignment
    typ, axes = _align_core(terms)
    return typ, axes
|
||||
|
||||
|
||||
def _reconstruct_object(typ, obj, axes, dtype):
    """Reconstruct an object given its type, raw value, and possibly empty
    (None) axes.

    Parameters
    ----------
    typ : object
        A type
    obj : object
        The value to use in the type constructor
    axes : dict
        The axes to use to construct the resulting pandas object
    dtype : dtype-like
        Combined with ``obj.dtype`` via ``np.result_type`` to choose the
        result dtype.

    Returns
    -------
    ret : typ
        An object of type ``typ`` with the value `obj` and possible axes
        `axes`.
    """
    # numpy dtype objects expose the scalar type via ``.type``
    try:
        typ = typ.type
    except AttributeError:
        pass

    res_t = np.result_type(obj.dtype, dtype)

    if not isinstance(typ, partial) and issubclass(typ, pd.core.generic.PandasObject):
        return typ(obj, dtype=res_t, **axes)

    # special case for pathological things like ~True/~False
    if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_:
        ret_value = res_t.type(obj)
    else:
        ret_value = typ(obj).astype(res_t)
        # The condition is to distinguish 0-dim array (returned in case of
        # scalar) and 1 element array
        # e.g. np.array(0) and np.array([0])
        if len(obj.shape) == 1 and len(obj) == 1:
            if not isinstance(ret_value, np.ndarray):
                ret_value = np.array([ret_value]).astype(res_t)

    return ret_value
|
||||
@@ -0,0 +1,3 @@
|
||||
# flake8: noqa
|
||||
|
||||
from pandas.core.computation.eval import eval
|
||||
@@ -0,0 +1,10 @@
|
||||
# Detect the optional ``numexpr`` dependency once at import time; the rest of
# pandas.core.computation keys off these two flags.
from pandas.compat._optional import import_optional_dependency

# ``ne`` is None when numexpr is missing; an unsupported version only warns
ne = import_optional_dependency("numexpr", raise_on_missing=False, on_version="warn")
_NUMEXPR_INSTALLED = ne is not None
if _NUMEXPR_INSTALLED:
    _NUMEXPR_VERSION = ne.__version__
else:
    _NUMEXPR_VERSION = None

__all__ = ["_NUMEXPR_INSTALLED", "_NUMEXPR_VERSION"]
|
||||
@@ -0,0 +1,38 @@
|
||||
from functools import reduce
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
|
||||
# A token value Python's tokenizer probably will never use.
|
||||
_BACKTICK_QUOTED_STRING = 100
|
||||
|
||||
|
||||
def _ensure_decoded(s):
|
||||
""" if we have bytes, decode them to unicode """
|
||||
if isinstance(s, (np.bytes_, bytes)):
|
||||
s = s.decode(pd.get_option("display.encoding"))
|
||||
return s
|
||||
|
||||
|
||||
def _result_type_many(*arrays_and_dtypes):
|
||||
""" wrapper around numpy.result_type which overcomes the NPY_MAXARGS (32)
|
||||
argument limit """
|
||||
try:
|
||||
return np.result_type(*arrays_and_dtypes)
|
||||
except ValueError:
|
||||
# we have > NPY_MAXARGS terms in our expression
|
||||
return reduce(np.result_type, arrays_and_dtypes)
|
||||
|
||||
|
||||
def _remove_spaces_column_name(name):
|
||||
"""Check if name contains any spaces, if it contains any spaces
|
||||
the spaces will be removed and an underscore suffix is added."""
|
||||
if not isinstance(name, str) or " " not in name:
|
||||
return name
|
||||
|
||||
return name.replace(" ", "_") + "_BACKTICK_QUOTED_STRING"
|
||||
|
||||
|
||||
class NameResolutionError(NameError):
    # NOTE(review): not raised in this module; presumably used by the
    # expression-scope machinery when a name lookup fails -- confirm at callers
    pass
|
||||
@@ -0,0 +1,148 @@
|
||||
"""
|
||||
Engine classes for :func:`~pandas.eval`
|
||||
"""
|
||||
|
||||
import abc
|
||||
|
||||
from pandas.core.computation.align import _align, _reconstruct_object
|
||||
from pandas.core.computation.ops import UndefinedVariableError, _mathops, _reductions
|
||||
|
||||
import pandas.io.formats.printing as printing
|
||||
|
||||
# names provided by numexpr itself (math ops + reductions imported from ops);
# user variables must not shadow these -- see _check_ne_builtin_clash
_ne_builtins = frozenset(_mathops + _reductions)
|
||||
|
||||
|
||||
class NumExprClobberingError(NameError):
    """Raised when an expression's variable names overlap numexpr built-ins."""

    pass
|
||||
|
||||
|
||||
def _check_ne_builtin_clash(expr):
    """Attempt to prevent foot-shooting in a helpful way.

    Parameters
    ----------
    expr : Expr
        The parsed expression; its ``names`` are checked against numexpr's
        built-in names.

    Raises
    ------
    NumExprClobberingError
        If any name in the expression shadows a numexpr built-in.
    """
    names = expr.names
    overlap = names & _ne_builtins

    if overlap:
        s = ", ".join(map(repr, overlap))
        raise NumExprClobberingError(
            'Variables in expression "{expr}" '
            "overlap with builtins: ({s})".format(expr=expr, s=s)
        )
|
||||
|
||||
|
||||
class AbstractEngine(metaclass=abc.ABCMeta):
    """Object serving as a base class for all engines."""

    # NOTE(review): presumably marks engines that need special handling of
    # negative fractional results -- confirm against expression processing
    has_neg_frac = False

    def __init__(self, expr):
        # the parsed expression; alignment metadata is filled lazily by
        # evaluate()
        self.expr = expr
        self.aligned_axes = None
        self.result_type = None

    def convert(self):
        """Convert an expression for evaluation.

        Defaults to return the expression as a string.
        """
        return printing.pprint_thing(self.expr)

    def evaluate(self):
        """Run the engine on the expression

        This method performs alignment which is necessary no matter what engine
        is being used, thus its implementation is in the base class.

        Returns
        -------
        obj : object
            The result of the passed expression.
        """
        if not self._is_aligned:
            self.result_type, self.aligned_axes = _align(self.expr.terms)

        # make sure no names in resolvers and locals/globals clash
        res = self._evaluate()
        return _reconstruct_object(
            self.result_type, res, self.aligned_axes, self.expr.terms.return_type
        )

    @property
    def _is_aligned(self):
        # True once evaluate() has computed alignment metadata
        return self.aligned_axes is not None and self.result_type is not None

    @abc.abstractmethod
    def _evaluate(self):
        """Return an evaluated expression.

        Parameters
        ----------
        env : Scope
            The local and global environment in which to evaluate an
            expression.

        Notes
        -----
        Must be implemented by subclasses.
        """
        pass
|
||||
|
||||
|
||||
class NumExprEngine(AbstractEngine):

    """NumExpr engine class"""

    has_neg_frac = True

    def __init__(self, expr):
        super().__init__(expr)

    def convert(self):
        # numexpr wants a plain str, not whatever pprint_thing returned
        return str(super().convert())

    def _evaluate(self):
        """Evaluate via numexpr; an unknown name in the expression scope
        surfaces as UndefinedVariableError."""
        import numexpr as ne

        # convert the expression to a valid numexpr expression
        s = self.convert()

        try:
            env = self.expr.env
            scope = env.full_scope
            truediv = scope["truediv"]
            _check_ne_builtin_clash(self.expr)
            return ne.evaluate(s, local_dict=scope, truediv=truediv)
        except KeyError as e:
            # python 3 compat kludge
            try:
                msg = e.message
            except AttributeError:
                msg = str(e)
            raise UndefinedVariableError(msg)
|
||||
|
||||
|
||||
class PythonEngine(AbstractEngine):

    """Evaluate an expression in Python space.

    Mostly for testing purposes.
    """

    has_neg_frac = False

    def __init__(self, expr):
        super().__init__(expr)

    def evaluate(self):
        # bypasses the base-class alignment/reconstruction; the Expr is
        # simply called
        return self.expr()

    def _evaluate(self):
        # unreachable: evaluate() is overridden above and never calls this
        pass
|
||||
|
||||
|
||||
# engine-name -> engine-class registry consulted by pandas.core.computation.eval
_engines = {"numexpr": NumExprEngine, "python": PythonEngine}
|
||||
380
venv/lib/python3.6/site-packages/pandas/core/computation/eval.py
Normal file
380
venv/lib/python3.6/site-packages/pandas/core/computation/eval.py
Normal file
@@ -0,0 +1,380 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
Top level ``eval`` module.
|
||||
"""
|
||||
|
||||
import tokenize
|
||||
import warnings
|
||||
|
||||
from pandas.util._validators import validate_bool_kwarg
|
||||
|
||||
from pandas.core.computation.engines import _engines
|
||||
from pandas.core.computation.scope import _ensure_scope
|
||||
|
||||
from pandas.io.formats.printing import pprint_thing
|
||||
|
||||
|
||||
def _check_engine(engine):
    """
    Make sure a valid engine is passed.

    Parameters
    ----------
    engine : str or None
        None selects numexpr when installed, otherwise 'python'.

    Raises
    ------
    KeyError
        * If an invalid engine is passed
    ImportError
        * If numexpr was requested but doesn't exist

    Returns
    -------
    string engine
    """
    from pandas.core.computation.check import _NUMEXPR_INSTALLED

    # resolve the default: prefer numexpr when available
    if engine is None:
        if _NUMEXPR_INSTALLED:
            engine = "numexpr"
        else:
            engine = "python"

    if engine not in _engines:
        valid = list(_engines.keys())
        raise KeyError(
            "Invalid engine {engine!r} passed, valid engines are"
            " {valid}".format(engine=engine, valid=valid)
        )

    # TODO: validate this in a more general way (thinking of future engines
    # that won't necessarily be import-able)
    # Could potentially be done on engine instantiation
    if engine == "numexpr":
        if not _NUMEXPR_INSTALLED:
            raise ImportError(
                "'numexpr' is not installed or an "
                "unsupported version. Cannot use "
                "engine='numexpr' for query/eval "
                "if 'numexpr' is not installed"
            )

    return engine
|
||||
|
||||
|
||||
def _check_parser(parser):
    """
    Make sure a valid parser is passed.

    Parameters
    ----------
    parser : str

    Raises
    ------
    KeyError
        * If an invalid parser is passed
    """
    from pandas.core.computation.expr import _parsers

    if parser in _parsers:
        return
    raise KeyError(
        "Invalid parser {parser!r} passed, valid parsers are"
        " {valid}".format(parser=parser, valid=_parsers.keys())
    )
|
||||
|
||||
|
||||
def _check_resolvers(resolvers):
|
||||
if resolvers is not None:
|
||||
for resolver in resolvers:
|
||||
if not hasattr(resolver, "__getitem__"):
|
||||
name = type(resolver).__name__
|
||||
raise TypeError(
|
||||
"Resolver of type {name!r} does not implement "
|
||||
"the __getitem__ method".format(name=name)
|
||||
)
|
||||
|
||||
|
||||
def _check_expression(expr):
|
||||
"""
|
||||
Make sure an expression is not an empty string
|
||||
|
||||
Parameters
|
||||
----------
|
||||
expr : object
|
||||
An object that can be converted to a string
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
* If expr is an empty string
|
||||
"""
|
||||
if not expr:
|
||||
raise ValueError("expr cannot be an empty string")
|
||||
|
||||
|
||||
def _convert_expression(expr):
    """
    Convert an object to an expression.

    Renders ``expr`` to a unicode string via ``pprint_thing`` and validates
    that the result is non-empty.  This is how operators are converted to
    their string representation for recursive calls to :func:`~pandas.eval`.

    Parameters
    ----------
    expr : object
        The object to be converted to a string.

    Returns
    -------
    s : unicode
        The string representation of an object.

    Raises
    ------
    ValueError
        * If the expression is empty.
    """
    rendered = pprint_thing(expr)
    _check_expression(rendered)
    return rendered
|
||||
|
||||
|
||||
def _check_for_locals(expr, stack_level, parser):
    """Raise SyntaxError if ``expr`` uses the '@' local-variable prefix in a
    context where it is not allowed: with a non-pandas parser, or in a
    top-level (stack_level == 0) eval call."""
    from pandas.core.computation.expr import tokenize_string

    at_top_of_stack = stack_level == 0
    not_pandas_parser = parser != "pandas"

    if not_pandas_parser:
        msg = "The '@' prefix is only supported by the pandas parser"
    elif at_top_of_stack:
        msg = (
            "The '@' prefix is not allowed in "
            "top-level eval calls, \nplease refer to "
            "your variables by name without the '@' "
            "prefix"
        )

    # msg is only consumed when one of the two flags above is set, which is
    # exactly the condition guarding this loop -- no unbound-name risk
    if at_top_of_stack or not_pandas_parser:
        for toknum, tokval in tokenize_string(expr):
            if toknum == tokenize.OP and tokval == "@":
                raise SyntaxError(msg)
|
||||
|
||||
|
||||
def eval(
|
||||
expr,
|
||||
parser="pandas",
|
||||
engine=None,
|
||||
truediv=True,
|
||||
local_dict=None,
|
||||
global_dict=None,
|
||||
resolvers=(),
|
||||
level=0,
|
||||
target=None,
|
||||
inplace=False,
|
||||
):
|
||||
"""
|
||||
Evaluate a Python expression as a string using various backends.
|
||||
|
||||
The following arithmetic operations are supported: ``+``, ``-``, ``*``,
|
||||
``/``, ``**``, ``%``, ``//`` (python engine only) along with the following
|
||||
boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not).
|
||||
Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`,
|
||||
:keyword:`or`, and :keyword:`not` with the same semantics as the
|
||||
corresponding bitwise operators. :class:`~pandas.Series` and
|
||||
:class:`~pandas.DataFrame` objects are supported and behave as they would
|
||||
with plain ol' Python evaluation.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
expr : str or unicode
|
||||
The expression to evaluate. This string cannot contain any Python
|
||||
`statements
|
||||
<https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__,
|
||||
only Python `expressions
|
||||
<https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__.
|
||||
parser : string, default 'pandas', {'pandas', 'python'}
|
||||
The parser to use to construct the syntax tree from the expression. The
|
||||
default of ``'pandas'`` parses code slightly different than standard
|
||||
Python. Alternatively, you can parse an expression using the
|
||||
``'python'`` parser to retain strict Python semantics. See the
|
||||
:ref:`enhancing performance <enhancingperf.eval>` documentation for
|
||||
more details.
|
||||
engine : string or None, default 'numexpr', {'python', 'numexpr'}
|
||||
|
||||
The engine used to evaluate the expression. Supported engines are
|
||||
|
||||
- None : tries to use ``numexpr``, falls back to ``python``
|
||||
- ``'numexpr'``: This default engine evaluates pandas objects using
|
||||
numexpr for large speed ups in complex expressions
|
||||
with large frames.
|
||||
- ``'python'``: Performs operations as if you had ``eval``'d in top
|
||||
level python. This engine is generally not that useful.
|
||||
|
||||
More backends may be available in the future.
|
||||
|
||||
truediv : bool, optional
|
||||
Whether to use true division, like in Python >= 3
|
||||
local_dict : dict or None, optional
|
||||
A dictionary of local variables, taken from locals() by default.
|
||||
global_dict : dict or None, optional
|
||||
A dictionary of global variables, taken from globals() by default.
|
||||
resolvers : list of dict-like or None, optional
|
||||
A list of objects implementing the ``__getitem__`` special method that
|
||||
you can use to inject an additional collection of namespaces to use for
|
||||
variable lookup. For example, this is used in the
|
||||
:meth:`~DataFrame.query` method to inject the
|
||||
``DataFrame.index`` and ``DataFrame.columns``
|
||||
variables that refer to their respective :class:`~pandas.DataFrame`
|
||||
instance attributes.
|
||||
level : int, optional
|
||||
The number of prior stack frames to traverse and add to the current
|
||||
scope. Most users will **not** need to change this parameter.
|
||||
target : object, optional, default None
|
||||
This is the target object for assignment. It is used when there is
|
||||
variable assignment in the expression. If so, then `target` must
|
||||
support item assignment with string keys, and if a copy is being
|
||||
returned, it must also support `.copy()`.
|
||||
inplace : bool, default False
|
||||
If `target` is provided, and the expression mutates `target`, whether
|
||||
to modify `target` inplace. Otherwise, return a copy of `target` with
|
||||
the mutation.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ndarray, numeric scalar, DataFrame, Series
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
There are many instances where such an error can be raised:
|
||||
|
||||
- `target=None`, but the expression is multiline.
|
||||
- The expression is multiline, but not all them have item assignment.
|
||||
An example of such an arrangement is this:
|
||||
|
||||
a = b + 1
|
||||
a + 2
|
||||
|
||||
Here, there are expressions on different lines, making it multiline,
|
||||
but the last line has no variable assigned to the output of `a + 2`.
|
||||
- `inplace=True`, but the expression is missing item assignment.
|
||||
- Item assignment is provided, but the `target` does not support
|
||||
string item assignment.
|
||||
- Item assignment is provided and `inplace=False`, but the `target`
|
||||
does not support the `.copy()` method
|
||||
|
||||
See Also
|
||||
--------
|
||||
DataFrame.query
|
||||
DataFrame.eval
|
||||
|
||||
Notes
|
||||
-----
|
||||
The ``dtype`` of any objects involved in an arithmetic ``%`` operation are
|
||||
recursively cast to ``float64``.
|
||||
|
||||
See the :ref:`enhancing performance <enhancingperf.eval>` documentation for
|
||||
more details.
|
||||
"""
|
||||
from pandas.core.computation.expr import Expr
|
||||
|
||||
inplace = validate_bool_kwarg(inplace, "inplace")
|
||||
|
||||
if isinstance(expr, str):
|
||||
_check_expression(expr)
|
||||
exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""]
|
||||
else:
|
||||
exprs = [expr]
|
||||
multi_line = len(exprs) > 1
|
||||
|
||||
if multi_line and target is None:
|
||||
raise ValueError(
|
||||
"multi-line expressions are only valid in the "
|
||||
"context of data, use DataFrame.eval"
|
||||
)
|
||||
|
||||
ret = None
|
||||
first_expr = True
|
||||
target_modified = False
|
||||
|
||||
for expr in exprs:
|
||||
expr = _convert_expression(expr)
|
||||
engine = _check_engine(engine)
|
||||
_check_parser(parser)
|
||||
_check_resolvers(resolvers)
|
||||
_check_for_locals(expr, level, parser)
|
||||
|
||||
# get our (possibly passed-in) scope
|
||||
env = _ensure_scope(
|
||||
level + 1,
|
||||
global_dict=global_dict,
|
||||
local_dict=local_dict,
|
||||
resolvers=resolvers,
|
||||
target=target,
|
||||
)
|
||||
|
||||
parsed_expr = Expr(expr, engine=engine, parser=parser, env=env, truediv=truediv)
|
||||
|
||||
# construct the engine and evaluate the parsed expression
|
||||
eng = _engines[engine]
|
||||
eng_inst = eng(parsed_expr)
|
||||
ret = eng_inst.evaluate()
|
||||
|
||||
if parsed_expr.assigner is None:
|
||||
if multi_line:
|
||||
raise ValueError(
|
||||
"Multi-line expressions are only valid"
|
||||
" if all expressions contain an assignment"
|
||||
)
|
||||
elif inplace:
|
||||
raise ValueError("Cannot operate inplace " "if there is no assignment")
|
||||
|
||||
# assign if needed
|
||||
assigner = parsed_expr.assigner
|
||||
if env.target is not None and assigner is not None:
|
||||
target_modified = True
|
||||
|
||||
# if returning a copy, copy only on the first assignment
|
||||
if not inplace and first_expr:
|
||||
try:
|
||||
target = env.target.copy()
|
||||
except AttributeError:
|
||||
raise ValueError("Cannot return a copy of the target")
|
||||
else:
|
||||
target = env.target
|
||||
|
||||
# TypeError is most commonly raised (e.g. int, list), but you
|
||||
# get IndexError if you try to do this assignment on np.ndarray.
|
||||
# we will ignore numpy warnings here; e.g. if trying
|
||||
# to use a non-numeric indexer
|
||||
try:
|
||||
with warnings.catch_warnings(record=True):
|
||||
# TODO: Filter the warnings we actually care about here.
|
||||
target[assigner] = ret
|
||||
except (TypeError, IndexError):
|
||||
raise ValueError("Cannot assign expression output to target")
|
||||
|
||||
if not resolvers:
|
||||
resolvers = ({assigner: ret},)
|
||||
else:
|
||||
# existing resolver needs updated to handle
|
||||
# case of mutating existing column in copy
|
||||
for resolver in resolvers:
|
||||
if assigner in resolver:
|
||||
resolver[assigner] = ret
|
||||
break
|
||||
else:
|
||||
resolvers += ({assigner: ret},)
|
||||
|
||||
ret = None
|
||||
first_expr = False
|
||||
|
||||
# We want to exclude `inplace=None` as being False.
|
||||
if inplace is False:
|
||||
return target if target_modified else ret
|
||||
854
venv/lib/python3.6/site-packages/pandas/core/computation/expr.py
Normal file
854
venv/lib/python3.6/site-packages/pandas/core/computation/expr.py
Normal file
@@ -0,0 +1,854 @@
|
||||
""":func:`~pandas.eval` parsers
|
||||
"""
|
||||
|
||||
import ast
|
||||
from functools import partial, reduce
|
||||
from io import StringIO
|
||||
import itertools as it
|
||||
import operator
|
||||
import tokenize
|
||||
from typing import Type
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core import common as com
|
||||
from pandas.core.base import StringMixin
|
||||
from pandas.core.computation.common import (
|
||||
_BACKTICK_QUOTED_STRING,
|
||||
_remove_spaces_column_name,
|
||||
)
|
||||
from pandas.core.computation.ops import (
|
||||
_LOCAL_TAG,
|
||||
BinOp,
|
||||
Constant,
|
||||
Div,
|
||||
FuncNode,
|
||||
Op,
|
||||
Term,
|
||||
UnaryOp,
|
||||
UndefinedVariableError,
|
||||
_arith_ops_syms,
|
||||
_bool_ops_syms,
|
||||
_cmp_ops_syms,
|
||||
_mathops,
|
||||
_reductions,
|
||||
_unary_ops_syms,
|
||||
is_term,
|
||||
)
|
||||
from pandas.core.computation.scope import Scope
|
||||
|
||||
import pandas.io.formats.printing as printing
|
||||
|
||||
|
||||
def tokenize_string(source):
    """Tokenize a Python source code string.

    Parameters
    ----------
    source : str
        A Python source code string
    """
    readline = StringIO(source).readline
    token_stream = tokenize.generate_tokens(readline)

    # Pass tokens through unchanged until a backtick (`) is seen; then
    # collect everything up to (and consuming) the closing backtick and
    # emit it as one backtick-quoted string token.
    for toknum, tokval, _, _, _ in token_stream:
        if tokval == "`":
            collected = []
            for _, inner_val, _, _, _ in token_stream:
                if inner_val == "`":
                    break
                collected.append(inner_val)
            yield _BACKTICK_QUOTED_STRING, " ".join(collected)
        else:
            yield toknum, tokval
|
||||
|
||||
|
||||
def _rewrite_assign(tok):
|
||||
"""Rewrite the assignment operator for PyTables expressions that use ``=``
|
||||
as a substitute for ``==``.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
tok : tuple of int, str
|
||||
ints correspond to the all caps constants in the tokenize module
|
||||
|
||||
Returns
|
||||
-------
|
||||
t : tuple of int, str
|
||||
Either the input or token or the replacement values
|
||||
"""
|
||||
toknum, tokval = tok
|
||||
return toknum, "==" if tokval == "=" else tokval
|
||||
|
||||
|
||||
def _replace_booleans(tok):
|
||||
"""Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise
|
||||
precedence is changed to boolean precedence.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
tok : tuple of int, str
|
||||
ints correspond to the all caps constants in the tokenize module
|
||||
|
||||
Returns
|
||||
-------
|
||||
t : tuple of int, str
|
||||
Either the input or token or the replacement values
|
||||
"""
|
||||
toknum, tokval = tok
|
||||
if toknum == tokenize.OP:
|
||||
if tokval == "&":
|
||||
return tokenize.NAME, "and"
|
||||
elif tokval == "|":
|
||||
return tokenize.NAME, "or"
|
||||
return toknum, tokval
|
||||
return toknum, tokval
|
||||
|
||||
|
||||
def _replace_locals(tok):
|
||||
"""Replace local variables with a syntactically valid name.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
tok : tuple of int, str
|
||||
ints correspond to the all caps constants in the tokenize module
|
||||
|
||||
Returns
|
||||
-------
|
||||
t : tuple of int, str
|
||||
Either the input or token or the replacement values
|
||||
|
||||
Notes
|
||||
-----
|
||||
This is somewhat of a hack in that we rewrite a string such as ``'@a'`` as
|
||||
``'__pd_eval_local_a'`` by telling the tokenizer that ``__pd_eval_local_``
|
||||
is a ``tokenize.OP`` and to replace the ``'@'`` symbol with it.
|
||||
"""
|
||||
toknum, tokval = tok
|
||||
if toknum == tokenize.OP and tokval == "@":
|
||||
return tokenize.OP, _LOCAL_TAG
|
||||
return toknum, tokval
|
||||
|
||||
|
||||
def _clean_spaces_backtick_quoted_names(tok):
    """Clean up a column name if surrounded by backticks.

    Backtick quoted string are indicated by a certain tokval value. If a string
    is a backtick quoted token it will processed by
    :func:`_remove_spaces_column_name` so that the parser can find this
    string when the query is executed.
    See also :meth:`NDFrame._get_space_character_free_column_resolver`.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    t : tuple of int, str
        Either the input or token or the replacement values
    """
    toknum, tokval = tok
    if toknum != _BACKTICK_QUOTED_STRING:
        return toknum, tokval
    # backtick-quoted column name: strip spaces so the parser can resolve it
    return tokenize.NAME, _remove_spaces_column_name(tokval)
|
||||
|
||||
|
||||
def _compose2(f, g):
|
||||
"""Compose 2 callables"""
|
||||
return lambda *args, **kwargs: f(g(*args, **kwargs))
|
||||
|
||||
|
||||
def _compose(*funcs):
|
||||
"""Compose 2 or more callables"""
|
||||
assert len(funcs) > 1, "At least 2 callables must be passed to compose"
|
||||
return reduce(_compose2, funcs)
|
||||
|
||||
|
||||
def _preparse(
    source,
    f=_compose(
        _replace_locals,
        _replace_booleans,
        _rewrite_assign,
        _clean_spaces_backtick_quoted_names,
    ),
):
    """Compose a collection of tokenization functions

    Parameters
    ----------
    source : str
        A Python source code string
    f : callable
        This takes a tuple of (toknum, tokval) as its argument and returns a
        tuple with the same structure but possibly different elements. Defaults
        to the composition of ``_rewrite_assign``, ``_replace_booleans``, and
        ``_replace_locals``.

    Returns
    -------
    s : str
        Valid Python source code

    Notes
    -----
    The `f` parameter can be any callable that takes *and* returns input of the
    form ``(toknum, tokval)``, where ``toknum`` is one of the constants from
    the ``tokenize`` module and ``tokval`` is a string.
    """
    assert callable(f), "f must be callable"
    # transform each (toknum, tokval) pair, then stitch back into source text
    transformed_tokens = (f(token) for token in tokenize_string(source))
    return tokenize.untokenize(transformed_tokens)
|
||||
|
||||
|
||||
def _is_type(t):
|
||||
"""Factory for a type checking function of type ``t`` or tuple of types."""
|
||||
return lambda x: isinstance(x.value, t)
|
||||
|
||||
|
||||
# Predicates over Term-like objects: test the type of the wrapped ``.value``.
_is_list = _is_type(list)
_is_str = _is_type(str)
|
||||
|
||||
|
||||
# partition all AST nodes
# Every class in the ``ast`` module that is a subclass of ``ast.AST``;
# used below to compute supported/unsupported node-name sets.
_all_nodes = frozenset(
    filter(
        lambda x: isinstance(x, type) and issubclass(x, ast.AST),
        (getattr(ast, node) for node in dir(ast)),
    )
)
|
||||
|
||||
|
||||
def _filter_nodes(superclass, all_nodes=_all_nodes):
    """Filter out AST nodes that are subclasses of ``superclass``."""
    names = set()
    for node in all_nodes:
        if issubclass(node, superclass):
            names.add(node.__name__)
    return frozenset(names)
|
||||
|
||||
|
||||
# Node-name sets, grouped by the abstract ``ast`` base class they derive from.
_all_node_names = frozenset(map(lambda x: x.__name__, _all_nodes))
_mod_nodes = _filter_nodes(ast.mod)
_stmt_nodes = _filter_nodes(ast.stmt)
_expr_nodes = _filter_nodes(ast.expr)
_expr_context_nodes = _filter_nodes(ast.expr_context)
_slice_nodes = _filter_nodes(ast.slice)
_boolop_nodes = _filter_nodes(ast.boolop)
_operator_nodes = _filter_nodes(ast.operator)
_unary_op_nodes = _filter_nodes(ast.unaryop)
_cmp_op_nodes = _filter_nodes(ast.cmpop)
_comprehension_nodes = _filter_nodes(ast.comprehension)
_handler_nodes = _filter_nodes(ast.excepthandler)
_arguments_nodes = _filter_nodes(ast.arguments)
_keyword_nodes = _filter_nodes(ast.keyword)
_alias_nodes = _filter_nodes(ast.alias)


# nodes that we don't support directly but are needed for parsing
_hacked_nodes = frozenset(["Assign", "Module", "Expr"])


_unsupported_expr_nodes = frozenset(
    [
        "Yield",
        "GeneratorExp",
        "IfExp",
        "DictComp",
        "SetComp",
        "Repr",
        "Lambda",
        "Set",
        "AST",
        "Is",
        "IsNot",
    ]
)

# these nodes are low priority or won't ever be supported (e.g., AST)
_unsupported_nodes = (
    _stmt_nodes
    | _mod_nodes
    | _handler_nodes
    | _arguments_nodes
    | _keyword_nodes
    | _alias_nodes
    | _expr_context_nodes
    | _unsupported_expr_nodes
) - _hacked_nodes

# we're adding a different assignment in some cases to be equality comparison
# and we don't want `stmt` and friends in their so get only the class whose
# names are capitalized
_base_supported_nodes = (_all_node_names - _unsupported_nodes) | _hacked_nodes
# sanity check: the two sets must be disjoint by construction
_msg = "cannot both support and not support {intersection}".format(
    intersection=_unsupported_nodes & _base_supported_nodes
)
assert not _unsupported_nodes & _base_supported_nodes, _msg
|
||||
|
||||
|
||||
def _node_not_implemented(node_name, cls):
|
||||
"""Return a function that raises a NotImplementedError with a passed node
|
||||
name.
|
||||
"""
|
||||
|
||||
def f(self, *args, **kwargs):
|
||||
raise NotImplementedError(
|
||||
"{name!r} nodes are not " "implemented".format(name=node_name)
|
||||
)
|
||||
|
||||
return f
|
||||
|
||||
|
||||
def disallow(nodes):
    """Decorator to disallow certain nodes from parsing. Raises a
    NotImplementedError instead.

    Returns
    -------
    disallowed : callable
    """

    def disallowed(cls):
        # reset so repeated decoration does not accumulate entries
        cls.unsupported_nodes = ()
        for node in nodes:
            method_name = "visit_{node}".format(node=node)
            cls.unsupported_nodes += (method_name,)
            setattr(cls, method_name, _node_not_implemented(node, cls))
        return cls

    return disallowed
|
||||
|
||||
|
||||
def _op_maker(op_class, op_symbol):
|
||||
"""Return a function to create an op class with its symbol already passed.
|
||||
|
||||
Returns
|
||||
-------
|
||||
f : callable
|
||||
"""
|
||||
|
||||
def f(self, node, *args, **kwargs):
|
||||
"""Return a partial function with an Op subclass with an operator
|
||||
already passed.
|
||||
|
||||
Returns
|
||||
-------
|
||||
f : callable
|
||||
"""
|
||||
return partial(op_class, op_symbol, *args, **kwargs)
|
||||
|
||||
return f
|
||||
|
||||
|
||||
# Op categories consumed by ``add_ops``: maps category prefix -> Op subclass.
_op_classes = {"binary": BinOp, "unary": UnaryOp}
|
||||
|
||||
|
||||
def add_ops(op_classes):
    """Decorator to add default implementation of ops."""

    def decorate(cls):
        for kind, op_class in op_classes.items():
            # e.g. ``binary_ops`` and ``binary_op_nodes_map`` class attributes
            symbols = getattr(cls, "{name}_ops".format(name=kind))
            node_map = getattr(cls, "{name}_op_nodes_map".format(name=kind))
            for symbol in symbols:
                node_name = node_map[symbol]
                if node_name is None:
                    # no AST node corresponds to this operator symbol
                    continue
                visitor = _op_maker(op_class, symbol)
                setattr(cls, "visit_{node}".format(node=node_name), visitor)
        return cls

    return decorate
|
||||
|
||||
|
||||
@disallow(_unsupported_nodes)
@add_ops(_op_classes)
class BaseExprVisitor(ast.NodeVisitor):

    """Custom ast walker. Parsers of other engines should subclass this class
    if necessary.

    Parameters
    ----------
    env : Scope
    engine : str
    parser : str
    preparser : callable
    """

    # classes used to wrap literals and resolved names, respectively
    const_type = Constant  # type: Type[Term]
    term_type = Term

    binary_ops = _cmp_ops_syms + _bool_ops_syms + _arith_ops_syms
    # AST node names aligned positionally with ``binary_ops``; ``None`` marks
    # an operator with no visitor generated by ``add_ops`` (true division,
    # which is handled by ``visit_Div``).
    binary_op_nodes = (
        "Gt",
        "Lt",
        "GtE",
        "LtE",
        "Eq",
        "NotEq",
        "In",
        "NotIn",
        "BitAnd",
        "BitOr",
        "And",
        "Or",
        "Add",
        "Sub",
        "Mult",
        None,
        "Pow",
        "FloorDiv",
        "Mod",
    )
    binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes))

    unary_ops = _unary_ops_syms
    unary_op_nodes = "UAdd", "USub", "Invert", "Not"
    unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes))

    # ==/!= involving a string or list operand is rewritten to in/not in
    rewrite_map = {
        ast.Eq: ast.In,
        ast.NotEq: ast.NotIn,
        ast.In: ast.In,
        ast.NotIn: ast.NotIn,
    }

    def __init__(self, env, engine, parser, preparser=_preparse):
        self.env = env
        self.engine = engine
        self.parser = parser
        self.preparser = preparser
        # set by visit_Assign when the expression assigns to a name
        self.assigner = None

    def visit(self, node, **kwargs):
        # Strings are preparsed (token rewriting) and compiled to an AST
        # before dispatching to the per-node visit methods.
        if isinstance(node, str):
            clean = self.preparser(node)
            try:
                node = ast.fix_missing_locations(ast.parse(clean))
            except SyntaxError as e:
                from keyword import iskeyword

                # friendlier message when a Python keyword was used as an
                # identifier; the original SyntaxError is re-raised either way
                if any(iskeyword(x) for x in clean.split()):
                    e.msg = "Python keyword not valid identifier" " in numexpr query"
                raise e

        method = "visit_" + node.__class__.__name__
        visitor = getattr(self, method)
        return visitor(node, **kwargs)

    def visit_Module(self, node, **kwargs):
        # a Module wraps exactly one statement in a valid eval expression
        if len(node.body) != 1:
            raise SyntaxError("only a single expression is allowed")
        expr = node.body[0]
        return self.visit(expr, **kwargs)

    def visit_Expr(self, node, **kwargs):
        return self.visit(node.value, **kwargs)

    def _rewrite_membership_op(self, node, left, right):
        # the kind of the operator (is actually an instance)
        op_instance = node.op
        op_type = type(op_instance)

        # must be two terms and the comparison operator must be ==/!=/in/not in
        if is_term(left) and is_term(right) and op_type in self.rewrite_map:

            left_list, right_list = map(_is_list, (left, right))
            left_str, right_str = map(_is_str, (left, right))

            # if there are any strings or lists in the expression
            if left_list or right_list or left_str or right_str:
                op_instance = self.rewrite_map[op_type]()

            # pop the string variable out of locals and replace it with a list
            # of one string, kind of a hack
            if right_str:
                name = self.env.add_tmp([right.value])
                right = self.term_type(name, self.env)

            if left_str:
                name = self.env.add_tmp([left.value])
                left = self.term_type(name, self.env)

        op = self.visit(op_instance)
        return op, op_instance, left, right

    def _maybe_transform_eq_ne(self, node, left=None, right=None):
        # visit operands lazily (callers may pass pre-visited terms)
        if left is None:
            left = self.visit(node.left, side="left")
        if right is None:
            right = self.visit(node.right, side="right")
        op, op_class, left, right = self._rewrite_membership_op(node, left, right)
        return op, op_class, left, right

    def _maybe_downcast_constants(self, left, right):
        # keep float32 arrays float32 by downcasting the scalar operand,
        # instead of letting the scalar promote the result to float64
        f32 = np.dtype(np.float32)
        if (
            left.is_scalar
            and hasattr(left, "value")
            and not right.is_scalar
            and right.return_type == f32
        ):
            # right is a float32 array, left is a scalar
            name = self.env.add_tmp(np.float32(left.value))
            left = self.term_type(name, self.env)
        if (
            right.is_scalar
            and hasattr(right, "value")
            and not left.is_scalar
            and left.return_type == f32
        ):
            # left is a float32 array, right is a scalar
            name = self.env.add_tmp(np.float32(right.value))
            right = self.term_type(name, self.env)

        return left, right

    def _maybe_eval(self, binop, eval_in_python):
        # eval `in` and `not in` (for now) in "partial" python space
        # things that can be evaluated in "eval" space will be turned into
        # temporary variables. for example,
        # [1,2] in a + 2 * b
        # in that case a + 2 * b will be evaluated using numexpr, and the "in"
        # call will be evaluated using isin (in python space)
        return binop.evaluate(
            self.env, self.engine, self.parser, self.term_type, eval_in_python
        )

    def _maybe_evaluate_binop(
        self,
        op,
        op_class,
        lhs,
        rhs,
        eval_in_python=("in", "not in"),
        maybe_eval_in_python=("==", "!=", "<", ">", "<=", ">="),
    ):
        res = op(lhs, rhs)

        if res.has_invalid_return_type:
            raise TypeError(
                "unsupported operand type(s) for {op}:"
                " '{lhs}' and '{rhs}'".format(op=res.op, lhs=lhs.type, rhs=rhs.type)
            )

        if self.engine != "pytables":
            # NOTE(review): because ``and`` binds tighter than ``or``, this
            # condition is True whenever rhs is datetime, regardless of the
            # operator — confirm whether the missing parentheses are intended.
            if (
                res.op in _cmp_ops_syms
                and getattr(lhs, "is_datetime", False)
                or getattr(rhs, "is_datetime", False)
            ):
                # all date ops must be done in python bc numexpr doesn't work
                # well with NaT
                return self._maybe_eval(res, self.binary_ops)

        if res.op in eval_in_python:
            # "in"/"not in" ops are always evaluated in python
            return self._maybe_eval(res, eval_in_python)
        elif self.engine != "pytables":
            if (
                getattr(lhs, "return_type", None) == object
                or getattr(rhs, "return_type", None) == object
            ):
                # evaluate "==" and "!=" in python if either of our operands
                # has an object return type
                return self._maybe_eval(res, eval_in_python + maybe_eval_in_python)
        return res

    def visit_BinOp(self, node, **kwargs):
        op, op_class, left, right = self._maybe_transform_eq_ne(node)
        left, right = self._maybe_downcast_constants(left, right)
        return self._maybe_evaluate_binop(op, op_class, left, right)

    def visit_Div(self, node, **kwargs):
        # division honors the truediv flag stored on the scope by Expr
        truediv = self.env.scope["truediv"]
        return lambda lhs, rhs: Div(lhs, rhs, truediv)

    def visit_UnaryOp(self, node, **kwargs):
        op = self.visit(node.op)
        operand = self.visit(node.operand)
        return op(operand)

    def visit_Name(self, node, **kwargs):
        # resolve a bare name against the scope
        return self.term_type(node.id, self.env, **kwargs)

    def visit_NameConstant(self, node, **kwargs):
        # True/False/None literals
        return self.const_type(node.value, self.env)

    def visit_Num(self, node, **kwargs):
        return self.const_type(node.n, self.env)

    def visit_Str(self, node, **kwargs):
        # string literals become scope temporaries referenced by name
        name = self.env.add_tmp(node.s)
        return self.term_type(name, self.env)

    def visit_List(self, node, **kwargs):
        # evaluate each element and store the resulting list as a temporary
        name = self.env.add_tmp([self.visit(e)(self.env) for e in node.elts])
        return self.term_type(name, self.env)

    visit_Tuple = visit_List

    def visit_Index(self, node, **kwargs):
        """ df.index[4] """
        return self.visit(node.value)

    def visit_Subscript(self, node, **kwargs):
        value = self.visit(node.value)
        slobj = self.visit(node.slice)
        # evaluate the subscript expression itself with pd.eval
        result = pd.eval(
            slobj, local_dict=self.env, engine=self.engine, parser=self.parser
        )
        try:
            # a Term instance
            v = value.value[result]
        except AttributeError:
            # an Op instance
            lhs = pd.eval(
                value, local_dict=self.env, engine=self.engine, parser=self.parser
            )
            v = lhs[result]
        name = self.env.add_tmp(v)
        return self.term_type(name, env=self.env)

    def visit_Slice(self, node, **kwargs):
        """ df.index[slice(4,6)] """
        lower = node.lower
        if lower is not None:
            lower = self.visit(lower).value
        upper = node.upper
        if upper is not None:
            upper = self.visit(upper).value
        step = node.step
        if step is not None:
            step = self.visit(step).value

        return slice(lower, upper, step)

    def visit_Assign(self, node, **kwargs):
        """
        support a single assignment node, like

        c = a + b

        set the assigner at the top level, must be a Name node which
        might or might not exist in the resolvers

        """

        if len(node.targets) != 1:
            raise SyntaxError("can only assign a single expression")
        if not isinstance(node.targets[0], ast.Name):
            raise SyntaxError(
                "left hand side of an assignment must be a " "single name"
            )
        if self.env.target is None:
            raise ValueError("cannot assign without a target object")

        try:
            assigner = self.visit(node.targets[0], **kwargs)
        except UndefinedVariableError:
            # new column: keep the raw name rather than a resolved term
            assigner = node.targets[0].id

        self.assigner = getattr(assigner, "name", assigner)
        if self.assigner is None:
            raise SyntaxError(
                "left hand side of an assignment must be a " "single resolvable name"
            )

        # only the right-hand side participates in evaluation
        return self.visit(node.value, **kwargs)

    def visit_Attribute(self, node, **kwargs):
        attr = node.attr
        value = node.value

        ctx = node.ctx
        if isinstance(ctx, ast.Load):
            # resolve the value
            resolved = self.visit(value).value
            try:
                v = getattr(resolved, attr)
                name = self.env.add_tmp(v)
                return self.term_type(name, self.env)
            except AttributeError:
                # something like datetime.datetime where scope is overridden
                if isinstance(value, ast.Name) and value.id == attr:
                    return resolved

        raise ValueError("Invalid Attribute context {name}".format(name=ctx.__name__))

    def visit_Call(self, node, side=None, **kwargs):

        if isinstance(node.func, ast.Attribute):
            res = self.visit_Attribute(node.func)
        elif not isinstance(node.func, ast.Name):
            raise TypeError("Only named functions are supported")
        else:
            try:
                res = self.visit(node.func)
            except UndefinedVariableError:
                # Check if this is a supported function name
                try:
                    res = FuncNode(node.func.id)
                except ValueError:
                    # Raise original error
                    raise

        if res is None:
            raise ValueError("Invalid function call {func}".format(func=node.func.id))
        if hasattr(res, "value"):
            res = res.value

        if isinstance(res, FuncNode):

            new_args = [self.visit(arg) for arg in node.args]

            if node.keywords:
                raise TypeError(
                    'Function "{name}" does not support keyword '
                    "arguments".format(name=res.name)
                )

            return res(*new_args, **kwargs)

        else:

            # plain callable: evaluate arguments eagerly and wrap the result
            new_args = [self.visit(arg).value for arg in node.args]

            for key in node.keywords:
                if not isinstance(key, ast.keyword):
                    raise ValueError(
                        "keyword error in function call "
                        "'{func}'".format(func=node.func.id)
                    )

                if key.arg:
                    kwargs[key.arg] = self.visit(key.value).value

            return self.const_type(res(*new_args, **kwargs), self.env)

    def translate_In(self, op):
        # hook for subclasses/engines that spell ``in`` differently
        return op

    def visit_Compare(self, node, **kwargs):
        ops = node.ops
        comps = node.comparators

        # base case: we have something like a CMP b
        if len(comps) == 1:
            op = self.translate_In(ops[0])
            binop = ast.BinOp(op=op, left=node.left, right=comps[0])
            return self.visit(binop)

        # recursive case: we have a chained comparison, a CMP b CMP c, etc.
        left = node.left
        values = []
        for op, comp in zip(ops, comps):
            new_node = self.visit(
                ast.Compare(comparators=[comp], left=left, ops=[self.translate_In(op)])
            )
            left = comp
            values.append(new_node)
        return self.visit(ast.BoolOp(op=ast.And(), values=values))

    def _try_visit_binop(self, bop):
        # operands may already be visited (Op/Term) when folding a BoolOp
        if isinstance(bop, (Op, Term)):
            return bop
        return self.visit(bop)

    def visit_BoolOp(self, node, **kwargs):
        def visitor(x, y):
            lhs = self._try_visit_binop(x)
            rhs = self._try_visit_binop(y)

            op, op_class, lhs, rhs = self._maybe_transform_eq_ne(node, lhs, rhs)
            return self._maybe_evaluate_binop(op, node.op, lhs, rhs)

        operands = node.values
        # left-fold the operand list pairwise through the boolean op
        return reduce(visitor, operands)
|
||||
|
||||
|
||||
# node names additionally disallowed for the python engine, and the call
# names numexpr can evaluate natively
_python_not_supported = frozenset(["Dict", "BoolOp", "In", "NotIn"])
_numexpr_supported_calls = frozenset(_reductions + _mathops)
|
||||
|
||||
|
||||
@disallow(
    (_unsupported_nodes | _python_not_supported)
    - (_boolop_nodes | frozenset(["BoolOp", "Attribute", "In", "NotIn", "Tuple"]))
)
class PandasExprVisitor(BaseExprVisitor):
    # Note the preparser composition here omits ``_rewrite_assign``, so a
    # bare ``=`` is kept as an assignment rather than rewritten to ``==``.
    def __init__(
        self,
        env,
        engine,
        parser,
        preparser=partial(
            _preparse,
            f=_compose(
                _replace_locals, _replace_booleans, _clean_spaces_backtick_quoted_names
            ),
        ),
    ):
        super().__init__(env, engine, parser, preparser)
|
||||
|
||||
|
||||
@disallow(_unsupported_nodes | _python_not_supported | frozenset(["Not"]))
class PythonExprVisitor(BaseExprVisitor):
    # identity preparser: the python engine parses the source unmodified
    def __init__(self, env, engine, parser, preparser=lambda x: x):
        super().__init__(env, engine, parser, preparser=preparser)
|
||||
|
||||
|
||||
class Expr(StringMixin):

    """Object encapsulating an expression.

    Parameters
    ----------
    expr : str
    engine : str, optional, default 'numexpr'
    parser : str, optional, default 'pandas'
    env : Scope, optional, default None
    truediv : bool, optional, default True
    level : int, optional, default 0
    """

    def __init__(
        self, expr, engine="numexpr", parser="pandas", env=None, truediv=True, level=0
    ):
        self.expr = expr
        # level + 1 so the Scope captures the caller's frame, not __init__'s
        self.env = env or Scope(level=level + 1)
        self.engine = engine
        self.parser = parser
        # read back by BaseExprVisitor.visit_Div when building division ops
        self.env.scope["truediv"] = truediv
        self._visitor = _parsers[parser](self.env, self.engine, self.parser)
        # parse eagerly at construction time
        self.terms = self.parse()

    @property
    def assigner(self):
        # name the expression assigns to, if any (set by visit_Assign)
        return getattr(self._visitor, "assigner", None)

    def __call__(self):
        # evaluate the parsed term tree against the captured scope
        return self.terms(self.env)

    def __str__(self):
        return printing.pprint_thing(self.terms)

    def __len__(self):
        return len(self.expr)

    def parse(self):
        """Parse an expression"""
        return self._visitor.visit(self.expr)

    @property
    def names(self):
        """Get the names in an expression"""
        if is_term(self.terms):
            return frozenset([self.terms.name])
        return frozenset(term.name for term in com.flatten(self.terms))
|
||||
|
||||
|
||||
# maps the ``parser`` argument of Expr/pd.eval to its visitor class
_parsers = {"python": PythonExprVisitor, "pandas": PandasExprVisitor}
|
||||
@@ -0,0 +1,263 @@
|
||||
"""
|
||||
Expressions
|
||||
-----------
|
||||
|
||||
Offer fast expression evaluation through numexpr
|
||||
|
||||
"""
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._config import get_option
|
||||
|
||||
from pandas._libs.lib import values_from_object
|
||||
|
||||
from pandas.core.dtypes.generic import ABCDataFrame
|
||||
|
||||
from pandas.core.computation.check import _NUMEXPR_INSTALLED
|
||||
|
||||
if _NUMEXPR_INSTALLED:
    import numexpr as ne

# test hooks: when _TEST_MODE is truthy, each evaluation records whether
# numexpr was actually used (see _store_test_result)
_TEST_MODE = None
_TEST_RESULT = None
# whether numexpr is currently enabled; toggled by set_use_numexpr
_USE_NUMEXPR = _NUMEXPR_INSTALLED
# dispatch functions, bound by set_use_numexpr
_evaluate = None
_where = None

# the set of dtypes that we will allow pass to numexpr
_ALLOWED_DTYPES = {
    "evaluate": {"int64", "int32", "float64", "float32", "bool"},
    "where": {"int64", "float64", "bool"},
}

# the minimum prod shape that we will use numexpr
_MIN_ELEMENTS = 10000
|
||||
|
||||
|
||||
def set_use_numexpr(v=True):
    """Enable or disable numexpr-backed evaluation and rebind the
    module-level ``_evaluate``/``_where`` dispatchers accordingly.

    Enabling is a no-op when numexpr is not installed.
    """
    global _USE_NUMEXPR, _evaluate, _where
    if _NUMEXPR_INSTALLED:
        _USE_NUMEXPR = v

    # choose the implementations matching the (possibly updated) flag
    if _USE_NUMEXPR:
        _evaluate, _where = _evaluate_numexpr, _where_numexpr
    else:
        _evaluate, _where = _evaluate_standard, _where_standard
|
||||
|
||||
|
||||
def set_numexpr_threads(n=None):
    """If numexpr is installed and enabled, set its thread count to ``n``
    (auto-detected core count when ``n`` is None); otherwise do nothing.
    """
    if not (_NUMEXPR_INSTALLED and _USE_NUMEXPR):
        return
    count = ne.detect_number_of_cores() if n is None else n
    ne.set_num_threads(count)
|
||||
|
||||
|
||||
def _evaluate_standard(op, op_str, a, b, **eval_kwargs):
    """ standard evaluation """
    # record for the test hooks that numexpr was NOT used
    if _TEST_MODE:
        _store_test_result(False)
    # suppress numpy floating-point warnings during the operation
    with np.errstate(all="ignore"):
        return op(a, b)
|
||||
|
||||
|
||||
def _can_use_numexpr(op, op_str, a, b, dtype_check):
    """Return True if numexpr WILL be used for this operation.

    Requires an operator string, enough elements to amortize numexpr's
    call overhead, and operand dtypes that are all on the allowed list
    for ``dtype_check`` ("evaluate" or "where").
    """
    if op_str is None:
        return False

    # Too few elements: numexpr overhead would dominate.
    if np.prod(a.shape) <= _MIN_ELEMENTS:
        return False

    # Collect operand dtypes (as strings); bail out on mixed-dtype frames.
    dtypes = set()
    for operand in (a, b):
        if hasattr(operand, "dtypes"):
            counts = operand.dtypes.value_counts()
            if len(counts) > 1:
                return False
            dtypes |= set(counts.index.astype(str))
        elif isinstance(operand, np.ndarray):
            dtypes |= {operand.dtype.name}

    # An empty set (scalar operands) or a subset of the allowed dtypes is ok.
    return not len(dtypes) or _ALLOWED_DTYPES[dtype_check] >= dtypes
|
||||
|
||||
|
||||
def _evaluate_numexpr(op, op_str, a, b, truediv=True, reversed=False, **eval_kwargs):
    """Evaluate ``a <op> b`` via numexpr when profitable, else in Python.

    ``reversed`` indicates the call came from a reflected (r-) op method,
    in which case the operands are swapped before building the expression.
    """
    result = None

    if _can_use_numexpr(op, op_str, a, b, "evaluate"):
        try:
            if reversed:
                # we were originally called by a reversed op method
                a, b = b, a

            a_value = getattr(a, "values", a)
            b_value = getattr(b, "values", b)
            result = ne.evaluate(
                "a_value {op} b_value".format(op=op_str),
                local_dict={"a_value": a_value, "b_value": b_value},
                casting="safe",
                truediv=truediv,
                **eval_kwargs
            )
        except ValueError as err:
            # NOTE(review): every ValueError is silently swallowed here (the
            # "unknown type object" check has no effect either way) and we
            # fall through to the standard evaluation below.
            if "unknown type object" in str(err):
                pass

    if _TEST_MODE:
        _store_test_result(result is not None)

    if result is None:
        result = _evaluate_standard(op, op_str, a, b)

    return result
|
||||
|
||||
|
||||
def _where_standard(cond, a, b):
    """Vectorized ``where`` in plain numpy space."""
    cond_arr = values_from_object(cond)
    left = values_from_object(a)
    right = values_from_object(b)
    return np.where(cond_arr, left, right)
|
||||
|
||||
|
||||
def _where_numexpr(cond, a, b):
    """``where(cond, a, b)`` via numexpr when profitable, else numpy."""
    result = None

    if _can_use_numexpr(None, "where", a, b, "where"):
        try:
            cond_value = getattr(cond, "values", cond)
            a_value = getattr(a, "values", a)
            b_value = getattr(b, "values", b)
            result = ne.evaluate(
                "where(cond_value, a_value, b_value)",
                local_dict={
                    "cond_value": cond_value,
                    "a_value": a_value,
                    "b_value": b_value,
                },
                casting="safe",
            )
        except ValueError as err:
            # NOTE(review): all ValueErrors are swallowed (the check below is
            # a no-op) and we fall back to the numpy implementation.
            if "unknown type object" in str(err):
                pass
        except Exception as err:
            # re-wrap any other numexpr failure as a TypeError
            raise TypeError(str(err))

    if result is None:
        result = _where_standard(cond, a, b)

    return result
|
||||
|
||||
|
||||
# Module import side effect: bind the _evaluate/_where dispatchers according
# to the "compute.use_numexpr" option (defaults to on when numexpr exists).
set_use_numexpr(get_option("compute.use_numexpr"))
|
||||
|
||||
|
||||
def _has_bool_dtype(x):
    """Return True if ``x`` is boolean-typed.

    Covers DataFrames (membership test against ``x.dtypes`` — NOTE(review):
    ``in`` on a Series checks the index, presumably intentional here),
    dtype-bearing arrays, and plain Python/numpy bool scalars.
    """
    try:
        if isinstance(x, ABCDataFrame):
            return "bool" in x.dtypes
        return x.dtype == bool
    except AttributeError:
        return isinstance(x, (bool, np.bool_))
|
||||
|
||||
|
||||
def _bool_arith_check(
    op_str, a, b, not_allowed=frozenset(("/", "//", "**")), unsupported=None
):
    """Check whether ``op_str`` may run under numexpr given the operands.

    For two boolean operands: returns False (with a warning) when numexpr
    lacks bool support for the operator, and raises NotImplementedError for
    operators invalid on bools altogether.  Returns True otherwise.
    """
    if unsupported is None:
        unsupported = {"+": "|", "*": "&", "-": "^"}

    both_bool = _has_bool_dtype(a) and _has_bool_dtype(b)
    if both_bool:
        if op_str in unsupported:
            warnings.warn(
                "evaluating in Python space because the {op!r} "
                "operator is not supported by numexpr for "
                "the bool dtype, use {alt_op!r} instead".format(
                    op=op_str, alt_op=unsupported[op_str]
                )
            )
            return False

        if op_str in not_allowed:
            raise NotImplementedError(
                "operator {op!r} not implemented for " "bool dtypes".format(op=op_str)
            )
    return True
|
||||
|
||||
|
||||
def evaluate(op, op_str, a, b, use_numexpr=True, **eval_kwargs):
    """Evaluate and return the expression of the op on a and b.

    Parameters
    ----------
    op : callable
        The actual operator function.
    op_str : str
        The string version of the op.
    a : object
        Left operand.
    b : object
        Right operand.
    use_numexpr : bool, default True
        Whether to try to use numexpr (subject to the bool-dtype check).
    """
    if use_numexpr and _bool_arith_check(op_str, a, b):
        return _evaluate(op, op_str, a, b, **eval_kwargs)
    return _evaluate_standard(op, op_str, a, b)
|
||||
|
||||
|
||||
def where(cond, a, b, use_numexpr=True):
    """Evaluate the where condition cond on a and b.

    Parameters
    ----------
    cond : array-like of bool
        The selection condition.
    a : object
        Value returned where cond is True.
    b : object
        Value returned where cond is False.
    use_numexpr : bool, default True
        Whether to try to use numexpr.
    """
    impl = _where if use_numexpr else _where_standard
    return impl(cond, a, b)
|
||||
|
||||
|
||||
def set_test_mode(v=True):
    """Turn numexpr-usage tracking on or off.

    While enabled, a ``True`` is stored for every successful numexpr
    evaluation since the last ``get_test_result`` call.
    """
    global _TEST_MODE, _TEST_RESULT
    _TEST_RESULT = []
    _TEST_MODE = v
|
||||
|
||||
|
||||
def _store_test_result(used_numexpr):
    """Record (in test mode) that numexpr handled one evaluation.

    Only successful numexpr uses are appended; False results are dropped.
    """
    global _TEST_RESULT
    if used_numexpr:
        _TEST_RESULT.append(used_numexpr)
|
||||
|
||||
|
||||
def get_test_result():
    """Return the accumulated numexpr-usage records and reset the store."""
    global _TEST_RESULT
    out = _TEST_RESULT
    _TEST_RESULT = []
    return out
|
||||
587
venv/lib/python3.6/site-packages/pandas/core/computation/ops.py
Normal file
587
venv/lib/python3.6/site-packages/pandas/core/computation/ops.py
Normal file
@@ -0,0 +1,587 @@
|
||||
"""Operator classes for eval.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from distutils.version import LooseVersion
|
||||
from functools import partial
|
||||
import operator as op
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.tslibs import Timestamp
|
||||
|
||||
from pandas.core.dtypes.common import is_list_like, is_scalar
|
||||
|
||||
from pandas.core.base import StringMixin
|
||||
import pandas.core.common as com
|
||||
from pandas.core.computation.common import _ensure_decoded, _result_type_many
|
||||
from pandas.core.computation.scope import _DEFAULT_GLOBALS
|
||||
|
||||
from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded
|
||||
|
||||
# reduction functions supported inside eval expressions
_reductions = "sum", "prod"

# numpy unary math functions callable from expressions
_unary_math_ops = (
    "sin",
    "cos",
    "exp",
    "log",
    "expm1",
    "log1p",
    "sqrt",
    "sinh",
    "cosh",
    "tanh",
    "arcsin",
    "arccos",
    "arctan",
    "arccosh",
    "arcsinh",
    "arctanh",
    "abs",
    "log10",
    "floor",
    "ceil",
)
# numpy binary math functions callable from expressions
_binary_math_ops = ("arctan2",)

# every math function name accepted by FuncNode
_mathops = _unary_math_ops + _binary_math_ops


# prefix used to mangle local (@-referenced) variable names in scopes
_LOCAL_TAG = "__pd_eval_local_"
|
||||
|
||||
|
||||
class UndefinedVariableError(NameError):
    """NameError subclass raised when an expression references an
    undefined local or global variable."""

    def __init__(self, name, is_local):
        # local (@-prefixed) variables get a distinct message
        template = (
            "local variable {0!r} is not defined"
            if is_local
            else "name {0!r} is not defined"
        )
        super().__init__(template.format(name))
|
||||
|
||||
|
||||
class Term(StringMixin):
    """A leaf node of an expression tree: a resolved variable reference.

    ``__new__`` dispatches non-string names to ``Constant`` so that literal
    values entering the parser become constants automatically.
    """

    def __new__(cls, name, env, side=None, encoding=None):
        # non-string "names" are literal values -> Constant
        klass = Constant if not isinstance(name, str) else cls
        supr_new = super(Term, klass).__new__
        return supr_new(klass)

    def __init__(self, name, env, side=None, encoding=None):
        self._name = name
        # env is the enclosing Scope used for name resolution
        self.env = env
        self.side = side
        tname = str(name)
        # locals are @-mangled names or known default globals
        self.is_local = tname.startswith(_LOCAL_TAG) or tname in _DEFAULT_GLOBALS
        self._value = self._resolve_name()
        self.encoding = encoding

    @property
    def local_name(self):
        # the user-visible name, with the local mangling prefix stripped
        return self.name.replace(_LOCAL_TAG, "")

    def __str__(self):
        return pprint_thing(self.name)

    def __call__(self, *args, **kwargs):
        # calling a term simply yields its resolved value
        return self.value

    def evaluate(self, *args, **kwargs):
        # terms are already fully evaluated
        return self

    def _resolve_name(self):
        """Resolve this term's name in the environment and cache the value."""
        res = self.env.resolve(self.local_name, is_local=self.is_local)
        self.update(res)

        # eval only supports up to 2-dimensional objects
        if hasattr(res, "ndim") and res.ndim > 2:
            raise NotImplementedError(
                "N-dimensional objects, where N > 2," " are not supported with eval"
            )
        return res

    def update(self, value):
        """
        search order for local (i.e., @variable) variables:

        scope, key_variable
        [('locals', 'local_name'),
         ('globals', 'local_name'),
         ('locals', 'key'),
         ('globals', 'key')]
        """
        key = self.name

        # if it's a variable name (otherwise a constant)
        if isinstance(key, str):
            self.env.swapkey(self.local_name, key, new_value=value)

        self.value = value

    @property
    def is_scalar(self):
        return is_scalar(self._value)

    @property
    def type(self):
        """The dtype (or Python type, for scalars) of the resolved value."""
        try:
            # potentially very slow for large, mixed dtype frames
            return self._value.values.dtype
        except AttributeError:
            try:
                # ndarray
                return self._value.dtype
            except AttributeError:
                # scalar
                return type(self._value)

    # alias: terms report their dtype as the return type of evaluation
    return_type = type

    @property
    def raw(self):
        # debugging representation including the resolved type
        return pprint_thing(
            "{0}(name={1!r}, type={2})"
            "".format(self.__class__.__name__, self.name, self.type)
        )

    @property
    def is_datetime(self):
        try:
            t = self.type.type
        except AttributeError:
            t = self.type

        return issubclass(t, (datetime, np.datetime64))

    @property
    def value(self):
        return self._value

    @value.setter
    def value(self, new_value):
        self._value = new_value

    @property
    def name(self):
        return self._name

    @property
    def ndim(self):
        return self._value.ndim
|
||||
|
||||
|
||||
class Constant(Term):
    """A literal value appearing in an expression; resolves to itself."""

    def __init__(self, value, env, side=None, encoding=None):
        super().__init__(value, env, side=side, encoding=encoding)

    def _resolve_name(self):
        # a constant's "name" is simply its value
        return self._name

    @property
    def name(self):
        return self.value

    def __str__(self):
        # repr() keeps full float precision (str() could truncate on py2)
        return repr(self.name)
|
||||
|
||||
|
||||
# word-form boolean operators are rewritten to their bitwise equivalents
_bool_op_map = {"not": "~", "and": "&", "or": "|"}
|
||||
|
||||
|
||||
class Op(StringMixin):

    """Hold an operator of arbitrary arity
    """

    def __init__(self, op, operands, *args, **kwargs):
        # normalize word-form boolean ops ("and"/"or"/"not") to symbols
        self.op = _bool_op_map.get(op, op)
        self.operands = operands
        self.encoding = kwargs.get("encoding", None)

    def __iter__(self):
        return iter(self.operands)

    def __str__(self):
        """Print a generic n-ary operator and its operands using infix
        notation"""
        # recurse over the operands
        parened = ("({0})".format(pprint_thing(opr)) for opr in self.operands)
        return pprint_thing(" {0} ".format(self.op).join(parened))

    @property
    def return_type(self):
        # clobber types to bool if the op is a boolean operator
        if self.op in (_cmp_ops_syms + _bool_ops_syms):
            return np.bool_
        return _result_type_many(*(term.type for term in com.flatten(self)))

    @property
    def has_invalid_return_type(self):
        # NOTE: the subtraction yields a frozenset, so this returns a truthy
        # set (not a bool) when object-typed with non-object operands present
        types = self.operand_types
        obj_dtype_set = frozenset([np.dtype("object")])
        return self.return_type == object and types - obj_dtype_set

    @property
    def operand_types(self):
        # the set of types across all leaf terms of this subtree
        return frozenset(term.type for term in com.flatten(self))

    @property
    def is_scalar(self):
        return all(operand.is_scalar for operand in self.operands)

    @property
    def is_datetime(self):
        try:
            t = self.return_type.type
        except AttributeError:
            t = self.return_type

        return issubclass(t, (datetime, np.datetime64))
|
||||
|
||||
|
||||
def _in(x, y):
    """Compute the vectorized membership of ``x in y`` if possible,
    otherwise fall back to Python's ``in``."""
    try:
        return x.isin(y)
    except AttributeError:
        pass
    if is_list_like(x):
        # try the reversed vectorized test
        try:
            return y.isin(x)
        except AttributeError:
            pass
    return x in y
|
||||
|
||||
|
||||
def _not_in(x, y):
    """Compute the vectorized membership of ``x not in y`` if possible,
    otherwise fall back to Python's ``not in``."""
    try:
        return ~x.isin(y)
    except AttributeError:
        pass
    if is_list_like(x):
        # try the reversed vectorized test
        try:
            return ~y.isin(x)
        except AttributeError:
            pass
    return x not in y
|
||||
|
||||
|
||||
# comparison operators and their (vectorized where possible) implementations
_cmp_ops_syms = ">", "<", ">=", "<=", "==", "!=", "in", "not in"
_cmp_ops_funcs = op.gt, op.lt, op.ge, op.le, op.eq, op.ne, _in, _not_in
_cmp_ops_dict = dict(zip(_cmp_ops_syms, _cmp_ops_funcs))

# boolean operators; the word forms map to the same bitwise functions
_bool_ops_syms = "&", "|", "and", "or"
_bool_ops_funcs = op.and_, op.or_, op.and_, op.or_
_bool_ops_dict = dict(zip(_bool_ops_syms, _bool_ops_funcs))

# arithmetic operators
_arith_ops_syms = "+", "-", "*", "/", "**", "//", "%"
_arith_ops_funcs = (op.add, op.sub, op.mul, op.truediv, op.pow, op.floordiv, op.mod)
_arith_ops_dict = dict(zip(_arith_ops_syms, _arith_ops_funcs))

# operators needing special-case handling during parsing
_special_case_arith_ops_syms = "**", "//", "%"
_special_case_arith_ops_funcs = op.pow, op.floordiv, op.mod
_special_case_arith_ops_dict = dict(
    zip(_special_case_arith_ops_syms, _special_case_arith_ops_funcs)
)

# single lookup table covering all binary operators
_binary_ops_dict = {}

for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict):
    _binary_ops_dict.update(d)
|
||||
|
||||
|
||||
def _cast_inplace(terms, acceptable_dtypes, dtype):
    """Cast an expression's terms to ``dtype`` in place.

    Parameters
    ----------
    terms : Op
        The expression whose terms should be cast.
    acceptable_dtypes : list of acceptable numpy.dtype
        A term whose type is already in this list is left untouched.

        .. versionadded:: 0.19.0

    dtype : str or numpy.dtype
        The dtype to cast to.
    """
    target = np.dtype(dtype)
    for term in terms:
        if term.type in acceptable_dtypes:
            continue
        try:
            # array-like values know how to cast themselves
            casted = term.value.astype(target)
        except AttributeError:
            # plain scalar: construct the numpy scalar type directly
            casted = target.type(term.value)
        term.update(casted)
|
||||
|
||||
|
||||
def is_term(obj):
    # True if ``obj`` is a Term (leaf) node rather than an Op subtree
    return isinstance(obj, Term)
|
||||
|
||||
|
||||
class BinOp(Op):

    """Hold a binary operator and its operands

    Parameters
    ----------
    op : str
    left : Term or Op
    right : Term or Op
    """

    def __init__(self, op, lhs, rhs, **kwargs):
        super().__init__(op, (lhs, rhs))
        self.lhs = lhs
        self.rhs = rhs

        # scalar-only boolean ops cannot be evaluated by the engines
        self._disallow_scalar_only_bool_ops()

        # normalize datetime-vs-scalar comparisons before evaluation
        self.convert_values()

        try:
            self.func = _binary_ops_dict[op]
        except KeyError:
            # has to be made a list for python3
            keys = list(_binary_ops_dict.keys())
            raise ValueError(
                "Invalid binary operator {0!r}, valid"
                " operators are {1}".format(op, keys)
            )

    def __call__(self, env):
        """Recursively evaluate an expression in Python space.

        Parameters
        ----------
        env : Scope

        Returns
        -------
        object
            The result of an evaluated expression.
        """
        # handle truediv
        if self.op == "/" and env.scope["truediv"]:
            self.func = op.truediv

        # recurse over the left/right nodes
        left = self.lhs(env)
        right = self.rhs(env)

        return self.func(left, right)

    def evaluate(self, env, engine, parser, term_type, eval_in_python):
        """Evaluate a binary operation *before* being passed to the engine.

        Parameters
        ----------
        env : Scope
        engine : str
        parser : str
        term_type : type
        eval_in_python : list
            Operator strings that must be evaluated in Python space rather
            than handed to the engine.

        Returns
        -------
        term_type
            The "pre-evaluated" expression as an instance of ``term_type``
        """
        if engine == "python":
            res = self(env)
        else:
            # recurse over the left/right nodes
            left = self.lhs.evaluate(
                env,
                engine=engine,
                parser=parser,
                term_type=term_type,
                eval_in_python=eval_in_python,
            )
            right = self.rhs.evaluate(
                env,
                engine=engine,
                parser=parser,
                term_type=term_type,
                eval_in_python=eval_in_python,
            )

            # base cases
            if self.op in eval_in_python:
                res = self.func(left.value, right.value)
            else:
                from pandas.core.computation.eval import eval

                res = eval(self, local_dict=env, engine=engine, parser=parser)

        # stash the result as a temporary in the scope and wrap it in a term
        name = env.add_tmp(res)
        return term_type(name, env=env)

    def convert_values(self):
        """Convert datetimes to a comparable value in an expression.
        """

        def stringify(value):
            if self.encoding is not None:
                encoder = partial(pprint_thing_encoded, encoding=self.encoding)
            else:
                encoder = pprint_thing
            return encoder(value)

        lhs, rhs = self.lhs, self.rhs

        # datetime column compared against a scalar: coerce the scalar to a
        # UTC Timestamp so the comparison is well-defined
        if is_term(lhs) and lhs.is_datetime and is_term(rhs) and rhs.is_scalar:
            v = rhs.value
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = Timestamp(_ensure_decoded(v))
            if v.tz is not None:
                v = v.tz_convert("UTC")
            self.rhs.update(v)

        # mirror image: scalar on the left, datetime on the right
        if is_term(rhs) and rhs.is_datetime and is_term(lhs) and lhs.is_scalar:
            v = lhs.value
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = Timestamp(_ensure_decoded(v))
            if v.tz is not None:
                v = v.tz_convert("UTC")
            self.lhs.update(v)

    def _disallow_scalar_only_bool_ops(self):
        # boolean ops need at least one non-scalar bool operand
        if (
            (self.lhs.is_scalar or self.rhs.is_scalar)
            and self.op in _bool_ops_dict
            and (
                not (
                    issubclass(self.rhs.return_type, (bool, np.bool_))
                    and issubclass(self.lhs.return_type, (bool, np.bool_))
                )
            )
        ):
            raise NotImplementedError("cannot evaluate scalar only bool ops")
|
||||
|
||||
|
||||
def isnumeric(dtype):
    """Return True if ``dtype`` describes a numpy numeric dtype."""
    scalar_type = np.dtype(dtype).type
    return issubclass(scalar_type, np.number)
|
||||
|
||||
|
||||
class Div(BinOp):

    """Div operator to special case casting.

    Parameters
    ----------
    lhs, rhs : Term or Op
        The Terms or Ops in the ``/`` expression.
    truediv : bool
        Whether or not to use true division. With Python 3 this happens
        regardless of the value of ``truediv``.

    Raises
    ------
    TypeError
        If either operand is non-numeric.
    """

    def __init__(self, lhs, rhs, truediv, *args, **kwargs):
        super().__init__("/", lhs, rhs, *args, **kwargs)

        if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type):
            raise TypeError(
                "unsupported operand type(s) for {0}:"
                " '{1}' and '{2}'".format(self.op, lhs.return_type, rhs.return_type)
            )

        # do not upcast float32s to float64 un-necessarily.
        # np.float64 replaces the deprecated np.float_ alias (an identical
        # type on NumPy < 2; np.float_ was removed in NumPy 2.0).
        acceptable_dtypes = [np.float32, np.float64]
        _cast_inplace(com.flatten(self), acceptable_dtypes, np.float64)
|
||||
|
||||
|
||||
# unary operators; "~" and "not" both map to bitwise inversion
_unary_ops_syms = "+", "-", "~", "not"
_unary_ops_funcs = op.pos, op.neg, op.invert, op.invert
_unary_ops_dict = dict(zip(_unary_ops_syms, _unary_ops_funcs))
|
||||
|
||||
|
||||
class UnaryOp(Op):

    """Hold a unary operator and its operands

    Parameters
    ----------
    op : str
        The token used to represent the operator.
    operand : Term or Op
        The Term or Op operand to the operator.

    Raises
    ------
    ValueError
        * If no function associated with the passed operator token is found.
    """

    def __init__(self, op, operand):
        super().__init__(op, (operand,))
        self.operand = operand

        try:
            self.func = _unary_ops_dict[op]
        except KeyError:
            raise ValueError(
                "Invalid unary operator {0!r}, valid operators "
                "are {1}".format(op, _unary_ops_syms)
            )

    def __call__(self, env):
        # evaluate the operand, then apply the unary function
        operand = self.operand(env)
        return self.func(operand)

    def __str__(self):
        return pprint_thing("{0}({1})".format(self.op, self.operand))

    @property
    def return_type(self):
        # bool in -> bool out; comparisons/bool subtrees also yield bool;
        # everything else is treated as int (e.g. ~ on integers)
        operand = self.operand
        if operand.return_type == np.dtype("bool"):
            return np.dtype("bool")
        if isinstance(operand, Op) and (
            operand.op in _cmp_ops_dict or operand.op in _bool_ops_dict
        ):
            return np.dtype("bool")
        return np.dtype("int")
|
||||
|
||||
|
||||
class MathCall(Op):
    """A call to a supported numpy math function inside an expression."""

    def __init__(self, func, args):
        super().__init__(func.name, args)
        self.func = func

    def __call__(self, env):
        # evaluate each argument first, then apply the wrapped numpy function
        resolved = [operand(env) for operand in self.operands]
        with np.errstate(all="ignore"):
            return self.func.func(*resolved)

    def __str__(self):
        rendered = map(str, self.operands)
        return pprint_thing("{0}({1})".format(self.op, ",".join(rendered)))
|
||||
|
||||
|
||||
class FuncNode:
    # wraps a numpy math function referenced by name in an expression

    def __init__(self, name):
        from pandas.core.computation.check import _NUMEXPR_INSTALLED, _NUMEXPR_VERSION

        # floor/ceil require numexpr >= 2.6.9 when numexpr is in use
        if name not in _mathops or (
            _NUMEXPR_INSTALLED
            and _NUMEXPR_VERSION < LooseVersion("2.6.9")
            and name in ("floor", "ceil")
        ):
            raise ValueError('"{0}" is not a supported function'.format(name))

        self.name = name
        self.func = getattr(np, name)

    def __call__(self, *args):
        # defer actual evaluation to a MathCall node
        return MathCall(self, args)
|
||||
@@ -0,0 +1,609 @@
|
||||
""" manage PyTables query interface via Expressions """
|
||||
|
||||
import ast
|
||||
from functools import partial
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.tslibs import Timedelta, Timestamp
|
||||
from pandas.compat.chainmap import DeepChainMap
|
||||
|
||||
from pandas.core.dtypes.common import is_list_like
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.base import StringMixin
|
||||
import pandas.core.common as com
|
||||
from pandas.core.computation import expr, ops
|
||||
from pandas.core.computation.common import _ensure_decoded
|
||||
from pandas.core.computation.expr import BaseExprVisitor
|
||||
from pandas.core.computation.ops import UndefinedVariableError, is_term
|
||||
|
||||
from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded
|
||||
|
||||
|
||||
class Scope(expr.Scope):
    # adds the table's "queryables" (column metadata) on top of the eval Scope
    __slots__ = ("queryables",)

    def __init__(self, level, global_dict=None, local_dict=None, queryables=None):
        # bump the level to account for this extra stack frame
        super().__init__(level + 1, global_dict=global_dict, local_dict=local_dict)
        self.queryables = queryables or dict()
|
||||
|
||||
|
||||
class Term(ops.Term):
    """A term in a PyTables query expression.

    Left-hand-side terms must name a queryable column; right-hand-side
    terms resolve from the scope but may fall back to their literal name.
    """

    def __new__(cls, name, env, side=None, encoding=None):
        # non-string names are literals -> Constant
        klass = Constant if not isinstance(name, str) else cls
        supr_new = StringMixin.__new__
        return supr_new(klass)

    def __init__(self, name, env, side=None, encoding=None):
        super().__init__(name, env, side=side, encoding=encoding)

    def _resolve_name(self):
        # must be a queryables
        if self.side == "left":
            if self.name not in self.env.queryables:
                raise NameError("name {name!r} is not defined".format(name=self.name))
            return self.name

        # resolve the rhs (and allow it to be None)
        try:
            return self.env.resolve(self.name, is_local=False)
        except UndefinedVariableError:
            return self.name

    # read-only property overwriting read/write property
    @property  # type: ignore
    def value(self):
        return self._value
|
||||
|
||||
|
||||
class Constant(Term):
    """A literal value inside a PyTables query expression."""

    def __init__(self, value, env, side=None, encoding=None):
        super().__init__(value, env, side=side, encoding=encoding)

    def _resolve_name(self):
        # constants resolve to their own value
        return self._name
|
||||
|
||||
|
||||
class BinOp(ops.BinOp):
    """A binary operation against a PyTables table's queryable columns."""

    # maximum number of selector values before falling back to a post-read
    # filter instead of an in-kernel condition
    _max_selectors = 31

    def __init__(self, op, lhs, rhs, queryables, encoding):
        super().__init__(op, lhs, rhs)
        self.queryables = queryables
        self.encoding = encoding
        # populated by the specialized subclasses during evaluate()
        self.filter = None
        self.condition = None

    def _disallow_scalar_only_bool_ops(self):
        # scalar-only bool ops are fine for PyTables queries
        pass

    def prune(self, klass):
        """Collapse this subtree into a specialized ``klass`` instance
        (condition or filter), or None when not expressible."""

        def pr(left, right):
            """ create and return a new specialized BinOp from myself """

            if left is None:
                return right
            elif right is None:
                return left

            k = klass
            if isinstance(left, ConditionBinOp):
                if isinstance(right, ConditionBinOp):
                    # two conditions combine into a joint condition
                    k = JointConditionBinOp
                elif isinstance(left, k):
                    return left
                elif isinstance(right, k):
                    return right

            elif isinstance(left, FilterBinOp):
                if isinstance(right, FilterBinOp):
                    # two filters combine into a joint filter
                    k = JointFilterBinOp
                elif isinstance(left, k):
                    return left
                elif isinstance(right, k):
                    return right

            return k(
                self.op, left, right, queryables=self.queryables, encoding=self.encoding
            ).evaluate()

        left, right = self.lhs, self.rhs

        # recurse into non-term children before combining
        if is_term(left) and is_term(right):
            res = pr(left.value, right.value)
        elif not is_term(left) and is_term(right):
            res = pr(left.prune(klass), right.value)
        elif is_term(left) and not is_term(right):
            res = pr(left.value, right.prune(klass))
        elif not (is_term(left) or is_term(right)):
            res = pr(left.prune(klass), right.prune(klass))

        return res

    def conform(self, rhs):
        """ inplace conform rhs """
        if not is_list_like(rhs):
            rhs = [rhs]
        if isinstance(rhs, np.ndarray):
            rhs = rhs.ravel()
        return rhs

    @property
    def is_valid(self):
        """ return True if this is a valid field """
        return self.lhs in self.queryables

    @property
    def is_in_table(self):
        """ return True if this is a valid column name for generation (e.g. an
        actual column in the table) """
        return self.queryables.get(self.lhs) is not None

    @property
    def kind(self):
        """ the kind of my field """
        return getattr(self.queryables.get(self.lhs), "kind", None)

    @property
    def meta(self):
        """ the meta of my field """
        return getattr(self.queryables.get(self.lhs), "meta", None)

    @property
    def metadata(self):
        """ the metadata of my field """
        return getattr(self.queryables.get(self.lhs), "metadata", None)

    def generate(self, v):
        """ create and return the op string for this TermValue """
        val = v.tostring(self.encoding)
        return "({lhs} {op} {val})".format(lhs=self.lhs, op=self.op, val=val)

    def convert_value(self, v):
        """ convert the expression that is in the term to something that is
        accepted by pytables """

        def stringify(value):
            if self.encoding is not None:
                encoder = partial(pprint_thing_encoded, encoding=self.encoding)
            else:
                encoder = pprint_thing
            return encoder(value)

        # dispatch on the column's declared kind/meta
        # (TermValue is defined elsewhere in this module)
        kind = _ensure_decoded(self.kind)
        meta = _ensure_decoded(self.meta)
        if kind == "datetime64" or kind == "datetime":
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = _ensure_decoded(v)
            v = Timestamp(v)
            if v.tz is not None:
                # store comparisons in UTC
                v = v.tz_convert("UTC")
            return TermValue(v, v.value, kind)
        elif kind == "timedelta64" or kind == "timedelta":
            v = Timedelta(v, unit="s").value
            return TermValue(int(v), v, kind)
        elif meta == "category":
            # compare by category code
            metadata = com.values_from_object(self.metadata)
            result = metadata.searchsorted(v, side="left")

            # result returns 0 if v is first element or if v is not in metadata
            # check that metadata contains v
            if not result and v not in metadata:
                result = -1
            return TermValue(result, result, "integer")
        elif kind == "integer":
            v = int(float(v))
            return TermValue(v, v, kind)
        elif kind == "float":
            v = float(v)
            return TermValue(v, v, kind)
        elif kind == "bool":
            if isinstance(v, str):
                # common "falsy" spellings map to False; everything else True
                v = not v.strip().lower() in [
                    "false",
                    "f",
                    "no",
                    "n",
                    "none",
                    "0",
                    "[]",
                    "{}",
                    "",
                ]
            else:
                v = bool(v)
            return TermValue(v, v, kind)
        elif isinstance(v, str):
            # string quoting
            return TermValue(v, stringify(v), "string")
        else:
            raise TypeError(
                "Cannot compare {v} of type {typ} to {kind} column".format(
                    v=v, typ=type(v), kind=kind
                )
            )

    def convert_values(self):
        # datetime coercion from the base class does not apply here
        pass
|
||||
|
||||
|
||||
class FilterBinOp(BinOp):
    """A binary op realized as a post-read filter (axis.isin(...))."""

    def __str__(self):
        return pprint_thing(
            "[Filter : [{lhs}] -> [{op}]".format(lhs=self.filter[0], op=self.filter[1])
        )

    def invert(self):
        """ invert the filter """
        if self.filter is not None:
            f = list(self.filter)
            # swap the filter callable for its inverted form
            f[1] = self.generate_filter_op(invert=True)
            self.filter = tuple(f)
        return self

    def format(self):
        """ return the actual filter format """
        return [self.filter]

    def evaluate(self):
        """Build the (lhs, filter_op, values) triple, or return None when a
        kernel condition should be used instead."""

        if not self.is_valid:
            raise ValueError("query term is not valid [{slf}]".format(slf=self))

        rhs = self.conform(self.rhs)
        values = [TermValue(v, v, self.kind).value for v in rhs]

        if self.is_in_table:

            # if too many values to create the expression, use a filter instead
            if self.op in ["==", "!="] and len(values) > self._max_selectors:

                filter_op = self.generate_filter_op()
                self.filter = (self.lhs, filter_op, pd.Index(values))

                return self
            # small value sets against table columns become conditions
            return None

        # equality conditions
        if self.op in ["==", "!="]:

            filter_op = self.generate_filter_op()
            self.filter = (self.lhs, filter_op, pd.Index(values))

        else:
            raise TypeError(
                "passing a filterable condition to a non-table "
                "indexer [{slf}]".format(slf=self)
            )

        return self

    def generate_filter_op(self, invert=False):
        # "!=" (or an inverted "==") excludes the values; otherwise include
        if (self.op == "!=" and not invert) or (self.op == "==" and invert):
            return lambda axis, vals: ~axis.isin(vals)
        else:
            return lambda axis, vals: axis.isin(vals)
|
||||
|
||||
|
||||
class JointFilterBinOp(FilterBinOp):
    """Conjunction of two filters; cannot be collapsed to a single filter."""

    def format(self):
        raise NotImplementedError("unable to collapse Joint Filters")

    def evaluate(self):
        # nothing to precompute; evaluation happens through the child filters
        return self
|
||||
|
||||
|
||||
class ConditionBinOp(BinOp):
    """A binary op rendered as a numexpr condition string for PyTables."""

    def __str__(self):
        return pprint_thing("[Condition : [{cond}]]".format(cond=self.condition))

    def invert(self):
        """ invert the condition """
        # numexpr conditions cannot be negated in-kernel; the (disabled)
        # alternative would have been:
        #   self.condition = "~(%s)" % self.condition
        raise NotImplementedError(
            "cannot use an invert condition when " "passing to numexpr"
        )

    def format(self):
        """ return the actual ne format """
        return self.condition

    def evaluate(self):
        """Build the numexpr condition string, or return None when the op
        must be handled as a post-read filter instead."""

        if not self.is_valid:
            raise ValueError("query term is not valid [{slf}]".format(slf=self))

        # convert values if we are in the table
        if not self.is_in_table:
            return None

        rhs = self.conform(self.rhs)
        values = [self.convert_value(v) for v in rhs]

        # equality conditions
        if self.op in ["==", "!="]:

            # too many values to create the expression?
            if len(values) <= self._max_selectors:
                vs = [self.generate(v) for v in values]
                self.condition = "({cond})".format(cond=" | ".join(vs))

            # use a filter after reading
            else:
                return None
        else:
            self.condition = self.generate(values[0])

        return self
|
||||
|
||||
|
||||
class JointConditionBinOp(ConditionBinOp):
    """Combine two already-evaluated conditions with a boolean operator."""

    def evaluate(self):
        # both operands carry their numexpr strings on ``.condition``;
        # parenthesize the combination to preserve operator precedence
        left = self.lhs.condition
        right = self.rhs.condition
        self.condition = "({lhs} {op} {rhs})".format(lhs=left, op=self.op, rhs=right)
        return self
|
||||
|
||||
|
||||
class UnaryOp(ops.UnaryOp):
    """Unary operator node; only invert ("~") is supported for pytables."""

    def prune(self, klass):
        """Reduce this node to an inverted ``klass`` instance, or ``None``.

        The operand is pruned first; only when the pruned operand actually
        carries a payload (``condition`` for ConditionBinOp, ``filter`` for
        FilterBinOp) is its ``invert()`` result returned.
        """

        if self.op != "~":
            raise NotImplementedError("UnaryOp only support invert type ops")

        operand = self.operand
        operand = operand.prune(klass)

        if operand is not None:
            if issubclass(klass, ConditionBinOp):
                if operand.condition is not None:
                    return operand.invert()
            elif issubclass(klass, FilterBinOp):
                if operand.filter is not None:
                    return operand.invert()

        # nothing usable to invert
        return None
|
||||
|
||||
|
||||
# op-kind -> implementing class, consumed by the expression visitor machinery
_op_classes = {"unary": UnaryOp}
|
||||
|
||||
|
||||
class ExprVisitor(BaseExprVisitor):
    """AST visitor producing pytables-flavoured terms from a query string."""

    const_type = Constant
    term_type = Term

    def __init__(self, env, engine, parser, **kwargs):
        super().__init__(env, engine, parser)
        # Install one visit_<Node> method per supported binary operator,
        # each returning a partially-applied BinOp constructor.  The
        # ``bin_op=bin_op`` default binds the loop variable eagerly,
        # avoiding the classic late-binding closure bug.
        for bin_op in self.binary_ops:
            bin_node = self.binary_op_nodes_map[bin_op]
            setattr(
                self,
                "visit_{node}".format(node=bin_node),
                lambda node, bin_op=bin_op: partial(BinOp, bin_op, **kwargs),
            )

    def visit_UnaryOp(self, node, **kwargs):
        # ``not``/``~`` -> pytables UnaryOp; unary minus folds into the
        # constant; unary plus is rejected.  Other ops fall through to None.
        if isinstance(node.op, (ast.Not, ast.Invert)):
            return UnaryOp("~", self.visit(node.operand))
        elif isinstance(node.op, ast.USub):
            return self.const_type(-self.visit(node.operand).value, self.env)
        elif isinstance(node.op, ast.UAdd):
            raise NotImplementedError("Unary addition not supported")

    def visit_Index(self, node, **kwargs):
        # unwrap ast.Index down to the underlying python value
        return self.visit(node.value).value

    def visit_Assign(self, node, **kwargs):
        # treat ``a = b`` inside a query as the comparison ``a == b``
        cmpr = ast.Compare(
            ops=[ast.Eq()], left=node.targets[0], comparators=[node.value]
        )
        return self.visit(cmpr)

    def visit_Subscript(self, node, **kwargs):
        # only allow simple subscripts

        value = self.visit(node.value)
        slobj = self.visit(node.slice)
        try:
            # unwrap a Term/Constant to its underlying python value
            value = value.value
        except AttributeError:
            pass

        try:
            return self.const_type(value[slobj], self.env)
        except TypeError:
            raise ValueError(
                "cannot subscript {value!r} with "
                "{slobj!r}".format(value=value, slobj=slobj)
            )

    def visit_Attribute(self, node, **kwargs):
        """Resolve ``value.attr`` against the resolved value's attributes."""
        attr = node.attr
        value = node.value

        ctx = node.ctx.__class__
        if ctx == ast.Load:
            # resolve the value
            resolved = self.visit(value)

            # try to get the value to see if we are another expression
            try:
                resolved = resolved.value
            except (AttributeError):
                pass

            try:
                return self.term_type(getattr(resolved, attr), self.env)
            except AttributeError:

                # something like datetime.datetime where scope is overridden
                if isinstance(value, ast.Name) and value.id == attr:
                    return resolved

        raise ValueError("Invalid Attribute context {name}".format(name=ctx.__name__))

    def translate_In(self, op):
        # pytables has no ``in`` operator; rewrite membership as equality
        return ast.Eq() if isinstance(op, ast.In) else op

    def _rewrite_membership_op(self, node, left, right):
        # no structural rewrite needed beyond visiting the op node itself
        return self.visit(node.op), node.op, left, right
|
||||
|
||||
|
||||
def _validate_where(w):
    """
    Validate that the where statement is of the right type.

    The type may either be String, Expr, or list-like of Exprs.

    Parameters
    ----------
    w : String term expression, Expr, or list-like of Exprs.

    Returns
    -------
    where : The original where clause if the check was successful.

    Raises
    ------
    TypeError : An invalid data type was passed in for w (e.g. dict).
    """
    # guard clause: reject anything that is not a string, Expr, or list-like
    acceptable = isinstance(w, (Expr, str)) or is_list_like(w)
    if not acceptable:
        raise TypeError(
            "where must be passed as a string, Expr, " "or list-like of Exprs"
        )
    return w
|
||||
|
||||
|
||||
class Expr(expr.Expr):

    """ hold a pytables like expression, comprised of possibly multiple 'terms'

    Parameters
    ----------
    where : string term expression, Expr, or list-like of Exprs
    queryables : a "kinds" map (dict of column name -> kind), or None if column
        is non-indexable
    encoding : an encoding that will encode the query terms

    Returns
    -------
    an Expr object

    Examples
    --------

    'index>=date'
    "columns=['A', 'D']"
    'columns=A'
    'columns==A'
    "~(columns=['A','B'])"
    'index>df.index[3] & string="bar"'
    '(index>df.index[3] & index<=df.index[6]) | string="bar"'
    "ts>=Timestamp('2012-02-01')"
    "major_axis>=20130101"
    """

    def __init__(self, where, queryables=None, encoding=None, scope_level=0):

        where = _validate_where(where)

        self.encoding = encoding
        self.condition = None
        self.filter = None
        self.terms = None
        self._visitor = None

        # capture the environment if needed
        local_dict = DeepChainMap()

        if isinstance(where, Expr):
            # re-use the inner Expr's captured scope and raw string
            local_dict = where.env.scope
            where = where.expr

        elif isinstance(where, (list, tuple)):
            for idx, w in enumerate(where):
                if isinstance(w, Expr):
                    # NOTE: the last Expr in the list wins the scope capture
                    local_dict = w.env.scope
                else:
                    w = _validate_where(w)
                    where[idx] = w
            # AND the pieces together, each parenthesized
            where = " & ".join(map("({})".format, com.flatten(where)))  # noqa

        self.expr = where
        self.env = Scope(scope_level + 1, local_dict=local_dict)

        if queryables is not None and isinstance(self.expr, str):
            self.env.queryables.update(queryables)
            self._visitor = ExprVisitor(
                self.env,
                queryables=queryables,
                parser="pytables",
                engine="pytables",
                encoding=encoding,
            )
            self.terms = self.parse()

    def __str__(self):
        # prefer the parsed terms; fall back to the raw expression string
        if self.terms is not None:
            return pprint_thing(self.terms)
        return pprint_thing(self.expr)

    def evaluate(self):
        """ create and return the numexpr condition and filter """

        try:
            self.condition = self.terms.prune(ConditionBinOp)
        except AttributeError:
            raise ValueError(
                "cannot process expression [{expr}], [{slf}] "
                "is not a valid condition".format(expr=self.expr, slf=self)
            )
        try:
            self.filter = self.terms.prune(FilterBinOp)
        except AttributeError:
            raise ValueError(
                "cannot process expression [{expr}], [{slf}] "
                "is not a valid filter".format(expr=self.expr, slf=self)
            )

        return self.condition, self.filter
|
||||
|
||||
|
||||
class TermValue:

    """Container pairing a raw term value with its converted form."""

    def __init__(self, value, converted, kind):
        # raw value as parsed from the expression
        self.value = value
        # value converted for the target column
        self.converted = converted
        # the column "kind", e.g. 'string', 'float', 'integer'
        self.kind = kind

    def tostring(self, encoding):
        """Render the converted value for embedding in a query string.

        Unencoded strings are double-quoted; floats go through ``repr``
        so they round-trip exactly; everything else is returned as-is.
        """
        if self.kind == "string":
            if encoding is None:
                return '"{converted}"'.format(converted=self.converted)
            return self.converted
        if self.kind == "float":
            # repr() round-trips floats where str() historically did not
            return repr(self.converted)
        return self.converted
|
||||
|
||||
|
||||
def maybe_expression(s):
    """Loosely check whether ``s`` could be a pytables expression.

    Non-strings never qualify; otherwise at least one known operator
    (or a bare "=") must appear somewhere in the string.
    """
    if not isinstance(s, str):
        return False

    candidates = ExprVisitor.binary_ops + ExprVisitor.unary_ops + ("=",)

    # make sure we have an op at least
    for op in candidates:
        if op in s:
            return True
    return False
|
||||
@@ -0,0 +1,309 @@
|
||||
"""
|
||||
Module for scope operations
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import inspect
|
||||
from io import StringIO
|
||||
import itertools
|
||||
import pprint
|
||||
import struct
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.tslibs import Timestamp
|
||||
from pandas.compat.chainmap import DeepChainMap
|
||||
|
||||
from pandas.core.base import StringMixin
|
||||
import pandas.core.computation as compu
|
||||
|
||||
|
||||
def _ensure_scope(
    level, global_dict=None, local_dict=None, resolvers=(), target=None, **kwargs
):
    """Ensure that we are grabbing the correct scope."""
    # +1 so the new Scope looks past this helper's own stack frame
    scope_kwargs = dict(
        global_dict=global_dict,
        local_dict=local_dict,
        resolvers=resolvers,
        target=target,
    )
    return Scope(level + 1, **scope_kwargs)
|
||||
|
||||
|
||||
def _replacer(x):
|
||||
"""Replace a number with its hexadecimal representation. Used to tag
|
||||
temporary variables with their calling scope's id.
|
||||
"""
|
||||
# get the hex repr of the binary char and remove 0x and pad by pad_size
|
||||
# zeros
|
||||
try:
|
||||
hexin = ord(x)
|
||||
except TypeError:
|
||||
# bytes literals masquerade as ints when iterating in py3
|
||||
hexin = x
|
||||
|
||||
return hex(hexin)
|
||||
|
||||
|
||||
def _raw_hex_id(obj):
    """Return the padded hexadecimal id of ``obj``."""
    # interpret the id as a pointer, since that's really what id() returns;
    # iterating the packed bytes yields ints, each rendered as a hex chunk
    packed = struct.pack("@P", id(obj))
    return "".join(hex(byte) for byte in packed)
|
||||
|
||||
|
||||
# Names made available inside every evaluated expression, regardless of the
# caller's namespace.  NOTE(review): the "True"/"False" entries presumably
# exist so expression text can reference them as plain names — confirm.
_DEFAULT_GLOBALS = {
    "Timestamp": Timestamp,
    "datetime": datetime.datetime,
    "True": True,
    "False": False,
    "list": list,
    "tuple": tuple,
    "inf": np.inf,
    "Inf": np.inf,
}
|
||||
|
||||
|
||||
def _get_pretty_string(obj):
|
||||
"""Return a prettier version of obj
|
||||
|
||||
Parameters
|
||||
----------
|
||||
obj : object
|
||||
Object to pretty print
|
||||
|
||||
Returns
|
||||
-------
|
||||
s : str
|
||||
Pretty print object repr
|
||||
"""
|
||||
sio = StringIO()
|
||||
pprint.pprint(obj, stream=sio)
|
||||
return sio.getvalue()
|
||||
|
||||
|
||||
class Scope(StringMixin):

    """Object to hold scope, with a few bells to deal with some custom syntax
    and contexts added by pandas.

    Parameters
    ----------
    level : int
    global_dict : dict or None, optional, default None
    local_dict : dict or Scope or None, optional, default None
    resolvers : list-like or None, optional, default None
    target : object

    Attributes
    ----------
    level : int
    scope : DeepChainMap
    target : object
    temps : dict
    """

    # NOTE(review): ``resolvers`` is assigned in __init__ but is not listed
    # here — this only works if a base class leaves __dict__ available; confirm.
    __slots__ = "level", "scope", "target", "temps"

    def __init__(
        self, level, global_dict=None, local_dict=None, resolvers=(), target=None
    ):
        self.level = level + 1

        # shallow copy because we don't want to keep filling this up with what
        # was there before if there are multiple calls to Scope/_ensure_scope
        self.scope = DeepChainMap(_DEFAULT_GLOBALS.copy())
        self.target = target

        if isinstance(local_dict, Scope):
            # inherit scope, target and frame depth from the given Scope
            self.scope.update(local_dict.scope)
            if local_dict.target is not None:
                self.target = local_dict.target
            self.update(local_dict.level)

        frame = sys._getframe(self.level)

        try:
            # shallow copy here because we don't want to replace what's in
            # scope when we align terms (alignment accesses the underlying
            # numpy array of pandas objects)
            self.scope = self.scope.new_child((global_dict or frame.f_globals).copy())
            if not isinstance(local_dict, Scope):
                self.scope = self.scope.new_child((local_dict or frame.f_locals).copy())
        finally:
            # break the reference cycle with the frame object
            del frame

        # assumes that resolvers are going from outermost scope to inner
        if isinstance(local_dict, Scope):
            resolvers += tuple(local_dict.resolvers.maps)
        self.resolvers = DeepChainMap(*resolvers)
        self.temps = {}

    def __str__(self):
        scope_keys = _get_pretty_string(list(self.scope.keys()))
        res_keys = _get_pretty_string(list(self.resolvers.keys()))
        unicode_str = "{name}(scope={scope_keys}, resolvers={res_keys})"
        return unicode_str.format(
            name=type(self).__name__, scope_keys=scope_keys, res_keys=res_keys
        )

    @property
    def has_resolvers(self):
        """Return whether we have any extra scope.

        For example, DataFrames pass their columns as resolvers during calls
        to ``DataFrame.eval()`` and ``DataFrame.query()``.

        Returns
        -------
        hr : bool
        """
        return bool(len(self.resolvers))

    def resolve(self, key, is_local):
        """Resolve a variable name in a possibly local context

        Parameters
        ----------
        key : str
            A variable name
        is_local : bool
            Flag indicating whether the variable is local or not (prefixed with
            the '@' symbol)

        Returns
        -------
        value : object
            The value of a particular variable
        """
        try:
            # only look for locals in outer scope
            if is_local:
                return self.scope[key]

            # not a local variable so check in resolvers if we have them
            if self.has_resolvers:
                return self.resolvers[key]

            # if we're here that means that we have no locals and we also have
            # no resolvers
            assert not is_local and not self.has_resolvers
            return self.scope[key]
        except KeyError:
            try:
                # last ditch effort we look in temporaries
                # these are created when parsing indexing expressions
                # e.g., df[df > 0]
                return self.temps[key]
            except KeyError:
                raise compu.ops.UndefinedVariableError(key, is_local)

    def swapkey(self, old_key, new_key, new_value=None):
        """Replace a variable name, with a potentially new value.

        Parameters
        ----------
        old_key : str
            Current variable name to replace
        new_key : str
            New variable name to replace `old_key` with
        new_value : object
            Value to be replaced along with the possible renaming
        """
        if self.has_resolvers:
            maps = self.resolvers.maps + self.scope.maps
        else:
            maps = self.scope.maps

        # temporaries are searched last
        maps.append(self.temps)

        for mapping in maps:
            if old_key in mapping:
                mapping[new_key] = new_value
                return

    def _get_vars(self, stack, scopes):
        """Get specifically scoped variables from a list of stack frames.

        Parameters
        ----------
        stack : list
            A list of stack frames as returned by ``inspect.stack()``
        scopes : sequence of strings
            A sequence containing valid stack frame attribute names that
            evaluate to a dictionary. For example, ('locals', 'globals')
        """
        variables = itertools.product(scopes, stack)
        for scope, (frame, _, _, _, _, _) in variables:
            try:
                d = getattr(frame, "f_" + scope)
                self.scope = self.scope.new_child(d)
            finally:
                # won't remove it, but DECREF it
                # in Py3 this probably isn't necessary since frame won't be
                # scope after the loop
                del frame

    def update(self, level):
        """Update the current scope by going back `level` levels.

        Parameters
        ----------
        level : int or None, optional, default None
        """
        # NOTE(review): despite the docstring, ``None`` would fail here —
        # ``level + 1`` requires an int; confirm callers always pass an int.
        sl = level + 1

        # add sl frames to the scope starting with the
        # most distant and overwriting with more current
        # makes sure that we can capture variable scope
        stack = inspect.stack()

        try:
            self._get_vars(stack[:sl], scopes=["locals"])
        finally:
            # drop frame references held by inspect.stack()
            del stack[:], stack

    def add_tmp(self, value):
        """Add a temporary variable to the scope.

        Parameters
        ----------
        value : object
            An arbitrary object to be assigned to a temporary variable.

        Returns
        -------
        name : basestring
            The name of the temporary variable created.
        """
        # the name embeds the type, a counter and this Scope's id so it
        # cannot collide with user variables
        name = "{name}_{num}_{hex_id}".format(
            name=type(value).__name__, num=self.ntemps, hex_id=_raw_hex_id(self)
        )

        # add to inner most scope
        assert name not in self.temps
        self.temps[name] = value
        assert name in self.temps

        # only increment if the variable gets put in the scope
        return name

    @property
    def ntemps(self):
        """The number of temporary variables in this scope"""
        return len(self.temps)

    @property
    def full_scope(self):
        """Return the full scope for use with passing to engines transparently
        as a mapping.

        Returns
        -------
        vars : DeepChainMap
            All variables in this scope.
        """
        # precedence: temporaries, then resolvers, then the captured scope
        maps = [self.temps] + self.resolvers.maps + self.scope.maps
        return DeepChainMap(*maps)
|
||||
648
venv/lib/python3.6/site-packages/pandas/core/config_init.py
Normal file
648
venv/lib/python3.6/site-packages/pandas/core/config_init.py
Normal file
@@ -0,0 +1,648 @@
|
||||
"""
|
||||
This module is imported from the pandas package __init__.py file
|
||||
in order to ensure that the core.config options registered here will
|
||||
be available as soon as the user loads the package. if register_option
|
||||
is invoked inside specific modules, they will not be registered until that
|
||||
module is imported, which may or may not be a problem.
|
||||
|
||||
If you need to make sure options are available even before a certain
|
||||
module is imported, register them here rather then in the module.
|
||||
|
||||
"""
|
||||
import importlib
|
||||
|
||||
import pandas._config.config as cf
|
||||
from pandas._config.config import (
|
||||
is_bool,
|
||||
is_callable,
|
||||
is_instance_factory,
|
||||
is_int,
|
||||
is_one_of_factory,
|
||||
is_text,
|
||||
)
|
||||
|
||||
# compute
|
||||
|
||||
use_bottleneck_doc = """
|
||||
: bool
|
||||
Use the bottleneck library to accelerate if it is installed,
|
||||
the default is True
|
||||
Valid values: False,True
|
||||
"""
|
||||
|
||||
|
||||
def use_bottleneck_cb(key):
    """Option callback: push ``compute.use_bottleneck`` into ``nanops``."""
    # imported lazily so nanops is not pulled in at option-registration time
    from pandas.core import nanops

    enabled = cf.get_option(key)
    nanops.set_use_bottleneck(enabled)
|
||||
|
||||
|
||||
use_numexpr_doc = """
|
||||
: bool
|
||||
Use the numexpr library to accelerate computation if it is installed,
|
||||
the default is True
|
||||
Valid values: False,True
|
||||
"""
|
||||
|
||||
|
||||
def use_numexpr_cb(key):
    """Option callback: push ``compute.use_numexpr`` into ``expressions``."""
    # deferred import avoids a circular import at module load time
    from pandas.core.computation import expressions

    enabled = cf.get_option(key)
    expressions.set_use_numexpr(enabled)
|
||||
|
||||
|
||||
with cf.config_prefix("compute"):
    # register compute.use_bottleneck / compute.use_numexpr; each callback
    # pushes the new value into the relevant engine module on change
    cf.register_option(
        "use_bottleneck",
        True,
        use_bottleneck_doc,
        validator=is_bool,
        cb=use_bottleneck_cb,
    )
    cf.register_option(
        "use_numexpr", True, use_numexpr_doc, validator=is_bool, cb=use_numexpr_cb
    )
|
||||
#
|
||||
# options from the "display" namespace
|
||||
|
||||
pc_precision_doc = """
|
||||
: int
|
||||
Floating point output precision (number of significant digits). This is
|
||||
only a suggestion
|
||||
"""
|
||||
|
||||
pc_colspace_doc = """
|
||||
: int
|
||||
Default space for DataFrame columns.
|
||||
"""
|
||||
|
||||
pc_max_rows_doc = """
|
||||
: int
|
||||
If max_rows is exceeded, switch to truncate view. Depending on
|
||||
`large_repr`, objects are either centrally truncated or printed as
|
||||
a summary view. 'None' value means unlimited.
|
||||
|
||||
In case python/IPython is running in a terminal and `large_repr`
|
||||
equals 'truncate' this can be set to 0 and pandas will auto-detect
|
||||
the height of the terminal and print a truncated object which fits
|
||||
the screen height. The IPython notebook, IPython qtconsole, or
|
||||
IDLE do not run in a terminal and hence it is not possible to do
|
||||
correct auto-detection.
|
||||
"""
|
||||
|
||||
pc_min_rows_doc = """
|
||||
: int
|
||||
The numbers of rows to show in a truncated view (when `max_rows` is
|
||||
exceeded). Ignored when `max_rows` is set to None or 0. When set to
|
||||
None, follows the value of `max_rows`.
|
||||
"""
|
||||
|
||||
pc_max_cols_doc = """
|
||||
: int
|
||||
If max_cols is exceeded, switch to truncate view. Depending on
|
||||
`large_repr`, objects are either centrally truncated or printed as
|
||||
a summary view. 'None' value means unlimited.
|
||||
|
||||
In case python/IPython is running in a terminal and `large_repr`
|
||||
equals 'truncate' this can be set to 0 and pandas will auto-detect
|
||||
the width of the terminal and print a truncated object which fits
|
||||
the screen width. The IPython notebook, IPython qtconsole, or IDLE
|
||||
do not run in a terminal and hence it is not possible to do
|
||||
correct auto-detection.
|
||||
"""
|
||||
|
||||
pc_max_categories_doc = """
|
||||
: int
|
||||
This sets the maximum number of categories pandas should output when
|
||||
printing out a `Categorical` or a Series of dtype "category".
|
||||
"""
|
||||
|
||||
pc_max_info_cols_doc = """
|
||||
: int
|
||||
max_info_columns is used in DataFrame.info method to decide if
|
||||
per column information will be printed.
|
||||
"""
|
||||
|
||||
pc_nb_repr_h_doc = """
|
||||
: boolean
|
||||
When True, IPython notebook will use html representation for
|
||||
pandas objects (if it is available).
|
||||
"""
|
||||
|
||||
pc_pprint_nest_depth = """
|
||||
: int
|
||||
Controls the number of nested levels to process when pretty-printing
|
||||
"""
|
||||
|
||||
pc_multi_sparse_doc = """
|
||||
: boolean
|
||||
"sparsify" MultiIndex display (don't display repeated
|
||||
elements in outer levels within groups)
|
||||
"""
|
||||
|
||||
float_format_doc = """
|
||||
: callable
|
||||
The callable should accept a floating point number and return
|
||||
a string with the desired format of the number. This is used
|
||||
in some places like SeriesFormatter.
|
||||
See formats.format.EngFormatter for an example.
|
||||
"""
|
||||
|
||||
max_colwidth_doc = """
|
||||
: int
|
||||
The maximum width in characters of a column in the repr of
|
||||
a pandas data structure. When the column overflows, a "..."
|
||||
placeholder is embedded in the output.
|
||||
"""
|
||||
|
||||
colheader_justify_doc = """
|
||||
: 'left'/'right'
|
||||
Controls the justification of column headers. used by DataFrameFormatter.
|
||||
"""
|
||||
|
||||
pc_expand_repr_doc = """
|
||||
: boolean
|
||||
Whether to print out the full DataFrame repr for wide DataFrames across
|
||||
multiple lines, `max_columns` is still respected, but the output will
|
||||
wrap-around across multiple "pages" if its width exceeds `display.width`.
|
||||
"""
|
||||
|
||||
pc_show_dimensions_doc = """
|
||||
: boolean or 'truncate'
|
||||
Whether to print out dimensions at the end of DataFrame repr.
|
||||
If 'truncate' is specified, only print out the dimensions if the
|
||||
frame is truncated (e.g. not display all rows and/or columns)
|
||||
"""
|
||||
|
||||
pc_east_asian_width_doc = """
|
||||
: boolean
|
||||
Whether to use the Unicode East Asian Width to calculate the display text
|
||||
width.
|
||||
Enabling this may affect to the performance (default: False)
|
||||
"""
|
||||
|
||||
pc_ambiguous_as_wide_doc = """
|
||||
: boolean
|
||||
Whether to handle Unicode characters belong to Ambiguous as Wide (width=2)
|
||||
(default: False)
|
||||
"""
|
||||
|
||||
pc_latex_repr_doc = """
|
||||
: boolean
|
||||
Whether to produce a latex DataFrame representation for jupyter
|
||||
environments that support it.
|
||||
(default: False)
|
||||
"""
|
||||
|
||||
pc_table_schema_doc = """
|
||||
: boolean
|
||||
Whether to publish a Table Schema representation for frontends
|
||||
that support it.
|
||||
(default: False)
|
||||
"""
|
||||
|
||||
pc_html_border_doc = """
|
||||
: int
|
||||
A ``border=value`` attribute is inserted in the ``<table>`` tag
|
||||
for the DataFrame HTML repr.
|
||||
"""
|
||||
|
||||
pc_html_use_mathjax_doc = """\
|
||||
: boolean
|
||||
When True, Jupyter notebook will process table contents using MathJax,
|
||||
rendering mathematical expressions enclosed by the dollar symbol.
|
||||
(default: True)
|
||||
"""
|
||||
|
||||
pc_width_doc = """
|
||||
: int
|
||||
Width of the display in characters. In case python/IPython is running in
|
||||
a terminal this can be set to None and pandas will correctly auto-detect
|
||||
the width.
|
||||
Note that the IPython notebook, IPython qtconsole, or IDLE do not run in a
|
||||
terminal and hence it is not possible to correctly detect the width.
|
||||
"""
|
||||
|
||||
pc_chop_threshold_doc = """
|
||||
: float or None
|
||||
if set to a float value, all float values smaller then the given threshold
|
||||
will be displayed as exactly 0 by repr and friends.
|
||||
"""
|
||||
|
||||
pc_max_seq_items = """
|
||||
: int or None
|
||||
when pretty-printing a long sequence, no more then `max_seq_items`
|
||||
will be printed. If items are omitted, they will be denoted by the
|
||||
addition of "..." to the resulting string.
|
||||
|
||||
If set to None, the number of items to be printed is unlimited.
|
||||
"""
|
||||
|
||||
pc_max_info_rows_doc = """
|
||||
: int or None
|
||||
df.info() will usually show null-counts for each column.
|
||||
For large frames this can be quite slow. max_info_rows and max_info_cols
|
||||
limit this null check only to frames with smaller dimensions than
|
||||
specified.
|
||||
"""
|
||||
|
||||
pc_large_repr_doc = """
|
||||
: 'truncate'/'info'
|
||||
For DataFrames exceeding max_rows/max_cols, the repr (and HTML repr) can
|
||||
show a truncated table (the default from 0.13), or switch to the view from
|
||||
df.info() (the behaviour in earlier versions of pandas).
|
||||
"""
|
||||
|
||||
pc_memory_usage_doc = """
|
||||
: bool, string or None
|
||||
This specifies if the memory usage of a DataFrame should be displayed when
|
||||
df.info() is called. Valid values True,False,'deep'
|
||||
"""
|
||||
|
||||
pc_latex_escape = """
|
||||
: bool
|
||||
This specifies if the to_latex method of a Dataframe uses escapes special
|
||||
characters.
|
||||
Valid values: False,True
|
||||
"""
|
||||
|
||||
pc_latex_longtable = """
|
||||
:bool
|
||||
This specifies if the to_latex method of a Dataframe uses the longtable
|
||||
format.
|
||||
Valid values: False,True
|
||||
"""
|
||||
|
||||
pc_latex_multicolumn = """
|
||||
: bool
|
||||
This specifies if the to_latex method of a Dataframe uses multicolumns
|
||||
to pretty-print MultiIndex columns.
|
||||
Valid values: False,True
|
||||
"""
|
||||
|
||||
pc_latex_multicolumn_format = """
|
||||
: string
|
||||
This specifies the format for multicolumn headers.
|
||||
Can be surrounded with '|'.
|
||||
Valid values: 'l', 'c', 'r', 'p{<width>}'
|
||||
"""
|
||||
|
||||
pc_latex_multirow = """
|
||||
: bool
|
||||
This specifies if the to_latex method of a Dataframe uses multirows
|
||||
to pretty-print MultiIndex rows.
|
||||
Valid values: False,True
|
||||
"""
|
||||
|
||||
|
||||
def table_schema_cb(key):
    """Option callback: toggle the Table Schema repr formatter."""
    # deferred import keeps io.formats out of the config import path
    from pandas.io.formats.printing import _enable_data_resource_formatter

    enabled = cf.get_option(key)
    _enable_data_resource_formatter(enabled)
|
||||
|
||||
|
||||
def is_terminal():
    """
    Detect if Python is running in a terminal.

    Returns True if Python is running in a terminal or False if not.
    """
    try:
        # get_ipython() is injected into builtins only under IPython
        ip = get_ipython()
    except NameError:  # assume standard Python interpreter in a terminal
        return True
    # Jupyter kernels expose a ``kernel`` attribute; terminal IPython doesn't
    return not hasattr(ip, "kernel")
|
||||
|
||||
|
||||
with cf.config_prefix("display"):
|
||||
cf.register_option("precision", 6, pc_precision_doc, validator=is_int)
|
||||
cf.register_option(
|
||||
"float_format",
|
||||
None,
|
||||
float_format_doc,
|
||||
validator=is_one_of_factory([None, is_callable]),
|
||||
)
|
||||
cf.register_option("column_space", 12, validator=is_int)
|
||||
cf.register_option(
|
||||
"max_info_rows",
|
||||
1690785,
|
||||
pc_max_info_rows_doc,
|
||||
validator=is_instance_factory((int, type(None))),
|
||||
)
|
||||
cf.register_option(
|
||||
"max_rows",
|
||||
60,
|
||||
pc_max_rows_doc,
|
||||
validator=is_instance_factory([type(None), int]),
|
||||
)
|
||||
cf.register_option(
|
||||
"min_rows",
|
||||
10,
|
||||
pc_min_rows_doc,
|
||||
validator=is_instance_factory([type(None), int]),
|
||||
)
|
||||
cf.register_option("max_categories", 8, pc_max_categories_doc, validator=is_int)
|
||||
cf.register_option("max_colwidth", 50, max_colwidth_doc, validator=is_int)
|
||||
if is_terminal():
|
||||
max_cols = 0 # automatically determine optimal number of columns
|
||||
else:
|
||||
max_cols = 20 # cannot determine optimal number of columns
|
||||
cf.register_option(
|
||||
"max_columns",
|
||||
max_cols,
|
||||
pc_max_cols_doc,
|
||||
validator=is_instance_factory([type(None), int]),
|
||||
)
|
||||
cf.register_option(
|
||||
"large_repr",
|
||||
"truncate",
|
||||
pc_large_repr_doc,
|
||||
validator=is_one_of_factory(["truncate", "info"]),
|
||||
)
|
||||
cf.register_option("max_info_columns", 100, pc_max_info_cols_doc, validator=is_int)
|
||||
cf.register_option(
|
||||
"colheader_justify", "right", colheader_justify_doc, validator=is_text
|
||||
)
|
||||
cf.register_option("notebook_repr_html", True, pc_nb_repr_h_doc, validator=is_bool)
|
||||
cf.register_option("pprint_nest_depth", 3, pc_pprint_nest_depth, validator=is_int)
|
||||
cf.register_option("multi_sparse", True, pc_multi_sparse_doc, validator=is_bool)
|
||||
cf.register_option("expand_frame_repr", True, pc_expand_repr_doc)
|
||||
cf.register_option(
|
||||
"show_dimensions",
|
||||
"truncate",
|
||||
pc_show_dimensions_doc,
|
||||
validator=is_one_of_factory([True, False, "truncate"]),
|
||||
)
|
||||
cf.register_option("chop_threshold", None, pc_chop_threshold_doc)
|
||||
cf.register_option("max_seq_items", 100, pc_max_seq_items)
|
||||
cf.register_option(
|
||||
"width", 80, pc_width_doc, validator=is_instance_factory([type(None), int])
|
||||
)
|
||||
cf.register_option(
|
||||
"memory_usage",
|
||||
True,
|
||||
pc_memory_usage_doc,
|
||||
validator=is_one_of_factory([None, True, False, "deep"]),
|
||||
)
|
||||
cf.register_option(
|
||||
"unicode.east_asian_width", False, pc_east_asian_width_doc, validator=is_bool
|
||||
)
|
||||
cf.register_option(
|
||||
"unicode.ambiguous_as_wide", False, pc_east_asian_width_doc, validator=is_bool
|
||||
)
|
||||
cf.register_option("latex.repr", False, pc_latex_repr_doc, validator=is_bool)
|
||||
cf.register_option("latex.escape", True, pc_latex_escape, validator=is_bool)
|
||||
cf.register_option("latex.longtable", False, pc_latex_longtable, validator=is_bool)
|
||||
cf.register_option(
|
||||
"latex.multicolumn", True, pc_latex_multicolumn, validator=is_bool
|
||||
)
|
||||
cf.register_option(
|
||||
"latex.multicolumn_format", "l", pc_latex_multicolumn, validator=is_text
|
||||
)
|
||||
cf.register_option("latex.multirow", False, pc_latex_multirow, validator=is_bool)
|
||||
cf.register_option(
|
||||
"html.table_schema",
|
||||
False,
|
||||
pc_table_schema_doc,
|
||||
validator=is_bool,
|
||||
cb=table_schema_cb,
|
||||
)
|
||||
cf.register_option("html.border", 1, pc_html_border_doc, validator=is_int)
|
||||
cf.register_option(
|
||||
"html.use_mathjax", True, pc_html_use_mathjax_doc, validator=is_bool
|
||||
)
|
||||
|
||||
tc_sim_interactive_doc = """
|
||||
: boolean
|
||||
Whether to simulate interactive mode for purposes of testing
|
||||
"""
|
||||
|
||||
with cf.config_prefix("mode"):
    # mode.sim_interactive: simulate interactive mode, used by the test suite
    cf.register_option("sim_interactive", False, tc_sim_interactive_doc)
|
||||
|
||||
use_inf_as_null_doc = """
|
||||
: boolean
|
||||
use_inf_as_null had been deprecated and will be removed in a future
|
||||
version. Use `use_inf_as_na` instead.
|
||||
"""
|
||||
|
||||
use_inf_as_na_doc = """
|
||||
: boolean
|
||||
True means treat None, NaN, INF, -INF as NA (old way),
|
||||
False means None and NaN are null, but INF, -INF are not NA
|
||||
(new way).
|
||||
"""
|
||||
|
||||
# We don't want to start importing everything at the global context level
|
||||
# or we'll hit circular deps.
|
||||
|
||||
|
||||
def use_inf_as_na_cb(key):
    """Option callback fired when ``use_inf_as_na``/``use_inf_as_null`` changes.

    Parameters
    ----------
    key : str
        The full option key that was changed; forwarded unchanged to
        ``_use_inf_as_na``.
    """
    # Local import: a module-level import would create a circular dependency
    # (see the comment above this function).
    from pandas.core.dtypes.missing import _use_inf_as_na

    _use_inf_as_na(key)
|
||||
|
||||
|
||||
with cf.config_prefix("mode"):
    cf.register_option("use_inf_as_na", False, use_inf_as_na_doc, cb=use_inf_as_na_cb)
    # Deprecated alias; both options share the same callback.
    cf.register_option(
        "use_inf_as_null", False, use_inf_as_null_doc, cb=use_inf_as_na_cb
    )

# The deprecated key is re-routed (rkey) to "mode.use_inf_as_na".
cf.deprecate_option(
    "mode.use_inf_as_null", msg=use_inf_as_null_doc, rkey="mode.use_inf_as_na"
)
|
||||
|
||||
|
||||
# user warnings
chained_assignment = """
: string
    Raise an exception, warn, or no action if trying to use chained assignment,
    The default is warn
"""

with cf.config_prefix("mode"):
    # None disables the check entirely; "warn" is the default behavior.
    cf.register_option(
        "chained_assignment",
        "warn",
        chained_assignment,
        validator=is_one_of_factory([None, "warn", "raise"]),
    )
|
||||
|
||||
|
||||
# Set up the io.excel specific reader configuration.
reader_engine_doc = """
: string
    The default Excel reader engine for '{ext}' files. Available options:
    auto, {others}.
"""

# Candidate engines per file extension (used only to build the doc text).
_xls_options = ["xlrd"]
_xlsm_options = ["xlrd", "openpyxl"]
_xlsx_options = ["xlrd", "openpyxl"]
_ods_options = ["odf"]


# One "reader" option per supported extension; the default "auto" leaves the
# concrete engine to be resolved by the reader code.
with cf.config_prefix("io.excel.xls"):
    cf.register_option(
        "reader",
        "auto",
        reader_engine_doc.format(ext="xls", others=", ".join(_xls_options)),
        validator=str,
    )

with cf.config_prefix("io.excel.xlsm"):
    cf.register_option(
        "reader",
        "auto",
        reader_engine_doc.format(ext="xlsm", others=", ".join(_xlsm_options)),
        validator=str,
    )


with cf.config_prefix("io.excel.xlsx"):
    cf.register_option(
        "reader",
        "auto",
        reader_engine_doc.format(ext="xlsx", others=", ".join(_xlsx_options)),
        validator=str,
    )


with cf.config_prefix("io.excel.ods"):
    cf.register_option(
        "reader",
        "auto",
        reader_engine_doc.format(ext="ods", others=", ".join(_ods_options)),
        validator=str,
    )
|
||||
|
||||
|
||||
# Set up the io.excel specific writer configuration.
writer_engine_doc = """
: string
    The default Excel writer engine for '{ext}' files. Available options:
    auto, {others}.
"""

# NOTE: these deliberately rebind the reader option lists above — the set of
# writer engines differs from the set of reader engines.
_xls_options = ["xlwt"]
_xlsm_options = ["openpyxl"]
_xlsx_options = ["openpyxl", "xlsxwriter"]


with cf.config_prefix("io.excel.xls"):
    cf.register_option(
        "writer",
        "auto",
        writer_engine_doc.format(ext="xls", others=", ".join(_xls_options)),
        validator=str,
    )

with cf.config_prefix("io.excel.xlsm"):
    cf.register_option(
        "writer",
        "auto",
        writer_engine_doc.format(ext="xlsm", others=", ".join(_xlsm_options)),
        validator=str,
    )


with cf.config_prefix("io.excel.xlsx"):
    cf.register_option(
        "writer",
        "auto",
        writer_engine_doc.format(ext="xlsx", others=", ".join(_xlsx_options)),
        validator=str,
    )
|
||||
|
||||
|
||||
# Set up the io.parquet specific configuration.
parquet_engine_doc = """
: string
    The default parquet reader/writer engine. Available options:
    'auto', 'pyarrow', 'fastparquet', the default is 'auto'
"""

with cf.config_prefix("io.parquet"):
    cf.register_option(
        "engine",
        "auto",
        parquet_engine_doc,
        validator=is_one_of_factory(["auto", "pyarrow", "fastparquet"]),
    )
|
||||
|
||||
# --------
# Plotting
# ---------

# User-facing doc text for the "plotting.backend" option registered below.
# Fixed typo: "prodiving" -> "providing".
plotting_backend_doc = """
: str
    The plotting backend to use. The default value is "matplotlib", the
    backend provided with pandas. Other backends can be specified by
    providing the name of the module that implements the backend.
"""
|
||||
|
||||
|
||||
def register_plotting_backend_cb(key):
    """Option callback: check that the newly selected plotting backend
    is actually importable, raising otherwise.

    Parameters
    ----------
    key : str
        The option key being set (e.g. ``"plotting.backend"``); its current
        value names the backend module.
    """
    backend_str = cf.get_option(key)

    if backend_str != "matplotlib":
        # Third-party backend: any importable module name is accepted.
        try:
            importlib.import_module(backend_str)
        except ImportError:
            raise ValueError(
                '"{}" does not seem to be an installed module. '
                "A pandas plotting backend must be a module that "
                "can be imported".format(backend_str)
            )
        return

    # Default backend: matplotlib itself must be installed.
    try:
        import pandas.plotting._matplotlib  # noqa
    except ImportError:
        raise ImportError(
            "matplotlib is required for plotting when the "
            'default backend "matplotlib" is selected.'
        )
|
||||
|
||||
|
||||
with cf.config_prefix("plotting"):
    # The callback verifies the chosen backend can be imported.
    cf.register_option(
        "backend",
        defval="matplotlib",
        doc=plotting_backend_doc,
        validator=str,
        cb=register_plotting_backend_cb,
    )
|
||||
|
||||
|
||||
register_converter_doc = """
|
||||
: bool
|
||||
Whether to register converters with matplotlib's units registry for
|
||||
dates, times, datetimes, and Periods. Toggling to False will remove
|
||||
the converters, restoring any converters that pandas overwrote.
|
||||
"""
|
||||
|
||||
|
||||
def register_converter_cb(key):
    """Option callback: (de)register pandas' matplotlib unit converters.

    Parameters
    ----------
    key : str
        Option key whose current boolean value selects register vs. deregister.
    """
    # Imported lazily so matplotlib machinery is only touched when the
    # option actually changes.
    from pandas.plotting import (
        deregister_matplotlib_converters,
        register_matplotlib_converters,
    )

    # Pick the action matching the option's new value, then run it.
    action = (
        register_matplotlib_converters
        if cf.get_option(key)
        else deregister_matplotlib_converters
    )
    action()
|
||||
|
||||
|
||||
with cf.config_prefix("plotting.matplotlib"):
    cf.register_option(
        "register_converters",
        True,
        register_converter_doc,
        validator=bool,
        cb=register_converter_cb,
    )
|
||||
47
venv/lib/python3.6/site-packages/pandas/core/dtypes/api.py
Normal file
47
venv/lib/python3.6/site-packages/pandas/core/dtypes/api.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# flake8: noqa
|
||||
|
||||
from .common import (
|
||||
is_array_like,
|
||||
is_bool,
|
||||
is_bool_dtype,
|
||||
is_categorical,
|
||||
is_categorical_dtype,
|
||||
is_complex,
|
||||
is_complex_dtype,
|
||||
is_datetime64_any_dtype,
|
||||
is_datetime64_dtype,
|
||||
is_datetime64_ns_dtype,
|
||||
is_datetime64tz_dtype,
|
||||
is_datetimetz,
|
||||
is_dict_like,
|
||||
is_dtype_equal,
|
||||
is_extension_array_dtype,
|
||||
is_extension_type,
|
||||
is_file_like,
|
||||
is_float,
|
||||
is_float_dtype,
|
||||
is_hashable,
|
||||
is_int64_dtype,
|
||||
is_integer,
|
||||
is_integer_dtype,
|
||||
is_interval,
|
||||
is_interval_dtype,
|
||||
is_iterator,
|
||||
is_list_like,
|
||||
is_named_tuple,
|
||||
is_number,
|
||||
is_numeric_dtype,
|
||||
is_object_dtype,
|
||||
is_period,
|
||||
is_period_dtype,
|
||||
is_re,
|
||||
is_re_compilable,
|
||||
is_scalar,
|
||||
is_signed_integer_dtype,
|
||||
is_sparse,
|
||||
is_string_dtype,
|
||||
is_timedelta64_dtype,
|
||||
is_timedelta64_ns_dtype,
|
||||
is_unsigned_integer_dtype,
|
||||
pandas_dtype,
|
||||
)
|
||||
298
venv/lib/python3.6/site-packages/pandas/core/dtypes/base.py
Normal file
298
venv/lib/python3.6/site-packages/pandas/core/dtypes/base.py
Normal file
@@ -0,0 +1,298 @@
|
||||
"""Extend pandas with custom array types"""
|
||||
from typing import List, Optional, Tuple, Type
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.errors import AbstractMethodError
|
||||
|
||||
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
|
||||
|
||||
|
||||
class ExtensionDtype:
    """
    A custom data type, to be paired with an ExtensionArray.

    .. versionadded:: 0.23.0

    See Also
    --------
    extensions.register_extension_dtype
    extensions.ExtensionArray

    Notes
    -----
    The interface includes the following abstract methods that must
    be implemented by subclasses:

    * type
    * name
    * construct_from_string

    The following attributes influence the behavior of the dtype in
    pandas operations

    * _is_numeric
    * _is_boolean

    Optionally one can override construct_array_type for construction
    with the name of this dtype via the Registry. See
    :meth:`extensions.register_extension_dtype`.

    * construct_array_type

    The `na_value` class attribute can be used to set the default NA value
    for this type. :attr:`numpy.nan` is used by default.

    ExtensionDtypes are required to be hashable. The base class provides
    a default implementation, which relies on the ``_metadata`` class
    attribute. ``_metadata`` should be a tuple containing the strings
    that define your data type. For example, with ``PeriodDtype`` that's
    the ``freq`` attribute.

    **If you have a parametrized dtype you should set the ``_metadata``
    class property**.

    Ideally, the attributes in ``_metadata`` will match the
    parameters to your ``ExtensionDtype.__init__`` (if any). If any of
    the attributes in ``_metadata`` don't implement the standard
    ``__eq__`` or ``__hash__``, the default implementations here will not
    work.

    .. versionchanged:: 0.24.0

       Added ``_metadata``, ``__hash__``, and changed the default definition
       of ``__eq__``.

    This class does not inherit from 'abc.ABCMeta' for performance reasons.
    Methods and properties required by the interface raise
    ``pandas.errors.AbstractMethodError`` and no ``register`` method is
    provided for registering virtual subclasses.
    """

    # Names of the attributes that parametrize this dtype; drives the
    # default __eq__ / __hash__ implementations below.
    _metadata = ()  # type: Tuple[str, ...]

    def __str__(self):
        return self.name

    def __eq__(self, other):
        """Check whether 'other' is equal to self.

        By default, 'other' is considered equal if either

        * it's a string matching 'self.name'.
        * it's an instance of this type and all of the attributes
          in ``self._metadata`` are equal between `self` and `other`.

        Parameters
        ----------
        other : Any

        Returns
        -------
        bool
        """
        if isinstance(other, str):
            try:
                # a failed parse means the string does not describe this dtype
                other = self.construct_from_string(other)
            except TypeError:
                return False
        if isinstance(other, type(self)):
            return all(
                getattr(self, attr) == getattr(other, attr) for attr in self._metadata
            )
        return False

    def __hash__(self):
        # Must stay consistent with __eq__: hash over the same _metadata
        # attributes that __eq__ compares.
        return hash(tuple(getattr(self, attr) for attr in self._metadata))

    def __ne__(self, other):
        return not self.__eq__(other)

    @property
    def na_value(self):
        """
        Default NA value to use for this type.

        This is used in e.g. ExtensionArray.take. This should be the
        user-facing "boxed" version of the NA value, not the physical NA value
        for storage. e.g. for JSONArray, this is an empty dictionary.
        """
        return np.nan

    @property
    def type(self) -> Type:
        """
        The scalar type for the array, e.g. ``int``

        It's expected ``ExtensionArray[item]`` returns an instance
        of ``ExtensionDtype.type`` for scalar ``item``, assuming
        that value is valid (not NA). NA values do not need to be
        instances of `type`.
        """
        raise AbstractMethodError(self)

    @property
    def kind(self) -> str:
        """
        A character code (one of 'biufcmMOSUV'), default 'O'

        This should match the NumPy dtype used when the array is
        converted to an ndarray, which is probably 'O' for object if
        the extension type cannot be represented as a built-in NumPy
        type.

        See Also
        --------
        numpy.dtype.kind
        """
        return "O"

    @property
    def name(self) -> str:
        """
        A string identifying the data type.

        Will be used for display in, e.g. ``Series.dtype``
        """
        raise AbstractMethodError(self)

    @property
    def names(self) -> Optional[List[str]]:
        """Ordered list of field names, or None if there are no fields.

        This is for compatibility with NumPy arrays, and may be removed in the
        future.
        """
        return None

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype

        Returns
        -------
        type
        """
        raise NotImplementedError

    @classmethod
    def construct_from_string(cls, string: str):
        r"""
        Construct this type from a string.

        This is useful mainly for data types that accept parameters.
        For example, a period dtype accepts a frequency parameter that
        can be set as ``period[H]`` (where H means hourly frequency).

        By default, in the abstract class, just the name of the type is
        expected. But subclasses can overwrite this method to accept
        parameters.

        Parameters
        ----------
        string : str
            The name of the type, for example ``category``.

        Returns
        -------
        ExtensionDtype
            Instance of the dtype.

        Raises
        ------
        TypeError
            If a class cannot be constructed from this 'string'.

        Examples
        --------
        For extension dtypes with arguments the following may be an
        adequate implementation.

        >>> @classmethod
        ... def construct_from_string(cls, string):
        ...     pattern = re.compile(r"^my_type\[(?P<arg_name>.+)\]$")
        ...     match = pattern.match(string)
        ...     if match:
        ...         return cls(**match.groupdict())
        ...     else:
        ...         raise TypeError("Cannot construct a '{}' from "
        ...                         "'{}'".format(cls.__name__, string))
        """
        if not isinstance(string, str):
            raise TypeError("Expects a string, got {}".format(type(string)))
        if string != cls.name:
            raise TypeError(
                "Cannot construct a '{}' from '{}'".format(cls.__name__, string)
            )
        return cls()

    @classmethod
    def is_dtype(cls, dtype) -> bool:
        """Check if we match 'dtype'.

        Parameters
        ----------
        dtype : object
            The object to check.

        Returns
        -------
        is_dtype : bool

        Notes
        -----
        The default implementation is True if

        1. ``cls.construct_from_string(dtype)`` is an instance
           of ``cls``.
        2. ``dtype`` is an object and is an instance of ``cls``
        3. ``dtype`` has a ``dtype`` attribute, and any of the above
           conditions is true for ``dtype.dtype``.
        """
        # Unwrap one level: Series/Index/array-likes expose their dtype here.
        dtype = getattr(dtype, "dtype", dtype)

        if isinstance(dtype, (ABCSeries, ABCIndexClass, ABCDataFrame, np.dtype)):
            # https://github.com/pandas-dev/pandas/issues/22960
            # avoid passing data to `construct_from_string`. This could
            # cause a FutureWarning from numpy about failing elementwise
            # comparison from, e.g., comparing DataFrame == 'category'.
            return False
        elif dtype is None:
            return False
        elif isinstance(dtype, cls):
            return True
        try:
            return cls.construct_from_string(dtype) is not None
        except TypeError:
            return False

    @property
    def _is_numeric(self) -> bool:
        """
        Whether columns with this dtype should be considered numeric.

        By default ExtensionDtypes are assumed to be non-numeric.
        They'll be excluded from operations that exclude non-numeric
        columns, like (groupby) reductions, plotting, etc.
        """
        return False

    @property
    def _is_boolean(self) -> bool:
        """
        Whether this dtype should be considered boolean.

        By default, ExtensionDtypes are assumed to be non-numeric.
        Setting this to True will affect the behavior of several places,
        e.g.

        * is_bool
        * boolean indexing

        Returns
        -------
        bool
        """
        return False
|
||||
1374
venv/lib/python3.6/site-packages/pandas/core/dtypes/cast.py
Normal file
1374
venv/lib/python3.6/site-packages/pandas/core/dtypes/cast.py
Normal file
File diff suppressed because it is too large
Load Diff
2069
venv/lib/python3.6/site-packages/pandas/core/dtypes/common.py
Normal file
2069
venv/lib/python3.6/site-packages/pandas/core/dtypes/common.py
Normal file
File diff suppressed because it is too large
Load Diff
595
venv/lib/python3.6/site-packages/pandas/core/dtypes/concat.py
Normal file
595
venv/lib/python3.6/site-packages/pandas/core/dtypes/concat.py
Normal file
@@ -0,0 +1,595 @@
|
||||
"""
|
||||
Utility functions related to concat
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import tslib, tslibs
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
_NS_DTYPE,
|
||||
_TD_DTYPE,
|
||||
is_bool_dtype,
|
||||
is_categorical_dtype,
|
||||
is_datetime64_dtype,
|
||||
is_datetime64tz_dtype,
|
||||
is_dtype_equal,
|
||||
is_extension_array_dtype,
|
||||
is_object_dtype,
|
||||
is_sparse,
|
||||
is_timedelta64_dtype,
|
||||
)
|
||||
from pandas.core.dtypes.generic import (
|
||||
ABCDatetimeArray,
|
||||
ABCDatetimeIndex,
|
||||
ABCIndexClass,
|
||||
ABCPeriodIndex,
|
||||
ABCRangeIndex,
|
||||
ABCSparseDataFrame,
|
||||
ABCTimedeltaIndex,
|
||||
)
|
||||
|
||||
|
||||
def get_dtype_kinds(l):
    """
    Classify each array in a list by the kind of data it holds.

    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays
    """

    def _kind_of(arr):
        # One label per array. The order of checks matters: category and
        # sparse take precedence, tz-aware datetime is checked before
        # naive datetime, and the generic extension check comes last.
        dtype = arr.dtype
        if is_categorical_dtype(dtype):
            return "category"
        if is_sparse(arr):
            return "sparse"
        if isinstance(arr, ABCRangeIndex):
            return "range"
        if is_datetime64tz_dtype(arr):
            # if to_concat contains different tz,
            # the result must be object dtype
            return str(arr.dtype)
        if is_datetime64_dtype(dtype):
            return "datetime"
        if is_timedelta64_dtype(dtype):
            return "timedelta"
        if is_object_dtype(dtype):
            return "object"
        if is_bool_dtype(dtype):
            return "bool"
        if is_extension_array_dtype(dtype):
            return str(arr.dtype)
        # fall back to the raw numpy kind character (e.g. 'i', 'u', 'f')
        return dtype.kind

    return {_kind_of(arr) for arr in l}
|
||||
|
||||
|
||||
def _get_series_result_type(result, objs=None):
    """
    return appropriate class of Series concat
    input is either dict or array-like
    """
    # NOTE(review): SparseSeries/SparseDataFrame are the legacy sparse
    # containers; imported lazily to avoid circular imports.
    from pandas import SparseSeries, SparseDataFrame, DataFrame

    # concat Series with axis 1
    if isinstance(result, dict):
        # concat Series with axis 1
        if all(isinstance(c, (SparseSeries, SparseDataFrame)) for c in result.values()):
            return SparseDataFrame
        else:
            return DataFrame

    # otherwise it is a SingleBlockManager (axis = 0)
    return objs[0]._constructor
|
||||
|
||||
|
||||
def _get_frame_result_type(result, objs):
    """
    return appropriate class of DataFrame-like concat
    if all blocks are sparse, return SparseDataFrame
    otherwise, return 1st obj
    """

    if result.blocks and (any(isinstance(obj, ABCSparseDataFrame) for obj in objs)):
        from pandas.core.sparse.api import SparseDataFrame

        return SparseDataFrame
    else:
        # first non-sparse object determines the result class
        return next(obj for obj in objs if not isinstance(obj, ABCSparseDataFrame))
|
||||
|
||||
|
||||
def _concat_compat(to_concat, axis=0):
    """
    provide concatenation of an array of arrays each of which is a single
    'normalized' dtypes (in that for example, if it's object, then it is a
    non-datetimelike and provide a combined dtype for the resulting array that
    preserves the overall dtype if possible)

    Parameters
    ----------
    to_concat : array of arrays
    axis : axis to provide concatenation

    Returns
    -------
    a single array, preserving the combined dtypes
    """

    # filter empty arrays
    # 1-d dtypes always are included here
    def is_nonempty(x):
        try:
            return x.shape[axis] > 0
        except Exception:
            # objects without a usable .shape are treated as non-empty so
            # they still participate in dtype resolution below
            return True

    # If all arrays are empty, there's nothing to convert, just short-cut to
    # the concatenation, #3121.
    #
    # Creating an empty array directly is tempting, but the winnings would be
    # marginal given that it would still require shape & dtype calculation and
    # np.concatenate which has them both implemented is compiled.

    typs = get_dtype_kinds(to_concat)
    _contains_datetime = any(typ.startswith("datetime") for typ in typs)
    _contains_period = any(typ.startswith("period") for typ in typs)

    if "category" in typs:
        # this must be prior to _concat_datetime,
        # to support Categorical + datetime-like
        return _concat_categorical(to_concat, axis=axis)

    elif _contains_datetime or "timedelta" in typs or _contains_period:
        return _concat_datetime(to_concat, axis=axis, typs=typs)

    # these are mandated to handle empties as well
    elif "sparse" in typs:
        return _concat_sparse(to_concat, axis=axis, typs=typs)

    all_empty = all(not is_nonempty(x) for x in to_concat)
    if any(is_extension_array_dtype(x) for x in to_concat) and axis == 1:
        # 1-D extension arrays cannot be stacked along axis 1 directly;
        # coerce them to 2-D object ndarrays first
        to_concat = [np.atleast_2d(x.astype("object")) for x in to_concat]

    if all_empty:
        # we have all empties, but may need to coerce the result dtype to
        # object if we have non-numeric type operands (numpy would otherwise
        # cast this to float)
        typs = get_dtype_kinds(to_concat)
        if len(typs) != 1:

            if not len(typs - {"i", "u", "f"}) or not len(typs - {"bool", "i", "u"}):
                # let numpy coerce
                pass
            else:
                # coerce to object
                to_concat = [x.astype("object") for x in to_concat]

    return np.concatenate(to_concat, axis=axis)
|
||||
|
||||
|
||||
def _concat_categorical(to_concat, axis=0):
    """Concatenate an object/categorical array of arrays, each of which is a
    single dtype

    Parameters
    ----------
    to_concat : array of arrays
    axis : int
        Axis to provide concatenation in the current implementation this is
        always 0, e.g. we only have 1D categoricals

    Returns
    -------
    Categorical
        A single array, preserving the combined dtypes
    """

    # we could have object blocks and categoricals here
    # if we only have a single categoricals then combine everything
    # else its a non-compat categorical
    categoricals = [x for x in to_concat if is_categorical_dtype(x.dtype)]

    # validate the categories
    if len(categoricals) != len(to_concat):
        pass
    else:
        # when all categories are identical
        first = to_concat[0]
        if all(first.is_dtype_equal(other) for other in to_concat[1:]):
            return union_categoricals(categoricals)

    # extract the categoricals & coerce to object if needed
    # NOTE: this is a single chained conditional expression per element:
    # categorical -> its values; non-tz array -> raveled ndarray;
    # tz-aware -> object ndarray.
    to_concat = [
        x._internal_get_values()
        if is_categorical_dtype(x.dtype)
        else np.asarray(x).ravel()
        if not is_datetime64tz_dtype(x)
        else np.asarray(x.astype(object))
        for x in to_concat
    ]
    result = _concat_compat(to_concat)
    if axis == 1:
        # lift the 1-D result into a single-row 2-D array
        result = result.reshape(1, len(result))
    return result
|
||||
|
||||
|
||||
def union_categoricals(to_union, sort_categories=False, ignore_order=False):
    """
    Combine list-like of Categorical-like, unioning categories. All
    categories must have the same dtype.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    to_union : list-like of Categorical, CategoricalIndex,
        or Series with dtype='category'
    sort_categories : boolean, default False
        If true, resulting categories will be lexsorted, otherwise
        they will be ordered as they appear in the data.
    ignore_order : boolean, default False
        If true, the ordered attribute of the Categoricals will be ignored.
        Results in an unordered categorical.

        .. versionadded:: 0.20.0

    Returns
    -------
    result : Categorical

    Raises
    ------
    TypeError
        - all inputs do not have the same dtype
        - all inputs do not have the same ordered property
        - all inputs are ordered and their categories are not identical
        - sort_categories=True and Categoricals are ordered
    ValueError
        Empty list of categoricals passed

    Notes
    -----

    To learn more about categories, see `link
    <http://pandas.pydata.org/pandas-docs/stable/user_guide/categorical.html#unioning>`__

    Examples
    --------

    >>> from pandas.api.types import union_categoricals

    If you want to combine categoricals that do not necessarily have
    the same categories, `union_categoricals` will combine a list-like
    of categoricals. The new categories will be the union of the
    categories being combined.

    >>> a = pd.Categorical(["b", "c"])
    >>> b = pd.Categorical(["a", "b"])
    >>> union_categoricals([a, b])
    [b, c, a, b]
    Categories (3, object): [b, c, a]

    By default, the resulting categories will be ordered as they appear
    in the `categories` of the data. If you want the categories to be
    lexsorted, use `sort_categories=True` argument.

    >>> union_categoricals([a, b], sort_categories=True)
    [b, c, a, b]
    Categories (3, object): [a, b, c]

    `union_categoricals` also works with the case of combining two
    categoricals of the same categories and order information (e.g. what
    you could also `append` for).

    >>> a = pd.Categorical(["a", "b"], ordered=True)
    >>> b = pd.Categorical(["a", "b", "a"], ordered=True)
    >>> union_categoricals([a, b])
    [a, b, a, b, a]
    Categories (2, object): [a < b]

    Raises `TypeError` because the categories are ordered and not identical.

    >>> a = pd.Categorical(["a", "b"], ordered=True)
    >>> b = pd.Categorical(["a", "b", "c"], ordered=True)
    >>> union_categoricals([a, b])
    TypeError: to union ordered Categoricals, all categories must be the same

    New in version 0.20.0

    Ordered categoricals with different categories or orderings can be
    combined by using the `ignore_ordered=True` argument.

    >>> a = pd.Categorical(["a", "b", "c"], ordered=True)
    >>> b = pd.Categorical(["c", "b", "a"], ordered=True)
    >>> union_categoricals([a, b], ignore_order=True)
    [a, b, c, c, b, a]
    Categories (3, object): [a, b, c]

    `union_categoricals` also works with a `CategoricalIndex`, or `Series`
    containing categorical data, but note that the resulting array will
    always be a plain `Categorical`

    >>> a = pd.Series(["b", "c"], dtype='category')
    >>> b = pd.Series(["a", "b"], dtype='category')
    >>> union_categoricals([a, b])
    [b, c, a, b]
    Categories (3, object): [b, c, a]
    """
    from pandas import Index, Categorical, CategoricalIndex, Series
    from pandas.core.arrays.categorical import _recode_for_categories

    if len(to_union) == 0:
        raise ValueError("No Categoricals to union")

    def _maybe_unwrap(x):
        # accept a bare Categorical, or pull the Categorical out of a
        # CategoricalIndex / Series wrapper
        if isinstance(x, (CategoricalIndex, Series)):
            return x.values
        elif isinstance(x, Categorical):
            return x
        else:
            raise TypeError("all components to combine must be Categorical")

    to_union = [_maybe_unwrap(x) for x in to_union]
    first = to_union[0]

    if not all(
        is_dtype_equal(other.categories.dtype, first.categories.dtype)
        for other in to_union[1:]
    ):
        raise TypeError("dtype of categories must be the same")

    ordered = False
    if all(first.is_dtype_equal(other) for other in to_union[1:]):
        # identical categories - fastpath
        categories = first.categories
        ordered = first.ordered

        if all(first.categories.equals(other.categories) for other in to_union[1:]):
            new_codes = np.concatenate([c.codes for c in to_union])
        else:
            # same category sets but possibly different orderings: remap
            # codes onto the first input's category order
            codes = [first.codes] + [
                _recode_for_categories(other.codes, other.categories, first.categories)
                for other in to_union[1:]
            ]
            new_codes = np.concatenate(codes)

        if sort_categories and not ignore_order and ordered:
            raise TypeError(
                "Cannot use sort_categories=True with " "ordered Categoricals"
            )

        if sort_categories and not categories.is_monotonic_increasing:
            categories = categories.sort_values()
            indexer = categories.get_indexer(first.categories)

            from pandas.core.algorithms import take_1d

            new_codes = take_1d(indexer, new_codes, fill_value=-1)
    elif ignore_order or all(not c.ordered for c in to_union):
        # different categories - union and recode
        cats = first.categories.append([c.categories for c in to_union[1:]])
        categories = Index(cats.unique())
        if sort_categories:
            categories = categories.sort_values()

        new_codes = [
            _recode_for_categories(c.codes, c.categories, categories) for c in to_union
        ]
        new_codes = np.concatenate(new_codes)
    else:
        # ordered - to show a proper error message
        if all(c.ordered for c in to_union):
            msg = "to union ordered Categoricals, " "all categories must be the same"
            raise TypeError(msg)
        else:
            raise TypeError("Categorical.ordered must be the same")

    if ignore_order:
        ordered = False

    return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True)
|
||||
|
||||
|
||||
def _concatenate_2d(to_concat, axis):
|
||||
# coerce to 2d if needed & concatenate
|
||||
if axis == 1:
|
||||
to_concat = [np.atleast_2d(x) for x in to_concat]
|
||||
return np.concatenate(to_concat, axis=axis)
|
||||
|
||||
|
||||
def _concat_datetime(to_concat, axis=0, typs=None):
    """
    provide concatenation of an datetimelike array of arrays each of which is a
    single M8[ns], datetime64[ns, tz] or m8[ns] dtype

    Parameters
    ----------
    to_concat : array of arrays
    axis : axis to provide concatenation
    typs : set of to_concat dtypes

    Returns
    -------
    a single array, preserving the combined dtypes
    """

    if typs is None:
        typs = get_dtype_kinds(to_concat)

    # multiple types, need to coerce to object
    if len(typs) != 1:
        return _concatenate_2d(
            [_convert_datetimelike_to_object(x) for x in to_concat], axis=axis
        )

    # must be single dtype
    if any(typ.startswith("datetime") for typ in typs):

        if "datetime" in typs:
            # tz-naive: concatenate as int64 then reinterpret as M8[ns]
            to_concat = [x.astype(np.int64, copy=False) for x in to_concat]
            return _concatenate_2d(to_concat, axis=axis).view(_NS_DTYPE)
        else:
            # when to_concat has different tz, len(typs) > 1.
            # thus no need to care
            return _concat_datetimetz(to_concat)

    elif "timedelta" in typs:
        return _concatenate_2d([x.view(np.int64) for x in to_concat], axis=axis).view(
            _TD_DTYPE
        )

    elif any(typ.startswith("period") for typ in typs):
        assert len(typs) == 1
        cls = to_concat[0]
        new_values = cls._concat_same_type(to_concat)
        return new_values
|
||||
|
||||
|
||||
def _convert_datetimelike_to_object(x):
    """
    Coerce a datetime-like array to an object-dtype ndarray of
    Timestamp / Timedelta scalars; any other input is returned unchanged.
    """
    if x.dtype.kind == _NS_DTYPE.kind:
        # datetime64-like; tz-aware containers expose a `tz` attribute
        if getattr(x, "tz", None) is not None:
            return np.asarray(x.astype(object))
        original_shape = x.shape
        boxed = tslib.ints_to_pydatetime(x.view(np.int64).ravel(), box="timestamp")
        return boxed.reshape(original_shape)

    if x.dtype == _TD_DTYPE:
        original_shape = x.shape
        boxed = tslibs.ints_to_pytimedelta(x.view(np.int64).ravel(), box=True)
        return boxed.reshape(original_shape)

    return x
|
||||
|
||||
|
||||
def _concat_datetimetz(to_concat, name=None):
    """
    Concatenate datetime-with-tz inputs that all share the same tz.

    All inputs must be of the same kind (all DatetimeIndex or all
    DatetimeArray); this is also used by DatetimeIndex.append.
    """
    # Right now, internals will pass a List[DatetimeArray] here
    # for reductions like quantile. I would like to disentangle
    # all this before we get here.
    first = to_concat[0]

    if isinstance(first, ABCIndexClass):
        return first._concat_same_dtype(to_concat, name=name)
    if isinstance(first, ABCDatetimeArray):
        return first._concat_same_type(to_concat)
|
||||
|
||||
|
||||
def _concat_index_same_dtype(indexes, klass=None):
|
||||
klass = klass if klass is not None else indexes[0].__class__
|
||||
return klass(np.concatenate([x._values for x in indexes]))
|
||||
|
||||
|
||||
def _concat_index_asobject(to_concat, name=None):
    """
    concat all inputs as object. DatetimeIndex, TimedeltaIndex and
    PeriodIndex are converted to object dtype before concatenation
    """
    from pandas import Index
    from pandas.core.arrays import ExtensionArray

    needs_cast = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, ExtensionArray)
    converted = [
        obj.astype(object) if isinstance(obj, needs_cast) else obj
        for obj in to_concat
    ]

    # the first (converted) input provides the attributes of the result
    template = converted[0]
    attribs = template._get_attributes_dict()
    attribs["name"] = name

    arrays = [obj._values if isinstance(obj, Index) else obj for obj in converted]

    return template._shallow_copy_with_infer(np.concatenate(arrays), **attribs)
|
||||
|
||||
|
||||
def _concat_sparse(to_concat, axis=0, typs=None):
|
||||
"""
|
||||
provide concatenation of an sparse/dense array of arrays each of which is a
|
||||
single dtype
|
||||
|
||||
Parameters
|
||||
----------
|
||||
to_concat : array of arrays
|
||||
axis : axis to provide concatenation
|
||||
typs : set of to_concat dtypes
|
||||
|
||||
Returns
|
||||
-------
|
||||
a single array, preserving the combined dtypes
|
||||
"""
|
||||
|
||||
from pandas.core.arrays import SparseArray
|
||||
|
||||
fill_values = [x.fill_value for x in to_concat if isinstance(x, SparseArray)]
|
||||
fill_value = fill_values[0]
|
||||
|
||||
# TODO: Fix join unit generation so we aren't passed this.
|
||||
to_concat = [
|
||||
x
|
||||
if isinstance(x, SparseArray)
|
||||
else SparseArray(x.squeeze(), fill_value=fill_value)
|
||||
for x in to_concat
|
||||
]
|
||||
|
||||
return SparseArray._concat_same_type(to_concat)
|
||||
|
||||
|
||||
def _concat_rangeindex_same_dtype(indexes):
    """
    Concatenates multiple RangeIndex instances. All members of "indexes" must
    be of type RangeIndex; result will be RangeIndex if possible, Int64Index
    otherwise. E.g.:
    indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6)
    indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5])
    """
    from pandas import Int64Index, RangeIndex

    # start/step describe the combined range built so far; next_ is the value
    # the following range must start at for the result to stay a RangeIndex
    start = step = next_ = None

    # Filter the empty indexes
    non_empty_indexes = [obj for obj in indexes if len(obj)]

    for obj in non_empty_indexes:
        rng = obj._range  # type: range

        if start is None:
            # This is set by the first non-empty index
            start = rng.start
            if step is None and len(rng) > 1:
                step = rng.step
        elif step is None:
            # First non-empty index had only one element
            if rng.start == start:
                # two ranges starting at the same value cannot be expressed
                # as a single RangeIndex; fall back to Int64Index
                return _concat_index_same_dtype(indexes, klass=Int64Index)
            # infer the step from the gap between the first two starts
            step = rng.start - start

        # not representable as one RangeIndex when this range's own step
        # disagrees (and it has >1 element), or when it does not begin
        # exactly where the previous range left off
        non_consecutive = (step != rng.step and len(rng) > 1) or (
            next_ is not None and rng.start != next_
        )
        if non_consecutive:
            return _concat_index_same_dtype(indexes, klass=Int64Index)

        if step is not None:
            # where the next range must start to remain consecutive
            next_ = rng[-1] + step

    if non_empty_indexes:
        # Get the stop value from "next" or alternatively
        # from the last non-empty index
        stop = non_empty_indexes[-1].stop if next_ is None else next_
        return RangeIndex(start, stop, step)

    # Here all "indexes" had 0 length, i.e. were empty.
    # In this case return an empty range index.
    return RangeIndex(0, 0)
|
||||
1110
venv/lib/python3.6/site-packages/pandas/core/dtypes/dtypes.py
Normal file
1110
venv/lib/python3.6/site-packages/pandas/core/dtypes/dtypes.py
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user