8th day of python challenges 111-117
12
venv/lib/python3.6/site-packages/pandas/io/json/__init__.py
Normal file
@@ -0,0 +1,12 @@
from pandas.io.json._json import dumps, loads, read_json, to_json
from pandas.io.json._normalize import json_normalize
from pandas.io.json._table_schema import build_table_schema

__all__ = [
    "dumps",
    "loads",
    "read_json",
    "to_json",
    "json_normalize",
    "build_table_schema",
]
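For orientation, a minimal sketch of how this public API is typically consumed; illustrative only, assuming a pandas 0.25-era environment where these paths are importable.

# Hypothetical usage sketch, not part of the committed file.
from pandas.io.json import json_normalize, build_table_schema

records = [{"id": 1, "meta": {"tag": "a"}}, {"id": 2, "meta": {"tag": "b"}}]
df = json_normalize(records)        # flattens "meta" into a "meta.tag" column
schema = build_table_schema(df)     # Table Schema dict describing the frame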
1188
venv/lib/python3.6/site-packages/pandas/io/json/_json.py
Normal file
File diff suppressed because it is too large
343
venv/lib/python3.6/site-packages/pandas/io/json/_normalize.py
Normal file
@@ -0,0 +1,343 @@
# ---------------------------------------------------------------------
# JSON normalization routines

from collections import defaultdict
import copy
from typing import DefaultDict, Dict, List, Optional, Union

import numpy as np

from pandas._libs.writers import convert_json_to_lines

from pandas import DataFrame

def convert_to_line_delimits(s):
    """
    Helper function that converts JSON lists to line-delimited JSON.
    """

    # Determine whether we have a JSON list to turn into lines; otherwise
    # just return the JSON object, since only lists can be converted.
    # (Parenthesized so `not` applies to the whole "starts with [ and ends
    # with ]" test, not only to the first comparison.)
    if not (s[0] == "[" and s[-1] == "]"):
        return s
    s = s[1:-1]

    return convert_json_to_lines(s)
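A quick sketch of the intended behavior (convert_json_to_lines is a compiled pandas helper, so this assumes a working install):

# A JSON array of objects becomes newline-delimited JSON, one object per line.
s = '[{"a":1},{"a":2}]'
print(convert_to_line_delimits(s))
# {"a":1}
# {"a":2}

# Non-list input is returned unchanged.
print(convert_to_line_delimits('{"a":1}'))  # {"a":1}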
def nested_to_record(
    ds,
    prefix: str = "",
    sep: str = ".",
    level: int = 0,
    max_level: Optional[int] = None,
):
    """
    A simplified json_normalize.

    Converts a nested dict into a flat dict ("record"); unlike json_normalize,
    it does not attempt to extract a subset of the data.

    Parameters
    ----------
    ds : dict or list of dicts
    prefix : str, optional, default: ""
        The prefix prepended to flattened key names.
    sep : str, default '.'
        Nested records will generate names separated by sep,
        e.g., for sep='.', {'foo': {'bar': 0}} -> foo.bar

        .. versionadded:: 0.20.0

    level : int, optional, default: 0
        The number of levels in the json string.

    max_level : int, optional, default: None
        The max depth to normalize.

        .. versionadded:: 0.25.0

    Returns
    -------
    d : dict or list of dicts, matching `ds`

    Examples
    --------
    IN[52]: nested_to_record(dict(flat1=1, dict1=dict(c=1, d=2),
                                  nested=dict(e=dict(c=1, d=2), d=2)))
    Out[52]:
    {'dict1.c': 1,
     'dict1.d': 2,
     'flat1': 1,
     'nested.d': 2,
     'nested.e.c': 1,
     'nested.e.d': 2}
    """
    singleton = False
    if isinstance(ds, dict):
        ds = [ds]
        singleton = True
    new_ds = []
    for d in ds:
        new_d = copy.deepcopy(d)
        for k, v in d.items():
            # each key gets renamed with prefix
            if not isinstance(k, str):
                k = str(k)
            if level == 0:
                newkey = k
            else:
                newkey = prefix + sep + k

            # flatten if type is dict and
            # current dict level < maximum level provided and
            # only dicts get recurse-flattened;
            # only at level > 0 do we rename the rest of the keys
            if not isinstance(v, dict) or (
                max_level is not None and level >= max_level
            ):
                if level != 0:  # so we skip copying for top level, common case
                    v = new_d.pop(k)
                    new_d[newkey] = v
                continue
            else:
                v = new_d.pop(k)
                new_d.update(nested_to_record(v, newkey, sep, level + 1, max_level))
        new_ds.append(new_d)

    if singleton:
        return new_ds[0]
    return new_ds
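The docstring example flattens fully; the sketch below illustrates the max_level cut-off, assuming levels are counted from 0 as in the body above.

# max_level=1 flattens one level and leaves deeper dicts intact.
record = {"flat": 0, "outer": {"inner": {"deep": 1}}}
print(nested_to_record(record, max_level=1))
# {'flat': 0, 'outer.inner': {'deep': 1}}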
def json_normalize(
    data: Union[Dict, List[Dict]],
    record_path: Optional[Union[str, List]] = None,
    meta: Optional[Union[str, List]] = None,
    meta_prefix: Optional[str] = None,
    record_prefix: Optional[str] = None,
    errors: Optional[str] = "raise",
    sep: str = ".",
    max_level: Optional[int] = None,
):
    """
    Normalize semi-structured JSON data into a flat table.

    Parameters
    ----------
    data : dict or list of dicts
        Unserialized JSON objects.
    record_path : str or list of str, default None
        Path in each object to list of records. If not passed, data will be
        assumed to be an array of records.
    meta : list of paths (str or list of str), default None
        Fields to use as metadata for each record in resulting table.
    meta_prefix : str, default None
        If not None, prefix metadata column names with the dotted path,
        e.g. foo.bar.field if meta is ['foo', 'bar'].
    record_prefix : str, default None
        If not None, prefix record column names with the dotted path,
        e.g. foo.bar.field if path to records is ['foo', 'bar'].
    errors : {'raise', 'ignore'}, default 'raise'
        Configures error handling.

        * 'ignore' : will ignore KeyError if keys listed in meta are not
          always present.
        * 'raise' : will raise KeyError if keys listed in meta are not
          always present.

        .. versionadded:: 0.20.0

    sep : str, default '.'
        Nested records will generate names separated by sep,
        e.g., for sep='.', {'foo': {'bar': 0}} -> foo.bar.

        .. versionadded:: 0.20.0

    max_level : int, default None
        Max number of levels (depth of dict) to normalize.
        If None, normalizes all levels.

        .. versionadded:: 0.25.0

    Returns
    -------
    frame : DataFrame
        Normalized semi-structured JSON data as a flat table.

    Examples
    --------
    >>> from pandas.io.json import json_normalize
    >>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
    ...         {'name': {'given': 'Mose', 'family': 'Regner'}},
    ...         {'id': 2, 'name': 'Faye Raker'}]
    >>> json_normalize(data)
        id        name name.family name.first name.given name.last
    0  1.0         NaN         NaN     Coleen        NaN      Volk
    1  NaN         NaN      Regner        NaN       Mose       NaN
    2  2.0  Faye Raker         NaN        NaN        NaN       NaN

    >>> data = [{'id': 1,
    ...          'name': "Cole Volk",
    ...          'fitness': {'height': 130, 'weight': 60}},
    ...         {'name': "Mose Reg",
    ...          'fitness': {'height': 130, 'weight': 60}},
    ...         {'id': 2, 'name': 'Faye Raker',
    ...          'fitness': {'height': 130, 'weight': 60}}]
    >>> json_normalize(data, max_level=0)
                             fitness   id        name
    0  {'height': 130, 'weight': 60}  1.0   Cole Volk
    1  {'height': 130, 'weight': 60}  NaN    Mose Reg
    2  {'height': 130, 'weight': 60}  2.0  Faye Raker

    Normalizes nested data up to level 1.

    >>> data = [{'id': 1,
    ...          'name': "Cole Volk",
    ...          'fitness': {'height': 130, 'weight': 60}},
    ...         {'name': "Mose Reg",
    ...          'fitness': {'height': 130, 'weight': 60}},
    ...         {'id': 2, 'name': 'Faye Raker',
    ...          'fitness': {'height': 130, 'weight': 60}}]
    >>> json_normalize(data, max_level=1)
       fitness.height  fitness.weight   id        name
    0             130              60  1.0   Cole Volk
    1             130              60  NaN    Mose Reg
    2             130              60  2.0  Faye Raker

    >>> data = [{'state': 'Florida',
    ...          'shortname': 'FL',
    ...          'info': {'governor': 'Rick Scott'},
    ...          'counties': [{'name': 'Dade', 'population': 12345},
    ...                       {'name': 'Broward', 'population': 40000},
    ...                       {'name': 'Palm Beach', 'population': 60000}]},
    ...         {'state': 'Ohio',
    ...          'shortname': 'OH',
    ...          'info': {'governor': 'John Kasich'},
    ...          'counties': [{'name': 'Summit', 'population': 1234},
    ...                       {'name': 'Cuyahoga', 'population': 1337}]}]
    >>> result = json_normalize(data, 'counties', ['state', 'shortname',
    ...                                            ['info', 'governor']])
    >>> result
             name  population    state shortname info.governor
    0        Dade       12345  Florida        FL    Rick Scott
    1     Broward       40000  Florida        FL    Rick Scott
    2  Palm Beach       60000  Florida        FL    Rick Scott
    3      Summit        1234     Ohio        OH   John Kasich
    4    Cuyahoga        1337     Ohio        OH   John Kasich

    >>> data = {'A': [1, 2]}
    >>> json_normalize(data, 'A', record_prefix='Prefix.')
       Prefix.0
    0         1
    1         2

    Returns normalized data with columns prefixed with the given string.
    """
    def _pull_field(js, spec):
        result = js
        if isinstance(spec, list):
            for field in spec:
                result = result[field]
        else:
            result = result[spec]

        return result
    if isinstance(data, list) and not data:
        return DataFrame()

    # A bit of a hackjob
    if isinstance(data, dict):
        data = [data]

    if record_path is None:
        # Flatten only if some record value is itself a dict. (The original
        # `any([...] for y in data)` wrapped each test in a list, which is
        # truthy whenever non-empty, so it triggered for any flat record too.)
        if any(isinstance(x, dict) for y in data for x in y.values()):
            # naive normalization, this is idempotent for flat records
            # and potentially will inflate the data considerably for
            # deeply nested structures:
            #  {VeryLong: {b: 1, c: 2}} -> {VeryLong.b: 1, VeryLong.c: 2}
            #
            # TODO: handle record values which are lists, at least error
            #       reasonably
            data = nested_to_record(data, sep=sep, max_level=max_level)
        return DataFrame(data)
    elif not isinstance(record_path, list):
        record_path = [record_path]

    if meta is None:
        meta = []
    elif not isinstance(meta, list):
        meta = [meta]

    meta = [m if isinstance(m, list) else [m] for m in meta]

    # Disastrously inefficient for now
    records = []  # type: List
    lengths = []

    meta_vals = defaultdict(list)  # type: DefaultDict
    meta_keys = [sep.join(val) for val in meta]
    def _recursive_extract(data, path, seen_meta, level=0):
        if isinstance(data, dict):
            data = [data]
        if len(path) > 1:
            for obj in data:
                for val, key in zip(meta, meta_keys):
                    if level + 1 == len(val):
                        seen_meta[key] = _pull_field(obj, val[-1])

                _recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1)
        else:
            for obj in data:
                recs = _pull_field(obj, path[0])
                recs = [
                    nested_to_record(r, sep=sep, max_level=max_level)
                    if isinstance(r, dict)
                    else r
                    for r in recs
                ]

                # For repeating the metadata later
                lengths.append(len(recs))
                for val, key in zip(meta, meta_keys):
                    if level + 1 > len(val):
                        meta_val = seen_meta[key]
                    else:
                        try:
                            meta_val = _pull_field(obj, val[level:])
                        except KeyError as e:
                            if errors == "ignore":
                                meta_val = np.nan
                            else:
                                raise KeyError(
                                    "Try running with errors='ignore' as key "
                                    "{err} is not always present".format(err=e)
                                )
                    meta_vals[key].append(meta_val)
                records.extend(recs)

    _recursive_extract(data, record_path, {}, level=0)

    result = DataFrame(records)

    if record_prefix is not None:
        result = result.rename(columns=lambda x: "{p}{c}".format(p=record_prefix, c=x))

    # Data types, a problem
    for k, v in meta_vals.items():
        if meta_prefix is not None:
            k = meta_prefix + k

        if k in result:
            raise ValueError(
                "Conflicting metadata name {name}, "
                "need distinguishing prefix".format(name=k)
            )
        result[k] = np.array(v, dtype=object).repeat(lengths)
    return result
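One case the docstring does not cover is a meta key missing from some records; a small sketch of the errors= handling above, using made-up data:

# With errors='ignore', a missing meta key becomes NaN instead of raising.
data = [
    {"state": "Florida", "counties": [{"name": "Dade"}]},
    {"counties": [{"name": "Summit"}]},  # no 'state' key here
]
df = json_normalize(data, "counties", ["state"], errors="ignore")
# df["state"] -> ['Florida', nan]; errors='raise' would raise KeyError.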
338
venv/lib/python3.6/site-packages/pandas/io/json/_table_schema.py
Normal file
@@ -0,0 +1,338 @@
"""
Table Schema builders

http://specs.frictionlessdata.io/json-table-schema/
"""
import warnings

import pandas._libs.json as json

from pandas.core.dtypes.common import (
    is_bool_dtype,
    is_categorical_dtype,
    is_datetime64_dtype,
    is_datetime64tz_dtype,
    is_integer_dtype,
    is_numeric_dtype,
    is_period_dtype,
    is_string_dtype,
    is_timedelta64_dtype,
)

from pandas import DataFrame
from pandas.api.types import CategoricalDtype
import pandas.core.common as com

loads = json.loads

def as_json_table_type(x):
    """
    Convert a NumPy / pandas type to its corresponding json_table.

    Parameters
    ----------
    x : array or dtype

    Returns
    -------
    t : str
        the Table Schema data type

    Notes
    -----
    This table shows the relationship between NumPy / pandas dtypes
    and Table Schema dtypes.

    ===============  =================
    Pandas type      Table Schema type
    ===============  =================
    int64            integer
    float64          number
    bool             boolean
    datetime64[ns]   datetime
    timedelta64[ns]  duration
    object           string
    categorical      any
    ===============  =================
    """
    if is_integer_dtype(x):
        return "integer"
    elif is_bool_dtype(x):
        return "boolean"
    elif is_numeric_dtype(x):
        return "number"
    elif is_datetime64_dtype(x) or is_datetime64tz_dtype(x) or is_period_dtype(x):
        return "datetime"
    elif is_timedelta64_dtype(x):
        return "duration"
    elif is_categorical_dtype(x):
        return "any"
    elif is_string_dtype(x):
        return "string"
    else:
        return "any"
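A short sketch of the mapping, assuming NumPy dtypes as input:

import numpy as np
as_json_table_type(np.dtype("int64"))           # 'integer'
as_json_table_type(np.dtype("datetime64[ns]"))  # 'datetime'
as_json_table_type(np.dtype("object"))          # 'string'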
def set_default_names(data):
    """Sets index names to 'index' for regular, or 'level_x' for Multi"""
    if com._all_not_none(*data.index.names):
        nms = data.index.names
        if len(nms) == 1 and data.index.name == "index":
            warnings.warn("Index name of 'index' is not round-trippable")
        elif len(nms) > 1 and any(x.startswith("level_") for x in nms):
            warnings.warn(
                "Index names beginning with 'level_' are not round-trippable"
            )
        return data

    data = data.copy()
    if data.index.nlevels > 1:
        names = [
            name if name is not None else "level_{}".format(i)
            for i, name in enumerate(data.index.names)
        ]
        data.index.names = names
    else:
        data.index.name = data.index.name or "index"
    return data
def convert_pandas_type_to_json_field(arr, dtype=None):
    """Build a Table Schema field descriptor for a pandas array or Index."""
    dtype = dtype or arr.dtype
    if arr.name is None:
        name = "values"
    else:
        name = arr.name
    field = {"name": name, "type": as_json_table_type(dtype)}

    if is_categorical_dtype(arr):
        if hasattr(arr, "categories"):
            cats = arr.categories
            ordered = arr.ordered
        else:
            cats = arr.cat.categories
            ordered = arr.cat.ordered
        field["constraints"] = {"enum": list(cats)}
        field["ordered"] = ordered
    elif is_period_dtype(arr):
        field["freq"] = arr.freqstr
    elif is_datetime64tz_dtype(arr):
        if hasattr(arr, "dt"):
            field["tz"] = arr.dt.tz.zone
        else:
            field["tz"] = arr.tz.zone
    return field
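A sketch of the field descriptor this produces for a categorical column (assumes pandas imported as pd):

import pandas as pd
s = pd.Series(["a", "b", "a"], dtype="category", name="letters")
convert_pandas_type_to_json_field(s)
# {'name': 'letters', 'type': 'any',
#  'constraints': {'enum': ['a', 'b']}, 'ordered': False}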
def convert_json_field_to_pandas_type(field):
    """
    Converts a JSON field descriptor into its corresponding NumPy / pandas type

    Parameters
    ----------
    field
        A JSON field descriptor

    Returns
    -------
    dtype

    Raises
    ------
    ValueError
        If the type of the provided field is unknown or currently unsupported

    Examples
    --------
    >>> convert_json_field_to_pandas_type({'name': 'an_int',
    ...                                    'type': 'integer'})
    'int64'
    >>> convert_json_field_to_pandas_type({'name': 'a_categorical',
    ...                                    'type': 'any',
    ...                                    'constraints': {'enum': [
    ...                                        'a', 'b', 'c']},
    ...                                    'ordered': True})
    'CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)'
    >>> convert_json_field_to_pandas_type({'name': 'a_datetime',
    ...                                    'type': 'datetime'})
    'datetime64[ns]'
    >>> convert_json_field_to_pandas_type({'name': 'a_datetime_with_tz',
    ...                                    'type': 'datetime',
    ...                                    'tz': 'US/Central'})
    'datetime64[ns, US/Central]'
    """
    typ = field["type"]
    if typ == "string":
        return "object"
    elif typ == "integer":
        return "int64"
    elif typ == "number":
        return "float64"
    elif typ == "boolean":
        return "bool"
    elif typ == "duration":
        return "timedelta64"
    elif typ == "datetime":
        if field.get("tz"):
            return "datetime64[ns, {tz}]".format(tz=field["tz"])
        else:
            return "datetime64[ns]"
    elif typ == "any":
        if "constraints" in field and "ordered" in field:
            return CategoricalDtype(
                categories=field["constraints"]["enum"], ordered=field["ordered"]
            )
        else:
            return "object"

    raise ValueError("Unsupported or invalid field type: {}".format(typ))
def build_table_schema(data, index=True, primary_key=None, version=True):
    """
    Create a Table schema from ``data``.

    Parameters
    ----------
    data : Series, DataFrame
    index : bool, default True
        Whether to include ``data.index`` in the schema.
    primary_key : bool or None, default None
        Column names to designate as the primary key.
        The default `None` will set `'primaryKey'` to the index
        level or levels if the index is unique.
    version : bool, default True
        Whether to include a field `pandas_version` with the version
        of pandas that generated the schema.

    Returns
    -------
    schema : dict

    Notes
    -----
    See `as_json_table_type` for conversion types.
    Timedeltas are converted to ISO8601 duration format with
    9 decimal places after the seconds field for nanosecond precision.

    Categoricals are converted to the `any` dtype, and use the `enum` field
    constraint to list the allowed values. The `ordered` attribute is included
    in an `ordered` field.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {'A': [1, 2, 3],
    ...      'B': ['a', 'b', 'c'],
    ...      'C': pd.date_range('2016-01-01', freq='d', periods=3),
    ...      }, index=pd.Index(range(3), name='idx'))
    >>> build_table_schema(df)
    {'fields': [{'name': 'idx', 'type': 'integer'},
    {'name': 'A', 'type': 'integer'},
    {'name': 'B', 'type': 'string'},
    {'name': 'C', 'type': 'datetime'}],
    'pandas_version': '0.20.0',
    'primaryKey': ['idx']}
    """
    if index is True:
        data = set_default_names(data)

    schema = {}
    fields = []

    if index:
        if data.index.nlevels > 1:
            for level in data.index.levels:
                fields.append(convert_pandas_type_to_json_field(level))
        else:
            fields.append(convert_pandas_type_to_json_field(data.index))

    if data.ndim > 1:
        for column, s in data.items():
            fields.append(convert_pandas_type_to_json_field(s))
    else:
        fields.append(convert_pandas_type_to_json_field(data))

    schema["fields"] = fields
    if index and data.index.is_unique and primary_key is None:
        if data.index.nlevels == 1:
            schema["primaryKey"] = [data.index.name]
        else:
            schema["primaryKey"] = data.index.names
    elif primary_key is not None:
        schema["primaryKey"] = primary_key

    if version:
        schema["pandas_version"] = "0.20.0"
    return schema
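In practice this schema is what DataFrame.to_json(orient='table') embeds next to the data; a brief sketch:

import pandas as pd
df = pd.DataFrame({"A": [1, 2]}, index=pd.Index([0, 1], name="idx"))
payload = df.to_json(orient="table")  # embeds build_table_schema(df) output
# payload looks like '{"schema": {...}, "data": [...]}'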
def parse_table_schema(json, precise_float):
    """
    Builds a DataFrame from a given schema

    Parameters
    ----------
    json : str
        A JSON table schema
    precise_float : boolean
        Flag controlling precision when decoding string to double values, as
        dictated by ``read_json``

    Returns
    -------
    df : DataFrame

    Raises
    ------
    NotImplementedError
        If the JSON table schema contains either timezone or timedelta data

    Notes
    -----
    Because :func:`DataFrame.to_json` uses the string 'index' to denote a
    name-less :class:`Index`, this function sets the name of the returned
    :class:`DataFrame` to ``None`` when said string is encountered with a
    normal :class:`Index`. For a :class:`MultiIndex`, the same limitation
    applies to any strings beginning with 'level_'. Therefore, an
    :class:`Index` name of 'index' and :class:`MultiIndex` names starting
    with 'level_' are not supported.

    See Also
    --------
    build_table_schema : Inverse function.
    pandas.read_json
    """
    table = loads(json, precise_float=precise_float)
    col_order = [field["name"] for field in table["schema"]["fields"]]
    df = DataFrame(table["data"], columns=col_order)[col_order]

    dtypes = {
        field["name"]: convert_json_field_to_pandas_type(field)
        for field in table["schema"]["fields"]
    }

    # Cannot directly use astype with timezone data on object; raise for now
    if any(str(x).startswith("datetime64[ns, ") for x in dtypes.values()):
        raise NotImplementedError('table="orient" can not yet read timezone data')

    # No ISO constructor for Timedelta as of yet, so need to raise
    if "timedelta64" in dtypes.values():
        raise NotImplementedError(
            'table="orient" can not yet read ISO-formatted Timedelta data'
        )

    df = df.astype(dtypes)

    if "primaryKey" in table["schema"]:
        df = df.set_index(table["schema"]["primaryKey"])
        if len(df.index.names) == 1:
            if df.index.name == "index":
                df.index.name = None
        else:
            df.index.names = [
                None if x.startswith("level_") else x for x in df.index.names
            ]

    return df
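Round-trip sketch: read_json(..., orient='table') routes through this parser, assuming no timezone or timedelta columns (which raise NotImplementedError above):

import pandas as pd
df = pd.DataFrame({"A": [1, 2]}, index=pd.Index([0, 1], name="idx"))
payload = df.to_json(orient="table")
assert pd.read_json(payload, orient="table").equals(df)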