174 lines
5.0 KiB
Python
174 lines
5.0 KiB
Python
""" pickle compat """
|
|
from io import BytesIO
|
|
import pickle
|
|
import warnings
|
|
|
|
from numpy.lib.format import read_array
|
|
|
|
from pandas.compat import pickle_compat as pc
|
|
|
|
from pandas.io.common import _get_handle, _stringify_path
|
|
|
|
|
|
def to_pickle(obj, path, compression="infer", protocol=pickle.HIGHEST_PROTOCOL):
    """
    Pickle (serialize) object to file.

    Parameters
    ----------
    obj : any object
        Any python object.
    path : str
        File path where the pickled object will be stored.
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
        A string representing the compression to use in the output file. By
        default, infers from the file extension in specified path.

        .. versionadded:: 0.20.0
    protocol : int
        Int which indicates which protocol should be used by the pickler,
        default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
        values for this parameter depend on the version of Python. For Python
        2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
        For Python >= 3.4, 4 is a valid value. A negative value for the
        protocol parameter is equivalent to setting its value to
        HIGHEST_PROTOCOL.

        .. [1] https://docs.python.org/3/library/pickle.html
        .. versionadded:: 0.21.0

    See Also
    --------
    read_pickle : Load pickled pandas object (or any object) from file.
    DataFrame.to_hdf : Write DataFrame to an HDF5 file.
    DataFrame.to_sql : Write DataFrame to a SQL database.
    DataFrame.to_parquet : Write a DataFrame to the binary parquet format.

    Examples
    --------
    >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
    >>> original_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")
    >>> unpickled_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9

    >>> import os
    >>> os.remove("./dummy.pkl")
    """
    # Resolve the protocol before any handle is opened: if the comparison
    # raises (e.g. a non-numeric protocol), no file has been opened in "wb"
    # mode yet, so nothing is truncated and no handle is left unclosed.
    if protocol < 0:
        protocol = pickle.HIGHEST_PROTOCOL

    path = _stringify_path(path)
    # ``f`` is the handle that receives the payload; ``fh`` is the collection
    # of handles that is closed alongside it once writing is done.
    f, fh = _get_handle(path, "wb", compression=compression, is_text=False)
    try:
        f.write(pickle.dumps(obj, protocol=protocol))
    finally:
        # Release every handle whether or not serialization succeeded.
        f.close()
        for _f in fh:
            _f.close()
|
|
|
|
|
|
def read_pickle(path, compression="infer"):
    """
    Load pickled pandas object (or any object) from file.

    .. warning::

       Loading pickled data received from untrusted sources can be
       unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.

    Parameters
    ----------
    path : str
        File path where the pickled object will be loaded.
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
        For on-the-fly decompression of on-disk data. If 'infer', then use
        gzip, bz2, xz or zip if path ends in '.gz', '.bz2', '.xz',
        or '.zip' respectively, and no decompression otherwise.
        Set to None for no decompression.

        .. versionadded:: 0.20.0

    Returns
    -------
    unpickled : same type as object stored in file

    See Also
    --------
    DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
    Series.to_pickle : Pickle (serialize) Series object to file.
    read_hdf : Read HDF5 file into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.
    read_parquet : Load a parquet object, returning a DataFrame.

    Notes
    -----
    read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3.

    Three loaders are tried in turn on the same handle. When the handle
    reports that it is seekable, it is rewound to the start of the stream
    before each fallback so that every attempt parses the complete payload.

    Examples
    --------
    >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
    >>> original_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")
    >>> unpickled_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9

    >>> import os
    >>> os.remove("./dummy.pkl")
    """
    path = _stringify_path(path)
    # ``f`` is the handle the payload is read from; ``fh`` is the collection
    # of handles that is closed alongside it once loading is done.
    f, fh = _get_handle(path, "rb", compression=compression, is_text=False)

    # 1) try standard library Pickle
    # 2) try pickle_compat (older pandas version) to handle subclass changes
    # 3) try pickle_compat with latin1 encoding

    try:
        with warnings.catch_warnings(record=True):
            # We want to silence any warnings about, e.g. moved modules.
            warnings.simplefilter("ignore", Warning)
            return pickle.load(f)
    except Exception:
        # The failed attempt has already consumed part of the stream; start
        # over from the beginning, otherwise the fallback parses a truncated
        # payload.
        _rewind_handle(f)
        try:
            return pc.load(f, encoding=None)
        except Exception:
            _rewind_handle(f)
            return pc.load(f, encoding="latin1")
    finally:
        # Release every handle whether or not loading succeeded.
        f.close()
        for _f in fh:
            _f.close()


def _rewind_handle(handle):
    """
    Move ``handle`` back to offset 0 when it reports that it is seekable.

    Handles without a ``seekable`` method, or whose ``seekable()`` returns
    False, are left untouched so that the caller's retry behaves exactly as
    it would without rewinding.
    """
    seekable = getattr(handle, "seekable", None)
    if seekable is not None and seekable():
        handle.seek(0)
|
|
|
|
|
|
# compat with sparse pickle / unpickle
|
|
|
|
|
|
def _unpickle_array(bytes):
|
|
arr = read_array(BytesIO(bytes))
|
|
|
|
return arr
|