8th day of python challenges 111-117
This commit is contained in:
180
venv/lib/python3.6/site-packages/pandas/io/excel/_odfreader.py
Normal file
180
venv/lib/python3.6/site-packages/pandas/io/excel/_odfreader.py
Normal file
@@ -0,0 +1,180 @@
|
||||
from typing import List
|
||||
|
||||
from pandas.compat._optional import import_optional_dependency
|
||||
|
||||
import pandas as pd
|
||||
from pandas._typing import FilePathOrBuffer, Scalar
|
||||
|
||||
from pandas.io.excel._base import _BaseExcelReader
|
||||
|
||||
|
||||
class _ODFReader(_BaseExcelReader):
    """Read tables out of OpenDocument formatted files.

    Parameters
    ----------
    filepath_or_buffer : string, path to be parsed or
        an open readable stream.
    """

    def __init__(self, filepath_or_buffer: FilePathOrBuffer):
        # Fail early with a clear message if odfpy is not installed.
        import_optional_dependency("odf")
        super().__init__(filepath_or_buffer)

    @property
    def _workbook_class(self):
        from odf.opendocument import OpenDocument

        return OpenDocument

    def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
        from odf.opendocument import load

        return load(filepath_or_buffer)

    @property
    def empty_value(self) -> str:
        """Property for compat with other readers."""
        return ""

    @property
    def sheet_names(self) -> List[str]:
        """Return a list of sheet names present in the document."""
        from odf.table import Table

        tables = self.book.getElementsByType(Table)
        return [t.getAttribute("name") for t in tables]

    def get_sheet_by_index(self, index: int):
        """Return the sheet (odf Table element) at positional *index*."""
        from odf.table import Table

        tables = self.book.getElementsByType(Table)
        return tables[index]

    def get_sheet_by_name(self, name: str):
        """Return the sheet (odf Table element) named *name*.

        Raises
        ------
        ValueError
            If no sheet with the given name exists.
        """
        from odf.table import Table

        tables = self.book.getElementsByType(Table)

        for table in tables:
            if table.getAttribute("name") == name:
                return table

        # BUG FIX: the original called .format(name) positionally against a
        # {name} placeholder, which raised KeyError instead of the intended
        # ValueError with the sheet name interpolated.
        raise ValueError("sheet {name} not found".format(name=name))

    def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
        """Parse an ODF Table into a list of lists.

        Repeated rows/columns (a common sparse representation in ODF) are
        expanded, trailing empty cells are dropped, and the result is padded
        so every row has the same length.
        """
        from odf.table import CoveredTableCell, TableCell, TableRow

        covered_cell_name = CoveredTableCell().qname
        table_cell_name = TableCell().qname
        cell_names = {covered_cell_name, table_cell_name}

        sheet_rows = sheet.getElementsByType(TableRow)
        empty_rows = 0
        max_row_len = 0

        table = []  # type: List[List[Scalar]]

        for sheet_row in sheet_rows:
            sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
            empty_cells = 0
            table_row = []  # type: List[Scalar]

            for sheet_cell in sheet_cells:
                if sheet_cell.qname == table_cell_name:
                    value = self._get_cell_value(sheet_cell, convert_float)
                else:
                    # Covered cells (e.g. under a merged region) read as empty.
                    value = self.empty_value

                column_repeat = self._get_column_repeat(sheet_cell)

                # Queue up empty values, writing only if content succeeds them,
                # so trailing empties are dropped from the row.
                if value == self.empty_value:
                    empty_cells += column_repeat
                else:
                    table_row.extend([self.empty_value] * empty_cells)
                    empty_cells = 0
                    table_row.extend([value] * column_repeat)

            if max_row_len < len(table_row):
                max_row_len = len(table_row)

            row_repeat = self._get_row_repeat(sheet_row)
            if self._is_empty_row(sheet_row):
                # Defer empty rows; only materialized if content follows.
                empty_rows += row_repeat
            else:
                # Add the queued blank rows to our table.
                # BUG FIX: the original used ``[[...]] * empty_rows`` and
                # appended the same ``table_row`` object ``row_repeat`` times,
                # so repeated rows were aliases of one list; build a fresh
                # list per row instead (same values, no shared mutation).
                table.extend([self.empty_value] for _ in range(empty_rows))
                empty_rows = 0
                for _ in range(row_repeat):
                    table.append(list(table_row))

        # Make our table square: pad short rows with empty values.
        for row in table:
            if len(row) < max_row_len:
                row.extend([self.empty_value] * (max_row_len - len(row)))

        return table

    def _get_row_repeat(self, row) -> int:
        """Return number of times this row was repeated.

        Repeating an empty row appeared to be a common way
        of representing sparse rows in the table.
        """
        from odf.namespaces import TABLENS

        return int(row.attributes.get((TABLENS, "number-rows-repeated"), 1))

    def _get_column_repeat(self, cell) -> int:
        """Return number of times this cell was repeated (default 1)."""
        from odf.namespaces import TABLENS

        return int(cell.attributes.get((TABLENS, "number-columns-repeated"), 1))

    def _is_empty_row(self, row) -> bool:
        """Helper function to find empty rows: a row is empty when none of
        its child cells has any content nodes."""
        for column in row.childNodes:
            if len(column.childNodes) > 0:
                return False

        return True

    def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
        """Convert a single ODF cell to a Python scalar based on its
        declared ``office:value-type`` attribute.

        Raises
        ------
        ValueError
            If the cell declares an unrecognized value type.
        """
        from odf.namespaces import OFFICENS

        cell_type = cell.attributes.get((OFFICENS, "value-type"))
        if cell_type == "boolean":
            return str(cell) == "TRUE"
        if cell_type is None:
            return self.empty_value
        elif cell_type == "float":
            # GH5394
            cell_value = float(cell.attributes.get((OFFICENS, "value")))

            # NOTE(review): ``str(cell) != cell_value`` compares a str to a
            # float, so it is always True and the condition reduces to
            # ``cell_value == 0.0``; left as-is to preserve behavior —
            # confirm intent before tightening the comparison.
            if cell_value == 0.0 and str(cell) != cell_value:  # NA handling
                return str(cell)

            if convert_float:
                # Downcast to int when the value is integral.
                val = int(cell_value)
                if val == cell_value:
                    return val
            return cell_value
        elif cell_type == "percentage":
            cell_value = cell.attributes.get((OFFICENS, "value"))
            return float(cell_value)
        elif cell_type == "string":
            return str(cell)
        elif cell_type == "currency":
            cell_value = cell.attributes.get((OFFICENS, "value"))
            return float(cell_value)
        elif cell_type == "date":
            cell_value = cell.attributes.get((OFFICENS, "date-value"))
            return pd.to_datetime(cell_value)
        elif cell_type == "time":
            return pd.to_datetime(str(cell)).time()
        else:
            raise ValueError("Unrecognized type {}".format(cell_type))
|
||||
Reference in New Issue
Block a user