A PyQT GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be store and compiles monthly and yearly summaries.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/io/excel/_xlrd.py

112 lines
3.6 KiB

from datetime import time
import numpy as np
from pandas._typing import StorageOptions
from pandas.compat._optional import import_optional_dependency
from pandas.io.excel._base import BaseExcelReader
class XlrdReader(BaseExcelReader):
def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
"""
Reader using xlrd engine.
Parameters
----------
filepath_or_buffer : str, path object or Workbook
Object to be parsed.
storage_options : dict, optional
passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``)
"""
err_msg = "Install xlrd >= 1.0.0 for Excel support"
import_optional_dependency("xlrd", extra=err_msg)
super().__init__(filepath_or_buffer, storage_options=storage_options)
@property
def _workbook_class(self):
from xlrd import Book
return Book
def load_workbook(self, filepath_or_buffer):
from xlrd import open_workbook
if hasattr(filepath_or_buffer, "read"):
data = filepath_or_buffer.read()
return open_workbook(file_contents=data)
else:
return open_workbook(filepath_or_buffer)
@property
def sheet_names(self):
return self.book.sheet_names()
def get_sheet_by_name(self, name):
self.raise_if_bad_sheet_by_name(name)
return self.book.sheet_by_name(name)
def get_sheet_by_index(self, index):
self.raise_if_bad_sheet_by_index(index)
return self.book.sheet_by_index(index)
def get_sheet_data(self, sheet, convert_float):
from xlrd import (
XL_CELL_BOOLEAN,
XL_CELL_DATE,
XL_CELL_ERROR,
XL_CELL_NUMBER,
xldate,
)
epoch1904 = self.book.datemode
def _parse_cell(cell_contents, cell_typ):
"""
converts the contents of the cell into a pandas appropriate object
"""
if cell_typ == XL_CELL_DATE:
# Use the newer xlrd datetime handling.
try:
cell_contents = xldate.xldate_as_datetime(cell_contents, epoch1904)
except OverflowError:
return cell_contents
# Excel doesn't distinguish between dates and time,
# so we treat dates on the epoch as times only.
# Also, Excel supports 1900 and 1904 epochs.
year = (cell_contents.timetuple())[0:3]
if (not epoch1904 and year == (1899, 12, 31)) or (
epoch1904 and year == (1904, 1, 1)
):
cell_contents = time(
cell_contents.hour,
cell_contents.minute,
cell_contents.second,
cell_contents.microsecond,
)
elif cell_typ == XL_CELL_ERROR:
cell_contents = np.nan
elif cell_typ == XL_CELL_BOOLEAN:
cell_contents = bool(cell_contents)
elif convert_float and cell_typ == XL_CELL_NUMBER:
# GH5394 - Excel 'numbers' are always floats
# it's a minimal perf hit and less surprising
val = int(cell_contents)
if val == cell_contents:
cell_contents = val
return cell_contents
data = []
for i in range(sheet.nrows):
row = [
_parse_cell(value, typ)
for value, typ in zip(sheet.row_values(i), sheet.row_types(i))
]
data.append(row)
return data