A PyQt GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be stored and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/_libs/writers.pyx

175 lines
4.4 KiB

import cython
import numpy as np
from cpython cimport (
PyBytes_GET_SIZE,
PyUnicode_GET_LENGTH,
)
from numpy cimport (
ndarray,
uint8_t,
)
# Fused type: a function declared with ``pandas_string`` is specialized for
# both ``str`` and ``bytes`` (used by max_len_string_array below).
ctypedef fused pandas_string:
    str
    bytes
@cython.boundscheck(False)
@cython.wraparound(False)
def write_csv_rows(
    list data,
    ndarray data_index,
    Py_ssize_t nlevels,
    ndarray cols,
    object writer
) -> None:
    """
    Write the given data to the writer object, pre-allocating where possible
    for performance improvements.

    Rows are assembled in a reusable buffer of ``N`` row lists that is handed
    to ``writer.writerows`` every time it fills up; a final, partially filled
    buffer is flushed after the loop. Nothing is written when ``data_index``
    is empty.

    Parameters
    ----------
    data : list[ArrayLike]
        One array-like per column; ``data[i][j]`` is the cell of column ``i``
        in row ``j``.
    data_index : ndarray
        One entry per row. With ``nlevels == 1`` the entry is written as the
        first cell; with ``nlevels > 1`` it is expanded via ``list(...)`` into
        the first ``nlevels`` cells.
    nlevels : int
        Number of leading index cells per row. Any value below 1 writes no
        index cells.
    cols : ndarray
        Only its length is used, as the number of data columns.
    writer : _csv.writer
        Object whose ``writerows`` method receives the buffered rows.
    """
    # In crude testing, N>100 yields little marginal improvement
    cdef:
        Py_ssize_t i, j = 0, k = len(data_index), N = 100, ncols = len(cols)
        list rows

    if k == 0:
        # With no rows the loops below never execute and j keeps its initial
        # value of 0, so the trailing flush would emit one all-None row.
        return

    # pre-allocate rows
    rows = [[None] * (nlevels + ncols) for _ in range(N)]

    # The three branches repeat the loop so that the index-layout decision is
    # made once rather than on every row.
    if nlevels == 1:
        for j in range(k):
            row = rows[j % N]
            row[0] = data_index[j]
            for i in range(ncols):
                row[1 + i] = data[i][j]

            # buffer is full: hand it to the writer, then reuse it
            if j % N == N - 1:
                writer.writerows(rows)
    elif nlevels > 1:
        for j in range(k):
            row = rows[j % N]
            row[:nlevels] = list(data_index[j])
            for i in range(ncols):
                row[nlevels + i] = data[i][j]

            if j % N == N - 1:
                writer.writerows(rows)
    else:
        for j in range(k):
            row = rows[j % N]
            for i in range(ncols):
                row[i] = data[i][j]

            if j % N == N - 1:
                writer.writerows(rows)

    # j is now k - 1; flush the rows left over from a partially filled buffer
    # (skipped when the last row completed a buffer that was already written).
    if j % N != N - 1:
        writer.writerows(rows[:((j + 1) % N)])
@cython.boundscheck(False)
@cython.wraparound(False)
def convert_json_to_lines(arr: str) -> str:
    """
    replace comma separated json with line feeds, paying special attention
    to quotes & brackets

    A comma is turned into a line feed only when it sits outside every
    double-quoted string and outside every pair of curly braces. The result
    always ends with one trailing line feed.

    Parameters
    ----------
    arr : str
        Comma separated JSON values.

    Returns
    -------
    str
        The input with each separating comma replaced by a line feed, plus a
        final line feed.
    """
    cdef:
        Py_ssize_t i = 0, num_open_brackets_seen = 0, length
        bint in_quotes = False, is_escaping = False
        ndarray[uint8_t, ndim=1] narr
        unsigned char val, newline, comma, left_bracket, right_bracket, quote
        unsigned char backslash

    newline = ord('\n')
    comma = ord(',')
    left_bracket = ord('{')
    right_bracket = ord('}')
    quote = ord('"')
    backslash = ord('\\')

    # Work on a private copy of the UTF-8 bytes, since commas are overwritten
    # in place. Every marker is ASCII, so the bytes of multi-byte characters
    # never compare equal to one of them.
    narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy()
    length = narr.shape[0]
    for i in range(length):
        val = narr[i]
        # An unescaped quote toggles the string state. This includes a quote
        # at index 0: skipping it would invert the state for the remainder of
        # the input.
        if val == quote and not is_escaping:
            in_quotes = not in_quotes
        # A backslash starts an escape; the byte following it ends the escape.
        if val == backslash or is_escaping:
            is_escaping = not is_escaping
        if val == comma:  # commas that should be \n
            if num_open_brackets_seen == 0 and not in_quotes:
                narr[i] = newline
        elif val == left_bracket:
            if not in_quotes:
                num_open_brackets_seen += 1
        elif val == right_bracket:
            if not in_quotes:
                num_open_brackets_seen -= 1

    return narr.tobytes().decode('utf-8') + '\n'  # GH:36888
# stata, pytables
@cython.boundscheck(False)
@cython.wraparound(False)
def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t:
    """
    Find the length of the longest element in a 1-dim string array.

    Each element is measured with ``word_len``; an empty array gives 0.
    """
    cdef:
        Py_ssize_t pos, count = arr.shape[0]
        Py_ssize_t longest = 0, current = 0
        pandas_string item

    for pos in range(count):
        item = arr[pos]
        current = word_len(item)
        if longest < current:
            longest = current

    return longest
cpdef inline Py_ssize_t word_len(object val):
    """
    Return the length of a str or bytes value.

    Values of any other type are reported as length 0.
    """
    if isinstance(val, str):
        return PyUnicode_GET_LENGTH(val)
    if isinstance(val, bytes):
        return PyBytes_GET_SIZE(val)
    return 0
# ------------------------------------------------------------------
# PyTables Helpers
@cython.boundscheck(False)
@cython.wraparound(False)
def string_array_replace_from_nan_rep(
    ndarray[object, ndim=1] arr,
    object nan_rep,
    object replace=np.nan
) -> None:
    """
    Overwrite, in place, every element of ``arr`` that compares equal to
    ``nan_rep`` with ``replace``.

    The array is modified directly and nothing is returned.
    """
    cdef:
        Py_ssize_t pos, count = len(arr)

    for pos in range(count):
        if arr[pos] == nan_rep:
            arr[pos] = replace