A PyQt GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be stored and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/_libs/hashtable_func_helper.pxi.in

515 lines
15 KiB

"""
Template for each `dtype` helper function for hashtable
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""
{{py:
# Code-generation table: every row yields one specialised copy of the
# helper functions in the loop that follows.
# name, dtype, ttype, c_type, to_c_type
#   name      -> prefix of the `<name>Vector` class used to collect keys
#   dtype     -> suffix of the generated function names / element dtype
#   ttype     -> khash flavour (`kh_<ttype>_t`, `kh_get_<ttype>`, ...)
#   c_type    -> C-level type of a single key
#   to_c_type -> conversion applied to each element before hashing
#                ('' means the element is used unchanged)
dtypes = [('Complex128', 'complex128', 'complex128',
                         'khcomplex128_t', 'to_khcomplex128_t'),
          ('Complex64', 'complex64', 'complex64',
                        'khcomplex64_t', 'to_khcomplex64_t'),
          ('Float64', 'float64', 'float64', 'float64_t', ''),
          ('Float32', 'float32', 'float32', 'float32_t', ''),
          ('UInt64', 'uint64', 'uint64', 'uint64_t', ''),
          ('UInt32', 'uint32', 'uint32', 'uint32_t', ''),
          ('UInt16', 'uint16', 'uint16', 'uint16_t', ''),
          ('UInt8', 'uint8', 'uint8', 'uint8_t', ''),
          ('Object', 'object', 'pymap', 'object', ''),
          ('Int64', 'int64', 'int64', 'int64_t', ''),
          ('Int32', 'int32', 'int32', 'int32_t', ''),
          ('Int16', 'int16', 'int16', 'int16_t', ''),
          ('Int8', 'int8', 'int8', 'int8_t', '')]
}}
{{for name, dtype, ttype, c_type, to_c_type in dtypes}}
@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}
cdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
{{else}}
cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
{{endif}}
    """
    Count how often each distinct element of ``values`` occurs.

    Parameters
    ----------
    values : {{dtype}} ndarray
    dropna : bint
        When True, elements recognised as missing are left out of the result.

    Returns
    -------
    tuple
        ``(keys, counts)``: the distinct elements in order of first
        appearance (GH39009) and an int64 array with the matching counts.
    """
    cdef:
        Py_ssize_t pos = 0
        Py_ssize_t n_values = len(values)
        kh_{{ttype}}_t *table
        # khiter_t instead of Py_ssize_t, because table.n_buckets is unsigned
        khiter_t slot
        {{c_type}} key
        int ret = 0
        int64_t[:] result_counts

    # A khash map is not insertion-ordered, so two structures are kept:
    #    table        maps each key to its running count
    #    result_keys  remembers the order in which keys were first seen
    result_keys = {{name}}Vector()
    table = kh_init_{{ttype}}()

    {{if dtype == 'object'}}
    kh_resize_{{ttype}}(table, n_values // 10)

    for pos in range(n_values):
        key = values[pos]
        if dropna and checknull(key):
            continue
        slot = kh_get_{{ttype}}(table, <PyObject*>key)
        if slot == table.n_buckets:
            # key not in the table yet: insert it with a count of one
            slot = kh_put_{{ttype}}(table, <PyObject*>key, &ret)
            table.vals[slot] = 1
            result_keys.append(key)
        else:
            table.vals[slot] += 1
    {{else}}
    kh_resize_{{ttype}}(table, n_values)

    for pos in range(n_values):
        key = {{to_c_type}}(values[pos])
        if dropna and is_nan_{{c_type}}(key):
            continue
        slot = kh_get_{{ttype}}(table, key)
        if slot == table.n_buckets:
            # key not in the table yet: insert it with a count of one
            slot = kh_put_{{ttype}}(table, key, &ret)
            table.vals[slot] = 1
            result_keys.append(key)
        else:
            table.vals[slot] += 1
    {{endif}}

    # read the counts back in the same order as result_keys
    result_counts = np.empty(table.size, dtype=np.int64)
    for pos in range(table.size):
        {{if dtype == 'object'}}
        slot = kh_get_{{ttype}}(table, result_keys.data[pos])
        {{else}}
        slot = kh_get_{{ttype}}(table, result_keys.data.data[pos])
        {{endif}}
        result_counts[pos] = table.vals[slot]

    kh_destroy_{{ttype}}(table)

    return result_keys.to_array(), result_counts.base
@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}
cdef duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'):
{{else}}
cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first'):
{{endif}}
    """
    Flag the elements of ``values`` that duplicate another element.

    Parameters
    ----------
    values : {{dtype}} ndarray
    keep : {'first', 'last', False}, default 'first'
        - 'first' : flag every occurrence except the first one.
        - 'last' : flag every occurrence except the last one.
        - False : flag every occurrence of a repeated value.

    Returns
    -------
    ndarray[bool]
        Same length as ``values``; True marks a duplicate.

    Raises
    ------
    ValueError
        If ``keep`` is not 'first', 'last' or False.
    """
    cdef:
        int ret = 0
        {{if dtype != 'object'}}
        {{c_type}} value
        {{endif}}
        Py_ssize_t i, n = len(values)
        khiter_t k
        kh_{{ttype}}_t *table
        ndarray[uint8_t, ndim=1, cast=True] out

    # Validate the argument and allocate the output *before* creating the
    # hash table: the table is freed manually via kh_destroy, so raising
    # after kh_init would leak it.
    if keep not in ('last', 'first', False):
        raise ValueError('keep must be either "first", "last" or False')

    out = np.empty(n, dtype='bool')

    table = kh_init_{{ttype}}()
    kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))

    if keep == 'last':
        # walk backwards so the last occurrence is the one inserted first;
        # ret == 0 after kh_put is treated as "key was already present"
        {{if dtype == 'object'}}
        for i in range(n - 1, -1, -1):
            # equivalent: range(n)[::-1], which cython doesn't like in nogil
            kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
            out[i] = ret == 0
        {{else}}
        with nogil:
            for i in range(n - 1, -1, -1):
                # equivalent: range(n)[::-1], which cython doesn't like in nogil
                value = {{to_c_type}}(values[i])
                kh_put_{{ttype}}(table, value, &ret)
                out[i] = ret == 0
        {{endif}}
    elif keep == 'first':
        {{if dtype == 'object'}}
        for i in range(n):
            kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
            out[i] = ret == 0
        {{else}}
        with nogil:
            for i in range(n):
                value = {{to_c_type}}(values[i])
                kh_put_{{ttype}}(table, value, &ret)
                out[i] = ret == 0
        {{endif}}
    else:
        # keep is False: the table stores the index of the first occurrence
        # so that it can be flagged retroactively once a repeat shows up
        {{if dtype == 'object'}}
        for i in range(n):
            value = values[i]
            k = kh_get_{{ttype}}(table, <PyObject*>value)
            if k != table.n_buckets:
                out[table.vals[k]] = 1
                out[i] = 1
            else:
                k = kh_put_{{ttype}}(table, <PyObject*>value, &ret)
                table.vals[k] = i
                out[i] = 0
        {{else}}
        with nogil:
            for i in range(n):
                value = {{to_c_type}}(values[i])
                k = kh_get_{{ttype}}(table, value)
                if k != table.n_buckets:
                    out[table.vals[k]] = 1
                    out[i] = 1
                else:
                    k = kh_put_{{ttype}}(table, value, &ret)
                    table.vals[k] = i
                    out[i] = 0
        {{endif}}

    kh_destroy_{{ttype}}(table)
    return out
# ----------------------------------------------------------------------
# Membership
# ----------------------------------------------------------------------
@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}
cdef ismember_{{dtype}}(ndarray[{{c_type}}] arr, ndarray[{{c_type}}] values):
{{else}}
cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
{{endif}}
    """
    Test, element by element, whether each entry of ``arr`` occurs in ``values``.

    Parameters
    ----------
    arr : {{dtype}} ndarray
        Elements to look up.
    values : {{dtype}} ndarray
        Elements that make up the set being searched.

    Returns
    -------
    boolean ndarray with the same length as ``arr``
    """
    cdef:
        Py_ssize_t pos, n_values, n_arr
        khiter_t slot
        int ret = 0
        ndarray[uint8_t] result

        {{c_type}} item
        kh_{{ttype}}_t *table = kh_init_{{ttype}}()

    # step 1: load every element of `values` into the hash table
    n_values = len(values)
    kh_resize_{{ttype}}(table, n_values)

    {{if dtype == 'object'}}
    for pos in range(n_values):
        kh_put_{{ttype}}(table, <PyObject*>values[pos], &ret)
    {{else}}
    with nogil:
        for pos in range(n_values):
            item = {{to_c_type}}(values[pos])
            kh_put_{{ttype}}(table, item, &ret)
    {{endif}}

    # step 2: probe the table once per element of `arr`
    n_arr = len(arr)
    result = np.empty(n_arr, dtype=np.uint8)

    {{if dtype == 'object'}}
    for pos in range(n_arr):
        item = arr[pos]
        slot = kh_get_{{ttype}}(table, <PyObject*>item)
        # kh_get yields n_buckets when the key is absent
        result[pos] = (slot != table.n_buckets)
    {{else}}
    with nogil:
        for pos in range(n_arr):
            item = {{to_c_type}}(arr[pos])
            slot = kh_get_{{ttype}}(table, item)
            # kh_get yields n_buckets when the key is absent
            result[pos] = (slot != table.n_buckets)
    {{endif}}

    kh_destroy_{{ttype}}(table)
    return result.view(np.bool_)
# ----------------------------------------------------------------------
# Mode Computations
# ----------------------------------------------------------------------
@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}
cdef mode_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
{{else}}
cdef mode_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
{{endif}}
    """
    Return the most frequent element(s) of ``values``.

    Parameters
    ----------
    values : {{dtype}} ndarray
    dropna : bint
        Passed through to ``value_count_{{dtype}}``.

    Returns
    -------
    ndarray
        Every key that reaches the highest count, in the order in which
        ``value_count_{{dtype}}`` reported them.
    """
    cdef:
        {{if dtype == 'object'}}
        ndarray[{{dtype}}] keys
        ndarray[{{dtype}}] modes
        {{else}}
        {{dtype}}_t[:] keys
        ndarray[{{dtype}}_t] modes
        {{endif}}
        int64_t[:] counts
        int64_t current, best = -1
        Py_ssize_t pos, n_keys, last = 0

    keys, counts = value_count_{{dtype}}(values, dropna)
    n_keys = len(keys)

    # `modes[:last + 1]` always holds the keys tied for the best count so far
    {{if dtype == 'object'}}
    modes = np.empty(n_keys, dtype=np.object_)

    for pos in range(n_keys):
        current = counts[pos]
        if current < best:
            continue
        if current > best:
            # new leader: restart the list of winners
            best = current
            last = 0
        else:
            # tie with the current leader: extend the list
            last += 1
        modes[last] = keys[pos]
    {{else}}
    modes = np.empty(n_keys, dtype=np.{{dtype}})

    with nogil:
        for pos in range(n_keys):
            current = counts[pos]
            if current < best:
                continue
            if current > best:
                # new leader: restart the list of winners
                best = current
                last = 0
            else:
                # tie with the current leader: extend the list
                last += 1
            modes[last] = keys[pos]
    {{endif}}

    return modes[:last + 1]
{{endfor}}
# Fused type enumerating every element type accepted by the dispatching
# wrappers defined after it (value_count, duplicated, ismember, mode).
ctypedef fused htfunc_t:
    complex128_t
    complex64_t
    float64_t
    float32_t
    uint64_t
    uint32_t
    uint16_t
    uint8_t
    int64_t
    int32_t
    int16_t
    int8_t
    object
cpdef value_count(ndarray[htfunc_t] values, bint dropna):
    """
    Forward to the ``value_count_*`` helper matching the element type of
    ``values`` and return its result unchanged.

    Raises
    ------
    TypeError
        If no branch matches the element type.
    """
    # Branches are listed in the declaration order of htfunc_t; each test is
    # on the fused type itself, so their relative order does not matter.
    if htfunc_t is complex128_t:
        return value_count_complex128(values, dropna)
    elif htfunc_t is complex64_t:
        return value_count_complex64(values, dropna)
    elif htfunc_t is float64_t:
        return value_count_float64(values, dropna)
    elif htfunc_t is float32_t:
        return value_count_float32(values, dropna)
    elif htfunc_t is uint64_t:
        return value_count_uint64(values, dropna)
    elif htfunc_t is uint32_t:
        return value_count_uint32(values, dropna)
    elif htfunc_t is uint16_t:
        return value_count_uint16(values, dropna)
    elif htfunc_t is uint8_t:
        return value_count_uint8(values, dropna)
    elif htfunc_t is int64_t:
        return value_count_int64(values, dropna)
    elif htfunc_t is int32_t:
        return value_count_int32(values, dropna)
    elif htfunc_t is int16_t:
        return value_count_int16(values, dropna)
    elif htfunc_t is int8_t:
        return value_count_int8(values, dropna)
    elif htfunc_t is object:
        return value_count_object(values, dropna)
    else:
        raise TypeError(values.dtype)
cpdef duplicated(ndarray[htfunc_t] values, object keep="first"):
    """
    Forward to the ``duplicated_*`` helper matching the element type of
    ``values`` and return its result unchanged.

    Raises
    ------
    TypeError
        If no branch matches the element type.
    """
    # Branches are listed in the declaration order of htfunc_t; each test is
    # on the fused type itself, so their relative order does not matter.
    if htfunc_t is complex128_t:
        return duplicated_complex128(values, keep)
    elif htfunc_t is complex64_t:
        return duplicated_complex64(values, keep)
    elif htfunc_t is float64_t:
        return duplicated_float64(values, keep)
    elif htfunc_t is float32_t:
        return duplicated_float32(values, keep)
    elif htfunc_t is uint64_t:
        return duplicated_uint64(values, keep)
    elif htfunc_t is uint32_t:
        return duplicated_uint32(values, keep)
    elif htfunc_t is uint16_t:
        return duplicated_uint16(values, keep)
    elif htfunc_t is uint8_t:
        return duplicated_uint8(values, keep)
    elif htfunc_t is int64_t:
        return duplicated_int64(values, keep)
    elif htfunc_t is int32_t:
        return duplicated_int32(values, keep)
    elif htfunc_t is int16_t:
        return duplicated_int16(values, keep)
    elif htfunc_t is int8_t:
        return duplicated_int8(values, keep)
    elif htfunc_t is object:
        return duplicated_object(values, keep)
    else:
        raise TypeError(values.dtype)
cpdef ismember(ndarray[htfunc_t] arr, ndarray[htfunc_t] values):
    """
    Forward to the ``ismember_*`` helper matching the shared element type of
    ``arr`` and ``values`` and return its result unchanged.

    Raises
    ------
    TypeError
        If no branch matches the element type.
    """
    # Branches are listed in the declaration order of htfunc_t; each test is
    # on the fused type itself, so their relative order does not matter.
    if htfunc_t is complex128_t:
        return ismember_complex128(arr, values)
    elif htfunc_t is complex64_t:
        return ismember_complex64(arr, values)
    elif htfunc_t is float64_t:
        return ismember_float64(arr, values)
    elif htfunc_t is float32_t:
        return ismember_float32(arr, values)
    elif htfunc_t is uint64_t:
        return ismember_uint64(arr, values)
    elif htfunc_t is uint32_t:
        return ismember_uint32(arr, values)
    elif htfunc_t is uint16_t:
        return ismember_uint16(arr, values)
    elif htfunc_t is uint8_t:
        return ismember_uint8(arr, values)
    elif htfunc_t is int64_t:
        return ismember_int64(arr, values)
    elif htfunc_t is int32_t:
        return ismember_int32(arr, values)
    elif htfunc_t is int16_t:
        return ismember_int16(arr, values)
    elif htfunc_t is int8_t:
        return ismember_int8(arr, values)
    elif htfunc_t is object:
        return ismember_object(arr, values)
    else:
        raise TypeError(values.dtype)
cpdef mode(ndarray[htfunc_t] values, bint dropna):
    """
    Forward to the ``mode_*`` helper matching the element type of ``values``
    and return its result unchanged.

    Raises
    ------
    TypeError
        If no branch matches the element type.
    """
    # Branches are listed in the declaration order of htfunc_t; each test is
    # on the fused type itself, so their relative order does not matter.
    if htfunc_t is complex128_t:
        return mode_complex128(values, dropna)
    elif htfunc_t is complex64_t:
        return mode_complex64(values, dropna)
    elif htfunc_t is float64_t:
        return mode_float64(values, dropna)
    elif htfunc_t is float32_t:
        return mode_float32(values, dropna)
    elif htfunc_t is uint64_t:
        return mode_uint64(values, dropna)
    elif htfunc_t is uint32_t:
        return mode_uint32(values, dropna)
    elif htfunc_t is uint16_t:
        return mode_uint16(values, dropna)
    elif htfunc_t is uint8_t:
        return mode_uint8(values, dropna)
    elif htfunc_t is int64_t:
        return mode_int64(values, dropna)
    elif htfunc_t is int32_t:
        return mode_int32(values, dropna)
    elif htfunc_t is int16_t:
        return mode_int16(values, dropna)
    elif htfunc_t is int8_t:
        return mode_int8(values, dropna)
    elif htfunc_t is object:
        return mode_object(values, dropna)
    else:
        raise TypeError(values.dtype)
{{py:
# Code-generation table for the `_unique_label_indices_*` functions that
# follow; one function is emitted per row.
# name, dtype, ttype, c_type
dtypes = [('Int64', 'int64', 'int64', 'int64_t'),
          ('Int32', 'int32', 'int32', 'int32_t'), ]
}}
{{for name, dtype, ttype, c_type in dtypes}}


@cython.wraparound(False)
@cython.boundscheck(False)
def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray:
    """
    Positions at which each distinct label first occurs, with the label -1
    left out. Equivalent to:
        np.unique(labels, return_index=True)[1]
    """
    cdef:
        int ret = 0
        Py_ssize_t pos, n_labels = len(labels)
        kh_{{ttype}}_t *table = kh_init_{{ttype}}()
        {{name}}Vector idx = {{name}}Vector()
        ndarray[{{c_type}}, ndim=1] arr
        {{name}}VectorData *idx_data = idx.data

    kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n_labels), SIZE_HINT_LIMIT))

    with nogil:
        for pos in range(n_labels):
            kh_put_{{ttype}}(table, labels[pos], &ret)
            if ret != 0:
                # ret != 0 is taken to mean the label was not in the table
                # before, so `pos` is its first occurrence
                if needs_resize(idx_data):
                    # growing the vector is a Python-level call: take the GIL
                    with gil:
                        idx.resize()
                append_data_{{ttype}}(idx_data, pos)

    kh_destroy_{{ttype}}(table)

    arr = idx.to_array()
    # reorder the first-occurrence positions by the label they point at
    arr = arr[np.asarray(labels)[arr].argsort()]

    # after sorting, a -1 label (if any) comes first; drop its position
    if arr.size != 0 and labels[arr[0]] == -1:
        return arr[1:]
    return arr

{{endfor}}