A PyQt GUI application for converting InfoLease report outputs into Excel files. Handles parsing and summarizing. Learns where files are meant to be stored and compiles monthly and yearly summaries.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
InfoLeaseExtract/venv/Lib/site-packages/pandas/_libs/hashtable.pyx

182 lines
4.5 KiB

cimport cython
from cpython.mem cimport (
PyMem_Free,
PyMem_Malloc,
)
from cpython.ref cimport (
Py_INCREF,
PyObject,
)
from libc.stdlib cimport (
free,
malloc,
)
import numpy as np
cimport numpy as cnp
from numpy cimport (
float64_t,
ndarray,
uint8_t,
uint32_t,
)
from numpy.math cimport NAN
cnp.import_array()
from pandas._libs cimport util
from pandas._libs.khash cimport (
KHASH_TRACE_DOMAIN,
are_equivalent_float32_t,
are_equivalent_float64_t,
are_equivalent_khcomplex64_t,
are_equivalent_khcomplex128_t,
kh_needed_n_buckets,
kh_python_hash_equal,
kh_python_hash_func,
kh_str_t,
khcomplex64_t,
khcomplex128_t,
khiter_t,
)
from pandas._libs.missing cimport checknull
def get_hashtable_trace_domain():
    """
    Return the ``KHASH_TRACE_DOMAIN`` constant cimported from
    ``pandas._libs.khash`` so that it is reachable from Python code.

    NOTE(review): presumably the domain id under which khash allocations
    are reported to a memory tracer -- confirm against the khash headers.
    """
    return KHASH_TRACE_DOMAIN
def object_hash(obj):
    """
    Return the result of the khash helper ``kh_python_hash_func`` for ``obj``.

    Python-callable wrapper around the C-level function cimported from
    ``pandas._libs.khash``.
    """
    return kh_python_hash_func(obj)
def objects_are_equal(a, b):
    """
    Return the result of the khash helper ``kh_python_hash_equal`` for
    ``a`` and ``b``.

    Python-callable wrapper around the C-level function cimported from
    ``pandas._libs.khash``.
    NOTE(review): presumably this is the equality test the object hash
    tables use for their keys -- confirm against the khash headers.
    """
    return kh_python_hash_equal(a, b)
# C-level constant holding whatever util.get_nat() returns, evaluated once
# when the module is initialised.
# NOTE(review): presumably the int64 NaT sentinel consumed by the included
# helpers -- it is not referenced in the visible part of this file.
cdef int64_t NPY_NAT = util.get_nat()
# Plain (Python-level) module attribute; (1 << 20) + 7 == 1048583.
# NOTE(review): presumably an upper bound applied to size hints -- no user of
# it is visible here, confirm against the included helpers / callers.
SIZE_HINT_LIMIT = (1 << 20) + 7
# NOTE(review): presumably the initial capacity of the vector classes defined
# in the included helper -- confirm there.
cdef Py_ssize_t _INIT_VEC_CAP = 128
# Textual includes. The hash-table / vector classes and the
# _unique_label_indices_* functions used further down are not defined in this
# file, so they presumably come from these helpers. Keep the includes after
# the declarations above in case the helpers refer to them.
include "hashtable_class_helper.pxi"
include "hashtable_func_helper.pxi"
# Alias the platform-pointer-sized ("intp") names to the fixed-width
# implementation whose dtype equals np.intp on this build. Any other width is
# unsupported and aborts the import with the offending dtype.
if np.dtype(np.intp) == np.dtype(np.int64):
    IntpHashTable, unique_label_indices = (
        Int64HashTable,
        _unique_label_indices_int64,
    )
elif np.dtype(np.intp) == np.dtype(np.int32):
    IntpHashTable, unique_label_indices = (
        Int32HashTable,
        _unique_label_indices_int32,
    )
else:
    raise ValueError(np.dtype(np.intp))
cdef class Factorizer:
    """
    Minimal base class for the factorizers in this file.

    It only owns the ``count`` attribute; the subclasses below add a hash
    table and a vector of uniques and provide ``factorize``.
    """
    cdef readonly:
        # Readable (not writable) from Python. Starts at 0; the subclasses in
        # this file set it to len(self.uniques) at the end of factorize().
        Py_ssize_t count

    def __cinit__(self, size_hint: int):
        # size_hint is not used here; the subclasses' __cinit__ methods take
        # the same argument and pass it to their hash-table constructors.
        self.count = 0

    def get_count(self) -> int:
        """Return the current value of ``count``."""
        return self.count
cdef class ObjectFactorizer(Factorizer):
    """
    Factorizer for object arrays, backed by a ``PyObjectHashTable`` and an
    ``ObjectVector`` that accumulates the uniques across calls.
    """
    cdef public:
        PyObjectHashTable table
        ObjectVector uniques

    def __cinit__(self, size_hint: int):
        # The hint is forwarded to the hash table only; the vector starts
        # with its default construction.
        self.table = PyObjectHashTable(size_hint)
        self.uniques = ObjectVector()

    def factorize(
        self, ndarray[object] values, sort=False, na_sentinel=-1, na_value=None
    ) -> np.ndarray:
        """
        Compute integer labels for ``values`` and update ``uniques``/``count``.

        Parameters
        ----------
        values : ndarray[object]
        sort : bool, default False
            If true, each label is replaced by the position of its unique in
            the argsort of the current uniques.
        na_sentinel : int, default -1
            Forwarded to ``table.get_labels``; entries of the result equal to
            it are kept as ``na_sentinel`` when sorting.
        na_value : object, default None
            Forwarded to ``table.get_labels``.

        Returns
        -------
        np.ndarray[np.intp]

        Examples
        --------
        Factorize values with nans replaced by na_sentinel

        >>> fac = ObjectFactorizer(3)
        >>> fac.factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20)
        array([ 0,  1, 20])
        """
        cdef:
            ndarray[intp_t] labels

        # When the vector reports an external view, continue with a fresh
        # vector holding the same items instead of the current one.
        # NOTE(review): presumably because a vector whose buffer has been
        # handed out via to_array() must not be grown -- confirm in the helper.
        if self.uniques.external_view_exists:
            replacement = ObjectVector()
            replacement.extend(self.uniques.to_array())
            self.uniques = replacement

        labels = self.table.get_labels(
            values, self.uniques, self.count, na_sentinel, na_value
        )
        # Remember the sentinel positions before labels are remapped.
        na_positions = (labels == na_sentinel)

        if sort:
            order = self.uniques.to_array().argsort()
            n_uniques = len(order)
            # ranks[i] is the position of unique i in sorted order.
            ranks = np.empty(n_uniques, dtype=np.intp)
            ranks.put(order, np.arange(n_uniques))
            # 'clip' keeps sentinel entries from indexing out of bounds;
            # they are restored on the next line.
            labels = ranks.take(labels, mode='clip')
            labels[na_positions] = na_sentinel

        self.count = len(self.uniques)
        return labels
cdef class Int64Factorizer(Factorizer):
    """
    Factorizer for int64 data, backed by an ``Int64HashTable`` and an
    ``Int64Vector`` that accumulates the uniques across calls.
    """
    cdef public:
        Int64HashTable table
        Int64Vector uniques

    def __cinit__(self, size_hint: int):
        # The hint is forwarded to the hash table only.
        self.table = Int64HashTable(size_hint)
        self.uniques = Int64Vector()

    def factorize(self, const int64_t[:] values, sort=False,
                  na_sentinel=-1, na_value=None) -> np.ndarray:
        """
        Compute integer labels for ``values`` and update ``uniques``/``count``.

        Parameters
        ----------
        values : const int64_t[:]
        sort : bool, default False
            If true, each label is replaced by the position of its unique in
            the argsort of the current uniques.
        na_sentinel : int, default -1
            Forwarded to ``table.get_labels``. When ``na_value`` is given,
            entries of the result equal to ``na_sentinel`` are preserved
            through the sort remapping.
        na_value : object, default None
            Forwarded to ``table.get_labels``.

        Returns
        -------
        ndarray[intp_t]

        Examples
        --------
        Factorize integer values

        >>> fac = Int64Factorizer(3)
        >>> fac.factorize(np.array([1,2,3]), na_sentinel=20)
        array([0, 1, 2])
        """
        cdef:
            ndarray[intp_t] labels

        # When the vector reports an external view, continue with a fresh
        # vector holding the same items instead of the current one.
        # NOTE(review): presumably because a vector whose buffer has been
        # handed out via to_array() must not be grown -- confirm in the helper.
        if self.uniques.external_view_exists:
            uniques = Int64Vector()
            uniques.extend(self.uniques.to_array())
            self.uniques = uniques
        labels = self.table.get_labels(values, self.uniques,
                                       self.count, na_sentinel,
                                       na_value=na_value)

        # sort on
        if sort:
            sorter = self.uniques.to_array().argsort()
            # reverse_indexer[i] is the position of unique i in sorted order.
            reverse_indexer = np.empty(len(sorter), dtype=np.intp)
            reverse_indexer.put(sorter, np.arange(len(sorter)))

            if na_value is None:
                # Default path, unchanged.
                # NOTE(review): assumes get_labels emits no sentinel entries
                # when na_value is None -- confirm in the helper.
                labels = reverse_indexer.take(labels)
            else:
                # Same handling as ObjectFactorizer.factorize: a sentinel is
                # not a valid index into reverse_indexer (-1 would wrap to the
                # last entry, a large value would raise IndexError), so clip
                # during the lookup and restore the sentinel afterwards.
                mask = (labels == na_sentinel)
                labels = reverse_indexer.take(labels, mode='clip')
                labels[mask] = na_sentinel

        self.count = len(self.uniques)
        return labels