You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
19 lines
628 B
19 lines
628 B
'''
|
|
This class describes big chunks of text that may contain date strings
|
|
Each chunk consists of one or more tokens
|
|
Each token is built from DATE_REGEX matches
|
|
'''
|
|
|
|
|
|
class DateFragment:
    """Container for one matched piece of text together with its captures.

    Attributes:
        match_str: The matched text (empty string by default).
        indices: A two-item ``(first, second)`` pair shown by ``__repr__``;
            presumably the start/end offsets of the match in the source
            text -- confirm against the code that fills it in.
        captures: Dict whose values are sized collections (``len()`` is
            taken of each one); presumably capture name -> list of captured
            strings -- confirm against the code that fills it in.
    """

    def __init__(self, match_str='', indices=(0, 0), captures=None):
        """Create a fragment; with no arguments it starts out empty.

        Args:
            match_str: Initial matched text.
            indices: Initial two-item index pair.
            captures: Initial captures dict; a new empty dict is created
                when omitted.
        """
        self.match_str = match_str
        self.indices = indices
        # Build a fresh dict per instance so fragments never share state.
        self.captures = {} if captures is None else captures

    def __repr__(self):
        """Return the matched text, its index pair and all captures."""
        str_capt = ', '.join(
            '"{}": [{}]'.format(name, values)
            for name, values in self.captures.items()
        )
        return '{} [{}, {}]\nCaptures: {}'.format(
            self.match_str, self.indices[0], self.indices[1], str_capt
        )

    def get_captures_count(self):
        """Return the total number of captured items across all entries."""
        return sum(len(values) for values in self.captures.values())
|
|
|