Skip to content

Commit eb001e9

Browse files
authored
Merge pull request #128 from rinkstiekema/feature/parse-initials
Feature: Add first and middle name(s) initials
2 parents d498968 + 377bea2 commit eb001e9

4 files changed

Lines changed: 288 additions & 111 deletions

File tree

docs/usage.rst

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,41 @@ Don't want to include nicknames in your output? No problem. Just omit that keywo
176176
'Dr. Juan de la Vega'
177177

178178

179+
Initials Support
180+
----------------
181+
182+
The HumanName class can try to get the correct representation of initials.
183+
Initials can be tricky as different format usages exist.
184+
If you want to exclude one of the name parts from the initials, you can use the initials format by changing
185+
:py:attr:`~nameparser.config.Constants.initials_format`
186+
Three attributes exist for the format, `first`, `middle` and `last`.
187+
188+
.. doctest:: initials format
189+
190+
>>> from nameparser.config import CONSTANTS
191+
>>> CONSTANTS.initials_format = "{first} {middle}"
192+
>>> HumanName("Doe, John A. Kenneth, Jr.").initials()
193+
'J. A. K.'
194+
>>> HumanName("Doe, John A. Kenneth, Jr.", initials_format="{last}, {first}").initials()
195+
'D., J.'
196+
197+
198+
Furthermore, the delimiter for the string output can be set through:
199+
:py:attr:`~nameparser.config.Constants.initials_delimiter`
200+
201+
.. doctest:: initials delimiter
202+
203+
>>> HumanName("Doe, John A. Kenneth, Jr.", initials_delimiter=";").initials()
204+
"J; A; K;"
205+
>>> from nameparser.config import CONSTANTS
206+
>>> CONSTANTS.initials_delimiter = "."
207+
>>> HumanName("Doe, John A. Kenneth, Jr.", initials_format="{first}{middle}{last}").initials()
208+
"J.A.K.D."
209+
210+
If you want to receive a list representation of the initials, you can use :py:meth:`~nameparser.HumanName.initials_list`.
211+
This function is unaffected by :py:attr:`~nameparser.config.Constants.initials_format`
212+
213+
.. doctest:: list format
214+
>>> HumanName("Doe, John A. Kenneth, Jr.", initials_delimiter=";").initials_list()
215+
["J", "A", "K", "D"]
216+

nameparser/config/__init__.py

Lines changed: 54 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -49,35 +49,37 @@
4949

5050
DEFAULT_ENCODING = 'UTF-8'
5151

52+
5253
class SetManager(Set):
5354
'''
5455
Easily add and remove config variables per module or instance. Subclass of
5556
``collections.abc.Set``.
56-
57+
5758
Only special functionality beyond that provided by set() is
5859
to normalize constants for comparison (lower case, no periods)
5960
when they are add()ed and remove()d and allow passing multiple
6061
string arguments to the :py:func:`add()` and :py:func:`remove()` methods.
61-
62+
6263
'''
64+
6365
def __init__(self, elements):
6466
self.elements = set(elements)
65-
67+
6668
def __call__(self):
6769
return self.elements
68-
70+
6971
def __repr__(self):
70-
return "SetManager({})".format(self.elements) # used for docs
71-
72+
return "SetManager({})".format(self.elements) # used for docs
73+
7274
def __iter__(self):
7375
return iter(self.elements)
74-
76+
7577
def __contains__(self, value):
7678
return value in self.elements
77-
79+
7880
def __len__(self):
7981
return len(self.elements)
80-
82+
8183
def next(self):
8284
return self.__next__()
8385

@@ -89,7 +91,7 @@ def __next__(self):
8991
c = self.count
9092
self.count = c + 1
9193
return getattr(self, self.elements[c]) or next(self)
92-
94+
9395
def add_with_encoding(self, s, encoding=None):
9496
"""
9597
Add the lower case and no-period version of the string to the set. Pass an
@@ -111,7 +113,7 @@ def add(self, *strings):
111113
"""
112114
[self.add_with_encoding(s) for s in strings]
113115
return self
114-
116+
115117
def remove(self, *strings):
116118
"""
117119
Remove the lower case and no-period version of the string arguments from the set.
@@ -126,10 +128,11 @@ class TupleManager(dict):
126128
A dictionary with dot.notation access. Subclass of ``dict``. Makes the tuple constants
127129
more friendly.
128130
'''
131+
129132
def __getattr__(self, attr):
130133
return self.get(attr)
131-
__setattr__= dict.__setitem__
132-
__delattr__= dict.__delitem__
134+
__setattr__ = dict.__setitem__
135+
__delattr__ = dict.__delitem__
133136

134137
def __getstate__(self):
135138
return dict(self)
@@ -140,6 +143,7 @@ def __setstate__(self, state):
140143
def __reduce__(self):
141144
return (TupleManager, (), self.__getstate__())
142145

146+
143147
class Constants(object):
144148
"""
145149
An instance of this class holds all of the configuration constants for the parser.
@@ -163,11 +167,23 @@ class Constants(object):
163167
:param regexes:
164168
:py:attr:`regexes` wrapped with :py:class:`TupleManager`.
165169
"""
166-
170+
167171
string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"
168172
"""
169173
The default string format used for all new `HumanName` instances.
170174
"""
175+
176+
initials_format = "{first} {middle} {last}"
177+
"""
178+
The default initials format used for all new `HumanName` instances.
179+
"""
180+
181+
initials_delimiter = "."
182+
"""
183+
The default initials delimiter used for all new `HumanName` instances.
184+
Will be used to add a delimiter between each initial.
185+
"""
186+
171187
empty_attribute_default = ''
172188
"""
173189
Default return value for empty attributes.
@@ -183,6 +199,7 @@ class Constants(object):
183199
'John'
184200
185201
"""
202+
186203
capitalize_name = False
187204
"""
188205
If set, applies :py:meth:`~nameparser.parser.HumanName.capitalize` to
@@ -197,6 +214,7 @@ class Constants(object):
197214
'Bob V. de la MacDole-Eisenhower Ph.D.'
198215
199216
"""
217+
200218
force_mixed_case_capitalization = False
201219
"""
202220
If set, forces the capitalization of mixed case strings when
@@ -213,27 +231,26 @@ class Constants(object):
213231
214232
"""
215233

216-
217-
def __init__(self,
218-
prefixes=PREFIXES,
219-
suffix_acronyms=SUFFIX_ACRONYMS,
220-
suffix_not_acronyms=SUFFIX_NOT_ACRONYMS,
221-
titles=TITLES,
222-
first_name_titles=FIRST_NAME_TITLES,
223-
conjunctions=CONJUNCTIONS,
224-
capitalization_exceptions=CAPITALIZATION_EXCEPTIONS,
225-
regexes=REGEXES
226-
):
227-
self.prefixes = SetManager(prefixes)
228-
self.suffix_acronyms = SetManager(suffix_acronyms)
234+
def __init__(self,
235+
prefixes=PREFIXES,
236+
suffix_acronyms=SUFFIX_ACRONYMS,
237+
suffix_not_acronyms=SUFFIX_NOT_ACRONYMS,
238+
titles=TITLES,
239+
first_name_titles=FIRST_NAME_TITLES,
240+
conjunctions=CONJUNCTIONS,
241+
capitalization_exceptions=CAPITALIZATION_EXCEPTIONS,
242+
regexes=REGEXES
243+
):
244+
self.prefixes = SetManager(prefixes)
245+
self.suffix_acronyms = SetManager(suffix_acronyms)
229246
self.suffix_not_acronyms = SetManager(suffix_not_acronyms)
230-
self.titles = SetManager(titles)
231-
self.first_name_titles = SetManager(first_name_titles)
232-
self.conjunctions = SetManager(conjunctions)
247+
self.titles = SetManager(titles)
248+
self.first_name_titles = SetManager(first_name_titles)
249+
self.conjunctions = SetManager(conjunctions)
233250
self.capitalization_exceptions = TupleManager(capitalization_exceptions)
234-
self.regexes = TupleManager(regexes)
251+
self.regexes = TupleManager(regexes)
235252
self._pst = None
236-
253+
237254
@property
238255
def suffixes_prefixes_titles(self):
239256
if not self._pst:
@@ -242,15 +259,16 @@ def suffixes_prefixes_titles(self):
242259

243260
def __repr__(self):
244261
return "<Constants() instance>"
245-
262+
246263
def __setstate__(self, state):
247264
self.__init__(state)
248-
265+
249266
def __getstate__(self):
250267
attrs = [x for x in dir(self) if not x.startswith('_')]
251-
return dict([(a,getattr(self, a)) for a in attrs])
268+
return dict([(a, getattr(self, a)) for a in attrs])
269+
252270

253-
#: A module-level instance of the :py:class:`Constants()` class.
271+
#: A module-level instance of the :py:class:`Constants()` class.
254272
#: Provides a common instance for the module to share
255273
#: to easily adjust configuration for the entire module.
256274
#: See `Customizing the Parser with Your Own Configuration <customize.html>`_.

0 commit comments

Comments
 (0)