# Source code for owmeta.bibtex_customizations
'''
`bibtexparser` customizations
'''
import re
# Matches a LaTeX ``\url{...}`` command. Group 1 captures the text between the
# braces (one or more characters other than ``}``).
HOWPUB_URL_RE = re.compile(r'\\url{([^}]+)}')
def customizations(record):
    """
    Apply the standard owmeta `bibtexparser` customizations to a record

    The steps run in this order: `author`, `listify`, `doi`, `note_url`, `url`

    Parameters
    ----------
    record : dict
        the record

    Returns
    -------
    dict
        the given `record` with any updates applied
    """
    # Order matters: `author` calls string methods on the raw ``author`` value,
    # so it has to run before `listify` wraps every value in a list.
    pipeline = (author, listify, doi, note_url, url)
    for step in pipeline:
        record = step(record)
    return record
def listify_one(record, name):
    '''
    Make sure the value stored under `name` in the record is a `list`.

    A `tuple` is converted to a `list` with the same items, a `list` is left
    untouched, and any other value is wrapped in a one-element `list`.

    Parameters
    ----------
    record : dict
        The record to update
    name : str
        The name of the field to turn into a list

    Returns
    -------
    dict
        the given `record` with any updates applied

    Raises
    ------
    KeyError
        if `name` is not a key of `record`
    '''
    current = record[name]
    if isinstance(current, tuple):
        record[name] = list(current)
    elif not isinstance(current, list):
        record[name] = [current]
    return record
def listify(record):
    '''
    Turns every value in the record into a list except for ``ENTRYTYPE`` and ``ID``

    Parameters
    ----------
    record : dict
        the record to update

    Returns
    -------
    dict
        the given `record` with any updates applied
    '''
    # Some fields can hold multiple values, so handling code is simpler when
    # every field is a list, even those that can only appear once in a
    # properly formatted record.
    for field in record:
        if field in ('ID', 'ENTRYTYPE'):
            continue
        listify_one(record, field)
    return record
def doi(record):
    """
    Adds a doi URI to the record if there's a ``doi`` entry in the record

    The ``link`` field is created if missing and normalized to a `list` (a
    `tuple` is converted, a scalar is wrapped). If any existing link already
    contains ``'doi'``, nothing is appended. Otherwise each ``doi`` value is
    appended to ``link``; values starting with ``10`` are prefixed with
    ``http://dx.doi.org/`` first.

    Parameters
    ----------
    record : dict
        the record to update

    Returns
    -------
    dict
        the given `record` with any updates applied
    """
    dois = record.get('doi')
    if dois is None:
        return record

    # Normalize ``link`` before inspecting it: a bare string would be scanned
    # character by character (so an existing doi link is never detected) and
    # neither `str` nor `tuple` supports ``append``.
    links = record.get('link')
    if links is None:
        links = []
    elif isinstance(links, tuple):
        links = list(links)
    elif not isinstance(links, list):
        links = [links]
    # An existing list is kept as the same object, so it is updated in place
    record['link'] = links

    if any('doi' in item for item in links):
        return record

    if not isinstance(dois, (list, tuple)):
        dois = [dois]
    for value in dois:
        if value.startswith('10'):
            value = 'http://dx.doi.org/' + value
        links.append(value)
    return record
def author(record):
    """
    Split the ``author`` field on the string ' and ' into a list of names.

    Newlines in the field are replaced with spaces before splitting, and each
    resulting name is stripped of surrounding whitespace. An ``author`` field
    with a falsy value (e.g., an empty string) is removed from the record.

    Parameters
    ----------
    record : dict
        the record

    Returns
    -------
    dict
        the given `record` with any updates applied
    """
    if "author" not in record:
        return record

    raw_authors = record["author"]
    if not raw_authors:
        del record["author"]
        return record

    single_line = raw_authors.replace('\n', ' ')
    record["author"] = [name.strip() for name in single_line.split(" and ")]
    return record
def note_url(record):
    '''
    Extracts URLs from ``note`` entries in the given record

    Every ``\\url{...}`` found in a note is appended to the ``url`` field of the
    record. The ``url`` field is created if it is missing and is turned into a
    `list` if it holds some other kind of value.

    Parameters
    ----------
    record : dict
        the record. ``note`` may be a single string or a sequence of strings

    Returns
    -------
    dict
        the given `record` with any updates applied
    '''
    notes = record.get('note')
    if notes is None:
        return record

    # A bare string would otherwise be iterated one character at a time, so no
    # URL could ever match. Wrap it, like `doi` does for a scalar ``doi`` value.
    if isinstance(notes, str):
        notes = [notes]

    for text in notes:
        for found in HOWPUB_URL_RE.finditer(text):
            if record.get('url') is None:
                record['url'] = [found.group(1)]
            else:
                listify_one(record, 'url')['url'].append(found.group(1))
    return record
def url(record):
    r'''
    Merges any URL from ``\url{...}`` in ``howpublished``, and any existing ``link`` or
    ``url`` values in the record and normalizes them into a `list` in the ``url`` field of
    the record

    The resulting list holds, in order: the existing ``url`` values, the URLs
    extracted from ``howpublished``, and the ``link`` values. ``url``,
    ``howpublished`` and ``link`` may each be absent, a single value, a `list`
    or a `tuple`. If ``url`` is absent (or `None`) and there is nothing to
    merge, the record is left without a ``url`` list.

    Parameters
    ----------
    record : dict
        the record

    Returns
    -------
    dict
        the given `record` with any updates applied
    '''
    def _as_list(value):
        # Always returns a fresh list, so ``url`` never shares a list object
        # with ``link`` or ``howpublished``
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    existing = record.get('url')
    # Keep an existing list as the same object so it is updated in place;
    # anything else (missing, scalar, tuple) becomes a new list.
    merged = existing if isinstance(existing, list) else _as_list(existing)

    # ``howpublished`` is a list when this runs after `listify` (as it does in
    # `customizations`), so each entry is scanned separately. ``finditer`` is
    # used, as in `note_url`, so a ``\url{...}`` is found anywhere in the text.
    for text in _as_list(record.get('howpublished')):
        merged.extend(found.group(1) for found in HOWPUB_URL_RE.finditer(text))

    merged.extend(_as_list(record.get('link')))

    # Do not invent an empty ``url`` field when there was none and nothing was found
    if merged or existing is not None:
        record['url'] = merged
    return record