Source code for owmeta.bibtex_customizations

'''
`bibtexparser` customizations
'''
import re


# Matches a LaTeX ``\url{...}`` command; group 1 captures the text between the
# braces (one or more characters other than ``}``).
HOWPUB_URL_RE = re.compile(r'\\url{([^}]+)}')


def customizations(record):
    """
    Standard owmeta `bibtexparser` customizations

    Applies, in this order: `author`, `listify`, `doi`, `note_url`, and `url`.
    Each step receives the record returned by the previous one.

    Parameters
    ----------
    record : dict
        the record

    Returns
    -------
    dict
        the given `record` with any updates applied
    """
    # Innermost call first: author must run before listify because it expects
    # the raw author string, and url runs last so it can merge what doi and
    # note_url produced.
    pipeline = (author, listify, doi, note_url, url)
    for step in pipeline:
        record = step(record)
    return record
def listify_one(record, name):
    '''
    Ensure the field `name` of the record holds a `list`.

    A `tuple` value is converted to a `list`; any other non-`list` value is
    wrapped in a single-element `list`. A value that already is a `list` is
    left untouched.

    Parameters
    ----------
    record : dict
        The record to update
    name : str
        The name of the field to turn into a list

    Returns
    -------
    dict
        the given `record` with any updates applied
    '''
    current = record[name]
    if isinstance(current, tuple):
        record[name] = list(current)
    elif not isinstance(current, list):
        record[name] = [current]
    return record
def listify(record):
    '''
    Turns every value in the record into a list except for ``ENTRYTYPE`` and
    ``ID``

    Parameters
    ----------
    record : dict
        the record

    Returns
    -------
    dict
        the given `record` with any updates applied
    '''
    # Some fields may legitimately hold several values, so downstream code is
    # simpler when every field is a list -- including fields that can only
    # occur once in a well-formed record.
    for field in record:
        if field in ('ID', 'ENTRYTYPE'):
            continue
        listify_one(record, field)
    return record
def doi(record):
    """
    Adds a doi URI to the record if there's a ``doi`` entry in the record

    The URI(s) are appended to the ``link`` field, which is created if missing
    and normalized to a `list` if it holds a single value or a `tuple`. Nothing
    is appended when an existing ``link`` entry already contains the text
    ``doi``. DOI values starting with ``10`` are prefixed with
    ``http://dx.doi.org/``; other values are appended unchanged.

    Parameters
    ----------
    record : dict
        the record to update

    Returns
    -------
    dict
        the given `record` with any updates applied
    """
    doi_value = record.get('doi')
    if doi_value is None:
        return record

    # Normalize ``link`` before touching it: a bare string would otherwise be
    # scanned character by character and then fail on ``.append``.
    links = record.get('link')
    if links is None:
        links = []
    elif isinstance(links, tuple):
        links = list(links)
    elif not isinstance(links, list):
        links = [links]
    record['link'] = links

    # A link that already mentions "doi" is taken to be the DOI link.
    if any('doi' in item for item in links):
        return record

    if not isinstance(doi_value, (list, tuple)):
        doi_value = [doi_value]
    for entry in doi_value:
        if entry.startswith('10'):
            entry = 'http://dx.doi.org/' + entry
        links.append(entry)
    return record
def author(record):
    """
    Split author field by the string ' and ' into a list of names.

    Newlines in the field are replaced by spaces before splitting, and each
    resulting name is stripped of surrounding whitespace. An empty ``author``
    field is removed from the record.

    Parameters
    ----------
    record : dict
        the record

    Returns
    -------
    dict
        the given `record` with any updates applied
    """
    if "author" not in record:
        return record

    raw_authors = record["author"]
    if not raw_authors:
        # An empty author entry carries no information; drop the key entirely.
        del record["author"]
        return record

    single_line = raw_authors.replace('\n', ' ')
    record["author"] = [name.strip() for name in single_line.split(" and ")]
    return record
def note_url(record):
    r'''
    Extracts URLs from ``note`` entries in the given record

    Every ``\url{...}`` found in a note is appended to the ``url`` field, which
    is created if missing and turned into a `list` if it is not one already.
    The ``note`` field itself is left unchanged. ``note`` may be a list/tuple
    of strings or a single string.

    Parameters
    ----------
    record : dict
        the record

    Returns
    -------
    dict
        the given `record` with any updates applied
    '''
    notes = record.get('note')
    if notes is None:
        return record

    # A bare string must be treated as one note; iterating it directly would
    # yield single characters, none of which can match the URL pattern.
    if isinstance(notes, str):
        notes = [notes]

    for entry in notes:
        for found in HOWPUB_URL_RE.finditer(entry):
            if record.get('url') is None:
                record['url'] = [found.group(1)]
            else:
                listify_one(record, 'url')['url'].append(found.group(1))
    return record
def url(record):
    r'''
    Merges any URL from ``\url{...}`` in ``howpublished``, and any existing
    ``link`` or ``url`` values in the record and normalizes them into a `list`
    in the ``url`` field of the record

    The resulting order is: existing ``url`` values, then URLs found in
    ``howpublished``, then ``link`` values. ``howpublished``, ``url`` and
    ``link`` may each be a single value, a `list`, or a `tuple`. An existing
    ``url`` list is extended in place; ``link`` is never modified and the
    resulting ``url`` list is never the same object as ``link``. If none of the
    three fields is present, the record is returned unchanged.

    Parameters
    ----------
    record : dict
        the record

    Returns
    -------
    dict
        the given `record` with any updates applied
    '''
    # ``howpublished`` is a list once the record has been through ``listify``,
    # but a plain string on a raw record -- accept both.
    howpublished = record.get('howpublished')
    if howpublished is None:
        howpub_entries = ()
    elif isinstance(howpublished, (list, tuple)):
        howpub_entries = howpublished
    else:
        howpub_entries = (howpublished,)

    howpub_urls = []
    for entry in howpub_entries:
        for found in HOWPUB_URL_RE.finditer(entry):
            howpub_urls.append(found.group(1))

    existing = record.get('url')
    link = record.get('link')

    if existing is None:
        urls = []
    elif isinstance(existing, list):
        urls = existing
    elif isinstance(existing, tuple):
        urls = list(existing)
    else:
        urls = [existing]

    urls.extend(howpub_urls)
    if isinstance(link, (list, tuple)):
        urls.extend(link)
    elif link is not None:
        urls.append(link)

    # Only create/overwrite ``url`` when there was something to normalize, so
    # a record without any URL-bearing field is left untouched.
    if existing is not None or link is not None or urls:
        record['url'] = urls
    return record