Skip to content

Commit

Permalink
Merge pull request #560 from 4dn-dcic/ajs_fix_biorxv_upd
Browse files Browse the repository at this point in the history
Bug fix for failing biorxiv update check
  • Loading branch information
aschroed authored Feb 7, 2024
2 parents 3630f80 + ef62c6c commit fee8385
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 4 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ foursight
Change Log
----------

4.2.2
=====

* modification of the biorxiv update check to squash a bug
* DOIs that are misformatted or contain an unwanted version suffix (``v#``) are now reported by the check

`PR 560: Fix for biorxiv version update check bug <https://github.com/4dn-dcic/foursight/pull/560>`_

4.2.1
=====

Expand Down
36 changes: 33 additions & 3 deletions chalicelib_fourfront/checks/wrangler_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,21 @@ def add_pub_and_replace_biorxiv(connection, **kwargs):
return action


def _remove_prefix_and_version_suffix_from_doi(pub_doi_id):
misformatted = False
try:
doi = pub_doi_id.split(':', 1)[1]
except Exception:
misformatted = True
return None, misformatted
pattern = re.compile(r'v\d+\s*$')
match = pattern.search(doi)
if match:
misformatted = True
doi = doi[:match.start()]
return doi, misformatted


@check_function(action="reindex_biorxiv")
def biorxiv_version_update(connection, **kwargs):
'''Check if current bioRxiv Publications (not yet replaced with PubmedID)
Expand All @@ -512,12 +527,18 @@ def biorxiv_version_update(connection, **kwargs):
current_biorxivs = ff_utils.search_metadata(query, key=connection.ff_keys)

items_to_update = []
problem_ids = []
biorxiv_api = 'https://api.biorxiv.org/details/biorxiv/'
for publication in current_biorxivs:
if not publication['ID'].startswith('doi:'):
pubid = publication.get('ID')
if not pubid.startswith('doi:'):
continue
doi, misformatted = _remove_prefix_and_version_suffix_from_doi(pubid)
if misformatted:
problem_ids.append(pubid)
if not doi:
continue
doi = publication['ID'].split(':')[1]
for count in range(5): # try fetching data a few times
for _ in range(5): # try fetching data a few times
r = requests.get(biorxiv_api + doi)
if r.status_code == 200:
break
Expand All @@ -540,6 +561,15 @@ def biorxiv_version_update(connection, **kwargs):
else:
check.status = 'PASS'
check.summary = check.description = 'All current bioRxiv Publications are up to date'

if problem_ids:
check.status = 'WARN'
check.summary = check.summary + f"There are {len(problem_ids)} misformatted or problematic doi pub IDs"
prob_out = {'problem_ids': problem_ids}
if check.brief_output:
check.brief_output.append(prob_out)
else:
check.brief_output = [prob_out]
return check


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "foursight"
version = "4.2.1"
version = "4.2.2"
description = "Serverless Chalice Application for Monitoring"
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit fee8385

Please sign in to comment.