Skip to content

Commit

Permalink
update notes
Browse files (browse the repository at this point in the history)
  • Loading branch information
geritwagner committed Jan 27, 2025
1 parent 33c4771 commit b114d2d
Showing 1 changed file with 17 additions and 5 deletions.
22 changes: 17 additions & 5 deletions labot/notes.py
Original file line number | Diff line number | Diff line change
Expand Up
@@ -76,21 +76,33 @@ def import_missing_references(missing_references: list, references: dict) -> None:
new_record_object = local_index.retrieve_based_on_colrev_pdf_id(
colrev_pdf_id=colrev_pdf_id
)
retrieved_record = new_record_object
retrieved_record_dict = new_record_object.data

except colrev_exceptions.RecordNotInIndexException:

tei = colrev.env.tei_parser.TEIParser(
environment_manager=environment_manager,
pdf_path=pdf_path,
)
retrieved_record = tei.get_metadata()
if "doi" in retrieved_record:
retrieved_record = api.query_doi(doi=retrieved_record["doi"])
retrieved_record_dict = tei.get_metadata()
if "doi" in retrieved_record_dict:
retrieved_record_dict = api.query_doi(
doi=retrieved_record_dict["doi"]
).data

# remove all fields starting with "colrev_"
retrieved_record_dict = {
key: value
for key, value in retrieved_record_dict.items()
if not key.startswith("colrev")
}
# also remove curation_ID and language
retrieved_record_dict.pop("curation_ID", None)
retrieved_record_dict.pop("language", None)

# TODO/TBD: rename pdf? update key?
if missing_reference not in references:
references[missing_reference] = retrieved_record
references[missing_reference] = retrieved_record_dict
else:
print(f"Reference {missing_reference} already exists in references")
continue
Expand Down

0 comments on commit b114d2d

Please sign in to comment.