From 78f57e3b8a058776cbcf43239ba2fe4b5d223db5 Mon Sep 17 00:00:00 2001 From: aschroed Date: Tue, 8 Oct 2024 15:30:26 -0400 Subject: [PATCH 1/3] updated opf status mismatch check to filter on tag; modified bed file search for bed2beddb to respect skip_processing tag if present --- CHANGELOG.rst | 9 +++++++ chalicelib_fourfront/checks/audit_checks.py | 29 ++++++++++++++++----- chalicelib_fourfront/checks/wfr_checks.py | 3 ++- pyproject.toml | 2 +- 4 files changed, 34 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d515778c..c5cd5840 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,6 +8,15 @@ Change Log ---------- +4.9.5 +===== + +`PR 583: Update opf status mismatch check `_ + +* add a filter to filter on 'ignore_status_mismatch' tag on items (opfs, quality metrics or higlass_viewconfs) to ignore in opf status mismatch +* small update to search for bed2beddb files to respect the 'skip_processing' tag if present + + 4.9.4 ===== diff --git a/chalicelib_fourfront/checks/audit_checks.py b/chalicelib_fourfront/checks/audit_checks.py index b0dd1916..e56a78aa 100644 --- a/chalicelib_fourfront/checks/audit_checks.py +++ b/chalicelib_fourfront/checks/audit_checks.py @@ -734,6 +734,9 @@ def check_opf_status_mismatch(connection, **kwargs): ''' check = CheckResult(connection, 'check_opf_status_mismatch') + # list of uuids to filter out as they have a tag to ignore them + tagged2ignore = get_items_with_ignore_tags(connection.ff_keys) + opf_set = ('search/?type=ExperimentSet&other_processed_files.title%21=No+value&field=status' '&field=other_processed_files&field=experiments_in_set.other_processed_files') opf_exp = ('search/?type=ExperimentSet&other_processed_files.title=No+value' @@ -741,34 +744,46 @@ def check_opf_status_mismatch(connection, **kwargs): '&field=experiments_in_set.other_processed_files&field=status') opf_set_results = ff_utils.search_metadata(opf_set, key=connection.ff_keys) opf_exp_results = ff_utils.search_metadata(opf_exp, key=connection.ff_keys) - results = opf_set_results + opf_exp_results - # extract file uuids + results = opf_set_results + opf_exp_results # these are expset and expt items w/opfs + # extract all opf file and higlass viewconf uuids files = [] for result in results: if result.get('other_processed_files'): for case in result['other_processed_files']: - files.extend([i['uuid'] for i in case['files']]) + files.extend([i['uuid'] for i in case['files']]) # if i.get('uuid') not in tagged2ignore]) if case.get('higlass_view_config'): + # if case['higlass_view_config'].get('uuid') not in tagged2ignore: files.append(case['higlass_view_config'].get('uuid')) if result.get('experiments_in_set'): for exp in result['experiments_in_set']: for case in exp['other_processed_files']: - files.extend([i['uuid'] for i in case['files']]) - # get metadata for files, to collect status + files.extend([i['uuid'] for i in case['files']]) # if i.get('uuid') not in tagged2ignore]) + + # get metadata for files, to collect status resp = get_es_metadata(list(set(files)), sources=['links.quality_metric', 'object.status', 'uuid'], key=connection.ff_keys) + # key = opf uuid; value = status opf_status_dict = {item['uuid']: item['object']['status'] for item in resp if item['uuid'] in files} + + # key opf uuid; value = linked quality metric items opf_linked_dict = { item['uuid']: item.get('links', {}).get('quality_metric', []) for item in resp if item['uuid'] in files } + + # quality metric uuids quality_metrics = [uuid for item in resp for uuid in item.get('links', {}).get('quality_metric', [])] + + # get metadata for quality metrics (status) qm_resp = get_es_metadata(list(set(quality_metrics)), sources=['uuid', 'object.status'], key=connection.ff_keys) + + # key = qual met uuid; value = status opf_other_dict = {item['uuid']: item['object']['status'] for item in qm_resp if item not in files} + check.full_output = {} - for result in results: + for result in results: # now go through each expset or experiment again and make sure all the statuses agree hg_dict = {item['title']: item.get('higlass_view_config', {}).get('uuid') for item in result.get('other_processed_files', [])} titles = [item['title'] for item in result.get('other_processed_files', [])] @@ -782,7 +797,7 @@ def check_opf_status_mismatch(connection, **kwargs): file_list.extend([item for exp in result.get('experiments_in_set', []) for fileset in exp['other_processed_files'] for item in fileset['files'] if fileset['title'] == title]) - statuses = set([opf_status_dict[f['uuid']] for f in file_list]) + statuses = set([opf_status_dict[f['uuid']] for f in file_list if f.get('uuid') not in tagged2ignore]) # import pdb; pdb.set_trace() if not statuses: # to account for empty sections that may not yet contain files diff --git a/chalicelib_fourfront/checks/wfr_checks.py b/chalicelib_fourfront/checks/wfr_checks.py index d87d586a..bf1309db 100644 --- a/chalicelib_fourfront/checks/wfr_checks.py +++ b/chalicelib_fourfront/checks/wfr_checks.py @@ -674,7 +674,8 @@ def bed2beddb_status(connection, **kwargs): "&extra_files.file_format.display_title=beddb" "&extra_files.status=uploading" "&extra_files.status=to be uploaded by workflow" - "&status!=uploading&status!=to be uploaded by workflow") + "&status!=uploading&status!=to be uploaded by workflow" + "&tags!=skip_processing") # add date s_date = kwargs.get('start_date') if s_date: diff --git a/pyproject.toml b/pyproject.toml index a5ada40c..34fbe314 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "foursight" -version = "4.9.4" +version = "4.9.5" description = "Serverless Chalice Application for Monitoring" authors = ["4DN-DCIC Team "] license = "MIT" From 17de0d92a64810a01722f04917bde8cb2abec9d3 Mon Sep 17 00:00:00 2001 From: aschroed Date: Tue, 8 Oct 2024 15:44:21 -0400 Subject: [PATCH 2/3] update first query on bed2beddb check --- chalicelib_fourfront/checks/wfr_checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chalicelib_fourfront/checks/wfr_checks.py b/chalicelib_fourfront/checks/wfr_checks.py index bf1309db..331e3a6a 100644 --- a/chalicelib_fourfront/checks/wfr_checks.py +++ b/chalicelib_fourfront/checks/wfr_checks.py @@ -654,11 +654,11 @@ def bed2beddb_status(connection, **kwargs): check, skip = wfr_utils.check_indexing(check, connection) if skip: return check - # Build the query (find bg files without bw files) + # Build the query (find bed files without beddb files) query = ("/search/?type=File&file_format.file_format=bed" "&extra_files.file_format.display_title!=beddb" "&status!=uploading&status!=to be uploaded by workflow" - "&status!=archived&status!=archived to project") + "&status!=archived&status!=archived to project&tags!=skip_processing") query += "".join(["&file_type=" + i for i in accepted_types]) # add date s_date = kwargs.get('start_date') From 56d310aa1dd02e7415a9c4160dfd6b043599d843 Mon Sep 17 00:00:00 2001 From: aschroed Date: Tue, 8 Oct 2024 16:26:25 -0400 Subject: [PATCH 3/3] removed commented out pdb --- chalicelib_fourfront/checks/audit_checks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/chalicelib_fourfront/checks/audit_checks.py b/chalicelib_fourfront/checks/audit_checks.py index e56a78aa..0b1f096b 100644 --- a/chalicelib_fourfront/checks/audit_checks.py +++ b/chalicelib_fourfront/checks/audit_checks.py @@ -798,7 +798,6 @@ def check_opf_status_mismatch(connection, **kwargs): for fileset in exp['other_processed_files'] for item in fileset['files'] if fileset['title'] == title]) statuses = set([opf_status_dict[f['uuid']] for f in file_list if f.get('uuid') not in tagged2ignore]) - # import pdb; pdb.set_trace() if not statuses: # to account for empty sections that may not yet contain files pass