Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved handling of user query for higlass items #564

Merged
merged 11 commits into from
Mar 21, 2024
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ foursight
Change Log
----------

4.4.3
=====
* Add helper to convert user input str to list for select queries in higlass_checks.py
* Adjust output of check_validation_errors check to list affected items by type in full_output if not too many
* Update lock file to use foursight-core with bug fix for local-check-execution script

`PR 564: Improved handling of user query for higlass items <https://github.com/4dn-dcic/foursight/pull/564>`_

4.4.2
=====
* Added 'input_bed' to attr_keys in wfr_utils.py's start_missing_run for ATAC-seq pipeline
Expand Down
13 changes: 10 additions & 3 deletions chalicelib_fourfront/checks/audit_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,17 +781,24 @@ def check_validation_errors(connection, **kwargs):
returns link to search if found.
'''
check = CheckResult(connection, 'check_validation_errors')

search_url = 'search/?validation_errors.name!=No+value&type=Item'
results = ff_utils.search_metadata(search_url + '&field=@id', key=connection.ff_keys)
if results:
types = {item for result in results for item in result['@type'] if item != 'Item'}
ids_by_type = {}
for result in results:
ids_by_type.setdefault(result.get('@type')[0], []).append(result.get('@id'))
check.status = 'WARN'
check.summary = 'Validation errors found'
check.description = ('{} items found with validation errors, comprising the following '
'item types: {}. \nFor search results see link below.'.format(
len(results), ', '.join(list(types))))
len(results), ', '.join(ids_by_type.keys())))
check.ff_link = connection.ff_server + search_url
# too many items of a type suggests a possibly general issue for that type
for ty, item_ids in ids_by_type.items():
if len(item_ids) > 100:
ids_by_type[ty] = 'Many items of this type have validation errors'
check.full_output = ids_by_type
else:
check.status = 'PASS'
check.summary = 'No validation errors'
Expand Down
93 changes: 70 additions & 23 deletions chalicelib_fourfront/checks/higlass_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,12 +354,7 @@ def find_files_requiring_higlass_items(connection, check_name, action_name, sear
check.queries = []
check.action = action_name

# If no search query was provided, fail
if not search_queries:
check.summary = check.description = "No search query provided, nothing to update."
check.status = 'PASS'
check.allow_action = False
return check
search_queries = verify_queries(check, search_queries)

# Add the fields we want to return.
fields_to_include = '&field=' + '&field='.join((
Expand All @@ -385,7 +380,7 @@ def find_files_requiring_higlass_items(connection, check_name, action_name, sear
file_search_query = "/search/?type=File&higlass_uid!=No+value&genome_assembly!=No+value" + query + fields_to_include

# Query the files
search_res = ff_utils.search_metadata(file_search_query, key=connection.ff_keys)
search_res = get_search_results(connection, check, file_search_query)

# Collate the results into a dict of ExpSets, ordered by accession
for found_file in search_res:
Expand Down Expand Up @@ -430,11 +425,20 @@ def find_files_requiring_higlass_items(connection, check_name, action_name, sear
check.full_output[full_output_key] = {}
check.full_output[full_output_key][ga] = target_files_by_ga[ga]

if check.full_output.get('search_problems'):
check.summary = "Please check input query for typo or formatting error"
check.description = check.summary + ". See full_output for details."
check.allow_action = False
check.status = 'FAIL'
return check

if not target_files_by_ga:
# nothing new to generate
check.summary = check.description = "No new Higlass Items to generate"
check.allow_action = False
check.status = 'PASS'
return check

else:
all_files = sum([len(x) for x in check.full_output["ready"].values()])
check.summary = "Ready to generate %s Higlass Items" % all_files
Expand Down Expand Up @@ -827,12 +831,7 @@ def find_expsets_processedfiles_requiring_higlass_items(connection, check_name,
"static_content",
])

# If no search query was provided, fail
if not search_queries:
check.summary = check.description = "No search query provided, nothing to update."
check.status = 'PASS'
check.allow_action = False
return check
search_queries = verify_queries(check, search_queries)

expsets_by_accession = {}
# Use all of the search queries to make a list of the ExpSets we will work on.
Expand All @@ -844,7 +843,7 @@ def find_expsets_processedfiles_requiring_higlass_items(connection, check_name,
processed_expsets_query = "/search/?type=ExperimentSetReplicate" + query + fields_to_include

# Query the Experiment Sets
search_res = ff_utils.search_metadata(processed_expsets_query, key=connection.ff_keys)
search_res = get_search_results(connection, check, processed_expsets_query)

# Collate the results into a dict of ExpSets, ordered by accession
for expset in search_res:
Expand Down Expand Up @@ -897,6 +896,14 @@ def find_expsets_processedfiles_requiring_higlass_items(connection, check_name,
ready_to_generate_count = sum([len(accessions) for x, accessions in check.full_output["ready_expsets"].items()])

check.summary = ""

if check.full_output.get('search_problems'):
check.summary = "Please check input query for typo or formatting error"
check.description = check.summary + ". See full_output for details."
check.allow_action = False
check.status = 'FAIL'
return check

# If there are no files to act upon, we're done.
if not target_files_by_ga:
check.summary = check.description = "No new view configs to generate"
Expand Down Expand Up @@ -1177,19 +1184,15 @@ def find_expsets_otherprocessedfiles_requiring_higlass_items(connection, check_n
check.queries = []
check.action = action_name

# If no search query was provided and find_opfs_missing_higlass is False, pass with no results
if not (search_queries or find_opfs_missing_higlass):
check.summary = check.description = "No search query provided, nothing to update."
check.status = 'PASS'
check.allow_action = False
return check

if find_opfs_missing_higlass:
search_queries = [
"&experiments_in_set.other_processed_files.files.higlass_uid%21=No+value",
"&other_processed_files.files.higlass_uid%21=No+value"
]

else:
search_queries = verify_queries(check, search_queries)

# get the fields you need to include
fields_to_include = ""
for new_field in (
Expand All @@ -1216,7 +1219,7 @@ def find_expsets_otherprocessedfiles_requiring_higlass_items(connection, check_n
expset_query = "/search/?type=ExperimentSetReplicate" + query + fields_to_include

# Store results by accession
search_res = ff_utils.search_metadata(expset_query, key=connection.ff_keys)
search_res = get_search_results(connection, check, expset_query)
for expset in search_res:
expsets_by_accession[ expset["accession"] ] = expset

Expand Down Expand Up @@ -1368,6 +1371,13 @@ def consider_filegroup(fg):
all_filegroups_to_update[accession] = filegroups_to_update
higlass_view_count += len(filegroups_to_update.keys())

if check.full_output.get('search_problems'):
check.summary = "Please check input query for typo or formatting error"
check.description = check.summary + ". See full_output for details."
check.allow_action = False
check.status = 'FAIL'
return check

# check announces success
check.full_output['filegroups_to_update'] = all_filegroups_to_update
check.full_output['expsets_to_update'] = expsets_to_update
Expand Down Expand Up @@ -1697,7 +1707,7 @@ def does_url_exist(path):
continue

# Query all possible files
possibly_reg = ff_utils.search_metadata(search_query, key=connection.ff_keys)
possibly_reg = get_search_results(connection, check, search_query)

for procfile in possibly_reg:
current_file_cat = file_cat
Expand Down Expand Up @@ -2287,3 +2297,40 @@ def convert_es_timestamp_to_datetime(raw):
"%Y-%m-%dT%H:%M:%S"
)
return converted_date

def verify_queries(check, search_queries):
    """
    Helper to check that a search query is properly formatted and reformat it if necessary.

    Args:
        check(CheckResult): Result of check, to be passed from check.
        search_queries(list or string): A list of search queries. All Files found in at least one of the queries will be modified.

    Returns:
        Formatted search_queries (list)
    """
    # If no search query was provided, pass with no results. Return an empty
    # list so callers iterating over the queries simply do nothing — the
    # original fall-through would crash on None (TypeError in the
    # comprehension below).
    if not search_queries:
        check.summary = check.description = "No search query provided, nothing to update."
        check.status = 'PASS'
        check.allow_action = False
        return []

    if isinstance(search_queries, str):
        # for case where (possibly multiple) query is passed in via kwargs
        queries = search_queries.split(',')
        search_queries = [q.strip() for q in queries]

        check.brief_output = {
            "corrected_query": "The query was not formatted as a list, please double check results"
        }

    # add '&' when missing from str; skip empty entries (e.g. from a trailing
    # comma), which previously raised IndexError on q[0]
    search_queries = [q if q.startswith('&') else '&' + q for q in search_queries if q]

    return search_queries

def get_search_results(connection, check, search_query):
    """
    Run a metadata search, recording any failure on the check instead of raising.

    Args:
        connection: Fourfront connection providing ff_keys for the search.
        check(CheckResult): Check whose full_output collects search problems.
        search_query(string): The search URL/query to execute.

    Returns:
        The search results list, or an empty list if the search raised.
    """
    try:
        results = ff_utils.search_metadata(search_query, key=connection.ff_keys)
    except Exception as problem:
        # stash the error message so the check can report a bad query
        problems = check.full_output.setdefault('search_problems', [])
        problems.append(str(problem))
        results = []
    return results
Loading
Loading