Skip to content

Commit

Permalink
Merge pull request #564 from 4dn-dcic/higlass_man_update_pf
Browse files Browse the repository at this point in the history
Improved handling of user query for higlass items
  • Loading branch information
clarabakker authored Mar 21, 2024
2 parents 672d3ac + f8e3a2a commit b456079
Show file tree
Hide file tree
Showing 5 changed files with 248 additions and 186 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ foursight
Change Log
----------

4.4.3
=====
* Add helper to convert user input str to list for select queries in higlass_checks.py
* Adjust output of check_validation_errors check to list affected items by type in full_output if not too many
* Update lock file to use foursight-core with bug fix for local-check-execution script

`PR 564: Improved handling of user query for higlass items <https://github.com/4dn-dcic/foursight/pull/564>`_

4.4.2
=====
* Added 'input_bed' to attr_keys in wfr_utils.py's start_missing_run for ATAC-seq pipeline
Expand Down
13 changes: 10 additions & 3 deletions chalicelib_fourfront/checks/audit_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,17 +781,24 @@ def check_validation_errors(connection, **kwargs):
returns link to search if found.
'''
check = CheckResult(connection, 'check_validation_errors')

search_url = 'search/?validation_errors.name!=No+value&type=Item'
results = ff_utils.search_metadata(search_url + '&field=@id', key=connection.ff_keys)
if results:
types = {item for result in results for item in result['@type'] if item != 'Item'}
ids_by_type = {}
for result in results:
ids_by_type.setdefault(result.get('@type')[0], []).append(result.get('@id'))
check.status = 'WARN'
check.summary = 'Validation errors found'
check.description = ('{} items found with validation errors, comprising the following '
'item types: {}. \nFor search results see link below.'.format(
len(results), ', '.join(list(types))))
len(results), ', '.join(ids_by_type.keys())))
check.ff_link = connection.ff_server + search_url
# too many items of a type suggests a possibly general issue for that type
for ty, item_ids in ids_by_type.items():
if len(item_ids) > 100:
ids_by_type[ty] = 'Many items of this type have validation errors'
check.full_output = ids_by_type
else:
check.status = 'PASS'
check.summary = 'No validation errors'
Expand Down
93 changes: 70 additions & 23 deletions chalicelib_fourfront/checks/higlass_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,12 +354,7 @@ def find_files_requiring_higlass_items(connection, check_name, action_name, sear
check.queries = []
check.action = action_name

# If no search query was provided, fail
if not search_queries:
check.summary = check.description = "No search query provided, nothing to update."
check.status = 'PASS'
check.allow_action = False
return check
search_queries = verify_queries(check, search_queries)

# Add the fields we want to return.
fields_to_include = '&field=' + '&field='.join((
Expand All @@ -385,7 +380,7 @@ def find_files_requiring_higlass_items(connection, check_name, action_name, sear
file_search_query = "/search/?type=File&higlass_uid!=No+value&genome_assembly!=No+value" + query + fields_to_include

# Query the files
search_res = ff_utils.search_metadata(file_search_query, key=connection.ff_keys)
search_res = get_search_results(connection, check, file_search_query)

# Collate the results into a dict of ExpSets, ordered by accession
for found_file in search_res:
Expand Down Expand Up @@ -430,11 +425,20 @@ def find_files_requiring_higlass_items(connection, check_name, action_name, sear
check.full_output[full_output_key] = {}
check.full_output[full_output_key][ga] = target_files_by_ga[ga]

if check.full_output.get('search_problems'):
check.summary = "Please check input query for typo or formatting error"
check.description = check.summary + ". See full_output for details."
check.allow_action = False
check.status = 'FAIL'
return check

if not target_files_by_ga:
# nothing new to generate
check.summary = check.description = "No new Higlass Items to generate"
check.allow_action = False
check.status = 'PASS'
return check

else:
all_files = sum([len(x) for x in check.full_output["ready"].values()])
check.summary = "Ready to generate %s Higlass Items" % all_files
Expand Down Expand Up @@ -827,12 +831,7 @@ def find_expsets_processedfiles_requiring_higlass_items(connection, check_name,
"static_content",
])

# If no search query was provided, fail
if not search_queries:
check.summary = check.description = "No search query provided, nothing to update."
check.status = 'PASS'
check.allow_action = False
return check
search_queries = verify_queries(check, search_queries)

expsets_by_accession = {}
# Use all of the search queries to make a list of the ExpSets we will work on.
Expand All @@ -844,7 +843,7 @@ def find_expsets_processedfiles_requiring_higlass_items(connection, check_name,
processed_expsets_query = "/search/?type=ExperimentSetReplicate" + query + fields_to_include

# Query the Experiment Sets
search_res = ff_utils.search_metadata(processed_expsets_query, key=connection.ff_keys)
search_res = get_search_results(connection, check, processed_expsets_query)

# Collate the results into a dict of ExpSets, ordered by accession
for expset in search_res:
Expand Down Expand Up @@ -897,6 +896,14 @@ def find_expsets_processedfiles_requiring_higlass_items(connection, check_name,
ready_to_generate_count = sum([len(accessions) for x, accessions in check.full_output["ready_expsets"].items()])

check.summary = ""

if check.full_output.get('search_problems'):
check.summary = "Please check input query for typo or formatting error"
check.description = check.summary + ". See full_output for details."
check.allow_action = False
check.status = 'FAIL'
return check

# If there are no files to act upon, we're done.
if not target_files_by_ga:
check.summary = check.description = "No new view configs to generate"
Expand Down Expand Up @@ -1177,19 +1184,15 @@ def find_expsets_otherprocessedfiles_requiring_higlass_items(connection, check_n
check.queries = []
check.action = action_name

# If no search query was provided and find_opfs_missing_higlass is False, pass with no results
if not (search_queries or find_opfs_missing_higlass):
check.summary = check.description = "No search query provided, nothing to update."
check.status = 'PASS'
check.allow_action = False
return check

if find_opfs_missing_higlass:
search_queries = [
"&experiments_in_set.other_processed_files.files.higlass_uid%21=No+value",
"&other_processed_files.files.higlass_uid%21=No+value"
]

else:
search_queries = verify_queries(check, search_queries)

# get the fields you need to include
fields_to_include = ""
for new_field in (
Expand All @@ -1216,7 +1219,7 @@ def find_expsets_otherprocessedfiles_requiring_higlass_items(connection, check_n
expset_query = "/search/?type=ExperimentSetReplicate" + query + fields_to_include

# Store results by accession
search_res = ff_utils.search_metadata(expset_query, key=connection.ff_keys)
search_res = get_search_results(connection, check, expset_query)
for expset in search_res:
expsets_by_accession[ expset["accession"] ] = expset

Expand Down Expand Up @@ -1368,6 +1371,13 @@ def consider_filegroup(fg):
all_filegroups_to_update[accession] = filegroups_to_update
higlass_view_count += len(filegroups_to_update.keys())

if check.full_output.get('search_problems'):
check.summary = "Please check input query for typo or formatting error"
check.description = check.summary + ". See full_output for details."
check.allow_action = False
check.status = 'FAIL'
return check

# check announces success
check.full_output['filegroups_to_update'] = all_filegroups_to_update
check.full_output['expsets_to_update'] = expsets_to_update
Expand Down Expand Up @@ -1697,7 +1707,7 @@ def does_url_exist(path):
continue

# Query all possible files
possibly_reg = ff_utils.search_metadata(search_query, key=connection.ff_keys)
possibly_reg = get_search_results(connection, check, search_query)

for procfile in possibly_reg:
current_file_cat = file_cat
Expand Down Expand Up @@ -2287,3 +2297,40 @@ def convert_es_timestamp_to_datetime(raw):
"%Y-%m-%dT%H:%M:%S"
)
return converted_date

def verify_queries(check, search_queries):
    """
    Helper to check that a search query is properly formatted and reformat if necessary.

    Args:
        check (CheckResult): Result of check, to be passed from check.
            May have its summary/description/status/allow_action/brief_output set.
        search_queries (list or str): One or more search queries. All Files found
            in at least one of the queries will be modified.

    Returns:
        list: Formatted search queries; empty list if none were provided.
    """
    # If no search query was provided, pass with no results. Return an empty
    # list immediately so callers can iterate safely -- falling through with a
    # None input would raise a TypeError in the comprehension below.
    if not search_queries:
        check.summary = check.description = "No search query provided, nothing to update."
        check.status = 'PASS'
        check.allow_action = False
        return []

    if isinstance(search_queries, str):
        # for case where (possibly multiple) query is passed in via kwargs
        queries = search_queries.split(',')
        search_queries = [q.strip() for q in queries]
        check.brief_output = {
            "corrected_query": "The query was not formatted as a list, please double check results"
        }

    # add '&' when missing from str; drop empty fragments (e.g. from a
    # trailing comma), which would otherwise raise an IndexError on q[0]
    return [q if q.startswith('&') else '&' + q for q in search_queries if q]

def get_search_results(connection, check, search_query):
    """
    Run a metadata search, recording any failure on the check instead of raising.

    Args:
        connection: Fourfront connection providing ff_keys for the search.
        check (CheckResult): Check whose full_output collects search errors
            under the 'search_problems' key.
        search_query (str): Search URL/query string to execute.

    Returns:
        list: Search results, or an empty list if the search raised.
    """
    try:
        results = ff_utils.search_metadata(search_query, key=connection.ff_keys)
    except Exception as exc:
        # Best-effort: surface the failure in full_output rather than crashing
        # the whole check (e.g. on a malformed user-supplied query).
        problems = check.full_output.setdefault('search_problems', [])
        problems.append(str(exc))
        results = []
    return results
Loading

0 comments on commit b456079

Please sign in to comment.