Skip to content

Commit

Permalink
Merge pull request #6 from MLD3/icd_processing_2022
Browse files (browse the repository at this point in the history)
Fix ICD processing issue
  • Loading branch information
shengpu-tang authored Feb 17, 2022
2 parents a8e40d9 + e626322 commit 86b197f
Show file tree
Hide file tree
Showing 10 changed files with 12,585 additions and 334 deletions.
2 changes: 1 addition & 1 deletion FIDDLE/helpers.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -90,7 +90,7 @@ def get_frequent_numeric_variables(df_time_series, variables, threshold, args):
T, dt = args.T, args.dt

df_types = pd.read_csv(output_dir + 'value_types.csv').set_index(var_col)['value_type']
- numeric_vars = [col for col in variables if df_types[col] == 'Numeric']
+ numeric_vars = [col for col in variables if df_types.get(col) == 'Numeric']
df_num_counts = calculate_variable_counts(df_time_series, df_population)[numeric_vars] #gets the count of each variable for each patient.
variables_num_freq = df_num_counts.columns[df_num_counts.mean() >= threshold * np.floor(T/dt)]
return variables_num_freq
Expand Down
7 changes: 4 additions & 3 deletions FIDDLE/steps.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -72,6 +72,7 @@ def parse_variable_data_type(df_data, args):
if len(var_names) == 0: # No hierarchical values
pass
else:
+ print('Parsing hierarchical values')
for var_name in var_names:
var_type = args.value_type_override[var_name]
df_var = df.loc[df[var_col] == var_name, val_col]
Expand Down Expand Up @@ -143,12 +144,12 @@ def parse_variable_data_type(df_data, args):
var_names = [v for v, ty in data_types if 'numeric' not in ty.lower() and 'none' not in ty.lower()]
df_non_num = df[df[var_col].isin(var_names)].copy()
dup_ = df_non_num.duplicated(subset=[ID_col, t_col, var_col], keep=False)
- df_non_num_dup = df_non_num[dup_]
+ df_non_num_dup = df_non_num[dup_].copy()
dup_var_names = df_non_num_dup[var_col].unique()
df_non_num_dup[var_col] = df_non_num_dup[var_col].astype(str) + ':' + df_non_num_dup[val_col].astype(str)
df_non_num_dup[val_col] = 1
- df_non_num[dup_] = df_non_num_dup
- df[df[var_col].isin(var_names)] = df_non_num
+ df_non_num.loc[dup_, :] = df_non_num_dup
+ df.loc[df[var_col].isin(var_names), :] = df_non_num

return df, df_types['value_type']

Expand Down
3,868 changes: 3,538 additions & 330 deletions tests/icd_test/Run.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit 86b197f

Please sign in to comment.