-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAnalyze2DHelper.py
140 lines (114 loc) · 4.94 KB
/
Analyze2DHelper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import traceback
from os.path import splitext
from tkinter import filedialog as fd
from xml.etree.ElementTree import ElementTree
import numpy as np
import pandas
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from scipy import signal
# Number of pixels in one recorded spectrum (one value per pixel in the .txt data).
npixels = 1024
# Wavelength span covered by a single pixel, in nm; used to build the 'nm Offset' axis.
nm_per_pixel = 0.125
# 1-based pixel labels (1..npixels), used as column names when reading the .txt data.
# Derived from npixels so the two can never disagree.
pixel_nums = np.arange(1, npixels + 1)
def convert_nm_cm(num: float) -> float:
    """Convert a value in nm to cm^-1, or a value in cm^-1 to nm.

    The conversion is its own inverse: the result is ``10000000 / num``
    (there are 1e7 nm in one cm), so the same call works in both directions.

    Args:
        num: Wavelength in nm or wavenumber in cm^-1. Must be non-zero.

    Returns:
        The value expressed in the other unit.
    """
    nm_per_cm = 10000000
    return nm_per_cm / num
def get_filenames() -> tuple[str, str]:
    """Ask the user for an .xml file and derive the accompanying .txt file name.

    Opens a file dialog restricted to ``*.xml``. The .txt name is the selected
    path with its extension replaced by ``.txt``; the file is not checked for
    existence.

    Returns:
        ``(xml_file, txt_file)``. If the dialog is cancelled, both entries are
        empty strings so callers can test either one for truthiness.
    """
    xml_file = fd.askopenfilename(title='Select .xml File', filetypes=[('XML', '*.xml')])
    if not xml_file:
        # Cancelled dialog: do not fabricate a bare '.txt' path from nothing.
        return '', ''
    txt_file = splitext(xml_file)[0] + '.txt'
    return xml_file, txt_file
def get_df(filename: str, params: dict) -> pd.DataFrame:
    """Load tab-delimited scan data into a DataFrame indexed by 'nm Offset'.

    The file is read with one column per pixel (labelled by ``pixel_nums``) and
    then transposed, so each row of the result is a pixel and each column is a
    row of the file. Columns are relabelled with
    ``np.linspace(params['scan_start'], params['scan_stop'], params['nsteps'])``
    and the row index is replaced by an evenly spaced 'nm Offset' axis whose
    spacing is ``nm_per_pixel``.

    Args:
        filename: Path of the tab-delimited text file to read.
        params: Scan parameters; the keys 'scan_start', 'scan_stop', 'offset'
            and 'nsteps' are read (see ``get_params``).

    Returns:
        The DataFrame, or ``None`` if reading or relabelling failed, in which
        case the traceback is printed.
    """
    try:
        df = pd.read_csv(filename, delimiter='\t', names=pixel_nums).transpose()

        # center_index is representative of resonant fluorescence; resonance is
        # considered as 0 offset. The shift from the middle pixel is the nm
        # difference between scan_start and (scan_start - offset), in pixels.
        shift_nm = convert_nm_cm(params['scan_start']) - convert_nm_cm(params['scan_start'] - params['offset'])
        center_index = int((npixels / 2) + shift_nm / nm_per_pixel)

        # Offsets of the first and last pixel relative to the centre pixel.
        high_energy_nm = -((center_index - 1) * nm_per_pixel)
        low_energy_nm = (npixels - center_index) * nm_per_pixel

        df.columns = np.linspace(params['scan_start'], params['scan_stop'], params['nsteps'])
        df['nm Offset'] = np.linspace(high_energy_nm, low_energy_nm, npixels)
        return df.set_index('nm Offset')
    except Exception:
        # Best-effort loader: report the problem and hand back None rather than
        # raising. KeyboardInterrupt/SystemExit are deliberately not caught.
        traceback.print_exc()
        return None
def get_params(filename: str) -> dict:
    """Extract the scan parameters from an .xml file.

    Every element whose tag contains 'DBL' or 'I32' is inspected. Its first
    child's text is treated as the parameter name and its second child's text
    as the value. Names are matched by substring, in the order listed in
    ``fields`` below, and the first match wins. If several elements match the
    same parameter, the last one in document order is kept.

    Elements with fewer than two children or with an empty name are skipped,
    so that unnamed numeric entries do not abort parsing of the remaining
    parameters.

    Args:
        filename: Path of the .xml file to parse.

    Returns:
        A dict with keys 'scan_start', 'scan_stop', 'scan_step', 'offset'
        (floats) and 'nsteps' (int). A parameter that was not found stays
        ``None``. If parsing fails, the traceback is printed and whatever was
        collected so far is returned.
    """
    params = {'scan_start': None, 'scan_stop': None, 'scan_step': None, 'offset': None, 'nsteps': None}
    # (substring of the element name, params key, converter) - order matters.
    fields = (
        ('Scan.Start', 'scan_start', float),
        ('Scan.Stop', 'scan_stop', float),
        ('Scan.step', 'scan_step', float),
        ('offset', 'offset', float),
        ('NSteps', 'nsteps', int),
    )
    try:
        tree = ElementTree()
        tree.parse(filename)
        for elem in tree.getroot().iter():
            if 'DBL' not in elem.tag and 'I32' not in elem.tag:
                continue
            if len(elem) < 2:
                # No name/value pair to read.
                continue
            label = elem[0].text
            if label is None:
                # Unnamed entry: cannot be one of the scan parameters.
                continue
            for needle, key, convert in fields:
                if needle in label:
                    params[key] = convert(elem[1].text)
                    break
    except Exception:
        # Best-effort parser: report and return the partial result.
        traceback.print_exc()
    return params
def graph_2d(df: pandas.DataFrame, peaks=None):
    """Show *df* as a seaborn heatmap, optionally marking peaks.

    Args:
        df: Data to draw; passed straight to ``sns.heatmap``.
        peaks: Optional iterable of y positions (peak indices). A horizontal
            line spanning the current x-limits is drawn at each one.

    The x axis is labelled '$cm^{-1}$' and the figure is displayed with
    ``plt.show()``.
    """
    sns.set_theme()
    sns.heatmap(df)
    for y_pos in (peaks if peaks is not None else ()):
        x_min, x_max = plt.gca().get_xlim()
        plt.hlines(y_pos, x_min, x_max)
    plt.xlabel('$cm^{-1}$')
    plt.tight_layout()
    plt.show()
def get_2d_peaks(df: pandas.DataFrame, width: float = 5, height: float = 10000, distance: float = 10):
    """Find peaks in the row-wise mean of *df*.

    The mean across columns is stored in *df* itself as a new 'Average'
    column (the caller's DataFrame is modified), and peaks are then searched
    in that column.

    Args:
        df: Data whose rows are averaged.
        width: Forwarded to ``scipy.signal.find_peaks`` as ``width``.
        height: Forwarded to ``scipy.signal.find_peaks`` as ``height``.
        distance: Forwarded to ``scipy.signal.find_peaks`` as ``distance``.

    Returns:
        The ``(peak_indices, properties)`` tuple returned by ``find_peaks``.
    """
    row_means = df.mean(axis='columns')
    df['Average'] = row_means
    search_options = {'width': width, 'height': height, 'distance': distance}
    return signal.find_peaks(df['Average'], **search_options)
def get_1d_peaks(df: pandas.DataFrame):
    """Find peaks separately in every row of *df*.

    Each row's values are passed to ``scipy.signal.find_peaks`` with
    ``prominence=1`` and ``width=10``.

    Returns:
        A list with one entry per row, in row order; each entry is the array
        of peak indices found in that row.
    """
    return [
        signal.find_peaks(series.values, prominence=1, width=10)[0]
        for _, series in df.iterrows()
    ]
def get_peak_df(df: pandas.DataFrame, peaks: list[int], peak_params: dict) -> pandas.DataFrame:
    """Build a DataFrame with one row per peak.

    For peak number ``i``, the rows of *df* from ``peak_params['left_bases'][i]``
    up to (not including) ``peak_params['right_bases'][i]`` are averaged
    column-wise. The result is stored under the index label that *df* has at
    the peak position.

    Args:
        df: Source data; must contain an 'Average' column, which is removed
            from the result.
        peaks: Positional row indices of the peaks in *df*.
        peak_params: Mapping providing 'left_bases' and 'right_bases'
            sequences aligned with *peaks*.

    Returns:
        A DataFrame indexed by 'nm Offset' with the columns of *df* minus
        'Average'.
    """
    peak_df = pandas.DataFrame(columns=df.columns)
    row_labels = df.index.values
    for i, peak_pos in enumerate(peaks):
        start = peak_params['left_bases'][i]
        stop = peak_params['right_bases'][i]
        window_mean = df.iloc[start:stop].mean()
        peak_df.loc[row_labels[peak_pos]] = window_mean
    peak_df.drop(columns='Average', inplace=True)
    peak_df.index.name = 'nm Offset'
    return peak_df
def graph_peaks(peak_df: pandas.DataFrame, peaks=None) -> None:
    """Plot every row of *peak_df* as its own line plot in a FacetGrid.

    *peak_df* should be the frame produced by ``get_peak_df``. It is
    transposed and melted into long form ('nm Offset' / 'Signal', indexed by
    '$cm^{-1}$'), then drawn with one facet column per 'nm Offset' value.

    Args:
        peak_df: One row per peak, one column per x value.
        peaks: Optional sequence with one entry per facet. Entry ``i`` holds
            positional indices into the columns of *peak_df*; a dashed
            vertical line is drawn at each corresponding x value.
    """
    long_df = peak_df.transpose().melt(ignore_index=False, var_name='nm Offset', value_name='Signal')
    long_df.index.name = '$cm^{-1}$'
    sns.set_theme()
    grid = sns.FacetGrid(data=long_df, col='nm Offset')
    grid.map_dataframe(sns.lineplot, x='$cm^{-1}$', y='Signal')
    if peaks is not None:
        for i, axis in enumerate(plt.gcf().axes):
            x_positions = peak_df.iloc[i].index.values[peaks[i]]
            y_low, y_high = axis.get_ylim()
            axis.vlines(x_positions, y_low, y_high, linestyles='dashed')
    plt.tight_layout()
    plt.show()
def dump_peak_info(df_filename: str, peaks_filename: str, df: pandas.DataFrame, peak_list) -> None:
    """Write *df* and its per-row peak list to two CSV files.

    *df* is written to *df_filename* first. *peak_list* is then turned into a
    DataFrame, given the same index as *df* (so it needs one entry per row of
    *df*), and written to *peaks_filename*. Floats are formatted with four
    decimal places in both files.

    Args:
        df_filename: Destination path for *df*.
        peaks_filename: Destination path for the peak table.
        df: Data to dump.
        peak_list: Sequence of per-row peak data, one entry per row of *df*.
    """
    csv_options = {'float_format': '%.4f'}
    df.to_csv(df_filename, **csv_options)
    pd.DataFrame(peak_list).set_index(df.index).to_csv(peaks_filename, **csv_options)