-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
330 lines (277 loc) · 14.8 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
import streamlit as st
import os
import pandas as pd
from compressor import lzw_compress, save_compressed_file
from decompressor import lzw_decompress, read_compressed_file, extract_compression_params, decompress_file
from visualizer import create_performance_chart
from utils import process_files, load_results
from parameter_tester import test_parameters
import numpy as np
# Code bit lengths offered by every bit-length selector in the UI.
_CODE_BIT_LENGTHS = [9, 12, 16, 20, 24]

# Dictionary sizes that are always added to the Quick Test sweep when they
# do not exceed the maximum for the bit length under test.
_IMPORTANT_DICT_SIZES = [256, 512, 1024, 2048, 4096, 8192, 16384, 32768]


def _render_sidebar():
    """Render the sidebar compression controls.

    Returns:
        A ``(dict_size, code_bit_length)`` tuple. ``dict_size`` is ``None``
        when the "No Dictionary Size Limit" checkbox is ticked.
    """
    with st.sidebar:
        st.header("Compression Parameters")

        # Code bit length is asked first because it bounds the dictionary size.
        code_bit_length = st.select_slider(
            "Code Bit Length",
            options=_CODE_BIT_LENGTHS,
            value=12,
            help="Number of bits used to represent each code"
        )

        # Upper bound of the dictionary-size slider for the chosen bit length.
        max_possible_dict_size = (1 << code_bit_length) - 1

        dict_size = st.slider(
            "Max Dictionary Size",
            min_value=256,
            max_value=max_possible_dict_size,
            value=min(4096, max_possible_dict_size),
            step=256,
            help=f"Maximum value limited by Code Bit Length (current max: {max_possible_dict_size})"
        )

        if st.checkbox("No Dictionary Size Limit"):
            dict_size = None

    return dict_size, code_bit_length


def _render_compression_tab(dict_size, code_bit_length):
    """Render the upload / compress / download workflow of the first tab."""
    st.header("Manual Compression")
    uploaded_files = st.file_uploader(
        "Choose text files to compress",
        accept_multiple_files=True,
        type=['txt']
    )

    # The button is only rendered once at least one file has been uploaded.
    if not (uploaded_files and st.button("Compress Selected Files")):
        return

    try:
        compressed_files, results = process_files(
            uploaded_files, dict_size, code_bit_length, "."
        )

        if compressed_files and results:
            st.success("Compression completed successfully!")
            st.dataframe(pd.DataFrame(results))

            st.subheader("Download Compressed Files")
            for filepath, compressed_data in compressed_files.items():
                filename = os.path.basename(filepath)
                directory = os.path.dirname(filepath)

                st.markdown(f"**Directory:** `{directory}`")
                st.download_button(
                    label=f"Download {filename}",
                    data=compressed_data,
                    # The full path is kept as the suggested file name.
                    # NOTE(review): presumably so the parameters encoded in
                    # the path survive the download - confirm against
                    # extract_compression_params.
                    file_name=filepath,
                    mime="application/octet-stream",
                    key=filepath  # dict keys are unique, so widget keys are too
                )
                st.markdown("---")
    except Exception as e:  # UI boundary: report the failure instead of crashing
        st.error(f"Compression error: {e}")


def _render_decompression_tab():
    """Render the upload / decompress / download workflow of the second tab."""
    st.header("File Decompression")
    uploaded_compressed_files = st.file_uploader(
        "Choose compressed files to decompress",
        accept_multiple_files=True,
        type=['lzw']
    )

    if not (uploaded_compressed_files and st.button("Decompress Selected Files")):
        return

    for index, compressed_file in enumerate(uploaded_compressed_files):
        file_path = compressed_file.name
        try:
            # Compression parameters are recovered from the uploaded file name.
            params = extract_compression_params(file_path)
            if not params:
                st.error(f"Could not extract parameters from filename: {file_path}")
                continue

            # Local names on purpose: these must not overwrite the sidebar's
            # dict_size / code_bit_length selection.
            max_dict_size, file_code_bit_length = params

            decompressed_data = decompress_file(
                compressed_file,
                max_dict_size=max_dict_size,
                code_bit_length=file_code_bit_length
            )

            # NOTE(review): an earlier revision tried to strip an
            # "output_dictXXXX_codeXXbit" prefix here with split('/'), which
            # never changes a basename; the prefix (if any) is still kept.
            base_name = os.path.splitext(os.path.basename(file_path))[0]

            st.download_button(
                label=f"Download decompressed {base_name}",
                data=decompressed_data,
                file_name=f"{base_name}_decompressed.txt",
                mime="text/plain",
                # The index keeps keys unique when two uploads share a name.
                key=f"decomp_{index}_{base_name}"
            )

            dict_size_label = 'No Limit' if max_dict_size is None else max_dict_size
            decompressed_size = len(decompressed_data.encode('utf-8'))
            st.success(
                "\n"
                f"Successfully decompressed {base_name}\n"
                f"- Dictionary Size: {dict_size_label}\n"
                f"- Code Bit Length: {file_code_bit_length}\n"
                f"- Decompressed Size: {decompressed_size} bytes\n"
            )
        except Exception as e:  # UI boundary: keep processing the other files
            st.error(f"Error decompressing {compressed_file.name}: {e}")


def _quick_test_sizes(bit_length):
    """Return the dictionary sizes swept by Quick Test for *bit_length*.

    The result is a sorted list of distinct plain ints (log-spaced between
    256 and ``(1 << bit_length) - 1`` plus the fixed important sizes that
    fit), followed by ``None`` for the "no limit" case.
    """
    max_size = (1 << bit_length) - 1

    log_sizes = np.logspace(
        np.log10(256),       # start from 256
        np.log10(max_size),  # up to max_size
        100,                 # number of points
        dtype=int
    )

    # Convert NumPy scalars to plain ints and drop duplicates in one step.
    sizes = {int(size) for size in log_sizes}
    sizes.update(size for size in _IMPORTANT_DICT_SIZES if size <= max_size)

    return sorted(sizes) + [None]


def _mean(values):
    """Return the arithmetic mean of *values*, or 0.0 when it is empty."""
    return sum(values) / len(values) if values else 0.0


def _show_size_distribution(bit_length, sizes):
    """Show how the Quick Test sizes for *bit_length* are spaced."""
    numeric_sizes = [size for size in sizes if size is not None]
    intervals = [high - low for low, high in zip(numeric_sizes, numeric_sizes[1:])]
    if not intervals:
        # Fewer than two numeric sizes: there is no spacing to report.
        return

    # Average over the intervals actually present (at most five per end)
    # rather than always dividing by five.
    avg_interval_start = _mean(intervals[:5])
    avg_interval_end = _mean(intervals[-5:])

    st.info(
        "\n"
        f"For {bit_length}-bit encoding:\n"
        f"- Number of test points: {len(sizes)}\n"
        f"- Average interval at start: {avg_interval_start:.0f}\n"
        f"- Average interval at end: {avg_interval_end:.0f}\n"
        f"- Ratio (end/start): {avg_interval_end/avg_interval_start:.1f}x\n"
    )


def _show_quick_test_results(test_results, selected_bits):
    """Show the best row per bit length, the full table and the chart."""
    st.success("Parameter testing completed!")

    st.subheader("Best Results by Bit Length")
    for bit_length in selected_bits:
        bit_results = test_results[test_results['Code Bit Length'] == bit_length]
        performance = bit_results['Compression Performance (%)']

        # idxmax() cannot select a row when there is nothing (valid) to rank.
        if not performance.notna().any():
            st.warning(f"No results available for {bit_length}-bit encoding")
            continue

        best_result = bit_results.loc[performance.idxmax()]
        st.markdown(
            "\n"
            f"**{bit_length}-bit encoding:**\n"
            f"- Best Dictionary Size: {best_result['Max Dictionary Size']}\n"
            f"- Compression Performance: {best_result['Compression Performance (%)']:.2f}%\n"
            f"- Compression Ratio: {best_result['Compression Ratio']:.3f}\n"
        )

    st.subheader("All Test Results")
    st.dataframe(test_results)

    st.subheader("Performance Visualization")
    fig = create_performance_chart(test_results)
    st.plotly_chart(fig, use_container_width=True)


def _render_quick_test(test_file):
    """Render the Quick Test mode: pick bit lengths, sweep dictionary sizes."""
    selected_bits = st.multiselect(
        "Select Code Bit Lengths to Test",
        _CODE_BIT_LENGTHS,
        default=[12, 16]
    )

    if not selected_bits:
        st.warning("Please select at least one bit length to test")
        return

    # Map each selected bit length to the dictionary sizes it will be tested with.
    test_params = {}
    for bit_length in selected_bits:
        sizes = _quick_test_sizes(bit_length)
        _show_size_distribution(bit_length, sizes)
        test_params[bit_length] = sizes

    total_tests = sum(len(sizes) for sizes in test_params.values())
    st.info(
        "\n"
        "Overall Test Configuration:\n"
        f"- Total number of tests: {total_tests}\n"
        f"- Bit lengths to test: {selected_bits}\n"
        f"- Tests per bit length: ~{total_tests // len(selected_bits)}\n"
    )

    if not st.button("Run Comprehensive Test"):
        return

    progress_bar = st.progress(0)
    status_text = st.empty()

    # Flatten into two parallel lists: entry i of one pairs with entry i of
    # the other.
    # NOTE(review): Custom Test calls test_parameters with a list of sizes
    # and a one-element bit-length list instead; confirm which shape
    # test_parameters actually expects.
    all_dict_sizes = []
    all_bit_lengths = []
    for bit_length, dict_sizes in test_params.items():
        all_dict_sizes.extend(dict_sizes)
        all_bit_lengths.extend([bit_length] * len(dict_sizes))

    def report_progress(p):
        """Mirror the progress value reported by test_parameters in the UI."""
        progress_bar.progress(p)
        status_text.text(f"Testing: {int(p * 100)}% complete")

    with st.spinner("Testing parameters..."):
        test_results = test_parameters(
            test_file,
            all_dict_sizes,
            all_bit_lengths,
            progress_callback=report_progress
        )

    progress_bar.progress(100)
    status_text.text("Testing completed!")

    _show_quick_test_results(test_results, selected_bits)


def _parse_dict_sizes(text):
    """Parse a comma-separated list of dictionary sizes.

    Each entry is an integer, or the word "none" (any case) for ``None``.

    Raises:
        ValueError: if an entry is neither an integer nor "none".
    """
    return [
        None if entry.strip().lower() == 'none' else int(entry.strip())
        for entry in text.split(',')
    ]


def _render_custom_test(test_file, column):
    """Render the Custom Test mode; its inputs are placed in *column*."""
    with column:
        selected_bit_length = st.selectbox(
            "Select Code Bit Length",
            _CODE_BIT_LENGTHS,
            index=1
        )

        max_possible_size = (1 << selected_bit_length) - 1
        st.info(f"Maximum possible dictionary size: {max_possible_size}")

        custom_dict_sizes = st.text_input(
            "Enter Dictionary Sizes (comma-separated)",
            "256, 512, 1024, 2048, 4096, None"
        )

        try:
            dict_sizes = _parse_dict_sizes(custom_dict_sizes)
        except ValueError:
            st.error("Invalid dictionary sizes")
            dict_sizes = []

    if not st.button("Run Custom Test"):
        return

    if not dict_sizes:
        st.error("Please enter valid dictionary sizes")
        return

    with st.spinner("Testing parameters..."):
        test_results = test_parameters(
            test_file,
            dict_sizes,
            [selected_bit_length]
        )
    show_test_results(test_results)


def _render_parameter_testing_tab():
    """Render the third tab: upload a file and run Quick or Custom tests."""
    st.header("Parameter Testing")
    test_file = st.file_uploader(
        "Choose a file to test parameters",
        type=['txt']
    )

    if not test_file:
        return

    # Only the first column is used; the second one stays empty.
    col1, _ = st.columns(2)

    with col1:
        test_mode = st.radio(
            "Testing Mode",
            ["Quick Test", "Custom Test"]
        )

    if test_mode == "Quick Test":
        _render_quick_test(test_file)
    else:  # Custom Test
        _render_custom_test(test_file, col1)


def main():
    """Build the Streamlit page: sidebar parameters plus three tabs.

    The sidebar selection (dictionary size and code bit length) is used by
    the compression tab only; decompression reads its parameters from each
    uploaded file name and parameter testing has its own selectors.
    """
    st.set_page_config(page_title="LZW Compression Tool", layout="wide")
    st.title("LZW Compression Tool")

    dict_size, code_bit_length = _render_sidebar()

    tab1, tab2, tab3 = st.tabs([
        "Manual Compression",
        "File Decompression",
        "Parameter Testing"
    ])

    with tab1:
        _render_compression_tab(dict_size, code_bit_length)

    with tab2:
        _render_decompression_tab()

    with tab3:
        _render_parameter_testing_tab()
def show_test_results(test_results):
    """Display test results and visualizations.

    Shows the full results table, then the row with the highest
    'Compression Performance (%)', then the performance chart.

    Args:
        test_results: Results table (presumably the pandas DataFrame
            returned by test_parameters - confirm). The columns
            'Max Dictionary Size', 'Code Bit Length' and
            'Compression Performance (%)' are read here.
    """
    performance_column = 'Compression Performance (%)'

    st.subheader("Test Results")
    st.dataframe(test_results)

    # Without at least one non-NaN performance value there is no best row
    # to select and nothing meaningful to chart, so stop with a warning
    # instead of failing inside idxmax()/loc.
    if test_results.empty or not test_results[performance_column].notna().any():
        st.warning("No valid test results to summarize")
        return

    # Show best parameters
    best_result = test_results.loc[test_results[performance_column].idxmax()]
    st.success(
        "\n"
        "Best parameters found:\n"
        f"- Dictionary Size: {best_result['Max Dictionary Size']}\n"
        f"- Code Bit Length: {best_result['Code Bit Length']}\n"
        f"- Compression Performance: {best_result['Compression Performance (%)']:.2f}%\n"
    )

    # Create and show visualization
    st.subheader("Performance Visualization")
    fig = create_performance_chart(test_results)
    st.plotly_chart(fig, use_container_width=True)
# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()