-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathschema_parser.py
75 lines (62 loc) · 2.58 KB
/
schema_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python3
import json
import re
def _parse_column_row(row):
    """Convert one column row of a table listing into a column record.

    The row is split on runs of two or more whitespace characters and
    normalised to five positions: keys, column name, data type, nullable
    flag and comment.

    Args:
        row: A stripped, non-empty line read after the header line that
            contains ``KEYS`` and ``COLUMN_NAME``.

    Returns:
        A dict with the keys ``Keys``, ``ColumnName``, ``DataType``,
        ``Nullable`` and ``Comment``.

    Raises:
        IndexError: If the row has no column name or no data type field.
    """
    fields = re.split(r"\s{2,}", row)

    # The keys field is optional; pad it so the positions below stay fixed.
    if not re.match(r"^([A-Z]K,)*[A-Z]K$", fields[0]):
        fields = [None] + fields

    # A name and type separated by a single space arrive fused in one
    # field. Split on the first space only: an unbounded split raised
    # ValueError on multi-word types such as "DOUBLE PRECISION".
    if " " in fields[1]:
        name, dtype = fields[1].split(" ", 1)
        fields = [fields[0], name, dtype] + fields[2:]

    if len(fields) == 3:
        # Neither a nullable flag nor a comment is present.
        fields += [None, ""]
    elif len(fields) == 4:
        # A single trailing field is either the nullable flag or a comment.
        trailing = fields[-1]
        if trailing == "NOT NULL":
            fields = fields + [""]
        else:
            fields = fields[:-1] + [None, trailing]

    keys, column_name, data_type, nullable = (
        fields[0],
        fields[1],
        fields[2],
        fields[3],
    )
    # A comment containing wide spacing was cut apart by the field split;
    # stitch the pieces back together instead of dropping all but the first.
    comment = " ".join(fields[4:])

    return {
        "Keys": keys,
        "ColumnName": column_name,
        "DataType": data_type,
        "Nullable": nullable,
        "Comment": comment,
    }


def parse_schema(file_path):
    """Parse a plain-text schema description into a list of table records.

    A line ending in ``:`` starts a new table whose name is the line without
    the colon. Inside a table, the line containing both ``KEYS`` and
    ``COLUMN_NAME`` marks the start of the column rows; every non-empty line
    after it (until the next table line) is parsed as one column. Lines
    before the first table, lines between a table line and its header, and
    blank lines are ignored.

    Rows that cannot be parsed are reported on standard output and skipped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of dicts, one per table in file order, each shaped as
        ``{"TableName": str, "Columns": [column dict, ...]}``.
    """
    schema = []
    current_table = None
    reading_data = False

    with open(file_path, "r") as file:
        for raw_line in file:
            line = raw_line.strip()

            if line.endswith(":"):
                # A new table begins; flush the one collected so far.
                if current_table is not None:
                    schema.append(current_table)
                current_table = {"TableName": line[:-1], "Columns": []}
                reading_data = False
                continue

            if current_table is None:
                continue

            if "KEYS" in line and "COLUMN_NAME" in line:
                reading_data = True
                continue

            # Free-form description lines and blank lines are not part of
            # the output format.
            if not reading_data or not line:
                continue

            try:
                column_entry = _parse_column_row(line)
            except IndexError:
                print(f"Error parsing line: {line}")
                continue
            current_table["Columns"].append(column_entry)

    if current_table is not None:
        schema.append(current_table)
    return schema
# Script body: parse the schema documentation file and write the resulting
# table list to schema.json, pretty-printed with a four-space indent.
schema = parse_schema("schema_documentation.txt")
with open("schema.json", "w") as file:
    file.write(json.dumps(schema, indent=4))