-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathclassifyVirustotal.py
27 lines (24 loc) · 972 Bytes
/
classifyVirustotal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
"""Find the most common words in a VirusTotal results file.

Usage: pass the path to the VirusTotal file as argv[1]. The file is split
into lowercase, whitespace-separated words, and the most frequent ones are
printed as ``word \t count`` lines, highest count first.
"""
import sys
from collections import Counter

# Words that are excluded from the ranking.
IGNORED_WORDS = frozenset(("virustotal", "results"))

# How many of the highest-count words are printed.
TOP_N = 5


def most_common_words(text, limit=TOP_N, ignored=IGNORED_WORDS):
    """Return the most frequent words in *text* as (count, word) tuples.

    Args:
        text: Raw text; it is lowercased and split on whitespace.
        limit: Maximum number of tuples to return.
        ignored: Lowercase words that are left out of the ranking.

    Returns:
        A list of at most *limit* ``(count, word)`` tuples, highest count
        first. Words with equal counts are ordered by word, descending,
        because the tuples themselves are sorted in reverse.
    """
    words = text.lower().split()
    # A single pass with Counter replaces the quadratic "scan the whole list
    # once per unique word" counting.
    tally = Counter(word for word in words if word not in ignored)
    ranked = sorted(
        ((count, word) for word, count in tally.items()),
        reverse=True,
    )
    return ranked[:limit]


def main(argv):
    """Read the file named by argv[1] and print its most common words."""
    if len(argv) < 2:
        sys.exit("usage: classifyVirustotal.py <virustotal-file>")
    # The context manager closes the file even if reading fails.
    with open(argv[1]) as handle:
        text = handle.read()
    for count, word in most_common_words(text):
        print('%s \t %d' % (word, count))


if __name__ == "__main__":
    main(sys.argv)