-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalyzer.py
More file actions
40 lines (29 loc) · 1.34 KB
/
analyzer.py
File metadata and controls
40 lines (29 loc) · 1.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Spanish stop words (articles, prepositions, pronouns, frequent verbs and
# chat abbreviations) that are left out of the word-frequency counts.
exclude_words = [
    'el', 'la', 'los', 'las', 'un', 'una', 'unos', 'unas',
    'al', 'del', 'lo', 'le',
    'y', 'e', 'o', 'u',
    'de', 'a', 'en', 'que', 'es', 'por', 'para', 'con',
    'se', 'su', 'les', 'me', 'q', 'te', 'pero', 'mi', 'ya',
    'cuando', 'como', 'estoy', 'voy', 'porque', 'he', 'son',
    'solo', 'tengo', 'muy',
]
def analyze(topic, exclude=None):
    """Count how often each word appears in a UTF-8 text file.

    Every line is lower-cased and split on whitespace; tokens found in the
    exclusion list are skipped.

    Args:
        topic: Path of the text file to read.
        exclude: Optional iterable of lower-case words to ignore. When
            omitted, the module-level ``exclude_words`` list is used.

    Returns:
        A dict mapping each remaining token to its number of occurrences.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Build the lookup set once per call: O(1) membership per token instead
    # of scanning a list for every word.
    excluded = frozenset(exclude_words if exclude is None else exclude)
    top_words = {}
    # The context manager closes the file even if reading/decoding fails.
    with open(topic, encoding='utf-8') as tweets_topic:
        for line in tweets_topic:
            # split() with no arguments already discards surrounding
            # whitespace, so no separate strip() is needed.
            for word in line.lower().split():
                if word not in excluded:
                    top_words[word] = top_words.get(word, 0) + 1
    return top_words
def most_mentioned_people(top_words, most_used_words, limit=10):
    """Print the most frequent @-mentions followed by a separator line.

    Args:
        top_words: Mapping of word -> occurrence count.
        most_used_words: Iterable of words in the order they should be
            reported (the caller is expected to pass them most frequent
            first).
        limit: Maximum number of mentions to print (default 10).

    Each mention is printed as ``<count> <word>``. A line of 40 asterisks is
    always printed afterwards, even when no mention was found.
    """
    shown = 0
    for word in most_used_words:
        if shown >= limit:
            # Enough mentions reported; no need to scan the rest.
            break
        if word.startswith('@'):
            print(top_words[word], word)
            shown += 1
    print('*' * 40)  # visual separator between report sections
def top20_words(top_words, most_used_words, limit=20):
    """Print the leading words of a ranking, one ``<count> <word>`` per line.

    Args:
        top_words: Mapping of word -> occurrence count.
        most_used_words: Iterable of words in the order they should be
            reported (the caller is expected to pass them most frequent
            first).
        limit: Maximum number of words to print (default 20).
    """
    for position, word in enumerate(most_used_words):
        if position >= limit:
            # Stop as soon as the quota is reached instead of walking the
            # remainder of the ranking.
            break
        print(top_words[word], word)
# Count every non-excluded word in the tweet dump; the result is a dict
# mapping each word to its number of occurrences.
top = analyze('#ParoIndefinido2021.txt')

# Words ordered from most to least frequent.
top_sorted = sorted(top.keys(), key=lambda word: top[word], reverse=True)

# Report: most mentioned accounts first, then the overall top words.
most_mentioned_people(top, top_sorted)
top20_words(top, top_sorted)