-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathFake_Real_Classification.py
More file actions
56 lines (54 loc) · 2.03 KB
/
Fake_Real_Classification.py
File metadata and controls
56 lines (54 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import random
import nltk
import collections
# Build the corpus: each document is a (token_list, label) tuple, and
# `words` accumulates every token seen so the most frequent ones can be
# selected as classification features later.
words = []
documents = []


def load_labelled_documents(folder, label, count=1999):
    """Read files 1..count from Real_Fake_News/<folder>/<num>.txt, tokenise
    each on whitespace, and record one (tokens, label) pair per file in the
    module-level `documents` list while extending the `words` vocabulary.

    NOTE(review): the original range(1, 2000) reads files 1..1999 — confirm
    whether 2000.txt was meant to be included.
    """
    for num in range(1, count + 1):
        tokens = []
        with open("Real_Fake_News/" + folder + "/" + str(num) + ".txt", 'r') as f:
            for line in f:
                # split() tokenises on any run of whitespace
                tokens.extend(line.split())
        words.extend(tokens)
        documents.append((tokens, label))


# The two original copy-pasted loops collapsed into two calls.
load_labelled_documents("fake", "fake")
load_labelled_documents("real", "real")
# Report corpus sizes, then shuffle so the later train/test split mixes
# fake and real documents instead of being ordered by class.
print(len(documents))
random.shuffle(documents)
print(len(documents))
print(len(words))

# Frequency distribution over every token in the corpus.
all_words = nltk.FreqDist(words)
print(all_words)

# BUG FIX: FreqDist.keys() yields words in first-seen (insertion) order,
# not by frequency, so slicing the first 2500 keys picked an arbitrary
# vocabulary. Use the 2500 *most frequent* words, which is the intent
# of a bag-of-words feature set.
word_features = [word for word, _count in all_words.most_common(2500)]
def document_features(document):
    """Map a token list to a boolean bag-of-words feature dict.

    For every word in the module-level `word_features` vocabulary the
    result holds one 'contains(word)' key whose value says whether the
    word occurs anywhere in `document`.
    """
    # Membership tests against a set are O(1) per lookup.
    present = set(document)
    return {
        'contains({})'.format(word): word in present
        for word in word_features
    }
# Turn every (tokens, label) pair into a (feature_dict, label) pair,
# then split into training and held-out evaluation sets.
featuresets = [(document_features(tokens), label) for (tokens, label) in documents]
train_set = featuresets[:3000]
test_set = featuresets[3000:]

# Naive Bayes baseline.
classifier = nltk.NaiveBayesClassifier.train(train_set)
nb_accuracy = nltk.classify.accuracy(classifier, test_set) * 100
print("NaiveBayesClassifier Accuracy =>" + str(nb_accuracy))
classifier.show_most_informative_features(5)

# Decision tree with the same hyper-parameters as before.
classifier = nltk.DecisionTreeClassifier.train(
    train_set,
    binary=False,
    entropy_cutoff=0.4,
    depth_cutoff=10,
    support_cutoff=20,
)
dt_accuracy = nltk.classify.accuracy(classifier, test_set) * 100
print("DecisionTreeClassifier Accuracy =>" + str(dt_accuracy))
# To test this application, put your text in a file named 1.txt and make the
# text as large as possible, because the feature set is small (small data set).
#InputList=[]
#with open("1.txt", 'r') as f:
# for line in f:
# for word in line.split():
# InputList.append(word)
# words.append(word)
#print(classifier.classify(document_features(InputList)))