-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbaseline_svm_intent.py
131 lines (95 loc) · 4.2 KB
/
baseline_svm_intent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import sys
sys.path.append(sys.path[0] + '/../')
from corpus.corpus_base import Corpus
from feature_extract.feature_base import Feature
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn import svm
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
class SVM:
    """Baseline SVM intent classifier over ``Feature.create_tfidf`` features.

    Construction extracts utterance features and gold intents from both
    corpora (see ``setup``). ``train`` then fits the classifier, stores
    per-intent probabilities on every test instance and prints evaluation
    metrics.

    Both corpora must be iterable and yield instances providing
    ``get_utterance()`` and ``get_gold_intent()``. Test instances must also
    provide ``set_intent_probabilities()``, and the test corpus itself
    ``get_size()``.
    """

    def __init__(self, train_corpus, test_corpus):
        """Store both corpora and build the feature/target collections.

        Args:
            train_corpus: corpus used to fit the classifier.
            test_corpus: corpus that is predicted and evaluated.
        """
        self.corpus_tr = train_corpus
        self.corpus_ts = test_corpus
        self.__predicted = []

        self.setup()

        # Aliases used by train(); they reference the objects built by setup().
        self.train_data = self.feature_tr
        self.train_targets = self.all_targ_tr
        self.test_data = self.feature_ts
        self.test_targets = self.all_targ_ts

    @staticmethod
    def _encode_targets(classes, targets):
        """Map each target label to its index in ``classes``; unknown labels -> -1."""
        index_of = {label: idx for idx, label in enumerate(classes)}
        return [index_of.get(target, -1) for target in targets]

    @staticmethod
    def _probabilities_by_label(labels, rows):
        """Turn each probability row into a ``{label: probability}`` dict."""
        return [dict(zip(labels, row)) for row in rows]

    def train(self, my_verbose=False):
        """Fit the SVM, attach intent probabilities to test instances, print metrics.

        Args:
            my_verbose: forwarded to ``svm.SVC(verbose=...)``.

        Side effects:
            * ``set_intent_probabilities`` is called on every test instance
              with a ``{intent: probability}`` dict.
            * Accuracy, micro-averaged precision/recall, a classification
              report, the predicted ids and the gold ids are printed.

        Gold test intents are encoded with the mapping learned from the
        training targets. An intent that never occurred in training is
        encoded as -1, so it always counts as a misclassification.
        """
        # --------- transform targets ----------- #
        encoder = preprocessing.LabelEncoder()
        encoded_train_targets = encoder.fit_transform(self.train_targets)

        # ---------- BUILD SVM MODEL ------------ #
        model = svm.SVC(C=1,
                        kernel='poly',
                        verbose=my_verbose,
                        gamma=1,
                        probability=True)

        # ---------- Train the model using the training sets ------------ #
        model.fit(self.train_data, encoded_train_targets)

        # ----------- Predict the response for test dataset ------------ #
        y_pred = model.predict(self.test_data)

        # ---------- Retain probabilities ------------------------------- #
        self.__predicted = model.predict_proba(self.test_data)
        # predict_proba columns follow model.classes_; decode them once
        # instead of once per class per test instance.
        column_intents = encoder.inverse_transform(model.classes_)
        intent_probabilities = self._probabilities_by_label(
            column_intents, self.__predicted)

        print('CORPUS_TS size: ', self.corpus_ts.get_size())

        for probabilities, instance in zip(intent_probabilities, self.corpus_ts):
            instance.set_intent_probabilities(probabilities)

        print('----------- Intent probabilities set is complete ---------------')

        # Encode the gold intents with the TRAINING mapping. Refitting the
        # encoder on the test targets would renumber the classes whenever the
        # test set does not contain exactly the training intents, so the gold
        # ids would no longer line up with y_pred.
        y_true = self._encode_targets(encoder.classes_, self.test_targets)

        # ---------- MODEL ACCURACY ----------- #
        print("Accuracy:", metrics.accuracy_score(y_true, y_pred))

        # Model Precision: what percentage of positive tuples are labeled as such?
        print("Precision:", metrics.precision_score(y_true, y_pred, average='micro'))

        # Model Recall: what percentage of positive tuples are labelled as such?
        print("Recall:", metrics.recall_score(y_true, y_pred, average='micro'))
        print("\n")
        print(classification_report(y_true, y_pred))

        # Headers kept as-is for output compatibility; only the predicted ids
        # and the gold ids of the test set are printed below.
        print('TRAIN DATA')
        print('TEST DATA')
        print(y_pred)
        print()
        print(y_true)

    def setup(self):
        """Collect utterances, vocabulary, features and gold intents.

        Populates ``all_sent_tr``/``all_sent_ts``, ``all_sent_combined``,
        ``vocab``, ``feature_tr``/``feature_ts`` and
        ``all_targ_tr``/``all_targ_ts``.
        """
        # ---- grab all utterances ----
        self.all_sent_tr = [inst.get_utterance() for inst in self.corpus_tr]
        self.all_sent_ts = [inst.get_utterance() for inst in self.corpus_ts]

        # The vocabulary covers the whitespace-separated tokens of BOTH
        # corpora, and one Feature object built from it produces both
        # feature sets.
        self.all_sent_combined = self.all_sent_tr + self.all_sent_ts
        self.vocab = set()
        for sent in self.all_sent_combined:
            self.vocab.update(sent.split())

        feat = Feature(self.vocab)

        # ---- train feature ----
        self.feature_tr = feat.create_tfidf(self.all_sent_tr)

        # ---- test feature ----
        self.feature_ts = feat.create_tfidf(self.all_sent_ts)

        # ---- grab all train targets ----
        self.all_targ_tr = [inst.get_gold_intent() for inst in self.corpus_tr]

        # ---- grab all test targets ----
        self.all_targ_ts = [inst.get_gold_intent() for inst in self.corpus_ts]
if __name__ == '__main__':
    # Script entry point: build both corpora, inspect the training features,
    # then train and evaluate the baseline classifier.
    # The Corpus arguments are passed through unchanged; their meaning is
    # defined by Corpus itself.
    tr = Corpus(9, 'train')
    ts = Corpus(2, 'test')

    # The SVM constructor already runs setup(), so the features are ready
    # here. Calling setup() again would only repeat the feature extraction
    # and rebind feature_tr/feature_ts away from the train_data/test_data
    # arrays that train() actually uses.
    baseline_svm = SVM(tr, ts)

    print(baseline_svm.feature_tr)
    print(len(baseline_svm.feature_tr[0]))

    baseline_svm.train()