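"""BreakHis benign vs. malignant classification.

Prepares the BreakHis histopathology dataset, extracts deep features with a
ResNet-101 backbone, trains an SVM classifier, and reports metrics averaged
over several random train/test splits.
"""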
import os
import shutil

import numpy as np
from prettytable import PrettyTable
from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score, precision_score, recall_score

import settings
from classifiers.svm import SVM
from feature_extractors.resnet101_extractor import Resnet101Extractor as Extractor
from utils.io_handler import IOHandler
from utils.preprocessing import Preprocessing
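
# The constants below come from settings.py. A minimal sketch of the expected
# fields (the values shown are illustrative assumptions, not the project's
# actual defaults):
#   SOURCE_BREAKHIS_FOLDER = 'data/breast'   # raw BreakHis download root
#   DEFAULT_DATA_FOLDER = 'data'             # working folder for prepared data
#   MAGNIFICATIONS = (40, 100, 200, 400)     # the standard BreakHis magnifications
#   DEFAULT_IMAGE_SIZE = (224, 224)          # input size for the ResNet-101 extractor
#   ROUNDS_OF_TEST = 10                      # evaluation rounds to average over
#   APPLY_SMOTE = True                       # balance classes with SMOTE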


def prepare_dataset(
        original_dataset_path: str = settings.SOURCE_BREAKHIS_FOLDER,
        prepared_dataset_path: str = settings.DEFAULT_DATA_FOLDER) -> str:
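    """Organize the raw BreakHis dataset and resize every image.

    The raw dataset is first reorganized by magnification and class, then each
    image is resized to settings.DEFAULT_IMAGE_SIZE. The intermediate organized
    copy is deleted and the path to the resized dataset is returned.
    """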
    organized_dataset_path = IOHandler.prepare_breakhis_dataset(
        original_dataset_path,
        dest_folder_path=os.path.join(prepared_dataset_path, 'breakhis'))
    # TODO: Fix mixed slashes bug
    resized_dataset_path = os.path.abspath(
        os.path.join(prepared_dataset_path, 'resized_breakhis')).replace('\\', '/')
    organized_dataset_path = os.path.abspath(organized_dataset_path).replace('\\', '/')
    for magnification in settings.MAGNIFICATIONS:
        Preprocessing.resize_dataset(
            original_dataset_path=os.path.join(f'{organized_dataset_path}/', f'{magnification}X'),
            new_dataset_path=os.path.join(f'{resized_dataset_path}/', f'{magnification}X'),
            classes=('benign', 'malignant'),
            new_size=settings.DEFAULT_IMAGE_SIZE,
            save_files=True)
    shutil.rmtree(organized_dataset_path)
    return resized_dataset_path


def get_classification_results(
        rounds: int,
        dataset: np.ndarray,
        apply_smote: bool) -> tuple:
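    """Train and evaluate the SVM over several random train/test splits.

    Each round splits the dataset, optionally balances it with SMOTE, fits the
    SVM, and scores it on the held-out test set. Returns the averages of
    (accuracy, kappa, precision, recall, f1) over all rounds.
    """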
    # Lists to store the prediction metrics of every round
    _accuracy = []
    _kappa = []
    _f1 = []
    _precision = []
    _recall = []
    for _round in range(rounds):
        # Splitting the data into train and test sets randomly
        train, test = Preprocessing.train_test_split(dataset)
        # Generating synthetic data to equalize the amount of data belonging to both classes
        train = Preprocessing.apply_smote(train) if apply_smote else train
        # Isolating classes and features for the training and test steps
        x_train = Preprocessing.isolate_features(train)
        y_train = Preprocessing.isolate_classes(train)
        x_test = Preprocessing.isolate_features(test)
        y_test = Preprocessing.isolate_classes(test)
        # Fitting the classifier with pre-defined hyperparameters
        classifier = SVM(x_train, y_train.flatten()).train()
        # Getting classification results for the test data
        predictions = classifier.predict(x_test).flatten()
        _accuracy.append(accuracy_score(y_test.flatten(), predictions))
        _kappa.append(cohen_kappa_score(y_test.flatten(), predictions))
        _precision.append(precision_score(y_test.flatten(), predictions))
        _recall.append(recall_score(y_test.flatten(), predictions))
        _f1.append(f1_score(y_test.flatten(), predictions))
    return (
        np.average(_accuracy),
        np.average(_kappa),
        np.average(_precision),
        np.average(_recall),
        np.average(_f1))


if __name__ == '__main__':
    # ---------------------------------------------------------------------------------------------------
    # Preparing the BreakHis dataset. Here, we expect to receive the dataset in its original form,
    # beginning at the folder 'breast'.
    # It's important to set all the paths in the settings.py file, or else the preparation might not
    # work as expected.
    # Also, the output folder will contain all the files of the BreakHis dataset organized by
    # magnification and class as follows:
    # output_path/
    #     40X/
    #         malignant/
    #         benign/
    #     100X/
    #         malignant/
    #         benign/
    #     ...
    # ---------------------------------------------------------------------------------------------------
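    # Uncomment to build the resized dataset on the first run: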
    # prepare_dataset()

    # Defining the dataset path
    magnification = settings.MAGNIFICATIONS[2]
    images_dataset_path = f'{settings.DEFAULT_DATA_FOLDER}/resized_breakhis/{magnification}X'
    # ---------------------------------------------------------------------------------------------
    # Extracting the deep learning features from the images.
    # Alternatively, you can save the deep learning features into a .csv file and then load it
    # into memory to avoid extracting the same features again.
    # ---------------------------------------------------------------------------------------------
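    # batch_extract is assumed to return one row per image: the ResNet-101 features
    # plus the class label, which isolate_features/isolate_classes split apart later.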
    feature_extractor = Extractor(settings.DEFAULT_IMAGE_SIZE, verbose=True)
    deep_learning_features = feature_extractor.batch_extract(dataset_path=images_dataset_path)
    IOHandler.save_as_csv(
        data=deep_learning_features,
        filepath=f'{settings.DEFAULT_DATA_FOLDER}/features',
        filename=f'{Extractor.__name__}_{magnification}X')
    # deep_learning_features = IOHandler.read_csv(
    #     filepath=f'{settings.DEFAULT_DATA_FOLDER}/feature_files',
    #     filename='my_features')
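    # Scaling each feature to [0, 1] so that no single activation dominates the SVM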
    deep_learning_features = Preprocessing.min_max_normalization(deep_learning_features)
    (acc, kappa, precision, recall, f1) = get_classification_results(
        settings.ROUNDS_OF_TEST,
        deep_learning_features,
        settings.APPLY_SMOTE)

    table = PrettyTable()
    table.field_names = ['Metric', f'Average result for {settings.ROUNDS_OF_TEST} iterations']
    table.add_row(['Accuracy', acc])
    table.add_row(['Kappa score', kappa])
    table.add_row(['Precision', precision])
    table.add_row(['Recall', recall])
    table.add_row(['F1 score', f1])
    print(table)