-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimple_dataset_creator.py
52 lines (39 loc) · 1.51 KB
/
simple_dataset_creator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
"""
Reza Marzban
https://github.com/Reza-Marzban
"""
import numpy as np
import matplotlib.pyplot as plt
class SimpleDataSetCreator:
    """Factory for a small synthetic two-class, two-feature dataset."""

    @staticmethod
    def create_simple_dataset(n_per_class=5000, train_fraction=0.85, seed=None):
        """Create a two-class 2-D Gaussian dataset and split it into train/test.

        Class 0 is drawn from a normal distribution with mean ``[2, 1]`` and
        covariance ``[[1, 0], [0, 3]]``; class 1 from mean ``[5, 8]`` and
        covariance ``[[3, 1], [1, 3]]``. With the default arguments this
        yields 10000 data points in total (5000 per class).

        Each row is assigned to the training set independently with
        probability ``train_fraction``, so the split sizes are only
        approximately ``train_fraction`` / ``1 - train_fraction`` of the data.

        Args:
            n_per_class: Number of samples to draw for each class.
            train_fraction: Probability (between 0 and 1) that a sample is
                placed in the training set.
            seed: Optional seed for a private random generator. When ``None``
                the global ``np.random`` state is used, so callers relying on
                ``np.random.seed`` keep getting the same results as before.

        Returns:
            Tuple ``(x_train, y_train, x_test, y_test)``. The ``x`` arrays have
            shape ``(n, 2)``; the ``y`` arrays have shape ``(n, 1)`` and hold
            the float labels ``0.0`` (first class) or ``1.0`` (second class).

        Raises:
            ValueError: If ``n_per_class`` is negative or ``train_fraction``
                is outside ``[0, 1]``.
        """
        if n_per_class < 0:
            raise ValueError("n_per_class must be non-negative")
        if not 0.0 <= train_fraction <= 1.0:
            raise ValueError("train_fraction must be between 0 and 1")

        # The np.random module and a RandomState instance expose the same
        # multivariate_normal / rand methods, so either can serve as the
        # source of randomness below.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # First class distribution.
        data1 = rng.multivariate_normal([2, 1], [[1, 0], [0, 3]], n_per_class)
        # Second class distribution.
        data2 = rng.multivariate_normal([5, 8], [[3, 1], [1, 3]], n_per_class)
        x = np.concatenate((data1, data2))

        # Labels: 0 for the first class, 1 for the second, as column vectors.
        y = np.concatenate(
            (np.zeros((n_per_class, 1)), np.ones((n_per_class, 1)))
        )

        # Random train/test split (0.85 / 0.15 by default): one uniform draw
        # per row decides which side the row lands on.
        train_mask = rng.rand(2 * n_per_class) < train_fraction
        test_mask = np.logical_not(train_mask)
        return x[train_mask], y[train_mask], x[test_mask], y[test_mask]
if __name__ == "__main__":
    # Demo: generate one dataset and visualise it in two figures.
    train_x, train_y, test_x, test_y = SimpleDataSetCreator.create_simple_dataset()

    # Figure 1: training points only, coloured by their class label.
    plt.scatter(
        train_x[:, 0],
        train_x[:, 1],
        s=30,
        alpha=0.32,
        c=train_y.squeeze(),
        cmap="Paired",
    )
    plt.show()

    # Figure 2: training and test points together. The test labels are
    # offset by 5 so their colour values differ from the training labels.
    shifted_test_y = test_y + 5
    all_points = np.concatenate((train_x, test_x))
    all_colours = np.concatenate((train_y, shifted_test_y)).squeeze()
    plt.scatter(
        all_points[:, 0],
        all_points[:, 1],
        s=30,
        alpha=0.3,
        c=all_colours,
        cmap="Paired",
    )
    plt.show()