-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdatasets.py
80 lines (63 loc) · 2.28 KB
/
datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import numpy as np
from sklearn.datasets import make_moons, make_circles, load_iris
def read_dataset(dataset_path, dataset_name):
    """Load examples and labels from a NumPy archive on disk.

    The file is located at ``os.path.join(dataset_path, dataset_name)`` and
    opened with ``np.load``.

    NOTE: the archive layout is an assumption about how the dataset was
    saved -- the examples are read from key ``'x'`` and the labels from key
    ``'y'``. Adjust the keys if the dataset was built differently.

    Args:
        dataset_path: Directory containing the dataset file.
        dataset_name: File name of the dataset inside ``dataset_path``.

    Returns:
        A tuple ``(examples, labels)`` holding the ``'x'`` and ``'y'`` entries.
    """
    archive_file = os.path.join(dataset_path, dataset_name)
    # The context manager closes the archive once both entries are read.
    with np.load(archive_file) as archive:
        return archive['x'], archive['y']
def gen_lin_separable_data(n_samples=100):
    """Generate two 2-D Gaussian classes labelled +1 and -1.

    Both classes share the covariance ``[[0.8, 0.6], [0.6, 0.8]]``; the
    positive class is centred at ``(0, 2)`` and the negative class at
    ``(2, 0)``. Samples are drawn from NumPy's global random state, so seed
    it with ``np.random.seed`` beforehand for reproducible output.

    Args:
        n_samples: Number of points drawn for each class. Defaults to 100,
            the size that was previously hard-coded.

    Returns:
        A tuple ``(X1, y1, X2, y2)`` where ``X1``/``X2`` have shape
        ``(n_samples, 2)``, ``y1`` is all ``1.0`` and ``y2`` is all ``-1.0``.
    """
    mean_pos = np.array([0, 2])
    mean_neg = np.array([2, 0])
    cov = np.array([[0.8, 0.6], [0.6, 0.8]])

    # Positive class is drawn first so seeded output matches earlier versions.
    X1 = np.random.multivariate_normal(mean_pos, cov, n_samples)
    y1 = np.ones(len(X1))
    X2 = np.random.multivariate_normal(mean_neg, cov, n_samples)
    y2 = -np.ones(len(X2))
    return X1, y1, X2, y2
def gen_non_lin_separable_data():
    """Generate two classes, each made of two 2-D Gaussian clusters.

    Every cluster holds 50 points drawn from NumPy's global random state with
    covariance ``[[1.0, 0.8], [0.8, 1.0]]``. The +1 class combines clusters
    centred at ``(-1, 2)`` and ``(4, -4)``; the -1 class combines clusters
    centred at ``(1, -1)`` and ``(-4, 4)``.

    Returns:
        A tuple ``(X1, y1, X2, y2)`` where ``X1``/``X2`` have shape
        ``(100, 2)``, ``y1`` is all ``1.0`` and ``y2`` is all ``-1.0``.
    """
    cov = [[1.0, 0.8], [0.8, 1.0]]

    def _cluster(center):
        # One 50-point Gaussian cluster around `center`.
        return np.random.multivariate_normal(center, cov, 50)

    # Tuple elements are evaluated left to right, so the clusters are drawn
    # in the order (-1, 2), (4, -4), (1, -1), (-4, 4).
    X1 = np.vstack((_cluster([-1, 2]), _cluster([4, -4])))
    X2 = np.vstack((_cluster([1, -1]), _cluster([-4, 4])))

    y1 = np.ones(len(X1))
    y2 = np.ones(len(X2)) * -1
    return X1, y1, X2, y2
def gen_non_lin_separable_data2():
    """Generate a fixed 200-point XOR-style dataset in 2-D.

    Points are standard-normal draws from a private ``RandomState`` seeded
    with 0, so every call returns the same data. A private generator is used
    instead of ``np.random.seed(0)`` so that calling this function does not
    reset the caller's global NumPy random state; the values produced are
    the same as with the global seed.

    Returns:
        A tuple ``(X_xor, y_xor)`` where ``X_xor`` has shape ``(200, 2)`` and
        ``y_xor`` is a float array holding ``1.0`` where exactly one of the
        two coordinates is positive and ``-1.0`` otherwise.
    """
    rng = np.random.RandomState(0)
    X_xor = rng.randn(200, 2)
    # True where exactly one of the two coordinates is > 0.
    y_xor = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0)
    y_xor = np.where(y_xor, 1, -1).astype("float")
    return X_xor, y_xor
def gen_non_lin_separable_data3():
    """Generate a 100-sample ``make_moons`` dataset with +1/-1 float labels.

    ``make_moons`` is called with ``n_samples=100`` and ``noise=0``. Its
    labels are remapped so that a zero label becomes ``-1.0`` and any
    non-zero label becomes ``1.0``.

    Returns:
        A tuple ``(X, y)`` of the generated points and their float labels.
    """
    points, classes = make_moons(noise=0, n_samples=100)
    # Float branches give a float64 result directly, with no extra cast.
    signed_labels = np.where(classes, 1.0, -1.0)
    return points, signed_labels
def gen_non_lin_separable_data4():
    """Generate a 100-sample ``make_circles`` dataset with +1/-1 float labels.

    ``make_circles`` is called with ``n_samples=100`` and ``noise=0``. Its
    labels are remapped so that a zero label becomes ``-1.0`` and any
    non-zero label becomes ``1.0``.

    Returns:
        A tuple ``(X, y)`` of the generated points and their float labels.
    """
    points, classes = make_circles(noise=0, n_samples=100)
    # Float branches give a float64 result directly, with no extra cast.
    signed_labels = np.where(classes, 1.0, -1.0)
    return points, signed_labels
def gen_non_lin_separable_data5():
    """Build a two-class, two-feature dataset from scikit-learn's iris data.

    Only the first two feature columns of ``iris.data`` are kept. Targets are
    collapsed to two classes: a target of 0 becomes ``-1.0`` and every
    non-zero target becomes ``1.0``.

    Returns:
        A tuple ``(X, y)`` with the two-column feature matrix and the float
        labels.
    """
    dataset = load_iris()
    features = dataset.data[:, :2]
    # Zero target -> -1.0, any other target -> +1.0 (float64 result).
    signed_labels = np.where(dataset.target, 1.0, -1.0)
    return features, signed_labels
def gen_lin_separable_overlap_data(n_samples=100):
    """Generate two wider 2-D Gaussian classes labelled +1 and -1.

    Both classes share the covariance ``[[1.5, 1.0], [1.0, 1.5]]``; the
    positive class is centred at ``(0, 2)`` and the negative class at
    ``(2, 0)``. Samples are drawn from NumPy's global random state, so seed
    it with ``np.random.seed`` beforehand for reproducible output.

    Args:
        n_samples: Number of points drawn for each class. Defaults to 100,
            the size that was previously hard-coded.

    Returns:
        A tuple ``(X1, y1, X2, y2)`` where ``X1``/``X2`` have shape
        ``(n_samples, 2)``, ``y1`` is all ``1.0`` and ``y2`` is all ``-1.0``.
    """
    mean_pos = np.array([0, 2])
    mean_neg = np.array([2, 0])
    cov = np.array([[1.5, 1.0], [1.0, 1.5]])

    # Positive class is drawn first so seeded output matches earlier versions.
    X1 = np.random.multivariate_normal(mean_pos, cov, n_samples)
    y1 = np.ones(len(X1))
    X2 = np.random.multivariate_normal(mean_neg, cov, n_samples)
    y2 = -np.ones(len(X2))
    return X1, y1, X2, y2