# datasets.py

import os
import numpy as np
from sklearn.datasets import make_moons, make_circles, load_iris


def read_dataset(dataset_path, dataset_name):
    """Load the example and label arrays stored in a NumPy archive.

    The file located at ``os.path.join(dataset_path, dataset_name)`` is
    opened with ``np.load`` and is expected to expose two entries: ``'x'``
    (the examples) and ``'y'`` (the labels).

    NOTE(review): this key layout is an assumption about how the dataset
    was saved -- adapt the keys if your archive is organised differently.

    Args:
        dataset_path: Directory that contains the dataset file.
        dataset_name: File name of the dataset inside ``dataset_path``.

    Returns:
        A tuple ``(examples, labels)`` read from the ``'x'`` and ``'y'``
        entries of the archive.
    """
    archive_file = os.path.join(dataset_path, dataset_name)
    # Both arrays are read while the archive is open; the context manager
    # closes the underlying file as soon as the tuple has been built.
    with np.load(archive_file) as archive:
        return archive['x'], archive['y']


def gen_lin_separable_data():
    """Sample two 2-D Gaussian clusters labelled +1 and -1.

    Both classes share the covariance ``[[0.8, 0.6], [0.6, 0.8]]`` and are
    centred on ``(0, 2)`` (positive class) and ``(2, 0)`` (negative class).
    100 points per class are drawn from NumPy's global random state, the
    positive class first.

    Returns:
        A tuple ``(X1, y1, X2, y2)``: ``X1`` and ``X2`` are ``(100, 2)``
        sample arrays, ``y1`` is a float vector of ``1.0`` and ``y2`` a
        float vector of ``-1.0``, each of length 100.
    """
    samples_per_class = 100
    shared_cov = np.array([[0.8, 0.6], [0.6, 0.8]])
    centre_pos = np.array([0, 2])
    centre_neg = np.array([2, 0])

    # Keep this draw order: it determines the output under a fixed seed.
    X1 = np.random.multivariate_normal(centre_pos, shared_cov, samples_per_class)
    X2 = np.random.multivariate_normal(centre_neg, shared_cov, samples_per_class)

    y1 = np.full(X1.shape[0], 1.0)
    y2 = np.full(X2.shape[0], -1.0)
    return X1, y1, X2, y2


def gen_non_lin_separable_data():
    """Sample two classes, each made of two 2-D Gaussian blobs.

    The positive class stacks 50 points around ``(-1, 2)`` on top of 50
    points around ``(4, -4)``; the negative class stacks 50 points around
    ``(1, -1)`` on top of 50 points around ``(-4, 4)``. All four blobs use
    the covariance ``[[1.0, 0.8], [0.8, 1.0]]`` and are drawn from NumPy's
    global random state.

    Returns:
        A tuple ``(X1, y1, X2, y2)``: ``X1`` and ``X2`` are ``(100, 2)``
        sample arrays, ``y1`` is a float vector of ``1.0`` and ``y2`` a
        float vector of ``-1.0``, each of length 100.
    """
    blob_size = 50
    shared_cov = [[1.0, 0.8], [0.8, 1.0]]

    # The four draws happen in this exact order (positive blobs first) so
    # that a fixed seed keeps producing the same samples.
    pos_first = np.random.multivariate_normal([-1, 2], shared_cov, blob_size)
    pos_second = np.random.multivariate_normal([4, -4], shared_cov, blob_size)
    neg_first = np.random.multivariate_normal([1, -1], shared_cov, blob_size)
    neg_second = np.random.multivariate_normal([-4, 4], shared_cov, blob_size)

    X1 = np.concatenate((pos_first, pos_second), axis=0)
    X2 = np.concatenate((neg_first, neg_second), axis=0)
    y1 = np.full(X1.shape[0], 1.0)
    y2 = np.full(X2.shape[0], -1.0)
    return X1, y1, X2, y2


def gen_non_lin_separable_data2():
    """Generate a fixed XOR-pattern dataset of 200 two-dimensional points.

    Points are standard-normal draws; a point is labelled ``1.0`` when
    exactly one of its two coordinates is positive and ``-1.0`` otherwise.

    Note:
        The function reseeds NumPy's *global* random state with ``0`` on
        every call, so it always returns the same data and also resets the
        random stream seen by any code that runs afterwards.

    Returns:
        A tuple ``(X_xor, y_xor)`` with ``X_xor`` of shape ``(200, 2)`` and
        ``y_xor`` a float vector of length 200 holding ``1.0`` / ``-1.0``.
    """
    np.random.seed(0)
    points = np.random.randn(200, 2)

    first_positive = points[:, 0] > 0
    second_positive = points[:, 1] > 0
    # For boolean arrays, "differs" is exactly exclusive-or.
    exactly_one_positive = first_positive != second_positive

    labels = np.where(exactly_one_positive, 1.0, -1.0)
    return points, labels


def gen_non_lin_separable_data3():
    """Build a noise-free two-moons dataset with labels in {-1.0, 1.0}.

    Calls scikit-learn's ``make_moons`` with ``n_samples=100`` and
    ``noise=0``. No ``random_state`` is passed, so any shuffling follows
    scikit-learn's defaults.

    Returns:
        A tuple ``(X, y)`` where ``X`` holds the generated points and ``y``
        is a float vector in which the original label ``0`` becomes
        ``-1.0`` and every non-zero label becomes ``1.0``.
    """
    points, raw_labels = make_moons(n_samples=100, noise=0)
    # np.where treats the integer labels as truthy/falsy: 0 -> -1, else +1.
    signed_labels = np.where(raw_labels, 1, -1).astype(np.float64)
    return points, signed_labels


def gen_non_lin_separable_data4():
    """Build a noise-free concentric-circles dataset with labels in {-1.0, 1.0}.

    Calls scikit-learn's ``make_circles`` with ``n_samples=100`` and
    ``noise=0``. No ``random_state`` is passed, so any shuffling follows
    scikit-learn's defaults.

    Returns:
        A tuple ``(X, y)`` where ``X`` holds the generated points and ``y``
        is a float vector in which the original label ``0`` becomes
        ``-1.0`` and every non-zero label becomes ``1.0``.
    """
    points, raw_labels = make_circles(n_samples=100, noise=0)
    # np.where treats the integer labels as truthy/falsy: 0 -> -1, else +1.
    signed_labels = np.where(raw_labels, 1, -1).astype(np.float64)
    return points, signed_labels


def gen_non_lin_separable_data5():
    """Build a binary dataset from the first two columns of the Iris data.

    Only the first two feature columns of ``load_iris().data`` are kept.
    The targets are collapsed to two classes: target ``0`` becomes
    ``-1.0`` and every other target value becomes ``1.0``.

    Returns:
        A tuple ``(X, y)`` where ``X`` is the two-column feature slice and
        ``y`` is the float vector of ``-1.0`` / ``1.0`` labels.
    """
    dataset = load_iris()
    first_two_features = dataset.data[:, :2]
    # Non-zero targets are truthy for np.where, so they all collapse to +1
    # while target 0 maps to -1.
    signed_labels = np.where(dataset.target, 1, -1).astype(np.float64)
    return first_two_features, signed_labels


def gen_lin_separable_overlap_data():
    """Sample two wider 2-D Gaussian clusters labelled +1 and -1.

    Both classes share the covariance ``[[1.5, 1.0], [1.0, 1.5]]`` and are
    centred on ``(0, 2)`` (positive class) and ``(2, 0)`` (negative class).
    100 points per class are drawn from NumPy's global random state, the
    positive class first.

    Returns:
        A tuple ``(X1, y1, X2, y2)``: ``X1`` and ``X2`` are ``(100, 2)``
        sample arrays, ``y1`` is a float vector of ``1.0`` and ``y2`` a
        float vector of ``-1.0``, each of length 100.
    """
    samples_per_class = 100
    shared_cov = np.array([[1.5, 1.0], [1.0, 1.5]])
    centre_pos = np.array([0, 2])
    centre_neg = np.array([2, 0])

    # Keep this draw order: it determines the output under a fixed seed.
    X1 = np.random.multivariate_normal(centre_pos, shared_cov, samples_per_class)
    X2 = np.random.multivariate_normal(centre_neg, shared_cov, samples_per_class)

    y1 = np.full(X1.shape[0], 1.0)
    y2 = np.full(X2.shape[0], -1.0)
    return X1, y1, X2, y2