-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathdata_reader.py
107 lines (90 loc) · 3.95 KB
/
data_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import glob
import h5py
import tensorflow as tf
import numpy as np
from img_utils import get_images
class FileDataReader(object):
def __init__(self, data_dir, input_height, input_width, height, width,
batch_size):
self.data_dir = data_dir
self.input_height, self.input_width = input_height, input_width
self.height, self.width = height, width
self.batch_size = batch_size
self.image_files = glob.glob(data_dir+'*')
def next_batch(self, batch_size):
sample_files = np.random.choice(self.image_files, batch_size)
images = get_images(
sample_files, self.input_height, self.input_width,
self.height, self.width)
return images
class H5DataLoader(object):
def __init__(self, data_path, is_train=True):
self.is_train = is_train
data_file = h5py.File(data_path, 'r')
self.images, self.labels = data_file['X'], data_file['Y']
self.gen_indexes()
def gen_indexes(self):
if self.is_train:
self.indexes = np.random.permutation(range(self.images.shape[0]))
else:
self.indexes = np.array(range(self.images.shape[0]))
self.cur_index = 0
def next_batch(self, batch_size):
next_index = self.cur_index+batch_size
cur_indexes = list(self.indexes[self.cur_index:next_index])
self.cur_index = next_index
if len(cur_indexes) < batch_size and self.is_train:
self.gen_indexes()
self.cur_index = batch_size-len(cur_indexes)
cur_indexes += list(self.indexes[:batch_size-len(cur_indexes)])
cur_indexes.sort()
return self.images[cur_indexes], self.labels[cur_indexes]
class QueueDataReader(object):
def __init__(self, sess, data_dir, data_list, input_size, class_num,
name, data_format):
self.sess = sess
self.scope = name + '/data_reader'
self.class_num = class_num
self.channel_axis = 3
images, labels = self.read_data(data_dir, data_list)
images = tf.convert_to_tensor(images, dtype=tf.string)
labels = tf.convert_to_tensor(labels, dtype=tf.string)
queue = tf.train.slice_input_producer(
[images, labels], shuffle=True, name=self.scope+'/slice')
self.image, self.label = self.read_dataset(
queue, input_size, data_format)
def next_batch(self, batch_size):
image_batch, label_batch = tf.train.shuffle_batch(
[self.image, self.label], batch_size=batch_size,
num_threads=4, capacity=50000, min_after_dequeue=10000,
name=self.scope+'/batch')
return image_batch, label_batch
def read_dataset(self, queue, input_size, data_format):
image = tf.image.decode_jpeg(
tf.read_file(queue[0]), channels=3, name=self.scope+'/image')
label = tf.image.decode_png(
tf.read_file(queue[1]), channels=1, name=self.scope+'/label')
image = tf.image.resize_images(image, input_size)
label = tf.image.resize_images(label, input_size, 1)
if data_format == 'NCHW':
self.channel_axis = 1
image = tf.transpose(image, [2, 0, 1])
label = tf.transpose(label, [2, 0, 1])
image -= tf.reduce_mean(tf.cast(image, dtype=tf.float32),
(0, 1), name=self.scope+'/mean')
return image, label
def read_data(self, data_dir, data_list):
with open(data_list, 'r') as f:
images, labels = [], []
for line in f:
image, label = line.strip('\n').split(' ')
images.append(data_dir + image)
labels.append(data_dir + label)
return images, labels
def start(self):
self.coord = tf.train.Coordinator()
self.threads = tf.train.start_queue_runners(
coord=self.coord, sess=self.sess)
def close(self):
self.coord.request_stop()
self.coord.join(self.threads)