-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_empirical.py
159 lines (130 loc) · 7.14 KB
/
data_empirical.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Jordy Thielen (jordy.thielen@donders.ru.nl)
"""
import os
import h5py
import numpy as np
from scipy.stats import zscore
import nibabel as nib
class BensonDataset():
"""
Benson, N. C., Jamison, K. W., Arcaro, M. J., Vu, A. T., Glasser, M. F., Coalson, T. S., ... & Kay, K. (2018).
The Human Connectome Project 7 Tesla retinotopy dataset: Description and population receptive field analysis.
Journal of vision, 18(13), 23-23.
Data downloaded from ConnectomeDB: https://db.humanconnectome.org/
AnalysePRF results including the Wang 2015 atlas from OSF: https://osf.io/bw9ec/
From page 3 Figure 1:
Each run lasted 300 seconds in total, there are 6 runs in total.
Each CCW/CW/EXP/CON run starts and ends with 22 seconds baseline.
Each BAR1/BAR2 run starts and ends with 16 seconds baseline and has 12 seconds baseline half-way through.
Each run performs eight stimulus sweeps of 32 seconds within a run.
Sweeps are:
- for CCW a counter-clockwise rotating wedge;
- for CW a clock-wise rotating wedge;
- for EXP an expanding ring;
- for CON an contracting ring;
- for BAR1/BAR2 eight bar directions in the order R, U, L, D, UR, UL, DL, DR (Right, Left, Up, Down).
Note: The two BAR runs presented identical stimuli, i.e. the stimulus protocol in BAR1 and BAR2 is the same.
"""
subjects = ["126426", "130114", "130518", "134627", "135124", "146735", "165436", "167440", "177140", "180533",
"186949", "193845", "239136", "360030", "385046", "401422", "463040", "550439", "552241", "644246",
"654552", "757764", "765864", "878877", "905147", "943862", "971160", "973770", "995174"]
rois = ["V1v", "V1d", "V2v", "V2d", "V3v", "V3d"]
def __init__(self, stimulus, root, subject, train_test_split=None, cat_dim="time"):
"""
Sets up a specific empirical dataset of an individual participant of the HCP retinotopy dataset.
args:
stimulus: (stimulus.BensonStimulus) stimulus class containing the stimulus itself as a numpy.ndarray of shape [runs, voxels, height, width]
root: (str) path to the dataset
subject: (int or str) specific subject to test, either as string identifier, or integer index
train_test_split: (str) load the training ("train") or testing ("test") split, or all (None) (default: None)
cat_dim: (str) dimension along which to concatenate individual stimulus runs, "time" for time dimension or "chan" for channel/runs dimension (default: "time")
notes:
The data will be represented as a numpy.ndarray of shape [runs/channels, volumes, voxels]
"""
self.stimulus = stimulus
self.root = root
self.train_test_split = train_test_split
self.cat_dim = cat_dim
if isinstance(subject, int):
self.subject = self.subjects[subject]
else:
if subject in self.subjects:
self.subject = subject
else:
raise Exception("Subject not found!")
# Read voxel labels from atlas
with h5py.File(os.path.join(root, "derivatives", "analyseprf", "atlas.mat"), "r") as fid:
atlas = np.array(fid["wang2015"]).astype("int").flatten()
labels = ["".join(chr(c[0]) for c in fid[ref[0]]) for ref in fid["wang2015labels"]]
labels_idx = [i for i, lab in enumerate(labels) if lab in self.rois]
# Make ROI mask
self.mask = np.full(atlas.size, False)
for lab in labels_idx:
self.mask[atlas == lab] = True
self.n_voxels = np.sum(self.mask)
# Loop runs
self.data = np.zeros((len(self.stimulus.runs), self.stimulus.n_volumes, self.n_voxels), dtype="float32")
for i, run in enumerate(self.stimulus.runs):
# Handle phase encoding direction
if run in ["RETCCW", "RETEXP", "RETBAR1"]:
run = "tfMRI_" + run + "_7T_AP"
else:
run = "tfMRI_" + run + "_7T_PA"
# Load data
self.data[i, :, :] = nib.load(os.path.join(root, "sourcedata", self.subject, "MNINonLinear", "Results", run, "{}_Atlas_MSMAll_hp2000_clean.dtseries.nii".format(run))).get_fdata()[:, self.mask]
# To percent signal change
self.data[i, :, :] = 100.0 * self.data[i, :, :] / np.mean(self.data[i, :, :], axis=0, keepdims=True) - 100.0
# Split data
if train_test_split is not None:
self.split_data()
# Concatenate runs over time
if self.cat_dim == "time":
self.data = np.reshape(self.data, (1, -1, self.n_voxels))
# Preprocess data
self.data = zscore(self.data, axis=1)
def __len__(self):
"""
Returns the size of the datasets, i.e., the number of voxels.
returns:
length (int): the number of voxels in the dataset
"""
return self.data.shape[2]
def get_item(self, index):
"""
Returns a specific voxel from the dataset.
args:
index (int): the index of the voxel to return
returns:
data (numpt.ndarray): the empirical fMRI voxel of shape [1, volumes] if cat_dim="time" or [runs/channels, voxel] if cat_dim="chan"
"""
return self.data[:, :, index] # channels, samples, voxel
def split_data(self):
"""
Splits the data in a training or testing split (based on the train_test_split parameter).
Specifically, the training split will include the first halfs of the odd runs and the second
halfs of the even runs. The testing split will contain the remaining halfs. Additionally,
blank intervals are removed to align the underlying stimulus protocol. Finally, for the
testing split, the last two bar runs are sitched in order to make the stimulus protocal
again identical by means of the specific bar directions within those runs.
"""
# Loop runs
data = np.zeros((len(self.stimulus.runs), self.stimulus.n_volumes//2-22, self.n_voxels), dtype="float32")
for i, run in enumerate(self.stimulus.runs):
# Split train and test
if self.train_test_split == "train" and i % 2 == 0 or self.train_test_split == "test" and i % 2 == 1:
if "RETBAR" in run:
data[i, :, :] = self.data[i, 16:self.stimulus.n_volumes//2-6, :] # baseline 12 sec pre and post run and 8 seconds half-way
else:
data[i, :, :] = self.data[i, 22:self.stimulus.n_volumes//2, :] # baseline 16 sec pre and post run
else:
if "RETBAR" in run:
data[i, :, :] = self.data[i, self.stimulus.n_volumes//2+6:-16, :] # baseline 12 sec pre and post run and 8 seconds half-way
else:
data[i, :, :] = self.data[i, self.stimulus.n_volumes//2:-22, :] # baseline 16 sec pre and post run
# Make the order of directions equal for train and test split in BAR runs
if self.train_test_split == "test":
data[[-2, -1], :, :] = data[[-1, -2], :, :]
self.data = data