Skip to content

Commit 06aaf70

Browse files
make script one hot compatible (#5)
* feat: one-hot encoding compatible
* fix: send weight tensor to device
* chore: comments added
* fix: removed build testing
* chore: deepchem, tensorflow dependency bump
* update: pip version >19.3
* fix: relaxed scikit-learn dependency
* Update mca.py

Co-authored-by: Jannis Born <jannis.born@gmx.de>
1 parent 122ab25 commit 06aaf70

File tree

7 files changed

+41
-32
lines changed

7 files changed

+41
-32
lines changed

.travis.yml

+3
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,9 @@ services:
44
before_script:
55
- docker pull drugilsberg/rdkit-ubuntu:latest
66
- docker build -f .travis/Dockerfile -t toxsmi .
7+
branches:
8+
except:
9+
- testing
710
script:
811
- docker run -it toxsmi python3 -c "import toxsmi"
912
- docker run -it toxsmi python3 scripts/train_baselines.py -h

conda.yml

+4-4
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
name: toxsmi
1+
name: toxsmi_test
22
channels:
33
- https://conda.anaconda.org/rdkit
44
dependencies:
@@ -10,9 +10,9 @@ dependencies:
1010
- paccmann_predictor @ git+https://github.com/PaccMann/paccmann_predictor@0.0.2
1111
- numpy>=1.14.3
1212
- torch==1.5.1
13-
- deepchem==2.2.1.dev54
13+
- deepchem>=2.4
1414
- six==1.15.0
15-
- scikit-learn==0.21.3
16-
- tensorflow<2.0
15+
- scikit-learn>=0.21.3
16+
- tensorflow>=2.0
1717
- Pillow==7.1.0
1818

requirements.txt

+3-3
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ pytoda @ git+https://github.com/PaccMann/paccmann_datasets@0.1.1
22
paccmann_predictor @ git+https://github.com/PaccMann/paccmann_predictor@0.0.2
33
numpy>=1.14.3
44
torch==1.5.1
5-
deepchem==2.2.1.dev54
5+
deepchem>=2.4
66
six==1.15.0
7-
scikit-learn==0.21.3
8-
tensorflow<2.0
7+
scikit-learn>=0.21.3
8+
tensorflow>=2.0
99
Pillow==7.1.0

scripts/train_tox.py

+3
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,7 @@ def smiles_tensor_batch_to_fp(smiles):
134134
sanitize=params.get('sanitize', True)
135135
)
136136

137+
# include arg label_columns if data file has any unwanted columns (such as index) to be ignored.
137138
train_dataset = AnnotatedDataset(
138139
annotations_filepath=train_scores_filepath,
139140
dataset=smiles_dataset,
@@ -183,6 +184,7 @@ def smiles_tensor_batch_to_fp(smiles):
183184
logger.info(smiles_dataset._dataset.transform)
184185
logger.info(smiles_test_dataset._dataset.transform)
185186

187+
# include arg label_columns if data file has any unwanted columns (such as index) to be ignored.
186188
test_dataset = AnnotatedDataset(
187189
annotations_filepath=test_scores_filepath,
188190
dataset=smiles_test_dataset,
@@ -241,6 +243,7 @@ def smiles_tensor_batch_to_fp(smiles):
241243

242244
model = MODEL_FACTORY[params.get('model_fn', 'mca')](params).to(device)
243245
logger.info(model)
246+
logger.info(model.loss_fn.class_weights)
244247

245248
logger.info('Parameters follow')
246249
for name, param in model.named_parameters():

setup.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ def get_version(rel_path):
3434
),
3535
install_requires=[
3636
'paccmann_predictor @ git+https://github.com/PaccMann/paccmann_predictor',
37-
'torch', 'deepchem', 'tensorflow<2.0', 'Pillow', 'six'
37+
'torch', 'deepchem>=2.4', 'tensorflow>=2.0', 'Pillow', 'six'
3838
],
3939
packages=find_packages('.'),
4040
zip_safe=False

toxsmi/models/mca.py

File mode changed from 100644 to 100755
+26-23
Original file line number | Diff line number | Diff line change
@@ -84,33 +84,11 @@ def __init__(self, params: dict, *args, **kwargs):
8484
# Model architecture (hyperparameter)
8585
self.multiheads = params.get('multiheads', [4, 4, 4, 4])
8686
self.filters = params.get('filters', [64, 64, 64])
87-
self.hidden_sizes = (
88-
[
89-
self.multiheads[0] * params['smiles_embedding_size'] + sum(
90-
[h * f for h, f in zip(self.multiheads[1:], self.filters)]
91-
)
92-
] + params.get('stacked_hidden_sizes', [1024, 512])
93-
)
9487

9588
self.dropout = params.get('dropout', 0.5)
9689
self.use_batch_norm = self.params.get('batch_norm', True)
9790
self.act_fn = ACTIVATION_FN_FACTORY[
9891
params.get('activation_fn', 'relu')]
99-
self.kernel_sizes = params.get(
100-
'kernel_sizes', [
101-
[3, params['smiles_embedding_size']],
102-
[5, params['smiles_embedding_size']],
103-
[11, params['smiles_embedding_size']]
104-
]
105-
)
106-
if len(self.filters) != len(self.kernel_sizes):
107-
raise ValueError(
108-
'Length of filter and kernel size lists do not match.'
109-
)
110-
if len(self.filters) + 1 != len(self.multiheads):
111-
raise ValueError(
112-
'Length of filter and multihead lists do not match'
113-
)
11492

11593
# Build the model. First the embeddings
11694
if params.get('embedding', 'learned') == 'learned':
@@ -159,6 +137,31 @@ def __init__(self, params: dict, *args, **kwargs):
159137
else:
160138
raise ValueError(f"Unknown embedding type: {params['embedding']}")
161139

140+
self.kernel_sizes = params.get(
141+
'kernel_sizes', [
142+
[3, self.smiles_embedding.weight.shape[1]],
143+
[5, self.smiles_embedding.weight.shape[1]],
144+
[11, self.smiles_embedding.weight.shape[1]]
145+
]
146+
)
147+
148+
self.hidden_sizes = (
149+
[
150+
self.multiheads[0] * self.smiles_embedding.weight.shape[1] + sum(
151+
[h * f for h, f in zip(self.multiheads[1:], self.filters)]
152+
)
153+
] + params.get('stacked_hidden_sizes', [1024, 512])
154+
)
155+
156+
if len(self.filters) != len(self.kernel_sizes):
157+
raise ValueError(
158+
'Length of filter and kernel size lists do not match.'
159+
)
160+
if len(self.filters) + 1 != len(self.multiheads):
161+
raise ValueError(
162+
'Length of filter and multihead lists do not match'
163+
)
164+
162165
self.convolutional_layers = nn.Sequential(
163166
OrderedDict(
164167
[
@@ -177,7 +180,7 @@ def __init__(self, params: dict, *args, **kwargs):
177180
)
178181
)
179182

180-
smiles_hidden_sizes = [params['smiles_embedding_size']] + self.filters
183+
smiles_hidden_sizes = [self.smiles_embedding.weight.shape[1]] + self.filters
181184
self.smiles_projections = nn.Sequential(
182185
OrderedDict(
183186
[

toxsmi/utils/wrappers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def forward(self, yhat: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
7373
weight_tensor[y == 0.0] = self.class_weights[0]
7474
weight_tensor[y == 1.0] = self.class_weights[1]
7575

76-
out = loss * weight_tensor
76+
out = loss * weight_tensor.to(DEVICE)
7777

7878
if self.reduction == 'mean':
7979
return torch.mean(out)

0 commit comments

Comments (0)