-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpatient_pickler.py
592 lines (511 loc) · 25 KB
/
patient_pickler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
"""
patient_pickler.py
version 3.0
package github.com/ajk77/PatientPy
Created by AndrewJKing.com|@andrewsjourney
This file uses database connections to query and store patient data.
Connections are specific to the HiDENIC and MARS data structures.
Data types handled include:
demographics (demo)
laboratory tests, vital signs, and ventilator settings (root)
medication orders (med)
intake and output measurements (io)
micro biology (micro)
procedures (procedures)
---DEPENDENCIES---
A pickle directory (pkl_dir) must be created.
^The following subdirectories must be created as well: 'root_data/','flag_data/','med_data/','procedure_data/','micro_data/','io_data/','demo_data/'
The database table connected to in the determine_cases_days_and_times() method should contain only the patient cases of interest (e.g. after location, year, and diagnosis selection)
---TODO---
[] Setup database connections to NOC
[] Setup database connections to MIMIC III
---LICENSE---
This file is part of PatientPy
PatientPy is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
any later version.
PatientPy is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PatientPy. If not, see <https://www.gnu.org/licenses/>.
"""
from patientpy_utils import load_info_from_pickle_file, load_case_day_mapping, determine_case_times
import pickle
import unicodedata
import os
import time
import datetime
# crisma (or convert to NOC server)
from models import a_demographics
from models import a_ClinicalEvents
from models import a_HomeMeds
from models import a_ICUpatients
from models import a_IO
from models import a_Medication
from models import a_Micro
from models import a_Surgical
from models import a_Ventilator
# local_lemr (or convert to NOC server)
from models import lab_739
from models import marstorootcodes
from models import rootgroupmember
from models import discrete_result_mapping
def query_and_store_root_information(root_info_pickle_file):
    """
    Queries the root information from the mapping tables and pickles it.
    Uses three database connections (marstorootcodes, rootgroupmember,
    discrete_result_mapping).

    root_info_pickle_file: path of the pickle file to write.

    The pickled object is the list
    [mtr, rtm, groups, lab_group_order, rtn, rtt, rtdt, drm, root_order].
    """
    def load_marstoroot():
        """
        Database connection to lab code mapping table.
        Returns [mtr, rtm]: mars code -> root code and root code -> [mars codes].
        """
        mtr = {}  # mars to root
        rtm = {}  # root to mars
        for result in marstorootcodes.objects.all():  # database connection for mapping file
            mtr[result.marscode] = result.rootcode
            rtm.setdefault(result.rootcode, []).append(result.marscode)
        # skip these mappings (they are dropped from rtm only; mtr keeps them)
        rtm.pop('X', None)
        rtm.pop('U', None)
        return [mtr, rtm]

    def load_marstoroot_include_vitals_and_vent():
        """
        Returns mars to root mappings and root to mars mappings.
        Root to mars mappings include vitals and vents.
        Since the vital and vent mappings are not in the mapping table, they are manually added.
        """
        mtr, rtm = load_marstoroot()
        rtm.update({
            'MODE': ['Mode'],
            'VSTRIA': ['Trial extubation'],
            'VSTUBE': ['Tube Status'],
            'VSVENT': ['Vent Status'],
            'O2VENT': ['FIO2'],
            'VTCPP': ['CPP'],
            'VTCVP': ['CVP'],
            'VTDIAA': ['Pulmonary artery diastolic'],
            'VTDIAV': ['Diastolic BP'],
            'VTHR': ['HR'],
            'VTICP': ['ICP'],
            'VTRR': ['RR'],
            'VTSO2A': ['SaO2'],
            'VTSO2V': ['SvO2'],
            'VTSYSA': ['Pulmonary artery systolic'],
            'VTSYSV': ['Systolic BP'],
            'VTTEMA': ['Temperature'],
            'VTWEIG': ['Weight'],
            'VTWGEA': ['Pulmonary artery wedge'],
        })
        return mtr, rtm

    def load_rootgroupmember():
        """
        Database connection to group membership table.
        Returns [groups, lab_group_order, rtn, rtt].
        """
        groups = {}  # [groupname] = [root1, root2, ...]
        lab_group_order = [0] * 19  # slot (grouprank - 1) holds the group name
        rtn = {}  # root to name
        rtt = {}  # root to table
        for result in rootgroupmember.objects.all():  # database connection for group membership
            groups.setdefault(result.groupname, []).append(result.root)
            if result.grouprank < 20:
                lab_group_order[result.grouprank - 1] = result.groupname
            rtn[result.root] = result.labname
            rtt[result.root] = result.datatable
        return [groups, lab_group_order, rtn, rtt]

    def load_root_to_datatype():
        """
        Returns a dict of [root] -> datatype
        datatypes can be binary, interval, nominal, ordinal, unused-nom
        """
        rtdt = {}  # root to data type
        for result in rootgroupmember.objects.all():  # Database connection from group membership for datatype
            rtdt[result.root] = result.datatype
        return rtdt

    def load_discrete_result_mapping():
        """
        Database connection to discrete result mapping table.
        Returns a dict of [root] -> four lists of event texts, one per map value 1..4.
        """
        drm = {}  # discrete result mapping
        for result in discrete_result_mapping.objects.all():  # Database connection for discrete result mapping
            if result.root not in drm:
                drm[result.root] = [[], [], [], []]  # [[text maps to 1], [to 2], [to 3], [to 4]]
            drm[result.root][int(result.mapvalue) - 1].append(result.eventtext)
        return drm

    mtr, rtm = load_marstoroot_include_vitals_and_vent()  # mars to root, root to mars
    groups, lab_group_order, rtn, rtt = load_rootgroupmember()  # root to name, root to table
    rtdt = load_root_to_datatype()  # root to data type
    drm = load_discrete_result_mapping()  # discrete result mapping
    # Use this order for all loops b/c roots need to be in a consistent order.
    # Snapshot the keys as a list: a dict view cannot be pickled on Python 3.
    root_order = list(rtm)
    pickle_data = [mtr, rtm, groups, lab_group_order, rtn, rtt, rtdt, drm, root_order]
    with open(root_info_pickle_file, 'wb') as f:
        pickle.dump(pickle_data, f)
    return
def query_and_store_other_information(other_info_pickle_file):
    """
    Queries and stores the med, procedure, micro, io, and demographic information.
    Uses three database connections (a_Medication, a_HomeMeds, a_Surgical).

    other_info_pickle_file: path of the pickle file to write.

    The pickled object is the list
    [med_order, mtt, procedure_order, micro_order, io_order, demo_order].
    """
    # ## query and store medications and home medications
    med_order = []  # medication names in first-seen order
    mtt = {}  # med to table: 'a_Medication', 'a_HomeMeds', or 'both'
    results = a_Medication.objects.using('remote').order_by().values('name').distinct()  # Database connection to medication table
    for result in results:
        name = result['name']
        # mtt has exactly the names already in med_order, so use it for the O(1) lookup
        if name not in mtt:
            med_order.append(name)
            mtt[name] = 'a_Medication'
    results = a_HomeMeds.objects.using('remote').order_by().values('genericname').distinct()  # Database connection to home medication table
    for result in results:
        name = result['genericname']
        if name not in mtt:
            med_order.append(name)
            mtt[name] = 'a_HomeMeds'
        elif mtt[name] == 'a_Medication':  # catch meds that are both types
            mtt[name] = 'both'

    # ## query and store procedures
    procedure_order = []
    seen_procedures = set()
    results = a_Surgical.objects.using('remote').order_by().values('procedure').distinct()  # Database connection to procedure table
    for result in results:
        procedure = result['procedure']
        if procedure not in seen_procedures:
            seen_procedures.add(procedure)
            procedure_order.append(procedure)

    # ## Store micro biology, intake and output, and demographics orders.
    # No connection because these are short, manually entered lists.
    micro_order = ['Blood Culture (C&S)', 'MRSA Screen for Infection Control (Nose', 'VRE Screen',
                   'Urine Culture (C&S)', 'Clostridium difficile Toxin (Stool)', 'Fungus Culture (C&S)',
                   'Anaerobic Culture (C&S)', 'Sputum Culture (C&S) (with Gram Stain)',
                   'Acinetobacter Screen Infection Control', 'MICRO_ANY']  # These were selected to only include frequent cultures
    io_order = ['Urine', 'Everything Else', 'Oral', 'Intravenous', 'Blood Products', 'Other or unknown', 'Net']
    demo_order = ['age', 'sex', 'height', 'weight', 'bmi', 'race']

    pickle_data = [med_order, mtt, procedure_order, micro_order, io_order, demo_order]
    with open(other_info_pickle_file, 'wb') as f:
        pickle.dump(pickle_data, f)
    return
def determine_cases_days_and_times(out_file):
    """
    Determines the length-of-stay days and cut times for each case and writes them to a file.
    Uses one database connection (a_ICUpatients).

    out_file: path of the text file to write. It gets a '#PatientVisitId,los,cutoff'
    header followed by one 'case_id,los,cutoff' line per retained case day.
    """
    tz_shift = 18000  # seconds subtracted from every timestamp to adjust for time zone
    one_day = 86400
    eight_hours = 28800
    half_day = 43200

    # Group every ICU stay by case in a single scan, keeping first-seen case order.
    case_order = []
    stays_by_case = {}  # case id -> [[admit, discharge], ...]
    results = a_ICUpatients.objects.using('remote').all()  # Database connection to patient admit/discharge table
    for result in results:
        curr_admit = time.mktime(result.ICUadmit.timetuple()) - tz_shift
        curr_disch = time.mktime(result.ICUdischarge.timetuple()) - tz_shift
        if result.patientvisitid not in stays_by_case:
            stays_by_case[result.patientvisitid] = []
            case_order.append(result.patientvisitid)
        stays_by_case[result.patientvisitid].append([curr_admit, curr_disch])

    # The context manager closes the file even if a row fails part-way through.
    with open(out_file, 'w') as handle:
        handle.write('#PatientVisitId,los,cutoff\n')
        for case_id in case_order:
            stays = stays_by_case[case_id]
            # earliest icu admission and latest icu discharge over all of the case's stays
            t_icu_admit = min(stay[0] for stay in stays)
            t_icu_discharge = max(stay[1] for stay in stays)
            # it is possible that some of the cut times occur in between ICU admissions. This is handled below
            day_diff = (t_icu_discharge - t_icu_admit) // one_day
            midnight_admit = (t_icu_admit // one_day) * one_day
            eight_am_admit = midnight_admit + eight_hours
            for los in range(int(day_diff + 1)):
                current_day_eight_am = eight_am_admit + (one_day * los)
                # ensure current day is between an admission and discharge (with 12 hours of slack)
                if any((stay[0] - half_day) < current_day_eight_am < (stay[1] + half_day) for stay in stays):
                    handle.write(str(case_id) + ',' + str(los) + ',' + str(current_day_eight_am) + '\n')
    return
def load_a_patients_root_data(case_id, root_order, rtm, rtt, rtdt):
    """
    Loads all root based data for a PatientVisitId.
    Uses three database connections (lab_739, a_ClinicalEvents, a_Ventilator).

    case_id: the PatientVisitId to load.
    root_order: iterable of roots to load.
    rtm: root -> list of mars codes to query for that root.
    rtt: root -> name of the table holding the root's data.
    rtdt: root -> data type; only consulted for lab_739 roots ('interval' means numeric).

    Returns a dict of [root] -> list of (time in ms, value) tuples sorted by time.
    """
    def is_num(s):
        """
        Returns True if string is a number, otherwise False
        """
        try:
            float(s)
        except (ValueError, TypeError):
            return False
        return True

    def parse_to_num(text):
        """
        Removes special characters from string so it can be parsed to a float
        """
        for ch in ['<', '=', '>', '%']:
            if text and ch in text:
                text = text.replace(ch, "")
        return text

    def unicode_to_str(text):
        """
        Returns string from unicode input. Django's default is unicode.
        """
        return unicodedata.normalize('NFKD', text).encode('ascii', 'ignore')

    def to_ms(moment):
        """
        Converts a datetime to milliseconds, shifted by 18000 seconds to adjust for time zone
        """
        return (time.mktime(moment.timetuple()) - 18000) * 1000

    root_data = {}  # stores all of a patients root related data
    # ## load all of the patient's data
    for root in root_order:
        points = []
        root_data[root] = points
        # convert each data point into a (time, value) tuple
        for marsCode in rtm[root]:
            table = rtt[root]
            if table == 'lab_739':
                results = lab_739.objects.filter(eventcode=marsCode, patientvisitid=case_id)  # Database connection to laboratory test result table
                if rtdt[root] == 'interval':
                    for result in results:
                        t = to_ms(result.eventdate)
                        if is_num(result.eventvalue):
                            points.append((t, float(result.eventvalue)))
                        else:
                            # fall back to the text field once '<', '=', '>', '%' are stripped
                            curr_text = parse_to_num(result.eventtext)
                            if is_num(curr_text):
                                points.append((t, float(curr_text)))
                else:
                    for result in results:
                        t = to_ms(result.eventdate)
                        if result.eventtext is not None:
                            points.append((t, unicode_to_str(result.eventtext)))
            elif table == 'a_clinicalevents':
                results = a_ClinicalEvents.objects.using('remote').filter(rollname=marsCode, patientvisitid=case_id)  # Database connection to vital sign table
                # this table is always interval
                for result in results:
                    t = to_ms(result.date)
                    if result.rollval is not None:
                        points.append((t, float(result.rollval)))
            elif table == 'a_Ventilator':
                results = a_Ventilator.objects.using('remote').filter(eventname=marsCode, patientvisitid=case_id)  # Database connection to ventilator setting table
                # this table is never interval
                for result in results:
                    t = to_ms(result.date)
                    if result.resultval is not None:
                        points.append((t, unicode_to_str(result.resultval)))
            else:
                # call form prints the same text on Python 2 and also parses on Python 3
                print('***should never print this (data_table)***')
        # ## sort data by date
        points.sort(key=lambda tup: tup[0])
    return root_data
def load_and_pickle_patients_flag_data(case_id, root_order, rtm, rtt):
    """
    Collects a patient's flag data (flags for abnormal tests) for every root.
    Uses one database connection (lab_739).

    Returns a dict of [root] -> list of (time in ms, flag) tuples sorted by time.
    Roots whose table is not 'lab_739' map to an empty list.
    """
    flag_data = {}  # stores all flag data for patient
    for root in root_order:
        entries = []
        flag_data[root] = entries
        if rtt[root] == 'lab_739':
            for marsCode in rtm[root]:
                rows = lab_739.objects.filter(eventcode=marsCode, patientvisitid=case_id)  # Database connection to test result table.
                for row in rows:
                    stamp = (time.mktime(row.eventdate.timetuple()) - 18000) * 1000
                    entries.append((stamp, row.flag))
        # ## order the entries by date
        entries.sort(key=lambda pair: pair[0])
    return flag_data
def load_and_pickle_patients_med_data(case_id, med_order):
    """
    Collects a patient's medication orders and home medications.
    Uses two database connections (a_Medication, a_HomeMeds).

    Returns a dict of [med] -> list of (time in ms, dose) tuples sorted by time,
    with one entry for every med in med_order.
    """
    def to_ms(moment):
        return (time.mktime(moment.timetuple()) - 18000) * 1000

    orders = a_Medication.objects.using('remote').filter(patientvisitid=case_id)  # Database connection to medication order table
    home_meds = a_HomeMeds.objects.using('remote').filter(patientvisitid=case_id)  # Database connection to home medication prescription table.

    med_data = {med: [] for med in med_order}  # stores all med data for patient
    for order in orders:
        med_data[order.name].append((to_ms(order.date), order.dose))
    for home_med in home_meds:
        med_data[home_med.genericname].append((to_ms(home_med.date), home_med.dose))

    # ## order each med's entries by date
    for entries in med_data.values():
        entries.sort(key=lambda pair: pair[0])
    return med_data
def load_and_pickle_patients_procedure_data(case_id, procedure_order):
    """
    Collects a patient's procedure data.
    Uses one database connection (a_Surgical).

    Returns a dict of [procedure] -> list of (time in ms, 1) tuples sorted by time,
    with one entry for every procedure in procedure_order.
    """
    rows = a_Surgical.objects.using('remote').filter(patientvisitid=case_id)  # Database connection to procedures table.
    procedure_data = {name: [] for name in procedure_order}  # stores all procedure data for patient
    for row in rows:
        stamp = (time.mktime(row.date.timetuple()) - 18000) * 1000
        procedure_data[row.procedure].append((stamp, 1))
    # ## order each procedure's entries by date
    for entries in procedure_data.values():
        entries.sort(key=lambda pair: pair[0])
    return procedure_data
def load_and_pickle_patients_micro_data(case_id, micro_order):
    """
    Collects a patient's micro biology data.
    Uses one database connection (a_Micro).

    Returns a dict of [culture] -> list of (time in ms, 1) tuples sorted by time,
    with one entry for every name in micro_order. Every row is also recorded
    under 'MICRO_ANY'.
    """
    cultures = a_Micro.objects.using('remote').filter(patientvisitid=case_id)  # Database connection to micro biology table
    micro_data = {name: [] for name in micro_order}  # stores all micro data for patient
    for culture in cultures:
        ordered_at = ((time.mktime(culture.date.timetuple()) - 18000) * 1000, 1)
        micro_data['MICRO_ANY'].append(ordered_at)  # boolean for if ordered
        if culture.ordername in micro_order:
            micro_data[culture.ordername].append(ordered_at)
    # ## order each culture's entries by date
    for entries in micro_data.values():
        entries.sort(key=lambda pair: pair[0])
    return micro_data
def load_and_pickle_patients_io_data(case_id):
    """
    Loads a patient's intake and output (io) data, summed per day and category.
    Uses one database connection (a_IO).

    Returns a dict of [category name] -> that category's per-day data, for the
    categories 'Urine', 'Everything Else', 'Oral', 'Intravenous',
    'Blood Products', 'Other or unknown', and 'Net'.
    """
    def load_io(case_id):
        """
        Returns [dict_results, low_high]: one dict per io category, and the
        [lowest daily negative total, highest daily positive total] pair.
        """
        # urine, everything else, oral, intravenous, blood products, other, net
        curr_data = [[] for _ in range(7)]
        day_position = {}  # day -> index of that day's [day, volume] pair in every series
        results = a_IO.objects.using('remote').filter(patientvisitid=case_id)  # Database connection to intake and output table
        for result in results:
            t = (time.mktime(result.date.timetuple()) - 18000) * 1000
            day = ((t // 86400000) * 86400000)  # reduce to day
            if result.type == 'Output':
                io_type = 0 if result.name == 'Urine Output' else 1
            elif result.category == 6:
                io_type = 2
            elif result.category in [1, 2, 3, 4, 5, 8, 9, 10]:
                io_type = 3
            elif result.category == 7:
                io_type = 4
            else:  # category > 10
                io_type = 5
            if day not in day_position:
                # first row for this day: open a zeroed slot in every series
                day_position[day] = len(day_position)
                for series in curr_data:
                    series.append([day, 0])
            curr_data[io_type][day_position[day]][1] += round(result.volume, 2)
        # make output negative
        for series in curr_data[:2]:
            for point in series:
                point[1] = -point[1]
        # calculate net
        low_high = [1, -1]
        for i in range(len(day_position)):
            day_net = 0
            day_pos = 0
            day_neg = 0
            for q in range(6):
                volume = curr_data[q][i][1]
                day_net += volume
                if volume > 0:
                    day_pos += volume
                if volume < 0:
                    day_neg += volume
            curr_data[6][i][1] = day_net
            low_high[0] = min(low_high[0], day_neg)
            low_high[1] = max(low_high[1], day_pos)
        # NOTE(review): io_to_day is neither defined nor imported in the visible part
        # of this file; confirm where it comes from, otherwise this raises NameError.
        dict_results = [{"name": 'Urine', "step": 1, "data": io_to_day(curr_data[0]), "stack": "a"},
                        {"name": 'Everything Else', "step": 1, "data": io_to_day(curr_data[1]), "stack": "a"},
                        {"name": 'Oral', "step": 1, "data": io_to_day(curr_data[2]), "stack": "a"},
                        {"name": 'Intravenous', "step": 1, "data": io_to_day(curr_data[3]), "stack": "a"},
                        {"name": 'Blood Products', "step": 1, "data": io_to_day(curr_data[4]), "stack": "a"},
                        {"name": 'Other or unknown', "step": 1, "data": io_to_day(curr_data[5]), "stack": "a"},
                        {"name": 'Net', "step": 1, "data": io_to_day(curr_data[6]), "stack": "b"}]
        return [dict_results, low_high]

    io_data = {}
    # load_io takes only the case id and applies no time filter, so all times are
    # included; the extra cut-off argument that used to be passed raised a TypeError.
    io_dict, discard = load_io(case_id)
    for io_category_data in io_dict:
        io_data[io_category_data['name']] = io_category_data['data']
    return io_data
def load_and_pickle_patients_demo_data(case_id):
    """
    Collects a patient's demographic data.
    Uses one database connection (a_demographics).

    Returns a one-element list holding a dict with the keys
    age, sex, height, weight, bmi, and race.
    sex is 0 when missing or 'M', otherwise 1; race is 0 for 'White', otherwise 1.
    """
    record = a_demographics.objects.using('remote').get(patientvisitid=case_id)  # Database connection to demographics table.
    is_male_or_unknown = record.sex is None or record.sex == 'M'
    sex_code = 0 if is_male_or_unknown else 1
    race_code = 0 if record.race == 'White' else 1
    summary = {
        "age": record.age,
        "sex": sex_code,
        "height": record.height,
        "weight": record.weight,
        "bmi": record.bmi,
        "race": race_code,
    }
    return [summary]
def pickle_patient_data(root_dir, pickle_dir, case_day_filename='case_day_mapping.txt'):
    """
    Creates and stores patient data and root_information as binary.

    root_dir: directory in which the case day mapping file is created.
    pickle_dir: directory that receives root_info.pickle, other_info.pickle, and the
        per-patient pickles; it must already contain the subdirectories root_data,
        flag_data, med_data, procedure_data, micro_data, io_data, and demo_data.
    case_day_filename: name of the case day mapping file inside root_dir.

    Paths are built with os.path.join, so the directories work with or without
    a trailing separator.
    """
    root_info_file = os.path.join(pickle_dir, 'root_info.pickle')
    other_info_file = os.path.join(pickle_dir, 'other_info.pickle')
    case_day_file = os.path.join(root_dir, case_day_filename)

    # ## create and store information structures
    query_and_store_root_information(root_info_file)  # store root information structures
    query_and_store_other_information(other_info_file)  # store med, procedure, micro, intake and output, and demographic mapping information

    # ## load stored information structures
    mtr, rtm, groups, lab_group_order, rtn, rtt, rtdt, drm, root_order = load_info_from_pickle_file(root_info_file)
    med_order, mtt, procedure_order, micro_order, io_order, demo_order = load_info_from_pickle_file(other_info_file)
    # ^mtr=mars2root, rtm=root2mars, rtn=root2name, rtt=root2table, rtdt=root2datatype drm=discrete result mapping
    # ^^mtt=med2table

    # ## create patient case day file
    determine_cases_days_and_times(case_day_file)

    # ## load case day file info (order of cases, patient los days, time points of those los days)
    patient_order, patient_days, patient_cut_times = load_case_day_mapping(case_day_file)

    # ## (subdirectory, loader) pairs, run in this order for every case
    loaders = [
        ('root_data', lambda cid: load_a_patients_root_data(cid, root_order, rtm, rtt, rtdt)),
        ('flag_data', lambda cid: load_and_pickle_patients_flag_data(cid, root_order, rtm, rtt)),
        ('med_data', lambda cid: load_and_pickle_patients_med_data(cid, med_order)),
        ('procedure_data', lambda cid: load_and_pickle_patients_procedure_data(cid, procedure_order)),
        ('micro_data', lambda cid: load_and_pickle_patients_micro_data(cid, micro_order)),
        ('io_data', load_and_pickle_patients_io_data),
        ('demo_data', load_and_pickle_patients_demo_data),
    ]

    # ## for each patient case
    for i, case_id in enumerate(patient_order):
        # print progress; one pre-formatted string parses on Python 2 and Python 3
        print('%s \t%s' % (case_id, i))
        for subdir, loader in loaders:
            curr_data = loader(case_id)
            with open(os.path.join(pickle_dir, subdir, case_id + '.pickle'), 'wb') as f:
                pickle.dump(curr_data, f)
    return
if __name__ == '__main__':
    # ## set pickle storage directory
    root_dir = '//modeling_folder/'
    pkl_dir = '//modeling_folder/all_data_pickle_files/'
    case_day_filename = 'case_day_mapping-01Jan2018.txt'  # pickle_patient_data creates this file in root_dir

    # ## will need to run this once on database to generate case_day_mapping file
    if False:  # disabled; flip to True to run determine_case_times on the mapping file
        determine_case_times(root_dir + case_day_filename)

    # ## run pickle_patient_data
    # The directory variable is pkl_dir; the former name pickle_dir was undefined here.
    pickle_patient_data(root_dir, pkl_dir, case_day_filename)