MLD3 / FIDDLE · Commits

Commit 73d02477
Authored Aug 01, 2019 by Shengpu Tang (tangsp)

update naming conventions

Parent: bcdaa133
Changes: 6 files
mimic3_experiments/3_ML_models/config.yaml

-data_path: /data4/tangsp/mimic3_features/
+data_path: ../data/processed/
 model_names: {'CNN': 'CNN_V3', ...
...
mimic3_experiments/3_ML_models/lib/data.py

...
@@ -11,10 +11,10 @@ from sklearn.impute import SimpleImputer
 import yaml
 with open('config.yaml') as f:
-    config = yaml.load(f)
+    config = yaml.safe_load(f)
 data_path = config['data_path']
 
-def get_test(task, fuse=False, duration=4, timestep=0.5, normalize=True, batch_size=64):
+def get_test(task, duration, timestep, fuse=False, batch_size=64):
     """
     Returns:
         pytorch DataLoader for test
...
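Note on the yaml.load to yaml.safe_load change above: safe_load only constructs plain Python containers and scalars, avoiding the arbitrary-object construction (and, in newer PyYAML, the warning) that comes with calling load without an explicit Loader. A minimal sketch of the resulting config read, assuming only the keys visible in config.yaml above:

import yaml

# safe_load builds only standard Python objects; plain load without a Loader
# can construct arbitrary objects and warns in PyYAML >= 5.1.
with open('config.yaml') as f:
    config = yaml.safe_load(f)

data_path = config['data_path']      # '../data/processed/' after this commit
model_names = config['model_names']  # e.g. {'CNN': 'CNN_V3', ...}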
@@ -37,7 +37,7 @@ def get_test(task, fuse=False, duration=4, timestep=0.5, normalize=True, batch_s
     return te_loader
 
-def get_train_val_test(task, fuse=False, duration=4, timestep=0.5, normalize=True, batch_size=64):
+def get_train_val_test(task, fuse=False, duration=4, timestep=0.5, batch_size=64):
     """
     Returns:
         pytorch DataLoader for train, val, test
...
@@ -70,11 +70,11 @@ def get_train_val_test(task, fuse=False, duration=4, timestep=0.5, normalize=Tru
 def get_benchmark_splits(fuse=False, batch_size=64):
     task = 'mortality'
-    duration = 48
+    duration = 48.0
     timestep = 1.0
     df_label = pd.read_csv(data_path + 'population/pop.mortality_benchmark.csv').rename(columns={'{}_LABEL'.format(task): 'LABEL'})
-    X = sparse.load_npz(data_path + 'features/benchmark.outcome={}.T={}.dt={}/X.npz'.format(task, duration, timestep)).todense()
-    s = sparse.load_npz(data_path + 'features/benchmark.outcome={}.T={}.dt={}/s.npz'.format(task, duration, timestep)).todense()
+    X = sparse.load_npz(data_path + 'features/benchmark,outcome={},T={},dt={}/X.npz'.format(task, duration, timestep)).todense()
+    s = sparse.load_npz(data_path + 'features/benchmark,outcome={},T={},dt={}/s.npz'.format(task, duration, timestep)).todense()
 
     tr_idx = df_label[df_label['partition'] == 'train'].index.values
     va_idx = df_label[df_label['partition'] == 'val'].index.values
...
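The rest of this commit is mechanical: feature directories, output and checkpoint directories, and log files switch from '.'-separated to ','-separated key=value fields. A small illustrative snippet (values assumed for illustration, not taken from the repo) of what the two templates above produce:

task, duration, timestep = 'mortality', 48.0, 1.0

old_dir = 'features/benchmark.outcome={}.T={}.dt={}/'.format(task, duration, timestep)
new_dir = 'features/benchmark,outcome={},T={},dt={}/'.format(task, duration, timestep)

print(old_dir)  # features/benchmark.outcome=mortality.T=48.0.dt=1.0/
print(new_dir)  # features/benchmark,outcome=mortality,T=48.0,dt=1.0/
# With commas as field separators, the remaining '.' characters all belong to
# float values such as T=48.0, which presumably makes the names easier to parse.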
@@ -120,8 +120,8 @@ def get_benchmark_test(fuse=False, batch_size=64):
     df_label_all = pd.read_csv(data_path + 'population/{}_{}h.csv'.format(task, duration)).rename(columns={'{}_LABEL'.format(task): 'LABEL'})
     df_label = pd.read_csv(data_path + 'population/pop.mortality_benchmark.csv').rename(columns={'{}_LABEL'.format(task): 'LABEL'})
-    X = sparse.load_npz(data_path + 'features/outcome={}.T={}.dt={}/X.npz'.format(task, duration, timestep)).todense()
-    s = sparse.load_npz(data_path + 'features/outcome={}.T={}.dt={}/s.npz'.format(task, duration, timestep)).todense()
+    X = sparse.load_npz(data_path + 'features/outcome={},T={},dt={}/X.npz'.format(task, duration, timestep)).todense()
+    s = sparse.load_npz(data_path + 'features/outcome={},T={},dt={}/s.npz'.format(task, duration, timestep)).todense()
 
     te_idx = [df_label_all[df_label_all['ICUSTAY_ID'] == ID].index.values[0] for ID in df_label[df_label['partition'] == 'test']['ID']]
...
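The te_idx comprehension above maps each benchmark test stay back to its row position in the full population table via ICUSTAY_ID. A toy example with made-up IDs (the real column layout may differ):

import pandas as pd

# Hypothetical miniature stand-ins for the two label tables used above.
df_label_all = pd.DataFrame({'ICUSTAY_ID': [201, 202, 203]})
df_label = pd.DataFrame({'ID': [203, 201], 'partition': ['test', 'test']})

te_idx = [df_label_all[df_label_all['ICUSTAY_ID'] == ID].index.values[0]
          for ID in df_label[df_label['partition'] == 'test']['ID']]
# te_idx now holds the row positions of the benchmark test stays
# in df_label_all, here 2 and 0.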
@@ -168,8 +168,8 @@ class _Mimic3Reader(object):
.
sort_values
(
by
=
[
'SUBJECT_ID'
,
'LABEL'
])
\
.
drop_duplicates
(
'SUBJECT_ID'
,
keep
=
'last'
).
reset_index
(
drop
=
True
)
self
.
X
=
sparse
.
load_npz
(
data_path
+
'features/outcome={}
.
T={}
.
dt={}/X.npz'
.
format
(
task
,
duration
,
timestep
)).
todense
()
self
.
s
=
sparse
.
load_npz
(
data_path
+
'features/outcome={}
.
T={}
.
dt={}/s.npz'
.
format
(
task
,
duration
,
timestep
)).
todense
()
self
.
X
=
sparse
.
load_npz
(
data_path
+
'features/outcome={}
,
T={}
,
dt={}/X.npz'
.
format
(
task
,
duration
,
timestep
)).
todense
()
self
.
s
=
sparse
.
load_npz
(
data_path
+
'features/outcome={}
,
T={}
,
dt={}/s.npz'
.
format
(
task
,
duration
,
timestep
)).
todense
()
print
(
'Finish reading data
\t
{:.2f} s'
.
format
(
time
.
time
()
-
start_time
))
...
...
@@ -201,8 +201,8 @@ class _Mimic3Reader(object):
         te_idx = self.df_subjects[self.df_subjects['partition'] == 'test'].index.values
 
         try:
             import pathlib
-            pathlib.Path('./output/outcome={}.T={}.dt={}/'.format(self.task, self.duration, self.timestep)).mkdir(parents=True, exist_ok=True)
-            np.savez(open('./output/outcome={}.T={}.dt={}/idx.npz'.format(self.task, self.duration, self.timestep), 'wb'), tr_idx=tr_idx, va_idx=va_idx, te_idx=te_idx)
+            pathlib.Path('./output/outcome={},T={},dt={}/'.format(self.task, self.duration, self.timestep)).mkdir(parents=True, exist_ok=True)
+            np.savez(open('./output/outcome={},T={},dt={}/idx.npz'.format(self.task, self.duration, self.timestep), 'wb'), tr_idx=tr_idx, va_idx=va_idx, te_idx=te_idx)
         except:
             print('indices not saved')
             raise
...
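For reference, a runnable sketch (with stand-in values for self.task, self.duration, self.timestep and dummy index arrays) of what the mkdir/savez pair above does under the new naming:

import pathlib
import numpy as np

task, duration, timestep = 'mortality', 48.0, 1.0   # stand-ins for the instance attributes

out_dir = './output/outcome={},T={},dt={}/'.format(task, duration, timestep)
pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)   # create nested dirs; no error if present

# np.savez stores each keyword argument as a named array inside one .npz archive.
tr_idx, va_idx, te_idx = np.arange(5), np.arange(5, 7), np.arange(7, 10)
np.savez(open(out_dir + 'idx.npz', 'wb'), tr_idx=tr_idx, va_idx=va_idx, te_idx=te_idx)

loaded = np.load(out_dir + 'idx.npz')
print(loaded['te_idx'])   # [7 8 9]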
@@ -248,8 +248,8 @@ class _Mimic3Reader(object):
         try:
             import pathlib
-            pathlib.Path('./output/outcome={}.T={}.dt={}/'.format(self.task, self.duration, self.timestep)).mkdir(parents=True, exist_ok=True)
-            np.savez(open('./output/outcome={}.T={}.dt={}/idx.npz'.format(self.task, self.duration, self.timestep), 'wb'), tr_idx=tr_idx, va_idx=va_idx, te_idx=te_idx)
+            pathlib.Path('./output/outcome={},T={},dt={}/'.format(self.task, self.duration, self.timestep)).mkdir(parents=True, exist_ok=True)
+            np.savez(open('./output/outcome={},T={},dt={}/idx.npz'.format(self.task, self.duration, self.timestep), 'wb'), tr_idx=tr_idx, va_idx=va_idx, te_idx=te_idx)
         except:
             print('indices not saved')
             raise
...
mimic3_experiments/3_ML_models/run_deep.py

...
@@ -63,7 +63,7 @@ args = parser.parse_args()
 task = args.outcome
 model_type = args.model_type
-T = int(args.T)
+T = float(args.T)
 dt = float(args.dt)
 
 L_in = int(np.floor(T / dt))
 in_channels = dimensions[task][float(T)]
...
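Reading T as a float keeps the command-line value consistent with the float-formatted directory and file names and with the float(T) key used in the dimensions lookup; the derived window length is unchanged as long as T/dt stays the same. A quick sketch with assumed argument strings:

import numpy as np

T = float('48')                 # was int('48') before this commit -> now 48.0
dt = float('1.0')
L_in = int(np.floor(T / dt))    # number of timesteps in the window
print(T, L_in)                  # 48.0 48

# '{}'.format(48) gives '48' while '{}'.format(48.0) gives '48.0',
# so the float matters wherever T is interpolated into a path or log name.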
mimic3_experiments/3_ML_models/run_deep_all.sh

...
@@ -3,17 +3,17 @@ set -euxo pipefail
 mkdir -p log
 cuda=0
 
-python run_deep.py --outcome=mortality --T=48 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=mortality.T=48.dt=1.0.CNN.log'
-python run_deep.py --outcome=mortality --T=48 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=mortality.T=48.dt=1.0.RNN.log'
+python run_deep.py --outcome=mortality --T=48.0 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=mortality,T=48,dt=1.0,CNN.log'
+python run_deep.py --outcome=mortality --T=48.0 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=mortality,T=48,dt=1.0,RNN.log'
 
-python run_deep.py --outcome=ARF --T=4 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=ARF.T=4.dt=1.0.CNN.log'
-python run_deep.py --outcome=ARF --T=4 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=ARF.T=4.dt=1.0.RNN.log'
+python run_deep.py --outcome=ARF --T=4.0 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=ARF,T=4,dt=1.0,CNN.log'
+python run_deep.py --outcome=ARF --T=4.0 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=ARF,T=4,dt=1.0,RNN.log'
-python run_deep.py --outcome=ARF --T=12 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=ARF.T=12.dt=1.0.CNN.log'
-python run_deep.py --outcome=ARF --T=12 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=ARF.T=12.dt=1.0.RNN.log'
+python run_deep.py --outcome=ARF --T=12.0 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=ARF,T=12,dt=1.0,CNN.log'
+python run_deep.py --outcome=ARF --T=12.0 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=ARF,T=12,dt=1.0,RNN.log'
-python run_deep.py --outcome=Shock --T=4 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=Shock.T=4.dt=1.0.CNN.log'
-python run_deep.py --outcome=Shock --T=4 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=Shock.T=4.dt=1.0.RNN.log'
+python run_deep.py --outcome=Shock --T=4.0 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=Shock,T=4,dt=1.0,CNN.log'
+python run_deep.py --outcome=Shock --T=4.0 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=Shock,T=4,dt=1.0,RNN.log'
-python run_deep.py --outcome=Shock --T=12 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=Shock.T=12.dt=1.0.CNN.log'
-python run_deep.py --outcome=Shock --T=12 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=Shock.T=12.dt=1.0.RNN.log'
+python run_deep.py --outcome=Shock --T=12.0 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=Shock,T=12,dt=1.0,CNN.log'
+python run_deep.py --outcome=Shock --T=12.0 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=Shock,T=12,dt=1.0,RNN.log'
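The log files written by this sweep are renamed the same way (dots to commas). Purely as an illustration, a hypothetical Python helper, not part of the repo, that would print the equivalent command grid under the new naming:

# Hypothetical generator for the command grid above; run_deep_all.sh itself stays hand-written.
settings = [('mortality', 48.0), ('ARF', 4.0), ('ARF', 12.0), ('Shock', 4.0), ('Shock', 12.0)]
for outcome, T in settings:
    for model in ['CNN', 'RNN']:
        log = 'log/outcome={},T={:g},dt=1.0,{}.log'.format(outcome, T, model)
        print('python run_deep.py --outcome={} --T={} --dt=1.0 --model_type={} --cuda=$cuda &> {!r}'
              .format(outcome, T, model, log))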
mimic3_experiments/3_ML_models/run_shallow.py

...
@@ -32,7 +32,7 @@ task = args.outcome
 model_type = args.model_type
 model_name = model_type
 
-T = int(args.T)
+T = float(args.T)
 dt = float(args.dt)
 
 if model_type == 'CNN':
...
@@ -47,11 +47,11 @@ else:
     assert False
 
-print('EXPERIMENT:', 'model={}.outcome={}.T={}.dt={}'.format(model_name, task, T, dt))
+print('EXPERIMENT:', 'model={},outcome={},T={},dt={}'.format(model_name, task, T, dt))
 
 # Create checkpoint directories
 import pathlib
-pathlib.Path("./checkpoint/model={}.outcome={}.T={}.dt={}/".format(model_name, task, T, dt)).mkdir(parents=True, exist_ok=True)
+pathlib.Path("./checkpoint/model={},outcome={},T={},dt={}/".format(model_name, task, T, dt)).mkdir(parents=True, exist_ok=True)
 
 ######
 # Data
...
@@ -137,7 +137,7 @@ print('best_params_', clf.best_params_)
 print('best_score_ ', clf.best_score_)
 
 try:
     np.savetxt(
-        'output/outcome={}.T={}.dt={}/{}.coef.txt'.format(task, T, dt, model_name),
+        'output/outcome={},T={},dt={}/{},coef.txt'.format(task, T, dt, model_name),
         clf.best_estimator_.coef_,
         delimiter=',',
     )
...
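A sketch of what the renamed np.savetxt call in the last hunk writes, with assumed values and a dummy coefficient matrix in place of clf.best_estimator_.coef_:

import pathlib
import numpy as np

task, T, dt, model_name = 'mortality', 48.0, 1.0, 'LR'     # assumed values
coef = np.array([[0.12, -0.03, 0.0, 0.41]])                # stand-in for clf.best_estimator_.coef_

# The output subdirectory must exist; create it here so the sketch runs standalone.
pathlib.Path('output/outcome={},T={},dt={}'.format(task, T, dt)).mkdir(parents=True, exist_ok=True)

# One comma-separated row per output class of the fitted linear model.
np.savetxt('output/outcome={},T={},dt={}/{},coef.txt'.format(task, T, dt, model_name),
           coef, delimiter=',')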
mimic3_experiments/3_ML_models/run_shallow_all.sh

...
@@ -3,21 +3,21 @@ set -euxo pipefail
 mkdir -p log
 mkdir -p output
 
-python run_shallow.py --outcome=mortality --T=48 --dt=1.0 --model_type=LR \
-    > >(tee 'log/outcome=mortality.T=48.dt=1.0.LR.out') \
-    2> >(tee 'log/outcome=mortality.T=48.dt=1.0.LR.err' >&2)
-python run_shallow.py --outcome=mortality --T=48 --dt=1.0 --model_type=RF \
-    > >(tee 'log/outcome=mortality.T=48.dt=1.0.RF.out') \
-    2> >(tee 'log/outcome=mortality.T=48.dt=1.0.RF.err' >&2)
+python run_shallow.py --outcome=mortality --T=48.0 --dt=1.0 --model_type=LR \
+    > >(tee 'log/outcome=mortality,T=48.0,dt=1.0,LR.out') \
+    2> >(tee 'log/outcome=mortality,T=48.0,dt=1.0,LR.err' >&2)
+python run_shallow.py --outcome=mortality --T=48.0 --dt=1.0 --model_type=RF \
+    > >(tee 'log/outcome=mortality,T=48.0,dt=1.0,RF.out') \
+    2> >(tee 'log/outcome=mortality,T=48.0,dt=1.0,RF.err' >&2)
 
-python run_shallow.py --outcome=ARF   --T=4 --dt=1.0 --model_type=LR &> 'log/outcome=ARF.T=4.dt=1.0.LR.log'
-python run_shallow.py --outcome=Shock --T=4 --dt=1.0 --model_type=LR &> 'log/outcome=Shock.T=4.dt=1.0.LR.log'
+python run_shallow.py --outcome=ARF   --T=4.0 --dt=1.0 --model_type=LR &> 'log/outcome=ARF,T=4.0,dt=1.0,LR.log'
+python run_shallow.py --outcome=Shock --T=4.0 --dt=1.0 --model_type=LR &> 'log/outcome=Shock,T=4.0,dt=1.0,LR.log'
-python run_shallow.py --outcome=ARF   --T=4 --dt=1.0 --model_type=RF &> 'log/outcome=ARF.T=4.dt=1.0.RF.log'
-python run_shallow.py --outcome=Shock --T=4 --dt=1.0 --model_type=RF &> 'log/outcome=Shock.T=4.dt=1.0.RF.log'
+python run_shallow.py --outcome=ARF   --T=4.0 --dt=1.0 --model_type=RF &> 'log/outcome=ARF,T=4.0,dt=1.0,RF.log'
+python run_shallow.py --outcome=Shock --T=4.0 --dt=1.0 --model_type=RF &> 'log/outcome=Shock,T=4.0,dt=1.0,RF.log'
-python run_shallow.py --outcome=ARF   --T=12 --dt=1.0 --model_type=LR &> 'log/outcome=ARF.T=12.dt=1.0.LR.log'
-python run_shallow.py --outcome=Shock --T=12 --dt=1.0 --model_type=LR &> 'log/outcome=Shock.T=12.dt=1.0.LR.log'
+python run_shallow.py --outcome=ARF   --T=12.0 --dt=1.0 --model_type=LR &> 'log/outcome=ARF,T=12.0,dt=1.0,LR.log'
+python run_shallow.py --outcome=Shock --T=12.0 --dt=1.0 --model_type=LR &> 'log/outcome=Shock,T=12.0,dt=1.0,LR.log'
-python run_shallow.py --outcome=ARF   --T=12 --dt=1.0 --model_type=RF &> 'log/outcome=ARF.T=12.dt=1.0.RF.log'
-python run_shallow.py --outcome=Shock --T=12 --dt=1.0 --model_type=RF &> 'log/outcome=Shock.T=12.dt=1.0.RF.log'
+python run_shallow.py --outcome=ARF   --T=12.0 --dt=1.0 --model_type=RF &> 'log/outcome=ARF,T=12.0,dt=1.0,RF.log'
+python run_shallow.py --outcome=Shock --T=12.0 --dt=1.0 --model_type=RF &> 'log/outcome=Shock,T=12.0,dt=1.0,RF.log'