Commit 73d02477 authored by Shengpu Tang (tangsp)

update naming conventions

parent bcdaa133
-data_path: /data4/tangsp/mimic3_features/
+data_path: ../data/processed/
 model_names: {
     'CNN': 'CNN_V3',
...
@@ -11,10 +11,10 @@ from sklearn.impute import SimpleImputer
 import yaml
 with open('config.yaml') as f:
-    config = yaml.load(f)
+    config = yaml.safe_load(f)
 data_path = config['data_path']

-def get_test(task, fuse=False, duration=4, timestep=0.5, normalize=True, batch_size=64):
+def get_test(task, duration, timestep, fuse=False, batch_size=64):
     """
     Returns:
         pytorch DataLoader for test
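Note: the `yaml.load` → `yaml.safe_load` change is more than naming hygiene. Since PyYAML 5.1, calling `yaml.load` without an explicit `Loader` emits a warning, and the full loader can construct arbitrary Python objects from YAML tags, which is unsafe for untrusted files. A minimal sketch of the safe pattern adopted above:

    import yaml

    # safe_load only builds plain YAML types (dict, list, str, int, float, ...),
    # so a config file cannot instantiate arbitrary Python objects via tags
    # such as !!python/object/apply.
    with open('config.yaml') as f:
        config = yaml.safe_load(f)
    data_path = config['data_path']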
@@ -37,7 +37,7 @@ def get_test(task, fuse=False, duration=4, timestep=0.5, normalize=True, batch_s
     return te_loader

-def get_train_val_test(task, fuse=False, duration=4, timestep=0.5, normalize=True, batch_size=64):
+def get_train_val_test(task, fuse=False, duration=4, timestep=0.5, batch_size=64):
     """
     Returns:
         pytorch DataLoader for train, val, test
@@ -70,11 +70,11 @@ def get_train_val_test(task, fuse=False, duration=4, timestep=0.5, normalize=Tru
 def get_benchmark_splits(fuse=False, batch_size=64):
     task = 'mortality'
-    duration = 48
+    duration = 48.0
     timestep = 1.0
     df_label = pd.read_csv(data_path + 'population/pop.mortality_benchmark.csv').rename(columns={'{}_LABEL'.format(task): 'LABEL'})
-    X = sparse.load_npz(data_path +'features/benchmark.outcome={}.T={}.dt={}/X.npz'.format(task, duration, timestep)).todense()
-    s = sparse.load_npz(data_path +'features/benchmark.outcome={}.T={}.dt={}/s.npz'.format(task, duration, timestep)).todense()
+    X = sparse.load_npz(data_path +'features/benchmark,outcome={},T={},dt={}/X.npz'.format(task, duration, timestep)).todense()
+    s = sparse.load_npz(data_path +'features/benchmark,outcome={},T={},dt={}/s.npz'.format(task, duration, timestep)).todense()

     tr_idx = df_label[df_label['partition'] == 'train'].index.values
     va_idx = df_label[df_label['partition'] == 'val' ].index.values
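Note: the renames in this hunk (and the ones below) switch the feature directories from dot-separated `outcome={}.T={}.dt={}` to comma-separated `outcome={},T={},dt={}`. The pattern is still spelled out at every call site; a hypothetical helper, not part of this commit, could centralize the convention so a future rename touches one place (`feature_path` and its arguments are illustrative names):

    import os

    def feature_path(task, duration, timestep, filename, prefix=''):
        # Illustrative only: builds paths such as
        #   <data_path>/features/benchmark,outcome=mortality,T=48.0,dt=1.0/X.npz
        # following the comma-separated convention adopted in this commit.
        dirname = '{}outcome={},T={},dt={}'.format(prefix, task, duration, timestep)
        return os.path.join(data_path, 'features', dirname, filename)

    # e.g.:
    # X = sparse.load_npz(feature_path(task, duration, timestep, 'X.npz',
    #                                  prefix='benchmark,')).todense()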
@@ -120,8 +120,8 @@ def get_benchmark_test(fuse=False, batch_size=64):
     df_label_all = pd.read_csv(data_path + 'population/{}_{}h.csv'.format(task, duration)).rename(columns={'{}_LABEL'.format(task): 'LABEL'})
     df_label = pd.read_csv(data_path + 'population/pop.mortality_benchmark.csv').rename(columns={'{}_LABEL'.format(task): 'LABEL'})
-    X = sparse.load_npz(data_path +'features/outcome={}.T={}.dt={}/X.npz'.format(task, duration, timestep)).todense()
-    s = sparse.load_npz(data_path +'features/outcome={}.T={}.dt={}/s.npz'.format(task, duration, timestep)).todense()
+    X = sparse.load_npz(data_path +'features/outcome={},T={},dt={}/X.npz'.format(task, duration, timestep)).todense()
+    s = sparse.load_npz(data_path +'features/outcome={},T={},dt={}/s.npz'.format(task, duration, timestep)).todense()

     te_idx = [df_label_all[df_label_all['ICUSTAY_ID'] == ID].index.values[0] for ID in df_label[df_label['partition'] == 'test' ]['ID']]
@@ -168,8 +168,8 @@ class _Mimic3Reader(object):
             .sort_values(by=['SUBJECT_ID', 'LABEL']) \
             .drop_duplicates('SUBJECT_ID', keep='last').reset_index(drop=True)
-        self.X = sparse.load_npz(data_path +'features/outcome={}.T={}.dt={}/X.npz'.format(task, duration, timestep)).todense()
-        self.s = sparse.load_npz(data_path +'features/outcome={}.T={}.dt={}/s.npz'.format(task, duration, timestep)).todense()
+        self.X = sparse.load_npz(data_path +'features/outcome={},T={},dt={}/X.npz'.format(task, duration, timestep)).todense()
+        self.s = sparse.load_npz(data_path +'features/outcome={},T={},dt={}/s.npz'.format(task, duration, timestep)).todense()

         print('Finish reading data \t {:.2f} s'.format(time.time() - start_time))
@@ -201,8 +201,8 @@ class _Mimic3Reader(object):
         te_idx = self.df_subjects[self.df_subjects['partition'] == 'test' ].index.values
         try:
             import pathlib
-            pathlib.Path('./output/outcome={}.T={}.dt={}/'.format(self.task, self.duration, self.timestep)).mkdir(parents=True, exist_ok=True)
-            np.savez(open('./output/outcome={}.T={}.dt={}/idx.npz'.format(self.task, self.duration, self.timestep), 'wb'), tr_idx=tr_idx, va_idx=va_idx, te_idx=te_idx)
+            pathlib.Path('./output/outcome={},T={},dt={}/'.format(self.task, self.duration, self.timestep)).mkdir(parents=True, exist_ok=True)
+            np.savez(open('./output/outcome={},T={},dt={}/idx.npz'.format(self.task, self.duration, self.timestep), 'wb'), tr_idx=tr_idx, va_idx=va_idx, te_idx=te_idx)
         except:
             print('indices not saved')
             raise
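Note: `np.savez` also accepts a bare filename, so the explicit `open(..., 'wb')` is not strictly needed. A sketch of reading the saved split indices back (the concrete directory name is just an example of the new convention):

    import numpy as np

    # np.load returns a lazy NpzFile; using it as a context manager closes
    # the underlying file once the arrays have been extracted.
    with np.load('./output/outcome=mortality,T=48.0,dt=1.0/idx.npz') as f:
        tr_idx, va_idx, te_idx = f['tr_idx'], f['va_idx'], f['te_idx']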
@@ -248,8 +248,8 @@ class _Mimic3Reader(object):
         try:
             import pathlib
-            pathlib.Path('./output/outcome={}.T={}.dt={}/'.format(self.task, self.duration, self.timestep)).mkdir(parents=True, exist_ok=True)
-            np.savez(open('./output/outcome={}.T={}.dt={}/idx.npz'.format(self.task, self.duration, self.timestep), 'wb'), tr_idx=tr_idx, va_idx=va_idx, te_idx=te_idx)
+            pathlib.Path('./output/outcome={},T={},dt={}/'.format(self.task, self.duration, self.timestep)).mkdir(parents=True, exist_ok=True)
+            np.savez(open('./output/outcome={},T={},dt={}/idx.npz'.format(self.task, self.duration, self.timestep), 'wb'), tr_idx=tr_idx, va_idx=va_idx, te_idx=te_idx)
         except:
             print('indices not saved')
             raise
...
@@ -63,7 +63,7 @@ args = parser.parse_args()
 task = args.outcome
 model_type = args.model_type
-T = int(args.T)
+T = float(args.T)
 dt = float(args.dt)
 L_in = int(np.floor(T / dt))
 in_channels = dimensions[task][float(T)]
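Note: parsing `--T` as a float matches the float-keyed `dimensions[task][float(T)]` lookup and the new float-valued directory and log names; for whole-hour horizons the derived sequence length is unchanged:

    import numpy as np

    # T=48.0 with dt=1.0 yields the same number of input time steps as the
    # old integer parsing: floor(48.0 / 1.0) == 48.
    T, dt = 48.0, 1.0
    L_in = int(np.floor(T / dt))
    assert L_in == 48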
...
@@ -3,17 +3,17 @@ set -euxo pipefail
 mkdir -p log
 cuda=0
-python run_deep.py --outcome=mortality --T=48 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=mortality.T=48.dt=1.0.CNN.log'
+python run_deep.py --outcome=mortality --T=48.0 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=mortality,T=48,dt=1.0,CNN.log'
-python run_deep.py --outcome=mortality --T=48 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=mortality.T=48.dt=1.0.RNN.log'
+python run_deep.py --outcome=mortality --T=48.0 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=mortality,T=48,dt=1.0,RNN.log'
-python run_deep.py --outcome=ARF --T=4 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=ARF.T=4.dt=1.0.CNN.log'
+python run_deep.py --outcome=ARF --T=4.0 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=ARF,T=4,dt=1.0,CNN.log'
-python run_deep.py --outcome=ARF --T=4 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=ARF.T=4.dt=1.0.RNN.log'
+python run_deep.py --outcome=ARF --T=4.0 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=ARF,T=4,dt=1.0,RNN.log'
-python run_deep.py --outcome=ARF --T=12 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=ARF.T=12.dt=1.0.CNN.log'
+python run_deep.py --outcome=ARF --T=12.0 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=ARF,T=12,dt=1.0,CNN.log'
-python run_deep.py --outcome=ARF --T=12 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=ARF.T=12.dt=1.0.RNN.log'
+python run_deep.py --outcome=ARF --T=12.0 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=ARF,T=12,dt=1.0,RNN.log'
-python run_deep.py --outcome=Shock --T=4 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=Shock.T=4.dt=1.0.CNN.log'
+python run_deep.py --outcome=Shock --T=4.0 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=Shock,T=4,dt=1.0,CNN.log'
-python run_deep.py --outcome=Shock --T=4 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=Shock.T=4.dt=1.0.RNN.log'
+python run_deep.py --outcome=Shock --T=4.0 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=Shock,T=4,dt=1.0,RNN.log'
-python run_deep.py --outcome=Shock --T=12 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=Shock.T=12.dt=1.0.CNN.log'
+python run_deep.py --outcome=Shock --T=12.0 --dt=1.0 --model_type=CNN --cuda=$cuda &> 'log/outcome=Shock,T=12,dt=1.0,CNN.log'
-python run_deep.py --outcome=Shock --T=12 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=Shock.T=12.dt=1.0.RNN.log'
+python run_deep.py --outcome=Shock --T=12.0 --dt=1.0 --model_type=RNN --cuda=$cuda &> 'log/outcome=Shock,T=12,dt=1.0,RNN.log'
@@ -32,7 +32,7 @@ task = args.outcome
 model_type = args.model_type
 model_name = model_type
-T = int(args.T)
+T = float(args.T)
 dt = float(args.dt)
 if model_type == 'CNN':
@@ -47,11 +47,11 @@ else:
     assert False

-print('EXPERIMENT:', 'model={}.outcome={}.T={}.dt={}'.format(model_name, task, T, dt))
+print('EXPERIMENT:', 'model={},outcome={},T={},dt={}'.format(model_name, task, T, dt))

 # Create checkpoint directories
 import pathlib
-pathlib.Path("./checkpoint/model={}.outcome={}.T={}.dt={}/".format(model_name, task, T, dt)).mkdir(parents=True, exist_ok=True)
+pathlib.Path("./checkpoint/model={},outcome={},T={},dt={}/".format(model_name, task, T, dt)).mkdir(parents=True, exist_ok=True)

 ######
 # Data
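Note on the checkpoint-directory call: `parents=True` creates any missing intermediate directories and `exist_ok=True` suppresses `FileExistsError`, so re-running an experiment is idempotent. A minimal sketch with example values:

    import pathlib

    # Safe to call repeatedly: creates ./checkpoint and the experiment
    # subdirectory on first use, then becomes a no-op.
    ckpt = pathlib.Path('./checkpoint/model=CNN,outcome=mortality,T=48.0,dt=1.0/')
    ckpt.mkdir(parents=True, exist_ok=True)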
@@ -137,7 +137,7 @@ print('best_params_', clf.best_params_)
 print('best_score_ ', clf.best_score_)
 try:
     np.savetxt(
-        'output/outcome={}.T={}.dt={}/{}.coef.txt'.format(task, T, dt, model_name),
+        'output/outcome={},T={},dt={}/{},coef.txt'.format(task, T, dt, model_name),
         clf.best_estimator_.coef_,
         delimiter=',',
     )
...
@@ -3,21 +3,21 @@ set -euxo pipefail
 mkdir -p log
 mkdir -p output
-python run_shallow.py --outcome=mortality --T=48 --dt=1.0 --model_type=LR \
-    > >(tee 'log/outcome=mortality.T=48.dt=1.0.LR.out') \
-    2> >(tee 'log/outcome=mortality.T=48.dt=1.0.LR.err' >&2)
+python run_shallow.py --outcome=mortality --T=48.0 --dt=1.0 --model_type=LR \
+    > >(tee 'log/outcome=mortality,T=48.0,dt=1.0,LR.out') \
+    2> >(tee 'log/outcome=mortality,T=48.0,dt=1.0,LR.err' >&2)
-python run_shallow.py --outcome=mortality --T=48 --dt=1.0 --model_type=RF \
-    > >(tee 'log/outcome=mortality.T=48.dt=1.0.RF.out') \
-    2> >(tee 'log/outcome=mortality.T=48.dt=1.0.RF.err' >&2)
+python run_shallow.py --outcome=mortality --T=48.0 --dt=1.0 --model_type=RF \
+    > >(tee 'log/outcome=mortality,T=48.0,dt=1.0,RF.out') \
+    2> >(tee 'log/outcome=mortality,T=48.0,dt=1.0,RF.err' >&2)
-python run_shallow.py --outcome=ARF --T=4 --dt=1.0 --model_type=LR &> 'log/outcome=ARF.T=4.dt=1.0.LR.log'
+python run_shallow.py --outcome=ARF --T=4.0 --dt=1.0 --model_type=LR &> 'log/outcome=ARF,T=4.0,dt=1.0,LR.log'
-python run_shallow.py --outcome=Shock --T=4 --dt=1.0 --model_type=LR &> 'log/outcome=Shock.T=4.dt=1.0.LR.log'
+python run_shallow.py --outcome=Shock --T=4.0 --dt=1.0 --model_type=LR &> 'log/outcome=Shock,T=4.0,dt=1.0,LR.log'
-python run_shallow.py --outcome=ARF --T=4 --dt=1.0 --model_type=RF &> 'log/outcome=ARF.T=4.dt=1.0.RF.log'
+python run_shallow.py --outcome=ARF --T=4.0 --dt=1.0 --model_type=RF &> 'log/outcome=ARF,T=4.0,dt=1.0,RF.log'
-python run_shallow.py --outcome=Shock --T=4 --dt=1.0 --model_type=RF &> 'log/outcome=Shock.T=4.dt=1.0.RF.log'
+python run_shallow.py --outcome=Shock --T=4.0 --dt=1.0 --model_type=RF &> 'log/outcome=Shock,T=4.0,dt=1.0,RF.log'
-python run_shallow.py --outcome=ARF --T=12 --dt=1.0 --model_type=LR &> 'log/outcome=ARF.T=12.dt=1.0.LR.log'
+python run_shallow.py --outcome=ARF --T=12.0 --dt=1.0 --model_type=LR &> 'log/outcome=ARF,T=12.0,dt=1.0,LR.log'
-python run_shallow.py --outcome=Shock --T=12 --dt=1.0 --model_type=LR &> 'log/outcome=Shock.T=12.dt=1.0.LR.log'
+python run_shallow.py --outcome=Shock --T=12.0 --dt=1.0 --model_type=LR &> 'log/outcome=Shock,T=12.0,dt=1.0,LR.log'
-python run_shallow.py --outcome=ARF --T=12 --dt=1.0 --model_type=RF &> 'log/outcome=ARF.T=12.dt=1.0.RF.log'
+python run_shallow.py --outcome=ARF --T=12.0 --dt=1.0 --model_type=RF &> 'log/outcome=ARF,T=12.0,dt=1.0,RF.log'
-python run_shallow.py --outcome=Shock --T=12 --dt=1.0 --model_type=RF &> 'log/outcome=Shock.T=12.dt=1.0.RF.log'
+python run_shallow.py --outcome=Shock --T=12.0 --dt=1.0 --model_type=RF &> 'log/outcome=Shock,T=12.0,dt=1.0,RF.log'
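Note: the two mortality commands use bash process substitution, `> >(tee out) 2> >(tee err >&2)`, which logs stdout and stderr to separate files while still echoing both to the terminal; the plain `&> log` form used for the other tasks captures both streams into one file and prints nothing. A rough Python analogue of the tee behavior (the command list is just one of the invocations above):

    import subprocess
    import sys
    import threading

    def tee(stream, path, echo):
        # Copy each line from the child's stream both to a log file and to
        # the parent's own stream, like `tee` inside process substitution.
        with open(path, 'w') as f:
            for line in stream:
                f.write(line)
                echo.write(line)

    cmd = ['python', 'run_shallow.py', '--outcome=mortality',
           '--T=48.0', '--dt=1.0', '--model_type=LR']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, text=True)
    threads = [threading.Thread(target=tee, args=(proc.stdout, 'out.log', sys.stdout)),
               threading.Thread(target=tee, args=(proc.stderr, 'err.log', sys.stderr))]
    for t in threads:
        t.start()
    proc.wait()
    for t in threads:
        t.join()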