MLD3 / FIDDLE · Commits

Commit e3fc4342
authored Jun 20, 2019 by Shengpu Tang (tangsp)

experiments: data extraction

parent f4a1c687
Changes: 16 files
mimic3_experiments/1_data_extraction/InclusionExclusion.ipynb
0 → 100644
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import os, sys, time\n",
"from datetime import datetime, timedelta\n",
"import pickle\n",
"\n",
"from collections import Counter"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import yaml\n",
"config = yaml.safe_load(open('../config.yaml'))\n",
"data_path = config['data_path']\n",
"mimic3_path = config['mimic3_path']\n",
"\n",
"import pathlib\n",
"pathlib.Path(data_path, 'population').mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"patients = pd.read_csv(mimic3_path + 'PATIENTS.csv', parse_dates=['DOB', 'DOD'], usecols=['SUBJECT_ID', 'DOB', 'DOD'])\n",
"admissions = pd.read_csv(mimic3_path + 'ADMISSIONS.csv', parse_dates=['DEATHTIME'], usecols=['SUBJECT_ID', 'HADM_ID', 'DEATHTIME', 'HOSPITAL_EXPIRE_FLAG'])\n",
"examples = pd.read_csv(data_path + 'prep/icustays_MV.csv', parse_dates=['INTIME', 'OUTTIME']).sort_values(by='ICUSTAY_ID') # Only Metavision\n",
"\n",
"examples = pd.merge(examples, patients, on='SUBJECT_ID', how='left')\n",
"examples = pd.merge(examples, admissions, on=['SUBJECT_ID', 'HADM_ID'], how='left')\n",
"examples['AGE'] = examples.apply(lambda x: (x['INTIME'] - x['DOB']).total_seconds(), axis=1) / 3600 / 24 / 365.25\n",
"\n",
"examples['LOS'] = examples['LOS'] * 24 # Convert to hours"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"tasks = ['ARF', 'Shock']\n",
"label_defs = { task: pd.read_csv(data_path + 'labels/{}.csv'.format(task)) for task in tasks }"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Source population 23620\n"
]
}
],
"source": [
"# Start\n",
"N = len(examples['ICUSTAY_ID'].unique())\n",
"print('Source population', N)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"assert (examples['INTIME'] <= examples['OUTTIME']).all()\n",
"assert (examples['DBSOURCE'] == 'metavision').all()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Exclude non-adults 23593\n"
]
}
],
"source": [
"# Remove non-adults\n",
"min_age = 18\n",
"max_age = np.inf # no max age\n",
"examples = examples[(examples.AGE >= min_age) & (examples.AGE <= max_age)]\n",
"print('Exclude non-adults', examples['ICUSTAY_ID'].nunique())\n",
"examples_ = examples"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"======\n",
"prediction time 4 hour\n",
"Exclude deaths 23499\n",
"Exclude discharges 23401\n",
"---\n",
"Outcome ARF\n",
"Exclude onset 15873\n",
"---\n",
"Outcome Shock\n",
"Exclude onset 19342\n",
"======\n",
"prediction time 12 hour\n",
"Exclude deaths 23319\n",
"Exclude discharges 23060\n",
"---\n",
"Outcome ARF\n",
"Exclude onset 14174\n",
"---\n",
"Outcome Shock\n",
"Exclude onset 17588\n"
]
}
],
"source": [
"for T in [4, 12]:\n",
" print('======')\n",
" print('prediction time', T, 'hour')\n",
"\n",
" # Remove died before cutoff hour\n",
" examples = examples_[(examples_.DEATHTIME >= examples_.INTIME + timedelta(hours=T)) | (examples_.DEATHTIME.isnull())]\n",
" print('Exclude deaths', examples['ICUSTAY_ID'].nunique())\n",
"\n",
" # Remove LOS < cutoff hour\n",
" examples = examples[examples['LOS'] >= T]\n",
" print('Exclude discharges', examples['ICUSTAY_ID'].nunique())\n",
"\n",
" populations = {}\n",
" # Remove event onset before (cutoff)\n",
" for task in tasks:\n",
" print('---')\n",
" print('Outcome', task)\n",
" label_def = label_defs[task]\n",
"\n",
" # Needed to preserve index in DataFrame\n",
" pop = examples[['ICUSTAY_ID']].reset_index() \\\n",
" .merge(label_def[['ICUSTAY_ID', '{}_ONSET_HOUR'.format(task)]], on='ICUSTAY_ID', how='left') \\\n",
" .set_index('index').copy()\n",
" pop = pop[(pop['{}_ONSET_HOUR'.format(task)] >= T) | pop['{}_ONSET_HOUR'.format(task)].isnull()]\n",
" pop['{}_LABEL'.format(task)] = pop['{}_ONSET_HOUR'.format(task)].notnull().astype(int)\n",
" pop.to_csv(data_path + 'population/{}_{}h.csv'.format(task, T), index=False)\n",
"\n",
" # Construct boolean mask\n",
" ## NOTE: uses pop.index here, assuming index is preserved\n",
" idx = pop.index\n",
" ## Otherwise, there's a slower version\n",
" # if False:\n",
" # idx = np.array([examples[examples.ICUSTAY_ID == i].index[0] for i in pop['ICUSTAY_ID']])\n",
" mask_array = np.zeros(N, dtype=bool)\n",
" mask_array[idx] = True\n",
"\n",
" # Save population boolean mask\n",
" np.save(data_path + 'population/mask_{}_{}h.npy'.format(task, T), mask_array)\n",
" np.savetxt(data_path + 'population/mask_{}_{}h.txt'.format(task, T), mask_array, fmt='%i')\n",
"\n",
" populations[task] = pop\n",
" print('Exclude onset', len(pop))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"======\n",
"prediction time 48 hour\n",
"Exclude deaths 22776\n",
"Exclude discharges 11695\n",
"---\n",
"Outcome mortality\n",
"Exclude onset 11695\n"
]
}
],
"source": [
"for T in [48]:\n",
" print('======')\n",
" print('prediction time', T, 'hour')\n",
"\n",
" # Remove died before cutoff hour\n",
" examples = examples_[(examples_.DEATHTIME >= examples_.INTIME + timedelta(hours=T)) | (examples_.DEATHTIME.isnull())]\n",
" print('Exclude deaths', examples['ICUSTAY_ID'].nunique())\n",
"\n",
" # Remove LOS < cutoff hour\n",
" examples = examples[examples['LOS'] >= T]\n",
" print('Exclude discharges', examples['ICUSTAY_ID'].nunique())\n",
"\n",
" # Remove event onset before (cutoff)\n",
" for task in ['mortality']:\n",
" print('---')\n",
" print('Outcome', task)\n",
" examples['{}_LABEL'.format(task)] = examples.HOSPITAL_EXPIRE_FLAG\n",
" pop = examples[['ICUSTAY_ID', '{}_LABEL'.format(task)]]\n",
" pop.to_csv(data_path + 'population/{}_{}h.csv'.format(task, T), index=False)\n",
" print('Exclude onset', len(pop))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
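For each task and prediction horizon T, the notebook above writes a population file `population/{task}_{T}h.csv` (ICUSTAY_ID, onset hour, binary label) and a boolean mask `population/mask_{task}_{T}h.npy` over the N = 23620 Metavision source stays. The sketch below is not part of this commit; it only illustrates how those saved files could be consumed downstream. The feature array `X.npy` at the end is purely hypothetical.

```python
# A minimal sketch (not part of this commit) of consuming the files saved above.
import numpy as np
import pandas as pd
import yaml

config = yaml.safe_load(open('../config.yaml'))
data_path = config['data_path']

task, T = 'ARF', 4
pop = pd.read_csv(data_path + 'population/{}_{}h.csv'.format(task, T))
mask = np.load(data_path + 'population/mask_{}_{}h.npy'.format(task, T))

# Both equal the cohort size printed by the notebook (15873 for ARF at T = 4h).
print(len(pop), int(mask.sum()))
print(pop['{}_LABEL'.format(task)].mean())   # fraction of positive stays in the cohort

# 'X.npy' is hypothetical: any array whose rows follow the source population
# (all Metavision stays sorted by ICUSTAY_ID) can be restricted to this cohort.
# X = np.load(data_path + 'features/X.npy')
# X_cohort = X[mask]
```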
mimic3_experiments/1_data_extraction/LabelDistributions.ipynb
0 → 100644
This diff is collapsed.
mimic3_experiments/1_data_extraction/PopulationSummary.ipynb
0 → 100644
This diff is collapsed.
mimic3_experiments/1_data_extraction/config.py
0 → 100644
import os, yaml

with open(os.path.join(os.path.dirname(__file__), '../config.yaml')) as f:
    config = yaml.full_load(f)

data_path = os.path.join(os.path.dirname(__file__), config['data_path'])
mimic3_path = os.path.join(os.path.dirname(__file__), config['mimic3_path'])

parallel = True
n_jobs = 72
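Not part of this commit: a brief sketch of how this module is presumably imported by the extraction scripts (the actual consumers, such as extract_data.py, are collapsed in this diff). The `_process` helper and the ICUSTAY_ID values are placeholders.

```python
# A minimal sketch (assumed usage, not part of this commit): downstream scripts
# can import the resolved paths and parallelism settings from this module.
from joblib import Parallel, delayed

from config import data_path, parallel, n_jobs

def _process(icustay_id):
    """Placeholder for a per-stay extraction step that would read from data_path."""
    return icustay_id

icustay_ids = [200001, 200002, 200003]   # hypothetical ICUSTAY_IDs
if parallel:
    results = Parallel(n_jobs=n_jobs)(delayed(_process)(i) for i in icustay_ids)
else:
    results = [_process(i) for i in icustay_ids]
print(len(results), 'stays processed from', data_path)
```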
mimic3_experiments/1_data_extraction/extract_data.py
0 → 100644
This diff is collapsed.
mimic3_experiments/1_data_extraction/generate_labels.py
0 → 100644
"""
generate_labels.py
Author: Shengpu Tang
Generate labels for two adverse outcomes: ARF and shock.
"""

import pandas as pd
import numpy as np
import scipy.stats
import itertools
from collections import OrderedDict, Counter
from joblib import Parallel, delayed
from tqdm import tqdm as tqdm

import yaml
data_path = yaml.full_load(open('../config.yaml'))['data_path']

import pathlib
pathlib.Path(data_path, 'labels').mkdir(parents=True, exist_ok=True)

examples = pd.read_csv(data_path + 'prep/icustays_MV.csv', parse_dates=['INTIME', 'OUTTIME']).sort_values(by='ICUSTAY_ID')
chartevents = pd.read_pickle(data_path + 'prep/chartevents.p')
procedures = pd.read_pickle(data_path + 'prep/procedureevents_mv.p')
inputevents = pd.read_pickle(data_path + 'prep/inputevents_mv.p')

ventilation = [
    '225792',   # Invasive Ventilation
    '225794',   # Non-invasive Ventilation
]

PEEP = [
    '220339',   # PEEP set
]

vasopressors = [
    '221906',   # Norepinephrine
    '221289',   # Epinephrine
    '221662',   # Dopamine
    '222315',   # Vasopressin
    '221749',   # Phenylephrine
]

## ARF: (PEEP) OR (mechanical ventilation)
df_PEEP = chartevents[chartevents.ITEMID.isin(PEEP)].copy()
df_vent = procedures[procedures.ITEMID.isin(ventilation)].rename(columns={'t_start': 't'}).copy()
df_ARF = pd.concat([df_PEEP[['ICUSTAY_ID', 't']], df_vent[['ICUSTAY_ID', 't']]], axis=0)
df_ARF['ICUSTAY_ID'] = df_ARF['ICUSTAY_ID'].astype(int)
df_ARF = df_ARF.sort_values(by=['ICUSTAY_ID', 't']).drop_duplicates(['ICUSTAY_ID'], keep='first').reset_index(drop=True)
df_ARF = df_ARF.rename(columns={'t': 'ARF_ONSET_HOUR'})
df_ARF = pd.merge(examples[['ICUSTAY_ID']], df_ARF, on='ICUSTAY_ID', how='left')
df_ARF['ARF_LABEL'] = df_ARF['ARF_ONSET_HOUR'].notnull().astype(int)
print('ARF: ', dict(Counter(df_ARF['ARF_LABEL'])), 'N = {}'.format(len(df_ARF)), sep='\t')
df_ARF.to_csv(data_path + 'labels/ARF.csv', index=False)

## Shock: (one of vasopressors)
df_vaso = inputevents[inputevents.ITEMID.isin(vasopressors)].rename(columns={'t_start': 't'}).copy()
df_shock = df_vaso.copy()
df_shock['ICUSTAY_ID'] = df_shock['ICUSTAY_ID'].astype(int)
df_shock = df_shock.sort_values(by=['ICUSTAY_ID', 't']).drop_duplicates(['ICUSTAY_ID'], keep='first').reset_index(drop=True)
df_shock = df_shock.rename(columns={'t': 'Shock_ONSET_HOUR'})
df_shock = pd.merge(examples[['ICUSTAY_ID']], df_shock, on='ICUSTAY_ID', how='left')
df_shock['Shock_LABEL'] = df_shock['Shock_ONSET_HOUR'].notnull().astype(int)
print('Shock: ', dict(Counter(df_shock['Shock_LABEL'])), 'N = {}'.format(len(df_shock)), sep='\t')
df_shock.to_csv(data_path + 'labels/Shock.csv', index=False)
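A quick sanity-check sketch on the label files written above; it is not part of the commit. Column names follow generate_labels.py, and onset hours are read as hours relative to ICU admission, consistent with the `{task}_ONSET_HOUR >= T` comparison in InclusionExclusion.ipynb.

```python
# Minimal sanity checks on the label files produced above (not part of this commit).
import pandas as pd
import yaml

data_path = yaml.full_load(open('../config.yaml'))['data_path']

df_ARF = pd.read_csv(data_path + 'labels/ARF.csv')
print(df_ARF['ARF_LABEL'].value_counts())    # positive vs. negative stays
print(df_ARF['ARF_ONSET_HOUR'].describe())   # onset times (hours from ICU admission)

# Stays with no qualifying PEEP/ventilation event have a null onset hour and label 0.
assert (df_ARF['ARF_ONSET_HOUR'].isnull() == (df_ARF['ARF_LABEL'] == 0)).all()
```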
mimic3_experiments/1_data_extraction/grouped_variables.yaml
0 → 100644
HR:
    - 220045    # Heart Rate
SysBP:
    - 224167    # Manual Blood Pressure Systolic Left
    - 227243    # Manual Blood Pressure Systolic Right
    - 220050    # Arterial Blood Pressure systolic
    - 220179    # Non Invasive Blood Pressure systolic
    - 225309    # ART BP Systolic
DiaBP:
    - 224643    # Manual Blood Pressure Diastolic Left
    - 227242    # Manual Blood Pressure Diastolic Right
    - 220051    # Arterial Blood Pressure diastolic
    - 220180    # Non Invasive Blood Pressure diastolic
    - 225310    # ART BP Diastolic
RR:
    - 220210    # Respiratory Rate
    - 224690    # Respiratory Rate (Total)
Temperature:
    - 223761    # Temperature Fahrenheit
    - 223762    # Temperature Celsius
SpO2:
    - 220277    # O2 saturation pulseoxymetry
Height:
    - 226707    # Height
    - 226730    # Height (cm)
Weight:
    - 224639    # Daily Weight
    - 226512    # Admission Weight (Kg)
    - 226531    # Admission Weight (lbs.)
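The sketch below shows one way such a grouping file is typically consumed: inverting it into an ITEMID-to-variable map. This is an assumed usage pattern for illustration only; the commit's own consumer (extract_data.py) is collapsed in this diff.

```python
# Minimal sketch (assumed usage, not part of this commit): map MIMIC-III ITEMIDs
# to their grouped variable names as defined in grouped_variables.yaml.
import yaml

with open('grouped_variables.yaml') as f:
    groups = yaml.safe_load(f)   # {'HR': [220045], 'SysBP': [...], ...}

itemid_to_group = {str(itemid): name
                   for name, itemids in groups.items()
                   for itemid in itemids}

print(itemid_to_group['220045'])   # 'HR'
print(len(itemid_to_group))        # total number of grouped ITEMIDs
```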
mimic3_experiments/1_data_extraction/resources/IHM_benchmark.ipynb
0 → 100644
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Multitask benchmark: https://www.nature.com/articles/s41597-019-0103-9"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import yaml\n",
"with open('../config.yaml') as f:\n",
" config = yaml.full_load(f)\n",
"\n",
"data_path = config['data_path']"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from collections import defaultdict"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"df_train = pd.read_csv('train_listfile.csv')\n",
"df_val = pd.read_csv('val_listfile.csv')\n",
"df_test = pd.read_csv('test_listfile.csv')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.13534500374633882"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train['y_true'].mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [