Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
MLD3
FIDDLE
Commits
0aa11f54
Commit
0aa11f54
authored
Sep 06, 2019
by
Shengpu Tang (tangsp)
Browse files
NEWS baseline
parent
287854c2
Changes
3
Hide whitespace changes
Inline
Side-by-side
mimic3_experiments/4_baselines/CalculateNEWS.ipynb
0 → 100644
View file @
0aa11f54
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"from operator import itemgetter"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import yaml\n",
"with open('../data_code/config.yaml') as f:\n",
" config = yaml.safe_load(f)\n",
" data_path = config['data_path']"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"icu = pd.read_csv(data_path + 'prep/icustays_MV.csv')\n",
"chart = pd.read_pickle(data_path + 'prep/chartevents.p')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"chart['ITEMID'] = chart['ITEMID'].replace({\n",
" '223900': 'GCS verbal',\n",
" '223901': 'GCS motor',\n",
" '220739': 'GCS eye',\n",
" '226732': 'O2 delivery device',\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"NEWS_components = [\n",
" 'HR', \n",
" 'RR', \n",
" 'SysBP', \n",
" 'Temperature', \n",
" 'SpO2', \n",
" 'GCS verbal',\n",
" 'GCS motor',\n",
" 'GCS eye',\n",
" 'O2 delivery device',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"items = chart[chart[\"ITEMID\"].isin(NEWS_components)].copy().reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"13807467"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(items)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Convert GCS to AVPU (15: A, 1-14: VPU)\n",
"\n",
"| * | Eye Opening | Eye Opening | Eye Opening | Eye Opening | Verbal | Verbal | Verbal | Verbal | Verbal | Motor | Motor | Motor | Motor | Motor | Motor |\n",
"|----------|-------------|-------------|-------------|-------------|----------|----------|----------------|------------------|--------|---------|------------|------------|---------|-----------|-------|\n",
"| Response | Spontaneous | To Speech | To Pain | None | Oriented | Confused | Inappropriate | Incomprehensible | None | Obeying | Localizing | Withdrawal | Flexing | Extending | None |\n",
"| Score | 4 | 3 | 2 | 1 | 5 | 4 | 3 | 2 | 1 | 6 | 5 | 4 | 3 | 2 | 1 |\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"verbal_mask = (items['ITEMID'] == 'GCS verbal')\n",
"items.loc[verbal_mask, 'VALUE'] = items.loc[verbal_mask, 'VALUE'].map({\n",
" \"No Response-ETT\": 1,\n",
" \"No Response\": 1,\n",
" \"Incomprehensible sounds\": 2,\n",
" \"Inappropriate Words\": 3,\n",
" \"Confused\": 4,\n",
" \"Oriented\": 5,\n",
"})\n",
"\n",
"motor_mask = (items['ITEMID'] == 'GCS motor')\n",
"items.loc[motor_mask, 'VALUE'] = items.loc[motor_mask, 'VALUE'].map({\n",
" \"No response\": 1,\n",
" \"Abnormal extension\": 2,\n",
" \"Abnormal Flexion\": 3,\n",
" \"Flex-withdraws\": 4,\n",
" \"Localizes Pain\": 5,\n",
" \"Obeys Commands\": 6,\n",
"})\n",
"\n",
"\n",
"eye_mask = (items['ITEMID'] == 'GCS eye')\n",
"items.loc[eye_mask, 'VALUE'] = items.loc[eye_mask, 'VALUE'].map({\n",
" \"None\": 1,\n",
" \"To Pain\": 2,\n",
" \"To Speech\": 3,\n",
" \"Spontaneously\": 4,\n",
"})\n",
"\n",
"oxygen_mask = (items['ITEMID'] == 'O2 delivery device')\n",
"items.loc[oxygen_mask, 'VALUE'] = items.loc[oxygen_mask, 'VALUE'].map({\n",
" 'Nasal cannula': 1, \n",
" 'Aerosol-cool': 1, \n",
" 'Bipap mask ': 1, \n",
" 'None': 0,\n",
" 'CPAP mask ': 1,\n",
" 'Endotracheal tube': 1, \n",
" 'Face tent': 1,\n",
" 'Medium conc mask ': 1,\n",
" 'Other': 1, \n",
" 'Venti mask ': 1, \n",
" 'Non-rebreather': 1,\n",
" 'Tracheostomy tube': 1, \n",
" 'Trach mask ': 1, \n",
" 'High flow neb': 1,\n",
" 'High flow nasal cannula': 1, \n",
" 'T-piece': 1, \n",
" 'Vapomist': 1,\n",
" 'Ultrasonic neb': 1,\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"assert set(items[items['ITEMID'] == \"O2 delivery device\"]['VALUE'].unique()) == set([0,1])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ICUSTAY_ID</th>\n",
" <th>t</th>\n",
" <th>ITEMID</th>\n",
" <th>VALUE</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>200001</td>\n",
" <td>-0.003333</td>\n",
" <td>HR</td>\n",
" <td>115</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>200001</td>\n",
" <td>0.013333</td>\n",
" <td>RR</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>200001</td>\n",
" <td>0.030000</td>\n",
" <td>SysBP</td>\n",
" <td>113</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>200001</td>\n",
" <td>0.130000</td>\n",
" <td>SpO2</td>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>200001</td>\n",
" <td>0.163333</td>\n",
" <td>RR</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>200001</td>\n",
" <td>0.163333</td>\n",
" <td>SpO2</td>\n",
" <td>95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>200001</td>\n",
" <td>0.163333</td>\n",
" <td>HR</td>\n",
" <td>114</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>200001</td>\n",
" <td>0.163333</td>\n",
" <td>Temperature</td>\n",
" <td>37.2778</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>200001</td>\n",
" <td>0.196667</td>\n",
" <td>O2 delivery device</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>200001</td>\n",
" <td>0.213333</td>\n",
" <td>GCS eye</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ICUSTAY_ID t ITEMID VALUE\n",
"0 200001 -0.003333 HR 115\n",
"1 200001 0.013333 RR 22\n",
"2 200001 0.030000 SysBP 113\n",
"3 200001 0.130000 SpO2 94\n",
"4 200001 0.163333 RR 26\n",
"5 200001 0.163333 SpO2 95\n",
"6 200001 0.163333 HR 114\n",
"7 200001 0.163333 Temperature 37.2778\n",
"8 200001 0.196667 O2 delivery device 1\n",
"9 200001 0.213333 GCS eye 4"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"items.head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Map values to bins & Aggregate scores\n",
"<img src=\"NEWS_table.jpg\">"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"HR_bins = [41, 51, 91, 111, 131]\n",
"RR_bins = [9, 12, 21, 25]\n",
"SpO2_bins = [92, 94, 96]\n",
"Temp_bins = [35.1, 36.1, 38.1, 39.1]\n",
"SBP_bins = [91, 101, 111, 220]\n",
"\n",
"HR_dict = { 0:3, 1:1, 2:0, 3:1, 4:2, 5:3 }\n",
"RR_dict = { 0:3, 1:1, 2:0, 3:2, 4:3 }\n",
"SpO2_dict = { 0:3, 1:2, 2:1, 3:0 }\n",
"Temp_dict = { 0:3, 1:1, 2:0, 3:1, 4:2 }\n",
"SBP_dict = { 0:3, 1:2, 2:1, 3:0, 4:3 }"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# test binning\n",
"assert SpO2_dict[np.digitize(90, SpO2_bins)] == 3\n",
"assert HR_dict[np.digitize(80, HR_bins)] == 0"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def calculate_score(row):\n",
" gcs_score = int(row['GCS motor']) + int(row['GCS verbal']) + int(row['GCS eye'])\n",
" score = \\\n",
" HR_dict[np.digitize(float(row['HR']), HR_bins)] + \\\n",
" RR_dict[np.digitize(float(row['RR']), RR_bins)] + \\\n",
" SBP_dict[np.digitize(float(row['SysBP']), SBP_bins)] + \\\n",
" Temp_dict[np.digitize(float(row['Temperature']), Temp_bins)] + \\\n",
" SpO2_dict[np.digitize(float(row['SpO2']), SpO2_bins)] + \\\n",
" 2 * int(int(row[\"O2 delivery device\"]) == 1) + \\\n",
" 3 * int(gcs_score < 15)\n",
" return score"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"IDs_test = list(icu[icu['partition'] == 'test'][\"ICUSTAY_ID\"])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 3492/3492 [03:40<00:00, 15.81it/s]\n",
"100%|██████████| 3492/3492 [03:56<00:00, 15.09it/s]\n",
"100%|██████████| 3492/3492 [04:18<00:00, 13.48it/s]\n"
]
}
],
"source": [
"for T in [4, 12, 48]:\n",
" score_dict = {}\n",
" for ID in tqdm(IDs_test):\n",
" try:\n",
" df = items[items['ICUSTAY_ID'] == ID].sort_values(by='t')\n",
" df = df[df['t'] <= T]\n",
" df = df.drop_duplicates(subset=['ITEMID', 't'], keep='last')\n",
" df = df.pivot(index='t', columns='ITEMID', values='VALUE')\n",
" df = df.reindex(NEWS_components, axis=1)\n",
" df = df.ffill().dropna()\n",
" if len(df) > 0:\n",
" df['NEWS'] = df.apply(calculate_score, axis=1)\n",
" score_dict[ID] = df['NEWS'].max()\n",
" else:\n",
" score_dict[ID] = np.nan\n",
" except:\n",
" print(ID)\n",
" display(df)\n",
" break\n",
" \n",
" df_scores = pd.DataFrame(score_dict.items(), columns=['ID', 'NEWS']).sort_values(by='ID')\n",
" df_scores.to_csv(data_path + 'labels/NEWS_{}h.csv'.format(T), index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
%% Cell type:code id: tags:
```
python
import
pandas
as
pd
import
numpy
as
np
from
tqdm
import
tqdm
from
operator
import
itemgetter
```
%% Cell type:code id: tags:
```
python
import yaml

# Read the experiment configuration and pull the data directory prefix out
# of it; `data_path` is concatenated with relative file names further down.
with open('../data_code/config.yaml') as f:
    config = yaml.safe_load(f)
    data_path = config['data_path']
```
%% Cell type:code id: tags:
```
python
# Load the two inputs used below from underneath `data_path`:
# a CSV read into `icu` and a pickled object read into `chart`.
icu = pd.read_csv(data_path + 'prep/icustays_MV.csv')
chart = pd.read_pickle(data_path + 'prep/chartevents.p')
```
%% Cell type:code id: tags:
```
python
# Give the four string item codes below readable names. Series.replace
# leaves every ITEMID value that is not a key of this dict untouched.
chart['ITEMID'] = chart['ITEMID'].replace({
    '223900': 'GCS verbal',
    '223901': 'GCS motor',
    '220739': 'GCS eye',
    '226732': 'O2 delivery device',
})
```
%% Cell type:code id: tags:
```
python
# ITEMID names that feed the NEWS calculation. The list is used both to
# filter the chart table and to fix the column order of the per-stay pivot.
NEWS_components = [
    'HR',
    'RR',
    'SysBP',
    'Temperature',
    'SpO2',
    'GCS verbal',
    'GCS motor',
    'GCS eye',
    'O2 delivery device',
]
```
%% Cell type:code id: tags:
```
python
# Keep only the rows whose ITEMID is a NEWS component. The copy makes the
# later in-place VALUE edits independent of `chart`, and the index is
# renumbered from zero.
items = chart[chart["ITEMID"].isin(NEWS_components)].copy().reset_index(drop=True)
```
%% Cell type:code id: tags:
```
python
# Number of rows kept after filtering (shown as the cell result).
len(items)
```
%% Output
13807467
%% Cell type:markdown id: tags:
# Convert GCS to AVPU (15: A, 1-14: VPU)
|
*
| Eye Opening | Eye Opening | Eye Opening | Eye Opening | Verbal | Verbal | Verbal | Verbal | Verbal | Motor | Motor | Motor | Motor | Motor | Motor |
|----------|-------------|-------------|-------------|-------------|----------|----------|----------------|------------------|--------|---------|------------|------------|---------|-----------|-------|
| Response | Spontaneous | To Speech | To Pain | None | Oriented | Confused | Inappropriate | Incomprehensible | None | Obeying | Localizing | Withdrawal | Flexing | Extending | None |
| Score | 4 | 3 | 2 | 1 | 5 | 4 | 3 | 2 | 1 | 6 | 5 | 4 | 3 | 2 | 1 |
%% Cell type:code id: tags:
```
python
# Convert the text labels of the three GCS components and of the oxygen
# delivery device into numbers, one ITEMID at a time. Series.map yields NaN
# for any label that is not a key of the dict passed to it.

# GCS verbal response -> 1..5
verbal_mask = (items['ITEMID'] == 'GCS verbal')
items.loc[verbal_mask, 'VALUE'] = items.loc[verbal_mask, 'VALUE'].map({
    "No Response-ETT": 1,
    "No Response": 1,
    "Incomprehensible sounds": 2,
    "Inappropriate Words": 3,
    "Confused": 4,
    "Oriented": 5,
})

# GCS motor response -> 1..6
motor_mask = (items['ITEMID'] == 'GCS motor')
items.loc[motor_mask, 'VALUE'] = items.loc[motor_mask, 'VALUE'].map({
    "No response": 1,
    "Abnormal extension": 2,
    "Abnormal Flexion": 3,
    "Flex-withdraws": 4,
    "Localizes Pain": 5,
    "Obeys Commands": 6,
})

# GCS eye opening -> 1..4
eye_mask = (items['ITEMID'] == 'GCS eye')
items.loc[eye_mask, 'VALUE'] = items.loc[eye_mask, 'VALUE'].map({
    "None": 1,
    "To Pain": 2,
    "To Speech": 3,
    "Spontaneously": 4,
})

# Oxygen delivery device -> 0 for 'None', 1 for every listed device.
# Several keys end in a space; they are kept exactly as written.
oxygen_mask = (items['ITEMID'] == 'O2 delivery device')
items.loc[oxygen_mask, 'VALUE'] = items.loc[oxygen_mask, 'VALUE'].map({
    'Nasal cannula': 1,
    'Aerosol-cool': 1,
    'Bipap mask ': 1,
    'None': 0,
    'CPAP mask ': 1,
    'Endotracheal tube': 1,
    'Face tent': 1,
    'Medium conc mask ': 1,
    'Other': 1,
    'Venti mask ': 1,
    'Non-rebreather': 1,
    'Tracheostomy tube': 1,
    'Trach mask ': 1,
    'High flow neb': 1,
    'High flow nasal cannula': 1,
    'T-piece': 1,
    'Vapomist': 1,
    'Ultrasonic neb': 1,
})
```
%% Cell type:code id: tags:
```
python
# Sanity check: after the mapping, the oxygen-device rows must contain
# exactly the values 0 and 1 (an unmapped label would show up as NaN here).
assert set(items[items['ITEMID'] == "O2 delivery device"]['VALUE'].unique()) == {0, 1}
```
%% Cell type:code id: tags:
```
python
# Preview the first ten rows of the converted table.
items.head(10)
```
%% Output
ICUSTAY_ID t ITEMID VALUE
0 200001 -0.003333 HR 115
1 200001 0.013333 RR 22
2 200001 0.030000 SysBP 113
3 200001 0.130000 SpO2 94
4 200001 0.163333 RR 26
5 200001 0.163333 SpO2 95
6 200001 0.163333 HR 114
7 200001 0.163333 Temperature 37.2778
8 200001 0.196667 O2 delivery device 1
9 200001 0.213333 GCS eye 4
%% Cell type:markdown id: tags:
# Map values to bins & Aggregate scores
<img
src=
"NEWS_table.jpg"
>
%% Cell type:code id: tags:
```
python
# Bin edges for each vital sign. With np.digitize's default (right=False)
# a value x gets index i such that bins[i-1] <= x < bins[i]; index 0 means
# "below the first edge" and index len(bins) means "at or above the last".
HR_bins = [41, 51, 91, 111, 131]
RR_bins = [9, 12, 21, 25]
SpO2_bins = [92, 94, 96]
Temp_bins = [35.1, 36.1, 38.1, 39.1]
SBP_bins = [91, 101, 111, 220]

# Points awarded per bin index; each table has len(bins) + 1 entries.
HR_dict = {0: 3, 1: 1, 2: 0, 3: 1, 4: 2, 5: 3}
RR_dict = {0: 3, 1: 1, 2: 0, 3: 2, 4: 3}
SpO2_dict = {0: 3, 1: 2, 2: 1, 3: 0}
Temp_dict = {0: 3, 1: 1, 2: 0, 3: 1, 4: 2}
SBP_dict = {0: 3, 1: 2, 2: 1, 3: 0, 4: 3}
```
%% Cell type:code id: tags:
```
python
# test binning
assert
SpO2_dict
[
np
.
digitize
(
90
,
SpO2_bins
)]
==
3
assert
HR_dict
[
np
.
digitize
(
80
,
HR_bins
)]
==
0
```
%% Cell type:code id: tags:
```
python
def calculate_score(row):
    """Return the aggregate NEWS points for one row of observations.

    ``row`` is indexed by component name and must provide 'HR', 'RR',
    'SysBP', 'Temperature', 'SpO2', 'GCS motor', 'GCS verbal', 'GCS eye'
    and 'O2 delivery device'. Every entry has to be convertible with
    ``int``/``float``; a missing (NaN) GCS or oxygen entry raises
    ``ValueError`` from ``int``.

    The five vital signs are binned with ``np.digitize`` against the
    module-level ``*_bins`` lists and turned into points through the matching
    ``*_dict`` tables. Two points are added when the oxygen-device flag is 1,
    and three points when the three GCS components sum to less than 15.
    """
    gcs_score = int(row['GCS motor']) + int(row['GCS verbal']) + int(row['GCS eye'])

    # Points from the five binned vital signs.
    vital_points = (
        HR_dict[np.digitize(float(row['HR']), HR_bins)]
        + RR_dict[np.digitize(float(row['RR']), RR_bins)]
        + SBP_dict[np.digitize(float(row['SysBP']), SBP_bins)]
        + Temp_dict[np.digitize(float(row['Temperature']), Temp_bins)]
        + SpO2_dict[np.digitize(float(row['SpO2']), SpO2_bins)]
    )

    # Flat penalties: a device flag of 1, and a GCS total below 15.
    oxygen_points = 2 * int(int(row["O2 delivery device"]) == 1)
    consciousness_points = 3 * int(gcs_score < 15)

    return vital_points + oxygen_points + consciousness_points
```
%% Cell type:code id: tags:
```
python
# ICUSTAY_ID values of the rows whose 'partition' column equals 'test'.
IDs_test = list(icu[icu['partition'] == 'test']["ICUSTAY_ID"])
```
%% Cell type:code id: tags:
```
python
for
T
in
[
4
,
12
,
48
]:
score_dict
=
{}
for
ID
in
tqdm
(
IDs_test
):
try
:
df
=
items
[
items
[
'ICUSTAY_ID'
]
==
ID
].
sort_values
(
by
=
't'
)