Commit 0aa11f54 authored by Shengpu Tang (tangsp)'s avatar Shengpu Tang (tangsp)
Browse files

NEWS baseline

parent 287854c2
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"from operator import itemgetter"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import yaml\n",
"with open('../data_code/config.yaml') as f:\n",
" config = yaml.safe_load(f)\n",
" data_path = config['data_path']"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"icu = pd.read_csv(data_path + 'prep/icustays_MV.csv')\n",
"chart = pd.read_pickle(data_path + 'prep/chartevents.p')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"chart['ITEMID'] = chart['ITEMID'].replace({\n",
" '223900': 'GCS verbal',\n",
" '223901': 'GCS motor',\n",
" '220739': 'GCS eye',\n",
" '226732': 'O2 delivery device',\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"NEWS_components = [\n",
" 'HR', \n",
" 'RR', \n",
" 'SysBP', \n",
" 'Temperature', \n",
" 'SpO2', \n",
" 'GCS verbal',\n",
" 'GCS motor',\n",
" 'GCS eye',\n",
" 'O2 delivery device',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"items = chart[chart[\"ITEMID\"].isin(NEWS_components)].copy().reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"13807467"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(items)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Convert GCS to AVPU (15: A, 1-14: VPU)\n",
"\n",
"| * | Eye Opening | Eye Opening | Eye Opening | Eye Opening | Verbal | Verbal | Verbal | Verbal | Verbal | Motor | Motor | Motor | Motor | Motor | Motor |\n",
"|----------|-------------|-------------|-------------|-------------|----------|----------|----------------|------------------|--------|---------|------------|------------|---------|-----------|-------|\n",
"| Response | Spontaneous | To Speech | To Pain | None | Oriented | Confused | Inapproapriate | Incomprehensible | None | Obeying | Localizing | Withdrawal | Flexing | Extending | None |\n",
"| Score | 4 | 3 | 2 | 1 | 5 | 4 | 5 | 2 | 1 | 6 | 5 | 4 | 3 | 2 | 1 |\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"verbal_mask = (items['ITEMID'] == 'GCS verbal')\n",
"items.loc[verbal_mask, 'VALUE'] = items.loc[verbal_mask, 'VALUE'].map({\n",
" \"No Response-ETT\": 1,\n",
" \"No Response\": 1,\n",
" \"Incomprehensible sounds\": 2,\n",
" \"Inappropriate Words\": 3,\n",
" \"Confused\": 4,\n",
" \"Oriented\": 5,\n",
"})\n",
"\n",
"motor_mask = (items['ITEMID'] == 'GCS motor')\n",
"items.loc[motor_mask, 'VALUE'] = items.loc[motor_mask, 'VALUE'].map({\n",
" \"No response\": 1,\n",
" \"Abnormal extension\": 2,\n",
" \"Abnormal Flexion\": 3,\n",
" \"Flex-withdraws\": 4,\n",
" \"Localizes Pain\": 5,\n",
" \"Obeys Commands\": 6,\n",
"})\n",
"\n",
"\n",
"eye_mask = (items['ITEMID'] == 'GCS eye')\n",
"items.loc[eye_mask, 'VALUE'] = items.loc[eye_mask, 'VALUE'].map({\n",
" \"None\": 1,\n",
" \"To Pain\": 2,\n",
" \"To Speech\": 3,\n",
" \"Spontaneously\": 4,\n",
"})\n",
"\n",
"oxygen_mask = (items['ITEMID'] == 'O2 delivery device')\n",
"items.loc[oxygen_mask, 'VALUE'] = items.loc[oxygen_mask, 'VALUE'].map({\n",
" 'Nasal cannula': 1, \n",
" 'Aerosol-cool': 1, \n",
" 'Bipap mask ': 1, \n",
" 'None': 0,\n",
" 'CPAP mask ': 1,\n",
" 'Endotracheal tube': 1, \n",
" 'Face tent': 1,\n",
" 'Medium conc mask ': 1,\n",
" 'Other': 1, \n",
" 'Venti mask ': 1, \n",
" 'Non-rebreather': 1,\n",
" 'Tracheostomy tube': 1, \n",
" 'Trach mask ': 1, \n",
" 'High flow neb': 1,\n",
" 'High flow nasal cannula': 1, \n",
" 'T-piece': 1, \n",
" 'Vapomist': 1,\n",
" 'Ultrasonic neb': 1,\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"assert set(items[items['ITEMID'] == \"O2 delivery device\"]['VALUE'].unique()) == set([0,1])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ICUSTAY_ID</th>\n",
" <th>t</th>\n",
" <th>ITEMID</th>\n",
" <th>VALUE</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>200001</td>\n",
" <td>-0.003333</td>\n",
" <td>HR</td>\n",
" <td>115</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>200001</td>\n",
" <td>0.013333</td>\n",
" <td>RR</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>200001</td>\n",
" <td>0.030000</td>\n",
" <td>SysBP</td>\n",
" <td>113</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>200001</td>\n",
" <td>0.130000</td>\n",
" <td>SpO2</td>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>200001</td>\n",
" <td>0.163333</td>\n",
" <td>RR</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>200001</td>\n",
" <td>0.163333</td>\n",
" <td>SpO2</td>\n",
" <td>95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>200001</td>\n",
" <td>0.163333</td>\n",
" <td>HR</td>\n",
" <td>114</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>200001</td>\n",
" <td>0.163333</td>\n",
" <td>Temperature</td>\n",
" <td>37.2778</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>200001</td>\n",
" <td>0.196667</td>\n",
" <td>O2 delivery device</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>200001</td>\n",
" <td>0.213333</td>\n",
" <td>GCS eye</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ICUSTAY_ID t ITEMID VALUE\n",
"0 200001 -0.003333 HR 115\n",
"1 200001 0.013333 RR 22\n",
"2 200001 0.030000 SysBP 113\n",
"3 200001 0.130000 SpO2 94\n",
"4 200001 0.163333 RR 26\n",
"5 200001 0.163333 SpO2 95\n",
"6 200001 0.163333 HR 114\n",
"7 200001 0.163333 Temperature 37.2778\n",
"8 200001 0.196667 O2 delivery device 1\n",
"9 200001 0.213333 GCS eye 4"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"items.head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Map values to bins & Aggregate scores\n",
"<img src=\"NEWS_table.jpg\">"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"HR_bins = [41, 51, 91, 111, 131]\n",
"RR_bins = [9, 12, 21, 25]\n",
"SpO2_bins = [92, 94, 96]\n",
"Temp_bins = [35.1, 36.1, 38.1, 39.1]\n",
"SBP_bins = [91, 101, 111, 220]\n",
"\n",
"HR_dict = { 0:3, 1:1, 2:0, 3:1, 4:2, 5:3 }\n",
"RR_dict = { 0:3, 1:1, 2:0, 3:2, 4:3 }\n",
"SpO2_dict = { 0:3, 1:2, 2:1, 3:0 }\n",
"Temp_dict = { 0:3, 1:1, 2:0, 3:1, 4:2 }\n",
"SBP_dict = { 0:3, 1:2, 2:1, 3:0, 4:3 }"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# test binning\n",
"assert SpO2_dict[np.digitize(90, SpO2_bins)] == 3\n",
"assert HR_dict[np.digitize(80, HR_bins)] == 0"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def calculate_score(row):\n",
" gcs_score = int(row['GCS motor']) + int(row['GCS verbal']) + int(row['GCS eye'])\n",
" score = \\\n",
" HR_dict[np.digitize(float(row['HR']), HR_bins)] + \\\n",
" RR_dict[np.digitize(float(row['RR']), RR_bins)] + \\\n",
" SBP_dict[np.digitize(float(row['SysBP']), SBP_bins)] + \\\n",
" Temp_dict[np.digitize(float(row['Temperature']), Temp_bins)] + \\\n",
" SpO2_dict[np.digitize(float(row['SpO2']), SpO2_bins)] + \\\n",
" 2 * int(int(row[\"O2 delivery device\"]) == 1) + \\\n",
" 3 * int(gcs_score < 15)\n",
" return score"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"IDs_test = list(icu[icu['partition'] == 'test'][\"ICUSTAY_ID\"])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 3492/3492 [03:40<00:00, 15.81it/s]\n",
"100%|██████████| 3492/3492 [03:56<00:00, 15.09it/s]\n",
"100%|██████████| 3492/3492 [04:18<00:00, 13.48it/s]\n"
]
}
],
"source": [
"for T in [4, 12, 48]:\n",
" score_dict = {}\n",
" for ID in tqdm(IDs_test):\n",
" try:\n",
" df = items[items['ICUSTAY_ID'] == ID].sort_values(by='t')\n",
" df = df[df['t'] <= T]\n",
" df = df.drop_duplicates(subset=['ITEMID', 't'], keep='last')\n",
" df = df.pivot(index='t', columns='ITEMID', values='VALUE')\n",
" df = df.reindex(NEWS_components, axis=1)\n",
" df = df.ffill().dropna()\n",
" if len(df) > 0:\n",
" df['NEWS'] = df.apply(calculate_score, axis=1)\n",
" score_dict[ID] = df['NEWS'].max()\n",
" else:\n",
" score_dict[ID] = np.nan\n",
" except:\n",
" print(ID)\n",
" display(df)\n",
" break\n",
" \n",
" df_scores = pd.DataFrame(score_dict.items(), columns=['ID', 'NEWS']).sort_values(by='ID')\n",
" df_scores.to_csv(data_path + 'labels/NEWS_{}h.csv'.format(T), index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment