shensq / mi_counseling · Commits · 280fbb04

Commit 280fbb04, authored 4 years ago by DeepLearning VM
Parent: c5662789

    server running code after kbert full implementation
Showing 4 changed files with 120 additions and 66 deletions:

  code/gpt_loader/load_data.py   +27 −14
  code/gpt_sample.py             +47 −23
  code/gpt_tuning.py             +29 −19
  code/run_compare_aug.sh        +17 −10
code/gpt_loader/load_data.py  (+27 −14)
@@ -543,14 +543,19 @@ class GptDataset_KBERT(Dataset):
         pickle_handler = open("../data_processed/data_comet_dict", 'rb')
         self.data = pickle.load(pickle_handler)
         self.max_length = 510
         self.tokenizer = tokenizer
         self.args = args
         self.num_turns = args.num_turns
         self.ref, self.speaker1, self.speaker2 = tokenizer.ref, tokenizer.speaker1, tokenizer.speaker2
         self.eos = tokenizer.eos
         self.augment = tokenizer.augment
+        # self.args.kbert_mask = True
+        # self.args.kbert_position = True
+        if not self.args.kbert:
+            self.args.kbert_mask = False
+            self.args.kbert_position = False
+            print("Not using kbert scheme.")
         if self.args.kbert_mask:
             print("using kbert-style attention mask")
         if self.args.kbert_position:
@@ -613,13 +618,14 @@ class GptDataset_KBERT(Dataset):
             last_related_token_index = len(srl_mask[i]) - 1 - srl_mask[i][::-1].index(1)
             # add comet output
-            if comet_encoded[i] is not None:
-                x += [self.augment] + comet_encoded[i]
-                type_x += [self.augment] * (len(comet_encoded[i]) + 1)
-                # +2 for the special token and the requirement of one-number larger than the utterance
-                soft_position_x += list(range(soft_loc + 2 + last_related_token_index, soft_loc + 2 + last_related_token_index + (len(comet_encoded[i]) + 1)))
+            if self.args.kbert:
+                if comet_encoded[i] is not None:
+                    x += [self.augment] + comet_encoded[i]
+                    type_x += [self.augment] * (len(comet_encoded[i]) + 1)
+                    # +2 for the special token and the requirement of one-number larger than the utterance
+                    soft_position_x += list(range(soft_loc + 2 + last_related_token_index, soft_loc + 2 + last_related_token_index + (len(comet_encoded[i]) + 1)))
             soft_loc += (len(context_encoded[i]) + 1)
             is_speaker1 = not is_speaker1
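The soft-position arithmetic in this hunk is the K-BERT-style position trick: appended COMET tokens get position ids that continue from the utterance token they augment, rather than from the end of the running sequence, so the positional embeddings treat them as a local branch off that token. A toy trace of the index math (made-up values, not the dataset's real tokens):

    # Toy illustration of the soft-position indexing above; values are hypothetical.
    soft_loc = 10                   # soft position where this utterance starts
    last_related_token_index = 2    # last token flagged by the SRL mask
    comet_encoded_i = [301, 302]    # two (fake) COMET knowledge token ids

    # <augment> + knowledge tokens continue one past the augmented token's
    # position (+1 for the <augment> marker, +1 to start one number larger).
    start = soft_loc + 2 + last_related_token_index
    positions = list(range(start, start + len(comet_encoded_i) + 1))
    print(positions)  # [14, 15, 16] -> <augment>, then the two knowledge tokens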
@@ -634,7 +640,13 @@ class GptDataset_KBERT(Dataset):
         lm_x += [-100] + response_encoded + [self.eos]
         soft_position_x += list(range(soft_loc, soft_loc + len(response_encoded) + 2))
+        x = x[:self.max_length]
+        type_x = type_x[:self.max_length]
+        lm_x = lm_x[:self.max_length]
+        soft_position_x = soft_position_x[:self.max_length]
+        # build attention mask
+        attention_mask = torch.tril(torch.ones(len(x), len(x)))
         if self.args.kbert_mask:
@@ -691,11 +703,12 @@ def get_data(args, tokenizer, split_size):
         pickle_handler = open('../data_processed/' + args.special_input, 'rb')
         x_y_meta = pickle.load(pickle_handler)
         gpt_data = GptDataset(x_y_meta, tokenizer, args.output_dir, num_turns=args.num_turns)
-    elif not args.kbert:
-        print("Using full data.")
-        pickle_handler = open('../data_processed/x_y_with_comet', 'rb')  # TODO: change back to the old data.
-        x_y_meta = pickle.load(pickle_handler)
-        gpt_data = GptDataset_full(x_y_meta, tokenizer, args=args)
+    # #======================origin without kbert======
+    # elif not args.kbert:
+    #     print("Using full data.")
+    #     pickle_handler = open('../data_processed/x_y_with_comet', 'rb')  # TODO: change back to the old data.
+    #     x_y_meta = pickle.load(pickle_handler)
+    #     gpt_data = GptDataset_full(x_y_meta, tokenizer, args=args)
     else:
         print("Using KBERT data")
         gpt_data = GptDataset_KBERT(tokenizer, args=args)
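The __getitem__ hunk above starts building the K-BERT-style attention mask (a causal torch.tril base, edited further when kbert_mask is set), but the diff is folded before those edits. As a rough sketch of the general K-BERT visibility idea only — the span bookkeeping below (aug_spans, anchor indices) is hypothetical, not this file's actual variables:

    import torch

    def kbert_style_mask(seq_len, aug_spans):
        """Sketch of a K-BERT-style visibility matrix over a causal mask.

        aug_spans: list of (start, end, anchor) triples, where tokens
        start..end-1 are injected knowledge attached to the token at
        index `anchor`. Hypothetical helper, not the repo's exact code.
        """
        mask = torch.tril(torch.ones(seq_len, seq_len))
        for start, end, anchor in aug_spans:
            # Ordinary tokens cannot see the injected knowledge span ...
            mask[:, start:end] = 0.0
            # ... except the injected tokens themselves (causally, among each other),
            mask[start:end, start:end] = torch.tril(torch.ones(end - start, end - start))
            # and the injected tokens can see their anchor token's prefix.
            mask[start:end, :anchor + 1] = 1.0
        return mask

    # e.g. a 10-token sequence whose tokens 6..8 are knowledge anchored at token 3
    print(kbert_style_mask(10, [(6, 9, 3)]))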
code/gpt_sample.py  (+47 −23)
@@ -92,7 +92,7 @@ def sample_sequence(model, length, context, num_samples=1, temperature=1,
     if torch.cuda.is_available():
         output_attention_mask = output_attention_mask.cuda()
     with torch.no_grad():
-        for i in trange(length):
+        for i in range(length):
             # inputs = {'input_ids': generated, 'past': None, 'key_word': key_word, 'use_keyword':use_keyword}
             current_length = generated.shape[-1]
             if args.kbert:
@@ -144,22 +144,17 @@ def run_model(args, model, tokenizer, test_loader):
     hyp = []
     ref = []
     context = []
-    f = open('../result/' + args.output_dir + '.txt', 'w')
-    f_ref = open('../result/reference_' + args.output_dir + '.txt', 'w')
-    for i, sample in enumerate(test_loader):
-        # if args.cross_attention:
-        #     x, type_x, pos_x, lm_x, x_len, meta, keyword_x = sample
-        # else:
-        #     x, type_x, pos_x, lm_x, x_len, meta = sample
-        #     keyword_x = None
+    # f = open('../result/'+args.output_dir+'.txt','w')
+    # f_ref = open('../result/reference_'+args.output_dir+'.txt','w')
+    for sample in tqdm(test_loader):
         x, type_x, pos_x, lm_x, x_len, attention_mask = sample
         input_len = x_len[0]  # The number of tokens of the context utterances
         context_tokens = x[0][:input_len + 1]  # at evaluation stage, the input is without the ground truth
         generated = 0
         for i in range(args.nsamples // args.batch_size):
-            decode_length = int(len(context_tokens))
+            decode_length = min(int(0.5 * len(context_tokens)), 192)
             # if args.augment:
            #     decode_length = int(0.5 * (5/6) * len(context_tokens))
             out = sample_sequence(
@@ -171,18 +166,31 @@ def run_model(args, model, tokenizer, test_loader):
             out = out[:, len(context_tokens):-1].tolist()  # the generated result, get rid of eos
             ref.append(tokenizer.decode(x[0].tolist()[len(context_tokens):-1]))
-            f_ref.write(tokenizer.decode(x[0].tolist()[len(context_tokens):-1]))
-            f_ref.write('\n')
+            # f_ref.write(tokenizer.decode(x[0].tolist()[len(context_tokens):-1]))
+            # f_ref.write('\n')
             hyp.append(tokenizer.decode(out[0]))
-            f.write(tokenizer.decode(out[0]))
-            f.write('\n')
+            # f.write(tokenizer.decode(out[0]))
+            # f.write('\n')
             context.append(tokenizer.decode(x[0].tolist()[:len(context_tokens)]))
-    f.close()
-    f_ref.close()
+    # f.close()
+    # f_ref.close()
     return hyp, ref, context


+def print_metric(hyp, ref, context, effective_length=1024):
+    # ===== Calculate rouge ========
+    rouge = Rouge()
+    print(len(hyp))
+    print(len(ref))
+    hyp, ref = zip(*[(x, y) for x, y in zip(hyp, ref) if len(x) > 3 and len(y) > 3])
+    print(len(hyp))
+    hyp = [x[:effective_length] for x in hyp]
+    ref = [x[:effective_length] for x in ref]
+    scores = rouge.get_scores(hyp, ref, avg=True)
+    print("ROUGE", scores)


 def calculate_metric(hyp, ref, context, effective_length=1024):
     # ===== Calculate rouge ========
     with open('../result/rouge.txt', 'a') as f_result:
@@ -222,6 +230,12 @@ def rouge_rank(hyp, ref, context):
     scores_content = sorted(scores_content, key=lambda x: x[0]['rouge-1']['f'], reverse=True)
     return scores_content


+def set_seed(seed):
+    np.random.seed(seed)
+    torch.random.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.manual_seed(seed)


 if __name__ == '__main__':
     USE_CUDA = torch.cuda.is_available()
     logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
@@ -256,17 +270,27 @@ if __name__ == '__main__':
     print(args)
     # Setup the random seeds.
-    np.random.seed(args.seed)
-    torch.random.manual_seed(args.seed)
-    torch.cuda.manual_seed(args.seed)
-    torch.manual_seed(args.seed)
+    set_seed(args.seed)
     model, tokenizer = load_model_data(args)
     split_size = {'train': 0.90, 'test': 0.05, 'val': 0.05}
     data_loader, test_loader, val_loader = get_data(args, split_size=split_size, tokenizer=tokenizer)
     # model, tokenizer, test_loader = load_model_data(args) # TODO: this is for old get_data
     # import pdb;pdb.set_trace()
-    hyp, ref, context = run_model(args, model, tokenizer, test_loader)
+    # seed_list = [0,10,]
+    seed_list = [20, 30]
+    # seed_list = [0,]
+    hyp_all = []
+    ref_all = []
+    context_all = []
+    for seed in seed_list:
+        set_seed(seed)
+        print("Using random seed {}".format(seed))
+        hyp, ref, context = run_model(args, model, tokenizer, test_loader)
+        hyp_all += hyp
+        ref_all += ref
+        context_all += context
     sample_ranked = rouge_rank(hyp, ref, context)
     with open("../data_processed/rouge_rank_" + args.model_dir, 'wb') as f:
         pickle.dump(sample_ranked, f)
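One detail worth flagging in the new print_metric above: hyp/ref pairs shorter than four characters are dropped before scoring, presumably because the rouge package raises on empty or degenerate hypotheses. A minimal usage sketch of that filtering (toy strings, assuming the same Rouge class the file already imports):

    # Sketch of print_metric-style filtering before ROUGE scoring.
    from rouge import Rouge

    hyp = ["the therapist reflects the feeling", "", "ok"]
    ref = ["the therapist reflects feeling", "a reference", "okay then"]

    # Drop degenerate pairs first: rouge.get_scores fails on empty hypotheses,
    # and very short strings are not meaningful to score anyway.
    pairs = [(h, r) for h, r in zip(hyp, ref) if len(h) > 3 and len(r) > 3]
    hyp, ref = zip(*pairs)

    scores = Rouge().get_scores(list(hyp), list(ref), avg=True)
    print(scores["rouge-1"]["f"], scores["rouge-2"]["f"], scores["rouge-l"]["f"])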
code/gpt_tuning.py  (+29 −19)
@@ -17,7 +17,7 @@ from tqdm import tqdm, trange
 import random
 from utils import clean_text, text_standardize, construct_grouped_parameters, get_unfreezing_funcs
 from gpt_loader import GptDataset, collate_fn, collate_fn_keyword, prepare_mix_review, update_mix_review, get_data
+import gpt_sample
 # OPTIONAL: if you want to have more information on what's happening, activate the logger as follows
 import logging
@@ -60,7 +60,7 @@ def parse_arguments():
                         help="The output directory where the model predictions and checkpoints will be written.")
     parser.add_argument('--seed', type=int, default=42)
     parser.add_argument('--num_train_epochs', type=int, default=1)
-    parser.add_argument('--train_batch_size', type=int, default=2)
+    parser.add_argument('--train_batch_size', type=int, default=1)
     parser.add_argument('--max_grad_norm', type=int, default=1)
     parser.add_argument('--learning_rate', type=float, default=6.25e-5)
     parser.add_argument('--warmup_proportion', type=float, default=0.1)
@@ -79,6 +79,7 @@ def parse_arguments():
     parser.add_argument('--kbert', action='store_true')
     parser.add_argument('--kbert_mask', action='store_true')
     parser.add_argument('--kbert_position', action='store_true')
+    parser.add_argument('--eval_rouge', action='store_true')
     args = parser.parse_args()
     print(args)
     return args
@@ -94,12 +95,13 @@ def load_model(args):
     # ====== Load GPT2 model ========
     model_dir = '../models/' + args.model_dir
     # model = GPT2LMHeadModel.from_pretrained(model_dir)
-    model = GPT2LMHeadModel.from_pretrained('gpt2')
+    model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
+    # model = GPT2LMHeadModel.from_pretrained('gpt2')
     if USE_CUDA:
         model.cuda()
     # tokenizer = GPT2Tokenizer.from_pretrained(model_dir)
-    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+    tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
+    # tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
     num_added_toks = tokenizer.add_tokens(['<speaker1>', '<speaker2>', '<augment>', '<ref>'])
     model.resize_token_embeddings(len(tokenizer))
     tokenizer.eos = 50256
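The unchanged tail of this hunk shows the usual pattern for the repo's special markers: add_tokens first, then resize_token_embeddings, otherwise the new token ids would index past the end of the pretrained embedding matrix. A standalone sketch of that pattern (standard transformers API; 'gpt2' here is just a stand-in for whichever checkpoint is loaded):

    from transformers import GPT2LMHeadModel, GPT2Tokenizer

    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2LMHeadModel.from_pretrained('gpt2')

    num_added = tokenizer.add_tokens(['<speaker1>', '<speaker2>', '<augment>', '<ref>'])
    # Without this resize, the new ids (>= 50257 for GPT-2) would be
    # out of range for the pretrained embedding matrix.
    model.resize_token_embeddings(len(tokenizer))
    print(num_added, len(tokenizer))  # 4 50261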
@@ -151,26 +153,19 @@ def main():
     model.train()
     exp_average_loss = None
     progress_bar = trange(int(args.num_train_epochs), desc="Epoch", leave=True)
-    min_eval_loss = 100  # large enough number
+    prev_eval_loss = 100  # large enough number
     early_terminate_counter = 0
     for epo in progress_bar:
         # for _ in range(int(args.num_train_epochs)):
         # data_loader = update_mix_review(gpt_train, gpt_alex, epo, mix_ratio=4, mix_decay=0.7)
         for sample in tqdm(data_loader):
             # for sample in data_loader:
             # import pdb;pdb.set_trace()
-            # if args.cross_attention:
-            #     x, type_x, pos_x, lm_x, x_len, _, keyword_x = sample
-            # else:
-            #     x, type_x, pos_x, lm_x, x_len, _ = sample
-            #     keyword_x = None
             x, type_x, pos_x, lm_x, x_len, attention_mask = sample
+            if not args.kbert:
+                attention_mask = None
             input_len = x_len[0]
             lm_x[:, x_len[0] + 1 + args.first_K_tokens:-1] = -1
             # loss = model(x, position_ids=pos_x, token_type_ids=type_x, labels=lm_x, key_word=keyword_x,
             #              use_keyword=args.cross_attention)[0]
             loss = model(x, position_ids=pos_x, token_type_ids=type_x, labels=lm_x, attention_mask=attention_mask)[0]
             loss.backward()
             optimizer.step()
@@ -181,10 +176,12 @@ def main():
         eval_loss = evaluate(model, val_loader, use_keyword=args.cross_attention)
         print("Eval loss: {}".format(eval_loss))
-        if eval_loss < min_eval_loss:  # save the model only when the loss is the smallest
+        if eval_loss < prev_eval_loss:  # save the model only when the loss is the smallest
             #if True:
             early_terminate_counter = 0
-            min_eval_loss = eval_loss
+            prev_eval_loss = eval_loss
             # # ==== Save the model ====
             # # Save a trained model, configuration and tokenizer
@@ -200,10 +197,23 @@ def main():
             model.save_pretrained(output_dir + args.output_dir)
             tokenizer.save_pretrained(output_dir + args.output_dir)
         else:
+            prev_eval_loss = eval_loss
             print("eval loss increasing!")
             early_terminate_counter += 1
-        if early_terminate_counter > 3:  # if the eval loss does not decrease for 5 epochs, terminate early.
+        if early_terminate_counter >= 2:  # if the eval loss does not decrease for 5 epochs, terminate early.
             print('=' * 30 + str(epo) + '=' * 30)
             return
+        if args.eval_rouge:
+            args.nsamples = 1
+            args.length = -1
+            args.batch_size = 1
+            args.temperature = 1.0
+            args.top_k = 0
+            args.top_p = 0.95
+            hyp, ref, context = gpt_sample.run_model(args, model, tokenizer, val_loader)
+            gpt_sample.print_metric(hyp, ref, context)
+            model.train()


 if __name__ == '__main__':
     main()
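Note the semantics change in the two hunks above: the checkpoint/early-stop test now compares against the previous epoch's loss (prev_eval_loss, updated in both branches) rather than the best loss so far, and training stops after two consecutive non-improving epochs instead of four epochs without a new best; the inline "5 epochs" comment is stale. A toy trace of the revised rule (made-up losses, simplified from main()):

    # Toy trace of the revised early-stopping rule (hypothetical losses).
    def stops_at(losses, patience=2):
        prev_eval_loss = 100  # large enough number, as in gpt_tuning.py
        early_terminate_counter = 0
        for epo, eval_loss in enumerate(losses):
            if eval_loss < prev_eval_loss:
                early_terminate_counter = 0
            else:
                early_terminate_counter += 1  # worse than the *previous* epoch
            prev_eval_loss = eval_loss        # updated in both branches
            if early_terminate_counter >= patience:
                return epo
        return None

    print(stops_at([3.2, 3.0, 3.1, 3.05]))  # None: the bump at epoch 2 recovers
    print(stops_at([3.2, 3.3, 3.4]))        # 2: two consecutive non-improving epochs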
code/run_compare_aug.sh  (+17 −10)
 #!/bin/bash
 pwd
-NUM_EPOCHS=5
+NUM_EPOCHS=10
 NUM_TURNS=5

-MODEL_PATH="kbert"
+MODEL_PATH="no_kbert_"${NUM_EPOCHS}
 mkdir -p ../models/${MODEL_PATH}
-python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert
-python gpt_sample.py --model_dir ${MODEL_PATH} --output_dir ${MODEL_PATH} --num_turns ${NUM_TURNS} --top_p 0.95 --kbert
+# python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS}
+python gpt_sample.py --model_dir ${MODEL_PATH} --output_dir ${MODEL_PATH} --num_turns ${NUM_TURNS} --top_p 0.95

 MODEL_PATH="kbert_mask_position_"${NUM_EPOCHS}
 mkdir -p ../models/${MODEL_PATH}
 # python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert --kbert_position --kbert_mask --eval_rouge
 # python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert --kbert_position --kbert_mask
 python gpt_sample.py --model_dir ${MODEL_PATH} --output_dir ${MODEL_PATH} --num_turns ${NUM_TURNS} --top_p 0.95 --kbert --kbert_position --kbert_mask

-MODEL_PATH="kbert_position"
+MODEL_PATH="kbert_position_"${NUM_EPOCHS}
 mkdir -p ../models/${MODEL_PATH}
-python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert --kbert_position
+# python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert --kbert_position
 python gpt_sample.py --model_dir ${MODEL_PATH} --output_dir ${MODEL_PATH} --num_turns ${NUM_TURNS} --top_p 0.95 --kbert --kbert_position

-MODEL_PATH="kbert_mask_position"
+MODEL_PATH="kbert_"${NUM_EPOCHS}
 mkdir -p ../models/${MODEL_PATH}
-python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert --kbert_position --kbert_mask
-python gpt_sample.py --model_dir ${MODEL_PATH} --output_dir ${MODEL_PATH} --num_turns ${NUM_TURNS} --top_p 0.95 --kbert --kbert_position --kbert_mask
+# python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert
+python gpt_sample.py --model_dir ${MODEL_PATH} --output_dir ${MODEL_PATH} --num_turns ${NUM_TURNS} --top_p 0.95 --kbert

-# echo "Finished."
+echo "Finished."