Commit 280fbb04 authored by DeepLearning VM
Server running code after full K-BERT implementation

parent c5662789
@@ -543,14 +543,19 @@ class GptDataset_KBERT(Dataset):
pickle_handler = open("../data_processed/data_comet_dict", 'rb')
self.data = pickle.load(pickle_handler)
self.max_length = 510
self.tokenizer = tokenizer
self.args = args
self.num_turns = args.num_turns
self.ref, self.speaker1, self.speaker2 = tokenizer.ref, tokenizer.speaker1, tokenizer.speaker2
self.eos = tokenizer.eos
self.augment = tokenizer.augment
# self.args.kbert_mask = True
# self.args.kbert_position = True
if not self.args.kbert:
self.args.kbert_mask = False
self.args.kbert_position = False
print("Not using kbert scheme.")
if self.args.kbert_mask:
print("using kbert-style attention mask")
if self.args.kbert_position:
@@ -613,13 +618,14 @@ class GptDataset_KBERT(Dataset):
last_related_token_index = len(srl_mask[i]) - 1 - srl_mask[i][::-1].index(1)
# add comet output
if comet_encoded[i] is not None:
x += [self.augment] + comet_encoded[i]
type_x += [self.augment] * (len(comet_encoded[i]) + 1)
if self.args.kbert:
if comet_encoded[i] is not None:
x += [self.augment] + comet_encoded[i]
type_x += [self.augment] * (len(comet_encoded[i]) + 1)
# +2: one for the special token, one because the knowledge position must start one past the related utterance token
soft_position_x += list(range(soft_loc + 2 + last_related_token_index,
soft_loc + 2 + last_related_token_index + (len(comet_encoded[i]) + 1)))
# +2: one for the special token, one because the knowledge position must start one past the related utterance token
soft_position_x += list(range(soft_loc + 2 + last_related_token_index,
soft_loc + 2 + last_related_token_index + (len(comet_encoded[i]) + 1)))
soft_loc += (len(context_encoded[i]) + 1)
is_speaker1 = not is_speaker1
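To make the soft-position arithmetic above concrete, here is a minimal, self-contained sketch (with hypothetical argument names, not the repository's code) of how appended knowledge tokens reuse position indices that continue from the utterance token they attach to, rather than from the end of the flattened sequence:

def soft_positions(utterance_len, knowledge_len, soft_loc, anchor_index):
    """Toy illustration.

    soft_loc     -- soft position of the utterance's leading speaker token
    anchor_index -- index inside the utterance of the last token the knowledge
                    relates to (cf. last_related_token_index above)
    """
    # the speaker token plus the utterance tokens get consecutive positions
    utt_pos = list(range(soft_loc, soft_loc + utterance_len + 1))
    # the knowledge branch starts one position past its anchor token
    start = soft_loc + 2 + anchor_index
    know_pos = list(range(start, start + knowledge_len + 1))  # +1 for <augment>
    return utt_pos, know_pos

print(soft_positions(utterance_len=5, knowledge_len=3, soft_loc=0, anchor_index=2))
# -> ([0, 1, 2, 3, 4, 5], [4, 5, 6, 7])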
@@ -634,7 +640,13 @@ class GptDataset_KBERT(Dataset):
lm_x += [-100] + response_encoded + [self.eos]
soft_position_x += list(range(soft_loc, soft_loc + len(response_encoded) + 2))
x = x[:self.max_length]
type_x = type_x[:self.max_length]
lm_x = lm_x[:self.max_length]
soft_position_x = soft_position_x[:self.max_length]
# build attention mask
attention_mask = torch.tril(torch.ones(len(x), len(x)))
if self.args.kbert_mask:
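The branch under if self.args.kbert_mask is collapsed in this diff. In the original K-BERT paper the visible matrix hides injected knowledge tokens from every position outside their own branch; the following is a generic sketch of that rule layered on a causal mask, with an illustrative span interface rather than this repository's exact code:

import torch

def kbert_style_mask(seq_len, knowledge_spans):
    """knowledge_spans: list of (start, end, anchor_end) triples, where tokens
    [start, end) are injected knowledge attached to the trunk token at index
    anchor_end (hypothetical illustration parameters)."""
    mask = torch.tril(torch.ones(seq_len, seq_len))
    for start, end, anchor_end in knowledge_spans:
        # trunk tokens after the branch do not attend to the knowledge tokens
        mask[end:, start:end] = 0
        # knowledge tokens attend only to their own branch and the prefix up to the anchor
        mask[start:end, anchor_end + 1:start] = 0
    return mask

m = kbert_style_mask(8, [(4, 6, 2)])  # knowledge at positions 4-5, anchored at token 2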
@@ -691,11 +703,12 @@ def get_data(args, tokenizer, split_size):
pickle_handler = open('../data_processed/' + args.special_input, 'rb')
x_y_meta = pickle.load(pickle_handler)
gpt_data = GptDataset(x_y_meta, tokenizer, args.output_dir, num_turns=args.num_turns)
elif not args.kbert:
print("Using full data.")
pickle_handler = open('../data_processed/x_y_with_comet', 'rb') # TODO: change back to the old data.
x_y_meta = pickle.load(pickle_handler)
gpt_data = GptDataset_full(x_y_meta, tokenizer, args=args)
# #======================origin without kbert======
# elif not args.kbert:
# print("Using full data.")
# pickle_handler = open('../data_processed/x_y_with_comet', 'rb') # TODO: change back to the old data.
# x_y_meta = pickle.load(pickle_handler)
# gpt_data = GptDataset_full(x_y_meta, tokenizer, args=args)
else:
print("Using KBERT data")
gpt_data = GptDataset_KBERT(tokenizer, args=args)
@@ -92,7 +92,7 @@ def sample_sequence(model, length, context, num_samples=1, temperature=1,
if torch.cuda.is_available():
output_attention_mask = output_attention_mask.cuda()
with torch.no_grad():
for i in trange(length):
for i in range(length):
# inputs = {'input_ids': generated, 'past': None, 'key_word': key_word, 'use_keyword':use_keyword}
current_length = generated.shape[-1]
if args.kbert:
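The body of sample_sequence is mostly collapsed in this diff. Since the scripts below pass --top_p 0.95, the per-step filtering presumably follows standard nucleus sampling; here is a minimal sketch of such a filter, illustrative only and not necessarily this repository's exact implementation:

import torch
import torch.nn.functional as F

def top_p_filter(logits, top_p=0.95):
    # sort logits, keep the smallest set of tokens whose cumulative probability
    # exceeds top_p, and mask everything else out
    sorted_logits, sorted_idx = torch.sort(logits, descending=True)
    cum_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
    remove = cum_probs > top_p
    remove[..., 1:] = remove[..., :-1].clone()  # always keep the top token
    remove[..., 0] = False
    logits[sorted_idx[remove]] = float('-inf')
    return logits

probs = F.softmax(top_p_filter(torch.randn(50257)), dim=-1)
next_token = torch.multinomial(probs, num_samples=1)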
@@ -144,22 +144,17 @@ def run_model(args, model, tokenizer, test_loader):
hyp = []
ref = []
context = []
f = open('../result/'+args.output_dir+'.txt','w')
f_ref = open('../result/reference_'+args.output_dir+'.txt','w')
for i,sample in enumerate(test_loader):
# if args.cross_attention:
# x, type_x, pos_x, lm_x, x_len, meta, keyword_x = sample
# else:
# x, type_x, pos_x, lm_x, x_len, meta = sample
# keyword_x = None
# f = open('../result/'+args.output_dir+'.txt','w')
# f_ref = open('../result/reference_'+args.output_dir+'.txt','w')
for sample in tqdm(test_loader):
x, type_x, pos_x, lm_x, x_len, attention_mask = sample
input_len = x_len[0] # The number of tokens of the context utterances
context_tokens = x[0][:input_len+1] # at evaluation stage, the input is without the ground truth
generated = 0
for i in range(args.nsamples // args.batch_size):
decode_length = int(len(context_tokens))
decode_length = min(int(0.5 * len(context_tokens)),192)
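# e.g. a 300-token context gives min(int(0.5 * 300), 192) == 150 decoded tokens,
# while a 500-token context is capped at 192 (token counts here are hypothetical)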
# if args.augment:
# decode_length = int(0.5 * (5/6) * len(context_tokens))
out = sample_sequence(
@@ -171,18 +166,31 @@ def run_model(args, model, tokenizer, test_loader):
out = out[:, len(context_tokens):-1].tolist() # the generated result, with the trailing eos stripped
ref.append(tokenizer.decode(x[0].tolist()[len(context_tokens):-1]))
f_ref.write(tokenizer.decode(x[0].tolist()[len(context_tokens):-1]))
f_ref.write('\n')
# f_ref.write(tokenizer.decode(x[0].tolist()[len(context_tokens):-1]))
# f_ref.write('\n')
hyp.append(tokenizer.decode(out[0]))
f.write(tokenizer.decode(out[0]))
f.write('\n')
# f.write(tokenizer.decode(out[0]))
# f.write('\n')
context.append(tokenizer.decode(x[0].tolist()[:len(context_tokens)]))
f.close()
f_ref.close()
# f.close()
# f_ref.close()
return hyp, ref, context
def print_metric(hyp, ref, context, effective_length=1024):
# ===== Calculate rouge ========
rouge = Rouge()
print(len(hyp))
print(len(ref))
hyp, ref = zip(*[(x,y) for x,y in zip(hyp, ref) if len(x)>3 and len(y)>3])
print(len(hyp))
hyp = [x[:effective_length] for x in hyp]
ref = [x[:effective_length] for x in ref]
scores = rouge.get_scores(hyp, ref, avg=True)
print("ROUGE", scores)
def calculate_metric(hyp, ref, context, effective_length=1024):
# ===== Calculate rouge ========
with open('../result/rouge.txt','a') as f_result:
@@ -222,6 +230,12 @@ def rouge_rank(hyp, ref, context):
scores_content = sorted(scores_content, key=lambda x:x[0]['rouge-1']['f'], reverse=True)
return scores_content
def set_seed(seed):
np.random.seed(seed)
torch.random.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.manual_seed(seed)
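set_seed above covers NumPy and PyTorch; for fully reproducible runs one would typically also seed Python's own random module (which gpt_tuning.py imports) and every visible GPU. A hedged extension, not part of this commit:

import random
import numpy as np
import torch

def set_seed_all(seed):
    random.seed(seed)                 # Python's stdlib RNG
    np.random.seed(seed)              # NumPy
    torch.manual_seed(seed)           # CPU RNG (torch.random.manual_seed is an alias)
    torch.cuda.manual_seed_all(seed)  # every visible CUDA device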
if __name__ == '__main__':
USE_CUDA = torch.cuda.is_available()
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
@@ -256,17 +270,27 @@ if __name__ == '__main__':
print(args)
# Setup the random seeds.
np.random.seed(args.seed)
torch.random.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
torch.manual_seed(args.seed)
set_seed(args.seed)
model, tokenizer = load_model_data(args)
split_size = {'train': 0.90, 'test': 0.05, 'val': 0.05}
data_loader, test_loader, val_loader = get_data(args, split_size=split_size, tokenizer=tokenizer)
# model, tokenizer, test_loader = load_model_data(args) # TODO: this is for old get_data
# import pdb;pdb.set_trace()
hyp, ref, context = run_model(args, model, tokenizer, test_loader)
# seed_list = [0,10,]
seed_list = [20,30]
# seed_list = [0,]
hyp_all = []
ref_all = []
context_all = []
for seed in seed_list:
set_seed(seed)
print("Using random seed {}".format(seed))
hyp, ref, context = run_model(args, model, tokenizer, test_loader)
hyp_all += hyp
ref_all += ref
context_all += context
sample_ranked = rouge_rank(hyp, ref, context)
with open("../data_processed/rouge_rank_" + args.model_dir,'wb') as f:
pickle.dump(sample_ranked, f)
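A hypothetical follow-up (not in this commit): once hyp_all, ref_all and context_all have been accumulated over every seed in seed_list, the aggregate could be scored with the same helper defined earlier in this file:

print_metric(hyp_all, ref_all, context_all)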
@@ -17,7 +17,7 @@ from tqdm import tqdm, trange
import random
from utils import clean_text, text_standardize, construct_grouped_parameters, get_unfreezing_funcs
from gpt_loader import GptDataset, collate_fn,collate_fn_keyword, prepare_mix_review, update_mix_review, get_data
import gpt_sample
# OPTIONAL: if you want to have more information on what's happening, activate the logger as follows
import logging
@@ -60,7 +60,7 @@ def parse_arguments():
help="The output directory where the model predictions and checkpoints will be written.")
parser.add_argument('--seed', type=int, default=42)
parser.add_argument('--num_train_epochs', type=int, default=1)
parser.add_argument('--train_batch_size', type=int, default=2)
parser.add_argument('--train_batch_size', type=int, default=1)
parser.add_argument('--max_grad_norm', type=int, default=1)
parser.add_argument('--learning_rate', type=float, default=6.25e-5)
parser.add_argument('--warmup_proportion', type=float, default=0.1)
@@ -79,6 +79,7 @@ def parse_arguments():
parser.add_argument('--kbert', action='store_true')
parser.add_argument('--kbert_mask', action='store_true')
parser.add_argument('--kbert_position', action='store_true')
parser.add_argument('--eval_rouge', action='store_true')
args = parser.parse_args()
print(args)
return args
@@ -94,12 +94,13 @@ def load_model(args):
# ====== Load GPT2 model ========
model_dir = '../models/' + args.model_dir
# model = GPT2LMHeadModel.from_pretrained(model_dir)
model = GPT2LMHeadModel.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
# model = GPT2LMHeadModel.from_pretrained('gpt2')
if USE_CUDA:
model.cuda()
# tokenizer = GPT2Tokenizer.from_pretrained(model_dir)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
# tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
num_added_toks = tokenizer.add_tokens(['<speaker1>', '<speaker2>', '<augment>', '<ref>'])
model.resize_token_embeddings(len(tokenizer))
tokenizer.eos = 50256
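For reference, 50256 is GPT-2's <|endoftext|> id, so tokenizer.eos = 50256 pins the end-of-sequence token to that vocabulary entry, and after add_tokens() the embedding matrix has to be resized, as the code above does. A standalone sketch of the same pattern, assuming these classes come from the Hugging Face transformers package:

from transformers import GPT2LMHeadModel, GPT2Tokenizer

tok = GPT2Tokenizer.from_pretrained('gpt2-medium')
mdl = GPT2LMHeadModel.from_pretrained('gpt2-medium')
tok.add_tokens(['<speaker1>', '<speaker2>', '<augment>', '<ref>'])  # 4 new special tokens
mdl.resize_token_embeddings(len(tok))                               # 50257 + 4 embedding rows
assert tok.convert_tokens_to_ids('<|endoftext|>') == 50256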
@@ -151,26 +153,19 @@ def main():
model.train()
exp_average_loss = None
progress_bar = trange(int(args.num_train_epochs), desc="Epoch", leave=True)
min_eval_loss = 100 # large enough number
prev_eval_loss = 100 # large enough number
early_terminate_counter = 0
for epo in progress_bar:
# for _ in range(int(args.num_train_epochs)):
# data_loader = update_mix_review(gpt_train, gpt_alex, epo, mix_ratio=4, mix_decay=0.7)
for sample in tqdm(data_loader):
# for sample in data_loader:
# import pdb;pdb.set_trace()
# if args.cross_attention:
# x, type_x, pos_x, lm_x, x_len, _, keyword_x = sample
# else:
# x, type_x, pos_x, lm_x, x_len, _ = sample
# keyword_x = None
x, type_x, pos_x, lm_x, x_len, attention_mask = sample
if not args.kbert:
attention_mask = None
input_len = x_len[0]
lm_x[:, x_len[0] + 1 + args.first_K_tokens:-1] = -1
# loss = model(x, position_ids=pos_x, token_type_ids=type_x, labels=lm_x, key_word=keyword_x,
# use_keyword=args.cross_attention)[0]
loss = model(x, position_ids=pos_x, token_type_ids=type_x, labels=lm_x, attention_mask=attention_mask)[0]
loss.backward()
optimizer.step()
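The line lm_x[:, x_len[0] + 1 + args.first_K_tokens:-1] = -1 restricts the LM loss to the first K response tokens. A small illustrative sketch of that masking idea; the ignore value depends on the library version (current transformers uses -100, some older GPT-2 ports used -1), and the numbers below are toy values:

import torch

IGNORE = -100                            # assumption: current transformers ignore_index
lm_x = torch.arange(20).unsqueeze(0)     # pretend label ids for a batch of one
context_len, first_K = 8, 5              # stand-ins for x_len[0] and args.first_K_tokens
lm_x[:, :context_len + 1] = IGNORE             # context labels (the dataset above already ignores these)
lm_x[:, context_len + 1 + first_K:-1] = IGNORE # keep loss only on the first K response tokens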
@@ -181,10 +176,12 @@ def main():
eval_loss = evaluate(model, val_loader, use_keyword=args.cross_attention)
print("Eval loss: {}".format(eval_loss))
if eval_loss < min_eval_loss: # save the model only when the loss is the smallest
if eval_loss < prev_eval_loss: # save the model only when the eval loss has decreased since the previous evaluation
#if True:
early_terminate_counter = 0
min_eval_loss = eval_loss
prev_eval_loss = eval_loss
# # ==== Save the model ====
# # Save a trained model, configuration and tokenizer
@@ -200,10 +197,23 @@ def main():
model.save_pretrained(output_dir + args.output_dir)
tokenizer.save_pretrained(output_dir + args.output_dir)
else:
prev_eval_loss = eval_loss
print("eval loss increasing!")
early_terminate_counter += 1
if early_terminate_counter > 3: # if the eval loss does not decrease for 5 epochs, terminate early.
if early_terminate_counter >= 2: # terminate early if the eval loss has not decreased for 2 consecutive evaluations.
print('='*30+str(epo)+'='*30)
return
if args.eval_rouge:
args.nsamples = 1
args.length = -1
args.batch_size = 1
args.temperature = 1.0
args.top_k = 0
args.top_p = 0.95
hyp, ref, context = gpt_sample.run_model(args, model, tokenizer, val_loader)
gpt_sample.print_metric(hyp, ref, context)
model.train()
if __name__ == '__main__':
main()
#!/bin/bash
pwd
NUM_EPOCHS=5
NUM_EPOCHS=10
NUM_TURNS=5
MODEL_PATH="kbert"
MODEL_PATH="no_kbert_"${NUM_EPOCHS}
mkdir -p ../models/${MODEL_PATH}
python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert
python gpt_sample.py --model_dir ${MODEL_PATH} --output_dir ${MODEL_PATH} --num_turns ${NUM_TURNS} --top_p 0.95 --kbert
# python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS}
python gpt_sample.py --model_dir ${MODEL_PATH} --output_dir ${MODEL_PATH} --num_turns ${NUM_TURNS} --top_p 0.95
MODEL_PATH="kbert_mask_position_"${NUM_EPOCHS}
mkdir -p ../models/${MODEL_PATH}
# python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert --kbert_position --kbert_mask --eval_rouge
# python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert --kbert_position --kbert_mask
python gpt_sample.py --model_dir ${MODEL_PATH} --output_dir ${MODEL_PATH} --num_turns ${NUM_TURNS} --top_p 0.95 --kbert --kbert_position --kbert_mask
MODEL_PATH="kbert_position"
MODEL_PATH="kbert_position_"${NUM_EPOCHS}
mkdir -p ../models/${MODEL_PATH}
python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert --kbert_position
# python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert --kbert_position
python gpt_sample.py --model_dir ${MODEL_PATH} --output_dir ${MODEL_PATH} --num_turns ${NUM_TURNS} --top_p 0.95 --kbert --kbert_position
MODEL_PATH="kbert_mask_position"
MODEL_PATH="kbert_"${NUM_EPOCHS}
mkdir -p ../models/${MODEL_PATH}
python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert --kbert_position --kbert_mask
python gpt_sample.py --model_dir ${MODEL_PATH} --output_dir ${MODEL_PATH} --num_turns ${NUM_TURNS} --top_p 0.95 --kbert --kbert_position --kbert_mask
# python gpt_tuning.py --output_dir ${MODEL_PATH} --num_train_epochs ${NUM_EPOCHS} --num_turns ${NUM_TURNS} --kbert
python gpt_sample.py --model_dir ${MODEL_PATH} --output_dir ${MODEL_PATH} --num_turns ${NUM_TURNS} --top_p 0.95 --kbert
#echo "Finished."
echo "Finished."