# Short-context sanity check: run one chat-formatted prompt through the model.
# NOTE(review): `model`, `tokenizer`, and `torch` are assumed to be set up
# earlier in the file (model is .cuda().eval()) — confirm against full source.
with torch.no_grad():
    # short context
    messages = [{"role": "user", "content": "Tell me about yourself."}]
    # Let the tokenizer's chat template build the model input; return_dict=True
    # yields a mapping (input_ids, attention_mask) we can splat into generate().
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=50)
    print(f"Input Length: {inputs['input_ids'].shape[1]}")
    # Decodes the full sequence (prompt + completion), special tokens stripped.
    print(f"Output: {tokenizer.decode(outputs[0], skip_special_tokens=True)}")
@@ -43,7 +43,8 @@ with torch.no_grad():
4343 # long context
4444 with open (" data/toy/infbench.json" , encoding = " utf-8" ) as f:
4545 example = json.load(f)
46- inputs = tokenizer(example[" context" ], return_tensors = " pt" ).to(" cuda" )
46+ messages = [{" role" : " user" , " content" : example[" context" ]}]
47+ inputs = tokenizer.apply_chat_template(messages, tokenize = True , add_generation_prompt = True , return_tensors = " pt" , return_dict = True ).to(" cuda" )
4748 outputs = model.generate(** inputs, do_sample = False , top_p = 1 , temperature = 1 , max_new_tokens = 20 )[:, inputs[" input_ids" ].shape[1 ]:]
4849 print (" *" * 20 )
4950 print (f " Input Length: { inputs[' input_ids' ].shape[1 ]} " )
# NOTE(review): "0 commit comments" below was GitHub commit-page residue from
# the scrape, not part of the script — safe to delete.