trainer
目录
基本用法
下面是一个使用示例,重点是 TrainingArguments 和 data_collator。
# Example: masked-language-model training driven by a Trainer.
# NOTE(review): `model` and `tokenizer` are not defined in this snippet and
# must be created beforehand; the class names used here are presumably
# imported from Hugging Face `transformers` -- confirm against the full file.

# Dataset built from a plain-text file, with block_size=512.
dataset = LineByLineTextDataset(
    tokenizer=tokenizer,
    file_path='./text.txt',
    block_size=512,
)

# Collator configured for masked language modeling (mlm=True) with a
# masking probability of 0.15.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=True,
    mlm_probability=0.15,
)

# Training configuration: output location, epochs, batch size, save interval.
training_args = TrainingArguments(
    output_dir='./outputs/',
    overwrite_output_dir=True,
    num_train_epochs=100,
    per_device_train_batch_size=16,
    save_steps=5000,
)

# Wire the model, arguments, collator and dataset together.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset,
)

trainer.train()
# Save the trained model into the same directory used as output_dir.
trainer.save_model('./outputs/')