nerdemma

TechIA

Aug 31st, 2024
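The script below fine-tunes bert-base-uncased for binary classification with the Hugging Face Trainer. It expects the input file to be CSV-formatted with a 'text' column and an integer 'label' column. A hypothetical sample of the expected layout (the rows are placeholders, not real data):

text,label
"some benign log line",0
"some suspicious log line",1
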
import pandas as pd
from datasets import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch

# Load your data from a CSV file; it must contain a 'text' column and an
# integer 'label' column for the Trainer to work
csv_file = "xenodm.log"
df = pd.read_csv(csv_file)

# Convert the DataFrame to a Hugging Face Dataset
dataset = Dataset.from_pandas(df)

# Load the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Define the model (num_labels must match the number of classes in 'label')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Set up training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
)

# Define the Trainer (note: this evaluates on the training set; in practice
# you would hold out a separate validation split)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
)

# Fine-tune the model
trainer.train()

# Save the fine-tuned model
model.save_pretrained("fine_tuned_bert")
tokenizer.save_pretrained("fine_tuned_bert")

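To sanity-check the result, the saved model can be reloaded from the "fine_tuned_bert" directory and run on a single string. A minimal sketch; the sample sentence is a placeholder, and the printed class id maps back to whatever the 'label' column encoded:

from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Reload the fine-tuned model and tokenizer from disk
tokenizer = BertTokenizer.from_pretrained("fine_tuned_bert")
model = BertForSequenceClassification.from_pretrained("fine_tuned_bert")
model.eval()

# Classify one example string (placeholder text)
inputs = tokenizer("some log line to classify", return_tensors="pt", truncation=True)
with torch.no_grad():
    logits = model(**inputs).logits
predicted_class = logits.argmax(dim=-1).item()
print(predicted_class)  # 0 or 1, matching the ids in the 'label' column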
Tags: py IA