import pandas as pd
from datasets import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch

# Load your data from a CSV file.
# Note: the file must be comma-separated with a 'text' column (used for
# tokenization below) and a 'label' column (required by the Trainer for
# sequence classification).
csv_file = "xenodm.log"
df = pd.read_csv(csv_file)

# Convert the DataFrame to a Hugging Face Dataset
dataset = Dataset.from_pandas(df)

# Load the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True)

tokenized_dataset = dataset.map(tokenize_function, batched=True)
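# The Trainer below reuses the full dataset for evaluation. If you want a
# held-out split instead, the datasets library provides train_test_split.
# A minimal sketch, left commented out so the script's behavior is unchanged
# (the 20% test fraction is an arbitrary choice):
# split = tokenized_dataset.train_test_split(test_size=0.2)
# train_ds, eval_ds = split["train"], split["test"]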
# Define the model (num_labels=2 assumes a binary classification task)
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Set up training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
)

# Define the Trainer.
# Note: the same tokenized dataset serves as both train and eval set here,
# so the reported eval metrics will not reflect generalization to unseen data.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
)

# Fine-tune the model
trainer.train()

# Save the fine-tuned model and tokenizer
model.save_pretrained("fine_tuned_bert")
tokenizer.save_pretrained("fine_tuned_bert")
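# Quick sanity check: reload the saved directory and classify one string.
# A minimal sketch, assuming labels were encoded as integers (0/1) in the
# original CSV; "example input text" is a placeholder.
loaded_tokenizer = BertTokenizer.from_pretrained("fine_tuned_bert")
loaded_model = BertForSequenceClassification.from_pretrained("fine_tuned_bert")
loaded_model.eval()

inputs = loaded_tokenizer("example input text", return_tensors="pt", truncation=True)
with torch.no_grad():
    logits = loaded_model(**inputs).logits
predicted_label = torch.argmax(logits, dim=-1).item()
print(predicted_label)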