[200 PyTorch Deep Learning Projects in Practice] Building a Rumor Detection System with a Transformer | Example 3

Posted: 2024-10-27 08:45:17
The model embeds token IDs, runs them through a stack of Transformer encoder layers, mean-pools the output over the real (non-padding) tokens, and maps the pooled vector to two classes (rumor / not rumor):

```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer


class RumorDetector(nn.Module):
    """Encoder-only Transformer for binary rumor classification."""

    def __init__(self, vocab_size=30522, num_classes=2, d_model=128, nhead=8,
                 num_encoder_layers=6, dim_feedforward=2048, dropout=0.1):
        super().__init__()
        # Map token IDs into the d_model space the encoder expects.
        self.embedding = nn.Embedding(vocab_size, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model, nhead, dim_feedforward, dropout, batch_first=True)
        # A decoder is only needed for sequence generation;
        # classification works with the encoder stack alone.
        self.encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, input_ids, attention_mask):
        x = self.embedding(input_ids)              # (batch, seq_len, d_model)
        # True marks padding positions the encoder should ignore.
        padding_mask = attention_mask == 0
        memory = self.encoder(x, src_key_padding_mask=padding_mask)
        # Mean-pool over the real (non-padding) tokens only.
        mask = attention_mask.unsqueeze(-1).float()
        pooled = (memory * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1.0)
        return self.fc(pooled)                     # raw logits for CrossEntropyLoss
```
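Before training, a quick shape check with random inputs confirms the forward pass is wired correctly; this is a minimal sketch with arbitrary batch size and sequence length, not part of the original script:

```python
# Sanity check: push random token IDs through the untrained model.
model = RumorDetector()
ids = torch.randint(0, 30522, (4, 16))       # (batch, seq_len)
mask = torch.ones(4, 16, dtype=torch.long)   # no padding in this fake batch
print(model(ids, mask).shape)                # torch.Size([4, 2])
```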
The Dataset wraps a list of `{'text', 'label'}` dicts and tokenizes each text to a fixed length:

```python
class RumorDataset(Dataset):
    """Tokenizes raw texts into fixed-length input IDs and attention masks."""

    def __init__(self, data, tokenizer, max_len=128):
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        item = self.data[index]
        text = item['text']
        label = item['label']
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',   # pad (and truncate) every text to max_len
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long),
        }
```
Next, build the tokenizer, datasets, loaders, model, loss, and optimizer. The two-sentence corpus is only a placeholder; substitute a real rumor-detection dataset in practice.

```python
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Toy placeholder corpus; replace with a real rumor dataset.
train_data = [{'text': 'This is a rumor', 'label': 1},
              {'text': 'This is not a rumor', 'label': 0}]
test_data = [{'text': 'This is a rumor', 'label': 1},
             {'text': 'This is not a rumor', 'label': 0}]

train_dataset = RumorDataset(train_data, tokenizer)
test_dataset = RumorDataset(test_data, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

net = RumorDetector(vocab_size=tokenizer.vocab_size)
criterion = nn.CrossEntropyLoss()                  # expects raw logits
optimizer = optim.Adam(net.parameters(), lr=1e-4)
```
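As a quick check (not part of the original script), indexing the dataset should return fixed-length tensors:

```python
sample = train_dataset[0]
print(sample['input_ids'].shape)       # torch.Size([128])
print(sample['attention_mask'].shape)  # torch.Size([128])
print(sample['label'])                 # tensor(1)
```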
Each epoch trains on the full training set, then measures accuracy on the held-out set:

```python
for epoch in range(10):
    # Training pass.
    net.train()
    for batch in train_loader:
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        label = batch['label']

        optimizer.zero_grad()
        output = net(input_ids, attention_mask)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

    # Evaluation pass.
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']
            label = batch['label']
            output = net(input_ids, attention_mask)
            predicted = torch.argmax(output, dim=1)
            total += label.size(0)
            correct += (predicted == label).sum().item()

    print('Epoch {}, Test Accuracy: {:.2f}%'.format(epoch + 1, 100 * correct / total))
```
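After training, the same tokenizer and model can score a new sentence. The `predict` helper below is a sketch added for illustration, not part of the original script:

```python
def predict(text, model, tokenizer, max_len=128):
    """Return 1 if the model classifies `text` as a rumor, else 0."""
    model.eval()
    enc = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )
    with torch.no_grad():
        logits = model(enc['input_ids'], enc['attention_mask'])
    return logits.argmax(dim=1).item()

print(predict('This is a rumor', net, tokenizer))
```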