import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer
class RumorDetector(nn.Module):
    def __init__(self, vocab_size=30522, num_classes=2, d_model=128, nhead=8,
                 num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=2048, dropout=0.1):
        super().__init__()
        # 30522 is the bert-base-uncased vocab size; the embedding maps token ids into d_model.
        self.embedding = nn.Embedding(vocab_size, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True)
        decoder_layer = nn.TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers)
        self.decoder = nn.TransformerDecoder(decoder_layer, num_decoder_layers)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, input_ids, attention_mask):
        # HF attention masks are 1 at real tokens; PyTorch key_padding_mask expects True at padded positions.
        padding_mask = attention_mask == 0
        x = self.embedding(input_ids)
        memory = self.encoder(x, src_key_padding_mask=padding_mask)
        output = self.decoder(x, memory, tgt_key_padding_mask=padding_mask,
                              memory_key_padding_mask=padding_mask)
        # Mean-pool over the sequence dimension before the classification head.
        return self.fc(output.mean(dim=1))
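
# Sanity check (an illustrative sketch, not part of the training pipeline): a batch of two
# length-16 token-id sequences should yield one logit pair per example. The helper names
# (_ids, _mask) are hypothetical; single-layer sizes just keep the check cheap.
_ids = torch.randint(0, 30522, (2, 16))
_mask = torch.ones(2, 16, dtype=torch.long)
assert RumorDetector(num_encoder_layers=1, num_decoder_layers=1)(_ids, _mask).shape == (2, 2)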
class RumorDataset(Dataset):
    def __init__(self, data, tokenizer, max_len=128):
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        item = self.data[index]
        text = item['text']
        label = item['label']
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',  # pad_to_max_length is deprecated
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }
# use_fast is an AutoTokenizer option, not a BertTokenizer one, so it is dropped here.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
train_data = [{'text': 'This is a rumor', 'label': 1}, {'text': 'This is not a rumor', 'label': 0}]
test_data = [{'text': 'This is a rumor', 'label': 1}, {'text': 'This is not a rumor', 'label': 0}]
train_dataset = RumorDataset(train_data, tokenizer)
test_dataset = RumorDataset(test_data, tokenizer)
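
# Illustrative peek at one encoded example (the `sample` name is just for inspection):
# input_ids and attention_mask are fixed-length 1-D tensors of size max_len, label a scalar.
sample = train_dataset[0]
print(sample['input_ids'].shape, sample['attention_mask'].shape, sample['label'])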
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
net = RumorDetector()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-4)
for epoch in range(10):
    net.train()
    for batch in train_loader:
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        label = batch['label']
        optimizer.zero_grad()
        output = net(input_ids, attention_mask)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
    # Evaluate on the test set after each epoch.
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']
            label = batch['label']
            output = net(input_ids, attention_mask)
            predicted = torch.argmax(output, dim=1)
            total += label.size(0)
            correct += (predicted == label).sum().item()
    print('Epoch {}, Test Accuracy: {:.1f}%'.format(epoch + 1, 100 * correct / total))
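
# Optionally persist the trained weights for later inference (the filename is illustrative).
torch.save(net.state_dict(), 'rumor_detector.pt')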