inference.py
import utils
from model import *
import pandas as pd
import torch
from torch import nn
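# Inference script: load the seq2seq encoder/decoder trained on data/questions.csv,
# tokenize the conditioning chunks in L with the training vocabulary, and print the
# generated token sequences.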
# word chunks that serve as conditioning for the generations
L = ["die"]
# Rebuild the vocabulary from the training questions so that tokenization at
# inference time matches the one used during training.
trainset = pd.read_csv('./data/questions.csv')
trainset = trainset.assign(clean=utils.replace_punctuation(trainset['ref']))
vocab_to_int, int_to_vocab = utils.get_tokens(trainset['clean'])

# Load the trained encoder and decoder onto the CPU.
encoder = torch.load("NERnlgenc_ques.pth", map_location='cpu')
decoder = torch.load("NERnlgdec_ques.pth", map_location='cpu')
def test(dataset, encoder, decoder, max_length=50, device=None):
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    for input_tensor in dataset:
        # Encode the conditioning tokens.
        h, c = encoder.init_hidden(device=device)
        encoder_outputs = torch.zeros(max_length, 2 * encoder.hidden_size).to(device)
        enc_outputs, enc_hidden = encoder(input_tensor, (h, c))
        encoder_outputs[:min(enc_outputs.shape[0], max_length)] = enc_outputs[:max_length, 0, :]

        # Greedy decoding: start from token 0 and feed each prediction back in
        # as the next input until token 1 is produced or max_length is reached.
        dec_input = torch.tensor([[0]], dtype=torch.long, device=device)
        dec_hidden = enc_hidden
        dec_outputs = []
        for ii in range(max_length):
            dec_out, dec_hidden, dec_attn = decoder(dec_input, dec_hidden, encoder_outputs)
            _, out_token = dec_out.topk(1)
            dec_input = out_token.detach().to(device)  # detach from history as input
            dec_outputs.append(out_token)
            if dec_input.item() == 1:
                break

        # Map token ids back to words and print the conditioning/generated pair.
        list1 = [int_to_vocab[each.item()] for each in input_tensor]
        list2 = [int_to_vocab[each.item()] for each in dec_outputs]
        print(list1)
        print(list2)
# Preprocess the conditioning chunks with the same pipeline as the training data
# and map them to token ids.
df = pd.DataFrame({'col': L})
df = df.assign(clean=utils.replace_punctuation(df['col']))
as_tokens = df['clean'].apply(lambda x: [vocab_to_int[each] for each in x.split()])
df = df.assign(tokenized=as_tokens)

# Generate one sequence per conditioning chunk.
max_length = 50
for i in range(len(L)):
    inp = torch.tensor(df['tokenized'][i], dtype=torch.long).view(-1, 1)
    test([inp], encoder, decoder, device='cpu', max_length=max_length)