forked from Vamsi995/Paraphrase-Generator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess.py
55 lines (33 loc) · 1.12 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# wget https://storage.googleapis.com/paws/english/paws_wiki_labeled_final.tar.gz
# tar -xzf paws_wiki_labeled_final.tar.gz
import csv
def _read_positive_pairs(tsv_path):
    """Return the sentence pairs from *tsv_path* whose label column is "1".

    The file is parsed as tab-separated values. The first row is skipped
    (presumably the column header -- confirm against the dataset), and for
    every remaining row whose fourth column equals the string "1" the
    second and third columns are collected as a ``(row[1], row[2])`` tuple.

    Args:
        tsv_path: Path of the tab-separated input file.

    Returns:
        A list of 2-tuples of strings, in file order.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-empty row has fewer than four columns.
    """
    pairs = []
    # newline="" is what the csv module asks for so it can do its own
    # newline handling; the explicit encoding keeps decoding independent of
    # the platform's locale default.
    with open(tsv_path, "r", newline="", encoding="utf-8") as tsv_file:
        # NOTE(review): default csv quoting is kept to preserve the existing
        # parsing; if the data contains literal '"' characters they are
        # interpreted as field quotes -- consider quoting=csv.QUOTE_NONE.
        reader = csv.reader(tsv_file, delimiter="\t")
        next(reader, None)  # skip the first row; tolerate an empty file
        for row in reader:
            if not row:
                # Blank lines produce empty rows; skip them instead of
                # crashing on row[3].
                continue
            if row[3] == "1":
                pairs.append((row[1], row[2]))
    return pairs


def _write_pairs(csv_path, pairs):
    """Write *pairs* to *csv_path* as comma-separated rows, one pair per row.

    Args:
        csv_path: Path of the output file; it is overwritten if it exists.
        pairs: Iterable of row sequences to write.
    """
    # Without newline="" the csv writer's "\r\n" terminator is translated
    # again on Windows, producing "\r\r\n" line endings.
    with open(csv_path, "w", newline="", encoding="utf-8") as csv_file:
        csv.writer(csv_file).writerows(pairs)


train_examples = _read_positive_pairs("final/train.tsv")
test_examples = _read_positive_pairs("final/test.tsv")
dev_examples = _read_positive_pairs("final/dev.tsv")

# The dev split is folded into the test output, dev rows first.
test_examples = dev_examples + test_examples

_write_pairs("PAW_Train.csv", train_examples)
_write_pairs("PAW_Test.csv", test_examples)