-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExternalSortDistinctMain.java
154 lines (143 loc) · 4.67 KB
/
ExternalSortDistinctMain.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
public class ExternalMergeSort {
private static int iteration = 0;
private static int localPassFileCounter = 0;
public static void main(String args[]) {
try {
File result = mergeSort(divideFileIntoChunks("input-file-path/input.txt")).get(0);
distinctValues(result, "output-file-path/finaloutput.txt");
}
catch (FileNotFoundException e) {
e.printStackTrace();
}
catch (IOException e) {
e.printStackTrace();
}
}
private static List<File> divideFileIntoChunks(String fileLocation) throws IOException {
List<File> chunkFiles = new LinkedList<File>();
BufferedReader br = new BufferedReader(new FileReader(new File(fileLocation)));
int chunkSize = 10; //Assuming we have enough memory to store 10 lines
String line = br.readLine();
int i = 0;
int chunkId = 0;
List<String> tempList = new ArrayList<String>();
while (line != null) {
tempList.add(line);
i++;
line = br.readLine();
if (i >= chunkSize) {
i = 0;
Collections.sort(tempList);
File newFile = new File("temp-file-path/TEMP_FILE_" + chunkId);
newFile.deleteOnExit();
BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
chunkFiles.add(newFile);
for (String s : tempList) {
bw.write(s);
bw.newLine();
}
bw.close();
chunkId++;
tempList.clear();
}
}
br.close();
return chunkFiles;
}
private static File distinctValues(File inputFile, String outputFileName) throws IOException {
File outputfile = null;
BufferedReader br = null;
BufferedWriter bw = null;
outputfile = new File(outputFileName);
br = new BufferedReader(new FileReader(inputFile));
bw = new BufferedWriter(new FileWriter(outputfile));
String str = null;
String previousStr = null;
str = br.readLine();
while (str != null) {
if (!str.equals(previousStr)) {
bw.write(str);
bw.newLine();
previousStr = str;
}
str = br.readLine();
}
bw.close();
br.close();
return outputfile;
}
private static List<File> mergeSort(List<File> chunkFiles) throws IOException {
if (chunkFiles.size() == 1) {
return chunkFiles;
}
chunkFiles = mergeSort(merge(chunkFiles));
return chunkFiles;
}
private static List<File> merge(List<File> chunkFiles) throws IOException {
int i = 0;
List<File> newChunkFileList = new ArrayList<>();
while (i < chunkFiles.size() - 1) {
//Merging the elements of File array by pairs
newChunkFileList.add(mergeTwoFiles(chunkFiles.get(i), chunkFiles.get(i + 1)));
i = i + 2;
localPassFileCounter++;
}
if (chunkFiles.size() % 2 != 0) { // Merging the last unpaired value in the new array
File file1 = newChunkFileList.remove(newChunkFileList.size() - 1);
newChunkFileList.add(mergeTwoFiles(file1, chunkFiles.get(chunkFiles.size() - 1)));
}
localPassFileCounter = 0;
iteration++;
return newChunkFileList;
}
private static File mergeTwoFiles(File first, File second) throws IOException {
File file = null;
file = new File("/Users/karanpreetsingh/Documents/ExternalMergeSort/TEMP_FILE_" + iteration + "_" + localPassFileCounter + ".txt");
file.deleteOnExit();
if ((first == null) || second == null)
return file;
BufferedReader reader1 = new BufferedReader(new FileReader(first));
BufferedReader reader2 = new BufferedReader(new FileReader(second));
BufferedWriter writer = new BufferedWriter(new FileWriter(file));
String str1 = reader1.readLine();
String str2 = reader2.readLine();
while (str1 != null || str2 != null) {
if (str1 != null && str2 != null) {
if (str1.compareTo(str2) <= 0) {
writer.write(str1);
str1 = reader1.readLine();
}
else {
writer.write(str2);
str2 = reader2.readLine();
}
writer.newLine();
}
if (str1 != null && str2 == null) { //Leftover entries from file 1
writer.write(str1);
str1 = reader1.readLine();
writer.newLine();
}
if (str2 != null && str1 == null) { //Leftover entries from file 2
writer.write(str2);
str2 = reader2.readLine();
writer.newLine();
}
}
reader1.close();
reader2.close();
writer.close();
return file;
}
}