-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathexternalMergeSortAlgo.cpp
198 lines (159 loc) · 4.85 KB
/
externalMergeSortAlgo.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#include <iostream>
#include <algorithm>
#include <queue>
#include <limits>
using namespace std;
struct MinHeapNode
{
// element to be stored
int element;
// array index from which the element is taken
int i;
};
// Comparison object to be used to order the heap
struct comp
{
bool operator()(const MinHeapNode &lhs, const MinHeapNode &rhs) const {
return lhs.element > rhs.element;
}
};
FILE* openFile(char* fileName, char* mode)
{
FILE* fp = fopen(fileName, mode);
if (fp == NULL)
{
perror("Error while opening the file.\n");
exit(EXIT_FAILURE);
}
return fp;
}
// Merges `k` sorted files. Names of files are assumed to be 1, 2, … `k`
void mergeFiles(char *output_file, int n, int k)
{
FILE* in[k];
for (int i = 0; i < k; i++)
{
char fileName[2];
// convert `i` to a string
snprintf(fileName, sizeof(fileName), "%d", i);
// open output files in reading mode
in[i] = openFile(fileName, "r");
}
// FINAL OUTPUT FILE
FILE *out = openFile(output_file, "w");
// Create a min-heap with `k` heap nodes. Every heap node has the first
// element of the scratch output file
MinHeapNode harr[k];
priority_queue<MinHeapNode, vector<MinHeapNode>, comp> pq;
int i;
for (i = 0; i < k; i++)
{
// break if no output file is empty and
// index `i` will be a number of input files
if (fscanf(in[i], "%d ", &harr[i].element) != 1) {
break;
}
// index of the scratch output file
harr[i].i = i;
pq.push(harr[i]);
}
int count = 0;
// One by one, get the minimum element from the min-heap and replace
// it with the next element. Run till all filled input files reach EOF.
while (count != i)
{
// Get the minimum element and store it in the output file
MinHeapNode root = pq.top();
pq.pop();
fprintf(out, "%d ", root.element);
// Find the next element that should replace the current root of the heap.
// The next element belongs to the same input file as the current
// minimum element.
if (fscanf(in[root.i], "%d ", &root.element) != 1 )
{
root.element = numeric_limits<int>::max();
count++;
}
// Replace the root with the next element of the input file
pq.push(root);
}
// close the input and output files
for (int i = 0; i < k; i++) {
fclose(in[i]);
}
fclose(out);
}
// Using a merge sort algorithm, create the initial runs and divide them
// evenly among the output files
void createInitialRuns(char *input_file, int run_size, int num_ways)
{
// For big input file
FILE *in = openFile(input_file, "r");
// output scratch files
FILE* out[num_ways];
char fileName[2];
for (int i = 0; i < num_ways; i++)
{
// convert `i` to a string
snprintf(fileName, sizeof(fileName), "%d", i);
// Open output files in write mode.
out[i] = openFile(fileName, "w");
}
// allocate a dynamic array large enough to accommodate runs of
// size `run_size`
int* arr = new int[run_size];
bool more_input = true;
int next_output_file = 0;
int i;
while (more_input)
{
// write `run_size` elements into `arr` from the input file
for (i = 0; i < run_size; i++)
{
if (fscanf(in, "%d ", &arr[i]) != 1)
{
more_input = false;
break;
}
}
// sort the array using merge sort
sort(arr, arr + i);
// write the records to the appropriate scratch output file
// can't assume that the loop runs to `run_size`
// since the last run's length may be less than `run_size`
for (int j = 0; j < i; j++) {
fprintf(out[next_output_file], "%d ", arr[j]);
}
next_output_file++;
}
// deallocate memory
delete arr;
// close the input and output files
for (int i = 0; i < num_ways; i++) {
fclose(out[i]);
}
fclose(in);
}
// Program to demonstrate external sorting
int main()
{
// number of partitions of the input file
int num_ways = 10;
// the size of each partition
int run_size = 1000;
char input_file[] = "input.txt";
char output_file[] = "output.txt";
FILE* in = openFile(input_file, "w");
srand(time(NULL));
// generate input
for (int i = 0; i < num_ways * run_size; i++) {
fprintf(in, "%d ", rand());
}
fclose(in);
// Read the input file, create the initial runs,
// and assign the runs to the scratch output files
createInitialRuns(input_file, run_size, num_ways);
// Merge the runs using the k–way merging
mergeFiles(output_file, run_size, num_ways);
return 0;
}