Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* external sort */
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- /****************************
- * implementation dependent *
- ****************************/
- /* template for workfiles (8.3 format) */
- #define FNAME "_sort%03d.dat"
- #define LNAME 13
- /* comparison operators */
- #define compLT(x,y) (x < y)
- #define compGT(x,y) (x > y)
- /* define the record to be sorted here */
- #define LRECL 100
- typedef int keyType;
- typedef struct recTypeTag {
- keyType key; /* sort key for record */
- #if LRECL
- char data[LRECL-sizeof(keyType)]; /* other fields */
- #endif
- } recType;
- /******************************
- * implementation independent *
- ******************************/
- typedef enum {false, true} bool;
- typedef struct tmpFileTag {
- FILE *fp; /* file pointer */
- char name[LNAME]; /* filename */
- recType rec; /* last record read */
- int dummy; /* number of dummy runs */
- bool eof; /* end-of-file flag */
- bool eor; /* end-of-run flag */
- bool valid; /* true if rec is valid */
- int fib; /* ideal fibonacci number */
- } tmpFileType;
- static tmpFileType **file; /* array of file info for tmp files */
- static int nTmpFiles; /* number of tmp files */
- static char *ifName; /* input filename */
- static char *ofName; /* output filename */
- static int level; /* level of runs */
- static int nNodes; /* number of nodes for selection tree */
- void deleteTmpFiles(void) {
- int i;
- /* delete merge files and free resources */
- if (file) {
- for (i = 0; i < nTmpFiles; i++) {
- if (file[i]) {
- if (file[i]->fp) fclose(file[i]->fp);
- if (*file[i]->name) remove(file[i]->name);
- free (file[i]);
- }
- }
- free (file);
- }
- }
- void termTmpFiles(int rc) {
- /* cleanup files */
- remove(ofName);
- if (rc == 0) {
- int fileT;
- /* file[T] contains results */
- fileT = nTmpFiles - 1;
- fclose(file[fileT]->fp); file[fileT]->fp = NULL;
- if (rename(file[fileT]->name, ofName)) {
- perror("io1");
- deleteTmpFiles();
- exit(1);
- }
- *file[fileT]->name = 0;
- }
- deleteTmpFiles();
- }
- void cleanExit(int rc) {
- /* cleanup tmp files and exit */
- termTmpFiles(rc);
- exit(rc);
- }
- void *safeMalloc(size_t size) {
- void *p;
- /* safely allocate memory and initialize to zero */
- if ((p = calloc(1, size)) == NULL) {
- printf("error: malloc failed, size = %d\n", size);
- cleanExit(1);
- }
- return p;
- }
- void initTmpFiles(void) {
- int i;
- tmpFileType *fileInfo;
- /* initialize merge files */
- if (nTmpFiles < 3) nTmpFiles = 3;
- file = safeMalloc(nTmpFiles * sizeof(tmpFileType*));
- fileInfo = safeMalloc(nTmpFiles * sizeof(tmpFileType));
- for (i = 0; i < nTmpFiles; i++) {
- file[i] = fileInfo + i;
- sprintf(file[i]->name, FNAME, i);
- if ((file[i]->fp = fopen(file[i]->name, "w+b")) == NULL) {
- perror("io2");
- cleanExit(1);
- }
- }
- }
- recType *readRec(void) {
- typedef struct iNodeTag { /* internal node */
- struct iNodeTag *parent;/* parent of internal node */
- struct eNodeTag *loser; /* external loser */
- } iNodeType;
- typedef struct eNodeTag { /* external node */
- struct iNodeTag *parent;/* parent of external node */
- recType rec; /* input record */
- int run; /* run number */
- bool valid; /* input record is valid */
- } eNodeType;
- typedef struct nodeTag {
- iNodeType i; /* internal node */
- eNodeType e; /* external node */
- } nodeType;
- static nodeType *node; /* array of selection tree nodes */
- static eNodeType *win; /* new winner */
- static FILE *ifp; /* input file */
- static bool eof; /* true if end-of-file, input */
- static int maxRun; /* maximum run number */
- static int curRun; /* current run number */
- iNodeType *p; /* pointer to internal nodes */
- static bool lastKeyValid; /* true if lastKey is valid */
- static keyType lastKey; /* last key written */
- /* read next record using replacement selection */
- /* check for first call */
- if (node == NULL) {
- int i;
- if (nNodes < 2) nNodes = 2;
- node = safeMalloc(nNodes * sizeof(nodeType));
- for (i = 0; i < nNodes; i++) {
- node[i].i.loser = &node[i].e;
- node[i].i.parent = &node[i/2].i;
- node[i].e.parent = &node[(nNodes + i)/2].i;
- node[i].e.run = 0;
- node[i].e.valid = false;
- }
- win = &node[0].e;
- lastKeyValid = false;
- if ((ifp = fopen(ifName, "rb")) == NULL) {
- printf("error: file %s, unable to open\n", ifName);
- cleanExit(1);
- }
- }
- while (1) {
- /* replace previous winner with new record */
- if (!eof) {
- if (fread(&win->rec, sizeof(recType), 1, ifp) == 1) {
- if ((!lastKeyValid || compLT(win->rec.key, lastKey))
- && (++win->run > maxRun))
- maxRun = win->run;
- win->valid = true;
- } else if (feof(ifp)) {
- fclose(ifp);
- eof = true;
- win->valid = false;
- win->run = maxRun + 1;
- } else {
- perror("io4");
- cleanExit(1);
- }
- } else {
- win->valid = false;
- win->run = maxRun + 1;
- }
- /* adjust loser and winner pointers */
- p = win->parent;
- do {
- bool swap;
- swap = false;
- if (p->loser->run < win->run) {
- swap = true;
- } else if (p->loser->run == win->run) {
- if (p->loser->valid && win->valid) {
- if (compLT(p->loser->rec.key, win->rec.key))
- swap = true;
- } else {
- swap = true;
- }
- }
- if (swap) {
- /* p should be winner */
- eNodeType *t;
- t = p->loser;
- p->loser = win;
- win = t;
- }
- p = p->parent;
- } while (p != &node[0].i);
- /* end of run? */
- if (win->run != curRun) {
- /* win->run = curRun + 1 */
- if (win->run > maxRun) {
- /* end of output */
- free(node);
- return NULL;
- }
- curRun = win->run;
- }
- /* output top of tree */
- if (win->run) {
- lastKey = win->rec.key;
- lastKeyValid = true;
- return &win->rec;
- }
- }
- }
- void makeRuns(void) {
- recType *win; /* winner */
- int fileT; /* last file */
- int fileP; /* next to last file */
- int j; /* selects file[j] */
- /* Make initial runs using replacement selection.
- * Runs are written using a Fibonacci distintbution.
- */
- /* initialize file structures */
- fileT = nTmpFiles - 1;
- fileP = fileT - 1;
- for (j = 0; j < fileT; j++) {
- file[j]->fib = 1;
- file[j]->dummy = 1;
- }
- file[fileT]->fib = 0;
- file[fileT]->dummy = 0;
- level = 1;
- j = 0;
- win = readRec();
- while (win) {
- bool anyrun;
- anyrun = false;
- for (j = 0; win && j <= fileP; j++) {
- bool run;
- run = false;
- if (file[j]->valid) {
- if (!compLT(win->key, file[j]->rec.key)) {
- /* append to an existing run */
- run = true;
- } else if (file[j]->dummy) {
- /* start a new run */
- file[j]->dummy--;
- run = true;
- }
- } else {
- /* first run in file */
- file[j]->dummy--;
- run = true;
- }
- if (run) {
- anyrun = true;
- /* flush run */
- while(1) {
- if (fwrite(win, sizeof(recType), 1, file[j]->fp) != 1) {
- perror("io3");
- cleanExit(1);
- }
- file[j]->rec.key = win->key;
- file[j]->valid = true;
- if ((win = readRec()) == NULL) break;
- if (compLT(win->key, file[j]->rec.key)) break;
- }
- }
- }
- /* if no room for runs, up a level */
- if (!anyrun) {
- int t;
- level++;
- t = file[0]->fib;
- for (j = 0; j <= fileP; j++) {
- file[j]->dummy = t + file[j+1]->fib - file[j]->fib;
- file[j]->fib = t + file[j+1]->fib;
- }
- }
- }
- }
- void rewindFile(int j) {
- /* rewind file[j] and read in first record */
- file[j]->eor = false;
- file[j]->eof = false;
- rewind(file[j]->fp);
- if (fread(&file[j]->rec, sizeof(recType), 1, file[j]->fp) != 1) {
- if (feof(file[j]->fp)) {
- file[j]->eor = true;
- file[j]->eof = true;
- } else {
- perror("io5");
- cleanExit(1);
- }
- }
- }
- void mergeSort(void) {
- int fileT;
- int fileP;
- int j;
- tmpFileType *tfile;
- /* polyphase merge sort */
- fileT = nTmpFiles - 1;
- fileP = fileT - 1;
- /* prime the files */
- for (j = 0; j < fileT; j++) {
- rewindFile(j);
- }
- /* each pass through loop merges one run */
- while (level) {
- while(1) {
- bool allDummies;
- bool anyRuns;
- /* scan for runs */
- allDummies = true;
- anyRuns = false;
- for (j = 0; j <= fileP; j++) {
- if (!file[j]->dummy) {
- allDummies = false;
- if (!file[j]->eof) anyRuns = true;
- }
- }
- if (anyRuns) {
- int k;
- keyType lastKey;
- /* merge 1 run file[0]..file[P] --> file[T] */
- while(1) {
- /* each pass thru loop writes 1 record to file[fileT] */
- /* find smallest key */
- k = -1;
- for (j = 0; j <= fileP; j++) {
- if (file[j]->eor) continue;
- if (file[j]->dummy) continue;
- if (k < 0 ||
- (k != j && compGT(file[k]->rec.key, file[j]->rec.key)))
- k = j;
- }
- if (k < 0) break;
- /* write record[k] to file[fileT] */
- if (fwrite(&file[k]->rec, sizeof(recType), 1,
- file[fileT]->fp) != 1) {
- perror("io6");
- cleanExit(1);
- }
- /* replace record[k] */
- lastKey = file[k]->rec.key;
- if (fread(&file[k]->rec, sizeof(recType), 1,
- file[k]->fp) == 1) {
- /* check for end of run on file[s] */
- if (compLT(file[k]->rec.key, lastKey))
- file[k]->eor = true;
- } else if (feof(file[k]->fp)) {
- file[k]->eof = true;
- file[k]->eor = true;
- } else {
- perror("io7");
- cleanExit(1);
- }
- }
- /* fixup dummies */
- for (j = 0; j <= fileP; j++) {
- if (file[j]->dummy) file[j]->dummy--;
- if (!file[j]->eof) file[j]->eor = false;
- }
- } else if (allDummies) {
- for (j = 0; j <= fileP; j++)
- file[j]->dummy--;
- file[fileT]->dummy++;
- }
- /* end of run */
- if (file[fileP]->eof && !file[fileP]->dummy) {
- /* completed a fibonocci-level */
- level--;
- if (!level) {
- /* we're done, file[fileT] contains data */
- return;
- }
- /* fileP is exhausted, reopen as new */
- fclose(file[fileP]->fp);
- if ((file[fileP]->fp = fopen(file[fileP]->name, "w+b"))
- == NULL) {
- perror("io8");
- cleanExit(1);
- }
- file[fileP]->eof = false;
- file[fileP]->eor = false;
- rewindFile(fileT);
- /* f[0],f[1]...,f[fileT] <-- f[fileT],f[0]...,f[T-1] */
- tfile = file[fileT];
- memmove(file + 1, file, fileT * sizeof(tmpFileType*));
- file[0] = tfile;
- /* start new runs */
- for (j = 0; j <= fileP; j++)
- if (!file[j]->eof) file[j]->eor = false;
- }
- }
- }
- }
- void extSort(void) {
- initTmpFiles();
- makeRuns();
- mergeSort();
- termTmpFiles(0);
- }
- int main(int argc, char *argv[]) {
- /* command-line:
- *
- * ext ifName ofName nTmpFiles nNodes
- *
- * ext in.dat out.dat 5 2000
- * reads in.dat, sorts using 5 files and 2000 nodes, output to out.dat
- */
- if (argc != 5) {
- printf("%s ifName ofName nTmpFiles nNodes\n", argv[0]);
- cleanExit(1);
- }
- ifName = argv[1];
- ofName = argv[2];
- nTmpFiles = atoi(argv[3]);
- nNodes = atoi(argv[4]);
- printf("extSort: nFiles=%d, nNodes=%d, lrecl=%d\n",
- nTmpFiles, nNodes, sizeof(recType));
- extSort();
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement