首页 > 代码库 > 磁盘文件排序 编程珠玑

磁盘文件排序 编程珠玑

  开始看编程珠玑了,第一个就是进行磁盘排序的问题,想到了也只是归并排序,但题目要求1M内存,这个算法不可行。编程珠玑写到使用位图(分两次操作读写可以成功实现,小于内存1M),详情看编程珠玑第一章。

题目:给定10^7数据,对大数据进行排序。要求内存只有1M,时间可以接受,较短。

解决方法:1.多路归并。2.位图操作。

在这里看了july的文章,写的真心不错。推荐:http://blog.csdn.net/v_JULY_v/article/details/6451990

其中有个生成不同随机数的程序附下:

 1 #include <iostream>   2 #include <time.h>   3 #include <assert.h>   4 using namespace std;   5    6 const int size = 10000000;   7 int num[size];   8    9 int main()  10 {  11     int n;  12     FILE *fp = fopen("data.txt", "w");  13     assert(fp);  14   15     for (n = 1; n <= size; n++)    16         //之前此处写成了n=0;n<size。导致下面有一段小程序的测试数据出现了0,特此订正。  17         num[n] = n;  18     srand((unsigned)time(NULL));  19     int i, j;  20   21     for (n = 0; n < size; n++)  22     {  23         i = (rand() * RAND_MAX + rand()) % 10000000;  24         j = (rand() * RAND_MAX + rand()) % 10000000;  25         swap(num[i], num[j]);  26     }  27   28     for (n = 0; n < size; n++)  29         fprintf(fp, "%d ", num[n]);  30     fclose(fp);  31     return 0;  32 }  

位图程序实现排序,首先排前5000000(每个数字对应一个bit)个后排剩下的5000000数据(每次都小于1M),代码如下(模拟两次):

 1 #include <iostream>   2 #include <bitset>   3 #include <assert.h>   4 #include <time.h>   5 using namespace std;   6    7 const int max_each_scan = 5000000;   8    9 int main()  10 {  11     clock_t begin = clock();  12     bitset<max_each_scan> bit_map;  13     bit_map.reset();  14       15     // open the file with the unsorted data  16     FILE *fp_unsort_file = fopen("data.txt", "r");  17     assert(fp_unsort_file);  18     int num;  19   20     // the first time scan to sort the data between 0 - 4999999  21     while (fscanf(fp_unsort_file, "%d ", &num) != EOF)  22     {  23         if (num < max_each_scan)  24             bit_map.set(num, 1);  25     }  26       27     FILE *fp_sort_file = fopen("sort.txt", "w");  28     assert(fp_sort_file);  29     int i;  30       31     // write the sorted data into file  32     for (i = 0; i < max_each_scan; i++)  33     {  34         if (bit_map[i] == 1)  35             fprintf(fp_sort_file, "%d ", i);  36     }  37       38     // the second time scan to sort the data between 5000000 - 9999999  39     int result = fseek(fp_unsort_file, 0, SEEK_SET);  40     if (result)  41         cout << "fseek failed!" << endl;  42     else  43     {  44         bit_map.reset();  45         while (fscanf(fp_unsort_file, "%d ", &num) != EOF)  46         {  47             if (num >= max_each_scan && num < 10000000)  48             {  49                 num -= max_each_scan;  50                 bit_map.set(num, 1);  51             }  52         }  53         for (i = 0; i < max_each_scan; i++)  54         {  55             if (bit_map[i] == 1)  56                 fprintf(fp_sort_file, "%d ", i + max_each_scan);  57         }  58     }  59       60     clock_t end = clock();  61     cout<<"用位图的方法,耗时:"<<endl;  62     cout << (end - begin) / CLK_TCK << "s" << endl;  63     fclose(fp_sort_file);  64     fclose(fp_unsort_file);  65     return 0;  66 }