首页 > 代码库 > Dijkstra算法以及各种海量数据排序算法

Dijkstra算法以及各种海量数据排序算法

一、Dijkstra最短路径算法

是从一个顶点到其余各顶点的最短路径算法,解决的是有向图中最短路径问题。迪杰斯特拉算法主要特点是以起始点为中心向外层层扩展,直到扩展到终点为止。

实现一

////  Dijkstra//  ACM//  Find the number of minimal path////  Created by Rachel on 18-2-12.//  Copyright (c) 2014年 ZJU. All rights reserved.//#include <iostream>#include <algorithm>#include <stdio.h>#include <functional>#include <utility>#include <memory.h>using namespace std;#define N 505#define INF 100000000#define min(a,b) a<b?a:b#define max(a,b) a>b?a:bint map[N][N];int minres[N]; //min distance from source to point_ibool visited[N];int weight[N];void init(int n){    int i,j;    for (i=0; i<n; i++) {        for (j=0; j<n; j++) {            map[i][j] = INF;        }        minres[i] = INF;    }    memset(visited, false, sizeof(visited));}void dijkstra(int source, int dest, int n){    int i,j;    for(i=0;i<n;i++)        minres[i]=map[source][i];    visited[source]=true;        // (n-1) times, each time select one point into the start point set    for (j=0; j<n-1; j++) {        //select a point to add into the start point set        int minn = INF, point=-1;        for(i=0;i<n;i++)            if (!visited[i]&&minres[i]<minn) {                minn = minres[i];                point = i;            }        visited[point] = true;                //update the min distance of other points        for (i=0; i<n; i++) {            if (!visited[i]&&minres[i]>minres[point]+map[point][i]) {                minres[i] = minres[point]+map[point][i];            }        }    }}void dfs(int source, int dest,int n, int curpoint, int curdis, int cursum, int* num, int* sum){    if (curpoint==dest && minres[dest]==curdis) {        *num = *num+1;        *sum = max(*sum, cursum);        return;    }    if (curdis>minres[dest])        return;    for (int i=0; i<n; i++) {        if(!visited[i]&&map[curpoint][i]!=INF)        {            visited[i] = true;            dfs(source, dest, n, i, curdis+map[curpoint][i], cursum+weight[i], num, sum);            visited[i] = false;        }    }}int main(){    int i,m,n,a,b,t,source,dest;    while (cin>>n>>m) {        cin>>source>>dest;        for (i=0; i<n; i++) {            cin>>weight[i]; //#peoples @ each point        }        init(n);        for(i=0;i<m;i++)        {            scanf("%d%d%d",&a,&b,&t);            map[b][a] = map[a][b]= min(map[a][b],t);        }        dijkstra(source,dest,n);        minres[source] = 0;        int num = 0, sum = 0;        memset(visited, false, sizeof(visited));        visited[source] = true;        dfs(source, dest, n, source, 0, weight[source], &num, &sum);        cout<<num<<" "<<sum<<endl;    }}

 实现二

/*Dijkstra求单源最短路径 2010.8.26*/ /*http://www.cnblogs.com/dolphin0520/archive/2011/08/26/2155202.html */#include <iostream>#include<stack>#define M 100#define N 100using namespace std;typedef struct node{    int matrix[N][M];      //邻接矩阵     int n;                 //顶点数     int e;                 //边数 }MGraph; void DijkstraPath(MGraph g,int *dist,int *path,int v0)   //v0表示源顶点 {    int i,j,k;    bool *visited=(bool *)malloc(sizeof(bool)*g.n);    for(i=0;i<g.n;i++)     //初始化     {        if(g.matrix[v0][i]>0&&i!=v0)        {            dist[i]=g.matrix[v0][i];            path[i]=v0;     //path记录最短路径上从v0到i的前一个顶点         }        else        {            dist[i]=INT_MAX;    //若i不与v0直接相邻,则权值置为无穷大             path[i]=-1;        }        visited[i]=false;        path[v0]=v0;        dist[v0]=0;    }    visited[v0]=true;    for(i=1;i<g.n;i++)     //循环扩展n-1次     {        int min=INT_MAX;        int u;        for(j=0;j<g.n;j++)    //寻找未被扩展的权值最小的顶点         {            if(visited[j]==false&&dist[j]<min)            {                min=dist[j];                u=j;                    }        }         visited[u]=true;        for(k=0;k<g.n;k++)   //更新dist数组的值和路径的值         {            if(visited[k]==false&&g.matrix[u][k]>0&&min+g.matrix[u][k]<dist[k])            {                dist[k]=min+g.matrix[u][k];                path[k]=u;             }        }            }    }void showPath(int *path,int v,int v0)   //打印最短路径上的各个顶点 {    stack<int> s;    int u=v;    while(v!=v0)    {        s.push(v);        v=path[v];    }    s.push(v);    while(!s.empty())    {        cout<<s.top()<<" ";        s.pop();    }} int main(int argc, char *argv[]){    int n,e;     //表示输入的顶点数和边数     while(cin>>n>>e&&e!=0)    {        int i,j;        int s,t,w;      //表示存在一条边s->t,权值为w        MGraph g;        int v0;        int *dist=(int *)malloc(sizeof(int)*n);        int *path=(int *)malloc(sizeof(int)*n);                for(i=0;i<N;i++)            for(j=0;j<M;j++)                g.matrix[i][j]=0;        g.n=n;        g.e=e;        for(i=0;i<e;i++)        {            cin>>s>>t>>w;            g.matrix[s][t]=w;        }        cin>>v0;        //输入源顶点         DijkstraPath(g,dist,path,v0);        for(i=0;i<n;i++)        {            if(i!=v0)            {                showPath(path,i,v0);                cout<<dist[i]<<endl;            }        }    }    return 0;}

 二、基于bitset排序

各种排序算法,生成随机文件程序。

    //purpose:  生成随机的不重复的测试数据      //copyright@ 2011.04.19 yansha      //1000w数据量,要保证生成不重复的数据量,一般的程序没有做到。      //但,本程序做到了。      //July、2010.05.30。      #include <iostream>      #include <time.h>      #include <assert.h>      using namespace std;            const int size = 10000000;      int num[size];            int main()      {          int n;          FILE *fp = fopen("data.txt", "w");          assert(fp);                for (n = 1; n <= size; n++)                //之前此处写成了n=0;n<size。导致下面有一段小程序的测试数据出现了0,特此订正。              num[n] = n;          srand((unsigned)time(NULL));          int i, j;                for (n = 0; n < size; n++)          {              i = (rand() * RAND_MAX + rand()) % 10000000;              j = (rand() * RAND_MAX + rand()) % 10000000;              swap(num[i], num[j]);          }                for (n = 0; n < size; n++)              fprintf(fp, "%d ", num[n]);          fclose(fp);          return 0;      }      

基于bitset实现方法

#include<iostream>#include<bitset>#include<assert.h>#include<time.h>using namespace std;const int max_each_scan=5000000;int main(){	clock_t begin=clock();	bitset<max_each_scan> bitmap;	bitmap.reset();	FILE* fp_unsort_file=fopen("data.txt","r");	assert(fp_unsort_file);	int num;	while(fscanf(fp_unsort_file,"%d",&num)!=EOF){		if(num<max_each_scan)			bitmap.set(num,1);	}	FILE* fp_sort_file=fopen("sort.txt","w");	assert(fp_sort_file);	int i ;	for(i=0;i<max_each_scan;i++){		if(bitmap[i]==1)			fprintf(fp_sort_file, "%d\n", i);	}	int result=fseek(fp_unsort_file,0,SEEK_SET);	if(result)		cout<<"failed"	else{		bitmap.reset();		while(fscanf(fp_unsort_file,"%d",$num)!=EOF){			if(num>max_each_scan&&num<10000000){				num=num-max_ean_scan;				bitmap.set(num,1);			}		}		for(i=0;i<max_each_scan;i++){			if(bitmap[i]==1){				fprintf(fp_sort_file, "%d\n", max_each_scan+i);			}		}	}	clock_t end=clock();	cout<<"排序用时:"<<endl;  	cout << (end - begin) / CLK_TCK << "s" << endl;      fclose(fp_sort_file);      fclose(fp_unsort_file);  	return 0;  }

 三、海量数据排序实例

    //copyright@ yansha      //July、updated,2011.05.28。      #include <iostream>      #include <string>      #include <algorithm>      #include <time.h>      using namespace std;            int sort_num = 10000000;      int memory_size = 250000;              //每次只对250k个小数据量进行排序      int read_data(FILE *fp, int *space)      {          int index = 0;          while (index < memory_size && fscanf(fp, "%d ", &space[index]) != EOF)              index++;          return index;      }            void write_data(FILE *fp, int *space, int num)      {          int index = 0;          while (index < num)          {              fprintf(fp, "%d ", space[index]);              index++;          }      }            // check the file pointer whether valid or not.      void check_fp(FILE *fp)      {          if (fp == NULL)          {              cout << "The file pointer is invalid!" << endl;              exit(1);          }      }            int compare(const void *first_num, const void *second_num)      {          return *(int *)first_num - *(int *)second_num;      }            string new_file_name(int n)      {          char file_name[20];          sprintf(file_name, "data%d.txt", n);          return file_name;      }            int memory_sort()      {          // open the target file.          FILE *fp_in_file = fopen("data.txt", "r");          check_fp(fp_in_file);          int counter = 0;          while (true)          {              // allocate space to store data read from file.              int *space = new int[memory_size];              int num = read_data(fp_in_file, space);              // the memory sort have finished if not numbers any more.              if (num == 0)                  break;                    // quick sort.              qsort(space, num, sizeof(int), compare);              // create a new auxiliary file name.              string file_name = new_file_name(++counter);              FILE *fp_aux_file = fopen(file_name.c_str(), "w");              check_fp(fp_aux_file);                    // write the orderly numbers into auxiliary file.              write_data(fp_aux_file, space, num);              fclose(fp_aux_file);              delete []space;          }          fclose(fp_in_file);                // return the number of auxiliary files.          return counter;      }            void merge_sort(int file_num)      {          if (file_num <= 0)              return;          // create a new file to store result.          FILE *fp_out_file = fopen("result.txt", "w");          check_fp(fp_out_file);                // allocate a array to store the file pointer.          FILE **fp_array = new FILE *[file_num];          int i;          for (i = 0; i < file_num; i++)          {              string file_name = new_file_name(i + 1);              fp_array[i] = fopen(file_name.c_str(), "r");              check_fp(fp_array[i]);          }                int *first_data = http://www.mamicode.com/new int[file_num];     "%d ", &first_data[i]);          while (true)          {              int index = 0;              while (index < file_num && finish[index])                  index++;                    // the finish condition of the merge sort.              if (index >= file_num)                  break;              //主要的修改在上面两行代码,就是merge sort结束条件。              //要保证所有文件都读完,必须使得finish[0]...finish[40]都为真              //July、yansha,555,2011.05.29。                    int min_data = http://www.mamicode.com/first_data[index];  "%d ", min_data);              if (fscanf(fp_array[index], "%d ", &first_data[index]) == EOF)                  finish[index] = true;          }                fclose(fp_out_file);          delete []finish;          delete []first_data;          for (i = 0; i < file_num; i++)              fclose(fp_array[i]);          delete [] fp_array;      }            int main()      {          clock_t start_memory_sort = clock();          int aux_file_num = memory_sort();          clock_t end_memory_sort = clock();          cout << "The time needs in memory sort: " << end_memory_sort - start_memory_sort << endl;          clock_t start_merge_sort = clock();          merge_sort(aux_file_num);          clock_t end_merge_sort = clock();          cout << "The time needs in merge sort: " << end_merge_sort - start_merge_sort << endl;          system("pause");          return 0;      }  

 
四、多路归并排序

//copyright@ 纯净的天空 && yansha   //5、July,updated,2010.05.28。 //harryshayne,update again。2011.6.30 #include <iostream>  #include <ctime>  #include <fstream>  //#include "ExternSort.h"  using namespace std;    //使用多路归并进行外排序的类  //ExternSort.h    /* * 大数据量的排序 * 多路归并排序 * 以千万级整数从小到大排序为例 * 一个比较简单的例子,没有建立内存缓冲区 */    #ifndef EXTERN_SORT_H  #define EXTERN_SORT_H    #include <cassert>  //#define k 5  #define MIN -1//这里开始的时候出现了一个BUG,如果定义的MIN大于等于待排序的数,则会是算法出现错误#define MAX 10000000//最大值,附加在归并文件结尾typedef int* LoserTree;typedef int* External;class ExternSort  {  public:      void sort()      {          time_t start = time(NULL);                    //将文件内容分块在内存中排序,并分别写入临时文件          k = memory_sort();  //                  //归并临时文件内容到输出文件          //merge_sort(file_count);         ls=new int[k];        b=new int[k+1];                K_Merge();        delete []ls;        delete []b;                  time_t end = time(NULL);          printf("total time:%f\n", (end - start) * 1000.0/ CLOCKS_PER_SEC);      }            //input_file:输入文件名      //out_file:输出文件名      //count: 每次在内存中排序的整数个数      ExternSort(const char *input_file, const char * out_file, int count)      {          m_count = count;          m_in_file = new char[strlen(input_file) + 1];          strcpy(m_in_file, input_file);          m_out_file = new char[strlen(out_file) + 1];          strcpy(m_out_file, out_file);      }      virtual ~ExternSort()      {          delete [] m_in_file;          delete [] m_out_file;      }        private:      int m_count; //数组长度      char *m_in_file;   //输入文件的路径      char *m_out_file; //输出文件的路径         int k;//归并数,此数必须要内排序之后才能得到,所以下面的ls和b都只能定义为指针(注意和书上区别)    LoserTree ls;//定义成为指针,之后动态生成数组    External b;//定义成为指针,在成员函数中可以把它当成数组使用    //int External[k];protected:      int read_data(FILE* f, int a[], int n)      {          int i = 0;          while(i < n && (fscanf(f, "%d", &a[i]) != EOF)) i++;          printf("read:%d integer\n", i);          return i;      }      void write_data(FILE* f, int a[], int n)      {          for(int i = 0; i < n; ++i)              fprintf(f, "%d ", a[i]);          fprintf(f,"%d",MAX);//在最后写上一个最大值    }      char* temp_filename(int index)      {          char *tempfile = new char[100];          sprintf(tempfile, "temp%d.txt", index);          return tempfile;      }      static int cmp_int(const void *a, const void *b)      {          return *(int*)a - *(int*)b;      }            int memory_sort()      {          FILE* fin = fopen(m_in_file, "rt");          int n = 0, file_count = 0;          int *array = new int[m_count];                    //每读入m_count个整数就在内存中做一次排序,并写入临时文件          while(( n = read_data(fin, array, m_count)) > 0)          {              qsort(array, n, sizeof(int), cmp_int);                 //这里,调用了库函数阿,在第四节的c实现里,不再调用qsort。              char *fileName = temp_filename(file_count++);              FILE *tempFile = fopen(fileName, "w");              free(fileName);              write_data(tempFile, array, n);              fclose(tempFile);          }                    delete [] array;          fclose(fin);                    return file_count;      }      void Adjust(int s)    {//沿从叶子节点b[s]到根节点ls[0]的路径调整败者树        int t=(s+k)/2;//ls[t]是b[s]的双亲节点        while(t>0)        {            if(b[s]>b[ls[t]])//如果失败,则失败者位置s留下,s指向新的胜利者            {                int tmp=s;                s=ls[t];                ls[t]=tmp;            }            t=t/2;        }        ls[0]=s;//ls[0]存放调整后的最大值的位置    }    void CreateLoserTree()    {        b[k]=MIN;//额外的存储一个最小值        for(int i=0;i<k;i++)ls[i]=k;//先初始化为指向最小值,这样后面的调整才是正确的                                    //这样能保证非叶子节点都是子树中的“二把手”        for(i=k-1;i>=0;i--)            Adjust(i);//依次从b[k-1],b[k-2]...b[0]出发调整败者树    }    void K_Merge()    {//利用败者数把k个输入归并段归并到输出段中        //b中前k个变量存放k个输入段中当前记录的元素        //归并临时文件          FILE *fout = fopen(m_out_file, "wt");          FILE* *farray = new FILE*[k];          int i;          for(i = 0; i < k; ++i)  //打开所有k路输入文件        {              char* fileName = temp_filename(i);              farray[i] = fopen(fileName, "rt");              free(fileName);          }                    for(i = 0; i < k; ++i)  //初始读取        {              if(fscanf(farray[i], "%d", &b[i]) == EOF)//读每个文件的第一个数到data数组              {                printf("there is no %d file to merge!",k);                return;            }        }      //    for(int i=0;i<k;i++)input(b[i]);        CreateLoserTree();        int q;        while(b[ls[0]]!=MAX)//        {            q=ls[0];//q用来存储b中最小值的位置,同时也对应一路文件            //output(q);            fprintf(fout,"%d ",b[q]);            //input(b[q],q);            fscanf(farray[q],"%d",&b[q]);            Adjust(q);        }        //output(ls[0]);        fprintf(fout,"%d ",b[ls[0]]);        //delete [] hasNext;          //delete [] data;                    for(i = 0; i < k; ++i)  //清理工作        {              fclose(farray[i]);          }          delete [] farray;          fclose(fout);      }    /*    void merge_sort(int file_count)      {          if(file_count <= 0) return;                    //归并临时文件          FILE *fout = fopen(m_out_file, "wt");          FILE* *farray = new FILE*[file_count];          int i;          for(i = 0; i < file_count; ++i)          {              char* fileName = temp_filename(i);              farray[i] = fopen(fileName, "rt");              free(fileName);          }                    int *data = http://www.mamicode.com/new int[file_count];//存储每个文件当前的一个数字  "%d", &data[i]) == EOF)//读每个文件的第一个数到data数组                  hasNext[i] = false;          }                    while(true)  //循环读取和输出,选择最小数的方法是简单遍历选择法        {              //求data中可用的最小的数字,并记录对应文件的索引              int min = data[0];              int j = 0;                            while (j < file_count && !hasNext[j])  //顺序跳过已读取完毕的文件                j++;                            if (j >= file_count)  //没有可取的数字,终止归并                  break;                                          for(i = j +1; i < file_count; ++i)  //选择最小数,这里应该是i=j吧!但结果是一样的!            {                  if(hasNext[i] && min > data[i])                  {                      min = data[i];                      j = i;                  }              }                            if(fscanf(farray[j], "%d", &data[j]) == EOF) //读取文件的下一个元素                  hasNext[j] = false;              fprintf(fout, "%d ", min);                        }                    delete [] hasNext;          delete [] data;                    for(i = 0; i < file_count; ++i)          {              fclose(farray[i]);          }          delete [] farray;          fclose(fout);      }      */};    #endif      //测试主函数文件  /* * 大文件排序 * 数据不能一次性全部装入内存 * 排序文件里有多个整数,整数之间用空格隔开 */    const unsigned int count = 10000000; // 文件里数据的行数  const unsigned int number_to_sort = 100000; //在内存中一次排序的数量  const char *unsort_file = "unsort_data.txt"; //原始未排序的文件名  const char *sort_file = "sort_data.txt"; //已排序的文件名  void init_data(unsigned int num); //随机生成数据文件    int main(int argc, char* *argv)  {      srand(time(NULL));      init_data(count);      ExternSort extSort(unsort_file, sort_file, number_to_sort);      extSort.sort();      system("pause");      return 0;  }    void init_data(unsigned int num)  {      FILE* f = fopen(unsort_file, "wt");      for(int i = 0; i < num; ++i)          fprintf(f, "%d ", rand());      fclose(f);  }

 五、字符串回文结构判断

class Solution{	//http://blog.csdn.net/v_july_v/article/details/6712171public:	    /**       *check weather s is a palindrome, n is the length of string s      *Copyright(C) fairywell 2011      */      bool IsPalindrome(const char *s, int n)      {         if (s == 0 || n < 1) return false; // invalid string         char *front, *back;         front = s; back = s + n - 1; // set front and back to the begin and endof the string         while (front < back) {             if (*front != *back) return false; // not a palindrome             ++front; --back;          }         return true; // check over, it‘s a palindrome            }          /**       *check weather s is a palindrome, n is the length of string s      *Copyright(C) fairywell 2011      */      bool IsPalindrome2(const char *s, int n)      {         if (s == 0 || n < 1) return false; // invalid string         char *first, *second;         int m = ((n>>1) - 1) >= 0 ? (n>>1) - 1 : 0; // m is themiddle point of s             first = s + m; second = s + n - 1 - m;         while (first >= s)                 if (s[first--] !=s[second++]) return false; // not equal, so it‘s not apalindrome         return true; // check over, it‘s a palindrome      }          /**       *find the longest palindrome in a string, n is the length of string s      *Copyright(C) fairywell 2011      */      int LongestPalindrome(const char *s, int n)      {         int i, j, max;         if (s == 0 || n < 1) return 0;         max = 0;         for (i = 0; i < n; ++i) { // i is the middle point of the palindrome             for (j = 0; (i-j >= 0) && (i+j < n); ++j) // if the lengthof the palindrome is odd                 if (s[i-j] != s[i+j]) break;                    if (j*2+1 > max) max = j * 2 + 1;                    for (j = 0; (i-j >= 0) && (i+j+1 < n); ++j) // for theeven case                 if (s[i-j] != s[i+j+1]) break;             if (j*2+2 > max) max = j * 2 + 2;          }         return max;      }          int LongestPalindrome(const char *s, int n)      {          int max = 0;          int i,j;          for (  i = 0; i < n  ; i++ )          {//以i为中心开始计算回文子串              //计算奇数回问子串长度              for (  j = 0; (i-j) >= 0 && (i+j) < n; j++ )              {                  if ( s[i-j] != s[i+j] )                  {                      break;                  }                  else                  {                      max = GETMAX(max, (2 * j  + 1));                  }              }                            //计算偶数回问子串长度              for ( j = 0; (i-j) >= 0 && i + j + 1< n; j++ )              {                  if ( s[i-j] != s[i+j+1])                  {                      break;                  }                  else                  {                      max = GETMAX(max, ( 2 * j  + 2) );                  }              }          }                return max;      }  }

 

Dijkstra算法以及各种海量数据排序算法