首页 > 代码库 > 字符串hash + 二分答案 - 求最长公共子串 --- poj 2774

字符串hash + 二分答案 - 求最长公共子串 --- poj 2774

 Long Long Message

Problem‘s Link:http://poj.org/problem?id=2774


 

Mean: 

 求两个字符串的最长公共子串的长度。

analyse:

 前面在学习后缀数组的时候已经做过一遍了,但是现在主攻字符串hash,再用字符串hash写一遍。

这题的思路是这样的:

1)取较短的串的长度作为high,然后二分答案(每次判断长度为mid=(low+high)>>1是否存在,如果存在就增加下界;不存在就缩小上界);

2)主要是对答案的判断(judge函数)。具体参看代码注释。

Time complexity:O(n)

 

Source code:

 

// Memory   Time// 1347base     0MS// by : Snarl_jsb// 2014-10-04-21.16#include<algorithm>#include<cstdio>#include<cstring>#include<cstdlib>#include<iostream>#include<vector>#include<queue>#include<stack>#include<map>#include<string>#include<climits>#include<cmath>#define ULL unsigned long longusing namespace std;string s1,s2;int l1,l2,seed=131;vector<ULL> hash;bool judge(int x){    hash.clear();    ULL tmp=0;    for (int i = 0; i < x; i++)    {        tmp=tmp* seed + s1[i];    }    hash.push_back(tmp);    ULL base =1;    for (int i = 1; i < x; i++)    {        base *= seed;    }    for (int i = x; i < l1; i++)    {        tmp=(tmp*seed+s1[i])-base*s1[i-x]*seed;        hash.push_back(tmp);    }    sort(hash.begin(),hash.end());    ULL hashval = 0;    for (int i = 0; i < x; i++)    {        hashval = hashval * seed + s2[i];    }    if (binary_search(hash.begin(),hash.end(),hashval))        return 1;    for (int i = x; i < l2; i++)    {        hashval = (hashval-(s2[i-x])*base)*seed+s2[i];        if (binary_search(hash.begin(),hash.end(),hashval))            return 1;    }    return 0;}int main(){    while (cin>>s1>>s2)    {        l1=s1.size();        l2=s2.size();        int ans = 0;        int high = min(l1,l2);        int low = 0;        while (low <= high)        {            int mid = (low+high)>>1;            if (judge(mid))            {                ans = mid;                low = mid+1;            }            else                high = mid-1;        }        printf("%d\n",ans);    }    return 0;}

注释代码:

// Memory   Time// 1347k     0MS// by : Snarl_jsb// 2014-10-04-21.16#include<algorithm>#include<cstdio>#include<cstring>#include<cstdlib>#include<iostream>#include<vector>#include<queue>#include<stack>#include<map>#include<string>#include<climits>#include<cmath>#define ULL unsigned long longusing namespace std;string s1,s2;int l1,l2,seed=131;vector<ULL> hash;bool judge(int x){    hash.clear();//多组数据时不要忘了清空全局数组    //构造s1串的hash表    ULL tmp=0;    for (int i = 0; i < x; i++)    {        tmp=tmp* seed + s1[i];    }    hash.push_back(tmp);    ULL base =1;    for (int i = 1; i < x; i++)//求出到达x的base值    {        base *= seed;    }    for (int i = x; i < l1; i++)    {        tmp=(tmp*seed+s1[i])-base*s1[i-x]*seed;        hash.push_back(tmp);    }    //构造完毕    sort(hash.begin(),hash.end()); //二分查找加速,必需先排序    ULL hashval = 0;    for (int i = 0; i < x; i++)//求出s2串0到x的hash值    {        hashval = hashval * seed + s2[i];    }    if (binary_search(hash.begin(),hash.end(),hashval))//查找s2串0到x的hash值是否在s1串的hash表中        return 1;    for (int i = x; i < l2; i++)//如果上面的s2串0到x的hash值未匹配成功,这儿接着匹配s2串长度为x的hash值是否出现在s1串的hash表中    {        hashval = hashval*seed+s2[i]-s2[i-x]*base*seed;        if (binary_search(hash.begin(),hash.end(),hashval))            return 1;    }    return 0;}int main(){    while (cin>>s1>>s2)    {        l1=s1.size();        l2=s2.size();        int ans = 0;        int low=0,high = min(l1,l2);        while (low <= high)//二分答案        {            int mid = (low+high)>>1;            if (judge(mid))//判断答案是否可行            {                ans = mid;                low = mid+1;            }            else                high = mid-1;        }        printf("%d\n",ans);    }    return 0;}    

  

字符串hash + 二分答案 - 求最长公共子串 --- poj 2774