首页 > 代码库 > 后缀数组学习笔记

后缀数组学习笔记

后缀数组的用处:快速求出两个后缀Suffix(i), Suffix(j)的最长公共前缀(LCP, Longest Common Perfix)

 

以下一张图片可谓简洁明了。

技术分享

 

下面贴上模板

1.求最长重复子串,可以重叠

void solve_duplicate_substr(int n){//duplicate available

2.求最长重复子串,不可重叠

void slove_update_duplicate_substr(int n){//duplicate unavailable

  

 Source Code:

  1 //#pragma comment(linker, "/STACK:16777216") //for c++ Compiler  2 #include <stdio.h>  3 #include <iostream>  4 #include <fstream>  5 #include <cstring>  6 #include <cmath>  7 #include <stack>  8 #include <string>  9 #include <map> 10 #include <set> 11 #include <list> 12 #include <queue> 13 #include <vector> 14 #include <algorithm> 15 #define Max(a,b) (((a) > (b)) ? (a) : (b)) 16 #define Min(a,b) (((a) < (b)) ? (a) : (b)) 17 #define Abs(x) (((x) > 0) ? (x) : (-(x))) 18 #define MOD 1000000007 19 #define pi acos(-1.0) 20  21 using namespace std; 22  23 typedef long long           ll      ; 24 typedef unsigned long long  ull     ; 25 typedef unsigned int        uint    ; 26 typedef unsigned char       uchar   ; 27  28 template<class T> inline void checkmin(T &a,T b){if(a>b) a=b;} 29 template<class T> inline void checkmax(T &a,T b){if(a<b) a=b;} 30  31 const double eps = 1e-7      ; 32 const int N = 1              ; 33 const int M = 200000         ; 34 const ll P = 10000000097ll   ; 35 const int INF = 0x3f3f3f3f   ; 36  37 int a[M], sa[M], h[M], rank[M]; 38  39 void radix(int *str, int *a, int *b, int n, int m){ 40     static int count[M]; 41     int i; 42     memset(count, 0, sizeof(count)); 43     for(i = 0; i < n; ++i)  ++count[str[a[i]]]; 44     for(i = 1; i <= m; ++i) count[i] += count[i - 1]; 45     for(i = n - 1; i >= 0; --i) b[--count[str[a[i]]]] = a[i]; 46 } 47  48 void suffix_array(int *str, int *sa, int n, int m){ 49     static int a[M], b[M]; 50     int i, j; 51     for(i = 0; i < n; ++i)  rank[i] = i; 52     radix(str, rank, sa, n, m); 53  54     rank[sa[0]] = 0; 55     for(i = 1; i < n; ++i)  rank[sa[i]] = rank[sa[i - 1]] + (str[sa[i]] != str[sa[i - 1]]); 56     for(i = 0; 1<<i < n; ++i){ 57         for(j = 0; j < n; ++j){ 58             a[j] = rank[j] + 1; 59             b[j] = j + (1 << i) >= n ? 0 : rank[j + (1 << i)] + 1; 60             sa[j] = j; 61         } 62         radix(b, sa, rank, n, n); 63         radix(a, rank, sa, n, n); 64         rank[sa[0]] = 0; 65         for(j = 1; j < n; ++j){ 66             rank[sa[j]] = rank[sa[j - 1]] + (a[sa[j - 1]] != a[sa[j]] || b[sa[j - 1]] != b[sa[j]]); 67         } 68     } 69 } 70  71 void calc_height(int *str, int *sa, int *h, int n){ 72     int i, k = 0; 73     h[0] = 0; 74     for(i = 0; i < n; ++i){ 75         k = k == 0 ? 0 : k - 1; 76         if(rank[i] != 0){ 77             while(str[i + k] == str[sa[rank[i] - 1] + k]){ 78                 ++k; 79             } 80         } 81         h[rank[i]] = k; 82     } 83 } 84  85 void solve_duplicate_substr(int n){//duplicate available 86     int i, j, pos, ans = 0; 87     for(i = 0; i < n; ++i){ 88         if(h[rank[i]] > ans){ 89             ans = h[rank[i]]; 90             pos = i; 91         } 92     } 93     for(i = pos; i < pos + ans; ++i){ 94         printf("%c", a[i]); 95     } 96     printf("\n"); 97 } 98  99 void slove_update_duplicate_substr(int n){//duplicate unavailable100     int i, j;101     int low = 1, high = n;102     int ans = 0, pos1 = 0, pos2 = 0;103     while(low <= high){104         int mid = (low + high) / 2;105         bool flag = false;106         for(i = 0; i < n; ){107             j = i + 1;108             int minPos = sa[i], maxPos = sa[i];109             while(j < n && h[j] >= mid){110                 minPos = min(minPos, sa[j]);111                 maxPos = max(maxPos, sa[j]);112                 ++j;113             }114             if(maxPos - minPos >= mid){115                 flag = true;116                 if(mid > ans){117                     ans = mid;118                     pos1 = minPos;119                     pos2 = maxPos;120                 }121                 break;122             }123             i = j;124         }125         if(flag)    low = mid + 1;126         else    high = mid - 1;127     }128     for(i = pos1; i < pos1 + ans; ++i){129         printf("%c", a[i]);130     }131     printf("\n");132 }133 134 int main(){135     int i, j, t, n, m, k;136     string str;137     while(cin >> str){138         copy(str.begin(), str.end(), a);139         n = str.length();140         suffix_array(a, sa, n, 256);141         calc_height(a, sa, h, n);142         solve_duplicate_substr(n);143         slove_update_duplicate_substr(n);144     }145     return 0;146 }

 

后缀数组学习笔记