首页 > 代码库 > 后缀数组学习笔记
后缀数组学习笔记
后缀数组的用处:快速求出两个后缀Suffix(i), Suffix(j)的最长公共前缀(LCP, Longest Common Perfix)
以下一张图片可谓简洁明了。
下面贴上模板
1.求最长重复子串,可以重叠
void solve_duplicate_substr(int n){//duplicate available
2.求最长重复子串,不可重叠
void slove_update_duplicate_substr(int n){//duplicate unavailable
Source Code:
1 //#pragma comment(linker, "/STACK:16777216") //for c++ Compiler 2 #include <stdio.h> 3 #include <iostream> 4 #include <fstream> 5 #include <cstring> 6 #include <cmath> 7 #include <stack> 8 #include <string> 9 #include <map> 10 #include <set> 11 #include <list> 12 #include <queue> 13 #include <vector> 14 #include <algorithm> 15 #define Max(a,b) (((a) > (b)) ? (a) : (b)) 16 #define Min(a,b) (((a) < (b)) ? (a) : (b)) 17 #define Abs(x) (((x) > 0) ? (x) : (-(x))) 18 #define MOD 1000000007 19 #define pi acos(-1.0) 20 21 using namespace std; 22 23 typedef long long ll ; 24 typedef unsigned long long ull ; 25 typedef unsigned int uint ; 26 typedef unsigned char uchar ; 27 28 template<class T> inline void checkmin(T &a,T b){if(a>b) a=b;} 29 template<class T> inline void checkmax(T &a,T b){if(a<b) a=b;} 30 31 const double eps = 1e-7 ; 32 const int N = 1 ; 33 const int M = 200000 ; 34 const ll P = 10000000097ll ; 35 const int INF = 0x3f3f3f3f ; 36 37 int a[M], sa[M], h[M], rank[M]; 38 39 void radix(int *str, int *a, int *b, int n, int m){ 40 static int count[M]; 41 int i; 42 memset(count, 0, sizeof(count)); 43 for(i = 0; i < n; ++i) ++count[str[a[i]]]; 44 for(i = 1; i <= m; ++i) count[i] += count[i - 1]; 45 for(i = n - 1; i >= 0; --i) b[--count[str[a[i]]]] = a[i]; 46 } 47 48 void suffix_array(int *str, int *sa, int n, int m){ 49 static int a[M], b[M]; 50 int i, j; 51 for(i = 0; i < n; ++i) rank[i] = i; 52 radix(str, rank, sa, n, m); 53 54 rank[sa[0]] = 0; 55 for(i = 1; i < n; ++i) rank[sa[i]] = rank[sa[i - 1]] + (str[sa[i]] != str[sa[i - 1]]); 56 for(i = 0; 1<<i < n; ++i){ 57 for(j = 0; j < n; ++j){ 58 a[j] = rank[j] + 1; 59 b[j] = j + (1 << i) >= n ? 0 : rank[j + (1 << i)] + 1; 60 sa[j] = j; 61 } 62 radix(b, sa, rank, n, n); 63 radix(a, rank, sa, n, n); 64 rank[sa[0]] = 0; 65 for(j = 1; j < n; ++j){ 66 rank[sa[j]] = rank[sa[j - 1]] + (a[sa[j - 1]] != a[sa[j]] || b[sa[j - 1]] != b[sa[j]]); 67 } 68 } 69 } 70 71 void calc_height(int *str, int *sa, int *h, int n){ 72 int i, k = 0; 73 h[0] = 0; 74 for(i = 0; i < n; ++i){ 75 k = k == 0 ? 0 : k - 1; 76 if(rank[i] != 0){ 77 while(str[i + k] == str[sa[rank[i] - 1] + k]){ 78 ++k; 79 } 80 } 81 h[rank[i]] = k; 82 } 83 } 84 85 void solve_duplicate_substr(int n){//duplicate available 86 int i, j, pos, ans = 0; 87 for(i = 0; i < n; ++i){ 88 if(h[rank[i]] > ans){ 89 ans = h[rank[i]]; 90 pos = i; 91 } 92 } 93 for(i = pos; i < pos + ans; ++i){ 94 printf("%c", a[i]); 95 } 96 printf("\n"); 97 } 98 99 void slove_update_duplicate_substr(int n){//duplicate unavailable100 int i, j;101 int low = 1, high = n;102 int ans = 0, pos1 = 0, pos2 = 0;103 while(low <= high){104 int mid = (low + high) / 2;105 bool flag = false;106 for(i = 0; i < n; ){107 j = i + 1;108 int minPos = sa[i], maxPos = sa[i];109 while(j < n && h[j] >= mid){110 minPos = min(minPos, sa[j]);111 maxPos = max(maxPos, sa[j]);112 ++j;113 }114 if(maxPos - minPos >= mid){115 flag = true;116 if(mid > ans){117 ans = mid;118 pos1 = minPos;119 pos2 = maxPos;120 }121 break;122 }123 i = j;124 }125 if(flag) low = mid + 1;126 else high = mid - 1;127 }128 for(i = pos1; i < pos1 + ans; ++i){129 printf("%c", a[i]);130 }131 printf("\n");132 }133 134 int main(){135 int i, j, t, n, m, k;136 string str;137 while(cin >> str){138 copy(str.begin(), str.end(), a);139 n = str.length();140 suffix_array(a, sa, n, 256);141 calc_height(a, sa, h, n);142 solve_duplicate_substr(n);143 slove_update_duplicate_substr(n);144 }145 return 0;146 }
后缀数组学习笔记
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。