首页 > 代码库 > UVA 11557 - Code Theft (KMP + HASH)

UVA 11557 - Code Theft (KMP + HASH)

UVA 11557 - Code Theft

题目链接

题意:给定一些代码文本,然后在给定一个现有文本,找出这个现有文本和前面代码文本,重复连续行最多的这些文本

思路:把每一行hash成一个值,然后对于每一个文本计算最大匹配值,枚举后缀,然后利用KMP去找即可

代码:

#include <cstdio>
#include <cstring>
#include <vector>
#include <string>
#include <iostream>
#include <vector>
using namespace std;

typedef unsigned long long ull;

const ull X = 123;
const int N = 105;

int n, next[1000005];
string name[N];
string s;
vector<int> ans;
vector<ull> code[N];

void hash(string s, int u) {
    string ss = "";
    int l = 0, r = s.length() - 1, len = s.length();;
    while (s[l] == ' ' && l < len) l++;
    while (s[r] == ' ' && r >= 0) r--;
    for (int i = l; i <= r; i++) {
	ss += s[i];
	while (s[i] == ' ' && s[i + 1] == ' ' && i < r) i++;
    }
    if (ss == "") return;
    ull ans = 0;
    for (int i = ss.length() - 1; i >= 0; i--)
	ans = ans * X + ss[i];
    code[u].push_back(ans);
}

void build(int i) {
    code[i].clear();
    while (getline(cin, s) && s != "***END***") {
	hash(s, i);
    }
}

vector<ull> T;

void getnext() {
    int N = T.size();
    next[0] = next[1] = 0;
    int j = 0;
    for (int i = 2 ; i <= N; i++) {
	while (j && T[i - 1] != T[j]) j = next[j];
	if (T[i - 1] == T[j]) j++;
	next[i] = j;
    }
}

int find() {
    int ans = 0;
    int N = code[n].size(), m = T.size(), j = 0;
    for (int i = 0; i < N; i++) {
	while (j && code[n][i] != T[j]) j = next[j];
	if (code[n][i] == T[j]) j++;
	ans = max(ans, j);
	if (j == m)
	    return m;
    }
    return ans;
}

int cal(int u) {
    int ans = 0;
    int sz1 = code[u].size();
    for (int i = 0; i < sz1; i++) {
	T.clear();
	for (int j = i; j < sz1; j++)
	    T.push_back(code[u][j]);
	getnext();
	ans = max(ans, find());
    }
    return ans;
}

void solve() {
    int Max = 0;
    ans.clear();
    for (int i = 0; i < n; i++) {
	int tmp = cal(i);
	if (tmp > Max) {
	    Max = tmp;
	    ans.clear();
	    ans.push_back(i);
	}
	else if (tmp == Max) ans.push_back(i);
    }
    cout << Max;
    if (Max) {
	for (int i = 0; i < ans.size(); i++)
	    cout << " " << name[ans[i]];
    }
    cout << endl;
}

int main() {
    while (cin >> n) {
	getchar();
	for (int i = 0; i < n; i++) {
	    getline(cin, name[i]);
	    build(i);
	}
	build(n);
	solve();
    }
    return 0;
}


UVA 11557 - Code Theft (KMP + HASH)