首页 > 代码库 > uva 10526 - Intellectual Property(后缀数组)

uva 10526 - Intellectual Property(后缀数组)

题目链接:uva 10526 - Intellectual Property

题目大意:给定两个文本,问下面一个文本在哪些位置上抄袭了上面一个文本,输出n个抄袭位置(不足n个的情况全部输出),按照长度优先输出,长度相同的输出位置靠前的。
注意:空格,回车都算一个字符;一段字符只能是抄袭上面的一部分,比如上:NSB*SB 下:NSB 答案:NSB。

解题思路:将两个文本连接在一起,中间用没有出现过的字符分割,然后处理出后缀数组,根据height数组的性质,求出哪些位置匹配的长度不为0(注意匹配的位置为后面一段跟前面一段的最大长度),根据分割符的下标就可以区别前后文本。然后将覆盖的情况处理掉,排序输出。

#include <cstdio>
#include <cstring>
#include <vector>
#include <algorithm>

using namespace std;
// Capacity of every fixed-size buffer in this file: the input line buffer
// `str` and all of the Suffix_Arr work arrays are declared with this size.
const int maxn = 200005;

// One candidate segment: where it starts in the combined symbol sequence
// and how many symbols it spans.
struct state {
    int pos, len;

    // Both fields default to 0 when omitted.
    state (int pos = 0, int len = 0) : pos(pos), len(len) {}
};

// Suffix array over an integer symbol sequence, with all storage held in
// fixed-size member arrays. A single global instance, AC, is declared here.
struct Suffix_Arr {
    int n;                // number of symbols currently stored in s
    int s[maxn];          // the symbol sequence (ints, so values above 255 fit)
    int SA[maxn];         // SA[i]: start index of the i-th suffix in sorted order
    int rank[maxn];       // inverse of SA: rank[SA[i]] == i
    int height[maxn];     // height[i]: common prefix length of suffixes SA[i-1] and SA[i]
    int tmp_one[maxn];    // scratch key array used by build_arr
    int tmp_two[maxn];    // scratch key array used by build_arr
    int c[maxn];          // counting-sort buckets in build_arr; per-rank match length in solve

    void init();                // reset to an empty sequence
    void add(char* str);        // append one line plus a newline symbol
    void build_arr(int m);      // fill SA; m is the number of distinct symbol values allowed
    void get_height();          // fill rank and height from SA
    void solve (int p);         // report matches for the part of s starting at index p
}AC;

// Ordering for the final report: longer segments first; among equal
// lengths, the one with the smaller start position first.
inline bool sort_len (const state& a, const state& b) {
    return a.len != b.len ? a.len > b.len : a.pos < b.pos;
}

// Ordering for the overlap sweep: smaller start position first; among
// equal positions, the longer segment first.
inline bool sort_pos (const state& a, const state& b) {
    return a.pos != b.pos ? a.pos < b.pos : a.len > b.len;
}

// N:    the number read at the start of each test case; solve() prints at
//       most N segments.
// tick: index in AC.s of the first symbol after the separator, i.e. where
//       the second text begins (set in init(), passed to solve()).
int N, tick;
// Buffer that receives one input line at a time.
char str[maxn];

// Reads one line from stdin into buf (capacity cap bytes) and removes the
// trailing newline, which is what gets() used to do. Unlike gets(), the read
// is bounded by cap, and it is still available in C++14 and later.
// Returns false on end of input or on a read error.
static bool read_line (char* buf, int cap) {
    if (fgets(buf, cap, stdin) == NULL)
        return false;

    const size_t len = strlen(buf);
    if (len > 0 && buf[len - 1] == '\n')
        buf[len - 1] = '\0';
    return true;
}

// Loads one test case into AC and builds its suffix and height arrays.
//
// Resulting layout of AC.s:
//   <first text> 260 <second text> 0
// where every input line contributes its characters plus a newline symbol.
// 260 is outside the byte range so it cannot occur in either text, and the
// final 0 terminates the sequence. `tick` is left pointing at the first
// symbol of the second text.
void init () {
    const int cap = static_cast<int>(sizeof(str));

    AC.init();

    // The first line of each section is discarded without being checked
    // (presumably the "BEGIN ..." marker line).
    read_line(str, cap);
    while (read_line(str, cap) && strcmp(str, "END TDP CODEBASE"))
        AC.add(str);

    AC.s[AC.n++] = 260;
    tick = AC.n;

    read_line(str, cap);
    while (read_line(str, cap) && strcmp(str, "END JCN CODEBASE"))
        AC.add(str);

    AC.s[AC.n++] = 0;

    // 261 = number of distinct symbol values: 0..255 for bytes plus 260.
    AC.build_arr(261);
    AC.get_height();
}

// Processes test cases until the count N is 0 or cannot be read.
// Each case prints "CASE k" followed by the segments reported by solve();
// consecutive cases are separated by one blank line.
int main () {
    int cas = 0;

    // scanf must convert exactly one integer. Testing "== 1" also stops on a
    // matching failure (return value 0), which the previous "~scanf(...)" test
    // treated as success and could loop forever on. The "%*c" discards the
    // single character that follows the number, so the line reads in init()
    // start on the next line.
    while (scanf("%d%*c", &N) == 1 && N) {

        init();
        if (cas)
            printf("\n");

        printf("CASE %d\n", ++cas);
        AC.solve(tick);
    }
    return 0;
}

void Suffix_Arr::solve(int p) {

    int k = 0;
    memset(c, 0, sizeof(c));

    for (int i = 0; i < n; i++) {
        if (SA[i] < p)
            k = height[i+1];
        else {
            k = min(height[i], k);
            c[i] = max(c[i], k);
        }
    }

    k = 0;
    for (int i = n - 1; i >= 0; i--) {
        if (SA[i] < p) {
            k = height[i];
        } else {
            c[i] = max(c[i], k);
            k = min(height[i], k);
        }
    }

    vector<state> vec, ans;
    for (int i = 0; i < n; i++) {
        if (c[i] >= 1)
            vec.push_back(state(SA[i], c[i]));
    }

    sort(vec.begin(), vec.end(), sort_pos);
    int mv = -1;
    for (int i = 0; i < vec.size(); i++) {
        if (vec[i].pos + vec[i].len <= mv)
            continue;
        ans.push_back(vec[i]);
        mv = vec[i].pos + vec[i].len;
    }

    sort(ans.begin(), ans.end(), sort_len);
    for (int i = 0; i < N && i < ans.size(); i++) {
        printf("INFRINGING SEGMENT %d LENGTH %d POSITION %d\n", i+1, ans[i].len, ans[i].pos - p); 
        for (int j = 0; j < ans[i].len; j++)
            printf("%c", s[ans[i].pos + j]);
        printf("\n");
    }
}

void Suffix_Arr::init() {
    n = 0;
    memset(s, 0, sizeof(s));
    memset(height, 0, sizeof(height));
}

// Appends one input line to the symbol sequence, followed by a newline
// symbol (the line terminator counts as a character of the text).
//
// str: NUL-terminated line without its trailing newline.
void Suffix_Arr::add (char *str) {
    const int len = static_cast<int>(strlen(str));
    for (int i = 0; i < len; i++) {
        // Go through unsigned char: plain char may be signed, and a negative
        // symbol would become a negative bucket index in build_arr.
        s[n++] = static_cast<unsigned char>(str[i]);
    }
    s[n++] = '\n';
}

// Fills rank (the inverse of SA) and height, where height[r] is the length
// of the common prefix of the suffixes at sorted ranks r-1 and r.
//
// The last position, n-1, is skipped. The caller stores a terminating 0
// there; this relies on that terminator being the unique smallest symbol,
// so that every other position has rank >= 1 and the comparison loop stops.
void Suffix_Arr::get_height () {
    for (int r = 0; r < n; ++r)
        rank[SA[r]] = r;

    // Moving from position pos to pos+1 can shorten the match by at most
    // one, so the previous length minus one is reused as the starting point.
    int lcp = 0;
    for (int pos = 0; pos + 1 < n; ++pos) {
        if (lcp > 0)
            --lcp;

        const int prev = SA[rank[pos] - 1];
        for (; s[pos + lcp] == s[prev + lcp]; ++lcp) {
        }
        height[rank[pos]] = lcp;
    }
}

// Builds SA, the start positions of all n suffixes of s in sorted order, by
// repeatedly doubling the compared prefix length and counting-sorting.
//
// m: number of distinct symbol values; every s[i] is used directly as a
//    bucket index, so it must lie in [0, m).
// Uses tmp_one/tmp_two as key arrays and c as the bucket counter.
void Suffix_Arr::build_arr(int m) {
    // x holds the current sort key of each position; y is scratch.
    int *x = tmp_one, *y = tmp_two;

    // Initial pass: counting sort of positions by their single symbol.
    for (int i = 0; i < m; i++) c[i] = 0;
    for (int i = 0; i < n; i++) c[x[i] = s[i]]++;
    for (int i = 1; i < m; i++) c[i] += c[i-1];
    for (int i = n-1; i >= 0; i--) SA[--c[x[i]]] = i;

    // Each round sorts by the pair (key of i, key of i + k), doubling k.
    for (int k = 1; k <= n; k <<= 1) {

        // y lists positions ordered by the second half of the pair:
        // the last k positions have no second half and come first, followed
        // by SA[i] - k in the order given by the current SA.
        int mv = 0;
        for (int i = n - k; i < n; i++) y[mv++] = i;
        for (int i = 0; i < n; i++) if (SA[i] >= k)
            y[mv++] = SA[i] - k;

        // Counting sort of y by the first half of the pair; walking y from
        // the back keeps the second-half order within equal first keys.
        for (int i = 0; i < m; i++) c[i] = 0;
        for (int i = 0; i < n; i++) c[x[y[i]]]++;
        for (int i = 1; i < m; i++) c[i] += c[i-1];
        for (int i = n - 1; i >= 0; i--) SA[--c[x[y[i]]]] = y[i];

        // After the swap y holds the old keys and x receives the new ones.
        swap(x, y);
        mv = 1;
        x[SA[0]] = 0;

        // Neighbours in SA share a key only if both halves of their pairs match.
        // NOTE(review): SA[i] + k can index past n-1; this appears to rely on
        // && short-circuiting together with a unique terminator symbol at the
        // end of s — confirm before reusing on input without one.
        for (int i = 1; i < n; i++)
            x[SA[i]] = (y[SA[i-1]] == y[SA[i]] && y[SA[i-1] + k] == y[SA[i] + k] ? mv - 1 : mv++);

        // All keys distinct: the order is final.
        if (mv >= n)
            break;
        // The next round only needs as many buckets as there are keys.
        m = mv;
    }
}

uva 10526 - Intellectual Property(后缀数组)