首页 > 代码库 > poj 2778 DNA Sequence(AC自动机+矩阵快速幂)

poj 2778 DNA Sequence(AC自动机+矩阵快速幂)

题目链接:poj 2778 DNA Sequence

题目大意:给定一些含有疾病的DNA序列,现在给定DNA长度,问有多少种不同的DNA序列是健康的。

解题思路:对DNA片段建立AC自动机,因为最多10个串,每个串最长为10,所以最多可能有100个节点,在长度为n时

以每个节点终止的健康字符串个数形成一个状态集,通过AC自动机形成的边可以推导出n+1的状态集,走到单词节点是

非法的,所以同样的我们可以先走到单词节点,但是从单词节点不向后转移。这样可以构造一个矩阵,剩下的就是矩阵

快速幂。注意的一点是,因为是AC自动机,所以单词节点也包括last非空的节点。

#include <cstdio>
#include <cstring>
#include <queue>
#include <vector>
#include <iostream>
#include <algorithm>

using namespace std;

const int maxn = 105;
const int sigma_size = 4;
const int mod = 100000;
typedef long long ll;

struct Mat {
    int r, c;
    ll s[maxn][maxn];
    Mat (int r = 0, int c = 0) {
        init(r, c);
    }
    void init (int r, int c) {
        this->r = r;
        this->c = c;
        memset(s, 0, sizeof(s));
    }
    friend Mat operator * (const Mat& a, const Mat& b);
}tmp;

Mat operator * (const Mat& a, const Mat& b) {
    tmp.init(a.r, b.c);
    for (int k = 0; k < a.c; k++) {
        for (int i = 0; i < a.r; i++)
            for (int j = 0; j < b.c; j++)
                tmp.s[i][j] = (tmp.s[i][j] + a.s[i][k] * b.s[k][j]) % mod;
    }
    return tmp;
}

struct Aho_Corasick {
    int sz, g[maxn][sigma_size];
    int tag[maxn], fail[maxn], last[maxn];

    void init();
    int idx(char ch);
    void insert(char* str, int k);
    void getFail();
    void match(char* str);
    void put(int x, int y);
    Mat solve();
}A;

int M, N;
char w[15];

int pow_mat(Mat x, int n) {

    Mat ans(x.r, 1);
    ans.s[0][0] = 1;

    while (n) {
        if (n&1)
            ans = x * ans;
        x = x * x;
        n >>= 1;
    }
    int ret = 0;
    for (int i = 0; i < A.sz; i++) {
        if (A.tag[i] || A.last[i])
            continue;
        ret = (ret + ans.s[i][0]) % mod;
    }
    return ret;
}

int main () {
    while (scanf("%d%d", &M, &N) == 2) {
        A.init();
        for (int i = 1; i <= M; i++) {
            scanf("%s", w);
            A.insert(w, i);
        }
        Mat u = A.solve();
        printf("%d\n", pow_mat(u, N));
    }
    return 0;
}

Mat Aho_Corasick::solve() {
    getFail();
    Mat u(sz, sz);

    for (int i = 0; i < sz; i++) {
        if (tag[i] || last[i])
            continue;

        for (int j = 0; j < 4; j++) {
            int t = i;
            while (t && g[t][j] == 0)
                t = fail[t];

            t = g[t][j];
            u.s[t][i]++;
        }
    }
    return u;
}

void Aho_Corasick::init() {
    sz = 1;
    tag[0] = 0;
    memset(g[0], 0, sizeof(g[0]));
}

int Aho_Corasick::idx(char ch) {
    switch (ch) {
        case ‘A‘:
            return 0;
        case ‘C‘:
            return 1;
        case ‘G‘:
            return 2;
        case ‘T‘:
            return 3;
    }
    return 4;
}

void Aho_Corasick::put(int x, int y) {
}

void Aho_Corasick::insert(char* str, int k) {
    int u = 0, n = strlen(str);

    for (int i = 0; i < n; i++) {
        int v = idx(str[i]);
        if (g[u][v] == 0) {
            tag[sz] = 0;
            memset(g[sz], 0, sizeof(g[sz]));
            g[u][v] = sz++;
        }
        u = g[u][v];
    }
    tag[u] = k;
}

void Aho_Corasick::match(char* str) {
    int n = strlen(str), u = 0;
    for (int i = 0; i < n; i++) {
        int v = idx(str[i]);
        while (u && g[u][v] == 0)
            u = fail[u];

        u = g[u][v];

        if (tag[u])
            put(i, u);
        else if (last[u])
            put(i, last[u]);
    }
}

void Aho_Corasick::getFail() {
    queue<int> que;

    for (int i  = 0; i < sigma_size; i++) {
        int u = g[0][i];
        if (u) {
            fail[u] = last[u] = 0;
            que.push(u);
        }
    }

    while (!que.empty()) {
        int r = que.front();
        que.pop();

        for (int i = 0; i < sigma_size; i++) {
            int u = g[r][i];

            if (u == 0) {
                g[r][i] = g[fail[r]][i];
                continue;
            }

            que.push(u);
            int v = fail[r];
            while (v && g[v][i] == 0)
                v = fail[v];

            fail[u] = g[v][i];
            last[u] = tag[fail[u]] ? fail[u] : last[fail[u]];
        }
    }
}

poj 2778 DNA Sequence(AC自动机+矩阵快速幂)