首页 > 代码库 > 《Cracking the Coding Interview》——第18章:难题——题目13

《Cracking the Coding Interview》——第18章:难题——题目13

2014-04-29 04:40

题目:给定一个字母组成的矩阵,和一个包含一堆单词的词典。请从矩阵中找出一个最大的子矩阵,使得从左到右每一行,从上到下每一列组成的单词都包含在词典中。

解法:O(n^3)级别的时间和空间进行动态规划。这道题目和第17章的最后一题很像,由于这题的时间复杂度实在是高,我动手写了字典树进行加速。如果单纯用哈希表来作为词典,查询效率实际会达到O(n)级别,导致最终的算法复杂度为O(n^4)。用字典树则可以加速到O(n^3),因为对于一个字符串“abcd”,只需要从字典树的根节点出发往下走,就可以一次性判断“a”、“ab”、“abc”、“abcd”是否包含在字典里,而用哈希表无法做到这样的效率。动态规划过程仍然是O(n^4)级别,字典树只是将查单词的过程中进行了加速,从O(n^4)加速到了O(n^3)。空间复杂度为O(n^3),用于标记横向和纵向的每一个子段是否包含在字典里。

代码:

  1 // 18.13 There is a matrix of lower case letters. Given a dictionary of words, you have to find the maximum subrectangle, such that every row reading from left to right, and every column reading from top to bottom is a word in the dictionary. Return the area as the result.
  2 #include <iostream>
  3 #include <string>
  4 #include <unordered_set>
  5 #include <vector>
  6 using namespace std;
  7 
  8 const int MAX_NODE = 10000;
  9 
 10 struct TrieNode {
 11     bool is_word;
 12     char ch;
 13     vector<int> child;
 14     TrieNode(char _ch = 0): is_word(false), ch(_ch), child(vector<int>(26, -1)) {};
 15 };
 16 int node_count;
 17 TrieNode nodes[MAX_NODE];
 18 
 19 void insertWordIntoTrie(int root, const string &s)
 20 {
 21     int len = s.length();
 22     
 23     for (int i = 0; i < len; ++i) {
 24         if (nodes[root].child[s[i] - a] < 0) {
 25             nodes[root].child[s[i] - a] = node_count++;
 26             root = nodes[root].child[s[i] - a];
 27             nodes[root].ch = s[i];
 28         } else {
 29             root = nodes[root].child[s[i] - a];
 30         }
 31         if (i == len - 1) {
 32             nodes[root].is_word = true;
 33         }
 34     }
 35 }
 36 
 37 int constructTrie(const unordered_set<string> &dict)
 38 {
 39     int root = node_count++;
 40     
 41     unordered_set<string>::const_iterator usit;
 42     for (usit = dict.begin(); usit != dict.end(); ++usit) {
 43         insertWordIntoTrie(root, *usit);
 44     }
 45     
 46     return root;
 47 }
 48 
 49 int main()
 50 {
 51     int i, j, k;
 52     int i1, i2;
 53     string s;
 54     int n, m;
 55     vector<vector<char> > matrix;
 56     vector<vector<vector<bool> > > is_row_word;
 57     vector<vector<vector<bool> > > is_col_word;
 58     vector<int> dp;
 59     unordered_set<string> dict;
 60     int root;
 61     int ptr;
 62     int max_area;
 63     
 64     while (cin >> n && n > 0) {
 65         node_count = 0;
 66         for (i = 0; i < n; ++i) {
 67             cin >> s;
 68             dict.insert(s);
 69         }
 70         root = constructTrie(dict);
 71         
 72         cin >> n >> m;
 73         
 74         matrix.resize(n);
 75         for (i = 0; i < n; ++i) {
 76             matrix[i].resize(m);
 77         }
 78         for (i = 0; i < n; ++i) {
 79             cin >> s;
 80             for (j = 0; j < m; ++j) {
 81                 matrix[i][j] = s[j];
 82             }
 83         }
 84         
 85         is_row_word.resize(n);
 86         for (i = 0; i < n; ++i) {
 87             is_row_word[i].resize(m);
 88             for (j = 0; j < m; ++j) {
 89                 is_row_word[i][j].resize(m);
 90             }
 91         }
 92 
 93         is_col_word.resize(m);
 94         for (i = 0; i < m; ++i) {
 95             is_col_word[i].resize(n);
 96             for (j = 0; j < n; ++j) {
 97                 is_col_word[i][j].resize(n);
 98             }
 99         }
100         
101         for (i = 0; i < n; ++i) {
102             for (j = 0; j < m; ++j) {
103                 ptr = root;
104                 for (k = j; k < m; ++k) {
105                     if (ptr < 0) {
106                         is_row_word[i][j][k] = false;
107                         continue;
108                     }
109                     
110                     ptr = nodes[ptr].child[matrix[i][k] - a];
111                     if (ptr < 0) {
112                         is_row_word[i][j][k] = false;
113                         continue;
114                     }
115                     
116                     is_row_word[i][j][k] = nodes[ptr].is_word;
117                 }
118             }
119         }
120         
121         for (i = 0; i < m; ++i) {
122             for (j = 0; j < n; ++j) {
123                 ptr = root;
124                 for (k = j; k < n; ++k) {
125                     if (ptr < 0) {
126                         is_col_word[i][j][k] = false;
127                         continue;
128                     }
129                     
130                     ptr = nodes[ptr].child[matrix[k][i] - a];
131                     if (ptr < 0) {
132                         is_col_word[i][j][k] = false;
133                         continue;
134                     }
135                     
136                     is_col_word[i][j][k] = nodes[ptr].is_word;
137                 }
138             }
139         }
140         
141         max_area = 0;
142         dp.resize(m);
143         for (i1 = 0; i1 < n; ++i1) {
144             for (i2 = i1; i2 < n; ++i2) {
145                 k = 0;
146                 for (j = 0; j < m; ++j) {
147                     dp[j] = is_col_word[j][i1][i2] ? (++k) : (k = 0);
148                     if (!dp[j]) {
149                         continue;
150                     }
151                     
152                     for (i = i1; i <= i2; ++i) {
153                         if (!is_row_word[i][j - dp[j] + 1][j]) {
154                             break;
155                         }
156                     }
157                     if (i > i2) {
158                         max_area = max(max_area, (i2 - i1 + 1) * dp[j]);
159                     }
160                 }
161             }
162         }
163         
164         cout << max_area << endl;
165         
166         // clear up data
167         dict.clear();
168         for (i = 0; i < n; ++i) {
169             matrix[i].clear();
170         }
171         matrix.clear();
172         
173         for (i = 0; i < n; ++i) {
174             for (j = 0; j < m; ++j) {
175                 is_row_word[i][j].clear();
176             }
177             is_row_word[i].clear();
178         }
179         is_row_word.clear();
180         
181         for (i = 0; i < m; ++i) {
182             for (j = 0; j < n; ++j) {
183                 is_col_word[i][j].clear();
184             }
185             is_col_word[i].clear();
186         }
187         is_col_word.clear();
188     }
189     
190     return 0;
191 }