首页 > 代码库 > sphinx配置增量索引和索引合并
sphinx配置增量索引和索引合并
配置增量索引
1,配置csft.conf文件。
其中base为父类(公共的数据库连接配置),src1和tmp_src1都是它的子类,相应配置如下。
# Settings for the searchd daemon.
searchd
{
    # Two listeners: port 9312 with the default protocol, and port 9306
    # using the mysql41 protocol.
    listen          = 9312
    listen          = 9306:mysql41

    # Request and result limits.
    read_timeout    = 5
    max_children    = 30
    max_matches     = 1000

    # Index rotation and loading behaviour.
    seamless_rotate = 0
    preopen_indexes = 0
    unlink_old      = 1

    # Runtime files.
    pid_file        = /usr/local/coreseek/var/log/searchd.pid
    log             = /usr/local/coreseek/var/log/searchd.log
    query_log       = /usr/local/coreseek/var/log/query.log

    # Intentionally left empty (per the Sphinx docs an empty value disables
    # binary logging).
    binlog_path     =
}
# Global settings: shared MySQL connection parameters.
# The src1 and tmp_src1 sources inherit from this source.
source base
{
    type          = mysql

    # Connection target.
    sql_host      = 127.0.0.1
    sql_port      = 3306
    sql_db        = test

    # Credentials (empty password).
    sql_user      = root
    sql_pass      =

    # Run before the main fetch query to set the connection charset.
    sql_query_pre = SET NAMES utf8

    # Placeholder only; each child source supplies its own sql_query.
    sql_query     =
}
# Main source: fetches every document up to the recorded high-water mark and
# stores that mark in sph_counter so the delta source (tmp_src1) knows where
# to start.
source src1 : base
{
    # Declaring sql_query_pre here replaces the value inherited from base,
    # so the charset setup must be repeated before the counter update.
    sql_query_pre = SET NAMES utf8
    # Record the highest document id covered by this main index build.
    # COALESCE keeps the counter non-NULL when the table is empty.
    sql_query_pre = REPLACE INTO sph_counter SELECT 1, COALESCE(MAX(id), 0) FROM documents

    # The first column (id) is the document ID. It must NOT also be declared
    # as an attribute below.
    sql_query = \
        SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
        FROM documents \
        WHERE id <= ( SELECT max_doc_id FROM sph_counter WHERE counter_id = 1 )

    sql_attr_uint      = group_id
    sql_attr_timestamp = date_added

    # Full-text fields that are also stored as string attributes.
    sql_field_string   = title
    sql_field_string   = content

    # Charset setup for command-line lookups.
    sql_query_info_pre = SET NAMES utf8
}
# Main full-text index, built from source src1.
index src1
{
    source           = src1
    # Index files are stored under the base name "test1".
    path             = /usr/local/coreseek/var/data/test1

    # Attribute storage and memory locking.
    docinfo          = extern
    mlock            = 0

    # Chinese tokenization; the dictionary path is presumably the mmseg
    # dictionary directory.
    charset_type     = zh_cn.utf-8
    charset_dictpath = /usr/local/mmseg3/etc/

    # Text processing.
    morphology       = none
    min_word_len     = 1
    html_strip       = 0
    # index_sp = 1   (disabled)
}
# Delta source: fetches only documents whose id is greater than the
# high-water mark stored in sph_counter (counter_id = 1).
# The charset pre-query (SET NAMES utf8) is inherited from base.
source tmp_src1 : base
{
    # The first column (id) is the document ID. It must NOT also be declared
    # as an attribute below.
    sql_query = \
        SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
        FROM documents \
        WHERE id > ( SELECT max_doc_id FROM sph_counter WHERE counter_id = 1 )

    sql_attr_uint      = group_id
    sql_attr_timestamp = date_added

    # Full-text fields that are also stored as string attributes.
    sql_field_string   = title
    sql_field_string   = content
}
# Delta index, built from source tmp_src1.
# Inherits every setting from index src1 (docinfo, mlock, morphology,
# min_word_len, html_strip, charset_type, charset_dictpath); only the data
# source and the on-disk path differ.
index tmp_src1 : src1
{
    source = tmp_src1
    path   = /usr/local/coreseek/var/data/tmp_src1
}
对应的sql文件为:
-- Sample `documents` table: the data read by the src1 / tmp_src1 sources.
SET FOREIGN_KEY_CHECKS=0;

-- ----------------------------
-- Table structure for documents
-- ----------------------------
DROP TABLE IF EXISTS `documents`;
CREATE TABLE `documents` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `group_id` int(11) NOT NULL,
  `date_added` datetime NOT NULL,
  `title` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
  `content` text NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=10 DEFAULT CHARSET=utf8mb4;

-- ----------------------------
-- Records of documents
-- ----------------------------
-- String literals use plain ASCII single quotes; typographic quotes are not
-- valid SQL. Columns are listed explicitly so the insert does not depend on
-- column order.
INSERT INTO `documents` (`id`, `group_id`, `date_added`, `title`, `content`) VALUES
  (1, 1, '2017-06-06 21:45:58', '中国', '中国真美,地大物博'),
  (2, 1, '2017-06-06 21:45:58', '中国美食', '台北小吃,各地美食'),
  (3, 2, '2017-06-06 21:45:58', '美女之家', '美女之国'),
  (4, 2, '2017-06-06 21:45:58', 'hello', 'this is to test groups'),
  (5, 3, '2017-07-27 17:00:09', '熊猫', '中国国宝'),
  (6, 3, '2017-07-14 17:00:04', '竹子', '熊猫吃竹子'),
  (7, 4, '2017-07-14 17:30:36', '猫科动物', '老虎吃人'),
  (8, 4, '2017-07-14 17:30:36', '猫科动物2', '东北虎'),
  (9, 5, '2017-07-14 17:34:24', '动物园', '老鼠');

SET FOREIGN_KEY_CHECKS=1;
-- Bookkeeping table for incremental indexing: row counter_id = 1 holds the
-- highest document id already covered by the main index.
SET FOREIGN_KEY_CHECKS=0;

-- ----------------------------
-- Table structure for sph_counter
-- ----------------------------
DROP TABLE IF EXISTS `sph_counter`;
CREATE TABLE `sph_counter` (
  `counter_id` int(11) NOT NULL AUTO_INCREMENT,
  `max_doc_id` int(11) DEFAULT NULL,
  PRIMARY KEY (`counter_id`)
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8mb4;

-- ----------------------------
-- Records of sph_counter
-- ----------------------------
-- Seed value: documents with id > 6 are treated as "new" by the delta query.
-- Plain ASCII syntax and an explicit column list replace the typographic
-- quotes, which are not valid SQL.
INSERT INTO `sph_counter` (`counter_id`, `max_doc_id`) VALUES (1, 6);

SET FOREIGN_KEY_CHECKS=1;
其中,sph_counter表中的max_doc_id记录的是coreseek主索引中已经收录的最大文档id。而配置文件的tmp_src1中有这样一句:
sql_query = SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content FROM documents WHERE id>( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
意思是只把id大于max_doc_id的新增文档(即主索引尚未收录的部分)索引到tmp_src1中。
索引合并
将mysql中的最新数据索引到tmp_src1中
# Rebuild only the delta index. Using --all here would also rebuild src1 from
# scratch, which defeats the purpose of incremental indexing.
/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf tmp_src1 --rotate
进行索引合并
# Merge the delta index (tmp_src1) into the main index (src1).
# - The config file is passed explicitly, as in the build command.
# - --rotate is assumed to be needed because searchd is serving src1 while
#   the merge runs (the build command uses it as well).
# - No --merge-dst-range filter: neither index declares a `deleted` attribute
#   to filter on.
# NOTE(review): after a successful merge, advance sph_counter.max_doc_id so
# the next delta build does not re-index rows that were just merged.
/usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf --merge src1 tmp_src1 --rotate
这样,新增加的数据就到了src1的索引内。
sphinx配置增量索引和索引合并
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。