首页 > 代码库 > python实例
python实例
# encoding:utf-8
"""Push one baike XML file into an offdb store and record its page URL.

Created on 2014-07-14

@author: caoshouxin
"""
import os
import re
import os.path
from lxml import etree
from sogou import offdb,docid
import traceback
import struct
import logging as L
from time import localtime,strftime

L.basicConfig(level=L.INFO,
              format='[%(asctime)s] %(levelname)-8s %(message)s')

filename = "baike_soso_upload_20140717-160704.26523.xml"
# Single-argument parenthesised form prints the same on Python 2 and 3.
print("文件操作" + os.getcwd())

# How much of the file head is scanned for the <lemmaId>/<action> tags.
_HEAD_SIZE = 1024

# re.S lets ".*?" span the line breaks that normally separate the two tags;
# the original re.M flag had no effect because the pattern has no ^ or $.
_LEMMA_ACTION_RE = re.compile(
    "<lemmaId>(.*?)</lemmaId>.*?<action>(.*?)</action>", re.S)
_LEMMA_RE = re.compile("<lemmaId>(.*?)</lemmaId>")


class sosobaikeProcess(object):
    """Process a single baike XML file at construction time.

    Constructing an instance opens an offdb connection, extracts the lemma id
    and action type from the head of ``filename``, appends the entry URL to a
    per-day/per-type list file in the current directory, and stores the entry
    in offdb under the packed lemma id.
    """

    def __init__(self, filename, ip="127.0.0.1", port="9999"):
        """Run the whole upload for ``filename``.

        Args:
            filename: path of the XML file to process.
            ip: address passed to ``offdb.QuickAdapter.open``.
            port: port passed to ``offdb.QuickAdapter.open``.
        """
        url_beg = "http://baike.sogou.com/v"
        url_end = ".htm\n"
        self.file_name = filename
        self.offdb_rand = offdb.QuickAdapter()
        self.offdb_rand.open(ip, port, 5)

        result_tup = self.getlemmaId_type()
        if result_tup is None:
            # Nothing to upload; release the connection opened above.
            self.offdb_rand.close()
            return

        # getlemmaId_type() returns a 2-tuple; the original unpacked three
        # names here and therefore always raised ValueError.
        lemmaId, baike_type = result_tup
        try:
            # NOTE(review): the original never defined where ``value`` comes
            # from. The full file content is assumed to be the payload --
            # confirm against the offdb consumers.
            value = self._read_payload()
            now_time = strftime("%Y-%m-%d", localtime())
            outputFile = "sosobaike_" + now_time + "_" + baike_type
            with open(outputFile, 'a') as outf:
                # The original wrote url_beg + url_end, i.e. the same
                # id-less URL for every entry.
                outf.write(url_beg + lemmaId + url_end)
        except Exception:
            # put_qdb() is what normally closes the connection; do it here
            # when we never get that far.
            self.offdb_rand.close()
            raise
        self.put_qdb(lemmaId, value)

    def _read_payload(self):
        """Return the raw content of ``self.file_name``."""
        with open(self.file_name, "rb") as f:
            return f.read()

    def put_qdb(self, lemmaId, value):
        """Store ``value`` in offdb under the packed integer ``lemmaId``.

        Failures are logged, not raised. The offdb connection is closed
        before returning, whatever the outcome.

        Args:
            lemmaId: lemma id as a decimal string (or anything ``int()``
                accepts); packed with ``struct.pack('i', ...)`` as the key.
            value: payload handed to ``offdb_rand.put``.
        """
        try:
            key = struct.pack('i', int(lemmaId))
            ret = self.offdb_rand.put(key, value, 0, 5)
            if ret == 0 or ret == 1:
                L.info("put file %s/%s success %d",
                       self.file_name, lemmaId, 1)
            else:
                # This class defines no offdb_reconnect; the original call
                # raised AttributeError, which the handler below swallowed.
                # Use the hook only if a subclass or mixin provides it.
                reconnect = getattr(self, "offdb_reconnect", None)
                if callable(reconnect):
                    reconnect(5, 3)
                else:
                    L.error("put file %s/%s failed: put returned %r",
                            self.file_name, lemmaId, ret)
        except Exception:
            L.error("put file %s/%s err %d because:%s",
                    self.file_name, lemmaId, 1, traceback.format_exc())
        finally:
            self.offdb_rand.close()

    def getlemmaId_type(self):
        """Extract the lemma id and action type from the head of the file.

        Returns:
            ``(lemmaId, baike_type)`` as strings. ``baike_type`` is the text
            of the ``<action>`` tag, or ``"update"`` when only ``<lemmaId>``
            is found. Returns ``None`` when the file does not exist or no
            ``<lemmaId>`` tag is found in the scanned head.
        """
        if not os.path.isfile(self.file_name):
            L.info("put file%s not found", self.file_name)
            return None

        # To save memory and speed up matching, only the head of the file
        # (the first 1024 bytes) is read.
        with open(self.file_name) as f:
            filecontent = f.read(_HEAD_SIZE)

        m = _LEMMA_ACTION_RE.search(filecontent)
        if m is not None:
            lemmaId = m.group(1)
            baike_type = m.group(2)
        else:
            m_1 = _LEMMA_RE.search(filecontent)
            if m_1 is None:
                L.info("put file %s not found result", self.file_name)
                return None
            # The original read m.group(1) here, but m is None on this path.
            lemmaId = m_1.group(1)
            baike_type = "update"

        L.info("put file %s lemmaId=%s, type=%s ",
               self.file_name, lemmaId, baike_type)
        return (lemmaId, baike_type)
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。