首页 > 代码库 > python实例

python实例

# encoding:utf-8
'''
Created on 2014-07-14

@author: caoshouxin

Upload a Sogou/Soso baike XML file into an offdb store, keyed by the
lemma id found in the file, and record the lemma's URL in a dated,
per-action output file.
'''
import os
import re
import os.path
from lxml import etree
from sogou import offdb,docid
import traceback
import struct
import logging as L
from time import localtime,strftime

L.basicConfig(level=L.INFO, format='[%(asctime)s] %(levelname)-8s %(message)s')

filename = "baike_soso_upload_20140717-160704.26523.xml"
print("文件操作" + os.getcwd())


class sosobaikeProcess():
    """Process one baike upload file: parse its lemma id, log its URL, store it.

    All of the work happens in the constructor; the offdb connection that
    it opens is closed again before the constructor returns normally.
    """

    # Matches a lemma id followed (on the same line) by an action tag.
    # NOTE(review): re.M does not let "." cross newlines, so an <action>
    # tag on a later line than <lemmaId> is not matched and the lemma is
    # treated as an "update". If that is unintended, re.S is the flag
    # that was probably meant -- confirm against real input files.
    _LEMMA_AND_ACTION_RE = re.compile(
        "<lemmaId>(.*?)</lemmaId>.*?<action>(.*?)</action>", re.M)
    # Fallback: a lemma id with no action tag on the same line.
    _LEMMA_ONLY_RE = re.compile("<lemmaId>(.*?)</lemmaId>", re.M)

    def __init__(self, filename, ip="127.0.0.1", port="9999"):
        """Open the offdb connection and process ``filename``.

        Args:
            filename: path of the baike XML file to process.
            ip: offdb server address passed to ``QuickAdapter.open``.
            port: offdb server port passed to ``QuickAdapter.open``.
        """
        url_beg = "http://baike.sogou.com/v"
        url_end = ".htm\n"
        self.file_name = filename
        self.offdb_rand = offdb.QuickAdapter()
        # Third argument is presumably a timeout -- TODO confirm with offdb docs.
        self.offdb_rand.open(ip, port, 5)
        now_time = strftime("%Y-%m-%d", localtime())

        result_tup = self.getlemmaId_type()
        if result_tup is None:
            # Nothing to upload; put_qdb() will not run, so release the
            # connection here instead of leaking it.
            self.offdb_rand.close()
            return

        # getlemmaId_type() returns exactly two items; the original code
        # unpacked three and always failed with ValueError.
        lemmaId, baike_type = result_tup

        # NOTE(review): the stored value is assumed to be the raw content
        # of the whole file -- the original never defined it. Confirm.
        with open(self.file_name, "rb") as src:
            value = src.read()

        output_file = "sosobaike_" + now_time + "_" + baike_type
        with open(output_file, 'a') as outf:
            # The lemma id belongs between the URL prefix and suffix; the
            # original wrote "http://baike.sogou.com/v.htm" for every file.
            outf.write(url_beg + lemmaId + url_end)

        self.put_qdb(lemmaId, value)

    def put_qdb(self, lemmaId, value):
        """Store ``value`` in offdb under the packed integer ``lemmaId``.

        Failures are logged, not raised. The offdb connection is closed
        afterwards in every case.

        Args:
            lemmaId: lemma id as a string or int; converted with ``int()``.
            value: payload handed to ``offdb_rand.put`` unchanged.
        """
        try:
            # Key is the lemma id packed as a native C int.
            key = struct.pack('i', int(lemmaId))
            ret = self.offdb_rand.put(key, value, 0, 5)
            if ret == 0 or ret == 1:
                L.info("put file %s/%s success %d", self.file_name, lemmaId, 1)
            else:
                # NOTE(review): offdb_reconnect is not defined in the part
                # of the file visible here; if it does not exist, the
                # AttributeError is caught and logged below.
                self.offdb_reconnect(5, 3)
        except Exception:
            L.error("put file %s/%s err %d because:%s",
                    self.file_name, lemmaId, 1, traceback.format_exc())
        finally:
            self.offdb_rand.close()

    def getlemmaId_type(self):
        """Extract the lemma id and action type from the head of the file.

        Returns:
            ``(lemmaId, baike_type)`` as strings, where ``baike_type`` is
            the ``<action>`` text or ``"update"`` when no action tag is
            matched; ``None`` when the file does not exist or contains no
            ``<lemmaId>`` tag in its first 1024 bytes.
        """
        if not os.path.isfile(self.file_name):
            L.info("put file%s not found", self.file_name)
            return None

        # To save memory and speed up matching, read only the first
        # 1024 bytes of the file.
        with open(self.file_name) as f:
            filecontent = f.read(1024)

        m = self._LEMMA_AND_ACTION_RE.search(filecontent)
        if m is not None:
            lemmaId = m.group(1)
            baike_type = m.group(2)
        else:
            m_1 = self._LEMMA_ONLY_RE.search(filecontent)
            if m_1 is None:
                L.info("put file %s not found result", self.file_name)
                return None
            # Use the fallback match; the original read from the failed
            # first match (None) here and raised AttributeError.
            lemmaId = m_1.group(1)
            baike_type = "update"

        L.info("put file %s lemmaId=%s, type=%s ",
               self.file_name, lemmaId, baike_type)
        return (lemmaId, baike_type)