首页 > 代码库 > python 多线程 基于正则表达式的多线程文本替换功能实现

python 多线程 基于正则表达式的多线程文本替换功能实现

#!/usr/bin/env python3
# Regex-based bulk text replacement (Python 3).
#
# Reads (pattern, replacement) pairs from a CSV file and applies them, as
# regular expressions, to every text file with a matching extension below a
# source directory.  Files are fanned out over a multiprocessing.Pool, i.e.
# over worker *processes* (the historical names below say "thread").
#
# NOTE: several parameters are named ``str``, ``file`` and ``filter``.  They
# shadow builtins, but they are kept because they are part of the public
# signatures that callers may use as keyword arguments.
import os
import os.path
import sys
import re
import shutil
import csv
from multiprocessing import Pool  # process pool used to fan out over files

# Characters that dealSpecialChars() escapes so they match literally.
specialChars = ['.', '^', '$', '*', '+', '?', '\\', '[', ']', '|', '(', ')',
                '{', '}']
g_max_thread_pool_size = 30  # upper bound on the number of worker processes


def dealSpecialChars(str):
    """Return ``str`` with every regex metacharacter backslash-escaped.

    Only characters listed in ``specialChars`` are escaped.  The result is a
    pattern that matches the input text literally.
    """
    # Backslash escaping is used instead of wrapping in "[...]" because
    # "[^]" and "[\]" are not valid character classes.
    return "".join("\\" + c if c in specialChars else c for c in str)


def readDictsFromCsv(filePath):
    """Load the replacement table from a two-column CSV file.

    Each row is ``pattern,replacement`` with ``,`` as delimiter and ``|`` as
    quote character.  Rows with an empty pattern or without a replacement
    column are skipped.

    Returns:
        dict mapping pattern (str) to replacement (str).

    Raises:
        OSError: if the file cannot be opened.
    """
    dicts = {}
    # The csv module requires newline="" so that it can do its own
    # line-ending handling.
    with open(filePath, "r", newline="") as csvfile:
        dictsReader = csv.DictReader(
            csvfile,
            fieldnames=["srcName", "newName"],
            restkey=None,
            delimiter=",",
            quotechar="|",
        )
        for d in dictsReader:
            srcName = d["srcName"]
            newName = d["newName"]
            # An empty pattern matches between every character, and a missing
            # replacement (None) makes re.sub raise; neither is usable.
            if not srcName or newName is None:
                continue
            dicts[srcName] = newName
    return dicts


BinaryExtList = ['.bmp', '.avi', '.res', '.xls', '.doc', '.dll', '.lib',
                 '.bpl', '.exe', '.chm']
replaceDicts = {r"RNC820V400R008C00SPC500": r"93"}


def ApplyReplace(str, keys, replaceDicts):
    """Apply every pattern in ``keys`` to ``str`` in order.

    Args:
        str: the text to transform.
        keys: iterable of regex patterns; order matters because each
            substitution works on the result of the previous one.
        replaceDicts: mapping from pattern to replacement template.

    Returns:
        The transformed text.  A pattern that cannot be applied (invalid
        regex, invalid replacement, missing from ``replaceDicts``) is
        reported on stdout and skipped.
    """
    ret = str
    for pattern in keys:
        try:
            ret = re.sub(pattern, replaceDicts[pattern], ret)
        except (re.error, KeyError, TypeError) as err:
            print("Unexpected error ApplyReplace(str, keys, replaceDicts):",
                  str, err)
        # Trace output kept from the original script: input and current result.
        print(str, ret)
    return ret


def NeedReplace(str, keys):
    """Return True if any pattern in ``keys`` matches somewhere in ``str``.

    Patterns that are not valid regular expressions are reported on stdout
    and treated as non-matching.
    """
    for pattern in keys:
        try:
            if re.search(pattern, str):
                return True
        except (re.error, TypeError) as err:
            print("Unexpected error NeedReplace(str, keys):", str, ":", err)
    return False


defaultExtList = ['.txt', '.xml']


def findFile(srcDir, filter=None):
    """Recursively collect files below ``srcDir`` whose extension is allowed.

    Args:
        srcDir: directory to scan.
        filter: collection of lower-case extensions including the dot;
            defaults to ``defaultExtList``.

    Returns:
        A flat list of file paths.

    Raises:
        OSError: if a directory cannot be listed.
    """
    if filter is None:
        filter = defaultExtList
    filelist = []
    for name in os.listdir(srcDir):
        # Join against the directory being scanned (not a global root) so
        # that recursion really descends into sub-directories.
        fullPath = os.path.join(srcDir, name)
        if os.path.isdir(fullPath):
            filelist.extend(findFile(fullPath, filter))
        elif os.path.splitext(fullPath)[1].lower() in filter:
            filelist.append(fullPath)
    return filelist


def ReplaceAllStrInFile(file, dicts, keys, filter=None):
    """Apply the patterns in ``keys`` to every line of ``file``, in place.

    The result is written to ``file + 'temp'`` first and then moved over the
    original, so a failure never leaves a half-written file behind.  A file
    in which no line changed is left untouched.

    Args:
        file: path of the file to convert.
        dicts: mapping from pattern to replacement.
        keys: patterns to apply, in order; each must be a key of ``dicts``.
        filter: allowed lower-case extensions; defaults to ``defaultExtList``.

    Returns:
        True if the file was processed, False if it was skipped because of
        its extension or if reading/writing failed.
    """
    if filter is None:
        filter = defaultExtList
    print("ReplaceAllStrInFile:file-", file, "begin!")
    for key in keys:
        print(key, dicts[key])  # for test
    fullPath = file
    if os.path.splitext(fullPath)[1].lower() not in filter:
        return False

    tempPath = fullPath + "temp"
    needRewrite = False
    try:
        # newline="" disables newline translation so that the original line
        # endings are written back unchanged.
        with open(fullPath, "r", newline="") as srcFile, \
                open(tempPath, "w", newline="") as destFile:
            for line in srcFile:
                if NeedReplace(line, keys):
                    newLine = ApplyReplace(line, keys, dicts)
                    if newLine != line:
                        needRewrite = True
                    line = newLine
                destFile.write(line)
        if needRewrite:
            # Single-step replacement; avoids the remove-then-rename window
            # in which the original would be gone.
            os.replace(tempPath, fullPath)
        else:
            os.remove(tempPath)
    except (OSError, UnicodeError):
        print("convert file:", fullPath, "failed!")
        try:
            os.remove(tempPath)
        except OSError:
            pass  # temp file was never created or is already gone
        return False
    print("convert file:", fullPath, "success!")
    return True


def ReplaceAllStrInFileByRows(srcfile, csvfilePath, maxRow=10):
    """Apply the CSV replacement table to ``srcfile`` in batches.

    Patterns are sorted longest first so that a short pattern cannot consume
    part of a longer one, then applied ``maxRow`` patterns per pass over the
    file.

    Args:
        srcfile: path of the file to convert.
        csvfilePath: path of the CSV replacement table.
        maxRow: number of patterns per pass; must be >= 1.

    Returns:
        True if every pass succeeded, False if ``maxRow`` is invalid or any
        pass was skipped or failed.
    """
    if maxRow < 1:
        return False
    dicts = readDictsFromCsv(csvfilePath)
    keys = sorted(dicts, key=len, reverse=True)  # longest pattern first
    allOk = True
    for start in range(0, len(keys), maxRow):
        batch = keys[start:start + maxRow]
        if not ReplaceAllStrInFile(srcfile, dicts, batch):
            allOk = False
        print(" ".join(batch))
    return allOk


def f(x):
    """Pool worker: ``x`` is a ``[file path, csv path]`` pair."""
    return ReplaceAllStrInFileByRows(x[0], x[1])


def main(argv=None):
    """Command-line entry point; returns the process exit code."""
    args = sys.argv if argv is None else argv
    if len(args) != 3:
        # Raw string: the example paths contain "\t" and "\s".
        print(r"""
        usage: python testcsv.py D:\ss\temp.csv D:\ss\test
        """)
        return 1

    csvfilePath = args[1]
    srcPath = args[2]

    filelist = findFile(srcPath)
    if not filelist:
        # Pool(processes=0) raises ValueError, and there is nothing to do.
        return 0

    dataItems = [[file, csvfilePath] for file in filelist]
    pool_size = min(g_max_thread_pool_size, len(filelist))
    with Pool(processes=pool_size) as pool:
        pool.map(f, dataItems)
    return 0


if __name__ == "__main__":
    sys.exit(main())

 

python 多线程 基于正则表达式的多线程文本替换功能实现