首页 > 代码库 > python 多线程 基于正则表达式的多线程文本替换功能实现
python 多线程 基于正则表达式的多线程文本替换功能实现
#!/usr/bin/env python
"""Batch regular-expression replacement over text files.

A two-column CSV file (pattern, replacement) drives the substitution.  Every
file below a source directory whose extension is in ``defaultExtList`` is
rewritten in place, one worker process per file.

Note: despite the "thread" wording in some names, ``multiprocessing.Pool``
is a pool of worker *processes*.

The code runs unchanged on Python 2.7 and Python 3.
"""
import os
import os.path
import sys
import re
import shutil
import csv
from multiprocessing import Pool  # process pool used to fan out per-file work

# Characters treated as regex metacharacters (kept for backward compatibility;
# dealSpecialChars now delegates to re.escape).
specialChars = ['.', '^', '$', '*', '+', '?', '\\', '[', ']', '|', '(', ')']

# Upper bound on the number of worker processes.
g_max_thread_pool_size = 30

BinaryExtList = ['.bmp', '.avi', '.res', '.xls', '.doc', '.dll', '.lib',
                 '.bpl', '.exe', '.chm']
replaceDicts = {r"RNC820V400R008C00SPC500": r"93"}

# File extensions (lower case, with the leading dot) that are processed.
defaultExtList = ['.txt', '.xml']


def dealSpecialChars(str):
    """Return *str* escaped so that it matches itself literally as a regex.

    The previous hand-rolled version wrapped each metacharacter in ``[...]``,
    which produced invalid patterns for ``^`` and ``\\``; ``re.escape`` handles
    every metacharacter correctly.
    """
    return re.escape(str)


def _open_csv(path):
    """Open *path* the way the ``csv`` module expects on this interpreter."""
    if sys.version_info[0] < 3:
        return open(path, 'rb')  # Python 2 csv wants a binary file
    return open(path, 'r', newline='')  # Python 3 csv wants text, newline=''


def readDictsFromCsv(filePath):
    """Load the pattern -> replacement mapping from a CSV file.

    The first column is the regex pattern (``srcName``), the second the
    replacement (``newName``).  Fields are comma separated and may be quoted
    with ``|``.  Rows with an empty pattern or without a replacement column
    are skipped, since they cannot be applied by ``re.sub``.

    Returns:
        dict mapping pattern string to replacement string.
    """
    dicts = {}
    with _open_csv(filePath) as csvfile:
        reader = csv.DictReader(csvfile, fieldnames=['srcName', 'newName'],
                                restkey=None, delimiter=',', quotechar='|')
        for row in reader:
            src_name = row['srcName']
            new_name = row['newName']
            if not src_name or new_name is None:
                continue
            dicts[src_name] = new_name
    return dicts


def ApplyReplace(str, keys, replaceDicts):
    """Apply every pattern in *keys* to *str*, in order, and return the result.

    A pattern that is not a valid regex, has an invalid replacement template,
    or is missing from *replaceDicts* is reported and skipped so the remaining
    patterns are still applied.
    """
    ret = str
    for pattern in keys:
        try:
            ret = re.sub(pattern, replaceDicts[pattern], ret)
        except (re.error, KeyError):
            print("Unexpected error ApplyReplace(str, keys, replaceDicts): %s"
                  % str)
    return ret


def NeedReplace(str, keys):
    """Return True if at least one pattern in *keys* matches *str*.

    Invalid patterns are reported and treated as non-matching.
    """
    for pattern in keys:
        try:
            if re.search(pattern, str):
                return True
        except re.error:
            print("Unexpected error NeedReplace(str, keys): %s :" % str)
    return False


def findFile(srcDir, filter=None):
    """Recursively collect files under *srcDir* whose extension is in *filter*.

    Args:
        srcDir: directory to scan.
        filter: iterable of lower-case extensions including the dot;
            defaults to ``defaultExtList``.

    Returns:
        Flat list of matching file paths.
    """
    if filter is None:
        filter = defaultExtList
    filelist = []
    for name in os.listdir(srcDir):
        fullPath = os.path.join(srcDir, name)
        if os.path.isdir(fullPath):
            # extend (not append) keeps the result flat; pass the filter down
            # so sub-directories honour the caller's extension list.
            filelist.extend(findFile(fullPath, filter))
        elif os.path.splitext(fullPath)[1].lower() in filter:
            filelist.append(fullPath)
    return filelist


def _remove_quietly(path):
    """Delete *path* if it exists, ignoring failures (best-effort cleanup)."""
    try:
        os.remove(path)
    except OSError:
        pass


def _replace_file(src, dst):
    """Move *src* over *dst*, overwriting it."""
    if hasattr(os, 'replace'):
        os.replace(src, dst)  # atomic overwrite where available (Python 3.3+)
    else:
        # Python 2: os.rename cannot overwrite an existing file on Windows.
        os.remove(dst)
        os.rename(src, dst)


def ReplaceAllStrInFile(file, dicts, keys, filter=defaultExtList):
    """Apply the patterns in *keys* to every line of *file*, in place.

    The new content is written to ``<file>temp`` first and only swapped in
    when at least one line actually changed, so untouched files keep their
    original bytes and timestamps.

    Args:
        file: path of the file to process.
        dicts: mapping pattern -> replacement.
        keys: the patterns from *dicts* to apply, in order.
        filter: extensions (lower case, with dot) that may be processed.

    Returns:
        True on success, False if the file could not be read or written,
        None if the file's extension is not in *filter* (file is skipped).
    """
    fullPath = file
    print('ReplaceAllStrInFile:file- %s begin!' % fullPath)
    if os.path.splitext(fullPath)[1].lower() not in filter:
        return None

    temp_path = fullPath + 'temp'
    changed = False
    try:
        with open(fullPath, 'r') as src_file, open(temp_path, 'w') as dest_file:
            for line in src_file:
                if NeedReplace(line, keys):
                    new_line = ApplyReplace(line, keys, dicts)
                    if new_line != line:
                        changed = True
                    line = new_line
                dest_file.write(line)
        if changed:
            _replace_file(temp_path, fullPath)
        else:
            os.remove(temp_path)
    except (IOError, OSError, UnicodeError):
        print('convert file: %s failed!' % fullPath)
        # Only discard the temp file while the original still exists;
        # otherwise the temp file may be the sole surviving copy.
        if os.path.exists(fullPath):
            _remove_quietly(temp_path)
        return False
    print('convert file: %s success!' % fullPath)
    return True


def ReplaceAllStrInFileByRows(srcfile, csvfilePath, maxRow=10):
    """Apply all replacements from *csvfilePath* to *srcfile* in batches.

    Patterns are applied longest first so that a short pattern cannot clobber
    text that a longer pattern should have matched.  The patterns are
    processed in groups of at most *maxRow* per pass over the file.

    Returns:
        False if *maxRow* is smaller than 1 or any pass failed, else True.
    """
    if maxRow < 1:
        return False
    dicts = readDictsFromCsv(csvfilePath)
    keys = sorted(dicts, key=len, reverse=True)
    ok = True
    for start in range(0, len(keys), maxRow):
        batch = keys[start:start + maxRow]
        if ReplaceAllStrInFile(srcfile, dicts, batch) is False:
            ok = False
    return ok


def f(x):
    """Pool worker: *x* is a ``[source_file, csv_path]`` pair."""
    return ReplaceAllStrInFileByRows(x[0], x[1])


def main(argv=None):
    """Command-line entry point; returns the process exit status."""
    args = sys.argv if argv is None else argv
    if len(args) != 3:
        # Raw string: the example Windows paths contain backslashes.
        print(r'usage: python testcsv.py D:\ss\temp.csv D:\ss\test')
        return 1
    csvfilePath = args[1]
    srcPath = args[2]

    filelist = findFile(srcPath)
    if not filelist:
        # Pool(processes=0) would raise ValueError, and there is nothing to do.
        print('no matching files found under %s' % srcPath)
        return 0

    dataItems = [[path, csvfilePath] for path in filelist]
    pool = Pool(processes=min(g_max_thread_pool_size, len(filelist)))
    try:
        pool.map(f, dataItems)
    finally:
        pool.close()
        pool.join()
    return 0


if __name__ == "__main__":
    sys.exit(main())
python 多线程 基于正则表达式的多线程文本替换功能实现
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。