首页 > 代码库 > URL去重

URL去重

# Deduplicate a list of host names by the IP address they resolve to.
#
# Host names are read one per line from an input file, each one is resolved
# with socket.getaddrinfo(), and only the first host name seen for every
# distinct IP address is kept. The surviving host names are written to an
# output file, one per line.
import socket

# Maps resolved IP address -> first host name that resolved to that address.
dictlist = {}


def ReadHost(path='d:/sss.txt'):
    """Return the host names listed in *path*, one per line.

    Surrounding whitespace (including a trailing '\\r' left by Windows line
    endings) is removed and blank lines are skipped.

    Args:
        path: File to read. Defaults to the originally hard-coded location.

    Returns:
        A list of host-name strings in file order.
    """
    hosts = []
    # Text mode so the lines are str on both Python 2 and Python 3.
    with open(path, 'r') as obn:
        for line in obn:
            line = line.strip()
            if line:
                hosts.append(line)
    return hosts


def SysDNS(hosts=None):
    """Resolve every host and record the first host seen per IP in dictlist.

    Only the first address returned by socket.getaddrinfo() for a host is
    considered. Hosts that cannot be resolved are skipped.

    Args:
        hosts: Iterable of host names. When None, the list is loaded with
            ReadHost() from its default file.
    """
    if hosts is None:
        hosts = ReadHost()
    for host in hosts:
        try:
            myaddrs = socket.getaddrinfo(host, None)
        except (socket.herror, socket.gaierror, UnicodeError):
            # Unresolvable name. UnicodeError is what Python 3 raises for
            # names that cannot be IDNA-encoded (e.g. an empty label).
            continue
        if not myaddrs:
            continue
        # Each entry is (family, type, proto, canonname, sockaddr);
        # sockaddr[0] is the textual IP address.
        addrs = myaddrs[0][4][0]
        # Keep the first host for an address; later duplicates are dropped.
        dictlist.setdefault(addrs, host)


def showDict(path="d:/out.txt"):
    """Write the deduplicated host names from dictlist to *path*.

    Args:
        path: File to write. Defaults to the originally hard-coded location.
            One host name is written per line.
    """
    with open(path, "w") as fw:
        for host in dictlist.values():
            fw.write(host + "\n")


if __name__ == "__main__":
    SysDNS()
    showDict()