首页 > 代码库 > python分析日志脚本
python分析日志脚本
#!/usr/bin/env python
# coding:utf-8
"""Summarise a web-server access log per client IP.

Usage: script.py filepath [number]

For every client address the script counts requests, selected HTTP status
codes and bytes sent, then prints a table ordered by traffic.  The optional
``number`` limits the table to the first ``number`` rows.

The code is written to run unchanged on Python 2.6+ and Python 3, which is
why every ``print`` call receives exactly one argument.
"""

import sys
import time


class DisplayFormat(object):
    """Formatting and printing helpers for the report."""

    # Column layout shared by the header, the rule lines and every data row.
    formatstring = '%-18s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'

    def format_size(self, size):
        """Return the integer byte count ``size`` as a short string.

        The value is shifted down to the largest binary unit it reaches
        (K, M, G or T), so the result is truncated, not rounded.  Values
        below 1024, including negative ones, are printed as plain bytes
        with a ``B`` suffix.
        """
        KB = 1 << 10  # 1024 bytes
        MB = 1 << 20  # 1024 ** 2 bytes
        GB = 1 << 30  # 1024 ** 3 bytes
        TB = 1 << 40  # 1024 ** 4 bytes

        if size >= TB:
            return str(size >> 40) + 'T'
        if size >= GB:
            return str(size >> 30) + 'G'
        if size >= MB:
            return str(size >> 20) + 'M'
        if size >= KB:
            return str(size >> 10) + 'K'
        return str(size) + 'B'

    def echo_line(self):
        """Print a horizontal rule laid out with ``formatstring``."""
        # The fourth column is 8 characters wide; a longer run of dashes
        # would push the remaining columns out of alignment with the rows.
        print(self.formatstring % ('-' * 15, '-' * 10, '-' * 12, '-' * 8,
                                   '-' * 10, '-' * 10, '-' * 10, '-' * 10,
                                   '-' * 10, '-' * 10, '-' * 10))

    def echo_head(self):
        """Print the column titles."""
        print(self.formatstring % ('IP', 'Traffic', 'Time', 'Time%',
                                   200, 404, 403, 503, 500, 302, 304))

    def echo_error(self):
        """Print the command-line usage message."""
        print('Usage: ' + sys.argv[0] + ' filepath [number]')

    def echo_time(self):
        """Print the processor time consumed by the script so far."""
        # time.clock() was removed in Python 3.8; process_time() is its
        # replacement.  Fall back to clock() on interpreters that lack it.
        timer = getattr(time, 'process_time', None)
        if timer is None:
            timer = time.clock
        print('The script is running %s second' % timer())


class HostInfo(object):
    """Per-client counters: one per tracked status code, plus size and time."""

    # HTTP status codes that get their own counter.
    status_codes = ('200', '404', '403', '503', '500', '302', '304')

    # Every key held in ``self.host``: the status codes, the total bytes
    # sent ('size') and the number of requests ('time').
    host_info = list(status_codes) + ['size', 'time']

    def __init__(self, host):
        """Create the counter dictionary with every counter set to zero.

        ``host`` is accepted for interface compatibility; the address itself
        is not stored here (callers keep it as the key of their own dict).
        """
        self.host = dict.fromkeys(self.host_info, 0)

    def add_1(self, status_size, is_size):
        """Update one counter.

        * ``status_size == 'time'``: increment the request counter.
        * ``is_size`` true: add ``status_size`` (a byte count) to 'size'.
        * otherwise: increment the counter named ``status_size``; raises
          ``KeyError`` if there is no such counter.
        """
        if status_size == 'time':
            self.host['time'] += 1
        elif is_size:
            self.host['size'] = self.host['size'] + status_size
        else:
            self.host[status_size] += 1

    def get_value(self, value):
        """Return the counter stored under the key ``value``."""
        return self.host[value]


class AnalysisFile(object):
    """Parse log lines into per-host counters and compute overall totals."""

    def __init__(self):
        """Start with no hosts and all totals at zero."""
        # Maps client address -> HostInfo.
        self.empty = {}
        self._reset_totals()

    def _reset_totals(self):
        """Set every overall total back to zero."""
        self.total_request_time = 0
        self.total_traffic = 0
        self.total_200 = 0
        self.total_404 = 0
        self.total_403 = 0
        self.total_503 = 0
        self.total_500 = 0
        self.total_302 = 0
        self.total_304 = 0

    def split_line_todict(self, line):
        """Split ``line`` on whitespace and return fields 0, 8 and 9.

        The result has the keys 'remote_host', 'status' and 'bytes_sent'.
        Raises ``IndexError`` when the line has fewer than ten fields.
        NOTE(review): these positions presumably match the common/combined
        access-log format -- confirm against the logs actually analysed.
        """
        line_split = line.split()
        return {'remote_host': line_split[0],
                'status': line_split[8],
                'bytes_sent': line_split[9]}

    def read_log(self, logs):
        """Accumulate counters for every line in the iterable ``logs``.

        Lines that cannot be split into the expected fields are skipped.
        Returns ``self.empty``, the dict mapping each client address to its
        ``HostInfo``.
        """
        for line in logs:
            try:
                dict_line = self.split_line_todict(line)
            except (ValueError, IndexError):
                continue

            host = dict_line['remote_host']
            status = dict_line['status']

            host_info_obj = self.empty.get(host)
            if host_info_obj is None:
                host_info_obj = HostInfo(host)
                self.empty[host] = host_info_obj

            host_info_obj.add_1('time', False)

            # Compare against the status codes only: 'size' and 'time' are
            # also counter names, and a status field carrying one of those
            # words must not be counted.
            if status in HostInfo.status_codes:
                host_info_obj.add_1(status, False)

            # A non-numeric byte field (for example '-') counts as 0 bytes.
            try:
                bytes_sent = int(dict_line['bytes_sent'])
            except ValueError:
                bytes_sent = 0

            host_info_obj.add_1(bytes_sent, True)

        return self.empty

    def return_sorted_list(self, true_dict):
        """Compute the overall totals and return the hosts ordered by traffic.

        ``true_dict`` maps client address -> ``HostInfo`` and is left
        unmodified.  The totals on ``self`` are recomputed from scratch on
        every call, so calling this method again gives the same result.

        Returns a list of ``(address, counters)`` pairs, where ``counters``
        is a plain dict keyed like ``HostInfo.host_info``, sorted by
        ``(size, time)`` in descending order.
        """
        self._reset_totals()
        summary = {}

        for host_key, host_value in true_dict.items():
            counters = dict((field, host_value.get_value(field))
                            for field in HostInfo.host_info)
            summary[host_key] = counters

            self.total_request_time += counters['time']
            self.total_traffic += counters['size']
            self.total_200 += counters['200']
            self.total_404 += counters['404']
            self.total_403 += counters['403']
            self.total_503 += counters['503']
            self.total_500 += counters['500']
            self.total_302 += counters['302']
            self.total_304 += counters['304']

        return sorted(summary.items(),
                      key=lambda item: (item[1]['size'], item[1]['time']),
                      reverse=True)


class Main(object):
    """Command-line entry point."""

    def main(self):
        """Parse ``sys.argv``, analyse the log file and print the report.

        On a usage error, an invalid row count or an unreadable file the
        problem and the usage message are printed and the method returns
        without producing a report.
        """
        displayformat = DisplayFormat()

        args = len(sys.argv)
        if args not in (2, 3):
            displayformat.echo_error()
            return

        log_file = sys.argv[1]

        # Optional row limit; 0 means "show every host".  It is validated
        # before the file is opened so a bad value cannot leak a handle.
        lines = 0
        if args == 3:
            try:
                lines = int(sys.argv[2])
            except ValueError as e:
                print('')
                print(e)
                displayformat.echo_error()
                return

        fileanalysis = AnalysisFile()

        try:
            with open(log_file, 'r') as files:
                news_dict = fileanalysis.read_log(files)
        except IOError as e:
            print('')
            print(e)
            displayformat.echo_error()
            return

        new_list = fileanalysis.return_sorted_list(news_dict)

        # Number of distinct client addresses, counted before truncation.
        total_ip = len(new_list)

        if lines:
            new_list = new_list[0:lines]

        # Overall summary: IP count, total traffic, total request count.
        print('')
        total_request_time = fileanalysis.total_request_time
        total_traffic = displayformat.format_size(fileanalysis.total_traffic)
        print('总IP数量: %s 总的访问流量: %s 总的请求次数: %d' % (
            total_ip, total_traffic, total_request_time))

        print('')
        displayformat.echo_head()
        displayformat.echo_line()

        # One row per host.  Every listed host has at least one request, so
        # total_request_time is non-zero whenever this loop body runs.
        for host, counters in new_list:
            hits = counters['time']
            share = float(hits) / float(total_request_time) * 100
            print(displayformat.formatstring % (
                host,
                displayformat.format_size(counters['size']),
                hits, '%.2f' % share,
                counters['200'], counters['404'], counters['403'],
                counters['503'], counters['500'], counters['302'],
                counters['304']))

        # The totals row is only meaningful when every host is listed.
        if not lines or lines >= total_ip:
            displayformat.echo_line()
            print(displayformat.formatstring % (
                total_ip, total_traffic, total_request_time, '100%',
                fileanalysis.total_200, fileanalysis.total_404,
                fileanalysis.total_403, fileanalysis.total_503,
                fileanalysis.total_500, fileanalysis.total_302,
                fileanalysis.total_304))

        print('')
        displayformat.echo_time()


if __name__ == '__main__':
    main = Main()
    main.main()
python分析日志脚本
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。