首页 > 代码库 > python分析日志脚本

python分析日志脚本

  1 #!/usr/bin/env python  2 # coding:utf-8  3   4 import sys,time  5   6 class DisplayFormat(object):  7   8     def format_size(self,size):  9         KB = 1024                   # KB -> B  1024 10         MB = 1048576                # MB -> B  1024 * 1024 11         GB = 1073741824             # GB -> B  1024 * 1024 * 1024 12         TB = 1099511627776          # TB -> B  1024 * 1024 * 1024 13  14         if size >= TB: 15             size = str(size >> 40) + T 16         elif size < KB: 17             size = str(size) + B 18         elif size >= GB and size < TB: 19             size = str(size >> 30) + G 20         elif size >= MB and size < GB: 21             size = str(size >> 20) + M 22         else: 23             size = str(size >> 10) + K 24  25         return size 26  27     formatstring = %-18s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s 28  29     def echo_line(self): 30         ‘‘‘输出头部横线‘‘‘ 31         print self.formatstring % (-*15,-*10,-*12,-*12,-*10,-*10,-*10,-*10,-*10,-*10,-*10,) 32  33     def echo_head(self): 34         ‘‘‘输出头部信息‘‘‘ 35         print self.formatstring % (IP,Traffic,Time,Time%,200,404,403,503,500,302,304) 36  37     def echo_error(self): 38         ‘‘‘输出错误信息‘‘‘ 39         print Usage:  + sys.argv[0] + filepath [number] 40  41     def echo_time(self): 42         ‘‘‘输出脚本执行时间‘‘‘ 43         print The script is running %s second % time.clock() 44  45  46 class HostInfo(object): 47  48     # 定义一个主机ip 的所有状态列表 49     host_info = [200,404,403,503,500,302,304,size,time] 50  51     def __init__(self,host): 52         ‘‘‘初始化一个主机信息字典‘‘‘ 53         self.host = host = {}.fromkeys(self.host_info,0) 54  55     def add_1(self,status_size,is_size): 56         ‘‘‘对访问次数,http返回的状态码,ip流量进行加1操作‘‘‘ 57         if status_size == time: 58             self.host[time] += 1 59         elif is_size: 60             self.host[size] = self.host[size] + status_size 61         else: 62             self.host[status_size] += 1 63  64     def get_value(self,value): 65         ‘‘‘取出字典的值‘‘‘ 66         return self.host[value] 67  68  69 class AnalysisFile(object): 70  71     def __init__(self): 72         ‘‘‘初始化一个空字典‘‘‘ 73         self.empty = {} 74         self.total_request_time,self.total_traffic,self.total_200, 75         self.total_404,self.total_403,self.total_503,self.total_500, 76         self.total_302,self.total_304 = 0,0,0,0,0,0,0,0,0 77  78     def split_line_todict(self,line): 79         ‘‘‘传入文件的每一行取出0、8、9字段 生成字典 并返回这个字典‘‘‘ 80         line_split = line.split() 81         line_dict = {remote_host:line_split[0],status:line_split[8],bytes_sent:line_split[9]} 82         return line_dict 83  84     def read_log(self,logs): 85         for line in logs: 86             try: 87                 dict_line = self.split_line_todict(line) 88                 host = dict_line[remote_host] 89                 status = dict_line[status] 90             except ValueError: 91                 continue 92             except IndexError: 93                 continue 94  95             if host not in self.empty: 96                 host_info_obj = HostInfo(host) 97                 self.empty[host] = host_info_obj 98             else: 99                 host_info_obj = self.empty[host]100 101             host_info_obj.add_1(time,False)102 103             if status in host_info_obj.host_info:104                 host_info_obj.add_1(status,False)105 106             try:107                 bytes_sent = int(dict_line[bytes_sent])108             except ValueError:109                 bytes_sent = 0110 111             host_info_obj.add_1(bytes_sent,True)112 113         return self.empty114 115     def return_sorted_list(self,true_dict):116         ‘‘‘循环读取字典,计算总的流量、总的访问次数以及总的http返回码‘‘‘117         for host_key in true_dict:118             host_value =http://www.mamicode.com/ true_dict[host_key]119             time = host_value.get_value(time)120             self.total_request_time = self.total_request_time + time121             size = host_value.get_value(size)122             self.total_traffic = self.total_traffic + size123 124             # 获取http返回状态码的次数125             v_200 = host_value.get_value(200)126             v_404 = host_value.get_value(404)127             v_403 = host_value.get_value(403)128             v_503 = host_value.get_value(503)129             v_500 = host_value.get_value(500)130             v_302 = host_value.get_value(302)131             v_304 = host_value.get_value(304)132 133             # 重新规划字典134             true_dict[host_key] = {200:v_200,404:v_404,403:v_403,135                                    503:v_503,500:v_500,302:v_302,136                                    304:v_304,size:size,time:time}137 138 139             # 计算http返回状态码的总量140             self.total_200 = self.total_200 + v_200141             self.total_404 = self.total_404 + v_404142             self.total_403 = self.total_403 + v_403143             self.total_503 = self.total_503 + v_503144             self.total_500 = self.total_500 + v_500145             self.total_302 = self.total_302 + v_302146             self.total_304 = self.total_304 + v_304147 148                 # 对总的访问次数和访问流量进行降序排序,并生成一个有序的列表149         sorted_list = sorted(true_dict.items(),key=lambda i:(i[1][size],150                                                                  i[1][time]),reverse=True)151 152         return sorted_list153 154 155 class Main(object):156 157     def main(self):158         ‘‘‘主调函数‘‘‘159         # 初始化DisplayFormat类的实例160         displayformat = DisplayFormat()161 162         args = len(sys.argv)163         if args == 1:164             displayformat.echo_error()165         elif args == 2 or args == 3:166             log_file = sys.argv[1]167             try:168                 files = open(log_file,r)169                 if args == 3:170                     lines = int(sys.argv[2])171                 else:172                     lines = 0173             except IOError,e:174                 print175                 print e176                 displayformat.echo_error()177             except VaueError,e:178                 print179                 print e180                 displayformat.echo_error()181 182         else:183             displayformat.echo_error()184 185 186         #AnalysisFile类的实例化187         fileanalysis = AnalysisFile()188 189         # 调用read_log方法190         news_dict = fileanalysis.read_log(files)191 192         # 调用return_sorted_list方法193         new_list = fileanalysis.return_sorted_list(news_dict)194 195         # 计算所有ip的总量196         total_ip = len(new_list)197 198         if lines:199             new_list = new_list[0:lines]200         files.close()201 202         # 打印出总的ip数,总访问流量,总的访问次数203         print204         total_request_time = fileanalysis.total_request_time205         total_traffic = displayformat.format_size(fileanalysis.total_traffic)206         print 总IP数量: %s    总的访问流量: %s    总的请求次数: %d % (total_ip,207                                                                    total_traffic,208                                                                    total_request_time)209         210         # 打印头部信息,和横线                                                                      211         print212         displayformat.echo_head()213         displayformat.echo_line()214 215         # 循环读取news_list列表取出time项目 计算time百分比 通过displayformat格式化输出主机信息216         for i in new_list:217             time = i[1][time]218             time_percentage = (float(time) / float(fileanalysis.total_request_time)) * 100219             print displayformat.formatstring % (i[0],220                                                 displayformat.format_size(i[1][size]),221                                                 time,str(time_percentage)[0:5],222                                                 i[1][200],i[1][404],i[1][403],223                                                 i[1][503],i[1][500],i[1][302],i[1][304])224 225         if not lines or total_ip == lines:226             displayformat.echo_line()227             print displayformat.formatstring % (total_ip,total_traffic,total_request_time,100%,228                                                 fileanalysis.total_200,fileanalysis.total_404,229                                                 fileanalysis.total_403,fileanalysis.total_503,230                                                 fileanalysis.total_500,fileanalysis.total_302,231                                                 fileanalysis.total_304)232 233         # 显示执行脚本的时间234         print235         displayformat.echo_time()236 237 if __name__ == __main__:238     main = Main()239     main.main()

 

python分析日志脚本