
Analyzing nginx logs with a shell script

First version; fairly rough, just barely usable.

A file that is still being written to cannot be compressed with tar:

--------Compressing log----------------------
access.log
tar: access.log: file changed as we read it
#### Log compression failed ####
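
The fix is to take a snapshot first and compress the snapshot instead of the live file, which is exactly what the script below does. A minimal sketch of that workaround (the paths mirror the ones used in the script):

SRC=/data/nginx/logs/access.log
BAK=/usr/log_bak/nginx_bak
ARCHIVE=/usr/log_bak/nginx_$(date +%Y%m%d-%H%M).tar.gz

cp "$SRC" "$BAK"                               # snapshot the file nginx is still writing to
tar czf "$ARCHIVE" -C /usr/log_bak nginx_bak   # compress the static copy instead of access.log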

#!/bin/sh
# Analyze the nginx access log
DATE=`date +%Y%m%d-%H%M`
ARCHIVE=/usr/log_bak/nginx_$DATE.tar.gz
MESSAGE=/usr/log_bak/Nginx_Analysis$DATE
FILENAME=/data/nginx/logs/access.log
BACKNAME=/usr/log_bak/nginx_bak

function Mail(){
        mail -s "***Nginx Report***" 666@qq.com < $MESSAGE
}

function Browser(){
key[0]=' 200 [0-9]{3}';  word[0]='http 200'
key[1]=' 206 [0-9]{3}';  word[1]='http 206'
key[2]=' 404 [0-9]{3}';  word[2]='http 404'
key[3]=' 503 [0-9]{3}';  word[3]='http 503'
##########
# Search engine spiders
key[4]='Googlebot.*google.com/bot.html';             word[4]='Google spider'
key[5]='Baiduspider.*baidu.com/search/spider.html';  word[5]='Baidu spider'
key[6]='bingbot.*bing.com/bingbot.htm';              word[6]='Bing spider'
#Soso         Sosospider.*soso.com/webspider.htm
#Youdao       YoudaoBot.*youdao.com/help/webmaster/spider/
#Yahoo China  Yahoo! Slurp China
##########
# Desktop browsers
key[7]='MSIE';                     word[7]='MSIE'
key[8]='Gecko/.*Firefox';          word[8]='Firefox'
key[9]='AppleWebKit.*like Gecko';  word[9]='Webkit'
key[10]='Opera.*Presto';           word[10]='Opera'

key[11]='Windows NT 6.1';             word[11]='Windows 7 visits'
key[12]='Macintosh; Intel Mac OS X';  word[12]='Mac OS X visits'
key[13]='X11.*Linux';                 word[13]='Linux with X11'
key[14]='Android';                    word[14]='Android'
#Windows series: win2000 "Windows NT 5.0"  winxp "Windows NT 5.1"  winvista "Windows NT 6.0"  win7 "Windows NT 6.1"
#SymbianOS SymbianOS
##########
# Mobile devices
key[15]='iPad.*like Mac OS X';  word[15]='iPad visits'
key[16]='Nokia';                word[16]='Nokia'
key[17]='Nokia5800';            word[17]='Nokia5800 XpressMusic'
#iPhone iPhone.*like Mac OS X
##########
# Requested file types
key[18]='GET /.*\.mp3 HTTP';  word[18]='mp3 file visits'
key[19]='GET /.*\.jpg HTTP';  word[19]='jpg file visits'

#echo $FILENAME
#echo "nginx log: ${FILENAME}, ${totle} lines in total, ${#key[@]} patterns to process" >> $MESSAGE
#echo "Source IPs: $(awk '{print $1}' $FILENAME | sort | uniq | wc -l)" >> $MESSAGE
# Start from index 4: the status-code patterns (0-3) are reported separately by Check_http_status
i=4
echo "----Browser sources----" >> $MESSAGE
echo "--Browser-----Count------Percent--" >> $MESSAGE
while [ $i -lt ${#key[@]} ]
    do 
         s1=${word[$i]}
         s2=$(grep -cE "${key[$i]}" $BACKNAME)
         s3=$(awk -v n="$s2" -v t="$totle" 'BEGIN{printf "%.2f%%", (t ? n/t*100 : 0)}')
           echo "${s1} ${s2} ${s3}" >> $MESSAGE
         ((i++))
    done
if [[ $? == 0 ]]; then
    echo "分析浏览器标示成功" >> $MESSAGE 
else
    echo "分析浏览器标示失败" >> $MESSAGE 
fi
 echo "--------------------" >> $MESSAGE
}
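
# Alternative sketch, not called anywhere below: grep -c already counts matching
# lines, and awk -v passes the numbers in safely, so one small helper could
# replace the per-pattern grep | wc -l plus the separate percentage step.
# Usage would be: Count_Pattern <regex> <label>  (helper name is just an example)
function Count_Pattern(){
        local hits
        hits=$(grep -cE "$1" $BACKNAME)
        awk -v n="$hits" -v t="$totle" -v w="$2" \
            'BEGIN{printf "%s %d %.2f%%\n", w, n, (t ? n/t*100 : 0)}' >> $MESSAGE
}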

Check_http_status()
{
#grep -ioE "HTTP\/1\.[1|0]\"[[:blank:]][0-9]{3}" access.log
# Pull out every "HTTP/1.x" <status> pair from the log, classify by the second field, and store the counts in an array
codes=($(grep -ioE "HTTP\/1\.[01]\"[[:blank:]][0-9]{3}" $BACKNAME | awk -F"[ ]+" '
        BEGIN{i=0;j=0;k=0;n=0;p=0}
        {
                if($2>=100&&$2<200)      {i++}
                else if($2>=200&&$2<300) {j++}
                else if($2>=300&&$2<400) {k++}
                else if($2>=400&&$2<500) {n++}
                else if($2>=500)         {p++}
        }
        END{
                print i?i:0, j?j:0, k?k:0, n?n:0, p?p:0, i+j+k+n+p
        }'))
echo "--HTTP状态码---COUNT---PERCENT------" >> $MESSAGE
echo "status[100+]:--${codes[0]}--$(awk ‘BEGIN{printf "%.2f%",(‘${codes[0]}‘/‘${codes[5]}‘)*100}‘)" >> $MESSAGE
echo "status[200+]:--${codes[1]}--$(awk ‘BEGIN{printf "%.2f%",(‘${codes[1]}‘/‘${codes[5]}‘)*100}‘)" >> $MESSAGE
echo "status[300+]:--${codes[2]}--$(awk ‘BEGIN{printf "%.2f%",(‘${codes[2]}‘/‘${codes[5]}‘)*100}‘)" >> $MESSAGE
echo "status[400+]:--${codes[3]}--$(awk ‘BEGIN{printf "%.2f%",(‘${codes[3]}‘/‘${codes[5]}‘)*100}‘)" >> $MESSAGE
echo "status[500+]:--${codes[4]}--$(awk ‘BEGIN{printf "%.2f%",(‘${codes[4]}‘/‘${codes[5]}‘)*100}‘)" >> $MESSAGE
echo "----所有的状态码: ${codes[5]}----" >> $MESSAGE
}
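
# Alternative sketch, not called anywhere below, assuming the default "combined"
# log format where the status code is field $9 of each line: a single awk pass
# buckets every request, including any that are not logged as HTTP/1.x.
function Check_http_status_v2(){
        awk '{ bucket[int($9/100)]++; requests++ }
             END{ for (x = 1; x <= 5; x++)
                      printf "status[%d00+]: %d\n", x, bucket[x]+0
                  printf "total: %d\n", requests+0 }' $BACKNAME >> $MESSAGE
}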

function IpUrlTime(){

echo "来源IP共--$(cat $BACKNAME | awk ‘{print $1}‘ |sort|uniq|wc -l)--个" >> $MESSAGE
ip=$(cat $BACKNAME | awk {print $1}|sort | uniq -c | sort -nr | head -n 20)                         
echo "----访问前20个IP统计----" >> $MESSAGE
echo "$ip" >> $MESSAGE

#通过日志查看当天访问页面的url:
url=$(cat $BACKNAME | awk {print $7}|sort | uniq -c | sort -nr |head -n 20)
echo "----访问前20个URL统计----" >> $MESSAGE
echo "$url" >> $MESSAGE

#通过日志查看当天访问次数最多的时间段
time=$(awk {print $4} $BACKNAME  |cut -c 14-18 | sort | uniq -c | sort -nr | head | head -n 20)
echo "----访问前20个时间点统计----" >> $MESSAGE
echo "$time" >> $MESSAGE
}
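
# Alternative sketch, not called anywhere below: characters 14-15 of the
# timestamp field are the hour, so the same pipeline aggregated per hour gives
# a coarser view than the per-minute statistic above.
function TopHours(){
        awk '{print $4}' $BACKNAME | cut -c 14-15 | sort | uniq -c | sort -nr >> $MESSAGE
}
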
#----------start---------------
ip=`ifconfig | grep "inet addr:" | grep -v 127.0.0.1 | awk -F '[ :]+' '{print $4}'`
echo "--------Server $ip---------------" >> $MESSAGE
echo "--------$(df -h)---------------" >> $MESSAGE

cd /usr/log_bak

if [ $? -eq 0 ]
   then
    echo "Changed into directory /usr/log_bak" >> $MESSAGE
else
    echo "#### Failed to change into the directory, exiting ####" >> $MESSAGE 
    exit 1
fi

echo "---------------------" >> $MESSAGE
echo "备份日志:" $(date +"%y-%m-%d %H:%M:%S") >> $MESSAGE
echo "---------------------" >> $MESSAGE
# Back up access.log
cp $FILENAME $BACKNAME
# Check that the backup copy succeeded
if [[ $? == 0 ]]
   then
    echo "Log copied successfully" >> $MESSAGE 
else
    echo "#### Log copy failed, exiting ####" >> $MESSAGE 
    exit 1
fi


echo "-------------------------------" >> $MESSAGE
echo "分析时间:" $(date +"%y-%m-%d %H:%M:%S") >> $MESSAGE
echo "-------------------------------" >> $MESSAGE
totle=$(cat $BACKNAME | wc -l)
size=$(ls -sh $BACKNAME | awk '{print $1}')
echo "nginx log: ${size}, ${totle} lines in total" >> $MESSAGE
IpUrlTime
Check_http_status
Browser

echo "--------压缩日志----------------------" >> $MESSAGE

# Archive the copied file directly; otherwise access.log is still being written to and cannot be compressed
tar czvf $ARCHIVE nginx_bak >> $MESSAGE 2>&1
# Check whether the archive was created successfully
if [[ $? == 0 ]]
    then
    # Create the compressed archive of the backup file
#    tar czvf $ARCHIVE $LOG >> log.txt 2>&1
    echo "[$ARCHIVE] log compressed successfully!" >> $MESSAGE
# clear access.log
    > $FILENAME
    if [[ $? == 0 ]]
       then 
        echo "清空日志清空日志成功" >> $MESSAGE
        rm -f $BACKNAME
    else
        echo "###清空日志失败 Failed #####" >> $MESSAGE
    fi
    # Only the compressed archive of the backup needs to be kept
else
    echo "#### 压缩日志失败 ####" >> $MESSAGE
    exit 0
fi


echo "---------------------" >> $MESSAGE
echo "结束时间:" $(date +"%y-%m-%d %H:%M:%S") >> $MESSAGE
echo "---------------------" >> $MESSAGE

Mail
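
To produce the report automatically, the script can be scheduled with cron. For example, a crontab entry like the following (the script path is only a placeholder) would run it every night at 01:00:

0 1 * * * /bin/sh /usr/local/scripts/nginx_log_report.sh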

 
