首页 > 代码库 > [python] 系统监控

[python] 系统监控

每分钟采集一次Linux主机信息(CPU、内存、磁盘),采集到的数据暂存到本地sqlite

每10分钟通过HTTP上报一次

已上报的数据会被标记为过期,过期数据在sqlite库中最多保留7天,之后被删除

monitor4a.out记录所有级别的输出,monitor4a.log只记录INFO及以上级别的日志(单个文件最大100MB,保留5个备份文件循环滚动)

在python v2.6.6至2.7.8下测试通过

#!/usr/bin/env python
# _*_ coding:UTF-8 _*_
__author__ = 'shanl'
import socket


def _detect_local_ip():
    """Return the IPv4 address this host's name resolves to.

    Falls back to "127.0.0.1" when the host name cannot be resolved, so that
    a missing DNS/hosts entry does not crash the script at import time.
    """
    try:
        return socket.gethostbyname(socket.gethostname())
    except socket.error:
        return "127.0.0.1"


# Directory that holds the sqlite database and both log files.
store_root_dir = "."

# Global settings read by every other part of the script.
configure = {
    "domain_name":      "BeiJing",      # reported as 'domainid' with every sample
    "auth_address":     "127.0.0.1",    # host the samples are uploaded to over HTTP
    "local_ip":         _detect_local_ip(),
    "db_path":          "%s/monitor4a.db3" % store_root_dir,
    "encoding":         "UTF-8",

    "interval_collection":  60,         # seconds between two collections
    "interval_upload":      60*10,      # seconds between two uploads
    "interval_overdue":     60*60*24*7, # seconds between two purges of overdue rows

    "logger_out":           "%s/monitor4a.out" % store_root_dir,
    "logger_log":           "%s/monitor4a.log" % store_root_dir,
    "logger_format":        "[%(levelname)s] %(asctime)s [line:%(lineno)d] %(message)s",
    "logger_maxBytes":      100*1024*1024,
    "logger_backupCount":   5,

    # True makes the collectors return canned sample values instead of
    # running mpstat/free/df.
    "debug":                True,
}

try:
    from os import popen
except:
    from subprocess import popen
import os
import sys
import httplib
import multiprocessing
import time
import sqlite3
from datetime import datetime as dt
from uuid import uuid4 as uuid
from zlib import crc32
import urllib
import logging
from logging.handlers import RotatingFileHandler

# Logger configuration.
# Root logger: every record (DEBUG and above) is written to the "logger_out"
# file, which is truncated on each start (filemode 'w').
logging.basicConfig(
    filename=configure["logger_out"],
    level=logging.DEBUG,
    format=configure["logger_format"],
    filemode='w'
)
# Second, size-rotated file that only receives INFO and above.
Rthandler = RotatingFileHandler(
    configure["logger_log"],
    maxBytes=configure["logger_maxBytes"],
    backupCount=configure["logger_backupCount"])
Rthandler.setLevel(logging.INFO)
formatter = logging.Formatter(configure["logger_format"])
Rthandler.setFormatter(formatter)
logging.getLogger('').addHandler(Rthandler)

# Monitor entry point.
def pymain():
    """Prepare the sqlite schema, then launch the monitoring process.

    The database connection is opened only to create missing tables; it is
    held for a further three seconds and closed before the MonitorTask
    process is started.
    """
    logging.info("waitting...")
    store = MonitorDB()
    with store:
        store.initTables()
        time.sleep(3)

    logging.info("monitor start...")
    MonitorTask().start()

def getUUID():
    """Return a random non-negative integer id for one stored sample.

    The id is the top 63 bits of a random uuid4, so it always fits into
    sqlite's signed 64-bit integer and carries about 59 random bits.  The
    previous crc32-of-two-uuids scheme folded everything down to 32 bits,
    which made duplicate ids likely after a few tens of thousands of rows;
    rows are later marked as uploaded by this id, so duplicates matter.
    """
    return uuid().int >> 65

class MonitorDB(object):
    """Small wrapper around the local sqlite3 database that buffers samples.

    Samples live in table ``t_monitor``; ``overdue`` is 0 for rows that still
    have to be uploaded and 1 for rows that may be deleted.  Instances can be
    used as context managers: leaving the ``with`` block closes the
    connection.  save/getTop/setToOverdue/deleOverdue never raise on database
    errors; they log the failure instead.
    """
    __db = None
    # Table name -> DDL statement that creates it when it is missing.
    __init_sqls = {
        "t_monitor": '''
            create table t_monitor(
                uuid long,
                time varchar(32),
                key varchar(32),
                value varchar(64),
                overdue int
            )
        ''',
    }

    def __init__(self, dbPath=None):
        """Open ``dbPath``, or ``configure["db_path"]`` when it is None."""
        if dbPath is None:
            dbPath = configure["db_path"]
        self.connect(dbPath)

    def connect(self, dbPath):
        """Open the sqlite3 file ``dbPath`` with a 5 second lock timeout."""
        self.__db = sqlite3.connect(dbPath, 5)
        logging.debug("initialize sqlite3:'%s' done.", dbPath)

    def initTables(self):
        """Create every table listed in ``__init_sqls`` that does not exist yet."""
        for tn, sql in self.__init_sqls.items():
            cur = self.__db.cursor()
            try:
                cur.execute(
                    "SELECT COUNT(*) FROM sqlite_master where type='table' and name=?",
                    (tn,))
                if cur.fetchone()[0] == 0:
                    cur.execute(sql)
            finally:
                cur.close()
            self.__db.commit()
        logging.debug("initialize sqlite3 tables done.")

    def close(self):
        """Close the database connection."""
        self.__db.close()
        logging.debug("close sqlite3.")

    def __run_write(self, sql, rows=None):
        """Run one write statement and commit; roll back and re-raise on failure.

        ``rows`` is a sequence of parameter tuples for ``executemany``; when
        it is None the statement is executed once without parameters.
        """
        try:
            if rows is None:
                self.__db.execute(sql)
            else:
                self.__db.executemany(sql, rows)
            self.__db.commit()
        except Exception:
            self.__db.rollback()
            raise

    def save(self, rows):
        """Insert ``rows`` of (uuid, time, key, value) as not-yet-uploaded samples."""
        try:
            self.__run_write(
                "insert into t_monitor (uuid,time,key,value,overdue)values(?,?,?,?,0)",
                rows)
            logging.debug('save success,rows:%s', rows)
        except Exception as e:
            logging.error('save error:%s,rows:%s.', e, rows)

    def getTop(self, n=10):
        """Return up to ``n`` not-yet-uploaded samples, oldest ``time`` first.

        Each sample is a dict with the keys "uuid", "time", "key" and
        "value".  An empty list is returned when the query fails.
        """
        ret = []
        try:
            cur = self.__db.execute(
                'select uuid,time,key,value from t_monitor where overdue=0 order by time limit 0,?',
                (n,))
            try:
                for i in cur.fetchall():
                    ret.append({
                        "uuid": i[0], "time": i[1],
                        "key": i[2], "value": i[3]
                    })
            finally:
                cur.close()
            logging.debug('getTop success,n:%s', n)
        except Exception as e:
            logging.error("getTop error:%s,n:%s.", e, n)
        return ret

    def setToOverdue(self, ids):
        """Mark the rows whose uuid appears in ``ids`` (a list of 1-tuples) as overdue."""
        try:
            self.__run_write("update t_monitor set overdue=1 where uuid=?", ids)
            logging.debug("setToOverdue success,ids:%s", ids)
        except Exception as e:
            logging.error("setToOverdue error:%s,ids:%s", e, ids)

    def deleOverdue(self):
        """Delete every row that has been marked as overdue."""
        try:
            self.__run_write("delete from t_monitor where overdue=1")
            logging.debug("deleOverdue success")
        except Exception as e:
            logging.error("deleOverdue error:%s", e)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception raised inside the with-block propagates.
        self.close()

class HostinfoCollection(object):
    """Collects CPU, memory, disk and identity information of this host.

    Every collector returns a tuple of strings.  When the ``debug`` setting
    is true, cpu/memory/disk return fixed sample values and run no command.
    """

    def __init__(self, config=None):
        """Use ``config`` as the settings dict, or the module-level ``configure``.

        ``config`` must provide "debug", "domain_name" and "local_ip".
        """
        self.__config = configure if config is None else config

    def __nowTime(self):
        """Return the current local time as 'YYYY-MM-DD HH:MM:SS'."""
        # strftime instead of str(...)[:-7]: str() has no fractional part when
        # microsecond == 0, so the slice would cut into the seconds.
        return dt.today().strftime("%Y-%m-%d %H:%M:%S")

    def __query(self, commands, needed):
        """Return the split first output line of the first command that prints one.

        ``commands`` are shell command lines tried in order.  Raises
        RuntimeError when the chosen line has fewer than ``needed`` fields.
        """
        fields = []
        for command in commands:
            pipe = popen(command)
            try:
                fields = pipe.readline().strip().split()
            finally:
                pipe.close()
            if fields:
                break
        if len(fields) < needed:
            raise RuntimeError(
                "unexpected output from %r: %r" % (commands[-1], fields))
        return fields

    def cpu(self):
        """Return ``(cpuUse,)``: sum of fields 3 and 5 of mpstat's 'all' line.

        NOTE(review): presumably %usr + %sys with the en_US 12-hour time
        layout -- confirm against the mpstat version in use.
        """
        if self.__config["debug"]:
            return ('2.13',)
        logging.debug('collection cpu.')
        ls = self.__query(["export LC_ALL=en_US && mpstat|grep 'all'"], 6)
        cpuUse = "%.2f" % (float(ls[3]) + float(ls[5]))
        return (cpuUse,)

    def memory(self):
        """Return ``(memMax, memPer)``: fields 1 and 2 of the 'Mem' line of ``free -m``.

        NOTE(review): presumably total and used memory in MB -- confirm.
        """
        if self.__config["debug"]:
            return ('11112', '21212')

        logging.debug('collection memory.')
        ls = self.__query(["export LC_ALL=en_US && free -m|grep 'Mem'"], 3)
        memMax = "%s" % ls[1]
        memPer = "%s" % ls[2]
        return (memMax, memPer)

    def disk(self):
        """Return ``(HDUse, HDMax)`` taken from one line of ``df -l``.

        The first line matching '/edass4a_ssg', then '/dev/sda1', then '% /'
        is used.  HDUse is field 2; HDMax is field 2 plus field 3.
        """
        if self.__config["debug"]:
            return ('1.1', '23232')

        logging.debug('collection disk.')
        commands = [
            "export LC_ALL=en_US && df -l|grep '%s'" % pattern
            for pattern in ('/edass4a_ssg', '/dev/sda1', '% /')
        ]
        ls = self.__query(commands, 4)
        HDUse = ls[2]
        HDMax = str(int(ls[2]) + int(ls[3]))
        return (HDUse, HDMax)

    def ip(self):
        """Return ``(domain_name, local_ip)`` from the settings."""
        logging.debug('collection ip.')
        return (self.__config["domain_name"], self.__config["local_ip"])


class MonitorTask(multiprocessing.Process):
    """Background process that collects, uploads and purges samples.

    ``run`` wakes up once per second and starts each of the three jobs
    ("collection", "upload", "overdue") whenever its interval has elapsed.
    """

    def __init__(self, interval=None):
        """Create the task.

        ``interval`` maps "collection", "upload" and "overdue" to a number of
        seconds; when it is None the ``interval_*`` values of ``configure``
        are used.
        """
        multiprocessing.Process.__init__(self)
        if interval is None:
            interval = {
                "collection":   configure["interval_collection"],
                "upload":       configure["interval_upload"],
                "overdue":      configure["interval_overdue"],
            }
        self.__interval = interval

    def __collection(self, db):
        """Collect one sample of every metric and store them through ``db``."""
        # strftime instead of str(...)[:-7]: str() has no fractional part when
        # microsecond == 0, so the slice would cut into the seconds.
        tnow = dt.today().strftime("%Y-%m-%d %H:%M:%S")
        hostinfo = HostinfoCollection()

        cpu = hostinfo.cpu()
        mem = hostinfo.memory()
        disk = hostinfo.disk()
        samples = [
            ('cpuUse', cpu[0]),
            ('memMax', mem[0]),
            ('memPer', mem[1]),
            ('HDUse', disk[0]),
            ('HDMax', disk[1]),
        ]
        db.save([(getUUID(), tnow, key, value) for key, value in samples])

    def __upload_rows(self, rows, ip):
        """Send ``rows`` one by one and return the ids that were accepted.

        Each row is sent as the query string of a GET request to
        ``configure["auth_address"]``.  The result is a list of ``(uuid,)``
        tuples for the rows answered with HTTP 200.  The first exception
        stops the batch; rows accepted before it are still returned.
        """
        uploaded = []
        try:
            for i in rows:
                params = urllib.urlencode({
                    'mkey':         i['key'],
                    'mvalue':       i['value'],
                    'uptime':       i['time'],
                    'domainid':     ip[0],
                    'ip':           ip[1],
                    'encoding':     configure["encoding"],
                })

                logging.debug('http connect to:%s,params:%s' % (configure["auth_address"], params))
                httpClient = httplib.HTTPConnection(configure["auth_address"], timeout=3)
                try:
                    httpClient.connect()
                    httpClient.request("GET", "/?%s" % params)
                    resp_status = httpClient.getresponse().status
                finally:
                    # Always release the socket, also when the request fails.
                    httpClient.close()
                logging.debug('http response status:%s' % resp_status)
                if 200 == resp_status:
                    uploaded.append((i['uuid'],))
        except Exception as e:
            logging.error("upload error:%s" % e)
        return uploaded

    def __upload(self, db):
        """Upload all pending samples and mark the accepted ones as overdue.

        Pending rows are fetched in batches of ``db.getTop()`` until nothing
        new is returned, so the backlog does not grow when more rows are
        collected per upload interval than fit into one batch.  Draining
        stops as soon as a batch is not uploaded completely; the remaining
        rows are retried on the next run.
        """
        ip = HostinfoCollection().ip()
        seen = set()    # ids already tried in this run; guarantees termination
        total = 0
        while True:
            batch = [row for row in db.getTop() if row['uuid'] not in seen]
            if not batch:
                break
            seen.update(row['uuid'] for row in batch)

            uploaded = self.__upload_rows(batch, ip)
            db.setToOverdue(uploaded)
            total += len(uploaded)
            if len(uploaded) < len(batch):
                break
        if seen:
            logging.info("upload rows: %d" % total)

    def __overdue(self, db):
        """Delete the rows that have been marked as overdue."""
        db.deleOverdue()

    def run(self):
        """Run the scheduling loop until the process is terminated.

        A job that raises is logged and retried after its next interval; it
        does not stop the other jobs or the loop.
        """
        jobs = (
            ("collection", self.__collection),
            ("upload", self.__upload),
            ("overdue", self.__overdue),
        )
        ltime = int(time.time())
        lastExecTime = dict((name, ltime) for name, _ in jobs)

        while True:
            lnow = int(time.time())
            for name, job in jobs:
                if lnow - lastExecTime[name] < self.__interval[name]:
                    continue
                logging.info("run.%s()" % name)
                # Set before running so a failing job waits a full interval.
                lastExecTime[name] = lnow
                try:
                    with MonitorDB() as db:
                        job(db)
                except Exception as e:
                    logging.error("run error:%s" % e)
            time.sleep(1)

# Start the monitor only when this file is executed as a script,
# not when it is imported as a module.
if __name__=="__main__":
    pymain()


服务器端,随便写个jsp或其他什么的

比如这个node.js

// Port the demo server listens on.
var LISTEN_PORT = 80;
// Host passed to server.listen(); NOTE(review): presumably the empty string
// makes Node use its default bind address -- confirm.
var HOST = '';

// Fixed HTML body returned for every request.
var TEST_HTML = "<html><body>node.js httpServer.</body></html>\n";

/**
 * Start a minimal HTTP server that prints the URL of every incoming request
 * to the console and answers each one with status 200 and TEST_HTML.
 */
function httpserver0() {
	var server = require('http').createServer();

	// Handlers are registered before listen() so no early request is missed.
	server.on('request', function (req, resp) {
		console.log(req.url);

		resp.writeHead(200, {
			'Content-Type': 'text/html'
		});

		resp.end(TEST_HTML);
	});

	server.on('connection', function (socket) {
		//console.log('new conntion.');
	});

	server.listen(LISTEN_PORT, HOST);
	console.log('server running at ' + HOST + ':' + LISTEN_PORT);
}

httpserver0();

E:\a\node1>node httpserver1.js
server running at :80
/?uptime=2014-08-04+15%3A17%3A58&domainid=%E5%8C%97%E4%BA%AC&encoding=UTF-8&ip=192.168.2.183&mvalue=0.090000&mkey=cpuUse