首页 > 代码库 > python 增量同步远程文件夹

python 增量同步远程文件夹

因为发的时候,有些敏感信息没去除,所以大家发文的时候,切记要过滤。不然抓取的很厉害。

这个脚本的作用是同步远程机器的少量log到一台机器上,然后通过logstash过滤显示

犯了好几个错误

  1. 把变量名local误写成了locals;locals是python的内置函数,变量名最好不要与它重名

  2. sftp老报错,IOError: [Errno 2] No such file,这个是因为没有创建本地目录

  3. 第一次全量同步,第二次同步五分钟之内修改的文件

  4. 特别对列表的处理,用strip去除\n

  5. read()和readlines的区别,其实python基础编程类书籍有讲到

演示

cat filelist

x.x.x.x::password::tomcat::/home/python/::/opt/logs/::

#!/usr/bin/python   
import pexpect
import paramiko  
import os
import sys
import time
import multiprocessing
import datetime
import crash_on_ipy
from stat import S_ISDIR
  
ip_list = []
#room_id = sys.argv[1]


class run_cmd(object):
    """Run a single shell command on a remote host over SSH (password auth).

    The constructor only stores its arguments; the connection is opened and
    closed inside ``run()``.  ``thread_stop`` is a plain flag set by
    ``stop()``; nothing in this class reads it.
    """

    def __init__(self, hostname=None, password=None, username=None, port=None, echo_cmd=None):
        self.hostname = hostname
        self.password = password
        self.username = username
        self.port = port
        self.echo_cmd = echo_cmd
        # Initialised here so the attribute exists even if stop() is never called.
        self.thread_stop = False

    def run(self):
        """Execute ``echo_cmd`` on the remote host.

        Returns:
            The command's stdout as a list of lines (newlines included), as
            produced by ``stdout.readlines()``.

        The SSH client is closed in all cases, including when connecting or
        executing raises.
        """
        paramiko.util.log_to_file('paramiko.log')
        s = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts unknown host keys without
        # verification; acceptable only on a trusted network.
        s.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            s.connect(
                hostname=self.hostname,
                port=self.port or 22,  # fall back to the SSH default when unset
                username=self.username,
                password=self.password,
            )
            stdin, stdout, stderr = s.exec_command(self.echo_cmd)
            # Read everything before the connection is torn down in `finally`.
            return stdout.readlines()
        finally:
            s.close()

    def stop(self):
        """Set the ``thread_stop`` flag."""
        self.thread_stop = True


class get_thread(object):
    """Download a remote file or directory tree over SFTP (password auth).

    The SFTP session is opened in the constructor and closed at the end of
    ``getall()``, so each instance is good for exactly one ``getall()`` call.
    """

    def __init__(self, hostname, password, username, port=None):
        """Open the transport and SFTP session.

        Args:
            hostname: Remote host to connect to.
            password: Password for ``username``.
            username: Remote login name.
            port: SSH port; ``None`` means the default, 22.
        """
        self.hostname = hostname
        self.username = username
        self.password = password
        # NOTE(review): Transport.connect() is called without a host key, so
        # the server's identity is not verified.
        self.scp = paramiko.Transport((hostname, port or 22))
        self.scp.connect(username=username, password=password)
        self.sftp = paramiko.SFTPClient.from_transport(self.scp)

    def _walk_remote(self, dirpath):
        """Yield ``(dirpath, dirnames, filenames)`` for a remote tree, top-down.

        The tuple layout matches ``os.walk``; entries are classified with
        ``S_ISDIR`` on the mode returned by ``listdir_attr``.
        """
        dirnames = []
        filenames = []

        for fd in self.sftp.listdir_attr(dirpath):
            if S_ISDIR(fd.st_mode):
                dirnames.append(fd.filename)
            else:
                filenames.append(fd.filename)
        yield dirpath, dirnames, filenames

        for dirname in dirnames:
            new_dirpath = os.path.join(dirpath, dirname)
            for walk in self._walk_remote(new_dirpath):
                yield walk

    @staticmethod
    def _ensure_local_dir(path):
        """Create the local directory ``path`` (and parents) if it is missing."""
        if not path or os.path.isdir(path):
            return
        try:
            os.makedirs(path)
        except OSError:
            # Tolerate a concurrent creation; re-raise real failures such as
            # permission errors instead of hiding them.
            if not os.path.isdir(path):
                raise

    def getall(self, local, remote):
        """Copy ``remote`` (a file or a directory tree) underneath ``local``.

        A remote file is stored as ``local/<basename>``.  A remote directory
        is recreated under ``local`` relative to the directory's parent, so
        ``/opt/logs/06`` lands in ``local/06`` while ``/opt/logs/06/`` (with a
        trailing slash) puts its contents directly into ``local``.

        Args:
            local: Local destination directory; created when missing.
            remote: Remote path to download.

        The transport is always closed on exit, even when a transfer fails.
        """
        try:
            st_mode = self.sftp.stat(remote).st_mode
            if not S_ISDIR(st_mode):
                # sftp.get() fails with "No such file" when the local
                # directory does not exist yet.
                self._ensure_local_dir(local)
                filename = os.path.basename(remote)
                self.sftp.get(remote, os.path.join(local, filename))
                return

            parent = os.path.split(remote)[0]
            for dirpath, dirnames, filenames in self._walk_remote(remote):
                # Strip only the leading parent component.  A plain
                # str.replace() would also rewrite later occurrences of the
                # parent string inside the path.
                relative = os.path.relpath(dirpath, parent or os.curdir)
                local_dir = os.path.join(local, relative)
                self._ensure_local_dir(local_dir)
                for dirname in dirnames:
                    # Created up front so empty remote directories are mirrored too.
                    self._ensure_local_dir(os.path.join(local_dir, dirname))
                for filename in filenames:
                    remotepath = os.path.join(dirpath, filename)
                    localpath = os.path.join(local_dir, filename)
                    self.sftp.get(remotepath, localpath)
        finally:
            self.scp.close()
if __name__ == '__main__':
    # Driver: for every host listed in /home/python/filelist, mirror its dated
    # log directories locally.  Each line of the list has the form
    #   hostname::password::username::local_dir::remote_dir::
    # The first run for a given day copies the whole <remote>/YYYY/MM tree;
    # later runs only fetch sub-directories of <remote>/YYYY/MM/DD/ that
    # `find -mmin -5` reports as modified within the last five minutes.
    port = 22
    now = datetime.datetime.now()
    year = now.strftime("%Y")
    mon = now.strftime("%m")
    day = now.strftime("%d")
    Datenow1 = year + "/" + mon + "/" + day + "/"
    Datenow = year + "/" + mon
    print("-" * 50)

    # `with` closes the list file even if a sync below raises.
    with open('/home/python/filelist', 'r') as f:
        c = f.readlines()

    for x in c:
        fields = x.rstrip('\r\n').split('::')
        if len(fields) < 5:
            # Blank or malformed line: nothing to sync.
            continue
        hostname, password, username, local, remotes = fields[:5]

        # Normalise so the dated paths below are always well-formed; without
        # this, a remote dir lacking the trailing slash left the dated paths
        # unset (or stale from the previous host).
        if not remotes.endswith('/'):
            remotes += '/'

        localz = local + "/" + mon + "/" + day
        if not os.path.exists(localz):
            # First run today: full copy of the current month's tree.
            remote = remotes + Datenow
            getthread = get_thread(hostname, password, username)
            getthread.getall(local, remote)
        else:
            # Incremental run: only recently modified sub-directories.
            remote = remotes + Datenow1
            echo_cmd = '/bin/find %s -maxdepth 1 -type d -mmin -5' % (remote)
            cmd_thread = run_cmd(hostname, password, username, port, echo_cmd)
            result = cmd_thread.run()
            for item in result:
                items = item.strip('\n')
                # `find` lists the start directory itself only when that
                # directory was modified too, so filter it by value rather
                # than blindly dropping the first line of output.
                if not items or items.rstrip('/') == remote.rstrip('/'):
                    continue
                print(str(item))
                # getall() closes its connection, so use a fresh one per dir.
                getthread = get_thread(hostname, password, username)
                getthread.getall(localz, items)

执行time python test.py

最主要的是可以只增量同步最近五分钟内修改过的内容,节约时间。

本文出自 “人,要有自己的想法” 博客,谢绝转载!

python 增量同步远程文件夹