首页 > 代码库 > SQLmap源码分析之框架初始化(一)

SQLmap源码分析之框架初始化(一)

SQLmap是现在搞web人手一个的注入神器,不仅包含了主流数据库的SQL注入检测,而且包含提权以及后渗透模块。基于python2.x开发而成,使用方便。所以研究web安全少不了分析源码,学习代码的同时,也可以学习先进的漏洞检测技术。多的不多说,咱们来分析一下源码。

 

使用的工具如下:

IDE: sublime text
SQLmap源码:https://github.com/sqlmapproject/sqlmap
当前分析版本: 1.1.2.5

 

0x00 从入口文件开始

我们在拿到源代码以后,先拉进sublime text中然后开始从sqlmap.py的入口文件开始分析。在入口函数调用之前,首先从lib目录下引入了很多的文件用作系统的初始化操作。然后我们来看入口文件main()函数中都做了哪些事情?

def main():
    """
    Main function of sqlmap when running from command line.

    Checks the environment, sets up paths and options, runs the selected
    mode (profiling, smoke test, live test or the regular start()), maps
    raised exceptions to log messages and, in the finally clause, performs
    cleanup (temporary files, hash database, dumper, lingering threads).
    """

    try:
        checkEnvironment()

        setPaths(modulePath())

        banner()

        # Store original command line options for possible later restoration
        cmdLineOptions.update(cmdLineParser().__dict__)

        initOptions(cmdLineOptions)

        if hasattr(conf, "api"):
            # heavy imports
            from lib.utils.api import StdDbOut
            from lib.utils.api import setRestAPILog

            # Overwrite system standard output and standard error to write
            # to an IPC database
            sys.stdout = StdDbOut(conf.taskid, messagetype="stdout")
            sys.stderr = StdDbOut(conf.taskid, messagetype="stderr")
            setRestAPILog()

        conf.showTime = True
        dataToStdout("[!] legal disclaimer: %s\n\n" % LEGAL_DISCLAIMER, forceOutput=True)
        dataToStdout("[*] starting at %s\n\n" % time.strftime("%X"), forceOutput=True)

        init()

        if conf.profile:
            profile()
        elif conf.smokeTest:
            smokeTest()
        elif conf.liveTest:
            liveTest()
        else:
            try:
                start()
            except thread.error as ex:
                if "can't start new thread" in getSafeExString(ex):
                    errMsg = "unable to start new threads. Please check OS (u)limits"
                    logger.critical(errMsg)
                    raise SystemExit
                else:
                    raise

    except SqlmapUserQuitException:
        errMsg = "user quit"
        try:
            logger.error(errMsg)
        except KeyboardInterrupt:
            pass

    except (SqlmapSilentQuitException, bdb.BdbQuit):
        pass

    except SqlmapShellQuitException:
        # Prevents the finally clause from re-entering main() for the shell
        cmdLineOptions.sqlmapShell = False

    except SqlmapBaseException as ex:
        errMsg = getSafeExString(ex)
        try:
            logger.critical(errMsg)
        except KeyboardInterrupt:
            pass
        raise SystemExit

    except KeyboardInterrupt:
        print

        errMsg = "user aborted"
        try:
            logger.error(errMsg)
        except KeyboardInterrupt:
            pass

    except EOFError:
        print
        errMsg = "exit"

        try:
            logger.error(errMsg)
        except KeyboardInterrupt:
            pass

    except SystemExit:
        pass

    except:
        # Top-level boundary: anything not handled above is treated as an
        # unhandled exception and classified by the content of its traceback
        print
        errMsg = unhandledExceptionMessage()
        excMsg = traceback.format_exc()

        try:
            if not checkIntegrity():
                errMsg = "code integrity check failed (turning off automatic issue creation). "
                errMsg += "You should retrieve the latest development version from official GitHub "
                errMsg += "repository at '%s'" % GIT_PAGE
                logger.critical(errMsg)
                print
                dataToStdout(excMsg)
                raise SystemExit

            elif "tamper/" in excMsg:
                logger.critical(errMsg)
                print
                dataToStdout(excMsg)
                raise SystemExit

            elif "MemoryError" in excMsg:
                errMsg = "memory exhaustion detected"
                logger.error(errMsg)
                raise SystemExit

            elif any(_ in excMsg for _ in ("No space left", "Disk quota exceeded")):
                errMsg = "no space left on output device"
                logger.error(errMsg)
                raise SystemExit

            elif all(_ in excMsg for _ in ("No such file", "_'", "self.get_prog_name()")):
                errMsg = "corrupted installation detected ('%s'). " % excMsg.strip().split('\n')[-1]
                errMsg += "You should retrieve the latest development version from official GitHub "
                errMsg += "repository at '%s'" % GIT_PAGE
                logger.error(errMsg)
                raise SystemExit

            elif "Read-only file system" in excMsg:
                errMsg = "output device is mounted as read-only"
                logger.error(errMsg)
                raise SystemExit

            elif "OperationalError: disk I/O error" in excMsg:
                errMsg = "I/O error on output device"
                logger.error(errMsg)
                raise SystemExit

            elif "_mkstemp_inner" in excMsg:
                errMsg = "there has been a problem while accessing temporary files"
                logger.error(errMsg)
                raise SystemExit

            elif "can't start new thread" in excMsg:
                errMsg = "there has been a problem while creating new thread instance. "
                errMsg += "Please make sure that you are not running too many processes"
                if not IS_WIN:
                    errMsg += " (or increase the 'ulimit -u' value)"
                logger.error(errMsg)
                raise SystemExit

            elif all(_ in excMsg for _ in ("pymysql", "configparser")):
                errMsg = "wrong initialization of pymsql detected (using Python3 dependencies)"
                logger.error(errMsg)
                raise SystemExit

            elif "bad marshal data (unknown type code)" in excMsg:
                match = re.search(r"\s*(.+)\s+ValueError", excMsg)
                errMsg = "one of your .pyc files are corrupted%s" % (" ('%s')" % match.group(1) if match else "")
                errMsg += ". Please delete .pyc files on your system to fix the problem"
                logger.error(errMsg)
                raise SystemExit

            elif "valueStack.pop" in excMsg and kb.get("dumpKeyboardInterrupt"):
                raise SystemExit

            # Rewrite file paths found in the traceback so that they are
            # relative to this file's directory and use forward slashes
            for match in re.finditer(r'File "(.+?)", line', excMsg):
                file_ = match.group(1)
                file_ = os.path.relpath(file_, os.path.dirname(__file__))
                file_ = file_.replace("\\", '/')
                file_ = re.sub(r"\.\./", '/', file_).lstrip('/')
                excMsg = excMsg.replace(match.group(1), file_)

            errMsg = maskSensitiveData(errMsg)
            excMsg = maskSensitiveData(excMsg)

            if hasattr(conf, "api"):
                logger.critical("%s\n%s" % (errMsg, excMsg))
            else:
                logger.critical(errMsg)
                kb.stickyLevel = logging.CRITICAL
                dataToStdout(excMsg)
                createGithubIssue(errMsg, excMsg)

        except KeyboardInterrupt:
            pass

    finally:
        kb.threadContinue = False

        if conf.get("showTime"):
            dataToStdout("\n[*] shutting down at %s\n\n" % time.strftime("%X"), forceOutput=True)

        kb.threadException = True

        if kb.get("tempDir"):
            # Best-effort removal of the known kinds of temporary files
            for prefix in (MKSTEMP_PREFIX.IPC, MKSTEMP_PREFIX.TESTING, MKSTEMP_PREFIX.COOKIE_JAR, MKSTEMP_PREFIX.BIG_ARRAY):
                for filepath in glob.glob(os.path.join(kb.tempDir, "%s*" % prefix)):
                    try:
                        os.remove(filepath)
                    except OSError:
                        pass

            # Remove the directory itself only when every remaining entry
            # ends with '.lock', '.exe' or '_'
            if not filter(None, (filepath for filepath in glob.glob(os.path.join(kb.tempDir, '*')) if not any(filepath.endswith(_) for _ in ('.lock', '.exe', '_')))):
                shutil.rmtree(kb.tempDir, ignore_errors=True)

        if conf.get("hashDB"):
            try:
                conf.hashDB.flush(True)
            except KeyboardInterrupt:
                pass

        if cmdLineOptions.get("sqlmapShell"):
            # Reset the shared state and start over
            cmdLineOptions.clear()
            conf.clear()
            kb.clear()
            main()

        if hasattr(conf, "api"):
            try:
                conf.databaseCursor.disconnect()
            except KeyboardInterrupt:
                pass

        if conf.get("dumper"):
            conf.dumper.flush()

        # short delay for thread finalization
        try:
            _ = time.time()
            # Keep waiting only while the timeout has NOT elapsed yet (a ">"
            # comparison here would make the loop body unreachable)
            while threading.activeCount() > 1 and (time.time() - _) < THREAD_FINALIZATION_TIMEOUT:
                time.sleep(0.01)
        except KeyboardInterrupt:
            pass
        finally:
            # Reference: http://stackoverflow.com/questions/1635080/terminate-a-multi-thread-python-program
            if threading.activeCount() > 1:
                os._exit(0)

我们可以看到这里,首先调用了checkEnvironment()函数,根据名字我们知道这个函数的作用是检测环境。我们跟进来看这个函数:

def checkEnvironment():
    """
    Verifies that the program can run in the current environment.

    Raises SystemExit (after logging a critical message) when checking the
    program's directory raises UnicodeEncodeError or when VERSION compares
    lower than "1.0". When the module "sqlmap.sqlmap" is loaded, shared
    objects and exception classes are copied from the already loaded
    "lib.core.data" and "lib.core.exception" modules into this module's
    globals.
    """

    try:
        # Result is not used; the call is made only to see whether it raises
        os.path.isdir(modulePath())
    except UnicodeEncodeError:
        errMsg = "your system does not properly handle non-ASCII paths. "
        errMsg += "Please move the sqlmap's directory to the other location"
        logger.critical(errMsg)
        raise SystemExit

    if distutils.version.LooseVersion(VERSION) < distutils.version.LooseVersion("1.0"):
        errMsg = "your runtime environment (e.g. PYTHONPATH) is "
        errMsg += "broken. Please make sure that you are not running "
        errMsg += "newer versions of sqlmap with runtime scripts for older "
        errMsg += "versions"
        logger.critical(errMsg)
        raise SystemExit

    # Patch for pip (import) environment
    if "sqlmap.sqlmap" in sys.modules:
        for _ in ("cmdLineOptions", "conf", "kb"):
            globals()[_] = getattr(sys.modules["lib.core.data"], _)

        for _ in ("SqlmapBaseException", "SqlmapShellQuitException", "SqlmapSilentQuitException", "SqlmapUserQuitException"):
            globals()[_] = getattr(sys.modules["lib.core.exception"], _)

这里调用了modulePath()函数,并用os.path.isdir()检查返回的路径;如果路径处理过程中抛出UnicodeEncodeError(例如路径中含有系统无法正确处理的非ASCII字符),就会打印错误信息并抛出SystemExit终止程序继续运行。

我们继续来看modulePath()函数中做了一些什么:

def modulePath():
    """
    Returns the directory of the program (as unicode), also when running
    frozen using py2exe
    """

    try:
        # When frozen the location is the executable itself, otherwise
        # it is this source file
        if weAreFrozen():
            location = sys.executable
        else:
            location = __file__
    except NameError:
        location = inspect.getsourcefile(modulePath)

    directory = os.path.dirname(os.path.realpath(location))

    return getUnicode(directory, encoding=sys.getfilesystemencoding() or UNICODE_ENCODING)

我们在注释中可以看到是为了获取程序所在的目录。为了防止乱码,返回unicode编码的路径。

getUnicode()函数在这里:sqlmap\lib\core\common.py。这里就不贴代码了。

然后checkEnvironment()判断版本。接着判断sqlmap.sqlmap是否已经加载,如果加载,那么就获取到cmdLineOptions, conf, kb几个属性并且把它们作为全局变量。

接下来,setPaths(modulePath())设置了一下系统各个部分的绝对路径,并且判断.txt, .xml, .zip为扩展名的文件是否存在并且是否可读。

这里我们来思考一个问题,为什么全局要用绝对路径呢?做过开发的同学就知道了,用绝对路径可以避免很多不必要的麻烦,比如说包含文件时候,用相对路径,互相包含,最后越搞越乱,一旦换了一个目录,就会出问题。也不方便日后的维护。用绝对路径,所有的调用全部放在主入口文件,这样单一入口的原则使得系统不仅调用方便,而且看起来还紧凑有序。

然后就是打印banner的信息。这里还有一个值得注意的点就是AttribDict这个数据类型。是这样定义的:

 

class AttribDict(dict):
    """
    This class defines the sqlmap object, inheriting from Python data
    type dictionary. Items can be read and written as attributes.

    >>> foo = AttribDict()
    >>> foo.bar = 1
    >>> foo.bar
    1
    """

    def __init__(self, indict=None, attribute=None):
        """
        Creates the dictionary from 'indict' (empty when None) and stores
        'attribute' as a regular instance attribute
        """

        if indict is None:
            indict = {}

        # Set any attributes here - before initialisation
        # these remain as normal attributes
        self.attribute = attribute
        dict.__init__(self, indict)
        self.__initialised = True

        # After initialisation, setting attributes
        # is the same as setting an item

    def __getattr__(self, item):
        """
        Maps values to attributes
        Only called if there *is NOT* an attribute with this name

        Raises AttributeError when there is no such item either
        """

        try:
            return self.__getitem__(item)
        except KeyError:
            raise AttributeError("unable to access item '%s'" % item)

    def __setattr__(self, item, value):
        """
        Maps attributes to values
        Only if we are initialised
        """

        # This test allows attributes to be set in the __init__ method
        # (the name-mangled marker is stored as the last step of __init__)
        if "_AttribDict__initialised" not in self.__dict__:
            return dict.__setattr__(self, item, value)

        # Any normal attributes are handled normally
        elif item in self.__dict__:
            dict.__setattr__(self, item, value)

        else:
            self.__setitem__(item, value)

    def __getstate__(self):
        """
        Returns the instance attribute dictionary as the pickled state
        """

        return self.__dict__

    def __setstate__(self, dict):
        """
        Restores the instance attribute dictionary from the pickled state
        """

        self.__dict__ = dict

    def __deepcopy__(self, memo):
        """
        Returns a deep copy holding copies of both the public non-callable
        attributes and the dictionary items
        """

        retVal = self.__class__()

        # Register the copy up front so that self-references resolve to it
        memo[id(self)] = retVal

        for attr in dir(self):
            if not attr.startswith('_'):
                value = getattr(self, attr)
                if not isinstance(value, (types.BuiltinFunctionType, types.FunctionType, types.MethodType)):
                    setattr(retVal, attr, copy.deepcopy(value, memo))

        for key, value in self.items():
            retVal.__setitem__(key, copy.deepcopy(value, memo))

        return retVal

继承了内置的dict,并且重写了一些方法。然后就可以这样去访问键值对:var.key。感慨 一下,好牛!!!

 

0x01  获取命令行参数选项

这里主要使用了optparse这个函数库。python中十分好用的一个命令行工具。具体可以参考这里:https://docs.python.org/2/library/optparse.html

首先将获取到的命令行参数选项进行判断和拆分以后转变成dict键值对的形式存入到cmdLineOptions。然后开始依据传入的参数进行后续操作。

在获取命令行参数的时候,有很多dirty hack写法,感兴趣可以好好品味。这个层次的认知来源于对底层库函数的熟悉。再次感慨,好牛!!!

这里重要的一个操作是_mergeOptions(),主要的作用是将配置项中的参数和命令行获得的参数选项以及缺省选项进行合并。函数是这么写的:

def _mergeOptions(inputOptions, overrideOptions):
    """
    Merge command line options with configuration file and default options.

    The merged result is written to 'conf' and finally copied into
    'mergedOptions'. Environment variables whose name starts with
    SQLMAP_ENVIRONMENT_PREFIX and matches a known option are applied last.

    @param inputOptions: optparse object with command line options.
    @type inputOptions: C{instance}

    @param overrideOptions: when true, every input option is stored into
    the configuration, even if already present with None/False as a value.
    @type overrideOptions: C{bool}

    Raises SqlmapSyntaxException when the value of option
    '--pickled-options' can not be processed.
    """

    if inputOptions.pickledOptions:
        try:
            # NOTE(review): unpickling with unsafe=True of a user provided
            # value presumably allows arbitrary objects to be created -
            # confirm that this value only ever comes from a trusted source
            unpickledOptions = base64unpickle(inputOptions.pickledOptions, unsafe=True)

            if type(unpickledOptions) == dict:
                unpickledOptions = AttribDict(unpickledOptions)

            _normalizeOptions(unpickledOptions)

            unpickledOptions["pickledOptions"] = None

            # Options missing from the unpickled set are taken from the input
            for key in inputOptions:
                if key not in unpickledOptions:
                    unpickledOptions[key] = inputOptions[key]

            inputOptions = unpickledOptions
        except Exception as ex:
            errMsg = "provided invalid value '%s' for option '--pickled-options'" % inputOptions.pickledOptions
            errMsg += " (%s)" % repr(ex)
            raise SqlmapSyntaxException(errMsg)

    if inputOptions.configFile:
        configFileParser(inputOptions.configFile)

    # Both dictionary-like and plain objects are supported
    if hasattr(inputOptions, "items"):
        inputOptionsItems = inputOptions.items()
    else:
        inputOptionsItems = inputOptions.__dict__.items()

    for key, value in inputOptionsItems:
        if key not in conf or value not in (None, False) or overrideOptions:
            conf[key] = value

    if not hasattr(conf, "api"):
        for key, value in conf.items():
            if value is not None:
                kb.explicitSettings.add(key)

    # Fill in the defaults for options still not having a value
    for key, value in defaults.items():
        if hasattr(conf, key) and conf[key] is None:
            conf[key] = value

    # Lookup table from upper-cased option name to its original name
    lut = {}
    for group in optDict.keys():
        lut.update((_.upper(), _) for _ in optDict[group])

    envOptions = {}
    for key, value in os.environ.items():
        if key.upper().startswith(SQLMAP_ENVIRONMENT_PREFIX):
            _ = key[len(SQLMAP_ENVIRONMENT_PREFIX):].upper()
            if _ in lut:
                envOptions[lut[_]] = value

    if envOptions:
        _normalizeOptions(envOptions)
        for key, value in envOptions.items():
            conf[key] = value

    mergedOptions.update(conf)

然后我们来调试一下,打印一下最后的mergedOptions,结果如下:

{code: None, getUsers: None, resultsFilename: None, excludeSysDbs: None, ignoreTimeouts: None, skip: None, db: None, prefix: None, osShell: None, googlePage: 1, query: None, getComments: None, randomAgent: None, testSkip: None, authType: None, getPasswordHashes: None, parameters: {}, predictOutput: None, wizard: None, stopFail: None, forms: None, uChar: None, authUsername: None, pivotColumn: None, dropSetCookie: None, dbmsCred: None, tests: [], paramExclude: None, risk: 1, sqlFile: None, rParam: None, getCurrentUser: None, notString: None, getRoles: None, getPrivileges: None, testParameter: None, tbl: None, offline: None, trafficFile: None, osSmb: None, level: 1, dnsDomain: None, skipStatic: None, secondOrder: None, hashDBFile: None, method: None, skipWaf: None, osBof: None, hostname: None, firstChar: None, torPort: None, wFile: None, binaryFields: None, checkTor: None, commonTables: None, direct: None, paramDict: {}, proxyList: None, titles: None, getSchema: None, timeSec: 5, paramDel: None, safeReqFile: None, port: None, getColumns: None, headers: None, crawlExclude: None, authCred: None, boundaries: [], loadCookies: None, showVersion: None, outputDir: None, tmpDir: None, disablePrecon: None, murphyRate: None, invalidLogical: None, getCurrentDb: None, hexConvert: None, proxyFile: None, answers: None, resultsFP: None, host: None, dependencies: None, cookie: None, dbmsHandler: None, path: None, alert: None, optimize: None, safeUrl: None, limitStop: None, search: None, uFrom: None, requestFile: None, noCast: None, testFilter: None, eta: None, dumpPath: None, csrfToken: None, threads: 1, logFile: None, os: None, col: None, proxy: None, proxyCred: None, verbose: 1, crawlDepth: None, updateAll: None, privEsc: None, forceDns: None, getAll: None, cj: None, hpp: None, tmpPath: None, header: None, url: uwww.baidu.com, invalidBignum: None, regexp: None, getDbs: None, httpHeaders: [], outputPath: None, freshQueries: None, uCols: None, smokeTest: None, ignoreProxy: None, 
regData: None, udfInject: None, invalidString: None, tor: None, forceSSL: None, ignore401: None, beep: None, noEscape: None, configFile: None, ipv6: False, scope: None, scheme: None, authFile: None, dbmsConnector: None, torType: SOCKS5, regVal: None, string: None, hashDB: None, mnemonics: None, skipUrlEncode: None, referer: None, agent: None, regType: None, purgeOutput: None, retries: 3, wFileType: None, extensiveFp: None, dumpTable: None, advancedHelp: None, batch: None, limitStart: None, flushSession: None, osCmd: None, suffix: None, smart: None, regDel: None, shLib: None, sitemapUrl: None, identifyWaf: None, msfPath: None, dumpAll: None, getHostname: None, sessionFile: None, delay: 0, disableColoring: None, getTables: None, safeFreq: None, liveTest: None, multipleTargets: False, lastChar: None, authPassword: None, nullConnection: None, dbms: None, forceThreads: None, dumpWhere: None, tamper: None, ignoreRedirects: None, charset: None, runCase: None, regKey: None, osPwn: None, evalCode: None, cleanup: None, csrfUrl: None, isDba: None, getBanner: None, profile: None, regRead: None, bulkFile: None, csvDel: ,, excludeCol: None, dumpFormat: CSV, safePost: None, rFile: None, user: None, parseErrors: None, getCount: None, dFile: None, data: None, regAdd: None, dummy: None, trafficFP: None, dnsServer: None, sqlmapShell: None, mobile: None, googleDork: None, timeout: 30, pickledOptions: None, saveConfig: None, sqlShell: None, pageRank: None, tech: BEUSTQ, textOnly: None, cookieDel: None, commonColumns: None, keepAlive: None}

接着对是否调用api,以及传入的api参数进行处理。

 

0x02 命令行参数处理

 

然后就是十分核心的参数处理了,调用init()函数进行处理:

def init():
    """
    Set attributes into both configuration and knowledge base singletons
    based upon command line and configuration file options.

    The helper functions are invoked strictly in the order listed below.
    """

    # Steps executed unconditionally, before the target gets parsed
    _useWizardInterface()
    setVerbosity()
    _saveConfig()
    _setRequestFromFile()
    _cleanupOptions()
    _cleanupEnvironment()
    _dirtyPatches()
    _purgeOutput()
    _checkDependencies()
    _createTemporaryDirectory()
    _basicOptionValidation()
    _setProxyList()
    _setTorProxySettings()
    _setDNSServer()
    _adjustLoggingFormatter()
    _setMultipleTargets()
    _setTamperingFunctions()
    _setWafFunctions()
    _setTrafficOutputFP()
    _resolveCrossReferences()
    _checkWebSocket()

    parseTargetUrl()
    parseTargetDirect()

    # Steps executed only when at least one of conf.url, conf.logFile,
    # conf.bulkFile, conf.sitemapUrl, conf.requestFile, conf.googleDork or
    # conf.liveTest is set
    if any((conf.url, conf.logFile, conf.bulkFile, conf.sitemapUrl, conf.requestFile, conf.googleDork, conf.liveTest)):
        _setHTTPTimeout()
        _setHTTPExtraHeaders()
        _setHTTPCookies()
        _setHTTPReferer()
        _setHTTPHost()
        _setHTTPUserAgent()
        _setHTTPAuthentication()
        _setHTTPHandlers()
        _setDNSCache()
        _setSocketPreConnect()
        _setSafeVisit()
        _doSearch()
        _setBulkMultipleTargets()
        _setSitemapTargets()
        _checkTor()
        _setCrawler()
        _findPageForms()
        _setDBMS()
        _setTechnique()

    # Remaining steps executed unconditionally
    _setThreads()
    _setOS()
    _setWriteFile()
    _setMetasploit()
    _setDBMSAuthentication()
    loadBoundaries()
    loadPayloads()
    _setPrefixSuffix()
    update()
    _loadQueries()

_useWizardInterface()这个函数是为了给初学者提供一个友好的界面。

运行如下:

技术分享

交互式的输入参数。这样你可以跟着这个提示来进行注入测试。

然后是setVerbosity(),主要目的是根据conf.verbose的值设置日志输出的详细级别,函数如下:

def setVerbosity():
    """
    Applies the verbosity stored in conf.verbose (1 when not set) to the
    level of the logger.
    """

    conf.verbose = int(1 if conf.verbose is None else conf.verbose)

    # When conf.eta is set, any verbosity above 2 is lowered to 2
    if conf.verbose > 2 and conf.eta:
        conf.verbose = 2

    verbosity = conf.verbose

    if verbosity >= 5:
        level = CUSTOM_LOGGING.TRAFFIC_IN
    elif verbosity == 4:
        level = CUSTOM_LOGGING.TRAFFIC_OUT
    elif verbosity == 3:
        level = CUSTOM_LOGGING.PAYLOAD
    elif verbosity == 2:
        level = logging.DEBUG
    elif verbosity == 1:
        level = logging.INFO
    elif verbosity == 0:
        level = logging.ERROR
    else:
        # Negative values leave the level of the logger untouched
        return

    logger.setLevel(level)

_saveConfig()的作用是将程序运行过程中的配置选项保存到一个文件中。命令使用是这样的:

python ./sqlmap.py -u "http://vultest.com/index.php?id=1" --save save.txt

然后会生成一个文件进行保存。生成的东西就不贴了。整个东西够大的了,以免影响阅读体验。

接下来是_setRequestFromFile()函数,整个函数的作用是处理-r 选项,也就是处理传入一个文件。这个文件可以是burpsuite抓取到的数据包。使用方法如下:

python ./sqlmap.py -r test.txt

这里有个python的小bug,如果传入的路径是'~',将会出现问题。bug地址:http://bugs.python.org/issue18171

为了处理这个问题,我们用os模块中的路径处理方法os.path.expanduser()进行处理。它的作用是将'~'替换成当前用户的主目录。运行如下:

>>> os.path.expanduser('~')
'C:\\Users\\10920'
>>>

然后调用_parseBurpLog()方法来处理burp抓取到的数据包,实现如下:

    def _parseBurpLog(content):
        """
        Parses burp logs

        Every request found in 'content' that passes the checks below is
        added to kb.targets as a tuple (url, method, data, cookie, headers).

        Raises SqlmapSyntaxException when an accepted request carries no
        Host header.
        """

        if not re.search(BURP_REQUEST_REGEX, content, re.I | re.S):
            if re.search(BURP_XML_HISTORY_REGEX, content, re.I | re.S):
                reqResList = []
                for match in re.finditer(BURP_XML_HISTORY_REGEX, content, re.I | re.S):
                    port, request = match.groups()
                    try:
                        request = request.decode("base64")
                    except binascii.Error:
                        continue
                    _ = re.search(r"%s:.+" % re.escape(HTTP_HEADER.HOST), request)
                    if _:
                        host = _.group(0).strip()

                        # Append the port to the Host header when missing
                        if not re.search(r":\d+\Z", host):
                            request = request.replace(host, "%s:%d" % (host, int(port)))
                    reqResList.append(request)
            else:
                # Neither of the known formats, treat content as one request
                reqResList = [content]
        else:
            reqResList = re.finditer(BURP_REQUEST_REGEX, content, re.I | re.S)

        for match in reqResList:
            # Items are either plain strings or regular expression matches
            request = match if isinstance(match, basestring) else match.group(0)
            request = re.sub(r"\A[^\w]+", "", request)

            schemePort = re.search(r"(http[\w]*)\:\/\/.*?\:([\d]+).+?={10,}", request, re.I | re.S)

            if schemePort:
                scheme = schemePort.group(1)
                port = schemePort.group(2)
            else:
                scheme, port = None, None

            # Skip entries without a request line using a known HTTP method
            if not re.search(r"^[\n]*(%s).*?\sHTTP\/" % "|".join(getPublicTypeMembers(HTTPMETHOD, True)), request, re.I | re.M):
                continue

            # Skip GET requests for resources with an excluded extension
            if re.search(r"^[\n]*%s.*?\.(%s)\sHTTP\/" % (HTTPMETHOD.GET, "|".join(CRAWL_EXCLUDE_EXTENSIONS)), request, re.I | re.M):
                continue

            getPostReq = False
            url = None
            host = None
            method = None
            data = None
            cookie = None
            params = False
            newline = None
            lines = request.split('\n')
            headers = []

            for index, line in enumerate(lines):
                if not line.strip() and index == len(lines) - 1:
                    break

                # Remember the line terminator so that body lines can be
                # joined back using the same one
                newline = "\r\n" if line.endswith('\r') else '\n'
                line = line.strip('\r')
                match = re.search(r"\A(%s) (.+) HTTP/[\d.]+\Z" % "|".join(getPublicTypeMembers(HTTPMETHOD, True)), line) if not method else None

                # First empty line of a non-GET request starts the body
                if len(line.strip()) == 0 and method and method != HTTPMETHOD.GET and data is None:
                    data = ""
                    params = True

                elif match:
                    method = match.group(1)
                    url = match.group(2)

                    if any(_ in line for _ in ('?', '=', CUSTOM_INJECTION_MARK_CHAR)):
                        params = True

                    getPostReq = True

                # POST parameters
                elif data is not None and params:
                    data += "%s%s" % (line, newline)

                # GET parameters
                elif "?" in line and "=" in line and ": " not in line:
                    params = True

                # Headers
                elif re.search(r"\A\S+:", line):
                    key, value = line.split(":", 1)
                    value = value.strip().replace("\r", "").replace("\n", "")

                    # Cookie and Host headers
                    if key.upper() == HTTP_HEADER.COOKIE.upper():
                        cookie = value
                    elif key.upper() == HTTP_HEADER.HOST.upper():
                        if '://' in value:
                            scheme, value = value.split('://')[:2]
                        splitValue = value.split(":")
                        host = splitValue[0]

                        if len(splitValue) > 1:
                            port = filterStringValue(splitValue[1], "[0-9]")

                    # Avoid to add a static content length header to
                    # headers and consider the following lines as
                    # POSTed data
                    if key.upper() == HTTP_HEADER.CONTENT_LENGTH.upper():
                        params = True

                    # Avoid proxy and connection type related headers
                    elif key not in (HTTP_HEADER.PROXY_CONNECTION, HTTP_HEADER.CONNECTION):
                        headers.append((getUnicode(key), getUnicode(value)))

                    if CUSTOM_INJECTION_MARK_CHAR in re.sub(PROBLEMATIC_CUSTOM_INJECTION_PATTERNS, "", value or ""):
                        params = True

            data = data.rstrip("\r\n") if data else data

            if getPostReq and (params or cookie):
                # Derive the missing one of scheme/port from the other
                if not port and isinstance(scheme, basestring) and scheme.lower() == "https":
                    port = "443"
                elif not scheme and port == "443":
                    scheme = "https"

                if conf.forceSSL:
                    scheme = "https"
                    port = port or "443"

                if not host:
                    errMsg = "invalid format of a request file"
                    raise SqlmapSyntaxException(errMsg)

                # Turn a relative request target into an absolute URL
                if not url.startswith("http"):
                    url = "%s://%s:%s%s" % (scheme or "http", host, port or "80", url)
                    scheme = None
                    port = None

                # Honor the scope filter (if any) and avoid duplicate URLs
                if not(conf.scope and not re.search(conf.scope, url, re.I)):
                    if not kb.targets or url not in addedTargetUrls:
                        kb.targets.add((url, conf.method or method, data, cookie, tuple(headers)))
                        addedTargetUrls.add(url)

 

SQLmap源码分析之框架初始化(一)