首页 > 代码库 > 汽车检测SIFT+BOW+SVM

汽车检测SIFT+BOW+SVM

整个执行过程如下:
1)获取一个训练数据集。

2)创建BOW训练器并获得视觉词汇。

3)采用词汇训练SVM。

4)尝试对测试图像的图像金字塔采用滑动窗口进行检测。

5)对重叠的矩形使用非极大抑制。

6)输出结果。

该项目的结构如下:

|-----car_detector
|       |--detector.py
|       |--__init__.py
|       |--non_maximum.py
|       |--pyramid.py
|       |--sliding_window.py
|-----car_sliding_windows.py

样本与代码见链接。

主程序是car_sliding_windows.py,所有的工具都包含在car_detector文件夹中。由于使用的是Python2.7,因此该文件夹中需要一个__init__.py文件,这样car_detector才能作为包(模块)被主程序导入。

car_detector模块的四个文件如下:

  • SVM训练的模型
  • 非极大抑制函数
  • 图像金字塔
  • 滑动窗口函数

图像金字塔 pyramid.py

#coding= utf-8
import cv2

"""
功能:缩放图像
输入:图片、尺度
输出:缩放后图像
"""
def resize(img, scaleFactor):
  """Shrink an image by dividing both dimensions by ``scaleFactor``.

  Args:
    img: image array; the width is read from ``img.shape[1]`` and the
      height from ``img.shape[0]``.
    scaleFactor: divisor applied to both dimensions.

  Returns:
    The value returned by ``cv2.resize`` for the new ``(width, height)``,
    using ``cv2.INTER_AREA`` interpolation.
  """
  # 1.0 forces true division: under Python 2 an integer scaleFactor would
  # make 1 / scaleFactor evaluate to 0 and request an empty 0x0 image.
  ratio = 1.0 / scaleFactor
  new_size = (int(img.shape[1] * ratio), int(img.shape[0] * ratio))
  return cv2.resize(img, new_size, interpolation=cv2.INTER_AREA)

"""
功能:建立图像金字塔
输入:图片、尺度、最小尺寸
输出:图像金字塔
"""
def pyramid(image, scale=1.5, minSize=(200, 80)):
  """Generate an image pyramid, starting with the original image.

  Args:
    image: the full-size image; always yielded first, whatever its size.
    scale: factor by which each level is shrunk relative to the previous
      one (passed to ``resize``). Must be greater than 1.
    minSize: ``(min_width, min_height)``; generation stops at the first
      level whose width (``shape[1]``) or height (``shape[0]``) is smaller.

  Yields:
    The original image, then each successively resized level.

  Raises:
    ValueError: if ``scale`` is not greater than 1. Because this is a
      generator, the error surfaces on the first iteration, not at call
      time.
  """
  # A scale of 1 or less never shrinks the image, so the loop below would
  # never reach minSize and the generator would not terminate.
  if not scale > 1:
    raise ValueError("scale must be greater than 1, got %r" % (scale,))

  yield image
  # Because of ``yield`` this function is a generator: calling pyramid()
  # runs nothing and just returns an iterable object. Each iteration
  # resumes the body after the previous ``yield`` with all local variables
  # intact, and runs until the next ``yield`` (or the ``break``) is reached.
  while True:
    image = resize(image, scale)
    too_short = image.shape[0] < minSize[1]
    too_narrow = image.shape[1] < minSize[0]
    if too_short or too_narrow:
      break

    yield image

 

滑动窗口函数 sliding_window.py

#coding= utf-8
"""
功能:滑动窗口
输入:图像、步长、滑动窗大小
输出:图像窗口
"""
def sliding_window(image, step, window_size):
  """Slide a window over an image, left to right and top to bottom.

  Args:
    image: array indexed as ``image[rows, cols]``.
    step: stride in pixels, used for both axes.
    window_size: ``(width, height)`` of the window.

  Yields:
    ``(x, y, window)`` where ``(x, y)`` is the top-left corner and
    ``window`` is the slice ``image[y:y + height, x:x + width]``. Windows
    that run past the right or bottom edge are clipped by the slice, so
    they come out smaller than ``window_size``; callers that need
    full-size windows must check the shape themselves.
  """
  # range() rather than xrange(): identical iteration here, and it also
  # exists on Python 3, where xrange was removed.
  for y in range(0, image.shape[0], step):
    for x in range(0, image.shape[1], step):
      yield (x, y, image[y:y + window_size[1], x:x + window_size[0]])

 

非极大抑制 non_maximum.py

这个函数对于一系列的目标候选框矩阵,按照分类器得分排序。每次将评分最高的矩形框保存,消除掉剩下的矩形框中重叠超过阈值的矩形。

#coding= utf-8
# import the necessary packages
import numpy as np

# Malisiewicz et al.
# Python port by Adrian Rosebrock
"""
功能:非极大抑制
输入:目标框、重合率
输出:最后目标框
"""
def non_max_suppression_fast(boxes, overlapThresh):
  """Greedy non-maximum suppression over scored bounding boxes.

  Repeatedly keeps the highest-scoring remaining box and discards every
  other remaining box that overlaps it too much.

  Args:
    boxes: array-like whose rows are ``[x1, y1, x2, y2, score]``.
    overlapThresh: a remaining box is discarded when its intersection with
      the kept box, divided by the remaining box's own area, is greater
      than this value.

  Returns:
    ``[]`` when ``boxes`` is empty. Otherwise the kept rows, in the order
    they were picked (highest score first), cast to ``int``. The cast
    applies to the whole row, so the score column is truncated as well.
  """
  # Nothing to suppress.
  if len(boxes) == 0:
    return []

  # Accept plain lists as well as arrays.
  boxes = np.asarray(boxes)

  # Integer boxes are promoted to float because the overlap ratio below is
  # a division.
  if boxes.dtype.kind in "iu":
    boxes = boxes.astype("float")

  # Indexes of the boxes that survive.
  pick = []

  # Column views of the coordinates and scores.
  x1 = boxes[:, 0]
  y1 = boxes[:, 1]
  x2 = boxes[:, 2]
  y2 = boxes[:, 3]
  scores = boxes[:, 4]

  # Area of every box (inclusive pixel coordinates, hence the +1).
  area = (x2 - x1 + 1) * (y2 - y1 + 1)

  # Ascending sort, so the best-scoring candidate is always the LAST entry.
  # The earlier descending order made the loop pick the lowest score first.
  idxs = np.argsort(scores)

  # Keep looping while candidates remain.
  while len(idxs) > 0:
    # Take the highest-scoring remaining box and keep it.
    last = len(idxs) - 1
    i = idxs[last]
    pick.append(i)
    rest = idxs[:last]

    # Corners of the intersection between the kept box and each other box.
    xx1 = np.maximum(x1[i], x1[rest])
    yy1 = np.maximum(y1[i], y1[rest])
    xx2 = np.minimum(x2[i], x2[rest])
    yy2 = np.minimum(y2[i], y2[rest])

    # Intersection width and height; clamped to 0 when boxes are disjoint.
    w = np.maximum(0, xx2 - xx1 + 1)
    h = np.maximum(0, yy2 - yy1 + 1)

    # Fraction of each remaining box that is covered by the kept box.
    overlap = (w * h) / area[rest]

    # Drop the kept box itself plus every box that overlaps it too much.
    suppressed = np.where(overlap > overlapThresh)[0]
    idxs = np.delete(idxs, np.concatenate(([last], suppressed)))

  # Return only the picked boxes, using the integer data type.
  return boxes[pick].astype("int")

 

检测函数 detector.py

这个代码包含 SIFT特征提取、BOW图像词汇描述符获得、SVM分类器训练

#coding= utf-8
import cv2
import numpy as np


# Directory that holds the training images.
datapath = "./CarData/TrainImages/"
# Number of positive/negative image pairs read when building the SVM
# training set.
SAMPLES = 400


def path(cls, i):
    """Return the path of one training image.

    Args:
        cls: file-name prefix of the class (e.g. ``"pos-"`` or ``"neg-"``).
        i: zero-based sample index; the file name uses ``i + 1``.

    Returns:
        ``datapath``, a ``/``, then ``<cls><i + 1>.pgm``. ``datapath``
        already ends with ``/``, so the result contains a double slash.
    """
    filename = "%s%d.pgm" % (cls, i + 1)
    return "%s/%s" % (datapath, filename)

# Factory for the FLANN-based descriptor matcher
def get_flann_matcher():
  """Create a ``cv2.FlannBasedMatcher``.

  The index parameters are ``algorithm=1, trees=5`` and the search
  parameters are left empty.
  NOTE(review): algorithm 1 is presumably FLANN's KD-tree index; confirm
  against the OpenCV FLANN documentation.
  """
  index_params = {"algorithm": 1, "trees": 5}
  search_params = {}
  return cv2.FlannBasedMatcher(index_params, search_params)

def get_bow_extractor(extract, match):
  """Wrap a descriptor extractor and a matcher in a BOW extractor.

  Args:
    extract: descriptor extractor handed to the constructor first.
    match: descriptor matcher handed to the constructor second.

  Returns:
    A new ``cv2.BOWImgDescriptorExtractor``.
  """
  bow_extractor = cv2.BOWImgDescriptorExtractor(extract, match)
  return bow_extractor

# Create the SIFT feature objects
def get_extract_detect():
  """Return a pair of independent SIFT instances.

  Both elements come from ``cv2.xfeatures2d.SIFT_create()``; callers use
  one for keypoint detection and the other for descriptor extraction.
  """
  make_sift = cv2.xfeatures2d.SIFT_create
  first = make_sift()
  second = make_sift()
  return first, second

def extract_sift(fn, extractor, detector):
  """Load an image file and compute descriptors for its keypoints.

  Args:
    fn: path of the image, read with ``cv2.imread(fn, 0)``.
    extractor: object whose ``compute(image, keypoints)`` result holds the
      descriptors at index 1.
    detector: object whose ``detect(image)`` returns the keypoints.

  Returns:
    Element 1 of ``extractor.compute(...)``, i.e. the descriptors.
  """
  # Flag 0 is presumably cv2.IMREAD_GRAYSCALE; confirm in the OpenCV docs.
  gray = cv2.imread(fn, 0)
  keypoints = detector.detect(gray)
  computed = extractor.compute(gray, keypoints)
  return computed[1]

# Compute the bag-of-words descriptor of an image
def bow_features(img, extractor_bow, detector):
  """Describe an already-loaded image with the BOW extractor.

  Args:
    img: image array.
    extractor_bow: BOW extractor providing ``compute(image, keypoints)``.
    detector: keypoint detector providing ``detect(image)``.

  Returns:
    Whatever ``extractor_bow.compute`` returns for the detected keypoints.
  """
  keypoints = detector.detect(img)
  return extractor_bow.compute(img, keypoints)

def car_detector():
  """Build the visual vocabulary and train the car/non-car SVM.

  Steps:
    1. Cluster SIFT descriptors from the first 8 positive and 8 negative
       training images into 12 clusters with ``cv2.BOWKMeansTrainer``.
    2. Use that vocabulary to compute a BOW descriptor for each of the
       first ``SAMPLES`` positive (label 1) and negative (label -1) images.
    3. Train an RBF-kernel C-SVC on those descriptors.

  Images that produce no descriptors are skipped with a message rather
  than crashing the run; a label is appended only when its sample is.

  Returns:
    ``(svm, extract_bow)``: the trained SVM and the BOW descriptor
    extractor carrying the vocabulary, for use at detection time.
  """
  pos, neg = "pos-", "neg-"
  detect, extract = get_extract_detect()
  matcher = get_flann_matcher()

  print("building BOWKMeansTrainer...")
  bow_kmeans_trainer = cv2.BOWKMeansTrainer(12)
  extract_bow = get_bow_extractor(extract, matcher)

  print("adding features to trainer")
  for i in range(8):
    print(i)
    for cls in (pos, neg):
      image_path = path(cls, i)
      descriptors = extract_sift(image_path, extract, detect)
      # No keypoints means no descriptors (None); the trainer cannot take it.
      if descriptors is None:
        print("no SIFT descriptors in %s, skipped" % image_path)
        continue
      bow_kmeans_trainer.add(descriptors)

  vocabulary = bow_kmeans_trainer.cluster()
  extract_bow.setVocabulary(vocabulary)

  traindata, trainlabels = [], []
  print("adding to train data")
  for i in range(SAMPLES):
    print(i)
    for cls, label in ((pos, 1), (neg, -1)):
      image_path = path(cls, i)
      features = bow_features(cv2.imread(image_path, 0), extract_bow, detect)
      # Skip the sample AND its label together so the two lists stay aligned.
      if features is None:
        print("no BOW features in %s, skipped" % image_path)
        continue
      traindata.extend(features)
      trainlabels.append(label)

  # Create the SVM classifier
  svm = cv2.ml.SVM_create()
  svm.setType(cv2.ml.SVM_C_SVC)
  svm.setGamma(1)
  svm.setC(35)  # C trades training error against prediction error
  svm.setKernel(cv2.ml.SVM_RBF)  # kernel function
  # Train, one sample per row
  svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))
  return svm, extract_bow

 

主函数 car_sliding_windows.py

#coding= utf-8
import cv2
import numpy as np
from car_detector.detector import car_detector, bow_features
from car_detector.pyramid import pyramid
from car_detector.non_maximum import non_max_suppression_fast as nms
from car_detector.sliding_window import sliding_window
import urllib

def in_range(number, test, thresh=0.2):
  """Tell whether ``number`` is strictly closer than ``thresh`` to ``test``."""
  distance = abs(number - test)
  return distance < thresh

test_image = "../images/cars.jpg"  # location of the test sample
img_path = "../images/test.jpg"    # copy that is actually loaded below

# Retrieve the test sample into img_path. This fetches/copies the file; it
# does not merely check that it exists.
urllib.urlretrieve(test_image, img_path)

svm, extractor = car_detector()  # extract features and train the classifier
detect = cv2.xfeatures2d.SIFT_create()  # keypoint detector for the windows

w, h = 100, 40  # detection window width and height
img = cv2.imread(img_path)
if img is None:
  # cv2.imread signals failure by returning None; fail here with a clear
  # message instead of an attribute error further down.
  raise IOError("could not read image: %s" % img_path)

rectangles = []
counter = 1
scaleFactor = 1.25
scale = 1
font = cv2.FONT_HERSHEY_PLAIN

for resized in pyramid(img, scaleFactor):
  # Factor that maps coordinates of this level back to the original image.
  scale = float(img.shape[1]) / float(resized.shape[1])
  for (x, y, roi) in sliding_window(resized, 20, (w, h)):

    # Windows clipped at the image border are smaller than w x h; skip them.
    if roi.shape[1] != w or roi.shape[0] != h:
      continue

    counter += 1

    try:
      bf = bow_features(roi, extractor, detect)
      # A window without keypoints has no descriptor to classify.
      if bf is None:
        continue
      _, result = svm.predict(bf)
      # NOTE(review): STAT_MODEL_UPDATE_MODEL looks like a training flag;
      # kept as in the original call - confirm it is needed for predict().
      a, res = svm.predict(bf, flags=cv2.ml.STAT_MODEL_RAW_OUTPUT | cv2.ml.STAT_MODEL_UPDATE_MODEL)
    except cv2.error as err:
      # Only OpenCV failures are tolerated per window; anything else
      # (including Ctrl-C) now propagates instead of being swallowed.
      print("skipping window at (%d, %d): %s" % (x, y, err))
      continue

    print("Class: %d, Score: %f, a: %s" % (result[0][0], res[0][0], res))
    score = res[0][0]
    # Keep windows classified as car (1) whose raw output is below -1.0.
    if result[0][0] == 1 and score < -1.0:
      rx, ry, rx2, ry2 = int(x * scale), int(y * scale), int((x+w) * scale), int((y+h) * scale)
      rectangles.append([rx, ry, rx2, ry2, abs(score)])

windows = np.array(rectangles)
boxes = nms(windows, 0.25)


for (x, y, x2, y2, score) in boxes:
  print("%s %s %s %s %s" % (x, y, x2, y2, score))
  cv2.rectangle(img, (int(x),int(y)),(int(x2), int(y2)),(0, 255, 0), 1)
  cv2.putText(img, "%f" % score, (int(x),int(y)), font, 1, (0, 255, 0))

cv2.imshow("img", img)
cv2.waitKey(0)

 

汽车检测SIFT+BOW+SVM