首页 > 代码库 > SVM_python

SVM_python

#coding:utf-8
from numpy import *
import matplotlib
import matplotlib.pyplot as plt
def loadData(filename):
    fr=open(filename)
    dataset=[]
    datalabel=[]
    for line in fr.readlines():
        arrline=line.strip().split("\t")
        dataset.append([float(arrline[0]),float(arrline[1])])
        datalabel.append(float(arrline[-1]))
    return dataset,datalabel
def selectJ(i,m):
    j=i
    while(j==i):
        j=int(random.uniform(0,m))
    return j
def clipalpha(L,H,al):
    if al>H:
        al=H
    if L>al:
        al=L
    return al
def smo(dataset,datalabel,C,toler,maxiter):
    datamat=mat(dataset)
    labelmat=mat(datalabel).transpose()
    m,n=shape(datamat)
    b=0.0
    alpha=mat(zeros((m,1)))
    iter=0
    while(iter<maxiter):
        alphachange=0
        for i in range(m):
            di=float(multiply(alpha,labelmat).T*(datamat*datamat[i,:].T))+b#对最小化函数求导后得到关于拉格朗日乘子alpha的表达式,将w带入
            #决策函数后得到sum(alpha*yi*K(x,xi))+b,此函数得到实际输出值,这里要注意的是每个样本对应一个拉格朗日乘子
            ei=di-float(labelmat[i])#在该拉格朗日乘子下的误差
            if ((labelmat[i]*ei<-toler) and (alpha[i]<C)) or((labelmat[i]*ei>toler) and (alpha[i]>0)):
                j=selectJ(i,m)#选择另一个拉格朗日乘子
                dj=float(multiply(alpha,labelmat).T*(datamat*datamat[j,:].T))+b
                ej=dj-float(labelmat[j])
                Iold=alpha[i].copy()
                Jold=alpha[j].copy()
                if(labelmat[i]!=labelmat[j]):#这里的约束条件很重要,将拉格朗日乘子作为坐标轴可以很容易推出
                    L=max(0,alpha[j]-alpha[i])
                    H=min(C,C+alpha[j]-alpha[i])
                else:
                    L=max(0,alpha[i]+alpha[j]-C)
                    H=min(C,alpha[i]+alpha[j])
                if L==H:continue
                #eta是由有关alpha[j]的二次函数最小值的推到过程中得到的,其表达式为K(xi,xi)+K(xj,xj)-2K(xi,xj),这是求最大值,反过来就是求最小值这里K表示核函数
                eta=2.0*datamat[i,:]*datamat[j,:].T-datamat[i,:]*datamat[i,:].T-datamat[j,:]*datamat[j,:].T
                #eta>=0是,核函数矩阵不是非半负定矩阵,所以无解
                if eta>=0:continue
                #alpha[j]由关于它的二次函数得到,推理过程复杂
                alpha[j]-=labelmat[j]*(ei-ej)/eta
                #选择满足约束条件的new alpha[j]
                alpha[j]=clipalpha(L,H,alpha[j])
                if (abs(alpha[j]-Jold)<0.0001):continue
                #newalpha[i]*yi+newalpha[j]yj=oldalpha[j]*oldalpha[i]保持约束不变性
                alpha[i]+=labelmat[i]*labelmat[j]*(Jold-alpha[j])
                bi=b-ei-labelmat[i]*datamat[i,:]*datamat[i,:].T*(Iold-alpha[i])-labelmat[j]*datamat[j,:]*datamat[i,:].T*(Jold-alpha[j])
                bj=b-ej-labelmat[i]*datamat[i,:]*datamat[j,:].T*(Iold-alpha[i])-labelmat[j]*datamat[j,:]*datamat[j,:].T*(Jold-alpha[j])
                if (0<alpha[i]) and (C>alpha[i]):b=bi
                elif (0<alpha[j]) and(C>alpha[j]):b=bj
                else:b=(bi+bj)/2.0
                alphachange+=1
        if (alphachange==0):iter+=1
        else:iter=0
    return b,alpha
def calcw(alpha,data,label):
    datamat=mat(data)
    labelmat=mat(label).transpose()
    m,n=shape(datamat)
    w=zeros((n,1))
    for i in range(m):
        w+=multiply(alpha[i]*labelmat[i],datamat[i,:].T)
    return w
def classifier(w,b,intdata):
    val=intdata*mat(w)+b
    if val>0:
        return 1
    else:
        return -1
data,label=loadData("testSet.txt")
b,alpha=smo(data,label,10,0.001,50)
supervec=[]
w=calcw(alpha,data,label)
x=array([arange(-2,12,0.1)])
y=(-b-w[0]*x)/w[1]
for i in range(len(data)):
    if alpha[i]>0.0: supervec.append(data[i])
sx=[]
sy=[]
for j in range(len(supervec)):
    sx.append(supervec[j][0])
    sy.append(supervec[j][1])
xdata=[]
ydata=[]
for i in range(len(data)):
    xdata.append(data[i][0])
    ydata.append(data[i][1])
fig=plt.figure()
ax=fig.add_subplot(111)
ax.scatter(xdata,ydata,c=green)
ax.scatter(sx,sy,c=red)
ax.plot(x[0],y.A[0])
plt.show()
技术分享

 

SVM_python