首页 > 代码库 > BackTrace for EditDistace

BackTrace for EditDistace

#!/usr/bin/env python

#encoding=gbk
import sys
"""
1. Set n to be the length of s.
   Set m to be the length of t.
   If n = 0, return m and exit.
   If m = 0, return n and exit.
 
2. Construct a matrix containing 0..m rows and 0..n columns.
   Initialize the first row to 0..n.
   Initialize the first column to 0..m.
 
3. Examine each character of s (i from 1 to n).
 
4. Examine each character of t (j from 1 to m).
 
5. If s[i] equals t[j], the cost is 0.
   If s[i] doesn‘t equal t[j], the cost is 1.
 
6. Set cell d[i,j] of the matrix equal to the minimum of:
   a. The cell immediately above plus 1: d[i-1,j] + 1.
   b. The cell immediately to the left plus 1: d[i,j-1] + 1.
   c. The cell diagonally above and to the left plus the cost: d[i-1,j-1] + cost.
 
After the iteration steps (3, 4, 5, 6) are complete, the distance is found in cell d[n,m].
"""
def show(matirx):
    for ir in range(0,len(matirx)):
        cl = matirx[ir]
        for il in range(0,len(cl)):
            print "%d "%matirx[ir][il],
        print ""
    print ""
 
def minimum(a, b, c):
    mi = a
    if (b < mi): mi = b
    if (c < mi): mi = c
    return mi
 
def BLD(t, p):
    ut = unicode(t, "gbk")
    up = unicode(p, "gbk")
    = len(ut)
    = len(up)
 
    if == 0:return " ".join("*" * len(up)), " ".join(up.encode("gbk")), " ".join("I"*len(up))
    if == 0:return " ".join(ut.encode("gbk")), " ".join("*" * len(ut)), " ".join("D"*len(ut))
 
    = [[0* (m + 1)for in range(0, n + 1)]
    = [[0* (m + 1)for in range(0, n + 1)]
    #show(d)
    #show(b)
 
    for in range(1len(d[0])):d[0][i] = i
    for in range(1len(d)):d[i][0= i
 
    #show(d)
 
    for in range(1len(d)):#row num, p‘s length
        for in range(1len(d[0])):#column num, t‘s length
            """ins cost:d[i - 1][j] + 1; del cost:d[i][j - 1] + 1; sub cost:d[i - 1][j - 1] + 0 if up[i - 1] == ut[j - 1] else 1"""
            d[i][j] = minimum(d[i - 1][j] + 1, d[i][j - 1+ 1, d[i - 1][j - 1]+ (0 if up[i - 1== ut[j - 1else 1))
            """backtrace matrix:[1:tag ins | 2:tag del | 3:tag sub]"""
            if d[i][j] == d[i - 1][j] + 1: b[i][j] = 1
            elif d[i][j] == d[i][j - 1+ 1: b[i][j] = 2
            elif d[i][j] == d[i - 1][j - 1+ 0 if up[i - 1== ut[j - 1else 1: b[i][j] = 3
     
    show(d)
    show(b)
    #print "t: %s\np: %s\ncost: %d"%(ut,up,d[n][m])
 
    """put the best path point into a list"""
    bestPath = []
    = len(d) - 1 #row num
    = len(d[0]) - 1 #column num
 
    while i != 0 or j != 0:
        bestPath.append([i, j, b[i][j]])
        """
        if tag is sub : i - 1, j - 1; b[i - 1][j - 1]
        if tag is del : j - 1       ; b[i][j - 1]
        if tag is ins : i - 1       ; b[i - 1][j]
 
        """
        if b[i][j] == 1: i -= 1
        elif b[i][j] == 2: j -= 1
        elif b[i][j] == 3: i -= 1; j -= 1
        else:
            if i > 0: i -= 1
            if j > 0: j -=1
    print bestPath
     
    """ read best path and print info
        part  I: handle best path‘s first point
        part II: handle best path‘s left points
    """
    it = "*" if bestPath[-1][1== 0 else ut[bestPath[-1][1- 1].encode("gbk")
    ip = "*" if bestPath[-1][0== 0 else up[bestPath[-1][0- 1].encode("gbk")
    sf = ""
    if bestPath[-1][2== 1: sf = "I"
    elif bestPath[-1][2== 2: sf = "D"
    elif bestPath[-1][2== 3: sf = " " if ut[0== up[0else "S"
    else: sf = "I" if bestPath[-1][1== 0 else "D"
     
    for in range(len(bestPath) - 10-1):
        current = bestPath[i - 1]
        forward = bestPath[i]
        ip += up[current[0- 1].encode("gbk"if current[0] != forward[0and current[0] > 0 else "*"
        it += ut[current[1- 1].encode("gbk"if current[1] != forward[1and current[1] > 0 else "*"
        if current[2== 1: sf += "I"
        elif current[2== 2: sf += "D"
        elif current[2== 3: sf += "S" if up[current[0- 1] != ut[current[1- 1else " "
        else: sf += "I" if current[1== 0 else "D"
     
    return " ".join(it), " ".join(ip), " ".join(sf)
 
= "abcdfx"
= "fdsxdf"
st,sp,sf = BLD(t, p)
print "\n%s\n%s\n%s"%(st, sp, sf)
"""
time cost:  O(m * n)
space cost: O(m * n)
backtrace cost: O(m + n)
"""

BackTrace for EditDistace