首页 > 代码库 > BackTrace for EditDistace
BackTrace for EditDistace
#!/usr/bin/env python
#encoding=gbk
import
sys
"""
1. Set n to be the length of s.
Set m to be the length of t.
If n = 0, return m and exit.
If m = 0, return n and exit.
2. Construct a matrix containing 0..m rows and 0..n columns.
Initialize the first row to 0..n.
Initialize the first column to 0..m.
3. Examine each character of s (i from 1 to n).
4. Examine each character of t (j from 1 to m).
5. If s[i] equals t[j], the cost is 0.
If s[i] doesn‘t equal t[j], the cost is 1.
6. Set cell d[i,j] of the matrix equal to the minimum of:
a. The cell immediately above plus 1: d[i-1,j] + 1.
b. The cell immediately to the left plus 1: d[i,j-1] + 1.
c. The cell diagonally above and to the left plus the cost: d[i-1,j-1] + cost.
After the iteration steps (3, 4, 5, 6) are complete, the distance is found in cell d[n,m].
"""
def
show(matirx):
for
ir
in
range
(
0
,
len
(matirx)):
cl
=
matirx[ir]
for
il
in
range
(
0
,
len
(cl)):
print
"%d "
%
matirx[ir][il],
print
""
print
""
def
minimum(a, b, c):
mi
=
a
if
(b < mi): mi
=
b
if
(c < mi): mi
=
c
return
mi
def
BLD(t, p):
ut
=
unicode
(t,
"gbk"
)
up
=
unicode
(p,
"gbk"
)
m
=
len
(ut)
n
=
len
(up)
if
m
=
=
0
:
return
" "
.join(
"*"
*
len
(up)),
" "
.join(up.encode(
"gbk"
)),
" "
.join(
"I"
*
len
(up))
if
n
=
=
0
:
return
" "
.join(ut.encode(
"gbk"
)),
" "
.join(
"*"
*
len
(ut)),
" "
.join(
"D"
*
len
(ut))
d
=
[[
0
]
*
(m
+
1
)
for
i
in
range
(
0
, n
+
1
)]
b
=
[[
0
]
*
(m
+
1
)
for
i
in
range
(
0
, n
+
1
)]
#show(d)
#show(b)
for
i
in
range
(
1
,
len
(d[
0
])):d[
0
][i]
=
i
for
i
in
range
(
1
,
len
(d)):d[i][
0
]
=
i
#show(d)
for
i
in
range
(
1
,
len
(d)):
#row num, p‘s length
for
j
in
range
(
1
,
len
(d[
0
])):
#column num, t‘s length
"""ins cost:d[i - 1][j] + 1; del cost:d[i][j - 1] + 1; sub cost:d[i - 1][j - 1] + 0 if up[i - 1] == ut[j - 1] else 1"""
d[i][j]
=
minimum(d[i
-
1
][j]
+
1
, d[i][j
-
1
]
+
1
, d[i
-
1
][j
-
1
]
+
(
0
if
up[i
-
1
]
=
=
ut[j
-
1
]
else
1
))
"""backtrace matrix:[1:tag ins | 2:tag del | 3:tag sub]"""
if
d[i][j]
=
=
d[i
-
1
][j]
+
1
: b[i][j]
=
1
elif
d[i][j]
=
=
d[i][j
-
1
]
+
1
: b[i][j]
=
2
elif
d[i][j]
=
=
d[i
-
1
][j
-
1
]
+
0
if
up[i
-
1
]
=
=
ut[j
-
1
]
else
1
: b[i][j]
=
3
show(d)
show(b)
#print "t: %s\np: %s\ncost: %d"%(ut,up,d[n][m])
"""put the best path point into a list"""
bestPath
=
[]
i
=
len
(d)
-
1
#row num
j
=
len
(d[
0
])
-
1
#column num
while
i !
=
0
or
j !
=
0
:
bestPath.append([i, j, b[i][j]])
"""
if tag is sub : i - 1, j - 1; b[i - 1][j - 1]
if tag is del : j - 1 ; b[i][j - 1]
if tag is ins : i - 1 ; b[i - 1][j]
"""
if
b[i][j]
=
=
1
: i
-
=
1
elif
b[i][j]
=
=
2
: j
-
=
1
elif
b[i][j]
=
=
3
: i
-
=
1
; j
-
=
1
else
:
if
i >
0
: i
-
=
1
if
j >
0
: j
-
=
1
print
bestPath
""" read best path and print info
part I: handle best path‘s first point
part II: handle best path‘s left points
"""
it
=
"*"
if
bestPath[
-
1
][
1
]
=
=
0
else
ut[bestPath[
-
1
][
1
]
-
1
].encode(
"gbk"
)
ip
=
"*"
if
bestPath[
-
1
][
0
]
=
=
0
else
up[bestPath[
-
1
][
0
]
-
1
].encode(
"gbk"
)
sf
=
""
if
bestPath[
-
1
][
2
]
=
=
1
: sf
=
"I"
elif
bestPath[
-
1
][
2
]
=
=
2
: sf
=
"D"
elif
bestPath[
-
1
][
2
]
=
=
3
: sf
=
" "
if
ut[
0
]
=
=
up[
0
]
else
"S"
else
: sf
=
"I"
if
bestPath[
-
1
][
1
]
=
=
0
else
"D"
for
i
in
range
(
len
(bestPath)
-
1
,
0
,
-
1
):
current
=
bestPath[i
-
1
]
forward
=
bestPath[i]
ip
+
=
up[current[
0
]
-
1
].encode(
"gbk"
)
if
current[
0
] !
=
forward[
0
]
and
current[
0
] >
0
else
"*"
it
+
=
ut[current[
1
]
-
1
].encode(
"gbk"
)
if
current[
1
] !
=
forward[
1
]
and
current[
1
] >
0
else
"*"
if
current[
2
]
=
=
1
: sf
+
=
"I"
elif
current[
2
]
=
=
2
: sf
+
=
"D"
elif
current[
2
]
=
=
3
: sf
+
=
"S"
if
up[current[
0
]
-
1
] !
=
ut[current[
1
]
-
1
]
else
" "
else
: sf
+
=
"I"
if
current[
1
]
=
=
0
else
"D"
return
" "
.join(it),
" "
.join(ip),
" "
.join(sf)
t
=
"abcdfx"
p
=
"fdsxdf"
st,sp,sf
=
BLD(t, p)
print
"\n%s\n%s\n%s"
%
(st, sp, sf)
"""
time cost: O(m * n)
space cost: O(m * n)
backtrace cost: O(m + n)
"""
BackTrace for EditDistace
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。