-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathedit-distance.py
95 lines (61 loc) · 2.51 KB
/
edit-distance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""Edit Distance between two strings."""
import sys
def edit_distance(x, y, cost):
# print("%r %r %r" % (x, y, cost))
m, n = 1 + len(x), 1 + len(y)
# Use two rows instead of a complete matrix
prev = [cost["insert"] * j for j in range(n)]
# Go over each character of strings x & y
for i in range(1, m):
curr = []
for j in range(1, n):
if j == 1:
curr.append(cost["delete"] * i)
# Costs of various ways of making the strings equal
costs = []
# Copy is only possible when the characters match
if x[i-1] == y[j-1]: # account for 0 based string indexing
costs.append(cost["copy"] + prev[j-1])
# Other ways are always possible
# Replace
costs.append(cost["replace"] + prev[j-1])
# Insert
costs.append(cost["insert"] + curr[j-1])
# Delete
costs.append(cost["delete"] + prev[j])
# We want the way that returns the minimum
curr.append(min(costs))
# sys.stdout.write("\r%d" % i)
prev = curr
# sys.stdout.write("\r")
return curr[n-1]
if __name__ == '__main__':
if len(sys.argv) != 2:
print("Usage: edit-distance.py <filename>")
exit(1)
with open(sys.argv[1]) as f:
lines = list(map(lambda l: l.strip(), f.readlines()))
if len(lines) % 3 != 1:
print("Input file should contain 3T + 1 lines:\n"
"the first line has number of test cases T\n"
"where a single test case has "
"line 1: string x\n"
"line 2: string y\n"
"line 3: cost list in the format <copy> <insert> <replace> <delete>")
exit(1)
t = int(lines[0])
for i in range(0, t):
x = lines[3*i + 1]
y = lines[3*i + 2]
if x and y and not (x.isalnum() and y.isalnum()):
print("Strings should be alpha-numeric: [a-zA-Z0-9]*")
exit(1)
cost_type = ["copy", "insert", "replace", "delete"]
cost_list = list(map(int, lines[3*i + 3].split(" ")))
if len(cost_list) != 4:
print("Cost list should have 4 entries separated by <space>: \n"
"<copy> <insert> <replace> <delete>")
exit(1)
cost = dict(zip(cost_type, cost_list))
ans = edit_distance(x, y, cost)
print(ans % (10 ** 9 + 7))