Merge pull request #107 from MikeSmithEU/master

MikeSmithEU · web-flow · commit 4d0b04c39d92 · 2019-08-13T12:25:46.000+02:00
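fix opcode counts bug: a 'replace' opcode whose b-span is longer than its a-span had the surplus subtracted from the replace count (under-counting it) instead of only being added to the insert count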
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-1.0rc4
+1.0rc5
diff --git a/src/benchmarkstt/metrics/core.py b/src/benchmarkstt/metrics/core.py
@@ -28,15 +28,16 @@ def get_opcode_counts(opcodes):
         elif tag == 'delete':
             counts[tag] += ahi - alo
         elif tag == 'replace':
-            counts[tag] += ahi - alo
-            if ahi - alo < bhi - blo:
-                c = bhi - blo - ahi + alo
-                counts['insert'] += c
-                counts[tag] -= c
-            elif ahi - alo > bhi - blo:
-                c = ahi - alo - bhi + blo
-                counts['delete'] += c
-                counts[tag] -= c
+            ca = ahi - alo
+            cb = bhi - blo
+            if ca < cb:
+                counts['insert'] += cb - ca
+                counts['replace'] += ca
+            elif ca > cb:
+                counts['delete'] += ca - cb
+                counts['replace'] += cb
+            else:
+                counts[tag] += ahi - alo
     return OpcodeCounts(counts['equal'], counts['replace'], counts['insert'], counts['delete'])
 
 
diff --git a/tests/benchmarkstt/test_diff.py b/tests/benchmarkstt/test_diff.py
@@ -14,6 +14,15 @@ def test_one_insert(differ):
     assert list(sm.get_opcodes()) == [('equal', 0, 50, 0, 50),
                                       ('insert', 50, 50, 50, 51),
                                       ('equal', 50, 100, 51, 101)]
+    ref = "a b c d e f"
+    hyp = "a b d e kfmod fgdjn idf giudfg diuf dufg idgiudgd"
+    sm = differ(ref, hyp)
+    assert list(sm.get_opcodes()) == [('equal', 0, 3, 0, 3),
+                                      ('delete', 3, 5, 3, 3),
+                                      ('equal', 5, 10, 3, 8),
+                                      ('insert', 10, 10, 8, 9),
+                                      ('equal', 10, 11, 9, 10),
+                                      ('insert', 11, 11, 10, 49)]
 
 
 @differs_decorator
diff --git a/tests/benchmarkstt/test_metrics_core.py b/tests/benchmarkstt/test_metrics_core.py
@@ -15,6 +15,8 @@
     ['changes 1 word', 'changes one word', (2, 1, 0, 0)],
     ['0 1 2 3 4', '0 1 22 2 3 4', (5, 0, 1, 0)],
     ['0 1 2 3 4', '0 1 2 3 4', (5, 0, 0, 0)],
+    ['a b c d e f', 'a b d e kfmod fgdjn idf giudfg diuf dufg idgiudgd', (4, 1, 6, 1)],
+    ['HELLO CRUEL WORLD OF MINE', 'GOODBYE WORLD OF MINE', (3, 1, 0, 1)],
 ])
 def test_diffcounts(a, b, exp):
     assert DiffCounts().compare(PlainText(a), PlainText(b)) == OpcodeCounts(*exp)
@@ -27,7 +29,8 @@ def test_diffcounts(a, b, exp):
     ['aa bb cc dd', 'aa aa bb cc dd dd', (.5, .25)],
     ['aa bb cc dd', '', (1, 0.5)],
     ['', 'aa bb cc', (1, 1)],
-    ['aa', 'bb aa cc', (2, 1)]
+    ['aa', 'bb aa cc', (2, 1)],
+    ['a b c d e f', 'a b d e kfmod fgdjn idf giudfg diuf dufg idgiudgd', (8/6, 3/4)],
 ])
 def test_wer(a, b, exp):
     wer_strict, wer_hunt = exp