Files
why3/examples/edit_distance.mlw
2018-06-15 16:45:58 +02:00

174 lines
4.7 KiB
Plaintext

(* Correctness of a program computing the minimal distance between
two words (code by Claude Marché).
This program computes a variant of the Levenshtein distance. Given
two strings [w1] and [w2] of respective lengths [n1] and [n2], it
computes the minimal numbers of insertions and deletions to
perform in one of the strings to get the other one. (The
traditional edit distance also includes substitutions.)
The nice point about this algorithm, due to Claude Marché, is to
work in linear space, in an array of min(n1,n2) integers. Time
complexity is O(n1 * n2), as usual.
*)
theory Word
use export int.Int
use export int.MinMax
use export list.List
use export list.Length
type char
val eq (c1 c2:char) : bool
ensures { result <-> c1 = c2 }
type word = list char
inductive dist word word int =
| dist_eps :
dist Nil Nil 0
| dist_add_left :
forall w1 w2: word, n: int.
dist w1 w2 n -> forall a: char. dist (Cons a w1) w2 (n + 1)
| dist_add_right :
forall w1 w2: word, n: int.
dist w1 w2 n -> forall a: char. dist w1 (Cons a w2) (n + 1)
| dist_context :
forall w1 w2:word, n: int.
dist w1 w2 n -> forall a: char. dist (Cons a w1) (Cons a w2) n
predicate min_dist (w1 w2: word) (n: int) =
dist w1 w2 n /\ forall m: int. dist w1 w2 m -> n <= m
(* intermediate lemmas *)
use export list.Append
function last_char (a: char) (u: word) : char = match u with
| Nil -> a
| Cons c u' -> last_char c u'
end
function but_last (a: char) (u: word) : word = match u with
| Nil -> Nil
| Cons c u' -> Cons a (but_last c u')
end
lemma first_last_explicit:
forall u: word, a: char.
but_last a u ++ Cons (last_char a u) Nil = Cons a u
lemma first_last:
forall a: char, u: word. exists v: word, b: char.
v ++ Cons b Nil = Cons a u /\ length v = length u
lemma key_lemma_right:
forall w1 w'2: word, m: int, a: char.
dist w1 w'2 m ->
forall w2: word. w'2 = Cons a w2 ->
exists u1 v1: word, k: int.
w1 = u1 ++ v1 /\ dist v1 w2 k /\ k + length u1 <= m + 1
lemma dist_symetry:
forall w1 w2: word, n: int. dist w1 w2 n -> dist w2 w1 n
lemma key_lemma_left:
forall w1 w2: word, m: int, a: char.
dist (Cons a w1) w2 m ->
exists u2 v2: word, k: int.
w2 = u2 ++ v2 /\ dist w1 v2 k /\ k + length u2 <= m + 1
lemma dist_concat_left:
forall u v w: word, n: int.
dist v w n -> dist (u ++ v) w (length u + n)
lemma dist_concat_right:
forall u v w: word, n: int.
dist v w n -> dist v (u ++ w) (length u + n)
(* main lemmas *)
lemma min_dist_equal:
forall w1 w2: word, a: char, n: int.
min_dist w1 w2 n -> min_dist (Cons a w1) (Cons a w2) n
lemma min_dist_diff:
forall w1 w2: word, a b: char, m p: int.
a <> b ->
min_dist (Cons a w1) w2 p ->
min_dist w1 (Cons b w2) m ->
min_dist (Cons a w1) (Cons b w2) (min m p + 1)
lemma min_dist_eps:
forall w: word, a: char, n: int.
min_dist w Nil n -> min_dist (Cons a w) Nil (n + 1)
lemma min_dist_eps_length:
forall w: word. min_dist Nil w (length w)
end
module EditDistance
use Word
use list.Length as L
use ref.Ref
use array.Array
(* Auxiliary definitions for the program and its specification. *)
function suffix (a: array char) (i: int) : word
axiom suffix_nil:
forall a: array char. suffix a a.length = Nil
axiom suffix_cons:
forall a: array char, i: int.
0 <= i < a.length -> suffix a i = Cons a[i] (suffix a (i+1))
lemma suffix_length:
forall a: array char, i: int.
0 <= i <= a.length -> L.length (suffix a i) = (a.length - i)
predicate min_suffix (a1 a2: array char) (i j n: int) =
min_dist (suffix a1 i) (suffix a2 j) n
function word_of (a: array char) : word = suffix a 0
(* The program. *)
let distance (w1: array char) (w2: array char)
ensures { min_dist (word_of w1) (word_of w2) result }
= let n1 = length w1 in
let n2 = length w2 in
let t = Array.make (n2+1) 0 in
(* initialization of t *)
for i = 0 to n2 do
invariant { forall j:int. 0 <= j < i -> t[j] = n2 - j }
t[i] <- n2 - i
done;
(* loop over w1 *)
for i = n1 - 1 downto 0 do
invariant { forall j:int. 0 <= j <= n2 -> min_suffix w1 w2 (i+1) j t[j] }
let oldt = ref t[n2] in
t[n2] <- t[n2] + 1;
(* loop over w2 *)
for j = n2 - 1 downto 0 do
invariant { forall k:int. j < k <= n2 -> min_suffix w1 w2 i k t[k] }
invariant { forall k:int. 0 <= k <= j -> min_suffix w1 w2 (i+1) k t[k] }
invariant { min_suffix w1 w2 (i+1) (j+1) !oldt }
let temp = !oldt in
oldt := t[j];
if eq w1[i] w2[j] then
t[j] <- temp
else
t[j] <- (min t[j] t[j + 1]) + 1
done
done;
t[0]
end