mirror of
https://github.com/AdaCore/why3.git
synced 2026-02-12 12:34:55 -08:00
174 lines
4.7 KiB
Plaintext
174 lines
4.7 KiB
Plaintext
|
|
(* Correctness of a program computing the minimal distance between
|
|
two words (code by Claude Marché).
|
|
|
|
This program computes a variant of the Levenshtein distance. Given
|
|
two strings [w1] and [w2] of respective lengths [n1] and [n2], it
|
|
computes the minimal numbers of insertions and deletions to
|
|
perform in one of the strings to get the other one. (The
|
|
traditional edit distance also includes substitutions.)
|
|
|
|
The nice point about this algorithm, due to Claude Marché, is to
|
|
work in linear space, in an array of min(n1,n2) integers. Time
|
|
complexity is O(n1 * n2), as usual.
|
|
*)
|
|
|
|
theory Word
|
|
|
|
use export int.Int
|
|
use export int.MinMax
|
|
use export list.List
|
|
use export list.Length
|
|
|
|
type char
|
|
|
|
val eq (c1 c2:char) : bool
|
|
ensures { result <-> c1 = c2 }
|
|
|
|
type word = list char
|
|
|
|
inductive dist word word int =
|
|
| dist_eps :
|
|
dist Nil Nil 0
|
|
| dist_add_left :
|
|
forall w1 w2: word, n: int.
|
|
dist w1 w2 n -> forall a: char. dist (Cons a w1) w2 (n + 1)
|
|
| dist_add_right :
|
|
forall w1 w2: word, n: int.
|
|
dist w1 w2 n -> forall a: char. dist w1 (Cons a w2) (n + 1)
|
|
| dist_context :
|
|
forall w1 w2:word, n: int.
|
|
dist w1 w2 n -> forall a: char. dist (Cons a w1) (Cons a w2) n
|
|
|
|
predicate min_dist (w1 w2: word) (n: int) =
|
|
dist w1 w2 n /\ forall m: int. dist w1 w2 m -> n <= m
|
|
|
|
(* intermediate lemmas *)
|
|
|
|
use export list.Append
|
|
|
|
function last_char (a: char) (u: word) : char = match u with
|
|
| Nil -> a
|
|
| Cons c u' -> last_char c u'
|
|
end
|
|
|
|
function but_last (a: char) (u: word) : word = match u with
|
|
| Nil -> Nil
|
|
| Cons c u' -> Cons a (but_last c u')
|
|
end
|
|
|
|
lemma first_last_explicit:
|
|
forall u: word, a: char.
|
|
but_last a u ++ Cons (last_char a u) Nil = Cons a u
|
|
|
|
lemma first_last:
|
|
forall a: char, u: word. exists v: word, b: char.
|
|
v ++ Cons b Nil = Cons a u /\ length v = length u
|
|
|
|
lemma key_lemma_right:
|
|
forall w1 w'2: word, m: int, a: char.
|
|
dist w1 w'2 m ->
|
|
forall w2: word. w'2 = Cons a w2 ->
|
|
exists u1 v1: word, k: int.
|
|
w1 = u1 ++ v1 /\ dist v1 w2 k /\ k + length u1 <= m + 1
|
|
|
|
lemma dist_symetry:
|
|
forall w1 w2: word, n: int. dist w1 w2 n -> dist w2 w1 n
|
|
|
|
lemma key_lemma_left:
|
|
forall w1 w2: word, m: int, a: char.
|
|
dist (Cons a w1) w2 m ->
|
|
exists u2 v2: word, k: int.
|
|
w2 = u2 ++ v2 /\ dist w1 v2 k /\ k + length u2 <= m + 1
|
|
|
|
lemma dist_concat_left:
|
|
forall u v w: word, n: int.
|
|
dist v w n -> dist (u ++ v) w (length u + n)
|
|
|
|
lemma dist_concat_right:
|
|
forall u v w: word, n: int.
|
|
dist v w n -> dist v (u ++ w) (length u + n)
|
|
|
|
(* main lemmas *)
|
|
|
|
lemma min_dist_equal:
|
|
forall w1 w2: word, a: char, n: int.
|
|
min_dist w1 w2 n -> min_dist (Cons a w1) (Cons a w2) n
|
|
|
|
lemma min_dist_diff:
|
|
forall w1 w2: word, a b: char, m p: int.
|
|
a <> b ->
|
|
min_dist (Cons a w1) w2 p ->
|
|
min_dist w1 (Cons b w2) m ->
|
|
min_dist (Cons a w1) (Cons b w2) (min m p + 1)
|
|
|
|
lemma min_dist_eps:
|
|
forall w: word, a: char, n: int.
|
|
min_dist w Nil n -> min_dist (Cons a w) Nil (n + 1)
|
|
|
|
lemma min_dist_eps_length:
|
|
forall w: word. min_dist Nil w (length w)
|
|
|
|
end
|
|
|
|
module EditDistance
|
|
|
|
use Word
|
|
use list.Length as L
|
|
use ref.Ref
|
|
use array.Array
|
|
|
|
(* Auxiliary definitions for the program and its specification. *)
|
|
|
|
function suffix (a: array char) (i: int) : word
|
|
|
|
axiom suffix_nil:
|
|
forall a: array char. suffix a a.length = Nil
|
|
|
|
axiom suffix_cons:
|
|
forall a: array char, i: int.
|
|
0 <= i < a.length -> suffix a i = Cons a[i] (suffix a (i+1))
|
|
|
|
lemma suffix_length:
|
|
forall a: array char, i: int.
|
|
0 <= i <= a.length -> L.length (suffix a i) = (a.length - i)
|
|
|
|
predicate min_suffix (a1 a2: array char) (i j n: int) =
|
|
min_dist (suffix a1 i) (suffix a2 j) n
|
|
|
|
function word_of (a: array char) : word = suffix a 0
|
|
|
|
(* The program. *)
|
|
|
|
let distance (w1: array char) (w2: array char)
|
|
ensures { min_dist (word_of w1) (word_of w2) result }
|
|
= let n1 = length w1 in
|
|
let n2 = length w2 in
|
|
let t = Array.make (n2+1) 0 in
|
|
(* initialization of t *)
|
|
for i = 0 to n2 do
|
|
invariant { forall j:int. 0 <= j < i -> t[j] = n2 - j }
|
|
t[i] <- n2 - i
|
|
done;
|
|
(* loop over w1 *)
|
|
for i = n1 - 1 downto 0 do
|
|
invariant { forall j:int. 0 <= j <= n2 -> min_suffix w1 w2 (i+1) j t[j] }
|
|
let oldt = ref t[n2] in
|
|
t[n2] <- t[n2] + 1;
|
|
(* loop over w2 *)
|
|
for j = n2 - 1 downto 0 do
|
|
invariant { forall k:int. j < k <= n2 -> min_suffix w1 w2 i k t[k] }
|
|
invariant { forall k:int. 0 <= k <= j -> min_suffix w1 w2 (i+1) k t[k] }
|
|
invariant { min_suffix w1 w2 (i+1) (j+1) !oldt }
|
|
let temp = !oldt in
|
|
oldt := t[j];
|
|
if eq w1[i] w2[j] then
|
|
t[j] <- temp
|
|
else
|
|
t[j] <- (min t[j] t[j + 1]) + 1
|
|
done
|
|
done;
|
|
t[0]
|
|
|
|
end
|