mirror of
https://github.com/AdaCore/why3.git
synced 2026-02-12 12:34:55 -08:00
634 lines
17 KiB
Plaintext
634 lines
17 KiB
Plaintext
(** {1 Theory of strings}
|
|
|
|
This file provides a theory over strings. It contains all the
|
|
predicates and functions currently supported by the smt-solvers CVC4
|
|
and Z3.
|
|
|
|
*)
|
|
|
|
(** {2 String operations} *)
|
|
|
|
module String
|
|
|
|
use int.Int
|
|
|
|
(** The type `string` is built-in. Indexes for strings start at `0`. *)
|
|
|
|
(** The following functions/predicates are available in this theory:
|
|
{h
|
|
<style type="text/css">td{padding:0 15px 0 15px;}</style>
|
|
<table cellpadding="5" cellspacing="0" border="1" style="border-collapse:collapse">
|
|
<tr><th> Description </th> <th> Name </th> <th> Arguments </th> <th> Result </th> </tr>
|
|
<tr>
|
|
<td/> Concatenate
|
|
<td/> <code> concat <code> <td/> string string <td/> string
|
|
</tr><tr>
|
|
<td/> Length
|
|
<td/><code> length </code> <td/> string <td/> int
|
|
</tr><tr>
|
|
<td/> Get string of length 1
|
|
<td/><code> s_at </code> <td/> string int <td/> string
|
|
</tr><tr>
|
|
<td/> Get substring
|
|
<td/><code> substring </code> <td/> string int int <td/> string
|
|
</tr><tr>
|
|
<td/> Index of substring
|
|
<td/><code> indexof </code> <td/> string string int <td/> int
|
|
</tr><tr>
|
|
<td/> Replace first occurrence
|
|
<td/><code> replace </code> <td/> string string string <td/> string
|
|
</tr><tr>
|
|
<td/> Replace all occurrences
|
|
<td/><code> replaceall </code><td/> string string string <td/> string
|
|
</tr><tr>
|
|
<td/> String to int
|
|
<td/><code> to_int </code> <td/> string <td/> int
|
|
</tr><tr>
|
|
<td/> Int to string
|
|
<td/><code> from_int </code> <td/> int <td/> string
|
|
</tr><tr>
|
|
<td/> Comparison less than
|
|
<td/><code> lt </code> <td/> string string <td/>
|
|
</tr><tr>
|
|
<td/> Comparison less or equal than
|
|
<td/><code> le </code> <td/> string string <td/>
|
|
</tr><tr>
|
|
<td/> Check if it a prefix
|
|
<td/><code> prefixof </code> <td/> string string <td/>
|
|
</tr><tr>
|
|
<td/> Check if it is a suffix
|
|
<td/><code> suffixof </code> <td/> string string <td/>
|
|
</tr><tr>
|
|
<td/> Check if contains
|
|
<td/><code> contains </code> <td/> string string <td/>
|
|
</tr><tr>
|
|
<td/> Check if it is a digit
|
|
<td/><code> is_digit </code> <td/> string <td/>
|
|
</tr>
|
|
|
|
|
|
</table>}
|
|
*)
|
|
|
|
|
|
constant empty : string = ""
|
|
(** the empty string. *)
|
|
|
|
function concat string string : string
|
|
(** `concat s1 s2` is the concatenation of `s1` and `s2`. *)
|
|
|
|
axiom concat_assoc: forall s1 s2 s3.
|
|
concat (concat s1 s2) s3 = concat s1 (concat s2 s3)
|
|
|
|
axiom concat_empty: forall s.
|
|
concat s empty = concat empty s = s
|
|
|
|
function length string : int
|
|
(** `length s` is the length of the string `s`. *)
|
|
|
|
(* axiom length_nonneg: forall s. length s >= 0 *)
|
|
|
|
axiom length_empty: length "" = 0
|
|
|
|
axiom length_concat: forall s1 s2.
|
|
length (concat s1 s2) = length s1 + length s2
|
|
|
|
predicate lt string string
|
|
(** `lt s1 s2` returns `True` iff `s1` is lexicographically smaller
|
|
than `s2`. *)
|
|
|
|
axiom lt_empty: forall s.
|
|
s <> empty -> lt empty s
|
|
|
|
axiom lt_not_com: forall s1 s2.
|
|
lt s1 s2 -> not (lt s2 s1)
|
|
|
|
axiom lt_ref: forall s1. not (lt s1 s1)
|
|
|
|
axiom lt_trans: forall s1 s2 s3.
|
|
lt s1 s2 && lt s2 s3 -> lt s1 s3
|
|
|
|
predicate le string string
|
|
(** `le s1 s2` returns `True` iff `s1` is lexicographically smaller
|
|
or equal than `s2`. *)
|
|
|
|
axiom le_empty: forall s.
|
|
le empty s
|
|
|
|
axiom le_ref: forall s1.
|
|
le s1 s1
|
|
|
|
axiom lt_le: forall s1 s2.
|
|
lt s1 s2 -> le s1 s2
|
|
|
|
axiom lt_le_eq: forall s1 s2.
|
|
le s1 s2 -> lt s1 s2 || s1 = s2
|
|
|
|
axiom le_trans: forall s1 s2 s3.
|
|
le s1 s2 && le s2 s3 -> le s1 s3
|
|
|
|
function s_at string int : string
|
|
(** `s_at s i` is:
|
|
|
|
(1) `empty`, if either `i < 0` or `i >= length s`;
|
|
|
|
(2) the string of length `1` containing the character of
|
|
position `i` in string `s`, if `0 <= i < length s`. *)
|
|
|
|
axiom at_out_of_range: forall s i.
|
|
i < 0 || i >= length s -> s_at s i = empty
|
|
|
|
axiom at_empty: forall i.
|
|
s_at empty i = empty
|
|
|
|
axiom at_length: forall s i.
|
|
let j = s_at s i in
|
|
if 0 <= i < length s then length j = 1 else length j = 0
|
|
|
|
axiom concat_at: forall s1 s2.
|
|
let s = concat s1 s2 in
|
|
forall i. (0 <= i < length s1 -> s_at s i = s_at s1 i) &&
|
|
(length s1 <= i < length s -> s_at s i = s_at s2 (i - length s1))
|
|
|
|
function substring string int int : string
|
|
(** `substring s i x` is:
|
|
|
|
(1) the `empty` string if `i < 0`, `i >= length s`, or `x <= 0`;
|
|
|
|
(2) the substring of `s` starting at `i` and of length
|
|
`min x (length s - i)`. *)
|
|
|
|
axiom substring_out_of_range: forall s i x.
|
|
i < 0 || i >= length s -> substring s i x = empty
|
|
|
|
axiom substring_of_length_zero_or_less: forall s i x.
|
|
x <= 0 -> substring s i x = ""
|
|
|
|
axiom substring_of_empty: forall i x.
|
|
substring "" i x = ""
|
|
|
|
axiom substring_smaller: forall s i x.
|
|
length (substring s i x) <= length s
|
|
|
|
axiom substring_smaller_x: forall s i x.
|
|
x >= 0 -> length (substring s i x) <= x
|
|
|
|
axiom substring_length: forall s i x.
|
|
x >= 0 && 0 <= i < length s ->
|
|
if i + x > length s then
|
|
length (substring s i x) = length s - i
|
|
else length (substring s i x) = x
|
|
|
|
axiom substring_at: forall s i.
|
|
s_at s i = substring s i 1
|
|
|
|
axiom substring_substring:
|
|
forall s ofs len ofs' len'.
|
|
0 <= ofs <= length s -> 0 <= len -> ofs + len <= length s ->
|
|
0 <= ofs' <= len -> 0 <= len' -> ofs' + len' <= len ->
|
|
substring (substring s ofs len) ofs' len' = substring s (ofs + ofs') len'
|
|
|
|
axiom concat_substring:
|
|
forall s ofs len len'.
|
|
0 <= ofs <= length s -> 0 <= len -> ofs + len <= length s ->
|
|
0 <= len' -> 0 <= ofs + len + len' <= length s ->
|
|
concat (substring s ofs len) (substring s (ofs+len) len') =
|
|
substring s ofs (len + len')
|
|
|
|
predicate prefixof string string
|
|
(** `prefixof s1 s2` is `True` iff `s1` is a prefix of `s2`. *)
|
|
|
|
axiom prefixof_substring: forall s1 s2.
|
|
prefixof s1 s2 <-> s1 = substring s2 0 (length s1)
|
|
|
|
axiom prefixof_concat: forall s1 s2.
|
|
prefixof s1 (concat s1 s2)
|
|
|
|
axiom prefixof_empty: forall s2.
|
|
prefixof "" s2
|
|
|
|
axiom prefixof_empty2: forall s1.
|
|
s1 <> empty -> not (prefixof s1 "")
|
|
|
|
predicate suffixof string string
|
|
(** `suffixof s1 s2` is `True` iff `s1` is a suffix of `s2`. *)
|
|
|
|
axiom suffixof_substring: forall s1 s2.
|
|
suffixof s1 s2 <-> s1 = substring s2 (length s2 - length s1) (length s1)
|
|
|
|
axiom suffixof_concat: forall s1 s2.
|
|
suffixof s2 (concat s1 s2)
|
|
|
|
axiom suffixof_empty: forall s2.
|
|
suffixof "" s2
|
|
|
|
axiom suffixof_empty2: forall s1.
|
|
s1 <> empty -> not (suffixof s1 "")
|
|
|
|
predicate contains string string
|
|
(** `contains s1 s2` is `True` iff `s1` contains `s2`. *)
|
|
|
|
axiom contains_prefixof: forall s1 s2.
|
|
prefixof s1 s2 -> contains s2 s1
|
|
|
|
axiom contains_suffixof: forall s1 s2.
|
|
suffixof s1 s2 -> contains s2 s1
|
|
|
|
axiom contains_empty: forall s2.
|
|
contains "" s2 <-> s2 = empty
|
|
|
|
axiom contains_empty2: forall s1.
|
|
contains s1 ""
|
|
|
|
axiom contains_substring: forall s1 s2 i.
|
|
substring s1 i (length s2) = s2 -> contains s1 s2
|
|
|
|
(*** The following should hold, but are not proved by SMT provers *)
|
|
(*** axiom substring_contains: forall s1 s2.
|
|
contains s1 s2 -> exists i. substring s1 i (length s2) = s2 *)
|
|
|
|
axiom contains_concat: forall s1 s2.
|
|
contains (concat s1 s2) s1 && contains (concat s1 s2) s2
|
|
|
|
axiom contains_at: forall s1 s2 i.
|
|
s_at s1 i = s2 -> contains s1 s2
|
|
|
|
(*** The following should hold, but are not proved by SMT provers *)
|
|
(*** axiom at_contains: forall s1 s2.
|
|
contains s1 s2 && length s2 = 1 -> exists i. s_at s1 i = s2 *)
|
|
|
|
function indexof string string int : int
|
|
(** `indexof s1 s2 i` is:
|
|
|
|
(1) the first occurrence of `s2` in `s1` after `i`, if `0 <= i <=
|
|
length s1` (note: the result is `i`, if `s2 = empty` and `0
|
|
<= i <= length s1`);
|
|
|
|
(2) `-1`, if `i < 0`, `i > length s1`, or `0 <= i <= length s1`
|
|
and `s2` does not occur in `s1` after `i`. *)
|
|
|
|
axiom indexof_empty: forall s i.
|
|
0 <= i <= length s -> indexof s "" i = i
|
|
|
|
axiom indexof_empty1: forall s i.
|
|
let j = indexof "" s i in
|
|
j = -1 || (s = "" && i = j = 0)
|
|
|
|
axiom indexof_contains: forall s1 s2.
|
|
let j = indexof s1 s2 0 in
|
|
contains s1 s2 ->
|
|
0 <= j <= length s1 && substring s1 j (length s2) = s2
|
|
|
|
axiom contains_indexof: forall s1 s2 i.
|
|
indexof s1 s2 i >= 0 -> contains s1 s2
|
|
|
|
axiom not_contains_indexof: forall s1 s2 i.
|
|
not (contains s1 s2) -> indexof s1 s2 i = -1
|
|
|
|
axiom substring_indexof: forall s1 s2 i.
|
|
let j = indexof s1 s2 i in
|
|
j >= 0 -> substring s1 j (length s2) = s2
|
|
|
|
axiom indexof_out_of_range: forall i s1 s2.
|
|
not (0 <= i <= length s1) -> indexof s1 s2 i = -1
|
|
|
|
axiom indexof_in_range: forall s1 s2 i.
|
|
let j = indexof s1 s2 i in
|
|
0 <= i <= length s1 -> j = -1 || i <= j <= length s1
|
|
|
|
axiom indexof_contains_substring: forall s1 s2 i.
|
|
0 <= i <= length s1 && contains (substring s1 i (length s1 - i)) s2 ->
|
|
i <= indexof s1 s2 i <= length s1
|
|
|
|
function replace string string string : string
|
|
(** `replace s1 s2 s3` is:
|
|
|
|
(1) `concat s3 s1`, if `s2 = empty`;
|
|
|
|
(2) the string obtained by replacing the first occurrence of `s2`
|
|
(if any) by `s3` in `s1`. *)
|
|
|
|
axiom replace_empty: forall s1 s3.
|
|
replace s1 "" s3 = concat s3 s1
|
|
|
|
axiom replace_not_contains: forall s1 s2 s3.
|
|
not (contains s1 s2) -> replace s1 s2 s3 = s1
|
|
|
|
axiom replace_empty2: forall s2 s3.
|
|
let s4 = replace empty s2 s3 in
|
|
if s2 = empty then s4 = s3 else s4 = empty
|
|
|
|
axiom replace_substring_indexof: forall s1 s2 s3.
|
|
let j = indexof s1 s2 0 in
|
|
replace s1 s2 s3 =
|
|
if j < 0 then s1 else
|
|
concat (concat (substring s1 0 j) s3)
|
|
(substring s1 (j + length s2) (length s1 - j - length s2))
|
|
|
|
(*** Not in SMT-LIB standard, but in CVC4 *)
|
|
function replaceall string string string : string
|
|
(** `replaceall s1 s2 s3` is:
|
|
|
|
(1) `s1`, if `s2 = empty`;
|
|
|
|
(2) the string obtained by replacing all occurrences of `s2` by
|
|
`s3` in `s1`. *)
|
|
|
|
axiom replaceall_empty1: forall s1 s3.
|
|
replaceall s1 "" s3 = s1
|
|
|
|
axiom not_contains_replaceall: forall s1 s2 s3.
|
|
not (contains s1 s2) -> replaceall s1 s2 s3 = s1
|
|
|
|
function to_int string : int
|
|
(** `to_int s` is:
|
|
|
|
(1) an `int` consisting on the digits of `s`, if `s` contains
|
|
exclusively ascii characters in the range 0x30 ... 0x39;
|
|
|
|
(2) `-1`, if `s` contains a character that is not in the range
|
|
0x30 ... 0x39. *)
|
|
|
|
axiom to_int_gt_minus_1: forall s. to_int s >= -1
|
|
|
|
axiom to_int_empty: to_int "" = -1
|
|
|
|
(*** The following lemmas should hold, but CVC4 is not able to prove them *)
|
|
(*** axiom to_int_only_digits: forall s.
|
|
s <> empty &&
|
|
(forall i. 0 <= i < length s -> le "0" (s_at s i) && le (s_at s i) "9")
|
|
-> to_int s >= 0 *)
|
|
|
|
(*** axiom to_int_lt_0: forall s i.
|
|
0 <= i < length s && lt (s_at s i) "0"
|
|
-> to_int s = -1 *)
|
|
|
|
(*** In SMT-Lib now *)
|
|
predicate is_digit (s:string) = 0 <= to_int s <= 9 && length s = 1
|
|
(** `is_digit s` returns `True` iff `s` is of length `1` and
|
|
corresponds to a decimal digit, that is, to a code point in the
|
|
range 0x30 ... 0x39 *)
|
|
|
|
function from_int int : string
|
|
(** `from_int i` is:
|
|
|
|
(1) the corresponding string in the decimal notation if `i >= 0`;
|
|
|
|
(2) `empty`, if `i < 0`. *)
|
|
|
|
axiom from_int_negative: forall i.
|
|
i < 0 <-> from_int i = empty
|
|
|
|
axiom from_int_to_int: forall i.
|
|
if i >= 0 then to_int (from_int i) = i else to_int (from_int i) = -1
|
|
|
|
end
|
|
|
|
(** {2 Characters} *)
|
|
|
|
module Char
|
|
|
|
use String
|
|
use int.Int
|
|
|
|
(** to be mapped into the OCaml char type. *)
|
|
type char = abstract {
|
|
contents: string;
|
|
} invariant {
|
|
length contents = 1
|
|
}
|
|
|
|
axiom char_eq: forall c1 c2.
|
|
c1.contents = c2.contents -> c1 = c2
|
|
|
|
function code char : int
|
|
|
|
axiom code: forall c. 0 <= code c < 256
|
|
|
|
function chr (n: int) : char
|
|
|
|
axiom code_chr: forall n. 0 <= n < 256 -> code (chr n) = n
|
|
|
|
axiom chr_code: forall c. chr (code c) = c
|
|
|
|
function get (s: string) (i: int) : char
|
|
|
|
axiom get: forall s i.
|
|
0 <= i < length s -> (get s i).contents = s_at s i
|
|
|
|
axiom substring_get:
|
|
forall s ofs len i.
|
|
0 <= ofs <= length s -> 0 <= len -> ofs + len <= length s -> 0 <= i < len ->
|
|
get (substring s ofs len) i = get s (ofs + i)
|
|
|
|
lemma concat_first: forall s1 s2.
|
|
let s3 = concat s1 s2 in
|
|
forall i. 0 <= i < length s1 ->
|
|
get s3 i = get s1 i
|
|
|
|
lemma concat_second: forall s1 s2.
|
|
let s3 = concat s1 s2 in
|
|
forall i. length s1 <= i < length s1 + length s2 ->
|
|
get s3 i = get s2 (i - length s1)
|
|
|
|
function ([]) (s: string) (i: int) : char = get s i
|
|
|
|
predicate eq_string (s1 s2: string) = length s1 = length s2 &&
|
|
(forall i. 0 <= i < length s1 -> get s1 i = get s2 i)
|
|
|
|
axiom extensionality [@W:non_conservative_extension:N]:
|
|
forall s1 s2. eq_string s1 s2 -> s1 = s2
|
|
|
|
function make (size: int) (v: char) : string
|
|
|
|
axiom make_length: forall size v. size >= 0 ->
|
|
length (make size v) = size
|
|
|
|
axiom make_contents: forall size v. size >= 0 ->
|
|
(forall i. 0 <= i < size -> get (make size v) i = v)
|
|
|
|
end
|
|
|
|
(** {3 Programming API}
|
|
|
|
The following program functions are mapped to OCaml's functions.
|
|
See also module `io.StdIO`. *)
|
|
|
|
module OCaml
|
|
|
|
use int.Int
|
|
use mach.int.Int63
|
|
use String
|
|
use Char
|
|
|
|
(* In OCaml max_string_length is 144_115_188_075_855_863 *)
|
|
|
|
val eq_char (c1 c2: char) : bool
|
|
ensures { result <-> c1 = c2 }
|
|
|
|
val get (s: string) (i: int63) : char
|
|
requires { 0 <= i < length s }
|
|
ensures { result = get s i }
|
|
|
|
let ([]) (s: string) (i: int63) : char
|
|
requires { 0 <= i < length s }
|
|
ensures { result = get s i }
|
|
= get s i
|
|
|
|
val code (c: char) : int63
|
|
ensures { result = code c }
|
|
|
|
val chr (n: int63) : char
|
|
requires { 0 <= n < 256 }
|
|
ensures { result = chr n }
|
|
|
|
val (=) (x y: string) : bool
|
|
ensures { result <-> x = y }
|
|
|
|
val partial length (s: string) : int63
|
|
ensures { result = length s >= 0 }
|
|
|
|
val sub (s: string) (start: int63) (len: int63) : string
|
|
requires { 0 <= start <= length s }
|
|
requires { 0 <= len <= length s - start }
|
|
ensures { result = substring s start len }
|
|
|
|
val concat (s1 s2: string) : string
|
|
ensures { result = concat s1 s2 }
|
|
|
|
val make (size: int63) (v: char) : string
|
|
requires { 0 <= size }
|
|
ensures { result = make size v }
|
|
|
|
end
|
|
|
|
(** The following module is extracted to OCaml's Buffer *)
|
|
|
|
module StringBuffer
|
|
|
|
use int.Int
|
|
use mach.int.Int63
|
|
use String
|
|
use Char
|
|
use OCaml
|
|
|
|
type buffer = abstract {
|
|
mutable str: string;
|
|
}
|
|
meta coercion function str
|
|
|
|
val create (_: int63) : buffer
|
|
ensures { result.str = "" }
|
|
|
|
val length (b: buffer) : int63
|
|
ensures { result = length b.str }
|
|
|
|
val contents (b: buffer) : string
|
|
ensures { result = b.str }
|
|
|
|
val clear (b: buffer) : unit
|
|
writes { b }
|
|
ensures { b.str = "" }
|
|
|
|
val reset (b: buffer) : unit
|
|
writes { b }
|
|
ensures { b.str = "" }
|
|
|
|
val sub (b: buffer) (ofs len: int63) : string
|
|
requires { 0 <= ofs /\ 0 <= len /\ ofs + len <= length b.str }
|
|
ensures { result = substring b.str ofs len }
|
|
|
|
val add_char (b: buffer) (c: char) : unit
|
|
writes { b }
|
|
ensures { b.str = concat (old b.str) c.Char.contents }
|
|
|
|
val add_string (b: buffer) (s: string) : unit
|
|
writes { b }
|
|
ensures { b.str = concat (old b.str) s }
|
|
|
|
val truncate (b: buffer) (n: int63) : unit
|
|
requires { 0 <= n <= length b.str }
|
|
writes { b }
|
|
ensures { b.str = substring (old b.str) 0 n }
|
|
|
|
end
|
|
|
|
(** {2 String realization}
|
|
|
|
This module is intended for string realization. It clones the `String`
|
|
module replacing axioms by goals.
|
|
|
|
*)
|
|
|
|
module StringRealization
|
|
|
|
clone export String
|
|
with goal .
|
|
(** trick to remove axioms from the `String` theory. See file
|
|
`examples/stdlib/stringCheck.mlw` *)
|
|
|
|
end
|
|
|
|
(** {2 Regular expressions} *)
|
|
|
|
module RegExpr
|
|
|
|
use String as S
|
|
|
|
type re
|
|
(** type for regular expressions *)
|
|
|
|
function to_re string : re
|
|
(** string to regular expression injection *)
|
|
|
|
predicate in_re string re
|
|
(** regular expression membership *)
|
|
|
|
function concat re re : re
|
|
(** regular expressions concatenation, left associativity *)
|
|
|
|
function union re re : re
|
|
(** regular expressions union, left associativity *)
|
|
|
|
function inter re re : re
|
|
(** regular expressions intersection, left associativity *)
|
|
|
|
function star re : re
|
|
(** Kleene closure *)
|
|
|
|
function plus re : re
|
|
(** Kleene cross *)
|
|
(*** forall re. plus re = concat re (star re) *)
|
|
|
|
constant none : re
|
|
(** the empty set of strings *)
|
|
|
|
constant allchar : re
|
|
(** the set of all strings of length 1 *)
|
|
|
|
constant all : re = star allchar
|
|
(** the set of all strings *)
|
|
|
|
function opt re : re
|
|
(** regular expression option *)
|
|
(*** forall re. opt re = union re (to_re "") *)
|
|
|
|
function range string string : re
|
|
(** `range s1 s2` is the set of singleton strings such that all
|
|
element `s` of `range s1 s2` satisfies the condition `Str.<= s1 s`
|
|
and `Str.<= s s2`. *)
|
|
|
|
function power int re : re
|
|
(** `power n r` is the `n`th power of `r`; `n` must be an
|
|
integer literal. *)
|
|
(*** power 0 e = to_re ""
|
|
power n e = concat e (power (n-1) e) *)
|
|
|
|
function loop int int re : re
|
|
(** `loop n1 n2 r = if n1 > ne then none else
|
|
if n1 = n2 then power n1 r else
|
|
union (power n1 e) (loop (n1+1) n2 e)` *)
|
|
(*** `n1` and `n2` must be literals. *)
|
|
|
|
end
|