Files
why3/stdlib/string.mlw

636 lines
17 KiB
Plaintext
Raw Permalink Normal View History

(** {1 Theory of strings}
This file provides a theory over strings. It contains all the
predicates and functions currently supported by the smt-solvers CVC4
and Z3.
*)
(** {2 String operations} *)
2019-09-18 14:01:09 +02:00
module String
use int.Int
2019-11-26 09:07:39 +01:00
(** The type `string` is built-in. Indexes for strings start at `0`. *)
(** The following functions/predicates are available in this theory:
{h
<style type="text/css">td{padding:0 15px 0 15px;}</style>
<table cellpadding="5" cellspacing="0" border="1" style="border-collapse:collapse">
<tr><th> Description </th> <th> Name </th> <th> Arguments </th> <th> Result </th> </tr>
<tr>
<td/> Concatenate
<td/> <code> concat <code> <td/> string string <td/> string
</tr><tr>
<td/> Length
<td/><code> length </code> <td/> string <td/> int
</tr><tr>
<td/> Get string of length 1
<td/><code> s_at </code> <td/> string int <td/> string
2019-11-26 09:07:39 +01:00
</tr><tr>
<td/> Get substring
<td/><code> substring </code> <td/> string int int <td/> string
</tr><tr>
<td/> Index of substring
<td/><code> indexof </code> <td/> string string int <td/> int
</tr><tr>
<td/> Replace first occurrence
<td/><code> replace </code> <td/> string string string <td/> string
</tr><tr>
<td/> Replace all occurrences
<td/><code> replaceall </code><td/> string string string <td/> string
</tr><tr>
<td/> String to int
<td/><code> to_int </code> <td/> string <td/> int
</tr><tr>
<td/> Int to string
<td/><code> from_int </code> <td/> int <td/> string
</tr><tr>
<td/> Comparison less than
<td/><code> lt </code> <td/> string string <td/>
</tr><tr>
<td/> Comparison less or equal than
<td/><code> le </code> <td/> string string <td/>
</tr><tr>
<td/> Check if it a prefix
<td/><code> prefixof </code> <td/> string string <td/>
</tr><tr>
<td/> Check if it is a suffix
<td/><code> suffixof </code> <td/> string string <td/>
</tr><tr>
<td/> Check if contains
<td/><code> contains </code> <td/> string string <td/>
</tr><tr>
<td/> Check if it is a digit
<td/><code> is_digit </code> <td/> string <td/>
</tr>
</table>}
*)
2019-09-18 14:01:09 +02:00
2019-09-18 14:01:09 +02:00
constant empty : string = ""
(** the empty string. *)
function concat string string : string
2019-10-31 10:25:46 +01:00
(** `concat s1 s2` is the concatenation of `s1` and `s2`. *)
2019-09-18 14:01:09 +02:00
axiom concat_assoc: forall s1 s2 s3.
concat (concat s1 s2) s3 = concat s1 (concat s2 s3)
axiom concat_empty: forall s.
concat s empty = concat empty s = s
2019-09-18 14:01:09 +02:00
function length string : int
2019-10-31 10:25:46 +01:00
(** `length s` is the length of the string `s`. *)
(* axiom length_nonneg: forall s. length s >= 0 *)
axiom length_empty: length "" = 0
axiom length_concat: forall s1 s2.
length (concat s1 s2) = length s1 + length s2
predicate lt string string
2019-10-31 10:25:46 +01:00
(** `lt s1 s2` returns `True` iff `s1` is lexicographically smaller
than `s2`. *)
axiom lt_empty: forall s.
s <> empty -> lt empty s
axiom lt_not_com: forall s1 s2.
lt s1 s2 -> not (lt s2 s1)
2019-10-24 12:55:38 +02:00
axiom lt_ref: forall s1. not (lt s1 s1)
2019-10-31 10:25:46 +01:00
axiom lt_trans: forall s1 s2 s3.
lt s1 s2 && lt s2 s3 -> lt s1 s3
predicate le string string
2019-10-31 10:25:46 +01:00
(** `le s1 s2` returns `True` iff `s1` is lexicographically smaller
or equal than `s2`. *)
axiom le_empty: forall s.
le empty s
2019-10-31 10:25:46 +01:00
axiom le_ref: forall s1.
le s1 s1
axiom lt_le: forall s1 s2.
lt s1 s2 -> le s1 s2
axiom lt_le_eq: forall s1 s2.
le s1 s2 -> lt s1 s2 || s1 = s2
2019-10-31 10:25:46 +01:00
axiom le_trans: forall s1 s2 s3.
le s1 s2 && le s2 s3 -> le s1 s3
function s_at string int : string
(** `s_at s i` is:
2019-10-31 10:25:46 +01:00
2019-09-24 10:08:25 +02:00
(1) `empty`, if either `i < 0` or `i >= length s`;
2019-10-31 10:25:46 +01:00
2019-09-24 10:08:25 +02:00
(2) the string of length `1` containing the character of
position `i` in string `s`, if `0 <= i < length s`. *)
axiom at_out_of_range: forall s i.
i < 0 || i >= length s -> s_at s i = empty
axiom at_empty: forall i.
s_at empty i = empty
2019-10-22 11:07:08 +02:00
axiom at_length: forall s i.
let j = s_at s i in
2019-10-22 11:07:08 +02:00
if 0 <= i < length s then length j = 1 else length j = 0
2020-01-13 14:37:20 +01:00
axiom concat_at: forall s1 s2.
let s = concat s1 s2 in
forall i. (0 <= i < length s1 -> s_at s i = s_at s1 i) &&
(length s1 <= i < length s -> s_at s i = s_at s2 (i - length s1))
2020-01-13 14:37:20 +01:00
function substring string int int : string
2019-09-24 10:08:25 +02:00
(** `substring s i x` is:
2019-10-31 10:25:46 +01:00
2019-09-24 10:08:25 +02:00
(1) the `empty` string if `i < 0`, `i >= length s`, or `x <= 0`;
2019-10-31 10:25:46 +01:00
2019-09-24 10:08:25 +02:00
(2) the substring of `s` starting at `i` and of length
`min x (length s - i)`. *)
axiom substring_out_of_range: forall s i x.
i < 0 || i >= length s -> substring s i x = empty
axiom substring_of_length_zero_or_less: forall s i x.
x <= 0 -> substring s i x = ""
2019-10-31 10:25:46 +01:00
axiom substring_of_empty: forall i x.
substring "" i x = ""
axiom substring_smaller: forall s i x.
length (substring s i x) <= length s
axiom substring_smaller_x: forall s i x.
x >= 0 -> length (substring s i x) <= x
axiom substring_length: forall s i x.
x >= 0 && 0 <= i < length s ->
if i + x > length s then
length (substring s i x) = length s - i
else length (substring s i x) = x
2019-10-31 10:25:46 +01:00
axiom substring_at: forall s i.
s_at s i = substring s i 1
2019-10-31 10:25:46 +01:00
axiom substring_substring:
forall s ofs len ofs' len'.
0 <= ofs <= length s -> 0 <= len -> ofs + len <= length s ->
0 <= ofs' <= len -> 0 <= len' -> ofs' + len' <= len ->
substring (substring s ofs len) ofs' len' = substring s (ofs + ofs') len'
axiom concat_substring:
forall s ofs len len'.
0 <= ofs <= length s -> 0 <= len -> ofs + len <= length s ->
0 <= len' -> 0 <= ofs + len + len' <= length s ->
concat (substring s ofs len) (substring s (ofs+len) len') =
substring s ofs (len + len')
2019-09-18 14:01:09 +02:00
predicate prefixof string string
(** `prefixof s1 s2` is `True` iff `s1` is a prefix of `s2`. *)
2019-09-18 14:01:09 +02:00
axiom prefixof_substring: forall s1 s2.
prefixof s1 s2 <-> s1 = substring s2 0 (length s1)
axiom prefixof_concat: forall s1 s2.
prefixof s1 (concat s1 s2)
axiom prefixof_empty: forall s2.
prefixof "" s2
axiom prefixof_empty2: forall s1.
s1 <> empty -> not (prefixof s1 "")
2019-09-18 14:01:09 +02:00
predicate suffixof string string
(** `suffixof s1 s2` is `True` iff `s1` is a suffix of `s2`. *)
axiom suffixof_substring: forall s1 s2.
suffixof s1 s2 <-> s1 = substring s2 (length s2 - length s1) (length s1)
axiom suffixof_concat: forall s1 s2.
suffixof s2 (concat s1 s2)
axiom suffixof_empty: forall s2.
suffixof "" s2
axiom suffixof_empty2: forall s1.
s1 <> empty -> not (suffixof s1 "")
predicate contains string string
(** `contains s1 s2` is `True` iff `s1` contains `s2`. *)
axiom contains_prefixof: forall s1 s2.
prefixof s1 s2 -> contains s2 s1
axiom contains_suffixof: forall s1 s2.
suffixof s1 s2 -> contains s2 s1
axiom contains_empty: forall s2.
2019-10-31 10:25:46 +01:00
contains "" s2 <-> s2 = empty
axiom contains_empty2: forall s1.
contains s1 ""
axiom contains_substring: forall s1 s2 i.
substring s1 i (length s2) = s2 -> contains s1 s2
2019-10-31 10:25:46 +01:00
(*** The following should hold, but are not proved by SMT provers *)
(*** axiom substring_contains: forall s1 s2.
contains s1 s2 -> exists i. substring s1 i (length s2) = s2 *)
axiom contains_concat: forall s1 s2.
contains (concat s1 s2) s1 && contains (concat s1 s2) s2
2019-10-31 10:25:46 +01:00
axiom contains_at: forall s1 s2 i.
s_at s1 i = s2 -> contains s1 s2
2019-10-31 10:25:46 +01:00
(*** The following should hold, but are not proved by SMT provers *)
(*** axiom at_contains: forall s1 s2.
contains s1 s2 && length s2 = 1 -> exists i. s_at s1 i = s2 *)
2019-10-31 10:25:46 +01:00
function indexof string string int : int
(** `indexof s1 s2 i` is:
2019-10-31 10:25:46 +01:00
2023-11-23 19:04:11 +01:00
(1) the first occurrence of `s2` in `s1` after `i`, if `0 <= i <=
2019-10-31 10:25:46 +01:00
length s1` (note: the result is `i`, if `s2 = empty` and `0
<= i <= length s1`);
2019-09-24 10:08:25 +02:00
(2) `-1`, if `i < 0`, `i > length s1`, or `0 <= i <= length s1`
and `s2` does not occur in `s1` after `i`. *)
axiom indexof_empty: forall s i.
0 <= i <= length s -> indexof s "" i = i
2019-10-31 10:25:46 +01:00
axiom indexof_empty1: forall s i.
let j = indexof "" s i in
j = -1 || (s = "" && i = j = 0)
axiom indexof_contains: forall s1 s2.
let j = indexof s1 s2 0 in
2019-10-31 10:25:46 +01:00
contains s1 s2 ->
0 <= j <= length s1 && substring s1 j (length s2) = s2
axiom contains_indexof: forall s1 s2 i.
2019-10-31 10:25:46 +01:00
indexof s1 s2 i >= 0 -> contains s1 s2
axiom not_contains_indexof: forall s1 s2 i.
not (contains s1 s2) -> indexof s1 s2 i = -1
axiom substring_indexof: forall s1 s2 i.
let j = indexof s1 s2 i in
j >= 0 -> substring s1 j (length s2) = s2
axiom indexof_out_of_range: forall i s1 s2.
not (0 <= i <= length s1) -> indexof s1 s2 i = -1
axiom indexof_in_range: forall s1 s2 i.
let j = indexof s1 s2 i in
0 <= i <= length s1 -> j = -1 || i <= j <= length s1
axiom indexof_contains_substring: forall s1 s2 i.
0 <= i <= length s1 && contains (substring s1 i (length s1 - i)) s2 ->
i <= indexof s1 s2 i <= length s1
function replace string string string : string
(** `replace s1 s2 s3` is:
2019-10-31 10:25:46 +01:00
2019-09-24 10:08:25 +02:00
(1) `concat s3 s1`, if `s2 = empty`;
2019-10-31 10:25:46 +01:00
2019-09-24 10:08:25 +02:00
(2) the string obtained by replacing the first occurrence of `s2`
(if any) by `s3` in `s1`. *)
axiom replace_empty: forall s1 s3.
replace s1 "" s3 = concat s3 s1
2019-10-31 10:25:46 +01:00
axiom replace_not_contains: forall s1 s2 s3.
not (contains s1 s2) -> replace s1 s2 s3 = s1
axiom replace_empty2: forall s2 s3.
2019-10-31 10:25:46 +01:00
let s4 = replace empty s2 s3 in
if s2 = empty then s4 = s3 else s4 = empty
axiom replace_substring_indexof: forall s1 s2 s3.
let j = indexof s1 s2 0 in
replace s1 s2 s3 =
if j < 0 then s1 else
concat (concat (substring s1 0 j) s3)
(substring s1 (j + length s2) (length s1 - j - length s2))
2019-10-31 10:25:46 +01:00
(*** Not in SMT-LIB standard, but in CVC4 *)
function replaceall string string string : string
2019-10-31 10:25:46 +01:00
(** `replaceall s1 s2 s3` is:
2019-09-18 14:01:09 +02:00
2019-10-31 10:25:46 +01:00
(1) `s1`, if `s2 = empty`;
2023-11-23 19:04:11 +01:00
(2) the string obtained by replacing all occurrences of `s2` by
2019-10-31 10:25:46 +01:00
`s3` in `s1`. *)
2019-09-24 15:26:48 +02:00
axiom replaceall_empty1: forall s1 s3.
replaceall s1 "" s3 = s1
2019-09-24 15:26:48 +02:00
2019-10-31 10:25:46 +01:00
axiom not_contains_replaceall: forall s1 s2 s3.
not (contains s1 s2) -> replaceall s1 s2 s3 = s1
2019-09-24 15:26:48 +02:00
function to_int string : int
(** `to_int s` is:
2019-10-31 10:25:46 +01:00
(1) an `int` consisting on the digits of `s`, if `s` contains
exclusively ascii characters in the range 0x30 ... 0x39;
2019-10-28 14:26:23 +01:00
(2) `-1`, if `s` contains a character that is not in the range
0x30 ... 0x39. *)
2019-09-24 15:26:48 +02:00
2019-10-28 14:26:23 +01:00
axiom to_int_gt_minus_1: forall s. to_int s >= -1
2019-10-29 16:24:16 +01:00
axiom to_int_empty: to_int "" = -1
2019-10-31 10:25:46 +01:00
(*** The following lemmas should hold, but CVC4 is not able to prove them *)
(*** axiom to_int_only_digits: forall s.
s <> empty &&
(forall i. 0 <= i < length s -> le "0" (s_at s i) && le (s_at s i) "9")
2019-10-31 10:25:46 +01:00
-> to_int s >= 0 *)
2019-10-29 16:24:16 +01:00
2019-10-31 10:25:46 +01:00
(*** axiom to_int_lt_0: forall s i.
0 <= i < length s && lt (s_at s i) "0"
2019-10-31 10:25:46 +01:00
-> to_int s = -1 *)
2019-10-29 16:24:16 +01:00
(*** In SMT-Lib now *)
predicate is_digit (s:string) = 0 <= to_int s <= 9 && length s = 1
(** `is_digit s` returns `True` iff `s` is of length `1` and
corresponds to a decimal digit, that is, to a code point in the
2019-10-29 16:24:16 +01:00
range 0x30 ... 0x39 *)
2019-09-24 15:26:48 +02:00
function from_int int : string
(** `from_int i` is:
2019-10-31 10:25:46 +01:00
2019-09-24 15:26:48 +02:00
(1) the corresponding string in the decimal notation if `i >= 0`;
2019-10-31 10:25:46 +01:00
2019-09-24 15:26:48 +02:00
(2) `empty`, if `i < 0`. *)
axiom from_int_negative: forall i.
i < 0 <-> from_int i = empty
axiom from_int_to_int: forall i.
2019-10-29 16:24:16 +01:00
if i >= 0 then to_int (from_int i) = i else to_int (from_int i) = -1
end
2020-03-04 15:42:24 +01:00
(** {2 Characters} *)
module Char
use String
use int.Int
2019-12-06 09:45:13 +01:00
(** to be mapped into the OCaml char type. *)
2020-02-20 14:08:41 +01:00
type char = abstract {
contents: string;
} invariant {
length contents = 1
}
2019-12-06 09:45:13 +01:00
2020-01-15 21:42:07 +01:00
axiom char_eq: forall c1 c2.
2020-02-20 14:08:41 +01:00
c1.contents = c2.contents -> c1 = c2
2020-01-13 14:37:20 +01:00
function code char : int
axiom code: forall c. 0 <= code c < 256
function chr (n: int) : char
2020-01-13 14:37:20 +01:00
axiom code_chr: forall n. 0 <= n < 256 -> code (chr n) = n
axiom chr_code: forall c. chr (code c) = c
function get (s: string) (i: int) : char
axiom get: forall s i.
2020-02-20 14:08:41 +01:00
0 <= i < length s -> (get s i).contents = s_at s i
axiom substring_get:
forall s ofs len i.
0 <= ofs <= length s -> 0 <= len -> ofs + len <= length s -> 0 <= i < len ->
get (substring s ofs len) i = get s (ofs + i)
2020-02-20 14:57:01 +01:00
lemma concat_first: forall s1 s2.
let s3 = concat s1 s2 in
forall i. 0 <= i < length s1 ->
get s3 i = get s1 i
lemma concat_second: forall s1 s2.
let s3 = concat s1 s2 in
forall i. length s1 <= i < length s1 + length s2 ->
get s3 i = get s2 (i - length s1)
function ([]) (s: string) (i: int) : char = get s i
2020-01-17 15:43:02 +01:00
predicate eq_string (s1 s2: string) = length s1 = length s2 &&
(forall i. 0 <= i < length s1 -> get s1 i = get s2 i)
2020-01-17 15:43:02 +01:00
2020-02-12 13:40:19 +01:00
axiom extensionality [@W:non_conservative_extension:N]:
forall s1 s2. eq_string s1 s2 -> s1 = s2
2020-01-17 15:43:02 +01:00
meta extensionality predicate eq_string
2020-01-19 14:44:25 +01:00
function make (size: int) (v: char) : string
axiom make_length: forall size v. size >= 0 ->
length (make size v) = size
2020-02-20 14:08:41 +01:00
axiom make_contents: forall size v. size >= 0 ->
(forall i. 0 <= i < size -> get (make size v) i = v)
2020-01-19 14:44:25 +01:00
end
2020-03-04 15:42:24 +01:00
(** {3 Programming API}
The following program functions are mapped to OCaml's functions.
See also module `io.StdIO`. *)
module OCaml
use int.Int
use mach.int.Int63
use String
use Char
(* In OCaml max_string_length is 144_115_188_075_855_863 *)
2020-02-20 14:08:41 +01:00
2020-02-17 14:55:53 +01:00
val eq_char (c1 c2: char) : bool
ensures { result <-> c1 = c2 }
val get (s: string) (i: int63) : char
2019-12-06 09:45:13 +01:00
requires { 0 <= i < length s }
ensures { result = get s i }
let ([]) (s: string) (i: int63) : char
requires { 0 <= i < length s }
ensures { result = get s i }
= get s i
val code (c: char) : int63
ensures { result = code c }
2020-01-13 14:37:20 +01:00
val chr (n: int63) : char
requires { 0 <= n < 256 }
ensures { result = chr n }
2019-12-06 09:45:13 +01:00
val (=) (x y: string) : bool
ensures { result <-> x = y }
val partial length (s: string) : int63
ensures { result = length s >= 0 }
val sub (s: string) (start: int63) (len: int63) : string
requires { 0 <= start <= length s }
requires { 0 <= len <= length s - start }
ensures { result = substring s start len }
val concat (s1 s2: string) : string
ensures { result = concat s1 s2 }
val make (size: int63) (v: char) : string
2020-01-19 14:44:25 +01:00
requires { 0 <= size }
ensures { result = make size v }
end
(** The following module is extracted to OCaml's Buffer *)
module StringBuffer
use int.Int
use mach.int.Int63
use String
use Char
use OCaml
type buffer = abstract {
mutable str: string;
}
2020-02-20 14:08:41 +01:00
meta coercion function str
val create (_: int63) : buffer
ensures { result.str = "" }
val length (b: buffer) : int63
ensures { result = length b.str }
val contents (b: buffer) : string
ensures { result = b.str }
val clear (b: buffer) : unit
writes { b }
ensures { b.str = "" }
val reset (b: buffer) : unit
writes { b }
ensures { b.str = "" }
val sub (b: buffer) (ofs len: int63) : string
requires { 0 <= ofs /\ 0 <= len /\ ofs + len <= length b.str }
ensures { result = substring b.str ofs len }
val add_char (b: buffer) (c: char) : unit
writes { b }
ensures { b.str = concat (old b.str) c.Char.contents }
val add_string (b: buffer) (s: string) : unit
writes { b }
ensures { b.str = concat (old b.str) s }
val truncate (b: buffer) (n: int63) : unit
requires { 0 <= n <= length b.str }
writes { b }
ensures { b.str = substring (old b.str) 0 n }
end
(** {2 String realization}
This module is intended for string realization. It clones the `String`
module replacing axioms by goals.
*)
module StringRealization
clone export String
with goal .
(** trick to remove axioms from the `String` theory. See file
`examples/stdlib/stringCheck.mlw` *)
2019-09-18 14:01:09 +02:00
end
2019-09-24 15:26:48 +02:00
(** {2 Regular expressions} *)
2019-09-24 15:26:48 +02:00
module RegExpr
use String as S
type re
(** type for regular expressions *)
2019-09-24 15:26:48 +02:00
function to_re string : re
(** string to regular expression injection *)
2019-09-24 15:26:48 +02:00
predicate in_re string re
(** regular expression membership *)
2019-09-24 15:26:48 +02:00
function concat re re : re
(** regular expressions concatenation, left associativity *)
2019-09-24 15:26:48 +02:00
function union re re : re
(** regular expressions union, left associativity *)
2019-09-24 15:26:48 +02:00
function inter re re : re
(** regular expressions intersection, left associativity *)
2019-09-24 15:26:48 +02:00
function star re : re
(** Kleene closure *)
2019-09-24 15:26:48 +02:00
function plus re : re
(** Kleene cross *)
(*** forall re. plus re = concat re (star re) *)
constant none : re
(** the empty set of strings *)
constant allchar : re
(** the set of all strings of length 1 *)
constant all : re = star allchar
(** the set of all strings *)
function opt re : re
(** regular expression option *)
(*** forall re. opt re = union re (to_re "") *)
function range string string : re
(** `range s1 s2` is the set of singleton strings such that all
element `s` of `range s1 s2` satisfies the condition `Str.<= s1 s`
and `Str.<= s s2`. *)
function power int re : re
(** `power n r` is the `n`th power of `r`; `n` must be an
integer literal. *)
(*** power 0 e = to_re ""
power n e = concat e (power (n-1) e) *)
function loop int int re : re
(** `loop n1 n2 r = if n1 > ne then none else
if n1 = n2 then power n1 r else
union (power n1 e) (loop (n1+1) n2 e)` *)
(*** `n1` and `n2` must be literals. *)
2019-09-24 15:26:48 +02:00
end