mirror of
https://github.com/AdaCore/why3.git
synced 2026-02-12 12:34:55 -08:00
200 lines
6.1 KiB
Plaintext
200 lines
6.1 KiB
Plaintext
|
|
(** Splitting a string according to a character separator
|
|
|
|
This problem was suggested by Georges-Axel Jaloyan (Amazon Web Services),
|
|
during a talk at the first Dafny workshop (London, 2024).
|
|
|
|
The problem is stated as follows: split a string `s` according to a
|
|
character separator `sep`, as a sequence of substrings `[s0;s1;...sn]`
|
|
where
|
|
- the separator does not appear in any `si`;
|
|
- the concatenation `s0+sep+s1+sep+...+sep+sn` is equal to `s`.
|
|
|
|
Examples. Assuming that the separator character is '.', here are some
|
|
expected input/output:
|
|
|
|
+---------+-------------------+
|
|
| input | output |
|
|
+---------+-------------------+
|
|
| "" | [""] |
|
|
| "." | ["", ""] |
|
|
| ".." | ["", "", ""] |
|
|
| "abc" | ["abc"] |
|
|
| "abc." | ["abc", ""] |
|
|
| ".abc" | ["", "abc"] |
|
|
| "a.bc" | ["a", "bc"] |
|
|
| "a..bc" | ["a", "", "bc"] |
|
|
+---------+-------------------+
|
|
|
|
Note that the output sequence is never empty.
|
|
|
|
Additionnally, we possibly set a limit to the total number of
|
|
substrings in the output. We do so with an integer parameter `limit`.
|
|
If `limit = -1`, there is no limit. Otherwise, `limit >= 1`.
|
|
When a limit is set, the last substring in the output may contain the
|
|
separator.
|
|
|
|
Examples. Assuming that the separator character is '.' and the limit
|
|
is 2, here are some expected input/output:
|
|
|
|
+-----------+----------------+
|
|
| input | output |
|
|
+-----------+----------------+
|
|
| "a." | ["a", ""] |
|
|
| "b.." | ["b", "."] |
|
|
| "b..." | ["b", ".."] |
|
|
| "ba.b.b." | ["ba", "b.b."] |
|
|
+-----------+----------------+
|
|
|
|
Author: Jean-Christophe Filliâtre (CNRS)
|
|
*)
|
|
|
|
module SplitString
|
|
|
|
use int.Int
|
|
use seq.Seq
|
|
use seq.FreeMonoid
|
|
use seq.Mem
|
|
|
|
(** Characters and strings *)
|
|
|
|
type char
|
|
|
|
val (=) (x y: char) : bool
|
|
ensures { result <-> x=y }
|
|
|
|
type string_ = seq char
|
|
|
|
(** Specification *)
|
|
|
|
(** `concat [s0;s1;...;sn] sep` is `s0+sep+s1+sep+...+sep+sn`
|
|
for a non-empty sequence of strings *)
|
|
let rec function concat (ss: seq string_) (sep: char) : string_
|
|
requires { length ss >= 1 } variant { length ss }
|
|
= if length ss = 1 then ss[0]
|
|
else concat ss[0..length ss - 1] sep ++ (cons sep ss[length ss - 1])
|
|
|
|
(** a mere shortcut, for convenience *)
|
|
predicate notin (sep: char) (s: string_)
|
|
= not (mem sep s)
|
|
|
|
(** Code *)
|
|
|
|
let ([]) (s: string_) (i: int) : char
|
|
requires { [@expl:index in string bounds] 0 <= i < length s }
|
|
= get s i
|
|
|
|
let split_string (s: string_) (sep: char) (limit: int) : (ss: seq string_)
|
|
requires { limit = -1 || limit >= 1 }
|
|
ensures { length ss >= 1 }
|
|
ensures { limit = -1 || length ss <= limit }
|
|
ensures { forall j. 0 <= j < length ss - 1 -> notin sep ss[j] }
|
|
ensures { length ss = limit || notin sep ss[length ss - 1] }
|
|
ensures { concat ss sep == s }
|
|
= if limit = 1 then return (singleton s);
|
|
let ref ss = empty in
|
|
let ref i = 0 in
|
|
let ref b = 0 in
|
|
while i < length s do
|
|
invariant { 0 <= b <= i <= length s }
|
|
invariant { forall j. 0 <= j < length ss -> notin sep ss[j] }
|
|
invariant { forall j. b <= j < i -> s[j] <> sep }
|
|
invariant { limit = -1 || length ss < limit-1 }
|
|
invariant { concat (snoc ss s[b..]) sep == s }
|
|
variant { length s - i }
|
|
if s[i] = sep then (
|
|
ss <- snoc ss s[b..i];
|
|
assert { s[b..] == s[b..i] ++ cons sep s[i+1..] };
|
|
if length ss = limit-1 then return snoc ss s[i+1..];
|
|
b <- i+1;
|
|
);
|
|
i <- i+1
|
|
done;
|
|
return snoc ss s[b..]
|
|
|
|
end
|
|
|
|
module SplitStringOCaml
|
|
|
|
use int.Int
|
|
use seq.Seq
|
|
use seq.FreeMonoid
|
|
use seq.Mem
|
|
use mach.int.Int63
|
|
use mach.peano.Peano
|
|
use mach.peano.Int63
|
|
use queue.Queue as Q
|
|
|
|
(** Characters and strings *)
|
|
|
|
type char
|
|
|
|
val (=) (x y: char) : bool
|
|
ensures { result <-> x=y }
|
|
|
|
type string_ = private {
|
|
ghost contents_: seq char;
|
|
}
|
|
meta coercion function contents_
|
|
|
|
(** Specification *)
|
|
|
|
(** `concat [s0;s1;...;sn] sep` is `s0+sep+s1+sep+...+sep+sn`
|
|
for a non-empty sequence of strings *)
|
|
let rec ghost function concat (ss: seq string_) (sep: char) : seq char
|
|
requires { length ss >= 1 } variant { length ss }
|
|
= if length ss = 1 then ss[0].contents_
|
|
else concat ss[0..length ss - 1] sep ++ (cons sep ss[length ss - 1].contents_)
|
|
|
|
(** a mere shortcut, for convenience *)
|
|
predicate notin (sep: char) (s: string_)
|
|
= not (mem sep s)
|
|
|
|
(** Code *)
|
|
|
|
val length (s: string_) : int63
|
|
ensures { result = length s }
|
|
|
|
val sub (s: string_) (i j: int63) : string_
|
|
requires { [@expl:index in string bounds] 0 <= i <= j <= length s }
|
|
ensures { result = s[i..j] }
|
|
|
|
val ([]) (s: string_) (i: int63) : char
|
|
requires { [@expl:index in string bounds] 0 <= i < length s }
|
|
ensures { result = s[i] }
|
|
|
|
let partial split_string (s: string_) (sep: char) (limit: int63) : (ss: Q.t string_)
|
|
requires { limit = -1 || limit >= 1 }
|
|
ensures { length ss >= 1 }
|
|
ensures { limit = -1 || length ss <= limit }
|
|
ensures { forall j. 0 <= j < length ss - 1 -> notin sep ss[j] }
|
|
ensures { length ss = limit || notin sep ss[length ss - 1] }
|
|
ensures { concat ss sep == s }
|
|
= let ref ss = Q.create () in
|
|
if limit = (1:int63) then (Q.push s ss; return ss);
|
|
let ref i = 0: int63 in
|
|
let ref b = 0: int63 in
|
|
let ghost ref suffix = s in
|
|
while i < length s do
|
|
invariant { 0 <= b <= i <= length s }
|
|
invariant { forall j. 0 <= j < length ss -> notin sep ss[j] }
|
|
invariant { forall j. b <= j < i -> s[j] <> sep }
|
|
invariant { limit = -1 || length ss < limit-1 }
|
|
invariant { suffix = s[b..] }
|
|
invariant { concat (snoc ss suffix) sep == s }
|
|
variant { length s - i }
|
|
if s[i] = sep then (
|
|
Q.push (sub s b i) ss;
|
|
assert { s[b..] == s[b..i] ++ cons sep s[i+1..] };
|
|
if to_int63 (Q.length ss) = limit - (1:int63) then
|
|
(Q.push (sub s (i+1) (length s)) ss; return ss);
|
|
b <- i+1;
|
|
suffix <- sub s b (length s)
|
|
);
|
|
i <- i+1
|
|
done;
|
|
Q.push (sub s b (length s)) ss;
|
|
return ss
|
|
|
|
end
|