examples/split_string.mlw


(** Splitting a string according to a character separator

  This problem was suggested by Georges-Axel Jaloyan (Amazon Web Services),
  during a talk at the first Dafny workshop (London, 2024).

  The problem is stated as follows: split a string `s` according to a
  character separator `sep`, as a sequence of substrings `[s0;s1;...sn]`
  where
  - the separator does not appear in any `si`;
  - the concatenation `s0+sep+s1+sep+...+sep+sn` is equal to `s`.

  Examples. Assuming that the separator character is '.', here are some
  expected input/output:

    +---------+-------------------+
    | input   | output            |
    +---------+-------------------+
    | ""      |  [""]             |
    | "."     |  ["", ""]         |
    | ".."    |  ["", "", ""]     |
    | "abc"   |  ["abc"]          |
    | "abc."  |  ["abc", ""]      |
    | ".abc"  |  ["", "abc"]      |
    | "a.bc"  |  ["a", "bc"]      |
    | "a..bc" |  ["a", "", "bc"]  |
    +---------+-------------------+

  Note that the output sequence is never empty.

  Additionnally, we possibly set a limit to the total number of
  substrings in the output. We do so with an integer parameter `limit`.
  If `limit = -1`, there is no limit. Otherwise, `limit >= 1`.
  When a limit is set, the last substring in the output may contain the
  separator.

  Examples. Assuming that the separator character is '.' and the limit
  is 2, here are some expected input/output:

    +-----------+----------------+
    | input     | output         |
    +-----------+----------------+
    | "a."      | ["a", ""]      |
    | "b.."     | ["b", "."]     |
    | "b..."    | ["b", ".."]    |
    | "ba.b.b." | ["ba", "b.b."] |
    +-----------+----------------+

  Author: Jean-Christophe Filliâtre (CNRS)
*)

module SplitString

  use int.Int
  use seq.Seq
  use seq.FreeMonoid
  use seq.Mem

  (** Characters and strings *)

  type char

  val (=) (x y: char) : bool
    ensures { result <-> x=y }

  type string_ = seq char

  (** Specification *)

  (** `concat [s0;s1;...;sn] sep` is `s0+sep+s1+sep+...+sep+sn`
      for a non-empty sequence of strings *)
  let rec function concat (ss: seq string_) (sep: char) : string_
    requires { length ss >= 1 } variant { length ss }
  = if length ss = 1 then ss[0]
    else concat ss[0..length ss - 1] sep ++ (cons sep ss[length ss - 1])

  (** a mere shortcut, for convenience *)
  predicate notin (sep: char) (s: string_)
  = not (mem sep s)

  (** Code *)

  let ([]) (s: string_) (i: int) : char
    requires { [@expl:index in string bounds] 0 <= i < length s }
  = get s i

  let split_string (s: string_) (sep: char) (limit: int) : (ss: seq string_)
    requires { limit = -1 || limit >= 1 }
    ensures  { length ss >= 1 }
    ensures  { limit = -1 || length ss <= limit }
    ensures  { forall j. 0 <= j < length ss - 1 -> notin sep ss[j] }
    ensures  { length ss = limit || notin sep ss[length ss - 1] }
    ensures  { concat ss sep == s }
  = if limit = 1 then return (singleton s);
    let ref ss = empty in
    let ref i = 0 in
    let ref b = 0 in
    while i < length s do
      invariant { 0 <= b <= i <= length s }
      invariant { forall j. 0 <= j < length ss -> notin sep ss[j] }
      invariant { forall j. b <= j < i -> s[j] <> sep }
      invariant { limit = -1 || length ss < limit-1 }
      invariant { concat (snoc ss s[b..]) sep == s }
      variant   { length s - i }
      if s[i] = sep then (
        ss <- snoc ss s[b..i];
        assert { s[b..] == s[b..i] ++ cons sep s[i+1..] };
        if length ss = limit-1 then return snoc ss s[i+1..];
        b <- i+1;
      );
      i <- i+1
    done;
    return snoc ss s[b..]

end

module SplitStringOCaml

  use int.Int
  use seq.Seq
  use seq.FreeMonoid
  use seq.Mem
  use mach.int.Int63
  use mach.peano.Peano
  use mach.peano.Int63
  use queue.Queue as Q

  (** Characters and strings *)

  type char

  val (=) (x y: char) : bool
    ensures { result <-> x=y }

  type string_ = private {
    ghost contents_: seq char;
  }
  meta coercion function contents_

  (** Specification *)

  (** `concat [s0;s1;...;sn] sep` is `s0+sep+s1+sep+...+sep+sn`
      for a non-empty sequence of strings *)
  let rec ghost function concat (ss: seq string_) (sep: char) : seq char
    requires { length ss >= 1 } variant { length ss }
  = if length ss = 1 then ss[0].contents_
    else concat ss[0..length ss - 1] sep ++ (cons sep ss[length ss - 1].contents_)

  (** a mere shortcut, for convenience *)
  predicate notin (sep: char) (s: string_)
  = not (mem sep s)

  (** Code *)

  val length (s: string_) : int63
    ensures  { result = length s }

  val sub (s: string_) (i j: int63) : string_
    requires { [@expl:index in string bounds] 0 <= i <= j <= length s }
    ensures  { result = s[i..j] }

  val ([]) (s: string_) (i: int63) : char
    requires { [@expl:index in string bounds] 0 <= i < length s }
    ensures  { result = s[i] }

  let partial split_string (s: string_) (sep: char) (limit: int63) : (ss: Q.t string_)
    requires { limit = -1 || limit >= 1 }
    ensures  { length ss >= 1 }
    ensures  { limit = -1 || length ss <= limit }
    ensures  { forall j. 0 <= j < length ss - 1 -> notin sep ss[j] }
    ensures  { length ss = limit || notin sep ss[length ss - 1] }
    ensures  { concat ss sep == s }
  = let ref ss = Q.create () in
    if limit = (1:int63) then (Q.push s ss; return ss);
    let ref i = 0: int63 in
    let ref b = 0: int63 in
    let ghost ref suffix = s in
    while i < length s do
      invariant { 0 <= b <= i <= length s }
      invariant { forall j. 0 <= j < length ss -> notin sep ss[j] }
      invariant { forall j. b <= j < i -> s[j] <> sep }
      invariant { limit = -1 || length ss < limit-1 }
      invariant { suffix = s[b..] }
      invariant { concat (snoc ss suffix) sep == s }
      variant   { length s - i }
      if s[i] = sep then (
        Q.push (sub s b i) ss;
        assert { s[b..] == s[b..i] ++ cons sep s[i+1..] };
        if to_int63 (Q.length ss) = limit - (1:int63) then
          (Q.push (sub s (i+1) (length s)) ss; return ss);
        b <- i+1;
        suffix <- sub s b (length s)
      );
      i <- i+1
    done;
    Q.push (sub s b (length s)) ss;
    return ss

end
new example: split string 2024-01-23 20:59:33 +01:00
			`(** Splitting a string according to a character separator`

			`This problem was suggested by Georges-Axel Jaloyan (Amazon Web Services),`
			`during a talk at the first Dafny workshop (London, 2024).`

			The problem is stated as follows: split a string `s` according to a
			character separator `sep`, as a sequence of substrings `[s0;s1;...sn]`
			`where`
			- the separator does not appear in any `si`;
			- the concatenation `s0+sep+s1+sep+...+sep+sn` is equal to `s`.

			`Examples. Assuming that the separator character is '.', here are some`
			`expected input/output:`

			`+---------+-------------------+`
			`\| input \| output \|`
			`+---------+-------------------+`
			`\| "" \| [""] \|`
			`\| "." \| ["", ""] \|`
			`\| ".." \| ["", "", ""] \|`
			`\| "abc" \| ["abc"] \|`
			`\| "abc." \| ["abc", ""] \|`
			`\| ".abc" \| ["", "abc"] \|`
			`\| "a.bc" \| ["a", "bc"] \|`
			`\| "a..bc" \| ["a", "", "bc"] \|`
			`+---------+-------------------+`

			`Note that the output sequence is never empty.`

			`Additionnally, we possibly set a limit to the total number of`
			substrings in the output. We do so with an integer parameter `limit`.
			If `limit = -1`, there is no limit. Otherwise, `limit >= 1`.
			`When a limit is set, the last substring in the output may contain the`
			`separator.`

			`Examples. Assuming that the separator character is '.' and the limit`
			`is 2, here are some expected input/output:`

			`+-----------+----------------+`
			`\| input \| output \|`
			`+-----------+----------------+`
			`\| "a." \| ["a", ""] \|`
			`\| "b.." \| ["b", "."] \|`
			`\| "b..." \| ["b", ".."] \|`
			`\| "ba.b.b." \| ["ba", "b.b."] \|`
			`+-----------+----------------+`

			`Author: Jean-Christophe Filliâtre (CNRS)`
			`*)`

split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			`module SplitString`
new example: split string 2024-01-23 20:59:33 +01:00
split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			`use int.Int`
			`use seq.Seq`
			`use seq.FreeMonoid`
			`use seq.Mem`
new example: split string 2024-01-23 20:59:33 +01:00
split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			`(** Characters and strings *)`
new example: split string 2024-01-23 20:59:33 +01:00
split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			`type char`
new example: split string 2024-01-23 20:59:33 +01:00
split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			`val (=) (x y: char) : bool`
			`ensures { result <-> x=y }`
new example: split string 2024-01-23 20:59:33 +01:00
split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			`type string_ = seq char`
new example: split string 2024-01-23 20:59:33 +01:00
split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			`(** Specification *)`
new example: split string 2024-01-23 20:59:33 +01:00
split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			(** `concat [s0;s1;...;sn] sep` is `s0+sep+s1+sep+...+sep+sn`
			`for a non-empty sequence of strings *)`
			`let rec function concat (ss: seq string_) (sep: char) : string_`
			`requires { length ss >= 1 } variant { length ss }`
			`= if length ss = 1 then ss[0]`
			`else concat ss[0..length ss - 1] sep ++ (cons sep ss[length ss - 1])`
new example: split string 2024-01-23 20:59:33 +01:00
split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			`(** a mere shortcut, for convenience *)`
			`predicate notin (sep: char) (s: string_)`
			`= not (mem sep s)`
new example: split string 2024-01-23 20:59:33 +01:00
split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			`(** Code *)`

			`let ([]) (s: string_) (i: int) : char`
			`requires { [@expl:index in string bounds] 0 <= i < length s }`
			`= get s i`

			`let split_string (s: string_) (sep: char) (limit: int) : (ss: seq string_)`
			`requires { limit = -1 \|\| limit >= 1 }`
			`ensures { length ss >= 1 }`
			`ensures { limit = -1 \|\| length ss <= limit }`
			`ensures { forall j. 0 <= j < length ss - 1 -> notin sep ss[j] }`
			`ensures { length ss = limit \|\| notin sep ss[length ss - 1] }`
			`ensures { concat ss sep == s }`
			`= if limit = 1 then return (singleton s);`
			`let ref ss = empty in`
			`let ref i = 0 in`
			`let ref b = 0 in`
			`while i < length s do`
			`invariant { 0 <= b <= i <= length s }`
			`invariant { forall j. 0 <= j < length ss -> notin sep ss[j] }`
			`invariant { forall j. b <= j < i -> s[j] <> sep }`
			`invariant { limit = -1 \|\| length ss < limit-1 }`
			`invariant { concat (snoc ss s[b..]) sep == s }`
			`variant { length s - i }`
			`if s[i] = sep then (`
			`ss <- snoc ss s[b..i];`
			`assert { s[b..] == s[b..i] ++ cons sep s[i+1..] };`
			`if length ss = limit-1 then return snoc ss s[i+1..];`
			`b <- i+1;`
			`);`
			`i <- i+1`
			`done;`
			`return snoc ss s[b..]`

			`end`

			`module SplitStringOCaml`

			`use int.Int`
			`use seq.Seq`
			`use seq.FreeMonoid`
			`use seq.Mem`
			`use mach.int.Int63`
			`use mach.peano.Peano`
split_string example updated change type of limit from peano to int63 2024-01-25 21:49:09 +01:00			`use mach.peano.Int63`
split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			`use queue.Queue as Q`

			`(** Characters and strings *)`

			`type char`

			`val (=) (x y: char) : bool`
			`ensures { result <-> x=y }`

			`type string_ = private {`
			`ghost contents_: seq char;`
			`}`
			`meta coercion function contents_`

			`(** Specification *)`

			(** `concat [s0;s1;...;sn] sep` is `s0+sep+s1+sep+...+sep+sn`
			`for a non-empty sequence of strings *)`
			`let rec ghost function concat (ss: seq string_) (sep: char) : seq char`
			`requires { length ss >= 1 } variant { length ss }`
			`= if length ss = 1 then ss[0].contents_`
			`else concat ss[0..length ss - 1] sep ++ (cons sep ss[length ss - 1].contents_)`

			`(** a mere shortcut, for convenience *)`
			`predicate notin (sep: char) (s: string_)`
			`= not (mem sep s)`

			`(** Code *)`

			`val length (s: string_) : int63`
			`ensures { result = length s }`

			`val sub (s: string_) (i j: int63) : string_`
			`requires { [@expl:index in string bounds] 0 <= i <= j <= length s }`
			`ensures { result = s[i..j] }`

			`val ([]) (s: string_) (i: int63) : char`
			`requires { [@expl:index in string bounds] 0 <= i < length s }`
			`ensures { result = s[i] }`

do not allow implicit inference of partial 2024-10-31 15:27:09 +01:00			`let partial split_string (s: string_) (sep: char) (limit: int63) : (ss: Q.t string_)`
split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			`requires { limit = -1 \|\| limit >= 1 }`
			`ensures { length ss >= 1 }`
			`ensures { limit = -1 \|\| length ss <= limit }`
			`ensures { forall j. 0 <= j < length ss - 1 -> notin sep ss[j] }`
			`ensures { length ss = limit \|\| notin sep ss[length ss - 1] }`
			`ensures { concat ss sep == s }`
			`= let ref ss = Q.create () in`
split_string example updated change type of limit from peano to int63 2024-01-25 21:49:09 +01:00			`if limit = (1:int63) then (Q.push s ss; return ss);`
split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			`let ref i = 0: int63 in`
			`let ref b = 0: int63 in`
			`let ghost ref suffix = s in`
			`while i < length s do`
			`invariant { 0 <= b <= i <= length s }`
			`invariant { forall j. 0 <= j < length ss -> notin sep ss[j] }`
			`invariant { forall j. b <= j < i -> s[j] <> sep }`
			`invariant { limit = -1 \|\| length ss < limit-1 }`
			`invariant { suffix = s[b..] }`
			`invariant { concat (snoc ss suffix) sep == s }`
			`variant { length s - i }`
			`if s[i] = sep then (`
			`Q.push (sub s b i) ss;`
			`assert { s[b..] == s[b..i] ++ cons sep s[i+1..] };`
split_string example updated change type of limit from peano to int63 2024-01-25 21:49:09 +01:00			`if to_int63 (Q.length ss) = limit - (1:int63) then`
split_string with machine arithmetic 2024-01-24 14:50:53 +01:00			`(Q.push (sub s (i+1) (length s)) ss; return ss);`
			`b <- i+1;`
			`suffix <- sub s b (length s)`
			`);`
			`i <- i+1`
			`done;`
			`Q.push (sub s b (length s)) ss;`
			`return ss`

			`end`