(********************************************************************)
(*                                                                  *)
(*  The Why3 Verification Platform   /   The Why3 Development Team  *)
(*  Copyright 2010-2025 -- Inria - CNRS - Paris-Saclay University   *)
(*                                                                  *)
(*  This software is distributed under the terms of the GNU Lesser  *)
(*  General Public License version 2.1, with the special exception  *)
(*  on linking described in file LICENSE.                           *)
(********************************************************************)

{
  open Lexing
  open Py_ast
  open Py_parser

  exception Lexing_error of string

  let () = Why3.Exn_printer.register (fun fmt exn -> match exn with
    | Lexing_error s -> Format.fprintf fmt "syntax error: %s" s
    | _ -> raise exn)

  let id_or_kwd =
    let h = Hashtbl.create 32 in
    List.iter (fun (s, tok) -> Hashtbl.add h s tok)
      ["def", DEF; "if", IF; "else", ELSE; "elif", ELIF;
       "return", RETURN; "while", WHILE; "pass", PASS;
       "for", FOR; "in", IN;
       "and", AND; "or", OR; "not", NOT;
       "True", TRUE; "False", FALSE; "None", NONE;
       "from", FROM; "import", IMPORT;
       "break", BREAK; "continue", CONTINUE;
       (* annotations *)
       "forall", FORALL; "exists", EXISTS; "then", THEN; "let", LET;
       "old", OLD; "at", AT; "variant", VARIANT; "call", CALL;
       "by", BY; "so", SO;
      ];
    fun s -> try Hashtbl.find h s with Not_found -> IDENT s

  let annotation =
    let h = Hashtbl.create 32 in
    List.iter (fun (s, tok) -> Hashtbl.add h s tok)
      ["invariant", INVARIANT; "variant", VARIANT;
       "assert", ASSERT; "assume", ASSUME; "check", CHECK;
       "requires", REQUIRES; "ensures", ENSURES;
       "axiom", AXIOM; "lemma", LEMMA;
       "call", CALL; "constant", CONSTANT; "label", LABEL;
       "function", FUNCTION; "predicate", PREDICATE;
      ];
    fun s -> try Hashtbl.find h s with Not_found ->
      raise (Lexing_error ("no such annotation '" ^ s ^ "'"))

  let string_buffer = Buffer.create 1024

  let stack = ref [0]  (* indentation stack *)

  let rec unindent n = match !stack with
    | m :: _ when m = n -> []
    | m :: st when m > n -> stack := st; END :: unindent n
    | _ -> raise (Lexing_error "bad indentation")

  let update_stack n = match !stack with
    | m :: _ when m < n -> stack := n :: !stack; [NEWLINE; BEGIN]
    | _ -> NEWLINE :: unindent n

}

let letter = ['a'-'z' 'A'-'Z']
let digit = ['0'-'9']
let ident = (letter | '_')+ (letter | digit | '_')*
let integer = ['0'-'9']+
let space = ' ' | '\t'
let comment = "#" [^ '@' '\n'] [^ '\n']*

rule next_tokens = parse
  | '\n' | "#\n"
            { new_line lexbuf; update_stack (indentation lexbuf) }
  | space+ | comment
            { next_tokens lexbuf }
  | "\\" space* '\n' space* "#@"?
            { next_tokens lexbuf }
  | "#@" space* (ident as id)
            { [annotation id] }
  | "#@"    { raise (Lexing_error "expecting an annotation") }
  | ident as id
            { [id_or_kwd id] }
  | (ident ("'" ident)+) as id
            { [QIDENT id] }
  | "'" (ident as id)
            { [TVAR id] }
  | '+'     { [PLUS] }
  | "+="    { [PLUSEQUAL] }
  | "-="    { [MINUSEQUAL] }
  | "*="    { [TIMESEQUAL] }
  | "//="   { [DIVEQUAL] }
  | "%="    { [MODEQUAL] }
  | '-'     { [MINUS] }
  | '*'     { [TIMES] }
  | "//"    { [DIV] }
  | '%'     { [MOD] }
  | '='     { [EQUAL] }
  | "=="    { [CMP Beq] }
  | "!="    { [CMP Bneq] }
  | "<"     { [CMP Blt] }
  | "<="    { [CMP Ble] }
  | ">"     { [CMP Bgt] }
  | ">="    { [CMP Bge] }
  | '('     { [LEFTPAR] }
  | ')'     { [RIGHTPAR] }
  | '['     { [LEFTSQ] }
  | ']'     { [RIGHTSQ] }
  | '{'     { [LEFTBR] }
  | '}'     { [RIGHTBR] }
  | ','     { [COMMA] }
  | ':'     { [COLON] }
  (* logic symbols *)
  | "->"    { [ARROW] }
  | "<-"    { [LARROW] }
  | "<->"   { [LRARROW] }
  | "."     { [DOT] }
  | integer as s
            { [INTEGER s] }
  | '"'     { [STRING (string lexbuf)] }
  | eof     { NEWLINE :: unindent 0 @ [EOF] }
  | _ as c  { raise (Lexing_error ("illegal character: " ^ String.make 1 c)) }

(* count the indentation, i.e. the number of space characters from bol *)
and indentation = parse
  | (space+ | comment | '#')* '\n'
      (* skip empty lines *)
      { new_line lexbuf; indentation lexbuf }
  | space* as s
      { String.length s }

and string = parse
  | '"'
      { let s = Buffer.contents string_buffer in
        Buffer.reset string_buffer;
        s }
  | "\\n"
      { Buffer.add_char string_buffer '\n'; string lexbuf }
  | "\\\""
      { Buffer.add_char string_buffer '"'; string lexbuf }
  | _ as c
      { Buffer.add_char string_buffer c; string lexbuf }
  | eof
      { raise (Lexing_error "unterminated string") }

{
  let next_token =
    let tokens = Queue.create () in
    fun lb ->
      if Queue.is_empty tokens then begin
        let l = next_tokens lb in
        List.iter (fun t -> Queue.add t tokens) l
      end;
      Queue.pop tokens

  let parse file c =
    let lb = Lexing.from_channel c in
    Why3.Loc.set_file file lb;
    stack := [0];  (* reinitialise indentation stack *)
    Why3.Loc.with_location (Py_parser.file next_token) lb

  (* Entries for transformations: similar to lexer.mll *)
  let build_parsing_function entry lb =
    Why3.Loc.with_location (entry next_token) lb

  let parse_term = build_parsing_function Py_parser.term_eof
  let parse_term_list = build_parsing_function Py_parser.term_comma_list_eof
  let parse_list_ident = build_parsing_function Py_parser.ident_comma_list_eof

}
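
(* Usage sketch (illustrative comment only, not compiled with this file):
   the [parse] entry point above takes a file name (used for locations)
   and an input channel, resets the indentation stack, and runs
   [Py_parser.file] on the buffered token stream produced by
   [next_token].  A caller in the surrounding plugin could use it
   roughly as follows; the module name [Py_lexer] and the helper
   [load_file] are assumptions made for this example only.

     let load_file fname =
       let c = open_in fname in
       Fun.protect ~finally:(fun () -> close_in c)
         (fun () -> Py_lexer.parse fname c)
*)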