term_lexer.ml

   1: (*****************************************************************************
   2:  * Lexer
   3:  *****************************************************************************)
   4: 
   5: open List;;
   6: 
   7: type term_token  = Kwd of char | Ident of string | Int of int
   8: and  token_stream = term_token Stream.t;;
   9: 
  10: 
  11: (* Caractères reconnus par le lexer *)
  12: let special_chars = ['('; ')'; '['; ']'; '<'; '>'; '{'; '}';
  13:                      '$'; '#'; '^'; '.';'*']
  14: ;;
  15: 
  16: let term_lexer : string -> token_stream = fun s ->
  17:   let pos = ref 0
  18:   and length = String.length(s)
  19: 
  20:   in let rec parse_int : unit -> int = fun () ->
  21:     let rec read_int : unit -> (int * int) = fun () ->
  22:       if (!pos >= length) then (0, 1)
  23:       else match s.[!pos] with
  24:         ('0'..'9' as c) ->
  25:           incr pos;
  26:           let (res, base) = read_int ()
  27:           in (res + (Char.code c - Char.code '0') * base, 10 * base)
  28:       | _ -> (0, 1)
  29:     in fst (read_int())
  30: 
  31:   and parse_ident : unit -> string = fun () ->
  32:     let beg = !pos in
  33:     let rec read_ident : unit -> string = fun () ->
  34:       if !pos >= length then String.sub s beg (!pos - beg)
  35:       else
  36:         match s.[!pos] with
  37:           ('a'..'z' | 'A'..'Z' | '0'..'9' | '_' | '-') ->
  38:             incr pos; read_ident ()
  39:         | _ -> String.sub s beg (!pos - beg)
  40:     in incr pos; read_ident ()
  41: 
  42:   and next_token : int -> term_token option = fun count ->
  43:     if !pos >= length then None
  44:     else let c = s.[!pos] in
  45:       if mem c special_chars then (incr pos; Some(Kwd(c)))
  46:       else match c with
  47:           (' ' | '\n' | '\t')   -> incr pos; next_token count
  48:         | ('0'..'9')            -> Some(Int(parse_int()))
  49:         | ('a'..'z' | 'A'..'Z') -> Some(Ident(parse_ident()))
  50:         | _                     -> failwith "invalid char"
  51: 
  52:   in Stream.from next_token
  53: ;;
  54: 
  55: (* Pour tester le lexer *)
  56: let tokens_of_string : string -> term_token list = fun s ->
  57:   let s = term_lexer s
  58:   in let rec tokens () =
  59:     try
  60:       let t = Stream.next s
  61:       in t::(tokens ())
  62:     with Stream.Failure -> []
  63:   in tokens ();;
  64: 
  65: (* Rang de la première occurrence d'un nom de variable dans une liste ;
  66:    utilisé par les parsers pour calculer les indices de de Bruijn *)
  67: let index_of : string -> string list -> int =
  68:   let rec index : int -> string -> string list -> int = fun i x ->
  69:     function
  70:         y::l -> if (x = y) then i else index (i + 1) x l
  71:       | []   -> raise (Stream.Error "Free variable")
  72:   in index 0
  73: ;;

This document was generated using caml2html