????

Your IP : 3.145.10.222


Current Path : C:/opt/msys64/usr/share/guile/3.0/ice-9/peg/
Upload File :
Current File : C:/opt/msys64/usr/share/guile/3.0/ice-9/peg/string-peg.scm

;;;; string-peg.scm --- representing PEG grammars as strings
;;;;
;;;; 	Copyright (C) 2010, 2011 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;; 
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;;; Lesser General Public License for more details.
;;;; 
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;;;

(define-module (ice-9 peg string-peg)
  #:export (peg-as-peg
            define-peg-string-patterns
            peg-grammar)
  #:use-module (ice-9 peg using-parsers)
  #:use-module (ice-9 peg codegen)
  #:use-module (ice-9 peg simplify-tree))

;; Gets the left-hand depth of a list.
(define (depth lst)
  (if (or (not (list? lst)) (null? lst))
      0
      (+ 1 (depth (car lst)))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; Parse string PEGs using sexp PEGs.
;; See the variable PEG-AS-PEG for an easier-to-read syntax.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Grammar for PEGs in PEG grammar.
(define peg-as-peg
"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
pattern <-- alternative (SLASH sp alternative)*
alternative <-- ([!&]? sp suffix)+
suffix <-- primary ([*+?] sp)*
primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
literal <-- ['] (!['] .)* ['] sp
charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
CCrange <-- . '-' .
CCsingle <-- .
nonterminal <-- [a-zA-Z0-9-]+ sp
sp < [ \t\n]*
SLASH < '/'
LB < '['
RB < ']'
")

(define-syntax define-sexp-parser
  (lambda (x)
    (syntax-case x ()
      ((_ sym accum pat)
       (let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
              (accumsym (syntax->datum #'accum))
              (syn (wrap-parser-for-users x matchf accumsym #'sym)))
           #`(define sym #,syn))))))

(define-sexp-parser peg-grammar all
  (+ (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern)))
(define-sexp-parser peg-pattern all
  (and peg-alternative
       (* (and (ignore "/") peg-sp peg-alternative))))
(define-sexp-parser peg-alternative all
  (+ (and (? (or "!" "&")) peg-sp peg-suffix)))
(define-sexp-parser peg-suffix all
  (and peg-primary (* (and (or "*" "+" "?") peg-sp))))
(define-sexp-parser peg-primary all
  (or (and "(" peg-sp peg-pattern ")" peg-sp)
      (and "." peg-sp)
      peg-literal
      peg-charclass
      (and peg-nonterminal (not-followed-by "<"))))
(define-sexp-parser peg-literal all
  (and "'" (* (and (not-followed-by "'") peg-any)) "'" peg-sp))
(define-sexp-parser peg-charclass all
  (and (ignore "[")
       (* (and (not-followed-by "]")
               (or charclass-range charclass-single)))
       (ignore "]")
       peg-sp))
(define-sexp-parser charclass-range all (and peg-any "-" peg-any))
(define-sexp-parser charclass-single all peg-any)
(define-sexp-parser peg-nonterminal all
  (and (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-")) peg-sp))
(define-sexp-parser peg-sp none
  (* (or " " "\t" "\n")))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; PARSE STRING PEGS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Takes a string representing a PEG grammar and returns syntax that
;; will define all of the nonterminals in the grammar with equivalent
;; PEG s-expressions.
(define (peg-parser str for-syntax)
  (let ((parsed (match-pattern peg-grammar str)))
    (if (not parsed)
        (begin
          ;; (display "Invalid PEG grammar!\n")
          #f)
        (let ((lst (peg:tree parsed)))
          (cond
           ((or (not (list? lst)) (null? lst))
            lst)
           ((eq? (car lst) 'peg-grammar)
            #`(begin
                #,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
                        (context-flatten (lambda (lst) (<= (depth lst) 2))
                                         (cdr lst))))))))))

;; Macro wrapper for PEG-PARSER.  Parses PEG grammars expressed as strings and
;; defines all the appropriate nonterminals.
(define-syntax define-peg-string-patterns
  (lambda (x)
    (syntax-case x ()
      ((_ str)
       (peg-parser (syntax->datum #'str) x)))))

;; lst has format (nonterm grabber pattern), where
;;   nonterm is a symbol (the name of the nonterminal),
;;   grabber is a string (either "<", "<-" or "<--"), and
;;   pattern is the parse of a PEG pattern expressed as as string.
(define (peg-nonterm->defn lst for-syntax)
  (let* ((nonterm (car lst))
         (grabber (cadr lst))
         (pattern (caddr lst))
         (nonterm-name (datum->syntax for-syntax
                                      (string->symbol (cadr nonterm)))))
    #`(define-peg-pattern #,nonterm-name
       #,(cond
          ((string=? grabber "<--") (datum->syntax for-syntax 'all))
          ((string=? grabber "<-") (datum->syntax for-syntax 'body))
          (else (datum->syntax for-syntax 'none)))
       #,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))

;; lst has format ('peg-pattern ...).
;; After the context-flatten, (cdr lst) has format
;;   (('peg-alternative ...) ...), where the outer list is a collection
;;   of elements from a '/' alternative.
(define (peg-pattern->defn lst for-syntax)
  #`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
                (context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
                                 (cdr lst)))))

;; lst has format ('peg-alternative ...).
;; After the context-flatten, (cdr lst) has the format
;;   (item ...), where each item has format either ("!" ...), ("&" ...),
;;   or ('peg-suffix ...).
(define (peg-alternative->defn lst for-syntax)
  #`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
                 (context-flatten (lambda (x) (or (string? (car x))
                                             (eq? (car x) 'peg-suffix)))
                                  (cdr lst)))))

;; lst has the format either
;;   ("!" ('peg-suffix ...)), ("&" ('peg-suffix ...)), or
;;     ('peg-suffix ...).
(define (peg-body->defn lst for-syntax)
    (cond
      ((equal? (car lst) "&")
       #`(followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
      ((equal? (car lst) "!")
       #`(not-followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
      ((eq? (car lst) 'peg-suffix)
       (peg-suffix->defn lst for-syntax))
      (else `(peg-parse-body-fail ,lst))))

;; lst has format ('peg-suffix <peg-primary> (? (/ "*" "?" "+")))
(define (peg-suffix->defn lst for-syntax)
  (let ((inner-defn (peg-primary->defn (cadr lst) for-syntax)))
    (cond
      ((null? (cddr lst))
       inner-defn)
      ((equal? (caddr lst) "*")
       #`(* #,inner-defn))
      ((equal? (caddr lst) "?")
       #`(? #,inner-defn))
      ((equal? (caddr lst) "+")
       #`(+ #,inner-defn)))))

;; Parse a primary.
(define (peg-primary->defn lst for-syntax)
  (let ((el (cadr lst)))
  (cond
   ((list? el)
    (cond
     ((eq? (car el) 'peg-literal)
      (peg-literal->defn el for-syntax))
     ((eq? (car el) 'peg-charclass)
      (peg-charclass->defn el for-syntax))
     ((eq? (car el) 'peg-nonterminal)
      (datum->syntax for-syntax (string->symbol (cadr el))))))
   ((string? el)
    (cond
     ((equal? el "(")
      (peg-pattern->defn (caddr lst) for-syntax))
     ((equal? el ".")
      (datum->syntax for-syntax 'peg-any))
     (else (datum->syntax for-syntax
                          `(peg-parse-any unknown-string ,lst)))))
   (else (datum->syntax for-syntax
                        `(peg-parse-any unknown-el ,lst))))))

;; Trims characters off the front and end of STR.
;; (trim-1chars "'ab'") -> "ab"
(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))

;; Parses a literal.
(define (peg-literal->defn lst for-syntax)
  (datum->syntax for-syntax (trim-1chars (cadr lst))))

;; Parses a charclass.
(define (peg-charclass->defn lst for-syntax)
  #`(or
     #,@(map
         (lambda (cc)
           (cond
            ((eq? (car cc) 'charclass-range)
             #`(range #,(datum->syntax
                         for-syntax
                         (string-ref (cadr cc) 0))
                      #,(datum->syntax
                         for-syntax
                         (string-ref (cadr cc) 2))))
            ((eq? (car cc) 'charclass-single)
             (datum->syntax for-syntax (cadr cc)))))
         (context-flatten
          (lambda (x) (or (eq? (car x) 'charclass-range)
                          (eq? (car x) 'charclass-single)))
          (cdr lst)))))

;; Compresses a list to save the optimizer work.
;; e.g. (or (and a)) -> a
(define (compressor-core lst)
  (if (or (not (list? lst)) (null? lst))
      lst
      (cond
       ((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
             (null? (cddr lst)))
        (compressor-core (cadr lst)))
       ((and (eq? (car lst) 'body)
             (eq? (cadr lst) 'lit)
             (eq? (cadddr lst) 1))
        (compressor-core (caddr lst)))
       (else (map compressor-core lst)))))

(define (compressor syn for-syntax)
  (datum->syntax for-syntax
                 (compressor-core (syntax->datum syn))))

;; Builds a lambda-expressions for the pattern STR using accum.
(define (peg-string-compile args accum)
  (syntax-case args ()
    ((str-stx) (string? (syntax->datum #'str-stx))
     (let ((string (syntax->datum #'str-stx)))
       (compile-peg-pattern
        (compressor
         (peg-pattern->defn
          (peg:tree (match-pattern peg-pattern string)) #'str-stx)
         #'str-stx)
        (if (eq? accum 'all) 'body accum))))
     (else (error "Bad embedded PEG string" args))))

(add-peg-compiler! 'peg peg-string-compile)