;; Test macro: (define-syntax T (syntax-rules () [(T expr "should be" v) (let ([actual expr]) (printf "~a~s => ~s~n" (if (equal? actual v) "" "ERROR ") 'expr actual))])) ;; A rx is either ;; - (make-rx:lit val) ;; - (make-rx:seq rx rx) ;; - (make-rx:alt rx rx) ;; - (make-rx:star rx) (define-struct rx:lit (v)) (define-struct rx:seq (first second)) (define-struct rx:alt (first second)) (define-struct rx:star (rx)) ;; Shorthands: ;; lit : val -> rx (define (lit v) (make-rx:lit v)) ;; seq : rx rx -> rx (define (seq a b) (make-rx:seq a b)) ;; alt : rx rx -> rx (define (alt a b) (make-rx:alt a b)) ;; star : rx -> rx (define (star rx) (make-rx:star rx)) ;; A stream is a list-of-val ;; A result is either ;; - #f ;; - stream ;; rxm : rx stream -> result ;; Regexp-matching takes a rx and stream; it returns ;; #f if the beginning of the stream doesn't match ;; the regexp, otherwise it returns the remainder stream ;; after a matching part ;; [See examples/tests at the end] ;; ---------------------------------------- ;; First attempt at matcher (define (rxm rx stream) (cond [(rx:lit? rx) (and (pair? stream) (equal? (car stream) (rx:lit-v rx)) (cdr stream))] [(rx:seq? rx) (let ([rest (rxm (rx:seq-first rx) stream)]) (and rest (rxm (rx:seq-second rx) rest)))] [(rx:alt? rx) (or (rxm (rx:alt-first rx) stream) (rxm (rx:alt-second rx) stream))] [(rx:star? rx) (or stream (rxm (rx:star-rx rx) stream))])) ;; ---------------------------------------- ;; Second attempt at matcher (first one that works) ;; rxmk : rx stream (stream -> result) -> result ;; A matcher that takes a success continuation (define (rxmk rx stream k) (cond [(rx:lit? rx) (and (pair? stream) (equal? (car stream) (rx:lit-v rx)) (k (cdr stream)))] [(rx:seq? rx) (rxmk (rx:seq-first rx) stream (lambda (rest) (rxmk (rx:seq-second rx) rest k)))] [(rx:alt? rx) (or (rxmk (rx:alt-first rx) stream k) (rxmk (rx:alt-second rx) stream k))] [(rx:star? rx) (or (k stream) (rxmk (rx:star-rx rx) stream (lambda (rest) (rxmk rx rest k))))])) (define (rxm rx stream) (rxmk rx stream (lambda (rest) rest))) ;; ---------------------------------------- ;; Third matcher, which is faster than the second ;; rx-compile : rx -> (stream (stream -> result) -> result) ;; Compiles an rx to a matcher function (define (rx-compile rx) (cond [(rx:lit? rx) (let ([v (rx:lit-v rx)]) (lambda (stream k) (and (pair? stream) (equal? (car stream) v) (k (cdr stream)))))] [(rx:seq? rx) (let ([first (rx-compile (rx:seq-first rx))] [second (rx-compile (rx:seq-second rx))]) (lambda (stream k) (first stream (lambda (rest) (second rest k)))))] [(rx:alt? rx) (let ([first (rx-compile (rx:alt-first rx))] [second (rx-compile (rx:alt-second rx))]) (lambda (stream k) (or (first stream k) (second stream k))))] [(rx:star? rx) (let ([inner-rx (rx-compile (rx:star-rx rx))]) (letrec ([rx (lambda (stream k) (or (k stream) (inner-rx stream (lambda (rest) (rx rest k)))))]) rx))])) (define (rxm rx stream) ((rx-compile rx) stream (lambda (rest) rest))) ;; ---------------------------------------- ;; Fourth matcher, "compiles" as each rx is constructed ;; which makes it easier to extend rx ;; A rx is (stream (stream -> result) -> result) ;; lit : val -> rx (define (lit v) (lambda (stream k) (and (pair? stream) (equal? (car stream) v) (k (cdr stream))))) ;; seq : rx rx -> rx (define (seq first second) (lambda (stream k) (first stream (lambda (rest) (second rest k))))) ;; alt : rx rx -> rx (define (alt first second) (lambda (stream k) (or (first stream k) (second stream k)))) ;; star : rx -> rx (define (star inner-rx) (letrec ([rx (lambda (stream k) (or (k stream) (inner-rx stream (lambda (rest) (rx rest k)))))]) rx)) (define (rxm rx stream) (rx stream (lambda (rest) rest))) ;; ---------------------------------------- ;; Examples/tests (T (rxm (lit 'a) '(a)) "should be" '()) (T (rxm (lit 'a) '(a b)) "should be" '(b)) (T (rxm (lit 'a) '(b)) "should be" #f) (T (rxm (lit 'a) '(b a)) "should be" #f) (T (rxm (seq (lit 'b) (lit 'a)) '(b a)) "should be" '()) (T (rxm (seq (lit 'a) (lit 'b)) '(a a)) "should be" #f) (T (rxm (alt (lit 'a) (lit 'b)) '(a)) "should be" '()) (T (rxm (alt (lit 'a) (lit 'b)) '(b)) "should be" '()) (T (rxm (alt (lit 'a) (lit 'b)) '(c)) "should be" #f) (T (rxm (seq (alt (lit 'a) (lit 'b)) (lit 'b)) '(a b)) "should be" '()) (T (rxm (seq (alt (lit 'a) (lit 'b)) (lit 'b)) '(b a)) "should be" #f) (T (rxm (seq (alt (lit 'a) (lit 'b)) (lit 'b)) (list 'b 'b)) "should be" '()) (T (rxm (seq (alt (lit 'a) (seq (list 'a) (lit 'b))) (lit 'b)) (list 'a 'b)) "should be" '()) (T (rxm (seq (alt (lit 'a) (seq (lit 'a) (lit 'a))) (lit 'b)) (list 'a 'a 'b)) "should be" '()) (T (rxm (seq (star (lit 'b)) (lit 'c)) (list 'b 'b 'c)) "should be" '()) ;; ---------------------------------------- (define (time-test) (define big-stream (string->list (string-append (make-string 300 #\a) (make-string 300 #\b) (make-string 300 #\a) "z"))) (time (rxm (seq (seq (star (lit #\a)) (seq (star (lit #\b)) (star (lit #\a)))) (lit #\z)) big-stream))) (time-test)