2014-08-07 07:00:01 +10:00
|
|
|
;;; parser.el --- Monadic parser combinator -*- lexical-binding: t -*-
|
|
|
|
|
|
|
|
;; Author: Greg Sexton <gregsexton@gmail.com>
|
|
|
|
;; Version: 1.0
|
|
|
|
;; Keywords: parser
|
|
|
|
;; URL: https://github.com/gregsexton/
|
|
|
|
;; Package-Requires: ((s "1.9.0") (dash "2.5.0") (emacs "24"))
|
|
|
|
|
|
|
|
;; The MIT License (MIT)
|
|
|
|
|
|
|
|
;; Copyright (c) 2014 Greg Sexton
|
|
|
|
|
|
|
|
;; Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
;; of this software and associated documentation files (the "Software"), to deal
|
|
|
|
;; in the Software without restriction, including without limitation the rights
|
|
|
|
;; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
;; copies of the Software, and to permit persons to whom the Software is
|
|
|
|
;; furnished to do so, subject to the following conditions:
|
|
|
|
|
|
|
|
;; The above copyright notice and this permission notice shall be included in
|
|
|
|
;; all copies or substantial portions of the Software.
|
|
|
|
|
|
|
|
;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
;; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
;; THE SOFTWARE.
|
|
|
|
|
|
|
|
;;; Commentary:
|
|
|
|
|
|
|
|
;;; Code:
|
|
|
|
|
|
|
|
(require 'dash)
|
|
|
|
(require 's)
|
|
|
|
(require 'cl)
|
|
|
|
|
|
|
|
;;; content structure
|
|
|
|
|
|
|
|
(defun parser-content (consumed string)
|
|
|
|
"Create a content structure from STRING and the count of CONSUMED characters."
|
|
|
|
(cons consumed string))
|
|
|
|
|
|
|
|
(defun parser-content-consumed-count (content) (car content))
|
|
|
|
|
|
|
|
(defun parser-content-string (content) (cdr content))
|
|
|
|
|
|
|
|
(defun parser-content-from (content consumed)
|
|
|
|
"Create a new content after consuming CONSUMED chars."
|
|
|
|
(parser-content (+ (parser-content-consumed-count content) consumed)
|
|
|
|
(substring (parser-content-string content) consumed)))
|
|
|
|
|
|
|
|
;;; parsers
|
|
|
|
;;; TODO: document parsers
|
|
|
|
|
|
|
|
(defun parser-run (parser content)
|
|
|
|
(funcall parser content))
|
|
|
|
|
|
|
|
(defun parser-bind (m f)
|
|
|
|
"State monad composed with the maybe monad."
|
|
|
|
(if (null m) nil
|
|
|
|
(lambda (s)
|
|
|
|
(let ((new-result (parser-run m s)))
|
|
|
|
(if (null new-result) nil
|
|
|
|
(destructuring-bind (new-value . new-state) new-result
|
|
|
|
(parser-run (funcall f new-value) new-state)))))))
|
|
|
|
|
|
|
|
(defmacro parser-do (expr &rest more)
|
|
|
|
(let ((assignment-p (and (listp expr) (equal (cadr expr) '<-))))
|
|
|
|
(let ((var (if assignment-p (car expr) (gensym)))
|
|
|
|
(f (if assignment-p (caddr expr) expr)))
|
|
|
|
(if more
|
|
|
|
`(parser-bind
|
|
|
|
,f (lambda (,var)
|
|
|
|
(parser-do ,@more)))
|
|
|
|
f))))
|
|
|
|
|
|
|
|
(defun parser-return (x)
|
|
|
|
(lambda (s) (cons x s)))
|
|
|
|
|
|
|
|
(defun parser-zero ()
|
|
|
|
(lambda (s) nil))
|
|
|
|
|
|
|
|
(defun parser-get ()
|
|
|
|
(lambda (s) (cons s s)))
|
|
|
|
|
|
|
|
(defun parser-put (x)
|
|
|
|
(lambda (s) (cons nil x)))
|
|
|
|
|
|
|
|
(defun parser-get-string ()
|
|
|
|
(parser-do (content <- (parser-get))
|
|
|
|
(parser-return (parser-content-string content))))
|
|
|
|
|
|
|
|
(defun parser-drop (n)
|
|
|
|
(parser-do (content <- (parser-get))
|
|
|
|
(parser-put (parser-content-from content n))
|
|
|
|
;; TODO: substring will error if n is too large, guard against this
|
|
|
|
(parser-return n)))
|
|
|
|
|
|
|
|
(defun parser-take (n)
|
|
|
|
(lambda (content)
|
|
|
|
(let ((content-str (parser-content-string content)))
|
|
|
|
(unless (s-blank? content-str)
|
|
|
|
(let* ((len (length content-str))
|
|
|
|
(n (if (> n len) len n)))
|
|
|
|
(cons (substring content-str 0 n) (parser-content-from content n)))))))
|
|
|
|
|
|
|
|
(defun parser-item ()
|
|
|
|
(parser-take 1))
|
|
|
|
|
|
|
|
(defun parser-position ()
|
|
|
|
"Returns the point position, which is 1 more than the current
|
|
|
|
consumed count."
|
|
|
|
(parser-do (content <- (parser-get))
|
|
|
|
(parser-return (+ (parser-content-consumed-count content) 1))))
|
|
|
|
|
|
|
|
(defun parser-sat (pred)
|
|
|
|
(parser-do (pos <- (parser-position))
|
|
|
|
(a <- (parser-item))
|
|
|
|
(if (funcall pred a)
|
|
|
|
(parser-return pos)
|
|
|
|
(parser-zero))))
|
|
|
|
|
|
|
|
(defun parser-char (x)
|
|
|
|
(parser-sat (lambda (c) (equal x c))))
|
|
|
|
|
|
|
|
(defun parser-string (str)
|
|
|
|
;; take rather than recursion due to elisp
|
|
|
|
(parser-do (prefix <- (parser-take (length str)))
|
|
|
|
(pos <- (parser-position))
|
|
|
|
(if (equal str prefix)
|
|
|
|
(parser-return pos)
|
|
|
|
(parser-zero))))
|
|
|
|
|
|
|
|
(defun parser-regex (rx)
|
|
|
|
"Match the regex somewhere in the remaining string. Note you
|
|
|
|
have to prefix with '^' if you wish to match the beginning."
|
|
|
|
(parser-do (str <- (parser-get-string))
|
|
|
|
(if (string-match rx str)
|
|
|
|
(parser-drop (match-end 0))
|
|
|
|
(parser-zero))
|
|
|
|
(parser-position)))
|
|
|
|
|
|
|
|
;;; TODO: rename? parser-consume-while-not ?
|
|
|
|
(defun parser-drop-until-regex (rx)
|
|
|
|
"Skip over all characters until hitting RX. If RX is not found
|
|
|
|
this will bind to zero. If RX is matched at the beginning of the
|
|
|
|
string, we bind to zero. This allows for bottoming out of
|
|
|
|
recursion. We fail if we don't consume something."
|
|
|
|
(parser-do (str <- (parser-get-string))
|
|
|
|
(if (string-match rx str)
|
|
|
|
(if (> (match-beginning 0) 0)
|
|
|
|
(parser-drop (match-beginning 0))
|
|
|
|
(parser-zero))
|
|
|
|
(parser-zero))))
|
|
|
|
|
|
|
|
(defun parser-conj (p1 p2)
|
|
|
|
(lambda (content)
|
|
|
|
(or (parser-run p1 content)
|
|
|
|
(parser-run p2 content))))
|
|
|
|
|
|
|
|
(defun parser-0+ (p)
|
|
|
|
(parser-conj
|
|
|
|
(parser-1+ p)
|
|
|
|
(parser-return nil)))
|
|
|
|
|
|
|
|
(defun parser-1+ (p)
|
|
|
|
;; recursive isn't going to cut it in elisp
|
|
|
|
(lambda (content)
|
|
|
|
(let ((res (parser-run p content))
|
|
|
|
(acc nil))
|
|
|
|
(while res
|
|
|
|
(setq acc (cons (car res) acc))
|
|
|
|
(setq content (cdr res))
|
|
|
|
(setq res (parser-run p content)))
|
|
|
|
(when acc
|
|
|
|
(cons (reverse acc) content)))))
|
|
|
|
|
|
|
|
(defun parser-1? (p)
|
|
|
|
(parser-conj p (parser-return nil)))
|
|
|
|
|
|
|
|
(defun parser-not (parser)
|
|
|
|
(lambda (content)
|
|
|
|
(if (parser-run parser content)
|
|
|
|
nil
|
|
|
|
(parser-run (parser-item) content))))
|
|
|
|
|
|
|
|
(defun parser-paired (start children end create)
|
|
|
|
;; TODO: make this a macro so I don't have to pass in the thunk?
|
2014-08-09 23:02:37 +10:00
|
|
|
;; TODO: move this to origami-parsers?
|
2014-08-07 07:00:01 +10:00
|
|
|
"CHILDREN should be a zero-arg lambda -- a thunk -- returning a
|
|
|
|
parser to allow for recursive nesting of a parser."
|
|
|
|
(parser-do (begin <- start)
|
|
|
|
(children <- (funcall children))
|
|
|
|
(end <- end)
|
|
|
|
(parser-return (funcall create begin end children))))
|
|
|
|
|
|
|
|
(provide 'parser)
|
|
|
|
|
|
|
|
;;; parser.el ends here
|