Extract parser.el

This commit is contained in:
Greg Sexton 2014-08-06 22:00:01 +01:00
parent 7775fc2023
commit f9858348ff
2 changed files with 215 additions and 176 deletions

View File

@ -35,6 +35,7 @@
(require 'dash) (require 'dash)
(require 's) (require 's)
(require 'cl) (require 'cl)
(require 'parser)
;;; overlay manipulation ;;; overlay manipulation
@ -264,168 +265,6 @@ with the current state and the current node at each iteration."
(origami-create-overlay beg end buffer)))) (origami-create-overlay beg end buffer))))
(origami-fold-node-raw beg end open children overlay))) (origami-fold-node-raw beg end open children overlay)))
;;; content structure
(defun origami-content (consumed string)
"Create a content structure from STRING and the count of CONSUMED characters."
(cons consumed string))
(defun origami-content-consumed-count (content) (car content))
(defun origami-content-string (content) (cdr content))
(defun origami-content-from (content consumed)
"Create a new content after consuming CONSUMED chars."
(origami-content (+ (origami-content-consumed-count content) consumed)
(substring (origami-content-string content) consumed)))
;;; monadic parser combinator
;;; TODO: document parsers
(defun origami-run-parser (parser content)
(funcall parser content))
(defun origami-parser-bind (m f)
"State monad composed with the maybe monad."
(if (null m) nil
(lambda (s)
(let ((new-result (origami-run-parser m s)))
(if (null new-result) nil
(destructuring-bind (new-value . new-state) new-result
(origami-run-parser (funcall f new-value) new-state)))))))
(defmacro origami-do (expr &rest more)
(let ((assignment-p (and (listp expr) (equal (cadr expr) '<-))))
(let ((var (if assignment-p (car expr) (gensym)))
(f (if assignment-p (caddr expr) expr)))
(if more
`(origami-parser-bind
,f (lambda (,var)
(origami-do ,@more)))
f))))
(defun origami-parser-return (x)
(lambda (s) (cons x s)))
(defun origami-parser-zero ()
(lambda (s) nil))
(defun origami-parser-get ()
(lambda (s) (cons s s)))
(defun origami-parser-put (x)
(lambda (s) (cons nil x)))
(defun origami-parser-get-string ()
(origami-do (content <- (origami-parser-get))
(origami-parser-return (origami-content-string content))))
(defun origami-parser-drop (n)
(origami-do (content <- (origami-parser-get))
(origami-parser-put (origami-content-from content n))
;; TODO: substring will error if n is too large, guard against this
(origami-parser-return n)))
(defun origami-parser-take (n)
(lambda (content)
(let ((content-str (origami-content-string content)))
(unless (s-blank? content-str)
(let* ((len (length content-str))
(n (if (> n len) len n)))
(cons (substring content-str 0 n) (origami-content-from content n)))))))
(defun origami-parser-item ()
(origami-parser-take 1))
(defun origami-parser-position ()
"Returns the point position, which is 1 more than the current
consumed count."
(origami-do (content <- (origami-parser-get))
(origami-parser-return (+ (origami-content-consumed-count content) 1))))
(defun origami-parser-sat (pred)
(origami-do (pos <- (origami-parser-position))
(a <- (origami-parser-item))
(if (funcall pred a)
(origami-parser-return pos)
(origami-parser-zero))))
(defun origami-parser-char (x)
(origami-parser-sat (lambda (c) (equal x c))))
(defun origami-parser-string (str)
;; take rather than recursion due to elisp
(origami-do (prefix <- (origami-parser-take (length str)))
(pos <- (origami-parser-position))
(if (equal str prefix)
(origami-parser-return pos)
(origami-parser-zero))))
(defun origami-parser-regex (rx)
"Match the regex somewhere in the remaining string. Note you
have to prefix with '^' if you wish to match the beginning."
(origami-do (str <- (origami-parser-get-string))
(if (string-match rx str)
(origami-parser-drop (match-end 0))
(origami-parser-zero))
(origami-parser-position)))
;;; TODO: rename? origami-parser-consume-while-not ?
(defun origami-parser-drop-until-regex (rx)
"Skip over all characters until hitting RX. If rx is not found
this will bind to zero. If rx is matched at the beginning of the
string, we bind to zero. This allows for bottoming out of
recursion. We fail if we don't consume something."
(origami-do (str <- (origami-parser-get-string))
(if (string-match rx str)
(if (> (match-beginning 0) 0)
(origami-parser-drop (match-beginning 0))
(origami-parser-zero))
(origami-parser-zero))))
(defun origami-parser-conj (p1 p2)
(lambda (content)
(or (origami-run-parser p1 content)
(origami-run-parser p2 content))))
(defun origami-parser-0+ (p)
(origami-parser-conj
(origami-parser-1+ p)
(origami-parser-return nil)))
(defun origami-parser-1+ (p)
;; recursive isn't going to cut it in elisp
(lambda (content)
(let ((res (origami-run-parser p content))
(acc nil))
(while res
(setq acc (cons (car res) acc))
(setq content (cdr res))
(setq res (origami-run-parser p content)))
(when acc
(cons (reverse acc) content)))))
(defun origami-parser-1? (p)
(origami-parser-conj p (origami-parser-return nil)))
(defun origami-parser-not (parser)
(lambda (content)
(if (origami-run-parser parser content)
nil
(origami-run-parser (origami-parser-item) content))))
(defun origami-parser-paired (start children end create)
;; TODO: make this a macro so I don't have to pass in the thunk?
"CHILDREN should be a zero-arg lambda -- a thunk -- returning a
parser to allow for recursive nesting of a parser."
(origami-do (begin <- start)
(children <- (funcall children))
(end <- end)
(origami-parser-return (funcall create begin end children))))
;;; TODO: always need to parse to max nesting, even if some of it gets ignored
;;; interactive utils ;;; interactive utils
;;; TODO: create functions for accessing/setting the local vars and ;;; TODO: create functions for accessing/setting the local vars and
@ -460,19 +299,19 @@ was last built."
(with-current-buffer buffer (with-current-buffer buffer
(let ((contents (buffer-string))) (let ((contents (buffer-string)))
(-> parser (-> parser
(origami-run-parser (origami-content 0 contents)) (parser-run (parser-content 0 contents))
car car
origami-fold-root-node))))) origami-fold-root-node)))))
(defun origami-test-parser (create) (defun origami-test-parser (create)
(let ((pair (origami-parser-paired (origami-parser-char "{") (let ((pair (parser-paired (parser-char "{")
(lambda () (origami-test-parser create)) (lambda () (origami-test-parser create))
(origami-parser-char "}") (parser-char "}")
create))) create)))
(origami-parser-0+ (origami-parser-conj (parser-0+ (parser-conj
(origami-do (parser-do
(origami-parser-drop-until-regex "[{}]") (parser-drop-until-regex "[{}]")
(origami-parser-1? pair)) (parser-1? pair))
pair)))) pair))))
(defun origami-get-parser (buffer) (defun origami-get-parser (buffer)
@ -493,10 +332,6 @@ otherwise fetch cached tree."
(origami-build-tree buffer (origami-get-parser buffer)) (origami-build-tree buffer (origami-get-parser buffer))
(origami-get-cached-tree buffer)))) (origami-get-cached-tree buffer))))
;;; commands
;;; TODO: document all commands
(defun origami-apply-new-tree (buffer old-tree new-tree) (defun origami-apply-new-tree (buffer old-tree new-tree)
(when new-tree (when new-tree
(origami-fold-diff old-tree (origami-store-cached-tree buffer new-tree) (origami-fold-diff old-tree (origami-store-cached-tree buffer new-tree)
@ -504,6 +339,10 @@ otherwise fetch cached tree."
'origami-show-overlay-from-fold-tree-fn 'origami-show-overlay-from-fold-tree-fn
'origami-change-overlay-from-fold-node-fn))) 'origami-change-overlay-from-fold-node-fn)))
;;; commands
;;; TODO: document all commands
(defun origami-open-node (buffer point) (defun origami-open-node (buffer point)
(interactive (list (current-buffer) (point))) (interactive (list (current-buffer) (point)))
(-when-let (tree (origami-get-fold-tree buffer)) (-when-let (tree (origami-get-fold-tree buffer))

200
parser.el Normal file
View File

@ -0,0 +1,200 @@
;;; parser.el --- Monadic parser combinator -*- lexical-binding: t -*-
;; Author: Greg Sexton <gregsexton@gmail.com>
;; Version: 1.0
;; Keywords: parser
;; URL: https://github.com/gregsexton/
;; Package-Requires: ((s "1.9.0") (dash "2.5.0") (emacs "24"))
;; The MIT License (MIT)
;; Copyright (c) 2014 Greg Sexton
;; Permission is hereby granted, free of charge, to any person obtaining a copy
;; of this software and associated documentation files (the "Software"), to deal
;; in the Software without restriction, including without limitation the rights
;; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
;; copies of the Software, and to permit persons to whom the Software is
;; furnished to do so, subject to the following conditions:
;; The above copyright notice and this permission notice shall be included in
;; all copies or substantial portions of the Software.
;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
;; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
;; THE SOFTWARE.
;;; Commentary:
;;; Code:
(require 'dash)
(require 's)
(require 'cl)
;;; content structure
(defun parser-content (consumed string)
"Create a content structure from STRING and the count of CONSUMED characters."
(cons consumed string))
(defun parser-content-consumed-count (content) (car content))
(defun parser-content-string (content) (cdr content))
(defun parser-content-from (content consumed)
"Create a new content after consuming CONSUMED chars."
(parser-content (+ (parser-content-consumed-count content) consumed)
(substring (parser-content-string content) consumed)))
;;; parsers
;;; TODO: document parsers
(defun parser-run (parser content)
(funcall parser content))
(defun parser-bind (m f)
"State monad composed with the maybe monad."
(if (null m) nil
(lambda (s)
(let ((new-result (parser-run m s)))
(if (null new-result) nil
(destructuring-bind (new-value . new-state) new-result
(parser-run (funcall f new-value) new-state)))))))
(defmacro parser-do (expr &rest more)
(let ((assignment-p (and (listp expr) (equal (cadr expr) '<-))))
(let ((var (if assignment-p (car expr) (gensym)))
(f (if assignment-p (caddr expr) expr)))
(if more
`(parser-bind
,f (lambda (,var)
(parser-do ,@more)))
f))))
(defun parser-return (x)
(lambda (s) (cons x s)))
(defun parser-zero ()
(lambda (s) nil))
(defun parser-get ()
(lambda (s) (cons s s)))
(defun parser-put (x)
(lambda (s) (cons nil x)))
(defun parser-get-string ()
(parser-do (content <- (parser-get))
(parser-return (parser-content-string content))))
(defun parser-drop (n)
(parser-do (content <- (parser-get))
(parser-put (parser-content-from content n))
;; TODO: substring will error if n is too large, guard against this
(parser-return n)))
(defun parser-take (n)
(lambda (content)
(let ((content-str (parser-content-string content)))
(unless (s-blank? content-str)
(let* ((len (length content-str))
(n (if (> n len) len n)))
(cons (substring content-str 0 n) (parser-content-from content n)))))))
(defun parser-item ()
(parser-take 1))
(defun parser-position ()
"Returns the point position, which is 1 more than the current
consumed count."
(parser-do (content <- (parser-get))
(parser-return (+ (parser-content-consumed-count content) 1))))
(defun parser-sat (pred)
(parser-do (pos <- (parser-position))
(a <- (parser-item))
(if (funcall pred a)
(parser-return pos)
(parser-zero))))
(defun parser-char (x)
(parser-sat (lambda (c) (equal x c))))
(defun parser-string (str)
;; take rather than recursion due to elisp
(parser-do (prefix <- (parser-take (length str)))
(pos <- (parser-position))
(if (equal str prefix)
(parser-return pos)
(parser-zero))))
(defun parser-regex (rx)
"Match the regex somewhere in the remaining string. Note you
have to prefix with '^' if you wish to match the beginning."
(parser-do (str <- (parser-get-string))
(if (string-match rx str)
(parser-drop (match-end 0))
(parser-zero))
(parser-position)))
;;; TODO: rename? parser-consume-while-not ?
(defun parser-drop-until-regex (rx)
"Skip over all characters until hitting RX. If RX is not found
this will bind to zero. If RX is matched at the beginning of the
string, we bind to zero. This allows for bottoming out of
recursion. We fail if we don't consume something."
(parser-do (str <- (parser-get-string))
(if (string-match rx str)
(if (> (match-beginning 0) 0)
(parser-drop (match-beginning 0))
(parser-zero))
(parser-zero))))
(defun parser-conj (p1 p2)
(lambda (content)
(or (parser-run p1 content)
(parser-run p2 content))))
(defun parser-0+ (p)
(parser-conj
(parser-1+ p)
(parser-return nil)))
(defun parser-1+ (p)
;; recursive isn't going to cut it in elisp
(lambda (content)
(let ((res (parser-run p content))
(acc nil))
(while res
(setq acc (cons (car res) acc))
(setq content (cdr res))
(setq res (parser-run p content)))
(when acc
(cons (reverse acc) content)))))
(defun parser-1? (p)
(parser-conj p (parser-return nil)))
(defun parser-not (parser)
(lambda (content)
(if (parser-run parser content)
nil
(parser-run (parser-item) content))))
(defun parser-paired (start children end create)
;; TODO: make this a macro so I don't have to pass in the thunk?
"CHILDREN should be a zero-arg lambda -- a thunk -- returning a
parser to allow for recursive nesting of a parser."
(parser-do (begin <- start)
(children <- (funcall children))
(end <- end)
(parser-return (funcall create begin end children))))
(provide 'parser)
;;; parser.el ends here