Extract parser.el
This commit is contained in:
parent
7775fc2023
commit
f9858348ff
191
origami.el
191
origami.el
@ -35,6 +35,7 @@
|
|||||||
(require 'dash)
|
(require 'dash)
|
||||||
(require 's)
|
(require 's)
|
||||||
(require 'cl)
|
(require 'cl)
|
||||||
|
(require 'parser)
|
||||||
|
|
||||||
;;; overlay manipulation
|
;;; overlay manipulation
|
||||||
|
|
||||||
@ -264,168 +265,6 @@ with the current state and the current node at each iteration."
|
|||||||
(origami-create-overlay beg end buffer))))
|
(origami-create-overlay beg end buffer))))
|
||||||
(origami-fold-node-raw beg end open children overlay)))
|
(origami-fold-node-raw beg end open children overlay)))
|
||||||
|
|
||||||
;;; content structure
|
|
||||||
|
|
||||||
(defun origami-content (consumed string)
|
|
||||||
"Create a content structure from STRING and the count of CONSUMED characters."
|
|
||||||
(cons consumed string))
|
|
||||||
|
|
||||||
(defun origami-content-consumed-count (content) (car content))
|
|
||||||
|
|
||||||
(defun origami-content-string (content) (cdr content))
|
|
||||||
|
|
||||||
(defun origami-content-from (content consumed)
|
|
||||||
"Create a new content after consuming CONSUMED chars."
|
|
||||||
(origami-content (+ (origami-content-consumed-count content) consumed)
|
|
||||||
(substring (origami-content-string content) consumed)))
|
|
||||||
|
|
||||||
;;; monadic parser combinator
|
|
||||||
|
|
||||||
;;; TODO: document parsers
|
|
||||||
|
|
||||||
(defun origami-run-parser (parser content)
|
|
||||||
(funcall parser content))
|
|
||||||
|
|
||||||
(defun origami-parser-bind (m f)
|
|
||||||
"State monad composed with the maybe monad."
|
|
||||||
(if (null m) nil
|
|
||||||
(lambda (s)
|
|
||||||
(let ((new-result (origami-run-parser m s)))
|
|
||||||
(if (null new-result) nil
|
|
||||||
(destructuring-bind (new-value . new-state) new-result
|
|
||||||
(origami-run-parser (funcall f new-value) new-state)))))))
|
|
||||||
|
|
||||||
(defmacro origami-do (expr &rest more)
|
|
||||||
(let ((assignment-p (and (listp expr) (equal (cadr expr) '<-))))
|
|
||||||
(let ((var (if assignment-p (car expr) (gensym)))
|
|
||||||
(f (if assignment-p (caddr expr) expr)))
|
|
||||||
(if more
|
|
||||||
`(origami-parser-bind
|
|
||||||
,f (lambda (,var)
|
|
||||||
(origami-do ,@more)))
|
|
||||||
f))))
|
|
||||||
|
|
||||||
(defun origami-parser-return (x)
|
|
||||||
(lambda (s) (cons x s)))
|
|
||||||
|
|
||||||
(defun origami-parser-zero ()
|
|
||||||
(lambda (s) nil))
|
|
||||||
|
|
||||||
(defun origami-parser-get ()
|
|
||||||
(lambda (s) (cons s s)))
|
|
||||||
|
|
||||||
(defun origami-parser-put (x)
|
|
||||||
(lambda (s) (cons nil x)))
|
|
||||||
|
|
||||||
(defun origami-parser-get-string ()
|
|
||||||
(origami-do (content <- (origami-parser-get))
|
|
||||||
(origami-parser-return (origami-content-string content))))
|
|
||||||
|
|
||||||
(defun origami-parser-drop (n)
|
|
||||||
(origami-do (content <- (origami-parser-get))
|
|
||||||
(origami-parser-put (origami-content-from content n))
|
|
||||||
;; TODO: substring will error if n is too large, guard against this
|
|
||||||
(origami-parser-return n)))
|
|
||||||
|
|
||||||
(defun origami-parser-take (n)
|
|
||||||
(lambda (content)
|
|
||||||
(let ((content-str (origami-content-string content)))
|
|
||||||
(unless (s-blank? content-str)
|
|
||||||
(let* ((len (length content-str))
|
|
||||||
(n (if (> n len) len n)))
|
|
||||||
(cons (substring content-str 0 n) (origami-content-from content n)))))))
|
|
||||||
|
|
||||||
(defun origami-parser-item ()
|
|
||||||
(origami-parser-take 1))
|
|
||||||
|
|
||||||
(defun origami-parser-position ()
|
|
||||||
"Returns the point position, which is 1 more than the current
|
|
||||||
consumed count."
|
|
||||||
(origami-do (content <- (origami-parser-get))
|
|
||||||
(origami-parser-return (+ (origami-content-consumed-count content) 1))))
|
|
||||||
|
|
||||||
(defun origami-parser-sat (pred)
|
|
||||||
(origami-do (pos <- (origami-parser-position))
|
|
||||||
(a <- (origami-parser-item))
|
|
||||||
(if (funcall pred a)
|
|
||||||
(origami-parser-return pos)
|
|
||||||
(origami-parser-zero))))
|
|
||||||
|
|
||||||
(defun origami-parser-char (x)
|
|
||||||
(origami-parser-sat (lambda (c) (equal x c))))
|
|
||||||
|
|
||||||
(defun origami-parser-string (str)
|
|
||||||
;; take rather than recursion due to elisp
|
|
||||||
(origami-do (prefix <- (origami-parser-take (length str)))
|
|
||||||
(pos <- (origami-parser-position))
|
|
||||||
(if (equal str prefix)
|
|
||||||
(origami-parser-return pos)
|
|
||||||
(origami-parser-zero))))
|
|
||||||
|
|
||||||
(defun origami-parser-regex (rx)
|
|
||||||
"Match the regex somewhere in the remaining string. Note you
|
|
||||||
have to prefix with '^' if you wish to match the beginning."
|
|
||||||
(origami-do (str <- (origami-parser-get-string))
|
|
||||||
(if (string-match rx str)
|
|
||||||
(origami-parser-drop (match-end 0))
|
|
||||||
(origami-parser-zero))
|
|
||||||
(origami-parser-position)))
|
|
||||||
|
|
||||||
;;; TODO: rename? origami-parser-consume-while-not ?
|
|
||||||
(defun origami-parser-drop-until-regex (rx)
|
|
||||||
"Skip over all characters until hitting RX. If rx is not found
|
|
||||||
this will bind to zero. If rx is matched at the beginning of the
|
|
||||||
string, we bind to zero. This allows for bottoming out of
|
|
||||||
recursion. We fail if we don't consume something."
|
|
||||||
(origami-do (str <- (origami-parser-get-string))
|
|
||||||
(if (string-match rx str)
|
|
||||||
(if (> (match-beginning 0) 0)
|
|
||||||
(origami-parser-drop (match-beginning 0))
|
|
||||||
(origami-parser-zero))
|
|
||||||
(origami-parser-zero))))
|
|
||||||
|
|
||||||
(defun origami-parser-conj (p1 p2)
|
|
||||||
(lambda (content)
|
|
||||||
(or (origami-run-parser p1 content)
|
|
||||||
(origami-run-parser p2 content))))
|
|
||||||
|
|
||||||
(defun origami-parser-0+ (p)
|
|
||||||
(origami-parser-conj
|
|
||||||
(origami-parser-1+ p)
|
|
||||||
(origami-parser-return nil)))
|
|
||||||
|
|
||||||
(defun origami-parser-1+ (p)
|
|
||||||
;; recursive isn't going to cut it in elisp
|
|
||||||
(lambda (content)
|
|
||||||
(let ((res (origami-run-parser p content))
|
|
||||||
(acc nil))
|
|
||||||
(while res
|
|
||||||
(setq acc (cons (car res) acc))
|
|
||||||
(setq content (cdr res))
|
|
||||||
(setq res (origami-run-parser p content)))
|
|
||||||
(when acc
|
|
||||||
(cons (reverse acc) content)))))
|
|
||||||
|
|
||||||
(defun origami-parser-1? (p)
|
|
||||||
(origami-parser-conj p (origami-parser-return nil)))
|
|
||||||
|
|
||||||
(defun origami-parser-not (parser)
|
|
||||||
(lambda (content)
|
|
||||||
(if (origami-run-parser parser content)
|
|
||||||
nil
|
|
||||||
(origami-run-parser (origami-parser-item) content))))
|
|
||||||
|
|
||||||
(defun origami-parser-paired (start children end create)
|
|
||||||
;; TODO: make this a macro so I don't have to pass in the thunk?
|
|
||||||
"CHILDREN should be a zero-arg lambda -- a thunk -- returning a
|
|
||||||
parser to allow for recursive nesting of a parser."
|
|
||||||
(origami-do (begin <- start)
|
|
||||||
(children <- (funcall children))
|
|
||||||
(end <- end)
|
|
||||||
(origami-parser-return (funcall create begin end children))))
|
|
||||||
|
|
||||||
;;; TODO: always need to parse to max nesting, even if some of it gets ignored
|
|
||||||
|
|
||||||
;;; interactive utils
|
;;; interactive utils
|
||||||
|
|
||||||
;;; TODO: create functions for accessing/setting the local vars and
|
;;; TODO: create functions for accessing/setting the local vars and
|
||||||
@ -460,20 +299,20 @@ was last built."
|
|||||||
(with-current-buffer buffer
|
(with-current-buffer buffer
|
||||||
(let ((contents (buffer-string)))
|
(let ((contents (buffer-string)))
|
||||||
(-> parser
|
(-> parser
|
||||||
(origami-run-parser (origami-content 0 contents))
|
(parser-run (parser-content 0 contents))
|
||||||
car
|
car
|
||||||
origami-fold-root-node)))))
|
origami-fold-root-node)))))
|
||||||
|
|
||||||
(defun origami-test-parser (create)
|
(defun origami-test-parser (create)
|
||||||
(let ((pair (origami-parser-paired (origami-parser-char "{")
|
(let ((pair (parser-paired (parser-char "{")
|
||||||
(lambda () (origami-test-parser create))
|
(lambda () (origami-test-parser create))
|
||||||
(origami-parser-char "}")
|
(parser-char "}")
|
||||||
create)))
|
create)))
|
||||||
(origami-parser-0+ (origami-parser-conj
|
(parser-0+ (parser-conj
|
||||||
(origami-do
|
(parser-do
|
||||||
(origami-parser-drop-until-regex "[{}]")
|
(parser-drop-until-regex "[{}]")
|
||||||
(origami-parser-1? pair))
|
(parser-1? pair))
|
||||||
pair))))
|
pair))))
|
||||||
|
|
||||||
(defun origami-get-parser (buffer)
|
(defun origami-get-parser (buffer)
|
||||||
;; TODO: remove hardcoding!
|
;; TODO: remove hardcoding!
|
||||||
@ -493,10 +332,6 @@ otherwise fetch cached tree."
|
|||||||
(origami-build-tree buffer (origami-get-parser buffer))
|
(origami-build-tree buffer (origami-get-parser buffer))
|
||||||
(origami-get-cached-tree buffer))))
|
(origami-get-cached-tree buffer))))
|
||||||
|
|
||||||
;;; commands
|
|
||||||
|
|
||||||
;;; TODO: document all commands
|
|
||||||
|
|
||||||
(defun origami-apply-new-tree (buffer old-tree new-tree)
|
(defun origami-apply-new-tree (buffer old-tree new-tree)
|
||||||
(when new-tree
|
(when new-tree
|
||||||
(origami-fold-diff old-tree (origami-store-cached-tree buffer new-tree)
|
(origami-fold-diff old-tree (origami-store-cached-tree buffer new-tree)
|
||||||
@ -504,6 +339,10 @@ otherwise fetch cached tree."
|
|||||||
'origami-show-overlay-from-fold-tree-fn
|
'origami-show-overlay-from-fold-tree-fn
|
||||||
'origami-change-overlay-from-fold-node-fn)))
|
'origami-change-overlay-from-fold-node-fn)))
|
||||||
|
|
||||||
|
;;; commands
|
||||||
|
|
||||||
|
;;; TODO: document all commands
|
||||||
|
|
||||||
(defun origami-open-node (buffer point)
|
(defun origami-open-node (buffer point)
|
||||||
(interactive (list (current-buffer) (point)))
|
(interactive (list (current-buffer) (point)))
|
||||||
(-when-let (tree (origami-get-fold-tree buffer))
|
(-when-let (tree (origami-get-fold-tree buffer))
|
||||||
|
200
parser.el
Normal file
200
parser.el
Normal file
@ -0,0 +1,200 @@
|
|||||||
|
;;; parser.el --- Monadic parser combinator -*- lexical-binding: t -*-
|
||||||
|
|
||||||
|
;; Author: Greg Sexton <gregsexton@gmail.com>
|
||||||
|
;; Version: 1.0
|
||||||
|
;; Keywords: parser
|
||||||
|
;; URL: https://github.com/gregsexton/
|
||||||
|
;; Package-Requires: ((s "1.9.0") (dash "2.5.0") (emacs "24"))
|
||||||
|
|
||||||
|
;; The MIT License (MIT)
|
||||||
|
|
||||||
|
;; Copyright (c) 2014 Greg Sexton
|
||||||
|
|
||||||
|
;; Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
;; of this software and associated documentation files (the "Software"), to deal
|
||||||
|
;; in the Software without restriction, including without limitation the rights
|
||||||
|
;; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
;; copies of the Software, and to permit persons to whom the Software is
|
||||||
|
;; furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
;; The above copyright notice and this permission notice shall be included in
|
||||||
|
;; all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
;; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
;; THE SOFTWARE.
|
||||||
|
|
||||||
|
;;; Commentary:
|
||||||
|
|
||||||
|
;;; Code:
|
||||||
|
|
||||||
|
(require 'dash)
|
||||||
|
(require 's)
|
||||||
|
(require 'cl)
|
||||||
|
|
||||||
|
;;; content structure
|
||||||
|
|
||||||
|
(defun parser-content (consumed string)
|
||||||
|
"Create a content structure from STRING and the count of CONSUMED characters."
|
||||||
|
(cons consumed string))
|
||||||
|
|
||||||
|
(defun parser-content-consumed-count (content) (car content))
|
||||||
|
|
||||||
|
(defun parser-content-string (content) (cdr content))
|
||||||
|
|
||||||
|
(defun parser-content-from (content consumed)
|
||||||
|
"Create a new content after consuming CONSUMED chars."
|
||||||
|
(parser-content (+ (parser-content-consumed-count content) consumed)
|
||||||
|
(substring (parser-content-string content) consumed)))
|
||||||
|
|
||||||
|
;;; parsers
|
||||||
|
;;; TODO: document parsers
|
||||||
|
|
||||||
|
(defun parser-run (parser content)
|
||||||
|
(funcall parser content))
|
||||||
|
|
||||||
|
(defun parser-bind (m f)
|
||||||
|
"State monad composed with the maybe monad."
|
||||||
|
(if (null m) nil
|
||||||
|
(lambda (s)
|
||||||
|
(let ((new-result (parser-run m s)))
|
||||||
|
(if (null new-result) nil
|
||||||
|
(destructuring-bind (new-value . new-state) new-result
|
||||||
|
(parser-run (funcall f new-value) new-state)))))))
|
||||||
|
|
||||||
|
(defmacro parser-do (expr &rest more)
|
||||||
|
(let ((assignment-p (and (listp expr) (equal (cadr expr) '<-))))
|
||||||
|
(let ((var (if assignment-p (car expr) (gensym)))
|
||||||
|
(f (if assignment-p (caddr expr) expr)))
|
||||||
|
(if more
|
||||||
|
`(parser-bind
|
||||||
|
,f (lambda (,var)
|
||||||
|
(parser-do ,@more)))
|
||||||
|
f))))
|
||||||
|
|
||||||
|
(defun parser-return (x)
|
||||||
|
(lambda (s) (cons x s)))
|
||||||
|
|
||||||
|
(defun parser-zero ()
|
||||||
|
(lambda (s) nil))
|
||||||
|
|
||||||
|
(defun parser-get ()
|
||||||
|
(lambda (s) (cons s s)))
|
||||||
|
|
||||||
|
(defun parser-put (x)
|
||||||
|
(lambda (s) (cons nil x)))
|
||||||
|
|
||||||
|
(defun parser-get-string ()
|
||||||
|
(parser-do (content <- (parser-get))
|
||||||
|
(parser-return (parser-content-string content))))
|
||||||
|
|
||||||
|
(defun parser-drop (n)
|
||||||
|
(parser-do (content <- (parser-get))
|
||||||
|
(parser-put (parser-content-from content n))
|
||||||
|
;; TODO: substring will error if n is too large, guard against this
|
||||||
|
(parser-return n)))
|
||||||
|
|
||||||
|
(defun parser-take (n)
|
||||||
|
(lambda (content)
|
||||||
|
(let ((content-str (parser-content-string content)))
|
||||||
|
(unless (s-blank? content-str)
|
||||||
|
(let* ((len (length content-str))
|
||||||
|
(n (if (> n len) len n)))
|
||||||
|
(cons (substring content-str 0 n) (parser-content-from content n)))))))
|
||||||
|
|
||||||
|
(defun parser-item ()
|
||||||
|
(parser-take 1))
|
||||||
|
|
||||||
|
(defun parser-position ()
|
||||||
|
"Returns the point position, which is 1 more than the current
|
||||||
|
consumed count."
|
||||||
|
(parser-do (content <- (parser-get))
|
||||||
|
(parser-return (+ (parser-content-consumed-count content) 1))))
|
||||||
|
|
||||||
|
(defun parser-sat (pred)
|
||||||
|
(parser-do (pos <- (parser-position))
|
||||||
|
(a <- (parser-item))
|
||||||
|
(if (funcall pred a)
|
||||||
|
(parser-return pos)
|
||||||
|
(parser-zero))))
|
||||||
|
|
||||||
|
(defun parser-char (x)
|
||||||
|
(parser-sat (lambda (c) (equal x c))))
|
||||||
|
|
||||||
|
(defun parser-string (str)
|
||||||
|
;; take rather than recursion due to elisp
|
||||||
|
(parser-do (prefix <- (parser-take (length str)))
|
||||||
|
(pos <- (parser-position))
|
||||||
|
(if (equal str prefix)
|
||||||
|
(parser-return pos)
|
||||||
|
(parser-zero))))
|
||||||
|
|
||||||
|
(defun parser-regex (rx)
|
||||||
|
"Match the regex somewhere in the remaining string. Note you
|
||||||
|
have to prefix with '^' if you wish to match the beginning."
|
||||||
|
(parser-do (str <- (parser-get-string))
|
||||||
|
(if (string-match rx str)
|
||||||
|
(parser-drop (match-end 0))
|
||||||
|
(parser-zero))
|
||||||
|
(parser-position)))
|
||||||
|
|
||||||
|
;;; TODO: rename? parser-consume-while-not ?
|
||||||
|
(defun parser-drop-until-regex (rx)
|
||||||
|
"Skip over all characters until hitting RX. If RX is not found
|
||||||
|
this will bind to zero. If RX is matched at the beginning of the
|
||||||
|
string, we bind to zero. This allows for bottoming out of
|
||||||
|
recursion. We fail if we don't consume something."
|
||||||
|
(parser-do (str <- (parser-get-string))
|
||||||
|
(if (string-match rx str)
|
||||||
|
(if (> (match-beginning 0) 0)
|
||||||
|
(parser-drop (match-beginning 0))
|
||||||
|
(parser-zero))
|
||||||
|
(parser-zero))))
|
||||||
|
|
||||||
|
(defun parser-conj (p1 p2)
|
||||||
|
(lambda (content)
|
||||||
|
(or (parser-run p1 content)
|
||||||
|
(parser-run p2 content))))
|
||||||
|
|
||||||
|
(defun parser-0+ (p)
|
||||||
|
(parser-conj
|
||||||
|
(parser-1+ p)
|
||||||
|
(parser-return nil)))
|
||||||
|
|
||||||
|
(defun parser-1+ (p)
|
||||||
|
;; recursive isn't going to cut it in elisp
|
||||||
|
(lambda (content)
|
||||||
|
(let ((res (parser-run p content))
|
||||||
|
(acc nil))
|
||||||
|
(while res
|
||||||
|
(setq acc (cons (car res) acc))
|
||||||
|
(setq content (cdr res))
|
||||||
|
(setq res (parser-run p content)))
|
||||||
|
(when acc
|
||||||
|
(cons (reverse acc) content)))))
|
||||||
|
|
||||||
|
(defun parser-1? (p)
|
||||||
|
(parser-conj p (parser-return nil)))
|
||||||
|
|
||||||
|
(defun parser-not (parser)
|
||||||
|
(lambda (content)
|
||||||
|
(if (parser-run parser content)
|
||||||
|
nil
|
||||||
|
(parser-run (parser-item) content))))
|
||||||
|
|
||||||
|
(defun parser-paired (start children end create)
|
||||||
|
;; TODO: make this a macro so I don't have to pass in the thunk?
|
||||||
|
"CHILDREN should be a zero-arg lambda -- a thunk -- returning a
|
||||||
|
parser to allow for recursive nesting of a parser."
|
||||||
|
(parser-do (begin <- start)
|
||||||
|
(children <- (funcall children))
|
||||||
|
(end <- end)
|
||||||
|
(parser-return (funcall create begin end children))))
|
||||||
|
|
||||||
|
(provide 'parser)
|
||||||
|
|
||||||
|
;;; parser.el ends here
|
Loading…
Reference in New Issue
Block a user