src/hmisc/algo/hparse_pegs

    Dark Mode
Search:
Group by:
  Source   Edit

Types

Captures = object
  matches*: PegCaptures
  ml*: int
  origStart: int
contains the captured substrings.   Source   Edit
EInvalidPeg = object of ValueError
raised if an invalid PEG has been detected   Source   Edit
NonTerminal = ref NonTerminalObj
  Source   Edit
NonTerminalFlag = enum
  ntDeclared, ntUsed
  Source   Edit
Peg {.shallow.} = object
  case kind: PegKind
  of pkEmpty .. pkWhitespace:
      nil

  of pkTerminalKinds, pkInterpolateRefKinds:
      term: string

  of pkChar, pkGreedyRepChar:
      ch: char

  of pkCharChoice, pkGreedyRepSet:
      charChoice: ref set[char]

  of pkNonTerminal:
      nt: NonTerminal

  of pkBackrefKinds:
      index: range[0 .. high(int)]

  else:
      sons: seq[Peg]

  
type that represents a PEG   Source   Edit
PegCallReplaceMap = seq[tuple[peg: Peg, expr: PegReplaceHandler]]
  Source   Edit
PegCaptureRange = range[1 .. MaxSubpatterns]
  Source   Edit
PegExprReplaceMap = seq[tuple[peg: Peg, expr: string]]
  Source   Edit
PegKind = enum
  pkEmpty, pkAny,           ## any character (.)
  pkAnyRune,                ## any Unicode character (_)
  pkNewLine,                ## CR-LF, LF, CR
  pkLetter,                 ## Unicode letter
  pkLower,                  ## Unicode lower case letter
  pkUpper,                  ## Unicode upper case letter
  pkTitle,                  ## Unicode title character
  pkWhitespace,             ## Unicode whitespace character
  pkTerminal, pkTerminalIgnoreCase, pkTerminalFullIgnoreStyle,
  pkTerminalNimIgnoreStyle, pkChar, ## single character to match
  pkCharChoice, pkNonTerminal, pkSequence, ## a b c ... --> Internal DSL: peg(a, b, c)
  pkOrderedChoice,          ## a / b / ... --> Internal DSL: a / b or /[a, b, c]
  pkGreedyRep,              ## a*     --> Internal DSL: *a
                             ## a+     --> (a a*)
  pkGreedyRepChar,          ## x* where x is a single character (superop)
  pkGreedyRepSet,           ## [set]* (superop)
  pkGreedyAny,              ## .* or _* (superop)
  pkOption,                 ## a?     --> Internal DSL: ?a
  pkAndPredicate,           ## &a     --> Internal DSL: &a
  pkNotPredicate,           ## !a     --> Internal DSL: !a
  pkCapture,                ## {a}    --> Internal DSL: capture(a)
  pkBackRef,                ## $i     --> Internal DSL: backref(i)
  pkBackRefIgnoreCase, pkBackRefFullIgnoreStyle, pkBackRefNimIgnoreStyle, pkSearch, ## @a     --> Internal DSL: !*a
  pkCapturedSearch,         ## {@} a  --> Internal DSL: !*\a
  pkRule,                   ## a <- b
  pkList,                   ## a, b
  pkStartAnchor,            ## ^      --> Internal DSL: startAnchor()
  pkInterpolateRef, pkInterpolateRefIgnoreCase, pkInterpolateRefFullIgnoreStyle,
  pkInterpolateRefNimIgnoreStyle
  Source   Edit
PegMatches = array[MaxSubpatterns, string]
  Source   Edit
PegReplaceHandler = proc (match: int; cnt: int; caps: PegMatches): string
  Source   Edit

Consts

pkBackrefKinds = {pkBackRef..pkBackRefNimIgnoreStyle}
  Source   Edit
pkInterpolateRefKinds = {pkInterpolateRef..pkInterpolateRefNimIgnoreStyle}
  Source   Edit
pkRuntimeValueKinds = {pkBackRef..pkBackRefNimIgnoreStyle,
                       pkInterpolateRef..pkInterpolateRefNimIgnoreStyle}
  Source   Edit
pkTerminalKinds = {pkTerminal..pkTerminalNimIgnoreStyle}
  Source   Edit

Procs

func `!*\`(a: Peg): Peg {....raises: [], tags: [].}
constructs a "captured search" for the PEG a   Source   Edit
func `!*`(a: Peg): Peg {....raises: [], tags: [].}
constructs a "search" for the PEG a   Source   Edit
func `!`(a: Peg): Peg {....raises: [], tags: [].}
constructs a "not predicate" with the PEG a   Source   Edit
func `$`(r: Peg): string {....raises: [], tags: [].}
converts a PEG to its string representation   Source   Edit
func `&`(a: Peg): Peg {....raises: [], tags: [].}
constructs an "and predicate" with the PEG a   Source   Edit
func `*`(a: Peg): Peg {....raises: [], tags: [].}
constructs a "greedy repetition" for the PEG a   Source   Edit
func `+`(a: Peg): Peg {....raises: [], tags: [].}
constructs a "greedy positive repetition" with the PEG a   Source   Edit
func `/`(a: varargs[Peg]): Peg {....raises: [], tags: [].}
constructs an ordered choice with the PEGs in a   Source   Edit
func `?`(a: Peg): Peg {....raises: [], tags: [].}
constructs an optional for the PEG a   Source   Edit
func add(d: var Peg; s: Peg) {.inline, ...raises: [], tags: [].}
  Source   Edit
func anyChar(): Peg {.inline, ...raises: [], tags: [].}
constructs the PEG any character (.)   Source   Edit
func anyRune(): Peg {.inline, ...raises: [], tags: [].}
constructs the PEG any rune (_)   Source   Edit
func backref(index: Natural): Peg {....raises: [], tags: [].}
constructs a back reference of the given index. index starts counting from 1.   Source   Edit
func backrefFullIgnoreStyle(index: Natural): Peg {....raises: [], tags: [].}
constructs a back reference of the given index. index starts counting from 1. Ignores style for matching.   Source   Edit
func backrefIgnoreCase(index: Natural): Peg {....raises: [], tags: [].}
constructs a back reference of the given index. index starts counting from 1. Ignores case for matching.   Source   Edit
func backrefNimIgnoreStyle(index: Natural): Peg {....raises: [], tags: [].}
constructs a back reference of the given index. index starts counting from 1. Ignores style for matching.   Source   Edit
proc bounds(c: Captures; i: int): tuple[first, last: int] {....raises: [], tags: [].}
returns the bounds [first..last] of the i'th capture.   Source   Edit
func capture(a: Peg): Peg {....raises: [], tags: [].}
constructs a capture with the PEG a   Source   Edit
proc ch(p: Peg): char {....raises: [], tags: [].}
Returns the char representation of a given Peg variant object where present.   Source   Edit
proc charChoice(p: Peg): ref set[char] {....raises: [], tags: [].}
Returns the charChoice field of a given Peg variant object where present.   Source   Edit
func charSet(s: set[char]): Peg {....raises: [], tags: [].}
constructs a PEG from a character set s   Source   Edit
proc col(nt: NonTerminal): int {....raises: [], tags: [].}
Gets the column number of the definition of the parent Peg object variant of a given NonTerminal.   Source   Edit
proc contains(s: string; pattern: Peg; env: proc (s: string): string = nil;
              start = 0): bool {....raises: [Exception], tags: [RootEffect].}
same as find(s, pattern, start) >= 0   Source   Edit
proc contains(s: string; pattern: Peg; matches: var PegMatches;
              env: proc (s: string): string; start = 0): bool {.
    ...raises: [Exception], tags: [RootEffect].}
same as find(s, pattern, matches, start) >= 0   Source   Edit
func endAnchor(): Peg {.inline, ...raises: [], tags: [].}
constructs the PEG $ which matches the end of the input.   Source   Edit
proc endsWith(s: string; suffix: Peg; env: proc (s: string): string = nil;
              start = 0): bool {....raises: [Exception], tags: [RootEffect].}
returns true if s ends with the pattern suffix   Source   Edit
proc escapePeg(s: string): string {....raises: [], tags: [].}
escapes s so that it is matched verbatim when used as a peg.   Source   Edit
proc find(s: string; pattern: Peg; env: proc (s: string): string = nil;
          start = 0): int {....raises: [Exception], tags: [RootEffect].}
returns the starting position of pattern in s. If it does not match, -1 is returned.   Source   Edit
proc find(s: string; pattern: Peg; matches: var PegMatches;
          env: proc (s: string): string = nil; start = 0): int {.
    ...raises: [Exception], tags: [RootEffect].}
returns the starting position of pattern in s and the captured substrings in the array matches. If it does not match, nothing is written into matches and -1 is returned.   Source   Edit
proc findAll(s: string; pattern: Peg; env: proc (s: string): string = nil;
             start = 0): seq[string] {....raises: [Exception], tags: [RootEffect].}
returns all matching substrings of s that match pattern. If it does not match, @[] is returned.   Source   Edit
proc findBounds(s: string; pattern: Peg; matches: var PegMatches;
                env: proc (s: string): string = nil; start = 0): tuple[
    first, last: int] {....raises: [Exception], tags: [RootEffect].}
returns the starting position and end position of pattern in s and the captured substrings in the array matches. If it does not match, nothing is written into matches and (-1,0) is returned.   Source   Edit
proc flags(nt: NonTerminal): set[NonTerminalFlag] {....raises: [], tags: [].}
Gets the NonTerminalFlag-typed flags field of the parent Peg variant object of a given NonTerminal.   Source   Edit
proc index(p: Peg): range[0 .. high(int)] {....raises: [], tags: [].}
Returns the back-reference index of a captured sub-pattern in the Captures object for a given Peg variant object where present.   Source   Edit
func interpolateref(expr: string): Peg {....raises: [], tags: [].}
constructs an interpolate reference of the given expr. expr starts counting from 1.   Source   Edit
func interpolaterefFullIgnoreStyle(expr: string): Peg {....raises: [], tags: [].}
constructs an interpolate reference of the given expr. expr starts counting from 1. Ignores style for matching.   Source   Edit
func interpolaterefIgnoreCase(expr: string): Peg {....raises: [], tags: [].}
constructs an interpolate reference of the given expr. expr starts counting from 1. Ignores case for matching.   Source   Edit
func interpolaterefNimIgnoreStyle(expr: string): Peg {....raises: [], tags: [].}
constructs an interpolate reference of the given expr. expr starts counting from 1. Ignores style for matching.   Source   Edit
proc kind(p: Peg): PegKind {....raises: [], tags: [].}
Returns the PegKind of a given Peg object.   Source   Edit
func len(a: Peg): int {.inline, ...raises: [], tags: [].}
  Source   Edit
proc line(nt: NonTerminal): int {....raises: [], tags: [].}
Gets the line number of the definition of the parent Peg object variant of a given NonTerminal.   Source   Edit
proc match(s: string; pattern: Peg; env: proc (s: string): string = nil;
           start = 0): bool {....raises: [Exception], tags: [RootEffect].}
returns true if s matches the pattern beginning from start.   Source   Edit
proc match(s: string; pattern: Peg; matches: var PegMatches;
           env: proc (s: string): string = nil; start = 0): bool {.
    ...raises: [Exception], tags: [RootEffect].}
returns true if s[start..] matches the pattern and stores the captured substrings in the array matches. If it does not match, nothing is written into matches and false is returned.   Source   Edit
proc matchLen(s: string; pattern: Peg; env: proc (s: string): string = nil;
              start = 0): int {....raises: [Exception], tags: [RootEffect].}
the same as match, but it returns the length of the match; if there is no match, -1 is returned. Note that a match length of zero can happen. It's possible that a suffix of s remains that does not belong to the match.   Source   Edit
proc matchLen(s: string; pattern: Peg; matches: var PegMatches;
              env: proc (s: string): string = nil; start = 0): int {.
    ...raises: [Exception], tags: [RootEffect].}
the same as match, but it returns the length of the match; if there is no match, -1 is returned. Note that a match length of zero can happen. It's possible that a suffix of s remains that does not belong to the match.   Source   Edit
proc name(nt: NonTerminal): string {....raises: [], tags: [].}
Gets the name of the symbol represented by the parent Peg object variant of a given NonTerminal.   Source   Edit
func newLine(): Peg {.inline, ...raises: [], tags: [].}
constructs the PEG newline (\n)   Source   Edit
func newNonTerminal(name: string; line, column: int): NonTerminal {....raises: [],
    tags: [].}
constructs a nonterminal symbol   Source   Edit
func nonterminal(n: NonTerminal): Peg {....raises: [], tags: [].}
constructs a PEG that consists of the nonterminal symbol   Source   Edit
proc nt(p: Peg): NonTerminal {....raises: [], tags: [].}
Returns the NonTerminal object of a given Peg variant object where present.   Source   Edit
proc parallelReplace(s: string;
                     subs: varargs[tuple[pattern: Peg, repl: string]];
                     env: proc (s: string): string): string {.
    ...raises: [Exception, ValueError], tags: [RootEffect].}
Returns a modified copy of s with the substitutions in subs applied in parallel.   Source   Edit
proc parsePeg(pattern: string; filename = "pattern"; line = 1; col = 0): Peg {.
    ...raises: [ValueError, EInvalidPeg, Exception], tags: [RootEffect].}
constructs a Peg object from pattern. filename, line, col are used for error messages, but they only provide start offsets. parsePeg keeps track of line and column numbers within pattern.   Source   Edit
proc peg(pattern: string): Peg {....raises: [ValueError, EInvalidPeg, Exception],
                                 tags: [RootEffect].}
constructs a Peg object from the pattern. The short name has been chosen to encourage its use as a raw string modifier:
peg"{\ident} \s* '=' \s* {.*}"
  Source   Edit
proc rawMatch(s: string; p: Peg; start: int; c: var Captures;
              env: proc (s: string): string = nil): int {....raises: [Exception],
    tags: [RootEffect].}
low-level matching proc that implements the PEG interpreter. Use this for maximum efficiency (every other PEG operation ends up calling this proc). Returns -1 if it does not match, else the length of the match   Source   Edit
proc replace(s: string; sub: Peg;
             cb: proc (match: int; cnt: int; caps: PegMatches): string;
             env: proc (s: string): string = nil): string {....raises: [Exception],
    tags: [RootEffect].}
Replaces sub in s by the resulting strings from the callback. The callback proc receives the index of the found match (starting from 0), the count of captures and an open array with the captures of each match. Examples:

Example:

import std/strutils
proc handleMatches*(m: int, n: int, c: PegMatches): string =
  result = ""
  if m > 0: # If not the first match in the input string
    result.add ", "

  result.add case n: # Check number of captured variables
    of 2:
      # Two captures (key-value pair)
      c[0].toLower & ": '" & c[1] & "'"
    of 1:
      # Single capture (just variable)
      c[0].toLower & ": ''"

    else:
      ""

let s = "Var1=key1;var2=Key2;   VAR3"
doAssert s.replace(peg"{\ident}('='{\ident})* ';'* \s*", handleMatches) ==
  "var1: 'key1', var2: 'Key2', var3: ''"
  Source   Edit
proc replace(s: string; sub: Peg; by: string = "";
             env: proc (s: string): string = nil): string {....raises: [Exception],
    tags: [RootEffect].}
Replaces sub in s by the string by. Captures cannot be accessed in by.   Source   Edit
proc replacef(s: string; sub: Peg; by: string;
              env: proc (s: string): string = nil): string {.
    ...raises: [Exception, ValueError], tags: [RootEffect].}
Replaces sub in s by the string by. Captures can be accessed in by with the notation $i and $# (see strutils.%). Examples:
"var1=key; var2=key2".replacef(peg"{\ident}'='{\ident}", "$1<-$2$2")

Results in:

"var1<-keykey; var2<-key2key2"
  Source   Edit
proc replaceInterpol(s: string; sub: Peg; expr: string; exprCalls: Table[string,
    proc (arg: string): string] = defaultReplacementCalls;
                     env: proc (s: string): string = nil): string {.
    ...raises: [ValueError, Exception], tags: [RootEffect].}
  Source   Edit
proc replaceInterpolAny(s: string; replaceMap: seq[
    tuple[peg: Peg, expr: PegReplaceHandler]]; exprCalls: Table[string,
    proc (arg: string): string] = defaultReplacementCalls;
                        env: proc (s: string): string = nil): string {.
    ...raises: [Exception], tags: [RootEffect].}
  Source   Edit
proc replaceInterpolAny(s: string;
                        replaceMap: seq[tuple[peg: Peg, expr: string]];
    exprCalls: Table[string, proc (arg: string): string] = defaultReplacementCalls;
                        env: proc (s: string): string = nil): string {.
    ...raises: [Exception, ValueError], tags: [RootEffect].}
  Source   Edit
proc rule(nt: NonTerminal): Peg {....raises: [], tags: [].}
Gets the Peg object representing the rule definition of the parent Peg object variant of a given NonTerminal.   Source   Edit
func sequence(a: varargs[Peg]): Peg {....raises: [], tags: [].}
constructs a sequence with all the PEGs from a   Source   Edit
proc split(s: string; sep: Peg; env: proc (s: string): string): seq[string] {.
    ...raises: [Exception], tags: [RootEffect].}
Splits the string s into substrings.   Source   Edit
func startAnchor(): Peg {.inline, ...raises: [], tags: [].}
constructs the PEG ^ which matches the start of the input.   Source   Edit
proc startsWith(s: string; prefix: Peg; env: proc (s: string): string = nil;
                start = 0): bool {....raises: [Exception], tags: [RootEffect].}
returns true if s starts with the pattern prefix   Source   Edit
proc term(p: Peg): string {....raises: [], tags: [].}
Returns the string representation of a given Peg variant object where present.   Source   Edit
func term(t: char): Peg {....raises: [], tags: [].}
constructs a PEG from a terminal char   Source   Edit
func term(t: string): Peg {....raises: [], tags: [].}
constructs a PEG from a terminal string   Source   Edit
func termFullIgnoreStyle(t: string): Peg {....raises: [], tags: [].}
constructs a PEG from a terminal string; ignore style for matching   Source   Edit
func termIgnoreCase(t: string): Peg {....raises: [], tags: [].}
constructs a PEG from a terminal string; ignore case for matching   Source   Edit
func termNimIgnoreStyle(t: string): Peg {....raises: [], tags: [].}
constructs a PEG from a terminal string; ignore style for matching   Source   Edit
proc toReplaceHandler(expr: string; exprCalls: Table[string,
    proc (arg: string): string] = defaultReplacementCalls): PegReplaceHandler {.
    ...raises: [ValueError], tags: [].}
  Source   Edit
proc toReplaceHandlerMap(replaceMap: seq[tuple[peg: Peg, expr: string]];
    exprCalls: Table[string, proc (arg: string): string] = defaultReplacementCalls): seq[
    (Peg, PegReplaceHandler)] {....raises: [ValueError], tags: [].}
  Source   Edit
proc transformFile(infile, outfile: string;
                   subs: varargs[tuple[pattern: Peg, repl: string]];
                   env: proc (s: string): string) {.
    ...raises: [IOError, Exception, ValueError],
    tags: [ReadIOEffect, WriteIOEffect, RootEffect].}

reads in the file infile, performs a parallel replacement (calls parallelReplace) and writes back to outfile. Raises IOError if an error occurs. This is supposed to be used for quick scripting.

Note: this proc does not exist while using the JS backend.

  Source   Edit
func unicodeLetter(): Peg {.inline, ...raises: [], tags: [].}
constructs the PEG \letter which matches any Unicode letter.   Source   Edit
func unicodeLower(): Peg {.inline, ...raises: [], tags: [].}
constructs the PEG \lower which matches any Unicode lowercase letter.   Source   Edit
func unicodeTitle(): Peg {.inline, ...raises: [], tags: [].}
constructs the PEG \title which matches any Unicode title letter.   Source   Edit
func unicodeUpper(): Peg {.inline, ...raises: [], tags: [].}
constructs the PEG \upper which matches any Unicode uppercase letter.   Source   Edit
func unicodeWhitespace(): Peg {.inline, ...raises: [], tags: [].}
constructs the PEG \white which matches any Unicode whitespace character.   Source   Edit

Iterators

iterator findAll(s: string; pattern: Peg; env: proc (s: string): string = nil;
                 start = 0): string {....raises: [Exception], tags: [RootEffect].}
yields all matching substrings of s that match pattern.   Source   Edit
iterator items(p: Peg): Peg {.inline, ...raises: [], tags: [].}
Yields the child nodes of a Peg variant object where present.   Source   Edit
iterator pairs(p: Peg): (int, Peg) {.inline, ...raises: [], tags: [].}
Yields the indices and child nodes of a Peg variant object where present.   Source   Edit
iterator split(s: string; sep: Peg; env: proc (s: string): string = nil): string {.
    ...raises: [Exception], tags: [RootEffect].}

Splits the string s into substrings.

Substrings are separated by the PEG sep. Examples:

for word in split("00232this02939is39an22example111", peg"\d+"):
  writeLine(stdout, word)

Results in:

"this"
"is"
"an"
"example"
  Source   Edit

Templates

template `=~`(s: string; pattern: Peg): bool
This calls match with an implicitly declared matches array that can be used in the scope of the =~ call:
if line =~ peg"\s* {\w+} \s* '=' \s* {\w+}":
  # matches a key=value pair:
  echo("Key: ", matches[0])
  echo("Value: ", matches[1])
elif line =~ peg"\s*{'#'.*}":
  # matches a comment
  # note that the implicit ``matches`` array is different from the
  # ``matches`` array of the first branch
  echo("comment: ", matches[0])
else:
  echo("syntax error")
  Source   Edit
template digits(): Peg
expands to charset({'0'..'9'})   Source   Edit
template eventParser(pegAst, handlers: untyped): (
    proc (s: string; env: proc (s: string): string): int)
Generates an interpreting event parser proc according to the specified PEG AST and handler code blocks. The proc can be called with a string to be parsed and will execute the handler code blocks whenever their associated grammar element is matched. It returns -1 if the string does not match, else the length of the total match. The following example code evaluates an arithmetic expression defined by a simple PEG:
import strutils, pegs

let
  pegAst = """
Expr    <- Sum
Sum     <- Product (('+' / '-')Product)*
Product <- Value (('*' / '/')Value)*
Value   <- [0-9]+ / '(' Expr ')'
  """.peg
  txt = "(5+3)/2-7*22"

var
  pStack: seq[string] = @[]
  valStack: seq[float] = @[]
  opStack = ""
let
  parseArithExpr = pegAst.eventParser:
    pkNonTerminal:
      enter:
        pStack.add p.nt.name
      leave:
        pStack.setLen pStack.high
        if length > 0:
          let matchStr = s.substr(start, start+length-1)
          case p.nt.name
          of "Value":
            try:
              valStack.add matchStr.parseFloat
              echo valStack
            except ValueError:
              discard
          of "Sum", "Product":
            try:
              let val = matchStr.parseFloat
            except ValueError:
              if valStack.len > 1 and opStack.len > 0:
                valStack[^2] = case opStack[^1]
                of '+': valStack[^2] + valStack[^1]
                of '-': valStack[^2] - valStack[^1]
                of '*': valStack[^2] * valStack[^1]
                else: valStack[^2] / valStack[^1]
                valStack.setLen valStack.high
                echo valStack
                opStack.setLen opStack.high
                echo opStack
    pkChar:
      leave:
        if length == 1 and "Value" != pStack[^1]:
          let matchChar = s[start]
          opStack.add matchChar
          echo opStack

let pLen = parseArithExpr(txt)

The handlers parameter consists of code blocks for PegKinds, which define the grammar elements of interest. Each block can contain handler code to be executed when the parser enters and leaves text matching the grammar element. An enter handler can access the specific PEG AST node being matched as p, the entire parsed string as s and the position of the matched text segment in s as start. A leave handler can access p, s, start and also the length of the matched text segment as length. For an unsuccessful match, the enter and leave handlers will be executed, with length set to -1.

Symbols declared in an enter handler can be made visible in the corresponding leave handler by annotating them with an inject pragma.

  Source   Edit
template ident(): Peg
same as [a-zA-Z_][a-zA-Z_0-9]*; standard identifier   Source   Edit
template identChars(): Peg
expands to charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})   Source   Edit
template identStartChars(): Peg
expands to charset({'A'..'Z', 'a'..'z', '_'})   Source   Edit
template injectMatch(s: string; pattern: Peg;
                     env: proc (s: string): string = nil): bool
  Source   Edit
template letters(): Peg
expands to charset({'A'..'Z', 'a'..'z'})   Source   Edit
template natural(): Peg
same as \d+   Source   Edit
template whitespace(): Peg
expands to charset({' ', '\9'..'\13'})   Source   Edit