Documentation ¶
Overview ¶
Package parsekit implements a simple, reusable parser for small grammars.
Example ¶
package main

import (
    "fmt"
    "net/netip"
    "time"
    "unicode/utf8"

    "github.com/TroutSoftware/parsekit/v2"
)

func main() {
    p := parsekit.Init[Lease](
        parsekit.ReadFile("testdata/example_dhcp1"),
        parsekit.WithLexer(scantk),
        parsekit.SynchronizeAt("lease"),
    )
    ParseLease(p)
    lease, err := p.Finish()
    if err != nil {
        fmt.Printf("cannot parse lease file: %s", err)
        return
    }
    fmt.Println(lease)
}

type Lease struct {
    Interface    string
    FixedAddress netip.Addr
    Expire       time.Time
}

func ParseLease(p *parsekit.Parser[Lease]) {
    defer p.Synchronize()

    p.Expect(IdentToken, "lease")
    p.Expect('{', "opening bracket")
    for p.More() {
        if p.Match('}') {
            return
        }

        p.Expect(IdentToken, "option")
        switch p.Lit() {
        case "interface":
            p.Expect(StringToken, "interface")
            p.Value.Interface = p.Val().(string)
            p.Expect(';', ";")
        case "fixed-address":
            p.Expect(IPToken, "IP address")
            p.Value.FixedAddress = p.Val().(netip.Addr)
            p.Expect(';', ";")
        case "expire":
            p.Expect(NumberToken, "number")
            p.Expect(DateTimeToken, "date and time of expiration")
            p.Value.Expire = time.Time(p.Val().(LTime))
            p.Expect(';', ";")
        default:
            for !p.Match(';') {
                p.Skip()
            }
        }
    }
}

type LTime time.Time

func (t *LTime) UnmarshalText(dt []byte) error {
    u, err := time.Parse("2006/01/02 15:04:05", string(dt))
    if err != nil {
        return err
    }
    *t = (LTime)(u)
    return nil
}

const (
    NumberToken rune = -1 - iota
    IPToken
    DateTimeToken
    IdentToken
    StringToken
    InvalidType
)

func scantk(sc *parsekit.Scanner) parsekit.Token {
    switch tk := sc.Advance(); {
    case tk == ' ':
        return parsekit.Ignore // empty space
    case tk == '{', tk == '}', tk == ';':
        return parsekit.Const(tk)
    case tk == '"':
        for sc.Peek() != '"' && sc.Peek() != utf8.RuneError {
            sc.Advance()
        }
        if sc.Peek() == utf8.RuneError {
            return parsekit.EOF
        }
        sc.Advance() // terminating '"'
        return parsekit.Auto[string](StringToken, sc)
    case '0' <= tk && tk <= '9':
        guess := NumberToken
        for {
            if sc.Peek() >= '0' && sc.Peek() <= '9' {
                sc.Advance()
            } else if sc.Peek() == '/' {
                guess = DateTimeToken
                sc.Advance()
            } else if sc.Peek() == '.' {
                guess = IPToken
                sc.Advance()
            } else if (sc.Peek() == ' ' || sc.Peek() == ':') && guess == DateTimeToken {
                sc.Advance()
            } else {
                break
            }
        }
        switch guess {
        case DateTimeToken:
            return parsekit.Auto[LTime](guess, sc)
        case IPToken:
            return parsekit.Auto[netip.Addr](guess, sc)
        default:
            return parsekit.Auto[int](guess, sc)
        }
    case 'a' <= tk && tk <= 'z' || tk == '-':
        for 'a' <= sc.Peek() && sc.Peek() <= 'z' || sc.Peek() == '-' {
            sc.Advance()
        }
        return parsekit.Const(IdentToken)
    }
    return parsekit.EOF
}
Output: {eth0 10.67.21.85 2023-11-03 11:27:26 +0000 UTC}
Index ¶
- Constants
- type Identifier
- type Lexer
- type Parser
- func (p *Parser[T]) Errf(format string, args ...any)
- func (p *Parser[T]) Expect(tk rune, msg string)
- func (p *Parser[T]) Finish() (T, error)
- func (p *Parser[T]) Lit() string
- func (p *Parser[T]) Match(tk ...rune) bool
- func (p *Parser[T]) More() bool
- func (p *Parser[T]) Skip()
- func (p *Parser[T]) Synchronize()
- func (p *Parser[T]) Val() any
- type ParserOptions
- type Position
- type Scanner
- type Token
Examples ¶
Constants ¶
const ErrLit = "<error>"
ErrLit is the literal value set after a failed call to Parser.Expect.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Identifier ¶
type Identifier string
type Lexer ¶
Lexer is a stateful function that reads tokens from the scanner. Each time the function returns, a new token is created and the scanner advances.
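A minimal lexer sketch, in the same shape as scantk from the package example above (WordToken and words are illustrative names, not part of the package):

const WordToken rune = -1

func words(sc *parsekit.Scanner) parsekit.Token {
    switch tk := sc.Advance(); {
    case tk == ' ', tk == '\n', tk == '\t':
        return parsekit.Ignore // whitespace never reaches the parser
    case 'a' <= tk && tk <= 'z':
        for 'a' <= sc.Peek() && sc.Peek() <= 'z' {
            sc.Advance()
        }
        return parsekit.Const(WordToken) // the lexeme stays available through Parser.Lit
    }
    return parsekit.EOF // unknown or invalid rune: end of the token stream
}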
type Parser ¶
type Parser[T any] struct {
    Value T
    // contains filtered or unexported fields
}
Parser implements a recursive descent parser. It provides facilities for error reporting, peeking, …
func Init ¶
func Init[T any](opts ...ParserOptions) *Parser[T]
Init creates a new parser. At least two options must be provided: (1) a reader, and (2) a lexer function. Further options (e.g. SynchronizeAt) specialize the parser's behavior.
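For instance (a sketch; Config, app.conf and myLexer are illustrative names):

p := parsekit.Init[Config](
    parsekit.ReadFile("app.conf"),    // required: input source
    parsekit.WithLexer(myLexer),      // required: lexer function
    parsekit.SynchronizeAt("server"), // optional: error recovery literal
)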
func (*Parser[T]) Errf ¶
Errf triggers a panic mode with the given formatted error. The position is correctly attached to the error.
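Errf is typically used for checks that go beyond token shape. A sketch, assuming NumberToken values are produced with Auto[int] as in the package example (the range check is illustrative):

p.Expect(NumberToken, "port number")
port := p.Val().(int)
if port > 65535 {
    p.Errf("port %d is out of range", port)
}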
func (*Parser[T]) Expect ¶
Expect advances the parser to the next input, making sure it matches the token tk.
func (*Parser[T]) Finish ¶
Finish returns the value and error of the parsing. This makes it convenient to use at the bottom of a function:

func ReadConfigFiles() (MyStruct, error) {
    p := Init[MyStruct](ReadFile(xxx), WithLexer(yyy))
    parseConfig(p)
    return p.Finish()
}
func (*Parser[T]) Match ¶
Match returns true if tk is found at the current parsing point. It does not consume any input on failure, so can be used in a test.
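This makes Match convenient for optional syntax, as in this sketch (the "{ ... }" block is illustrative):

// values may be followed by an optional "{ ... }" block
if p.Match('{') {
    for !p.Match('}') {
        p.Skip()
    }
}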
func (*Parser[T]) More ¶
More returns true if input is left in the stream. More does not advance the parser state, so use Parser.Skip or Parser.Expect to consume a value.
func (*Parser[T]) Synchronize ¶
func (p *Parser[T]) Synchronize()
Synchronize handles error recovery in the parsing process: when an error occurs, the parser panics all the way to the Parser.Synchronize function. All tokens are then discarded until the first of the synchronization literals (see SynchronizeAt) is found.
Run this in a top-level `defer` statement at the level of the synchronisation elements.
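The ParseLease function in the package example follows this pattern; a reduced sketch (parseRecord is an illustrative name):

func parseRecord(p *parsekit.Parser[Lease]) {
    defer p.Synchronize() // on error, discard tokens until the next "lease"

    p.Expect(IdentToken, "lease")
    // ... any failed Expect or call to Errf unwinds to Synchronize
}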
type ParserOptions ¶
type ParserOptions func(*emb)
ParserOptions specialize the behavior of the parser.
func ReadFile ¶
func ReadFile(name string) ParserOptions
ReadFile reads the content of file name, and passes it to the scanner.
func SynchronizeAt ¶
func SynchronizeAt(lits ...string) ParserOptions
SynchronizeAt sets the synchronisation literals for error recovery. See Parser.Synchronize for full documentation.
func Verbose ¶
func Verbose() ParserOptions
func WithLexer ¶
func WithLexer(lx Lexer) ParserOptions
WithLexer sets the lexer used by the parser.
type Position ¶
type Position struct {
Filename string // filename, if any
Offset int // byte offset, starting at 0
Line int // line number, starting at 1
Column int // column number, starting at 1 (character count per line)
}
Position is a value that represents a source position. A position is valid if Line > 0.
type Scanner ¶
type Scanner struct {
// contains filtered or unexported fields
}
Scanner reads lexemes from a source.
func (*Scanner) Advance ¶
Advance returns the next character in the stream, and increments the read counter.
type Token ¶
var EOF Token
EOF is a marker token. The Lexer should return it when Scanner.Advance returns an invalid rune.
var Ignore Token
Ignore is a marker token. The Lexer should return it when the current token is to be ignored by the scanner, and not passed to the parser. This is useful to skip over comments, or empty lines.
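For instance, the scantk lexer in the package example could hide a line comment from the parser with an extra branch (a sketch; the '#' comment syntax is not part of the example grammar):

case tk == '#':
    for sc.Peek() != '\n' && sc.Peek() != utf8.RuneError {
        sc.Advance()
    }
    return parsekit.Ignore // the parser never sees the comment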
func Auto ¶
Auto returns a new token with value of type T. The value is read from the current lexeme, and converted with:
- strconv.Unquote for strings if the first character is a quote
- the lexeme directly for strings otherwise
- strconv.ParseInt for integers
- Unix and ISO formats for times
- calling the type's UnmarshalText method (encoding.TextUnmarshaler) otherwise
If the value cannot be parsed, an error token is returned to the parser.
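The LTime type in the package example illustrates the last rule. As another sketch (Hex and HexToken are illustrative, not part of the package):

// Hex values have no built-in conversion rule in Auto,
// so they are parsed through UnmarshalText.
type Hex uint64

func (h *Hex) UnmarshalText(b []byte) error {
    v, err := strconv.ParseUint(string(b), 16, 64) // requires "strconv"
    if err != nil {
        return err
    }
    *h = Hex(v)
    return nil
}

A lexer branch would then return parsekit.Auto[Hex](HexToken, sc) after scanning the hexadecimal lexeme.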