separated parser from tokenizer, wrote some tokenizer tests

Christopher Ramey 2021-01-17 16:41:28 -09:00
parent 3a4317f3a9
commit b675d8714e
7 changed files with 222 additions and 103 deletions
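In outline: opening the config file and tracking the current line move out of Parser and behind the new Tokenizer type, which Parse now drives token by token. The surface the commit introduces (signatures as they appear in config/tokenizer.go below; the trailing comments are editorial glosses, not part of the diff):

    func NewTokenizer(fn string) (*Tokenizer, error) // open fn for scanning
    func (t *Tokenizer) Scan() bool                  // advance to the next token
    func (t *Tokenizer) Text() string                // current token
    func (t *Tokenizer) Line() int                   // current line, 1-based
    func (t *Tokenizer) Err() error                  // first scan error, if any
    func (t *Tokenizer) Close() error                // close the underlying file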

@@ -3,9 +3,7 @@ package config
 import (
 	"alrm/alarm"
 	"alrm/check"
-	"bufio"
 	"fmt"
-	"os"
 	"strings"
 )

@@ -21,7 +19,6 @@ const (

 type Parser struct {
 	DebugLevel int
-	Line       int
 	states     []int
 	lastHost   *Host
 	lastGroup  *Group
@@ -31,18 +28,15 @@ type Parser struct {
 }

 func (p *Parser) Parse(fn string) (*Config, error) {
-	file, err := os.Open(fn)
-	if err != nil {
-		return nil, fmt.Errorf("cannot open config \"%s\": %s", fn, err.Error())
-	}
-	defer file.Close()
-
 	config := NewConfig()
-
-	scan := bufio.NewScanner(file)
-	scan.Split(p.Split)
-	for scan.Scan() {
-		tk := scan.Text()
+	tok, err := NewTokenizer(fn)
+	if err != nil {
+		return nil, err
+	}
+	defer tok.Close()
+
+	for tok.Scan() {
+		tk := tok.Text()
 	stateswitch:
 		switch p.state() {
 		case TK_NONE:
@@ -55,29 +49,29 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 				p.setState(TK_ALARM)
 			default:
 				return nil, fmt.Errorf("invalid token in %s, line %d: \"%s\"",
-					fn, p.Line, tk)
+					fn, tok.Line(), tk)
 			}
 		case TK_SET:
 			key := strings.ToLower(tk)
-			if !scan.Scan() {
+			if !tok.Scan() {
 				return nil, fmt.Errorf("empty value name for set in %s, line %d",
-					fn, p.Line)
+					fn, tok.Line())
 			}
-			value := scan.Text()
+			value := tok.Text()
 			switch key {
 			case "interval":
 				err := config.SetInterval(value)
 				if err != nil {
 					return nil, fmt.Errorf(
 						"invalid number for interval in %s, line %d: \"%s\"",
-						fn, p.Line, value,
+						fn, tok.Line(), value,
 					)
 				}
 			default:
 				return nil, fmt.Errorf("unknown key for set in %s, line %d: \"%s\"",
-					fn, p.Line, tk,
+					fn, tok.Line(), tk,
 				)
 			}
 			p.prevState()
@@ -100,7 +94,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 			p.lastGroup, err = config.NewGroup(tk)
 			if err != nil {
 				return nil, fmt.Errorf("%s in %s, line %d",
-					err.Error(), fn, p.Line,
+					err.Error(), fn, tok.Line(),
 				)
 			}
 			continue
@@ -121,7 +115,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 				p.lastGroup, err = config.NewGroup(tk)
 				if err != nil {
 					return nil, fmt.Errorf("%s in %s, line %d",
-						err.Error(), fn, p.Line,
+						err.Error(), fn, tok.Line(),
 					)
 				}
 			}
@@ -130,7 +124,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 			p.lastHost, err = p.lastGroup.NewHost(tk)
 			if err != nil {
 				return nil, fmt.Errorf("%s in %s, line %d",
-					err.Error(), fn, p.Line,
+					err.Error(), fn, tok.Line(),
 				)
 			}
 			continue
@@ -138,11 +132,11 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 			switch strings.ToLower(tk) {
 			case "address":
-				if !scan.Scan() {
+				if !tok.Scan() {
 					return nil, fmt.Errorf("empty address for host in %s, line %d",
-						fn, p.Line)
+						fn, tok.Line())
 				}
-				p.lastHost.Address = scan.Text()
+				p.lastHost.Address = tok.Text()
 			case "check":
 				p.setState(TK_CHECK)
@@ -157,14 +151,14 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 				p.lastCheck, err = p.lastHost.NewCheck(tk)
 				if err != nil {
 					return nil, fmt.Errorf("%s in %s, line %d",
-						err.Error(), fn, p.Line)
+						err.Error(), fn, tok.Line())
 				}
 				continue
 			}
 			cont, err := p.lastCheck.Parse(tk)
 			if err != nil {
 				return nil, fmt.Errorf("%s in %s, line %d",
-					err.Error(), fn, p.Line)
+					err.Error(), fn, tok.Line())
 			}
 			if !cont {
 				p.lastCheck = nil
@@ -182,7 +176,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 				p.lastAlarm, err = config.NewAlarm(p.lastAlarmName, tk)
 				if err != nil {
 					return nil, fmt.Errorf("%s in %s, line %d",
-						err.Error(), fn, p.Line)
+						err.Error(), fn, tok.Line())
 				}
 				p.lastAlarmName = ""
 				continue
@@ -190,7 +184,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 			cont, err := p.lastAlarm.Parse(tk)
 			if err != nil {
 				return nil, fmt.Errorf("%s in %s, line %d",
-					err.Error(), fn, p.Line)
+					err.Error(), fn, tok.Line())
 			}
 			if !cont {
 				p.lastAlarm = nil
@@ -202,7 +196,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 			return nil, fmt.Errorf("unknown parser state: %d", p.state())
 		}
 	}
-	if err := scan.Err(); err != nil {
+	if err := tok.Err(); err != nil {
 		return nil, err
 	}
 	return config, nil
@@ -250,7 +244,7 @@ func (p *Parser) stateName() string {
 	case TK_SET:
 		return "TK_SET"
 	case TK_MONITOR:
-		return "TK_MONTIOR"
+		return "TK_MONITOR"
 	case TK_GROUP:
 		return "TK_GROUP"
 	case TK_HOST:
@@ -263,75 +257,3 @@ func (p *Parser) stateName() string {
 		return "UNKNOWN"
 	}
 }
-
-func (p *Parser) Split(data []byte, atEOF bool) (int, []byte, error) {
-	if atEOF && len(data) == 0 {
-		return 0, nil, nil
-	}
-
-	var ignoreline bool
-	var started bool
-	var startidx int
-	var quote byte
-	for i := 0; i < len(data); i++ {
-		c := data[i]
-		//fmt.Printf("%c (%t) (%t)\n", c, started, ignoreline)
-		switch c {
-		case '\f', '\n', '\r':
-			p.Line++
-			if ignoreline {
-				ignoreline = false
-				continue
-			}
-			fallthrough
-		case ' ', '\t', '\v':
-			if started && quote == 0 {
-				return i + 1, data[startidx:i], nil
-			}
-		case '\'', '"', '`':
-			// When the quote ends
-			if quote == c {
-				// if we've gotten data, return it
-				if started {
-					return i + 1, data[startidx:i], nil
-				}
-				// if we haven't, return nothing
-				return i + 1, []byte{}, nil
-			}
-			// start a quoted string
-			if !ignoreline && quote == 0 {
-				quote = c
-			}
-		case '#':
-			if !started {
-				ignoreline = true
-			}
-		default:
-			if !ignoreline && !started {
-				started = true
-				startidx = i
-			}
-		}
-	}
-
-	if atEOF {
-		if quote != 0 {
-			return 0, nil, fmt.Errorf("unterminated quote")
-		}
-		if ignoreline {
-			return len(data), nil, nil
-		}
-		if started {
-			return len(data), data[startidx:], nil
-		}
-	}
-	return 0, nil, nil
-}

config/testdata/comments.tok (new file, +5)

@@ -0,0 +1,5 @@
+# one two three
+one two three
+#four five six
+four five six #seven eight nine
+# EOF

config/testdata/quotes.tok (new file, +8)

@@ -0,0 +1,8 @@
"one" "two three"
"four five"
" #six" "" "seven" "ei" "ght"
"multi
line"

config/testdata/simple-broken.tok (new file, +6)

@@ -0,0 +1,6 @@
+one two three
+four five
+
+
+six
+

config/testdata/simple.tok (new file, +1)

@@ -0,0 +1 @@
+one two three four five six

config/tokenizer.go (new file, +122)

@@ -0,0 +1,122 @@
+package config
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+)
+
+type Tokenizer struct {
+	line    int
+	file    *os.File
+	scanner *bufio.Scanner
+}
+
+func NewTokenizer(fn string) (*Tokenizer, error) {
+	var err error
+	tk := &Tokenizer{line: 1}
+
+	tk.file, err = os.Open(fn)
+	if err != nil {
+		return nil, err
+	}
+
+	tk.scanner = bufio.NewScanner(tk.file)
+	tk.scanner.Split(tk.Split)
+	return tk, nil
+}
+
+func (t *Tokenizer) Close() error {
+	return t.file.Close()
+}
+
+func (t *Tokenizer) Scan() bool {
+	return t.scanner.Scan()
+}
+
+func (t *Tokenizer) Text() string {
+	return t.scanner.Text()
+}
+
+func (t *Tokenizer) Line() int {
+	return t.line
+}
+
+func (t *Tokenizer) Err() error {
+	return t.scanner.Err()
+}
+
+func (t *Tokenizer) Split(data []byte, atEOF bool) (int, []byte, error) {
+	if atEOF && len(data) == 0 {
+		return 0, nil, nil
+	}
+
+	var ignoreline bool
+	var started bool
+	var startidx int
+	var quote byte
+	for i := 0; i < len(data); i++ {
+		c := data[i]
+		//fmt.Printf("%c (%t) (%t)\n", c, started, ignoreline)
+		switch c {
+		case '\f', '\n', '\r':
+			if started {
+				return i, data[startidx:i], nil
+			}
+			t.line++
+			if ignoreline {
+				ignoreline = false
+				continue
+			}
+			fallthrough
+		case ' ', '\t', '\v':
+			if started && quote == 0 {
+				return i + 1, data[startidx:i], nil
+			}
+		case '\'', '"', '`':
+			// When the quote ends
+			if quote == c {
+				// if we've gotten data, return it
+				if started {
+					return i + 1, data[startidx:i], nil
+				}
+				// if we haven't, return nothing
+				return i + 1, []byte{}, nil
+			}
+			// start a quoted string
+			if !ignoreline && quote == 0 {
+				quote = c
+			}
+		case '#':
+			if !started {
+				ignoreline = true
+			}
+		default:
+			if !ignoreline && !started {
+				started = true
+				startidx = i
+			}
+		}
+	}
+
+	if atEOF {
+		if quote != 0 {
+			return 0, nil, fmt.Errorf("unterminated quote")
+		}
+		if ignoreline {
+			return len(data), nil, nil
+		}
+		if started {
+			return len(data), data[startidx:], nil
+		}
+	}
+	return 0, nil, nil
+}
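A minimal sketch of how a caller drives the new type, mirroring the loop in Parse; the config path is hypothetical, and the fragment assumes an enclosing function that returns error:

    tok, err := NewTokenizer("alrm.conf") // hypothetical path
    if err != nil {
        return err
    }
    defer tok.Close()
    // Scan/Text/Line work like bufio.Scanner, with line tracking attached.
    for tok.Scan() {
        fmt.Printf("line %d: %q\n", tok.Line(), tok.Text())
    }
    if err := tok.Err(); err != nil {
        return err
    }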

config/tokenizer_test.go (new file, +55)

@@ -0,0 +1,55 @@
+package config
+
+import (
+	"testing"
+	"encoding/json"
+)
+
+func TestTokenizer(t *testing.T) {
+	runTest(t, "simple",
+		`[["one","two","three","four","five","six"]]`,
+	)
+	runTest(t, "simple-broken",
+		`[["one","two","three"],["four","five"],[],[],["six"]]`,
+	)
+	runTest(t, "comments",
+		`[[],["one","two","three"],[],["four","five","six"]]`,
+	)
+	runTest(t, "quotes",
+		`[["one","two three"],[],["four five"],[],[" #six","","seven","ei","ght"],[],["multi\nline"]]`,
+	)
+}
+
+func runTest(t *testing.T, bn string, exp string) {
+	tok, err := NewTokenizer("testdata/" + bn + ".tok")
+	if err != nil {
+		t.Fatalf("%s", err.Error())
+	}
+	defer tok.Close()
+
+	tokens := [][]string{}
+	for tok.Scan() {
+		ln := tok.Line()
+		tl := len(tokens)
+		if tl < ln {
+			for i := tl; i < ln; i++ {
+				tokens = append(tokens, []string{})
+			}
+		}
+		tokens[ln-1] = append(tokens[ln-1], tok.Text())
+	}
+	if tok.Err() != nil {
+		t.Fatalf("%s", tok.Err())
+	}
+
+	out, err := json.Marshal(tokens)
+	if err != nil {
+		t.Fatalf("%s", err)
+	}
+	if exp != string(out) {
+		t.Logf("Expected: %s", exp)
+		t.Logf("Got: %s", out)
+		t.Fail()
+	}
+}
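Since Split is an ordinary bufio.SplitFunc, the same behavior could also be exercised from memory rather than testdata files. A possible in-package helper, not part of this commit; it assumes "bufio" and "strings" are imported:

    func tokenizeString(s string) ([]string, error) {
        // Split only consults t.line, so a bare Tokenizer suffices here.
        tk := &Tokenizer{line: 1}
        sc := bufio.NewScanner(strings.NewReader(s))
        sc.Split(tk.Split)
        var out []string
        for sc.Scan() {
            out = append(out, sc.Text())
        }
        return out, sc.Err()
    }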