improved tokenizer, added tests

Christopher Ramey 2021-01-18 07:29:38 -09:00
parent b675d8714e
commit cd73181a62
11 changed files with 210 additions and 149 deletions

View File

@@ -1,9 +1,9 @@
 package config
 
 import (
+	"alrm/alarm"
 	"fmt"
 	"time"
-	"alrm/alarm"
 )
 
 type Config struct {

View File

@@ -8,13 +8,13 @@ import (
 )
 
 const (
-	TK_NONE = iota
-	TK_SET
-	TK_MONITOR
-	TK_GROUP
-	TK_HOST
-	TK_CHECK
-	TK_ALARM
+	PR_NONE = iota
+	PR_SET
+	PR_MONITOR
+	PR_GROUP
+	PR_HOST
+	PR_CHECK
+	PR_ALARM
 )
 
 type Parser struct {
@@ -39,20 +39,20 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 		tk := tok.Text()
 	stateswitch:
 		switch p.state() {
-		case TK_NONE:
+		case PR_NONE:
 			switch strings.ToLower(tk) {
 			case "monitor":
-				p.setState(TK_MONITOR)
+				p.setState(PR_MONITOR)
 			case "set":
-				p.setState(TK_SET)
+				p.setState(PR_SET)
 			case "alarm":
-				p.setState(TK_ALARM)
+				p.setState(PR_ALARM)
 			default:
 				return nil, fmt.Errorf("invalid token in %s, line %d: \"%s\"",
 					fn, tok.Line(), tk)
 			}
-		case TK_SET:
+		case PR_SET:
 			key := strings.ToLower(tk)
 			if !tok.Scan() {
 				return nil, fmt.Errorf("empty value name for set in %s, line %d",
@@ -76,20 +76,20 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 			}
 			p.prevState()
-		case TK_MONITOR:
+		case PR_MONITOR:
 			switch strings.ToLower(tk) {
 			case "host":
-				p.setState(TK_HOST)
+				p.setState(PR_HOST)
 			case "group":
-				p.setState(TK_GROUP)
+				p.setState(PR_GROUP)
 			default:
 				p.prevState()
 				goto stateswitch
 			}
-		case TK_GROUP:
+		case PR_GROUP:
 			if p.lastGroup == nil {
 				p.lastGroup, err = config.NewGroup(tk)
 				if err != nil {
@@ -102,14 +102,14 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 			switch strings.ToLower(tk) {
 			case "host":
-				p.setState(TK_HOST)
+				p.setState(PR_HOST)
 			default:
 				p.prevState()
 				goto stateswitch
 			}
-		case TK_HOST:
+		case PR_HOST:
 			// If a host has no group, inherit the host name
 			if p.lastGroup == nil {
 				p.lastGroup, err = config.NewGroup(tk)
@@ -139,14 +139,14 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 				p.lastHost.Address = tok.Text()
 			case "check":
-				p.setState(TK_CHECK)
+				p.setState(PR_CHECK)
 			default:
 				p.prevState()
 				goto stateswitch
 			}
-		case TK_CHECK:
+		case PR_CHECK:
 			if p.lastCheck == nil {
 				p.lastCheck, err = p.lastHost.NewCheck(tk)
 				if err != nil {
@@ -166,7 +166,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 				goto stateswitch
 			}
-		case TK_ALARM:
+		case PR_ALARM:
 			if p.lastAlarm == nil {
 				if p.lastAlarmName == "" {
 					p.lastAlarmName = tk
@@ -204,19 +204,19 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 func (p *Parser) state() int {
 	if len(p.states) < 1 {
-		return TK_NONE
+		return PR_NONE
 	}
 	return p.states[len(p.states)-1]
 }
 
 func (p *Parser) setState(state int) {
 	switch state {
-	case TK_SET, TK_MONITOR:
+	case PR_SET, PR_MONITOR:
 		fallthrough
-	case TK_GROUP:
+	case PR_GROUP:
 		p.lastGroup = nil
 		fallthrough
-	case TK_HOST:
+	case PR_HOST:
 		p.lastHost = nil
 		p.lastCheck = nil
 	}
@@ -239,20 +239,20 @@ func (p *Parser) prevState() int {
 func (p *Parser) stateName() string {
 	switch p.state() {
-	case TK_NONE:
-		return "TK_NONE"
-	case TK_SET:
-		return "TK_SET"
-	case TK_MONITOR:
-		return "TK_MONITOR"
-	case TK_GROUP:
-		return "TK_GROUP"
-	case TK_HOST:
-		return "TK_HOST"
-	case TK_CHECK:
-		return "TK_CHECK"
-	case TK_ALARM:
-		return "TK_ALARM"
+	case PR_NONE:
+		return "PR_NONE"
+	case PR_SET:
+		return "PR_SET"
+	case PR_MONITOR:
+		return "PR_MONITOR"
+	case PR_GROUP:
+		return "PR_GROUP"
+	case PR_HOST:
+		return "PR_HOST"
+	case PR_CHECK:
+		return "PR_CHECK"
+	case PR_ALARM:
+		return "PR_ALARM"
 	default:
 		return "UNKNOWN"
 	}
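
The PR_* constants above drive a small state stack: state() (shown in the diff) reads the top of p.states, while setState() and prevState() presumably push and pop it, clearing the per-level lastGroup/lastHost/lastCheck pointers on the way in. A minimal standalone sketch of that stack discipline, with the push and pop bodies assumed rather than taken from this commit:

// Sketch only: state() mirrors the method shown in the diff; the push in
// setState and the pop in prevState are assumptions for illustration, not
// code from this commit.
package main

import "fmt"

const (
	PR_NONE = iota
	PR_SET
	PR_MONITOR
)

type parser struct {
	states []int
}

func (p *parser) state() int {
	if len(p.states) < 1 {
		return PR_NONE // an empty stack reads as PR_NONE
	}
	return p.states[len(p.states)-1]
}

func (p *parser) setState(state int) {
	p.states = append(p.states, state) // assumed: push
}

func (p *parser) prevState() int {
	if len(p.states) > 0 {
		p.states = p.states[:len(p.states)-1] // assumed: pop
	}
	return p.state()
}

func main() {
	p := &parser{}
	p.setState(PR_MONITOR)
	p.setState(PR_SET)
	fmt.Println(p.state())     // PR_SET
	fmt.Println(p.prevState()) // PR_MONITOR
	fmt.Println(p.prevState()) // PR_NONE
}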

config/testdata/comments-inline.tok (new file)

@@ -0,0 +1,5 @@
+one #one
+"two#three"
+# "three"
+four
+# EOF

View File

@@ -1,5 +1,6 @@
-# one two three
-one two three
-#four five six
-four five six #seven eight nine
-# EOF
+# one
+one
+#two
+two
+# three
+three

config/testdata/quotes-multiline.tok (new file)

@@ -0,0 +1,6 @@
+"one
+two" 'three
+four'
+
+`five
+ six`

View File

@@ -1,8 +1,3 @@
-"one" "two three"
-
-"four five"
-
-" #six" "" "seven" "ei" "ght"
-
-"multi
-line"
+"one" 'two' `three`
+
+`four` 'five' "six"

config/testdata/simple-spaces.tok (new file)

@@ -0,0 +1 @@
+one two three four five six

View File

@@ -1 +0,0 @@
-one two three four five six

View File

@@ -2,26 +2,37 @@ package config
 
 import (
 	"bufio"
-	"fmt"
+	"io"
 	"os"
+	"strings"
+	"unicode"
+)
+
+const (
+	TK_NONE = iota
+	TK_VAL
+	TK_QUOTE
+	TK_COMMENT
 )
 
 type Tokenizer struct {
-	line    int
+	curline int
+	repline int
 	file    *os.File
-	scanner *bufio.Scanner
+	reader  *bufio.Reader
+	text    string
+	err     error
 }
 
 func NewTokenizer(fn string) (*Tokenizer, error) {
 	var err error
 
-	tk := &Tokenizer{line: 1}
+	tk := &Tokenizer{curline: 1}
 	tk.file, err = os.Open(fn)
 	if err != nil {
 		return nil, err
 	}
 
-	tk.scanner = bufio.NewScanner(tk.file)
-	tk.scanner.Split(tk.Split)
+	tk.reader = bufio.NewReader(tk.file)
 	return tk, nil
 }
@@ -30,93 +41,114 @@ func (t *Tokenizer) Close() error {
 }
 
 func (t *Tokenizer) Scan() bool {
-	return t.scanner.Scan()
+	t.repline = t.curline
+	state := TK_NONE
+	t.text = ""
+
+	var b strings.Builder
+	var quo rune
+	for {
+		var r rune
+		r, _, t.err = t.reader.ReadRune()
+		if t.err != nil {
+			break
+		}
+
+		switch state {
+		case TK_NONE:
+			// When between values, increment both the reported line
+			// and the current line, since there's not yet anything
+			// to report
+			if r == '\n' {
+				t.repline++
+				t.curline++
+			}
+
+			// If we're between values and we encounter a space
+			// or a control character, ignore it
+			if unicode.IsSpace(r) || unicode.IsControl(r) {
+				continue
+			}
+
+			// If we're between values and we encounter a #, it's
+			// the beginning of a comment
+			if r == '#' {
+				state = TK_COMMENT
+				continue
+			}
+
+			// If we're between values and we get a quote character,
+			// treat it as the beginning of a string literal
+			if r == '"' || r == '\'' || r == '`' {
+				state = TK_QUOTE
+				quo = r
+				continue
+			}
+
+			b.WriteRune(r)
+			state = TK_VAL
+		case TK_VAL:
+			// In values, only increment the current line, so
+			// if an error is reported, it reports the line
+			// the value starts on
+			if r == '\n' {
+				t.curline++
+			}
+
+			// If we're in a normal value and we encounter a space
+			// or a control character, end the value
+			if unicode.IsSpace(r) || unicode.IsControl(r) {
+				goto end
+			}
+
+			b.WriteRune(r)
+		case TK_QUOTE:
+			// In quotes, only increment the current line, so
+			// if an error is reported, it reports the line
+			// the quoted value starts on
+			if r == '\n' {
+				t.curline++
+			}
+
+			// End this quote if it's another quote of the same rune
+			if r == quo {
+				goto end
+			}
+
+			b.WriteRune(r)
+		case TK_COMMENT:
+			// Comments are ignored until a newline is encountered,
+			// at which point increment the current and reported line
+			if r == '\n' {
+				t.curline++
+				t.repline++
+				state = TK_NONE
+			}
+			continue
+		}
+	}
+
+end:
+	if t.err == nil || t.err == io.EOF {
+		if b.Len() > 0 {
+			t.text = b.String()
+		}
+	}
+
+	return t.err == nil
 }
 
 func (t *Tokenizer) Text() string {
-	return t.scanner.Text()
+	return t.text
 }
 
 func (t *Tokenizer) Line() int {
-	return t.line
+	return t.repline
 }
 
 func (t *Tokenizer) Err() error {
-	return t.scanner.Err()
-}
-
-func (t *Tokenizer) Split(data []byte, atEOF bool) (int, []byte, error) {
-	if atEOF && len(data) == 0 {
-		return 0, nil, nil
-	}
-
-	var ignoreline bool
-	var started bool
-	var startidx int
-	var quote byte
-	for i := 0; i < len(data); i++ {
-		c := data[i]
-		//fmt.Printf("%c (%t) (%t)\n", c, started, ignoreline)
-		switch c {
-		case '\f', '\n', '\r':
-			if started {
-				return i, data[startidx:i], nil
-			}
-			t.line++
-			if ignoreline {
-				ignoreline = false
-				continue
-			}
-			fallthrough
-		case ' ', '\t', '\v':
-			if started && quote == 0 {
-				return i + 1, data[startidx:i], nil
-			}
-		case '\'', '"', '`':
-			// When the quote ends
-			if quote == c {
-				// if we've gotten data, return it
-				if started {
-					return i + 1, data[startidx:i], nil
-				}
-				// if we haven't, return nothing
-				return i + 1, []byte{}, nil
-			}
-			// start a quoted string
-			if !ignoreline && quote == 0 {
-				quote = c
-			}
-		case '#':
-			if !started {
-				ignoreline = true
-			}
-		default:
-			if !ignoreline && !started {
-				started = true
-				startidx = i
-			}
-		}
-	}
-
-	if atEOF {
-		if quote != 0 {
-			return 0, nil, fmt.Errorf("unterminated quote")
-		}
-		if ignoreline {
-			return len(data), nil, nil
-		}
-		if started {
-			return len(data), data[startidx:], nil
-		}
-	}
-	return 0, nil, nil
+	if t.err == io.EOF {
+		return nil
+	}
+	return t.err
 }
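
For reference, a minimal caller of the new Tokenizer API (NewTokenizer, Scan, Text, Line, Err and Close all appear in the diff above); the "alrm/config" import path and the "example.conf" input file are assumptions, not part of this commit:

package main

import (
	"fmt"
	"log"

	"alrm/config" // assumed import path, matching the "alrm/alarm" import above
)

func main() {
	tok, err := config.NewTokenizer("example.conf") // hypothetical input file
	if err != nil {
		log.Fatal(err)
	}
	defer tok.Close()

	// Scan returns false at EOF or on a read error; Err reports nil at EOF.
	for tok.Scan() {
		fmt.Printf("line %d: %q\n", tok.Line(), tok.Text())
	}
	if err := tok.Err(); err != nil {
		log.Fatal(err)
	}
}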

View File

@@ -1,26 +1,48 @@
 package config
 
 import (
-	"testing"
 	"encoding/json"
+	"testing"
 )
 
-func TestTokenizer(t *testing.T) {
-	runTest(t, "simple",
+func TestSimpleSpaces(t *testing.T) {
+	runTest(t, "simple-spaces",
 		`[["one","two","three","four","five","six"]]`,
 	)
-	runTest(t, "simple-broken",
+}
+
+func TestSimpleMultiline(t *testing.T) {
+	runTest(t, "simple-multiline",
 		`[["one","two","three"],["four","five"],[],[],["six"]]`,
 	)
-	runTest(t, "comments",
-		`[[],["one","two","three"],[],["four","five","six"]]`,
-	)
+}
+
+func TestQuotes(t *testing.T) {
 	runTest(t, "quotes",
-		`[["one","two three",[],["four five"],[],[" #six","","seven","ei","ght"],[],["multi\nline"]]`,
+		`[["one","two","three"],[],["four","five","six"]]`,
+	)
+}
+
+func TestQuotesMultiline(t *testing.T) {
+	runTest(t, "quotes-multiline",
+		`[["one\ntwo"],["three\nfour"],[],[],["five\n six"]]`,
+	)
+}
+
+func TestComments(t *testing.T) {
+	runTest(t, "comments",
+		`[[],["one"],[],["two"],[],["three"]]`,
+	)
+}
+
+func TestCommentsInline(t *testing.T) {
+	runTest(t, "comments-inline",
+		`[["one"],["two#three"],[],["four"]]`,
 	)
 }
 
 func runTest(t *testing.T, bn string, exp string) {
-	t.Logf("Running testdata/%s.tok.. ", bn)
 	tok, err := NewTokenizer("testdata/" + bn + ".tok")
 	if err != nil {
 		t.Fatalf("%s", err.Error())
@@ -50,6 +72,6 @@ func runTest(t *testing.T, bn string, exp string) {
 	if exp != string(out) {
 		t.Logf("Expected: %s", exp)
 		t.Logf("Got: %s", out)
-		t.Fail()
+		t.FailNow()
 	}
 }
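
The expected strings above group tokens by the line they are reported on. A hypothetical helper showing how runTest most likely builds that JSON (the real body is only partially visible in this diff, so the name tokensAsJSON and the grouping code are illustrative assumptions, written as if they sat in package config alongside the tests):

package config

import "encoding/json"

// tokensAsJSON is a hypothetical reconstruction of how runTest probably
// serializes the scanned tokens before comparing against exp: tokens are
// grouped by their reported Line().
func tokensAsJSON(tok *Tokenizer) (string, error) {
	lines := [][]string{}
	for tok.Scan() {
		// Grow the outer slice up to the token's reported line, so lines
		// holding only comments or whitespace serialize as [].
		for len(lines) < tok.Line() {
			lines = append(lines, []string{})
		}
		i := tok.Line() - 1
		lines[i] = append(lines[i], tok.Text())
	}
	if err := tok.Err(); err != nil {
		return "", err
	}
	out, err := json.Marshal(lines)
	return string(out), err
}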