Browse Source

improved tokenizer, added tests

master
Christopher Ramey 3 years ago
parent
commit
cd73181a62
  1. 2
      config/config.go
  2. 78
      config/parser.go
  3. 5
      config/testdata/comments-inline.tok
  4. 11
      config/testdata/comments.tok
  5. 6
      config/testdata/quotes-multiline.tok
  6. 9
      config/testdata/quotes.tok
  7. 0
      config/testdata/simple-multiline.tok
  8. 1
      config/testdata/simple-spaces.tok
  9. 1
      config/testdata/simple.tok
  10. 180
      config/tokenizer.go
  11. 40
      config/tokenizer_test.go

2
config/config.go

@ -1,9 +1,9 @@
package config
import (
"alrm/alarm"
"fmt"
"time"
"alrm/alarm"
)
type Config struct {

78
config/parser.go

@ -8,13 +8,13 @@ import (
)
// Parser states, one per configuration context being parsed.
// (Renamed from TK_* to PR_* to avoid clashing with the tokenizer's
// TK_* state constants in this package.)
const (
	PR_NONE = iota // no context yet; expecting a top-level keyword
	PR_SET
	PR_MONITOR
	PR_GROUP
	PR_HOST
	PR_CHECK
	PR_ALARM
)
type Parser struct {
@ -39,20 +39,20 @@ func (p *Parser) Parse(fn string) (*Config, error) {
tk := tok.Text()
stateswitch:
switch p.state() {
case TK_NONE:
case PR_NONE:
switch strings.ToLower(tk) {
case "monitor":
p.setState(TK_MONITOR)
p.setState(PR_MONITOR)
case "set":
p.setState(TK_SET)
p.setState(PR_SET)
case "alarm":
p.setState(TK_ALARM)
p.setState(PR_ALARM)
default:
return nil, fmt.Errorf("invalid token in %s, line %d: \"%s\"",
fn, tok.Line(), tk)
}
case TK_SET:
case PR_SET:
key := strings.ToLower(tk)
if !tok.Scan() {
return nil, fmt.Errorf("empty value name for set in %s, line %d",
@ -76,20 +76,20 @@ func (p *Parser) Parse(fn string) (*Config, error) {
}
p.prevState()
case TK_MONITOR:
case PR_MONITOR:
switch strings.ToLower(tk) {
case "host":
p.setState(TK_HOST)
p.setState(PR_HOST)
case "group":
p.setState(TK_GROUP)
p.setState(PR_GROUP)
default:
p.prevState()
goto stateswitch
}
case TK_GROUP:
case PR_GROUP:
if p.lastGroup == nil {
p.lastGroup, err = config.NewGroup(tk)
if err != nil {
@ -102,14 +102,14 @@ func (p *Parser) Parse(fn string) (*Config, error) {
switch strings.ToLower(tk) {
case "host":
p.setState(TK_HOST)
p.setState(PR_HOST)
default:
p.prevState()
goto stateswitch
}
case TK_HOST:
case PR_HOST:
// If a host has no group, inherit the host name
if p.lastGroup == nil {
p.lastGroup, err = config.NewGroup(tk)
@ -139,14 +139,14 @@ func (p *Parser) Parse(fn string) (*Config, error) {
p.lastHost.Address = tok.Text()
case "check":
p.setState(TK_CHECK)
p.setState(PR_CHECK)
default:
p.prevState()
goto stateswitch
}
case TK_CHECK:
case PR_CHECK:
if p.lastCheck == nil {
p.lastCheck, err = p.lastHost.NewCheck(tk)
if err != nil {
@ -166,7 +166,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
goto stateswitch
}
case TK_ALARM:
case PR_ALARM:
if p.lastAlarm == nil {
if p.lastAlarmName == "" {
p.lastAlarmName = tk
@ -204,19 +204,19 @@ func (p *Parser) Parse(fn string) (*Config, error) {
// state returns the parser's current state (the top of the state
// stack), or PR_NONE when the stack is empty.
func (p *Parser) state() int {
	if len(p.states) < 1 {
		return PR_NONE
	}
	return p.states[len(p.states)-1]
}
func (p *Parser) setState(state int) {
switch state {
case TK_SET, TK_MONITOR:
case PR_SET, PR_MONITOR:
fallthrough
case TK_GROUP:
case PR_GROUP:
p.lastGroup = nil
fallthrough
case TK_HOST:
case PR_HOST:
p.lastHost = nil
p.lastCheck = nil
}
@ -239,20 +239,20 @@ func (p *Parser) prevState() int {
func (p *Parser) stateName() string {
switch p.state() {
case TK_NONE:
return "TK_NONE"
case TK_SET:
return "TK_SET"
case TK_MONITOR:
return "TK_MONITOR"
case TK_GROUP:
return "TK_GROUP"
case TK_HOST:
return "TK_HOST"
case TK_CHECK:
return "TK_CHECK"
case TK_ALARM:
return "TK_ALARM"
case PR_NONE:
return "PR_NONE"
case PR_SET:
return "PR_SET"
case PR_MONITOR:
return "PR_MONITOR"
case PR_GROUP:
return "PR_GROUP"
case PR_HOST:
return "PR_HOST"
case PR_CHECK:
return "PR_CHECK"
case PR_ALARM:
return "PR_ALARM"
default:
return "UNKNOWN"
}

5
config/testdata/comments-inline.tok

@ -0,0 +1,5 @@
one #one
"two#three"
# "three"
four
# EOF

11
config/testdata/comments.tok

@ -1,5 +1,6 @@
# one two three
one two three
#four five six
four five six #seven eight nine
# EOF
# one
one
#two
two
# three
three

6
config/testdata/quotes-multiline.tok

@ -0,0 +1,6 @@
"one
two" 'three
four'
`five
six`

9
config/testdata/quotes.tok

@ -1,8 +1,3 @@
"one" "two three"
"one" 'two' `three`
"four five"
" #six" "" "seven" "ei" "ght"
"multi
line"
`four` 'five' "six"

0
config/testdata/simple-broken.tok → config/testdata/simple-multiline.tok

1
config/testdata/simple-spaces.tok

@ -0,0 +1 @@
one two three four five six

1
config/testdata/simple.tok

@ -1 +0,0 @@
one two three four five six

180
config/tokenizer.go

@ -2,26 +2,37 @@ package config
import (
"bufio"
"fmt"
"io"
"os"
"strings"
"unicode"
)
// Tokenizer scanner states.
const (
	TK_NONE    = iota // between tokens
	TK_VAL            // inside an unquoted value
	TK_QUOTE          // inside a quoted string literal
	TK_COMMENT        // inside a #-comment (runs to end of line)
)
// Tokenizer reads whitespace-separated, optionally quoted tokens
// from a configuration file, tracking line numbers for error
// reporting.
type Tokenizer struct {
	curline int           // line currently being read
	repline int           // line reported for the current token (the line it started on)
	file    *os.File      // underlying file handle
	reader  *bufio.Reader // buffered rune reader over file
	text    string        // most recently scanned token
	err     error         // first read error encountered, including io.EOF
}
// NewTokenizer opens the file named fn and returns a Tokenizer
// positioned at its start. The caller is responsible for calling
// Close when finished.
func NewTokenizer(fn string) (*Tokenizer, error) {
	var err error
	// Line counting is 1-based.
	tk := &Tokenizer{curline: 1}
	tk.file, err = os.Open(fn)
	if err != nil {
		return nil, err
	}
	tk.reader = bufio.NewReader(tk.file)
	return tk, nil
}
@ -30,93 +41,114 @@ func (t *Tokenizer) Close() error {
}
func (t *Tokenizer) Scan() bool {
return t.scanner.Scan()
}
func (t *Tokenizer) Text() string {
return t.scanner.Text()
}
func (t *Tokenizer) Line() int {
return t.line
}
func (t *Tokenizer) Err() error {
return t.scanner.Err()
}
func (t *Tokenizer) Split(data []byte, atEOF bool) (int, []byte, error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}
t.repline = t.curline
state := TK_NONE
t.text = ""
var b strings.Builder
var quo rune
for {
var r rune
r, _, t.err = t.reader.ReadRune()
if t.err != nil {
break
}
var ignoreline bool
var started bool
var startidx int
var quote byte
for i := 0; i < len(data); i++ {
c := data[i]
//fmt.Printf("%c (%t) (%t)\n", c, started, ignoreline)
switch c {
case '\f', '\n', '\r':
if started {
return i, data[startidx:i], nil
switch state {
case TK_NONE:
// When between values, increment both the reported line
// and the current line, since there's not yet anything
// to report
if r == '\n' {
t.repline++
t.curline++
}
t.line++
if ignoreline {
ignoreline = false
// If we're between values and we encounter a space
// or a control character, ignore it
if unicode.IsSpace(r) || unicode.IsControl(r) {
continue
}
fallthrough
case ' ', '\t', '\v':
if started && quote == 0 {
return i + 1, data[startidx:i], nil
// If we're between values and we encounter a #, it's
// the beginning of a comment
if r == '#' {
state = TK_COMMENT
continue
}
case '\'', '"', '`':
// When the quote ends
if quote == c {
// if we've gotten data, return it
if started {
return i + 1, data[startidx:i], nil
}
// if we haven't return nothing
return i + 1, []byte{}, nil
// If we're between values and we get a quote character
// treat it as the beginning of a string literal
if r == '"' || r == '\'' || r == '`' {
state = TK_QUOTE
quo = r
continue
}
// start a quoted string
if !ignoreline && quote == 0 {
quote = c
b.WriteRune(r)
state = TK_VAL
case TK_VAL:
// In values, only increment the current line, so
// if an error is reported, it reports the line
// the value starts on
if r == '\n' {
t.curline++
}
case '#':
if !started {
ignoreline = true
// If we're in a normal value and we encounter a space
// or a control character, end value
if unicode.IsSpace(r) || unicode.IsControl(r) {
goto end
}
b.WriteRune(r)
case TK_QUOTE:
// In quotes, only increment the current line, so
// if an error is reported, it reports the line
// the quoted value starts on
if r == '\n' {
t.curline++
}
default:
if !ignoreline && !started {
started = true
startidx = i
// End this quote if it's another quote of the same rune
if r == quo {
goto end
}
b.WriteRune(r)
case TK_COMMENT:
// Comments are ignored, until a new line is encounter
// at which point, increment the current and reported line
if r == '\n' {
t.curline++
t.repline++
state = TK_NONE
}
continue
}
}
if atEOF {
if quote != 0 {
return 0, nil, fmt.Errorf("unterminated quote")
}
if ignoreline {
return len(data), nil, nil
}
if started {
return len(data), data[startidx:], nil
end:
if t.err == nil || t.err == io.EOF {
if b.Len() > 0 {
t.text = b.String()
}
}
return t.err == nil
}
return 0, nil, nil
// Text returns the most recently scanned token.
func (t *Tokenizer) Text() string {
	return t.text
}
// Line returns the line number to report for the current token
// (the line the token started on).
func (t *Tokenizer) Line() int {
	return t.repline
}
// Err returns the first error encountered while scanning.
// io.EOF is reported as nil, since end of input is not an error.
func (t *Tokenizer) Err() error {
	if t.err == io.EOF {
		return nil
	}
	return t.err
}

40
config/tokenizer_test.go

@ -1,26 +1,48 @@
package config
import (
"testing"
"encoding/json"
"testing"
)
func TestTokenizer(t *testing.T) {
runTest(t, "simple",
func TestSimpleSpaces(t *testing.T) {
runTest(t, "simple-spaces",
`[["one","two","three","four","five","six"]]`,
)
runTest(t, "simple-broken",
}
// TestSimpleMultiline checks tokenizing unquoted values spread over
// several lines, including empty lines.
func TestSimpleMultiline(t *testing.T) {
	runTest(t, "simple-multiline",
		`[["one","two","three"],["four","five"],[],[],["six"]]`,
	)
}
// TestQuotes checks tokenizing values wrapped in double, single,
// and backtick quotes on single lines.
func TestQuotes(t *testing.T) {
	runTest(t, "quotes",
		`[["one","two","three"],[],["four","five","six"]]`,
	)
}
// TestQuotesMultiline checks that quoted values may span line breaks,
// with the newline preserved inside the token.
func TestQuotesMultiline(t *testing.T) {
	runTest(t, "quotes-multiline",
		`[["one\ntwo"],["three\nfour"],[],[],["five\n six"]]`,
	)
}
func TestComments(t *testing.T) {
runTest(t, "comments",
`[[],["one","two","three"],[],["four","five","six"]]`,
`[[],["one"],[],["two"],[],["three"]]`,
)
runTest(t, "quotes",
`[["one","two three",[],["four five"],[],[" #six","","seven","ei","ght"],[],["multi\nline"]]`,
}
// TestCommentsInline checks that a # after a value starts a comment,
// while a # inside a quoted token is kept literally.
func TestCommentsInline(t *testing.T) {
	runTest(t, "comments-inline",
		`[["one"],["two#three"],[],["four"]]`,
	)
}
func runTest(t *testing.T, bn string, exp string) {
t.Logf("Running testdata/%s.tok.. ", bn)
tok, err := NewTokenizer("testdata/" + bn + ".tok")
if err != nil {
t.Fatalf("%s", err.Error())
@ -49,7 +71,7 @@ func runTest(t *testing.T, bn string, exp string) {
if exp != string(out) {
t.Logf("Expected: %s", exp)
t.Logf("Got: %s", out)
t.Fail()
t.Logf("Got: %s", out)
t.FailNow()
}
}
Loading…
Cancel
Save