
separated parser from tokenizer, wrote some tokenizer tests

commit b675d8714e (branch: master)
Christopher Ramey, 3 years ago
  1. config/parser.go (126 changed lines)
  2. config/testdata/comments.tok (5 added lines)
  3. config/testdata/quotes.tok (8 added lines)
  4. config/testdata/simple-broken.tok (6 added lines)
  5. config/testdata/simple.tok (1 added line)
  6. config/tokenizer.go (122 added lines)
  7. config/tokenizer_test.go (55 added lines)

config/parser.go: 126 changed lines

@@ -3,9 +3,7 @@ package config
 import (
 	"alrm/alarm"
 	"alrm/check"
-	"bufio"
 	"fmt"
-	"os"
 	"strings"
 )
@@ -21,7 +19,6 @@ const (
 type Parser struct {
 	DebugLevel int
-	Line       int
 	states     []int
 	lastHost   *Host
 	lastGroup  *Group
@@ -31,18 +28,15 @@ type Parser struct {
 }
 func (p *Parser) Parse(fn string) (*Config, error) {
-	file, err := os.Open(fn)
+	config := NewConfig()
+	tok, err := NewTokenizer(fn)
 	if err != nil {
-		return nil, fmt.Errorf("cannot open config \"%s\": %s", fn, err.Error())
+		return nil, err
 	}
-	defer file.Close()
-	config := NewConfig()
+	defer tok.Close()
-	scan := bufio.NewScanner(file)
-	scan.Split(p.Split)
-	for scan.Scan() {
-		tk := scan.Text()
+	for tok.Scan() {
+		tk := tok.Text()
 	stateswitch:
 		switch p.state() {
 		case TK_NONE:
@@ -55,29 +49,29 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 				p.setState(TK_ALARM)
 			default:
 				return nil, fmt.Errorf("invalid token in %s, line %d: \"%s\"",
-					fn, p.Line, tk)
+					fn, tok.Line(), tk)
 			}
 		case TK_SET:
 			key := strings.ToLower(tk)
-			if !scan.Scan() {
+			if !tok.Scan() {
 				return nil, fmt.Errorf("empty value name for set in %s, line %d",
-					fn, p.Line)
+					fn, tok.Line())
 			}
-			value := scan.Text()
+			value := tok.Text()
 			switch key {
 			case "interval":
 				err := config.SetInterval(value)
 				if err != nil {
 					return nil, fmt.Errorf(
 						"invalid number for interval in %s, line %d: \"%s\"",
-						fn, p.Line, value,
+						fn, tok.Line(), value,
 					)
 				}
 			default:
 				return nil, fmt.Errorf("unknown key for set in %s, line %d: \"%s\"",
-					fn, p.Line, tk,
+					fn, tok.Line(), tk,
 				)
 			}
 			p.prevState()
@@ -100,7 +94,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 			p.lastGroup, err = config.NewGroup(tk)
 			if err != nil {
 				return nil, fmt.Errorf("%s in %s, line %d",
-					err.Error(), fn, p.Line,
+					err.Error(), fn, tok.Line(),
 				)
 			}
 			continue
@@ -121,7 +115,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 			p.lastGroup, err = config.NewGroup(tk)
 			if err != nil {
 				return nil, fmt.Errorf("%s in %s, line %d",
-					err.Error(), fn, p.Line,
+					err.Error(), fn, tok.Line(),
 				)
 			}
 		}
@@ -130,7 +124,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 			p.lastHost, err = p.lastGroup.NewHost(tk)
 			if err != nil {
 				return nil, fmt.Errorf("%s in %s, line %d",
-					err.Error(), fn, p.Line,
+					err.Error(), fn, tok.Line(),
 				)
 			}
 			continue
@@ -138,11 +132,11 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 			switch strings.ToLower(tk) {
 			case "address":
-				if !scan.Scan() {
+				if !tok.Scan() {
 					return nil, fmt.Errorf("empty address for host in %s, line %d",
-						fn, p.Line)
+						fn, tok.Line())
 				}
-				p.lastHost.Address = scan.Text()
+				p.lastHost.Address = tok.Text()
 			case "check":
 				p.setState(TK_CHECK)
@@ -157,14 +151,14 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 				p.lastCheck, err = p.lastHost.NewCheck(tk)
 				if err != nil {
 					return nil, fmt.Errorf("%s in %s, line %d",
-						err.Error(), fn, p.Line)
+						err.Error(), fn, tok.Line())
 				}
 				continue
 			}
 			cont, err := p.lastCheck.Parse(tk)
 			if err != nil {
 				return nil, fmt.Errorf("%s in %s, line %d",
-					err.Error(), fn, p.Line)
+					err.Error(), fn, tok.Line())
 			}
 			if !cont {
 				p.lastCheck = nil
@@ -182,7 +176,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 				p.lastAlarm, err = config.NewAlarm(p.lastAlarmName, tk)
 				if err != nil {
 					return nil, fmt.Errorf("%s in %s, line %d",
-						err.Error(), fn, p.Line)
+						err.Error(), fn, tok.Line())
 				}
 				p.lastAlarmName = ""
 				continue
@@ -190,7 +184,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 			cont, err := p.lastAlarm.Parse(tk)
 			if err != nil {
 				return nil, fmt.Errorf("%s in %s, line %d",
-					err.Error(), fn, p.Line)
+					err.Error(), fn, tok.Line())
 			}
 			if !cont {
 				p.lastAlarm = nil
@@ -202,7 +196,7 @@ func (p *Parser) Parse(fn string) (*Config, error) {
 			return nil, fmt.Errorf("unknown parser state: %d", p.state())
 		}
 	}
-	if err := scan.Err(); err != nil {
+	if err := tok.Err(); err != nil {
 		return nil, err
 	}
 	return config, nil
@@ -250,7 +244,7 @@ func (p *Parser) stateName() string {
 	case TK_SET:
 		return "TK_SET"
 	case TK_MONITOR:
-		return "TK_MONTIOR"
+		return "TK_MONITOR"
 	case TK_GROUP:
 		return "TK_GROUP"
 	case TK_HOST:
@@ -263,75 +257,3 @@ func (p *Parser) stateName() string {
 		return "UNKNOWN"
 	}
 }
-func (p *Parser) Split(data []byte, atEOF bool) (int, []byte, error) {
-	if atEOF && len(data) == 0 {
-		return 0, nil, nil
-	}
-	var ignoreline bool
-	var started bool
-	var startidx int
-	var quote byte
-	for i := 0; i < len(data); i++ {
-		c := data[i]
-		//fmt.Printf("%c (%t) (%t)\n", c, started, ignoreline)
-		switch c {
-		case '\f', '\n', '\r':
-			p.Line++
-			if ignoreline {
-				ignoreline = false
-				continue
-			}
-			fallthrough
-		case ' ', '\t', '\v':
-			if started && quote == 0 {
-				return i + 1, data[startidx:i], nil
-			}
-		case '\'', '"', '`':
-			// When the quote ends
-			if quote == c {
-				// if we've gotten data, return it
-				if started {
-					return i + 1, data[startidx:i], nil
-				}
-				// if we haven't return nothing
-				return i + 1, []byte{}, nil
-			}
-			// start a quoted string
-			if !ignoreline && quote == 0 {
-				quote = c
-			}
-		case '#':
-			if !started {
-				ignoreline = true
-			}
-		default:
-			if !ignoreline && !started {
-				started = true
-				startidx = i
-			}
-		}
-	}
-	if atEOF {
-		if quote != 0 {
-			return 0, nil, fmt.Errorf("unterminated quote")
-		}
-		if ignoreline {
-			return len(data), nil, nil
-		}
-		if started {
-			return len(data), data[startidx:], nil
-		}
-	}
-	return 0, nil, nil
-}
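For context: both the removed Split above and the new Tokenizer.Split below implement the standard library's bufio.SplitFunc contract. A minimal, self-contained sketch of wiring a custom split function into a scanner (whitespaceSplit here is a hypothetical stand-in that delegates to the stock bufio.ScanWords):

package main

import (
	"bufio"
	"fmt"
	"strings"
)

// whitespaceSplit has the bufio.SplitFunc shape: given unconsumed input,
// it reports how many bytes to advance, the token to emit (nil for none),
// and any error. Here it simply delegates to bufio.ScanWords.
func whitespaceSplit(data []byte, atEOF bool) (int, []byte, error) {
	return bufio.ScanWords(data, atEOF)
}

func main() {
	scan := bufio.NewScanner(strings.NewReader("one two three"))
	scan.Split(whitespaceSplit)
	for scan.Scan() {
		fmt.Println(scan.Text()) // prints one, two, three
	}
}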

config/testdata/comments.tok: 5 added lines

@@ -0,0 +1,5 @@
# one two three
one two three
#four five six
four five six #seven eight nine
# EOF

config/testdata/quotes.tok: 8 added lines

@@ -0,0 +1,8 @@
"one" "two three"
"four five"
" #six" "" "seven" "ei" "ght"
"multi
line"

config/testdata/simple-broken.tok: 6 added lines

@@ -0,0 +1,6 @@
one two three
four five


six

config/testdata/simple.tok: 1 added line

@@ -0,0 +1 @@
one two three four five six

config/tokenizer.go: 122 added lines

@@ -0,0 +1,122 @@
package config

import (
	"bufio"
	"fmt"
	"os"
)

type Tokenizer struct {
	line    int
	file    *os.File
	scanner *bufio.Scanner
}

func NewTokenizer(fn string) (*Tokenizer, error) {
	var err error
	tk := &Tokenizer{line: 1}
	tk.file, err = os.Open(fn)
	if err != nil {
		return nil, err
	}
	tk.scanner = bufio.NewScanner(tk.file)
	tk.scanner.Split(tk.Split)
	return tk, nil
}

func (t *Tokenizer) Close() error {
	return t.file.Close()
}

func (t *Tokenizer) Scan() bool {
	return t.scanner.Scan()
}

func (t *Tokenizer) Text() string {
	return t.scanner.Text()
}

func (t *Tokenizer) Line() int {
	return t.line
}

func (t *Tokenizer) Err() error {
	return t.scanner.Err()
}
func (t *Tokenizer) Split(data []byte, atEOF bool) (int, []byte, error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}
	var ignoreline bool
	var started bool
	var startidx int
	var quote byte
	for i := 0; i < len(data); i++ {
		c := data[i]
		switch c {
		case '\f', '\n', '\r':
			// a newline ends an unquoted token; leave the newline
			// unconsumed so the next scan advances the line count
			if started && quote == 0 {
				return i, data[startidx:i], nil
			}
			// don't count newlines inside quotes, so a multi-line
			// token reports the line it started on
			if quote == 0 {
				t.line++
			}
			if ignoreline {
				ignoreline = false
				continue
			}
			fallthrough
		case ' ', '\t', '\v':
			if started && quote == 0 {
				return i + 1, data[startidx:i], nil
			}
		case '\'', '"', '`':
			// a matching quote ends the token, even an empty one
			if quote == c {
				return i + 1, data[startidx:i], nil
			}
			// an opening quote starts a token at the next byte
			if !ignoreline && quote == 0 {
				quote = c
				if !started {
					started = true
					startidx = i + 1
				}
			}
		case '#':
			// comments run to end of line, unless quoted or mid-token
			if !started && quote == 0 {
				ignoreline = true
			}
		default:
			if !ignoreline && !started {
				started = true
				startidx = i
			}
		}
	}
	if atEOF {
		if quote != 0 {
			return 0, nil, fmt.Errorf("unterminated quote")
		}
		if ignoreline {
			return len(data), nil, nil
		}
		if started {
			return len(data), data[startidx:], nil
		}
	}
	return 0, nil, nil
}
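As a usage sketch (not part of this commit), the new Tokenizer is driven exactly the way Parse drives it above; this hypothetical example-style function would live alongside the tests in package config:

package config

import "fmt"

// ExampleTokenizer sketches the Tokenizer API introduced above:
// open, scan tokens with their line numbers, check Err, and close.
func ExampleTokenizer() {
	tok, err := NewTokenizer("testdata/simple.tok")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer tok.Close()
	for tok.Scan() {
		fmt.Printf("line %d: %q\n", tok.Line(), tok.Text())
	}
	if err := tok.Err(); err != nil {
		fmt.Println(err)
	}
}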

config/tokenizer_test.go: 55 added lines

@@ -0,0 +1,55 @@
package config

import (
	"encoding/json"
	"testing"
)

func TestTokenizer(t *testing.T) {
	runTest(t, "simple",
		`[["one","two","three","four","five","six"]]`,
	)
	runTest(t, "simple-broken",
		`[["one","two","three"],["four","five"],[],[],["six"]]`,
	)
	runTest(t, "comments",
		`[[],["one","two","three"],[],["four","five","six"]]`,
	)
	runTest(t, "quotes",
		`[["one","two three"],[],["four five"],[],[" #six","","seven","ei","ght"],[],["multi\nline"]]`,
	)
}

func runTest(t *testing.T, bn string, exp string) {
	tok, err := NewTokenizer("testdata/" + bn + ".tok")
	if err != nil {
		t.Fatalf("%s", err.Error())
	}
	defer tok.Close()

	tokens := [][]string{}
	for tok.Scan() {
		ln := tok.Line()
		// grow the per-line slice so tokens[ln-1] exists
		tl := len(tokens)
		if tl < ln {
			for i := tl; i < ln; i++ {
				tokens = append(tokens, []string{})
			}
		}
		tokens[ln-1] = append(tokens[ln-1], tok.Text())
	}
	if tok.Err() != nil {
		t.Fatalf("%s", tok.Err())
	}

	out, err := json.Marshal(tokens)
	if err != nil {
		t.Fatalf("%s", err)
	}
	if exp != string(out) {
		t.Logf("Expected: %s", exp)
		t.Logf("Got: %s", out)
		t.Fail()
	}
}
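Assuming the module layout above, the new tests run with the standard toolchain:

go test ./config -run TestTokenizer -v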