alrm/config/tokenizer.go

158 lines
2.8 KiB
Go
Raw Normal View History

package config
import (
"bufio"
"fmt"
2021-01-18 07:29:38 -09:00
"io"
"os"
2021-01-18 07:29:38 -09:00
"strings"
"unicode"
)
// Scanner states used by Tokenizer.Scan while it walks the input one rune
// at a time.
const (
	// TK_NONE: between tokens; whitespace and control characters are skipped.
	TK_NONE = iota
	// TK_VAL: inside an unquoted value, which ends at the next whitespace
	// or control character.
	TK_VAL
	// TK_QUOTE: inside a quoted value, which ends at the matching quote rune.
	TK_QUOTE
	// TK_COMMENT: inside a # comment, which ends at the next newline.
	TK_COMMENT
)
// Tokenizer splits a file into whitespace-separated tokens, with support
// for quoted values and # comments. Create one with NewTokenizer, call Scan
// repeatedly, and read each token with Text.
type Tokenizer struct {
	// curline is the line of the current read position; it is incremented
	// for every newline consumed.
	curline int
	// repline is the line returned by Line. Scan resets it to curline when
	// it starts, so it tracks the line on which the current token begins.
	repline int
	// file is the underlying file opened by NewTokenizer and released by Close.
	file *os.File
	// reader is the buffered reader over file from which runes are read.
	reader *bufio.Reader
	// text holds the token produced by the most recent call to Scan.
	text string
	// err is the error that ended the most recent Scan; it may be io.EOF.
	err error
}
func NewTokenizer(fn string) (*Tokenizer, error) {
var err error
2021-01-18 07:29:38 -09:00
tk := &Tokenizer{curline: 1}
tk.file, err = os.Open(fn)
if err != nil {
return nil, err
}
2021-01-18 07:29:38 -09:00
tk.reader = bufio.NewReader(tk.file)
return tk, nil
}
// Close closes the underlying file and returns the error, if any, reported
// by the file's Close method.
func (t *Tokenizer) Close() error {
	return t.file.Close()
}
func (t *Tokenizer) Scan() bool {
2021-01-18 07:29:38 -09:00
t.repline = t.curline
state := TK_NONE
t.text = ""
var b strings.Builder
var quo rune
for {
var r rune
r, _, t.err = t.reader.ReadRune()
if t.err != nil {
break
}
if r == unicode.ReplacementChar {
t.err = fmt.Errorf("invalid utf-8 encoding on line %s", t.repline)
break
}
2021-01-18 07:29:38 -09:00
switch state {
case TK_NONE:
// When between values, increment both the reported line
// and the current line, since there's not yet anything
// to report
if r == '\n' {
t.repline++
t.curline++
}
2021-01-18 07:29:38 -09:00
// If we're between values and we encounter a space
// or a control character, ignore it
if unicode.IsSpace(r) || unicode.IsControl(r) {
continue
}
2021-01-18 07:29:38 -09:00
// If we're between values and we encounter a #, it's
// the beginning of a comment
if r == '#' {
state = TK_COMMENT
continue
}
2021-01-18 07:29:38 -09:00
// If we're between values and we get a quote character
// treat it as the beginning of a string literal
if r == '"' || r == '\'' || r == '`' {
state = TK_QUOTE
quo = r
continue
}
2021-01-18 07:29:38 -09:00
b.WriteRune(r)
state = TK_VAL
case TK_VAL:
// In values, only increment the current line, so
// if an error is reported, it reports the line
// the value starts on
if r == '\n' {
t.curline++
}
2021-01-18 07:29:38 -09:00
// If we're in a normal value and we encounter a space
// or a control character, end value
if unicode.IsSpace(r) || unicode.IsControl(r) {
goto end
}
b.WriteRune(r)
case TK_QUOTE:
// In quotes, only increment the current line, so
// if an error is reported, it reports the line
// the quoted value starts on
if r == '\n' {
t.curline++
}
2021-01-18 07:29:38 -09:00
// End this quote if it's another quote of the same rune
if r == quo {
goto end
}
b.WriteRune(r)
case TK_COMMENT:
// Comments are ignored, until a new line is encounter
// at which point, increment the current and reported line
if r == '\n' {
t.curline++
t.repline++
state = TK_NONE
}
2021-01-18 07:29:38 -09:00
continue
}
}
2021-01-18 07:29:38 -09:00
end:
if t.err == nil || t.err == io.EOF {
2021-01-18 07:40:56 -09:00
t.text = b.String()
}
2021-01-18 07:29:38 -09:00
return t.err == nil
}
2021-01-18 07:29:38 -09:00
// Text returns the token stored by the most recent call to Scan. It is
// empty if that call collected nothing or ended with an error other than
// io.EOF.
func (t *Tokenizer) Text() string {
	return t.text
}
// Line returns the reported line number: the line on which the token from
// the most recent call to Scan begins.
func (t *Tokenizer) Line() int {
	return t.repline
}
// Err returns the error recorded by the most recent call to Scan. Reaching
// the end of the input is not treated as a failure, so io.EOF is reported
// as nil.
func (t *Tokenizer) Err() error {
	if t.err != io.EOF {
		return t.err
	}
	return nil
}