diff --git a/pkg/grok/base.go b/pkg/grok/base.go new file mode 100644 index 0000000000..298cb41a5b --- /dev/null +++ b/pkg/grok/base.go @@ -0,0 +1,152 @@ +// +// Copyright (c) 2016-2017 Konstanin Ivanov . +// All rights reserved. This program is free software. It comes without +// any warranty, to the extent permitted by applicable law. You can +// redistribute it and/or modify it under the terms of the Do What +// The Fuck You Want To Public License, Version 2, as published by +// Sam Hocevar. See LICENSE file for more details or see below. +// + +// +// DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +// Version 2, December 2004 +// +// Copyright (C) 2004 Sam Hocevar +// +// Everyone is permitted to copy and distribute verbatim or modified +// copies of this license document, and changing it is allowed as long +// as the name is changed. +// +// DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +// TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION +// +// 0. You just DO WHAT THE FUCK YOU WANT TO. +// + +package grokky + +func must(err error) { + if err != nil { + panic(err) + } +} + +// Must is like Add but panics if the expression can't be parsed or +// the name is empty. +func (h Host) Must(name, expr string) { + must(h.Add(name, expr)) +} + +// NewBase creates new Host that filled up with base patterns. +// To see all base patterns open 'base.go' file. 
+func NewBase() Host { + h := make(Host) + // + h.Must("USERNAME", `[a-zA-Z0-9._-]+`) + h.Must("USER", `%{USERNAME}`) + h.Must("EMAILLOCALPART", `[a-zA-Z][a-zA-Z0-9_.+-=:]+`) + h.Must("HOSTNAME", `\b[0-9A-Za-z][0-9A-Za-z-]{0,62}(?:\.[0-9A-Za-z][0-9A-Za-z-]{0,62})*(\.?|\b)`) + h.Must("EMAILADDRESS", `%{EMAILLOCALPART}@%{HOSTNAME}`) + h.Must("HTTPDUSER", `%{EMAILADDRESS}|%{USER}`) + h.Must("INT", `[+-]?(?:[0-9]+)`) + h.Must("BASE10NUM", `[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+))`) + h.Must("NUMBER", `%{BASE10NUM}`) + h.Must("BASE16NUM", `[+-]?(?:0x)?(?:[0-9A-Fa-f]+)`) + h.Must("BASE16FLOAT", `\b[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+))\b`) + // + h.Must("POSINT", `\b[1-9][0-9]*\b`) + h.Must("NONNEGINT", `\b[0-9]+\b`) + h.Must("WORD", `\b\w+\b`) + h.Must("NOTSPACE", `\S+`) + h.Must("SPACE", `\s*`) + h.Must("DATA", `.*?`) + h.Must("GREEDYDATA", `.*`) + h.Must("QUOTEDSTRING", `("(\\.|[^\\"]+)+")|""|('(\\.|[^\\']+)+')|''|`+ + "(`(\\\\.|[^\\\\`]+)+`)|``") + h.Must("UUID", `[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}`) + // Networking + h.Must("CISCOMAC", `(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}`) + h.Must("WINDOWSMAC", `(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}`) + h.Must("COMMONMAC", `(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2}`) + h.Must("MAC", `%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC}`) + h.Must("IPV6", 
`((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?`) + h.Must("IPV4", `(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))`) + h.Must("IP", `%{IPV6}|%{IPV4}`) + h.Must("IPORHOST", `%{IP}|%{HOSTNAME}`) + h.Must("HOSTPORT", `%{IPORHOST}:%{POSINT}`) + + // paths + h.Must("UNIXPATH", `(/([\w_%!$@:.,~-]+|\\.)*)+`) + h.Must("TTY", `/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+)`) + h.Must("WINPATH", `(?:[A-Za-z]+:|\\)(?:\\[^\\?*]*)+`) + h.Must("PATH", `%{UNIXPATH}|%{WINPATH}`) + h.Must("URIPROTO", `[A-Za-z]+(\+[A-Za-z+]+)?`) + h.Must("URIHOST", `%{IPORHOST}(?::%{POSINT:port})?`) + // uripath comes loosely from RFC1738, but mostly from what Firefox + // doesn't turn into %XX + h.Must("URIPATH", `(?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+`) + h.Must("URIPARAM", `\?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*`) + h.Must("URIPATHPARAM", 
`%{URIPATH}(?:%{URIPARAM})?`) + h.Must("URI", `%{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?`) + // Months: January, Feb, 3, 03, 12, December + h.Must("MONTH", `\bJan(?:uary|uar)?|Feb(?:ruary|ruar)?|M(?:a|รค)?r(?:ch|z)?|Apr(?:il)?|Ma(?:y|i)?|Jun(?:e|i)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|O(?:c|k)?t(?:ober)?|Nov(?:ember)?|De(?:c|z)(?:ember)?\b`) + h.Must("MONTHNUM", `0?[1-9]|1[0-2]`) + h.Must("MONTHNUM2", `0[1-9]|1[0-2]`) + h.Must("MONTHDAY", `(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]`) + // Days: Monday, Tue, Thu, etc... + h.Must("DAY", `Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?`) + // Years? + h.Must("YEAR", `(?:\d\d){1,2}`) + h.Must("HOUR", `2[0123]|[01]?[0-9]`) + h.Must("MINUTE", `[0-5][0-9]`) + // '60' is a leap second in most time standards and thus is valid. + h.Must("SECOND", `(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?`) + h.Must("TIME", `%{HOUR}:%{MINUTE}:%{SECOND}`) + // datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it) + h.Must("DATE_US", `%{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}`) + h.Must("DATE_EU", `%{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}`) + // I really don't know how it's called + h.Must("DATE_X", `%{YEAR}/%{MONTHNUM2}/%{MONTHDAY}`) + h.Must("ISO8601_TIMEZONE", `Z|[+-]%{HOUR}(?::?%{MINUTE})`) + h.Must("ISO8601_SECOND", `%{SECOND}|60`) + h.Must("TIMESTAMP_ISO8601", `%{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?`) + h.Must("DATE", `%{DATE_US}|%{DATE_EU}|%{DATE_X}`) + h.Must("DATESTAMP", `%{DATE}[- ]%{TIME}`) + h.Must("TZ", `[A-Z]{3}`) + h.Must("NUMTZ", `[+-]\d{4}`) + h.Must("DATESTAMP_RFC822", `%{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}`) + h.Must("DATESTAMP_RFC2822", `%{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}`) + h.Must("DATESTAMP_OTHER", `%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}`) + h.Must("DATESTAMP_EVENTLOG", `%{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}`) + 
h.Must("HTTPDERROR_DATE", `%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}`) + // golang time patterns + h.Must("ANSIC", `%{DAY} %{MONTH} [_123]\d %{TIME} %{YEAR}"`) + h.Must("UNIXDATE", `%{DAY} %{MONTH} [_123]\d %{TIME} %{TZ} %{YEAR}`) + h.Must("RUBYDATE", `%{DAY} %{MONTH} [0-3]\d %{TIME} %{NUMTZ} %{YEAR}`) + h.Must("RFC822Z", `[0-3]\d %{MONTH} %{YEAR} %{TIME} %{NUMTZ}`) + h.Must("RFC850", `%{DAY}, [0-3]\d-%{MONTH}-%{YEAR} %{TIME} %{TZ}`) + h.Must("RFC1123", `%{DAY}, [0-3]\d %{MONTH} %{YEAR} %{TIME} %{TZ}`) + h.Must("RFC1123Z", `%{DAY}, [0-3]\d %{MONTH} %{YEAR} %{TIME} %{NUMTZ}`) + h.Must("RFC3339", `%{YEAR}-[01]\d-[0-3]\dT%{TIME}%{ISO8601_TIMEZONE}`) + h.Must("RFC3339NANO", `%{YEAR}-[01]\d-[0-3]\dT%{TIME}\.\d{9}%{ISO8601_TIMEZONE}`) + h.Must("KITCHEN", `\d{1,2}:\d{2}(AM|PM|am|pm)`) + // Syslog Dates: Month Day HH:MM:SS + h.Must("SYSLOGTIMESTAMP", `%{MONTH} +%{MONTHDAY} %{TIME}`) + h.Must("PROG", `[\x21-\x5a\x5c\x5e-\x7e]+`) + h.Must("SYSLOGPROG", `%{PROG:program}(?:\[%{POSINT:pid}\])?`) + h.Must("SYSLOGHOST", `%{IPORHOST}`) + h.Must("SYSLOGFACILITY", `<%{NONNEGINT:facility}.%{NONNEGINT:priority}>`) + h.Must("HTTPDATE", `%{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}`) + // Shortcuts + h.Must("QS", `%{QUOTEDSTRING}`) + // Log Levels + h.Must("LOGLEVEL", `[Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?`) + // Log formats + h.Must("SYSLOGBASE", `%{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:`) + h.Must("COMMONAPACHELOG", `%{IPORHOST:clientip} %{HTTPDUSER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-)`) + h.Must("COMBINEDAPACHELOG", `%{COMMONAPACHELOG} %{QS:referrer} %{QS:agent}`) + h.Must("HTTPD20_ERRORLOG", 
`\[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg}`) + h.Must("HTTPD24_ERRORLOG", `\[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel}\] \[pid %{POSINT:pid}:tid %{NUMBER:tid}\]( \(%{POSINT:proxy_errorcode}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message}`) + h.Must("HTTPD_ERRORLOG", `%{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG}`) + return h +} diff --git a/pkg/grok/grok.go b/pkg/grok/grok.go new file mode 100644 index 0000000000..dc6458b61a --- /dev/null +++ b/pkg/grok/grok.go @@ -0,0 +1,261 @@ +// +// Copyright (c) 2016-2017 Konstanin Ivanov . +// All rights reserved. This program is free software. It comes without +// any warranty, to the extent permitted by applicable law. You can +// redistribute it and/or modify it under the terms of the Do What +// The Fuck You Want To Public License, Version 2, as published by +// Sam Hocevar. See LICENSE file for more details or see below. +// + +// +// DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +// Version 2, December 2004 +// +// Copyright (C) 2004 Sam Hocevar +// +// Everyone is permitted to copy and distribute verbatim or modified +// copies of this license document, and changing it is allowed as long +// as the name is changed. +// +// DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +// TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION +// +// 0. You just DO WHAT THE FUCK YOU WANT TO. +// + +// Package grokky is a pure Golang Grok-like patterns library. This can +// help you to parse log files and other. This is based on RE2 regexp +// that much more faster then Oniguruma. The library disigned for creating +// many patterns and using it many times. The behavior and capabilities +// are slightly different from the original library. The golas of the +// library are: (1) simplicity, (2) performance, (3) ease of use. 
// Overview: grokky is a pure Go, Grok-like pattern library that can
// help to parse log files and similar text. It is built on Go's RE2
// based regexp package. The library is designed for creating many
// patterns once and using them many times. Its behavior and
// capabilities differ slightly from the original Grok. The goals of the
// library are: (1) simplicity, (2) performance, (3) ease of use.

// http://play.golang.org/p/vb18r_OZkK

// patternRegexp matches a reference to another pattern, written either
// as %{NAME} or as %{NAME:semantic}. Submatch 1 is the pattern name and
// submatch 3 is the optional semantic (capture) name.
var patternRegexp = regexp.MustCompile(`\%\{(\w+)(\:(\w+))?}`)

var (
	// ErrEmptyName arises when pattern name is an empty string
	ErrEmptyName = errors.New("an empty name")
	// ErrEmptyExpression arises when expression is an empty string
	ErrEmptyExpression = errors.New("an empty expression")
	// ErrAlreadyExist arises when pattern with given name already exists
	ErrAlreadyExist = errors.New("the pattern already exist")
	// ErrNotExist arises when pattern with given name doesn't exist
	ErrNotExist = errors.New("pattern doesn't exist")
)

// helpers

// split extracts the pattern name and the optional semantic from a
// single %{NAME} or %{NAME:semantic} reference. Both results are empty
// when s contains no such reference.
func split(s string) (name, sem string) {
	if m := patternRegexp.FindStringSubmatch(s); m != nil {
		name, sem = m[1], m[3]
	}
	return name, sem
}

// wrap encloses s in a capturing group.
func wrap(s string) string { return "(" + s + ")" }

// host

// Host is a patterns collection. Feel free to
// delete the Host after all patterns (that you need)
// are created. Think of it as a kind of factory.
type Host map[string]string

// New returns new empty host
func New() Host { return make(Host) }

// Add a new pattern to the Host. It returns ErrEmptyName or
// ErrEmptyExpression for empty arguments and ErrAlreadyExist if a
// pattern with the given name already exists. The expression is
// compiled first, so an invalid regular expression or a reference to an
// unknown pattern is reported as an error and nothing is added.
func (h Host) Add(name, expr string) error {
	if name == "" {
		return ErrEmptyName
	}
	if expr == "" {
		return ErrEmptyExpression
	}
	if _, ok := h[name]; ok {
		return ErrAlreadyExist
	}
	if _, err := h.compileExternal(expr); err != nil {
		return err
	}
	h[name] = expr
	return nil
}

// compile compiles the pattern stored under name, or returns
// ErrNotExist when the Host has no such pattern.
func (h Host) compile(name string) (*Pattern, error) {
	expr, ok := h[name]
	if !ok {
		return nil, ErrNotExist
	}
	return h.compileExternal(expr)
}

// compileExternal expands every %{NAME} / %{NAME:semantic} reference in
// expr with the (recursively compiled) pattern registered under NAME,
// wrapped in a capturing group, and compiles the result.
//
// While expanding it records, for every semantic, the index of the
// capturing group that holds its value. Semantics declared inside a
// referenced pattern are inherited unless expr itself declares a
// semantic with the same name.
func (h Host) compileExternal(expr string) (*Pattern, error) {
	refs := patternRegexp.FindAllString(expr, -1)
	// No references: expr is a plain regular expression.
	if len(refs) == 0 {
		r, err := regexp.Compile(expr)
		if err != nil {
			return nil, err
		}
		return &Pattern{Regexp: r}, nil
	}
	// Check that every referenced pattern exists and collect the
	// semantics declared directly by expr.
	own := make(map[string]struct{}, len(refs))
	for _, ref := range refs {
		name, sem := split(ref)
		if _, ok := h[name]; !ok {
			return nil, fmt.Errorf("the '%s' pattern doesn't exist", name)
		}
		own[sem] = struct{}{}
	}
	// parts are the literal regexp fragments around the references;
	// there is always exactly one more part than there are references.
	parts := patternRegexp.Split(expr, -1)
	semantics := make(map[string]int)
	order := 1 // index of the next capturing group in the result
	var res strings.Builder
	for i, ref := range refs {
		part := parts[i]
		// Groups opened by the literal fragment come first.
		order += capCount(part)
		name, sem := split(ref)
		sub, err := h.compile(name)
		if err != nil {
			return nil, err
		}
		if sem != "" {
			semantics[sem] = order
		}
		res.WriteString(part)
		res.WriteString(wrap(sub.String()))
		// Inherit nested semantics; their indexes are relative to the
		// wrapping group, which sits at index order.
		for k, v := range sub.s {
			if _, ok := own[k]; !ok {
				semantics[k] = order + v
			}
		}
		// Skip the wrapping group plus every group inside it.
		order += sub.NumSubexp() + 1
	}
	res.WriteString(parts[len(parts)-1])
	r, err := regexp.Compile(res.String())
	if err != nil {
		return nil, err
	}
	return &Pattern{Regexp: r, s: semantics}, nil
}

// Get pattern by name from the Host
func (h Host) Get(name string) (*Pattern, error) {
	return h.compile(name)
}

// Compile and get pattern without name (and without adding it to this Host)
func (h Host) Compile(expr string) (*Pattern, error) {
	if expr == "" {
		return nil, ErrEmptyExpression
	}
	return h.compileExternal(expr)
}

// Pattern is a pattern.
// Feel free to use the Pattern as regexp.Regexp.
type Pattern struct {
	*regexp.Regexp
	// s maps a semantic name to the index of its capturing group.
	s map[string]int
}

// Parse returns map (name->match) on input. The map is empty when the
// input does not match or the pattern has no semantics. A semantic
// whose group did not take part in the match maps to "".
func (p *Pattern) Parse(input string) map[string]string {
	ss := p.FindStringSubmatch(input)
	r := make(map[string]string)
	if len(ss) <= 1 {
		return r
	}
	for sem, order := range p.s {
		r[sem] = ss[order]
	}
	return r
}

// Names returns all semantic names that this pattern has, in no
// particular order.
func (p *Pattern) Names() []string {
	ss := make([]string, 0, len(p.s))
	for k := range p.s {
		ss = append(ss, k)
	}
	return ss
}

// lineRegexp matches a pattern definition line: a name made of word
// characters, whitespace, then the expression up to the end of line.
var lineRegexp = regexp.MustCompile(`^(\w+)\s+(.+)$`)

// addFromLine adds the pattern defined by line. Lines that do not look
// like "NAME expression" (blank lines, lines starting with '#', ...)
// are silently skipped.
func (h Host) addFromLine(line string) error {
	sub := lineRegexp.FindStringSubmatch(line)
	if len(sub) == 0 { // not match
		return nil
	}
	return h.Add(sub[1], sub[2])
}

// AddFromFile appends all patterns from the file to this Host. It stops
// at the first line that can't be added and returns that error;
// patterns added before the failing line stay in the Host.
func (h Host) AddFromFile(path string) error {
	file, err := os.Open(path)
	if err != nil {
		return err
	}
	defer file.Close()
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		if err := h.addFromLine(scanner.Text()); err != nil {
			return err
		}
	}
	return scanner.Err()
}

// capCount returns the number of capturing groups opened in the regexp
// fragment in. The fragment does not have to be a complete regular
// expression (groups may be left unclosed).
//
// A '(' opens a capturing group unless it is escaped, sits inside a
// character class or a \Q...\E literal, or starts a "(?" construct that
// is not a named group ("(?P<name>" or "(?<name>").
func capCount(in string) int {
	count := 0
	inClass := false
	for i := 0; i < len(in); i++ {
		c := in[i]
		switch {
		case c == '\\':
			if !inClass && strings.HasPrefix(in[i:], `\Q`) {
				// Everything up to \E is literal text.
				end := strings.Index(in[i+2:], `\E`)
				if end < 0 {
					return count
				}
				i += end + 3 // land on the 'E' of the closing \E
				continue
			}
			i++ // skip the escaped character
		case inClass:
			if c == ']' {
				inClass = false
			} else if strings.HasPrefix(in[i:], "[:") {
				// POSIX class such as [:alpha:]; its closing ']' does
				// not end the enclosing character class.
				if end := strings.Index(in[i+2:], ":]"); end >= 0 {
					i += end + 3
				}
			}
		case c == '[':
			inClass = true
			if i+1 < len(in) && in[i+1] == '^' {
				i++
			}
			// A ']' right after the opening (or after '^') is literal.
			if i+1 < len(in) && in[i+1] == ']' {
				i++
			}
		case c == '(':
			rest := in[i+1:]
			if !strings.HasPrefix(rest, "?") ||
				strings.HasPrefix(rest, "?P<") ||
				strings.HasPrefix(rest, "?<") {
				count++
			}
		}
	}
	return count
}
+// + +package grokky + +//go test -coverprofile cover.out && go tool cover -html=cover.out -o cover.html + +import ( + "bufio" + "io/ioutil" + "os" + "testing" +) + +const ( + patternsTest = "patterns_pass.txt" + patternsFailTest = "patterns_fail.txt" +) + +func TestNew(t *testing.T) { + h := New() + if len(h) != 0 { + t.Error("New returns non-empty host") + } + if h == nil { + t.Error("New returns nil") + } +} + +func testEmptyName(t *testing.T, h Host) { + l := len(h) + if err := h.Add("", "expr"); err == nil { + t.Error("(Host).Add is missing ErrEmptyName") + } else if err != ErrEmptyName { + t.Error("(Host).Add returns non-ErrEmptyName error") + } + if len(h) > l { + t.Error("added bad patterns") + } +} + +func testEmptyExpression(t *testing.T, h Host) { + l := len(h) + if err := h.Add("zorro", ""); err == nil { + t.Error("(Host).Add is missing ErrEmptyExpression") + } else if err != ErrEmptyExpression { + t.Error("(Host).Add returns non-ErrEmptyExpression error") + } + if len(h) > l { + t.Error("added bad patterns") + } +} + +func testNormalPattern(t *testing.T, h Host) { + l := len(h) + if err := h.Add("DIGIT", `\d`); err != nil { + t.Errorf("(Host).Add returns non-nil error: %v", err) + } + if len(h) != l+1 { + t.Error("wrong patterns count") + } +} + +// must be invoked direct after testNormalPattern +func testAlreadyExists(t *testing.T, h Host) { + l := len(h) + if err := h.Add("DIGIT", `[+-](0x)?\d`); err == nil { + t.Error("(Host).Add is missing ErrAlreadyExist") + } else if err != ErrAlreadyExist { + t.Error("(Host).Add returns non-ErrAlreadyExist error") + } + if len(h) != l { + t.Error("wrong patterns count") + } +} + +func TestHost_Add(t *testing.T) { + h := New() + testEmptyName(t, h) + testEmptyExpression(t, h) + testNormalPattern(t, h) + testAlreadyExists(t, h) + if err := h.Add("BAD", `(?![0-5])`); err == nil { + t.Error("(Host).Add is missing any bad-regexp error") + } + if len(h) != 1 { + t.Error("wrong patterns count") + } + if err := 
h.Add("TWODIG", `%{DIGIT}-%{DIGIT}`); err != nil { + t.Errorf("(Host).Add returns non-nil error: %v", err) + } + if len(h) != 2 { + t.Error("wrong patterns count") + } + if err := h.Add("THREE", `%{NOT}-%{EXIST}`); err == nil { + t.Errorf("(Host).Add is missing the-pattern-not-exist error") + } + if len(h) != 2 { + t.Error("wrong patterns count") + } + if err := h.Add("FOUR", `%{DIGIT:one}-%{DIGIT:two}`); err != nil { + t.Errorf("(Host).Add returns non-nil error: %v", err) + } + if len(h) != 3 { + t.Error("wrong patterns count") + } + if err := h.Add("FIVE", `(?!\d)%{DIGIT}(?!\d)`); err == nil { + t.Errorf("(Host).Add is missing an error of regexp") + } + if len(h) != 3 { + t.Error("wrong patterns count") + } + if err := h.Add("SIX", `%{FOUR:four}-%{DIGIT:six}`); err != nil { + t.Errorf("(Host).Add returns non-nil error") + } + if len(h) != 4 { + t.Error("wrong patterns count") + } +} + +func TestHost_Compile(t *testing.T) { + h := New() + if _, err := h.Compile(""); err == nil { + t.Error("(Host).Compile missing ErrEmptyExpression") + } else if err != ErrEmptyExpression { + t.Error("(Host).Compile returns non-ErrEmptyExpression error") + } + if len(h) != 0 { + t.Error("(Host).Compile: (bad) pattern added to host") + } + if p, err := h.Compile(`\d+`); err != nil { + t.Error("(Host).Compile error:", err) + } else if p == nil { + t.Error("(Host).Compile returns nil (and no errors)") + } + if len(h) != 0 { + t.Error("(Host).Compile: pattern added to host") + } +} + +func TestHost_Get(t *testing.T) { + h := New() + if err := h.Add("DIG", `\d`); err != nil { + t.Error(err) + } + if p, err := h.Get("DIG"); err != nil { + t.Error(err) + } else if p == nil { + t.Error("(Host).Get returns nil (and nil-error)") + } + if p, err := h.Get("SEVEN"); err == nil { + t.Error("(Host).Get is missing ErrNotExist") + } else if p != nil { + t.Error("(Host).Get returns non-nil not-exsted-pattern") + } +} + +func tempFile(t *testing.T) (name string) { + f, err := ioutil.TempFile("", "") + 
if err != nil { + t.Skip("unable to create temporary file") + return + } + defer f.Close() + if _, err = f.Write(make([]byte, bufio.MaxScanTokenSize+1)); err != nil { + t.Skip("unable to write to temporary file") + return + } + return f.Name() +} + +func TestHost_AddFromFile(t *testing.T) { + h := New() + if err := h.AddFromFile(patternsTest); err != nil { + t.Error(err) + } + if len(h) != 3 { + t.Error("wrong patterns count") + } + if _, err := h.Get("ONE"); err != nil { + t.Error(err) + } + if _, err := h.Get("TWO"); err != nil { + t.Error(err) + } + if _, err := h.Get("THREE"); err != nil { + t.Error(err) + } +} + +func TestHost_AddFromFile_malformedPatterns(t *testing.T) { + h := New() + if err := h.AddFromFile(patternsFailTest); err == nil { + t.Error("(Host).AddFromFile (should fail): missing error") + } +} + +func TestHost_AddFromFile_scannerError(t *testing.T) { + h := New() + name := tempFile(t) + t.Log("create tmporary file:", name) + defer os.Remove(name) + if err := h.AddFromFile(name); err == nil { + t.Error("(Host).AddFromFile (should fail): missing error") + } +} + +func TestHost_inject(t *testing.T) { + h := New() + h["TWO"] = `(?!\d)` + if err := h.Add("ONE", `%{TWO:one}`); err == nil { + t.Error("bad injection returns nil error") + } +} + +func TestHost_badPath(t *testing.T) { + h := New() + if err := h.AddFromFile("unexisted-file-without-patterns"); err == nil { + t.Error("bad path with nil error") + } +} + +func TestHost_addFromLine(t *testing.T) { + h := New() + if err := h.addFromLine("ONE (?!\\d)"); err == nil { + t.Error("bad line with nil error") + } +} diff --git a/pkg/grok/pattern_test.go b/pkg/grok/pattern_test.go new file mode 100644 index 0000000000..64d9c160e1 --- /dev/null +++ b/pkg/grok/pattern_test.go @@ -0,0 +1,143 @@ +// +// Copyright (c) 2016-2017 Konstanin Ivanov . +// All rights reserved. This program is free software. It comes without +// any warranty, to the extent permitted by applicable law. 
You can +// redistribute it and/or modify it under the terms of the Do What +// The Fuck You Want To Public License, Version 2, as published by +// Sam Hocevar. See LICENSE file for more details or see below. +// + +// +// DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +// Version 2, December 2004 +// +// Copyright (C) 2004 Sam Hocevar +// +// Everyone is permitted to copy and distribute verbatim or modified +// copies of this license document, and changing it is allowed as long +// as the name is changed. +// +// DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +// TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION +// +// 0. You just DO WHAT THE FUCK YOU WANT TO. +// + +package grokky + +import ( + "testing" +) + +func terr(t *testing.T, err error) { + if err != nil { + t.Error(err) + } +} + +func mssTest(expect, got map[string]string) bool { + if len(expect) != len(got) { + return false + } + for k, v := range expect { + if v != got[k] { + return false + } + } + return true +} + +func TestPattern_Parse(t *testing.T) { + h := New() + // compile + terr(t, h.Add("ONE", `\d`)) + terr(t, h.Add("TWO", `%{ONE:one}-%{ONE:two}`)) + terr(t, h.Add("THREE", `%{ONE:zero}-%{TWO:three}`)) + // + if p, err := h.Get("ONE"); err != nil { + t.Error(err) + } else if !mssTest(nil, p.Parse("1")) { + t.Error("unnamed result") + } + p, err := h.Get("TWO") + if err != nil { + t.Error(err) + } + if !mssTest(map[string]string{"one": "1", "two": "2"}, p.Parse("1-2")) { + t.Error("bad result") + } + p, err = h.Get("THREE") + if err != nil { + t.Error(err) + } + if !mssTest(map[string]string{ + "one": "1", + "two": "2", + "zero": "0", + "three": "1-2", + }, p.Parse("0-1-2")) { + t.Error("bad result") + } + if err := h.Add("FOUR", `%{TWO:two}`); err != nil { + t.Error(err) + } + p, err = h.Get("FOUR") + if err != nil { + t.Error(err) + } + if !mssTest(map[string]string{"one": "1", "two": "1-2"}, p.Parse("1-2")) { + t.Error("bad result") + } +} + +func TestPattern_nestedGroups(t 
*testing.T) { + h := New() + if err := h.Add("ONE", `\d`); err != nil { + t.Error(err) + } + if err := h.Add("TWO", `(?:%{ONE:one})-(?:%{ONE:two})?`); err != nil { + t.Error(err) + } + p, err := h.Get("TWO") + if err != nil { + t.Error(err) + } + mss := p.Parse("1-2") + if len(mss) != 2 || + mss["one"] != "1" || + mss["two"] != "2" { + t.Error("bad result") + } + mss = p.Parse("1-") + if len(mss) != 2 || + mss["one"] != "1" || + mss["two"] != "" { + t.Error("bad result") + } +} + +func TestPattern_Names(t *testing.T) { + h := New() + if err := h.Add("ONE", `\d`); err != nil { + t.Error(err) + } + if err := h.Add("TWO", `%{ONE:one}-%{ONE:two}`); err != nil { + t.Error(err) + } + if err := h.Add("THREE", `%{ONE:zero}-%{TWO:three}`); err != nil { + t.Error(err) + } + p, err := h.Get("THREE") + if err != nil { + t.Fatal(err) + } + ss := p.Names() + if len(ss) != 4 { + t.Error("Names returns wrong values count") + } + for _, v := range ss { + if !(v == "one" || v == "two" || v == "zero" || v == "three") { + t.Error("Names returns wrong values:", v) + } + } +} diff --git a/pkg/grok/patterns_fail.txt b/pkg/grok/patterns_fail.txt new file mode 100644 index 0000000000..cfcc672904 --- /dev/null +++ b/pkg/grok/patterns_fail.txt @@ -0,0 +1,2 @@ +ONE \d +TWO %{THREE:two} diff --git a/pkg/grok/patterns_pass.txt b/pkg/grok/patterns_pass.txt new file mode 100644 index 0000000000..0d2992273a --- /dev/null +++ b/pkg/grok/patterns_pass.txt @@ -0,0 +1,10 @@ +# +# for testing +# +ONE \d +TWO %{ONE:two} +THREE %{ONE:one}-%{TWO}-%{ONE:three} + +# +# enough +#