diff --git a/uast/types.go b/uast/types.go index 4a3e93e7..a594abec 100644 --- a/uast/types.go +++ b/uast/types.go @@ -19,10 +19,11 @@ var ( ) var ( - namespaces = make(map[string]string) - package2ns = make(map[string]string) - type2name = make(map[reflect.Type]nodeID) - name2type = make(map[nodeID]reflect.Type) + namespaces = make(map[string]string) // namespace to package + package2ns = make(map[string]string) // package to namespace + type2name = make(map[reflect.Type]nodeID) + name2type = make(map[nodeID]reflect.Type) + typeContentKey = make(map[string]string) // ns:type to "content" field name ) func parseNodeID(s string) nodeID { @@ -76,16 +77,36 @@ func RegisterPackage(ns string, types ...interface{}) { package2ns[pkg] = ns for _, o := range types { - rt := reflect.TypeOf(o) - if rt.Kind() == reflect.Ptr { - rt = rt.Elem() + registerType(ns, o) + } +} + +func registerType(ns string, o interface{}) { + rt := reflect.TypeOf(o) + if rt.Kind() == reflect.Ptr { + rt = rt.Elem() + } + if name, ok := type2name[rt]; ok { + panic(fmt.Errorf("type %v already registered under %s name", rt, name)) + } + id := nodeID{NS: ns, Name: rt.Name()} + type2name[rt] = id + name2type[id] = rt + if rt.Kind() != reflect.Struct { + return + } + for i := 0; i < rt.NumField(); i++ { + f := rt.Field(i) + if f.Anonymous { + continue // do not inherit content field } - if name, ok := type2name[rt]; ok { - panic(fmt.Errorf("type %v already registered under %s name", rt, name)) + d, err := getFieldDesc(f) + if err != nil { + panic(err) + } + if d.Content { + typeContentKey[id.String()] = d.Name } - name := nodeID{NS: ns, Name: rt.Name()} - type2name[rt] = name - name2type[name] = rt } } @@ -107,7 +128,7 @@ func zeroFieldsTo(obj, opt nodes.Object, rt reflect.Type) error { } continue } - name, omit, err := fieldName(f) + d, err := getFieldDesc(f) if err != nil { return err } @@ -124,10 +145,10 @@ func zeroFieldsTo(obj, opt nodes.Object, rt reflect.Type) error { case reflect.Uint, 
// fieldDesc describes how a struct field is mapped to a key of a node object.
type fieldDesc struct {
	// Name is the key name: the uast tag name or, when that is empty, the json tag name.
	Name string
	// OmitEmpty is set when the json tag carries the "omitempty" option.
	// It is only read when the name is taken from the json tag.
	OmitEmpty bool
	// Content is set when the uast tag carries the "content" option.
	Content bool
}

// tagHasOption reports whether opt is present among the comma-separated tag
// options that follow the name.
func tagHasOption(opts []string, opt string) bool {
	for _, s := range opts {
		if s == opt {
			return true
		}
	}
	return false
}

// getFieldDesc builds a field descriptor from the "uast" and "json" struct tags of f.
//
// The name is the first element of the uast tag; if it is empty, the first element
// of the json tag is used instead, and the json "omitempty" option is honored.
// The "content" option is read from the uast tag regardless of where the name
// comes from.
//
// It returns a zero descriptor and an error if neither tag provides a name.
func getFieldDesc(f reflect.StructField) (fieldDesc, error) {
	// strings.Split always returns at least one element, so index 0 is safe
	// even when the tag is absent.
	uastTag := strings.Split(f.Tag.Get("uast"), ",")
	desc := fieldDesc{
		Name:    uastTag[0],
		Content: tagHasOption(uastTag[1:], "content"),
	}
	if desc.Name == "" {
		jsonTag := strings.Split(f.Tag.Get("json"), ",")
		desc.Name = jsonTag[0]
		desc.OmitEmpty = tagHasOption(jsonTag[1:], "omitempty")
	}
	if desc.Name == "" {
		// Do not hand back a half-filled descriptor together with an error.
		return fieldDesc{}, fmt.Errorf("field %s should have uast or json name", f.Name)
	}
	return desc, nil
}
:= obj.ValueAt(name) + v, ok := obj.ValueAt(d.Name) if !ok { continue } diff --git a/uast/uast.go b/uast/uast.go index 7a54db42..12e8a5ce 100644 --- a/uast/uast.go +++ b/uast/uast.go @@ -245,6 +245,11 @@ func RolesOf(n nodes.Node) role.Roles { } // TokenOf is a helper for getting node token (see KeyToken). +// +// The token is an exact code snippet that represents a given AST node. It only works for +// primitive nodes like identifiers and string literals, and is only available in Native +// and Annotated parsing modes. For Semantic mode, see ContentOf. +// // It returns an empty string if the node is not an object, or there is no token. func TokenOf(n nodes.Node) string { switch n := n.(type) { @@ -262,7 +267,7 @@ func TokenOf(n nodes.Node) string { return "" } -// Tokens collects all tokens of the tree recursively (pre-order). +// Tokens collects all tokens of the tree recursively (pre-order). See TokenOf. func Tokens(n nodes.Node) []string { var tokens []string nodes.WalkPreOrder(n, func(n nodes.Node) bool { @@ -276,6 +281,26 @@ func Tokens(n nodes.Node) []string { return tokens } +// ContentOf returns any relevant string content of a node. It returns a Name for +// Identifiers, Value for Strings, etc and uses TokenOf for non-Semantic nodes. +// +// The result may not exactly match the source file since values in Semantic nodes +// are normalized. +// +// It returns an empty string if the node has no string content. +func ContentOf(n nodes.Node) string { + if obj, ok := n.(nodes.Object); ok { + typ, _ := obj[KeyType].(nodes.String) + + if field, ok := typeContentKey[string(typ)]; ok { + // allow nested objects + return ContentOf(obj[field]) + } + } + // fallback to token + return TokenOf(n) +} + // HashNoPos hashes the node, but skips positional information. func HashNoPos(n nodes.External) nodes.Hash { h := nodes.NewHasher() @@ -310,7 +335,7 @@ type GenNode struct { type Identifier struct { GenNode // Name of an entity. Can be any valid UTF8 string. 
- Name string `json:"Name"` + Name string `json:"Name" uast:",content"` } // Roles returns a list of UAST node roles that apply to this node. @@ -337,7 +362,7 @@ type String struct { // Value is a UTF8 string literal value. // // Drivers should remove any quotes and unescape the value according to the language rules. - Value string `json:"Value"` + Value string `json:"Value" uast:",content"` // Format is an optional language-specific string that describes the format of the literal. // @@ -388,7 +413,7 @@ type Comment struct { // */ // // only "some comment" is considered a text - Text string `json:"Text"` + Text string `json:"Text" uast:",content"` // Prefix is a set of whitespaces and stylistic characters that appear before // the first line of an actual comment text. @@ -606,5 +631,5 @@ type Function struct { // Bool is a boolean literal. type Bool struct { GenNode - Value bool `json:"Value"` + Value bool `json:"Value" uast:",content"` } diff --git a/uast/uast_test.go b/uast/uast_test.go index ea6ebc8b..43622df1 100644 --- a/uast/uast_test.go +++ b/uast/uast_test.go @@ -22,6 +22,69 @@ func tObj(typ, tok string) Obj { return obj } +func TestContentOf(t *testing.T) { + var cases = []struct { + name string + node interface{} + exp string + }{ + { + name: "string", + node: nodes.String("a"), + exp: "a", + }, + { + name: "int", + node: nodes.Int(1), + exp: "1", + }, + { + name: "string lit", + node: String{ + Value: "a", + }, + exp: "a", + }, + { + name: "bool lit", + node: Bool{ + Value: true, + }, + exp: "true", + }, + { + name: "identifier", + node: Identifier{ + Name: "a", + }, + exp: "a", + }, + { + name: "comment", + node: Comment{ + Prefix: " ", + Text: "a", + }, + exp: "a", + }, + { + name: "array", + node: nodes.Array{ + nodes.String("a"), + nodes.String("b"), + }, + exp: "", // TODO(dennwc): define it later if we find a valid use case for it + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + n, err := ToNode(c.node) + 
require.NoError(t, err) + require.Equal(t, c.exp, ContentOf(n)) + }) + } +} + func TestPrefixTokens(t *testing.T) { require := require.New(t)