bblfsh · dennwc · Mar 26, 2019 · Mar 14, 2019 · creachadair · Mar 14, 2019
@@ -19,10 +19,11 @@ var (
 )
 
 var (
-	namespaces = make(map[string]string)
-	package2ns = make(map[string]string)
-	type2name  = make(map[reflect.Type]nodeID)
-	name2type  = make(map[nodeID]reflect.Type)
+	namespaces     = make(map[string]string) // namespace to package
+	package2ns     = make(map[string]string) // package to namespace
+	type2name      = make(map[reflect.Type]nodeID)
+	name2type      = make(map[nodeID]reflect.Type)
+	typeContentKey = make(map[string]string) // ns:type to "content" field name
 )
 
 func parseNodeID(s string) nodeID {
@@ -76,16 +77,36 @@ func RegisterPackage(ns string, types ...interface{}) {
 	package2ns[pkg] = ns
 
 	for _, o := range types {
-		rt := reflect.TypeOf(o)
-		if rt.Kind() == reflect.Ptr {
-			rt = rt.Elem()
+		registerType(ns, o)
+	}
+}
+
+func registerType(ns string, o interface{}) {
+	rt := reflect.TypeOf(o)
+	if rt.Kind() == reflect.Ptr {
+		rt = rt.Elem()
+	}
+	if name, ok := type2name[rt]; ok {
+		panic(fmt.Errorf("type %v already registered under %s name", rt, name))
+	}
+	id := nodeID{NS: ns, Name: rt.Name()}
+	type2name[rt] = id
+	name2type[id] = rt
+	if rt.Kind() != reflect.Struct {
+		return
+	}
+	for i := 0; i < rt.NumField(); i++ {
+		f := rt.Field(i)
+		if f.Anonymous {
+			continue // do not inherit content field
 		}
-		if name, ok := type2name[rt]; ok {
-			panic(fmt.Errorf("type %v already registered under %s name", rt, name))
+		d, err := getFieldDesc(f)
+		if err != nil {
+			panic(err)
+		}
+		if d.Content {
+			typeContentKey[id.String()] = d.Name
 		}
-		name := nodeID{NS: ns, Name: rt.Name()}
-		type2name[rt] = name
-		name2type[name] = rt
 	}
 }
 
@@ -107,7 +128,7 @@ func zeroFieldsTo(obj, opt nodes.Object, rt reflect.Type) error {
 			}
 			continue
 		}
-		name, omit, err := fieldName(f)
+		d, err := getFieldDesc(f)
 		if err != nil {
 			return err
 		}
@@ -124,10 +145,10 @@ func zeroFieldsTo(obj, opt nodes.Object, rt reflect.Type) error {
 		case reflect.Uint, reflect.Uint64, reflect.Uint32, reflect.Uint16, reflect.Uint8:
 			v = nodes.Uint(0)
 		}
-		if omit {
-			opt[name] = v
+		if d.OmitEmpty {
+			opt[d.Name] = v
 		} else {
-			obj[name] = v
+			obj[d.Name] = v
 		}
 	}
 	return nil
@@ -217,23 +238,37 @@ func typeOf(tp reflect.Type) nodeID {
 	return nodeID{NS: ns, Name: name}
 }
 
-func fieldName(f reflect.StructField) (string, bool, error) {
-	name := strings.SplitN(f.Tag.Get("uast"), ",", 2)[0]
-	omitempty := false
-	if name == "" {
+type fieldDesc struct {
+	Name      string
+	OmitEmpty bool
+	Content   bool
+}
+
+func getFieldDesc(f reflect.StructField) (fieldDesc, error) {
+	uastTag := strings.Split(f.Tag.Get("uast"), ",")
+	desc := fieldDesc{
+		Name: uastTag[0],
+	}
+	for _, s := range uastTag[1:] {
+		if s == "content" {
+			desc.Content = true
+			break
+		}
+	}
+	if desc.Name == "" {
 		tags := strings.Split(f.Tag.Get("json"), ",")
 		for _, s := range tags[1:] {
 			if s == "omitempty" {
-				omitempty = true
+				desc.OmitEmpty = true
 				break
 			}
 		}
-		name = tags[0]
+		desc.Name = tags[0]
 	}
-	if name == "" {
-		return "", false, fmt.Errorf("field %s should have uast or json name", f.Name)
+	if desc.Name == "" {
+		return desc, fmt.Errorf("field %s should have uast or json name", f.Name)
 	}
-	return name, omitempty, nil
+	return desc, nil
 }
 
 var (
@@ -344,18 +379,18 @@ func structToNode(obj nodes.Object, rv reflect.Value, rt reflect.Type) error {
 			}
 			continue
 		}
-		name, omit, err := fieldName(ft)
+		d, err := getFieldDesc(ft)
 		if err != nil {
 			return fmt.Errorf("type %s: %v", rt.Name(), err)
 		}
 		v, err := toNodeReflect(f)
 		if err != nil {
 			return err
 		}
-		if v == nil && omit {
+		if v == nil && d.OmitEmpty {
 			continue
 		}
-		obj[name] = v
+		obj[d.Name] = v
 	}
 	return nil
 }
@@ -533,11 +568,11 @@ func nodeToStruct(rv reflect.Value, rt reflect.Type, obj nodes.ExternalObject) e
 			}
 			continue
 		}
-		name, _, err := fieldName(ft)
+		d, err := getFieldDesc(ft)
 		if err != nil {
 			return fmt.Errorf("type %s: %v", rt.Name(), err)
 		}
-		v, ok := obj.ValueAt(name)
+		v, ok := obj.ValueAt(d.Name)
 		if !ok {
 			continue
 		}

@@ -245,6 +245,11 @@ func RolesOf(n nodes.Node) role.Roles {
 }
 
 // TokenOf is a helper for getting node token (see KeyToken).
+//
+// The token is an exact code snippet that represents a given AST node. It only works for
+// primitive nodes like identifiers and string literals, and is only available in Native
+// and Annotated parsing modes. For Semantic mode, see ContentOf.
+//
 // It returns an empty string if the node is not an object, or there is no token.
 func TokenOf(n nodes.Node) string {
 	switch n := n.(type) {
@@ -262,7 +267,7 @@ func TokenOf(n nodes.Node) string {
 	return ""
 }
 
-// Tokens collects all tokens of the tree recursively (pre-order).
+// Tokens collects all tokens of the tree recursively (pre-order). See TokenOf.
 func Tokens(n nodes.Node) []string {
 	var tokens []string
 	nodes.WalkPreOrder(n, func(n nodes.Node) bool {
@@ -276,6 +281,26 @@ func Tokens(n nodes.Node) []string {
 	return tokens
 }
 
+// ContentOf returns any relevant string content of a node. It returns a Name for
+// Identifiers, Value for Strings, etc., and uses TokenOf for non-Semantic nodes.
+//
+// The result may not exactly match the source file since values in Semantic nodes
+// are normalized.
+//
+// It returns an empty string if the node has no string content.
+func ContentOf(n nodes.Node) string {
+	if obj, ok := n.(nodes.Object); ok {
+		typ, _ := obj[KeyType].(nodes.String)
+
+		if field, ok := typeContentKey[string(typ)]; ok {
+			// the content field may itself hold an object, so resolve it recursively
+			return ContentOf(obj[field])
+		}
+	}
+	// fall back to the token
+	return TokenOf(n)
+}
+
 // HashNoPos hashes the node, but skips positional information.
 func HashNoPos(n nodes.External) nodes.Hash {
 	h := nodes.NewHasher()
@@ -310,7 +335,7 @@ type GenNode struct {
 type Identifier struct {
 	GenNode
 	// Name of an entity. Can be any valid UTF8 string.
-	Name string `json:"Name"`
+	Name string `json:"Name" uast:",content"`
 }
 
 // Roles returns a list of UAST node roles that apply to this node.
@@ -337,7 +362,7 @@ type String struct {
 	// Value is a UTF8 string literal value.
 	//
 	// Drivers should remove any quotes and unescape the value according to the language rules.
-	Value string `json:"Value"`
+	Value string `json:"Value" uast:",content"`
 
 	// Format is an optional language-specific string that describes the format of the literal.
 	//
@@ -388,7 +413,7 @@ type Comment struct {
 	//     */
 	//
 	//    only "some comment" is considered a text
-	Text string `json:"Text"`
+	Text string `json:"Text" uast:",content"`
 
 	// Prefix is a set of whitespaces and stylistic characters that appear before
 	// the first line of an actual comment text.
@@ -606,5 +631,5 @@ type Function struct {
 // Bool is a boolean literal.
 type Bool struct {
 	GenNode
-	Value bool `json:"Value"`
+	Value bool `json:"Value" uast:",content"`
 }
@@ -22,6 +22,69 @@ func tObj(typ, tok string) Obj {
 	return obj
 }
 
+func TestContentOf(t *testing.T) {
+	var cases = []struct {
+		name string
+		node interface{}
+		exp  string
+	}{
+		{
+			name: "string",
+			node: nodes.String("a"),
+			exp:  "a",
+		},
+		{
+			name: "int",
+			node: nodes.Int(1),
+			exp:  "1",
+		},
+		{
+			name: "string lit",
+			node: String{
+				Value: "a",
+			},
+			exp: "a",
+		},
+		{
+			name: "bool lit",
+			node: Bool{
+				Value: true,
+			},
+			exp: "true",
+		},
+		{
+			name: "identifier",
+			node: Identifier{
+				Name: "a",
+			},
+			exp: "a",
+		},
+		{
+			name: "comment",
+			node: Comment{
+				Prefix: " ",
+				Text:   "a",
+			},
+			exp: "a",
+		},
+		{
+			name: "array",
+			node: nodes.Array{
+				nodes.String("a"),
+				nodes.String("b"),
+			},
+			exp: "", // TODO(dennwc): define it later if we find a valid use case for it
+		},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			n, err := ToNode(c.node)
+			require.NoError(t, err)
+			require.Equal(t, c.exp, ContentOf(n))
+		})
+	}
+}
+
 func TestPrefixTokens(t *testing.T) {
 	require := require.New(t)