Skip to content

Commit

Permalink
fix #111
Browse files Browse the repository at this point in the history
  • Loading branch information
zhengchun committed Dec 10, 2023
1 parent f30da80 commit cde8fe6
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 7 deletions.
19 changes: 12 additions & 7 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"net/http"
"regexp"
"strings"
"sync"

"github.com/antchfx/xpath"
"golang.org/x/net/html/charset"
Expand Down Expand Up @@ -59,6 +60,8 @@ type parser struct {
streamNode *Node // Need to remember the last target node So we can clean it up upon next Read() call.
streamNodePrev *Node // Need to remember target node's prev so upon target node removal, we can restore correct prev.
reader *cachedReader // Need to maintain a reference to the reader, so we can determine whether a node contains CDATA.
once sync.Once
space2prefix map[string]string
}

func createParser(r io.Reader) *parser {
Expand All @@ -77,9 +80,11 @@ func createParser(r io.Reader) *parser {
}

func (p *parser) parse() (*Node, error) {
var streamElementNodeCounter int
space2prefix := map[string]string{"http://www.w3.org/XML/1998/namespace": "xml"}
p.once.Do(func() {
p.space2prefix = map[string]string{"http://www.w3.org/XML/1998/namespace": "xml"}
})

var streamElementNodeCounter int
for {
p.reader.StartCaching()
tok, err := p.decoder.Token()
Expand Down Expand Up @@ -108,24 +113,24 @@ func (p *parser) parse() (*Node, error) {

for _, att := range tok.Attr {
if att.Name.Local == "xmlns" {
space2prefix[att.Value] = "" // reset empty if exist the default namespace
p.space2prefix[att.Value] = "" // reset empty if exist the default namespace
// defaultNamespaceURL = att.Value
} else if att.Name.Space == "xmlns" {
// Could the same namespace URL be declared more than once (under different prefixes)?
space2prefix[att.Value] = att.Name.Local
p.space2prefix[att.Value] = att.Name.Local
}
}

if space := tok.Name.Space; space != "" {
if _, found := space2prefix[space]; !found && p.decoder.Strict {
if _, found := p.space2prefix[space]; !found && p.decoder.Strict {
return nil, fmt.Errorf("xmlquery: invalid XML document, namespace %s is missing", space)
}
}

attributes := make([]Attr, len(tok.Attr))
for i, att := range tok.Attr {
name := att.Name
if prefix, ok := space2prefix[name.Space]; ok {
if prefix, ok := p.space2prefix[name.Space]; ok {
name.Space = prefix
}
attributes[i] = Attr{
Expand Down Expand Up @@ -155,7 +160,7 @@ func (p *parser) parse() (*Node, error) {
}

if node.NamespaceURI != "" {
if v, ok := space2prefix[node.NamespaceURI]; ok {
if v, ok := p.space2prefix[node.NamespaceURI]; ok {
cached := string(p.reader.Cache())
if strings.HasPrefix(cached, fmt.Sprintf("%s:%s", v, node.Data)) || strings.HasPrefix(cached, fmt.Sprintf("<%s:%s", v, node.Data)) {
node.Prefix = v
Expand Down
38 changes: 38 additions & 0 deletions parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -562,3 +562,41 @@ func TestXMLPreservation(t *testing.T) {
testOutputXML(t, "first call result",
`<?xml version="1.0" encoding="UTF-8"?><AAA><CCC><![CDATA[c1]]></CCC></AAA>`, doc)
}

// TestStreamParser_DefaultNamespace streams a document whose root element
// declares a default namespace (xmlns="...") and reads three nodes in a row
// from the stream parser, checking each one against the expected <Object>
// markup. It was added in the commit titled "fix #111".
func TestStreamParser_DefaultNamespace(t *testing.T) {
	doc := `
<Objects xmlns="http://example.com/schema/2007/someschema">
<Object id="ObjectA">ObjectA</Object>
<Object id="ObjectB">ObjectB</Object>
<Object id="ObjectC">ObjectD</Object>
</Objects>`

	// The XPath selects children of Objects by namespace URI and local name,
	// so the match does not depend on any prefix.
	stream, err := CreateStreamParser(strings.NewReader(doc), "//Objects/*[namespace-uri()=\"http://example.com/schema/2007/someschema\" and local-name()=\"Object\"]")
	if err != nil {
		t.Fatal(err.Error())
	}

	// One entry per Read() call, in document order.
	// NOTE(review): testOutputXML is defined elsewhere; presumably it compares
	// the node's serialized XML with the expected string — confirm.
	reads := []struct {
		label string
		want  string
	}{
		{label: "first call result", want: `<Object id="ObjectA">ObjectA</Object>`},
		{label: "second call result", want: `<Object id="ObjectB">ObjectB</Object>`},
		{label: "third call result", want: `<Object id="ObjectC">ObjectD</Object>`},
	}

	for _, step := range reads {
		node, readErr := stream.Read()
		if readErr != nil {
			t.Fatal(readErr.Error())
		}
		testOutputXML(t, step.label, step.want, node)
	}
}

0 comments on commit cde8fe6

Please sign in to comment.