xml_decoder.go raw

   1  package xml
   2  
   3  import (
   4  	"encoding/xml"
   5  	"fmt"
   6  	"strings"
   7  )
   8  
// NodeDecoder is an XML decoder wrapper that is responsible for decoding
// a single XML node element and its nested member elements. This wrapper decoder
// takes in the start element of the top level node being decoded.
type NodeDecoder struct {
	// Decoder is the underlying XML token stream the methods read from.
	Decoder *xml.Decoder
	// StartEl is the start element of the node being decoded; its matching
	// end element is what the methods treat as the end of the node.
	StartEl xml.StartElement
}
  16  
  17  // WrapNodeDecoder returns an initialized XMLNodeDecoder
  18  func WrapNodeDecoder(decoder *xml.Decoder, startEl xml.StartElement) NodeDecoder {
  19  	return NodeDecoder{
  20  		Decoder: decoder,
  21  		StartEl: startEl,
  22  	}
  23  }
  24  
  25  // Token on a Node Decoder returns a xml StartElement. It returns a boolean that indicates the
  26  // a token is the node decoder's end node token; and an error which indicates any error
  27  // that occurred while retrieving the start element
  28  func (d NodeDecoder) Token() (t xml.StartElement, done bool, err error) {
  29  	for {
  30  		token, e := d.Decoder.Token()
  31  		if e != nil {
  32  			return t, done, e
  33  		}
  34  
  35  		// check if we reach end of the node being decoded
  36  		if el, ok := token.(xml.EndElement); ok {
  37  			return t, el == d.StartEl.End(), err
  38  		}
  39  
  40  		if t, ok := token.(xml.StartElement); ok {
  41  			return restoreAttrNamespaces(t), false, err
  42  		}
  43  
  44  		// skip token if it is a comment or preamble or empty space value due to indentation
  45  		// or if it's a value and is not expected
  46  	}
  47  }
  48  
  49  // restoreAttrNamespaces update XML attributes to restore the short namespaces found within
  50  // the raw XML document.
  51  func restoreAttrNamespaces(node xml.StartElement) xml.StartElement {
  52  	if len(node.Attr) == 0 {
  53  		return node
  54  	}
  55  
  56  	// Generate a mapping of XML namespace values to their short names.
  57  	ns := map[string]string{}
  58  	for _, a := range node.Attr {
  59  		if a.Name.Space == "xmlns" {
  60  			ns[a.Value] = a.Name.Local
  61  			break
  62  		}
  63  	}
  64  
  65  	for i, a := range node.Attr {
  66  		if a.Name.Space == "xmlns" {
  67  			continue
  68  		}
  69  		// By default, xml.Decoder will fully resolve these namespaces. So if you had <foo xmlns:bar=baz bar:bin=hi/>
  70  		// then by default the second attribute would have the `Name.Space` resolved to `baz`. But we need it to
  71  		// continue to resolve as `bar` so we can easily identify it later on.
  72  		if v, ok := ns[node.Attr[i].Name.Space]; ok {
  73  			node.Attr[i].Name.Space = v
  74  		}
  75  	}
  76  	return node
  77  }
  78  
  79  // GetElement looks for the given tag name at the current level, and returns the element if found, and
  80  // skipping over non-matching elements. Returns an error if the node is not found, or if an error occurs while walking
  81  // the document.
  82  func (d NodeDecoder) GetElement(name string) (t xml.StartElement, err error) {
  83  	for {
  84  		token, done, err := d.Token()
  85  		if err != nil {
  86  			return t, err
  87  		}
  88  		if done {
  89  			return t, fmt.Errorf("%s node not found", name)
  90  		}
  91  		switch {
  92  		case strings.EqualFold(name, token.Name.Local):
  93  			return token, nil
  94  		default:
  95  			err = d.Decoder.Skip()
  96  			if err != nil {
  97  				return t, err
  98  			}
  99  		}
 100  	}
 101  }
 102  
 103  // Value provides an abstraction to retrieve char data value within an xml element.
 104  // The method will return an error if it encounters a nested xml element instead of char data.
 105  // This method should only be used to retrieve simple type or blob shape values as []byte.
 106  func (d NodeDecoder) Value() (c []byte, err error) {
 107  	t, e := d.Decoder.Token()
 108  	if e != nil {
 109  		return c, e
 110  	}
 111  
 112  	endElement := d.StartEl.End()
 113  
 114  	switch ev := t.(type) {
 115  	case xml.CharData:
 116  		c = ev.Copy()
 117  	case xml.EndElement: // end tag or self-closing
 118  		if ev == endElement {
 119  			return []byte{}, err
 120  		}
 121  		return c, fmt.Errorf("expected value for %v element, got %T type %v instead", d.StartEl.Name.Local, t, t)
 122  	default:
 123  		return c, fmt.Errorf("expected value for %v element, got %T type %v instead", d.StartEl.Name.Local, t, t)
 124  	}
 125  
 126  	t, e = d.Decoder.Token()
 127  	if e != nil {
 128  		return c, e
 129  	}
 130  
 131  	if ev, ok := t.(xml.EndElement); ok {
 132  		if ev == endElement {
 133  			return c, err
 134  		}
 135  	}
 136  
 137  	return c, fmt.Errorf("expected end element %v, got %T type %v instead", endElement, t, t)
 138  }
 139  
 140  // FetchRootElement takes in a decoder and returns the first start element within the xml body.
 141  // This function is useful in fetching the start element of an XML response and ignore the
 142  // comments and preamble
 143  func FetchRootElement(decoder *xml.Decoder) (startElement xml.StartElement, err error) {
 144  	for {
 145  		t, e := decoder.Token()
 146  		if e != nil {
 147  			return startElement, e
 148  		}
 149  
 150  		if startElement, ok := t.(xml.StartElement); ok {
 151  			return startElement, err
 152  		}
 153  	}
 154  }
 155