xmlseq.go raw

   1  // Copyright 2012-2016, 2019 Charles Banning. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file
   4  
   5  // xmlseq.go - version of xml.go with sequence # injection on Decoding and sorting on Encoding.
   6  // Also, handles comments, directives and process instructions.
   7  
   8  package mxj
   9  
  10  import (
  11  	"bytes"
  12  	"encoding/xml"
  13  	"errors"
  14  	"fmt"
  15  	"io"
  16  	"regexp"
  17  	"sort"
  18  	"strings"
  19  )
  20  
// MapSeq is like Map but contains sequencing indices to allow recovering the original order of
// the XML elements when the map[string]interface{} is marshaled. Element attributes are
// stored as a map["#attr"]map[<attr_key>]map[string]interface{}{"#text":"<value>", "#seq":<attr_index>}
// value instead of denoting the keys with a prefix character.  Also, comments, directives and
// process instructions are preserved.
type MapSeq map[string]interface{}
  27  
  28  // NoRoot is returned by NewXmlSeq, etc., when a comment, directive or procinstr element is parsed
  29  // in the XML data stream and the element is not contained in an XML object with a root element.
  30  var NoRoot = errors.New("no root key")
  31  var NO_ROOT = NoRoot // maintain backwards compatibility
  32  
  33  // ------------------- NewMapXmlSeq & NewMapXmlSeqReader ... -------------------------
  34  
  35  // NewMapXmlSeq converts a XML doc into a MapSeq value with elements id'd with decoding sequence key represented
  36  // as map["#seq"]<int value>.
  37  // If the optional argument 'cast' is 'true', then values will be converted to boolean or float64 if possible.
  38  // NOTE: "#seq" key/value pairs are removed on encoding with msv.Xml() / msv.XmlIndent().
  39  //	• attributes are a map - map["#attr"]map["attr_key"]map[string]interface{}{"#text":<aval>, "#seq":<num>}
  40  //	• all simple elements are decoded as map["#text"]interface{} with a "#seq" k:v pair, as well.
  41  //	• lists always decode as map["list_tag"][]map[string]interface{} where the array elements are maps that
  42  //	  include a "#seq" k:v pair based on sequence they are decoded.  Thus, XML like:
  43  //	      <doc>
  44  //	         <ltag>value 1</ltag>
  45  //	         <newtag>value 2</newtag>
  46  //	         <ltag>value 3</ltag>
  47  //	      </doc>
  48  //	  is decoded as:
  49  //	    doc :
  50  //	      ltag :[[]interface{}]
  51  //	        [item: 0]
  52  //	          #seq :[int] 0
  53  //	          #text :[string] value 1
  54  //	        [item: 1]
  55  //	          #seq :[int] 2
  56  //	          #text :[string] value 3
  57  //	      newtag :
  58  //	        #seq :[int] 1
  59  //	        #text :[string] value 2
  60  //	  It will encode in proper sequence even though the MapSeq representation merges all "ltag" elements in an array.
  61  //	• comments - "<!--comment-->" -  are decoded as map["#comment"]map["#text"]"cmnt_text" with a "#seq" k:v pair.
  62  //	• directives - "<!text>" - are decoded as map["#directive"]map[#text"]"directive_text" with a "#seq" k:v pair.
  63  //	• process instructions  - "<?instr?>" - are decoded as map["#procinst"]interface{} where the #procinst value
  64  //	  is of map[string]interface{} type with the following keys: #target, #inst, and #seq.
  65  //	• comments, directives, and procinsts that are NOT part of a document with a root key will be returned as
  66  //	  map[string]interface{} and the error value 'NoRoot'.
  67  //	• note: "<![CDATA[" syntax is lost in xml.Decode parser - and is not handled here, either.
  68  //	   and: "\r\n" is converted to "\n"
  69  //
  70  //	NOTES:
  71  //	   1. The 'xmlVal' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
  72  //	      extraneous xml.CharData will be ignored unless io.EOF is reached first.
  73  //	   2. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
  74  //	      re-encode the message in its original structure.
  75  //	   3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
  76  //
  77  //	NAME SPACES:
  78  //	   1. Keys in the MapSeq value that are parsed from a <name space prefix>:<local name> tag preserve the
  79  //	      "<prefix>:" notation rather than stripping it as with NewMapXml().
  80  //	   2. Attribute keys for name space prefix declarations preserve "xmlns:<prefix>" notation.
  81  //
  82  //	ERRORS:
  83  //	   1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
  84  //	      "#directive" or #procinst" key.
  85  //	   2. Unmarshaling an XML doc that is formatted using the whitespace character, " ", will error, since
  86  //	      Decoder.RawToken treats such occurances as significant. See NewMapFormattedXmlSeq().
  87  func NewMapXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) {
  88  	var r bool
  89  	if len(cast) == 1 {
  90  		r = cast[0]
  91  	}
  92  	return xmlSeqToMap(xmlVal, r)
  93  }
  94  
  95  // NewMapFormattedXmlSeq performs the same as NewMapXmlSeq but is useful for processing XML objects that
  96  // are formatted using the whitespace character, " ".  (The stdlib xml.Decoder, by default, treats all
  97  // whitespace as significant; Decoder.Token() and Decoder.RawToken() will return strings of one or more
  98  // whitespace characters and without alphanumeric or punctuation characters as xml.CharData values.)
  99  //
 100  // If you're processing such XML, then this will convert all occurrences of whitespace-only strings
 101  // into an empty string, "", prior to parsing the XML - irrespective of whether the occurrence is
 102  // formatting or is a actual element value.
 103  func NewMapFormattedXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) {
 104  	var c bool
 105  	if len(cast) == 1 {
 106  		c = cast[0]
 107  	}
 108  
 109  	// Per PR #104 - clean out formatting characters so they don't show up in Decoder.RawToken() stream.
 110  	// NOTE: Also replaces element values that are solely comprised of formatting/whitespace characters
 111  	// with empty string, "".
 112  	r := regexp.MustCompile(`>[\n\t\r ]*<`)
 113  	xmlVal = r.ReplaceAll(xmlVal, []byte("><"))
 114  	return xmlSeqToMap(xmlVal, c)
 115  }
 116  
 117  // NewMpaXmlSeqReader returns next XML doc from an io.Reader as a MapSeq value.
 118  //	NOTES:
 119  //	   1. The 'xmlReader' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
 120  //	      extraneous xml.CharData will be ignored unless io.EOF is reached first.
 121  //	   2. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
 122  //	      re-encode the message in its original structure.
 123  //	   3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
 124  //
 125  //	ERRORS:
 126  //	   1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
 127  //	      "#directive" or #procinst" key.
 128  func NewMapXmlSeqReader(xmlReader io.Reader, cast ...bool) (MapSeq, error) {
 129  	var r bool
 130  	if len(cast) == 1 {
 131  		r = cast[0]
 132  	}
 133  
 134  	// We need to put an *os.File reader in a ByteReader or the xml.NewDecoder
 135  	// will wrap it in a bufio.Reader and seek on the file beyond where the
 136  	// xml.Decoder parses!
 137  	if _, ok := xmlReader.(io.ByteReader); !ok {
 138  		xmlReader = myByteReader(xmlReader) // see code at EOF
 139  	}
 140  
 141  	// build the map
 142  	return xmlSeqReaderToMap(xmlReader, r)
 143  }
 144  
 145  // NewMapXmlSeqReaderRaw returns the  next XML doc from  an io.Reader as a MapSeq value.
 146  // Returns MapSeq value, slice with the raw XML, and any error.
 147  //	NOTES:
 148  //	   1. Due to the implementation of xml.Decoder, the raw XML off the reader is buffered to []byte
 149  //	      using a ByteReader. If the io.Reader is an os.File, there may be significant performance impact.
 150  //	      See the examples - getmetrics1.go through getmetrics4.go - for comparative use cases on a large
 151  //	      data set. If the io.Reader is wrapping a []byte value in-memory, however, such as http.Request.Body
 152  //	      you CAN use it to efficiently unmarshal a XML doc and retrieve the raw XML in a single call.
 153  //	    2. The 'raw' return value may be larger than the XML text value.
 154  //	    3. The 'xmlReader' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
 155  //	       extraneous xml.CharData will be ignored unless io.EOF is reached first.
 156  //	    4. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
 157  //	       re-encode the message in its original structure.
 158  //	    5. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
 159  //
 160  //	ERRORS:
 161  //	    1. If a NoRoot error, "no root key," is returned, check if the initial map key is "#comment",
 162  //	       "#directive" or #procinst" key.
 163  func NewMapXmlSeqReaderRaw(xmlReader io.Reader, cast ...bool) (MapSeq, []byte, error) {
 164  	var r bool
 165  	if len(cast) == 1 {
 166  		r = cast[0]
 167  	}
 168  	// create TeeReader so we can retrieve raw XML
 169  	buf := make([]byte, 0)
 170  	wb := bytes.NewBuffer(buf)
 171  	trdr := myTeeReader(xmlReader, wb)
 172  
 173  	m, err := xmlSeqReaderToMap(trdr, r)
 174  
 175  	// retrieve the raw XML that was decoded
 176  	b := wb.Bytes()
 177  
 178  	// err may be NoRoot
 179  	return m, b, err
 180  }
 181  
 182  // xmlSeqReaderToMap() - parse a XML io.Reader to a map[string]interface{} value
 183  func xmlSeqReaderToMap(rdr io.Reader, r bool) (map[string]interface{}, error) {
 184  	// parse the Reader
 185  	p := xml.NewDecoder(rdr)
 186  	if CustomDecoder != nil {
 187  		useCustomDecoder(p)
 188  	} else {
 189  		p.CharsetReader = XmlCharsetReader
 190  	}
 191  	return xmlSeqToMapParser("", nil, p, r)
 192  }
 193  
 194  // xmlSeqToMap - convert a XML doc into map[string]interface{} value
 195  func xmlSeqToMap(doc []byte, r bool) (map[string]interface{}, error) {
 196  	b := bytes.NewReader(doc)
 197  	p := xml.NewDecoder(b)
 198  	if CustomDecoder != nil {
 199  		useCustomDecoder(p)
 200  	} else {
 201  		p.CharsetReader = XmlCharsetReader
 202  	}
 203  	return xmlSeqToMapParser("", nil, p, r)
 204  }
 205  
 206  // ===================================== where the work happens =============================
 207  
 208  // xmlSeqToMapParser - load a 'clean' XML doc into a map[string]interface{} directly.
 209  // Add #seq tag value for each element decoded - to be used for Encoding later.
 210  func xmlSeqToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) {
 211  	if snakeCaseKeys {
 212  		skey = strings.Replace(skey, "-", "_", -1)
 213  	}
 214  
 215  	// NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey' in 'n'.
 216  	var n, na map[string]interface{}
 217  	var seq int // for including seq num when decoding
 218  
 219  	// Allocate maps and load attributes, if any.
 220  	// NOTE: on entry from NewMapXml(), etc., skey=="", and we fall through
 221  	//       to get StartElement then recurse with skey==xml.StartElement.Name.Local
 222  	//       where we begin allocating map[string]interface{} values 'n' and 'na'.
 223  	if skey != "" {
 224  		// 'n' only needs one slot - save call to runtime•hashGrow()
 225  		// 'na' we don't know
 226  		n = make(map[string]interface{}, 1)
 227  		na = make(map[string]interface{})
 228  		if len(a) > 0 {
 229  			// xml.Attr is decoded into: map["#attr"]map[<attr_label>]interface{}
 230  			// where interface{} is map[string]interface{}{"#text":<attr_val>, "#seq":<attr_seq>}
 231  			aa := make(map[string]interface{}, len(a))
 232  			for i, v := range a {
 233  				if snakeCaseKeys {
 234  					v.Name.Local = strings.Replace(v.Name.Local, "-", "_", -1)
 235  				}
 236  				if xmlEscapeCharsDecoder { // per issue#84
 237  					v.Value = escapeChars(v.Value)
 238  				}
 239  				if len(v.Name.Space) > 0 {
 240  					aa[v.Name.Space+`:`+v.Name.Local] = map[string]interface{}{textK: cast(v.Value, r, ""), seqK: i}
 241  				} else {
 242  					aa[v.Name.Local] = map[string]interface{}{textK: cast(v.Value, r, ""), seqK: i}
 243  				}
 244  			}
 245  			na[attrK] = aa
 246  		}
 247  	}
 248  
 249  	// Return XMPP <stream:stream> message.
 250  	if handleXMPPStreamTag && skey == "stream:stream" {
 251  		n[skey] = na
 252  		return n, nil
 253  	}
 254  
 255  	for {
 256  		t, err := p.RawToken()
 257  		if err != nil {
 258  			if err != io.EOF {
 259  				return nil, errors.New("xml.Decoder.Token() - " + err.Error())
 260  			}
 261  			return nil, err
 262  		}
 263  		switch t.(type) {
 264  		case xml.StartElement:
 265  			tt := t.(xml.StartElement)
 266  
 267  			// First call to xmlSeqToMapParser() doesn't pass xml.StartElement - the map key.
 268  			// So when the loop is first entered, the first token is the root tag along
 269  			// with any attributes, which we process here.
 270  			//
 271  			// Subsequent calls to xmlSeqToMapParser() will pass in tag+attributes for
 272  			// processing before getting the next token which is the element value,
 273  			// which is done above.
 274  			if skey == "" {
 275  				if len(tt.Name.Space) > 0 {
 276  					return xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
 277  				} else {
 278  					return xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
 279  				}
 280  			}
 281  
 282  			// If not initializing the map, parse the element.
 283  			// len(nn) == 1, necessarily - it is just an 'n'.
 284  			var nn map[string]interface{}
 285  			if len(tt.Name.Space) > 0 {
 286  				nn, err = xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
 287  			} else {
 288  				nn, err = xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
 289  			}
 290  			if err != nil {
 291  				return nil, err
 292  			}
 293  
 294  			// The nn map[string]interface{} value is a na[nn_key] value.
 295  			// We need to see if nn_key already exists - means we're parsing a list.
 296  			// This may require converting na[nn_key] value into []interface{} type.
 297  			// First, extract the key:val for the map - it's a singleton.
 298  			var key string
 299  			var val interface{}
 300  			for key, val = range nn {
 301  				break
 302  			}
 303  
 304  			// add "#seq" k:v pair -
 305  			// Sequence number included even in list elements - this should allow us
 306  			// to properly resequence even something goofy like:
 307  			//     <list>item 1</list>
 308  			//     <subelement>item 2</subelement>
 309  			//     <list>item 3</list>
 310  			// where all the "list" subelements are decoded into an array.
 311  			switch val.(type) {
 312  			case map[string]interface{}:
 313  				val.(map[string]interface{})[seqK] = seq
 314  				seq++
 315  			case interface{}: // a non-nil simple element: string, float64, bool
 316  				v := map[string]interface{}{textK: val, seqK: seq}
 317  				seq++
 318  				val = v
 319  			}
 320  
 321  			// 'na' holding sub-elements of n.
 322  			// See if 'key' already exists.
 323  			// If 'key' exists, then this is a list, if not just add key:val to na.
 324  			if v, ok := na[key]; ok {
 325  				var a []interface{}
 326  				switch v.(type) {
 327  				case []interface{}:
 328  					a = v.([]interface{})
 329  				default: // anything else - note: v.(type) != nil
 330  					a = []interface{}{v}
 331  				}
 332  				a = append(a, val)
 333  				na[key] = a
 334  			} else {
 335  				na[key] = val // save it as a singleton
 336  			}
 337  		case xml.EndElement:
 338  			if skey != "" {
 339  				tt := t.(xml.EndElement)
 340  				if snakeCaseKeys {
 341  					tt.Name.Local = strings.Replace(tt.Name.Local, "-", "_", -1)
 342  				}
 343  				var name string
 344  				if len(tt.Name.Space) > 0 {
 345  					name = tt.Name.Space + `:` + tt.Name.Local
 346  				} else {
 347  					name = tt.Name.Local
 348  				}
 349  				if skey != name {
 350  					return nil, fmt.Errorf("element %s not properly terminated, got %s at #%d",
 351  						skey, name, p.InputOffset())
 352  				}
 353  			}
 354  			// len(n) > 0 if this is a simple element w/o xml.Attrs - see xml.CharData case.
 355  			if len(n) == 0 {
 356  				// If len(na)==0 we have an empty element == "";
 357  				// it has no xml.Attr nor xml.CharData.
 358  				// Empty element content will be  map["etag"]map["#text"]""
 359  				// after #seq injection - map["etag"]map["#seq"]seq - after return.
 360  				if len(na) > 0 {
 361  					n[skey] = na
 362  				} else {
 363  					n[skey] = "" // empty element
 364  				}
 365  			}
 366  			return n, nil
 367  		case xml.CharData:
 368  			// clean up possible noise
 369  			tt := strings.Trim(string(t.(xml.CharData)), trimRunes)
 370  			if xmlEscapeCharsDecoder { // issue#84
 371  				tt = escapeChars(tt)
 372  			}
 373  			if skey == "" {
 374  				// per Adrian (http://www.adrianlungu.com/) catch stray text
 375  				// in decoder stream -
 376  				// https://github.com/clbanning/mxj/pull/14#issuecomment-182816374
 377  				// NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get
 378  				// a p.Token() decoding error when the BOM is UTF-16 or UTF-32.
 379  				continue
 380  			}
 381  			if len(tt) > 0 {
 382  				// every simple element is a #text and has #seq associated with it
 383  				na[textK] = cast(tt, r, "")
 384  				na[seqK] = seq
 385  				seq++
 386  			}
 387  		case xml.Comment:
 388  			if n == nil { // no root 'key'
 389  				n = map[string]interface{}{commentK: string(t.(xml.Comment))}
 390  				return n, NoRoot
 391  			}
 392  			cm := make(map[string]interface{}, 2)
 393  			cm[textK] = string(t.(xml.Comment))
 394  			cm[seqK] = seq
 395  			seq++
 396  			na[commentK] = cm
 397  		case xml.Directive:
 398  			if n == nil { // no root 'key'
 399  				n = map[string]interface{}{directiveK: string(t.(xml.Directive))}
 400  				return n, NoRoot
 401  			}
 402  			dm := make(map[string]interface{}, 2)
 403  			dm[textK] = string(t.(xml.Directive))
 404  			dm[seqK] = seq
 405  			seq++
 406  			na[directiveK] = dm
 407  		case xml.ProcInst:
 408  			if n == nil {
 409  				na = map[string]interface{}{targetK: t.(xml.ProcInst).Target, instK: string(t.(xml.ProcInst).Inst)}
 410  				n = map[string]interface{}{procinstK: na}
 411  				return n, NoRoot
 412  			}
 413  			pm := make(map[string]interface{}, 3)
 414  			pm[targetK] = t.(xml.ProcInst).Target
 415  			pm[instK] = string(t.(xml.ProcInst).Inst)
 416  			pm[seqK] = seq
 417  			seq++
 418  			na[procinstK] = pm
 419  		default:
 420  			// noop - shouldn't ever get here, now, since we handle all token types
 421  		}
 422  	}
 423  }
 424  
// ------------------ END: NewMapXmlSeq & NewMapXmlSeqReader -------------------------
 426  
 427  // --------------------- mv.XmlSeq & mv.XmlSeqWriter -------------------------
 428  
 429  // Xml encodes a MapSeq as XML with elements sorted on #seq.  The companion of NewMapXmlSeq().
 430  // The following rules apply.
 431  //    - The "#seq" key value is used to seqence the subelements or attributes only.
 432  //    - The "#attr" map key identifies the map of attribute map[string]interface{} values with "#text" key.
 433  //    - The "#comment" map key identifies a comment in the value "#text" map entry - <!--comment-->.
 434  //    - The "#directive" map key identifies a directive in the value "#text" map entry - <!directive>.
 435  //    - The "#procinst" map key identifies a process instruction in the value "#target" and "#inst"
 436  //      map entries - <?target inst?>.
 437  //    - Value type encoding:
 438  //          > string, bool, float64, int, int32, int64, float32: per "%v" formating
 439  //          > []bool, []uint8: by casting to string
 440  //          > structures, etc.: handed to xml.Marshal() - if there is an error, the element
 441  //            value is "UNKNOWN"
 442  //    - Elements with only attribute values or are null are terminated using "/>" unless XmlGoEmptyElemSystax() called.
 443  //    - If len(mv) == 1 and no rootTag is provided, then the map key is used as the root tag, possible.
 444  //      Thus, `{ "key":"value" }` encodes as "<key>value</key>".
 445  func (mv MapSeq) Xml(rootTag ...string) ([]byte, error) {
 446  	m := map[string]interface{}(mv)
 447  	var err error
 448  	s := new(string)
 449  	p := new(pretty) // just a stub
 450  
 451  	if len(m) == 1 && len(rootTag) == 0 {
 452  		for key, value := range m {
 453  			// if it's an array, see if all values are map[string]interface{}
 454  			// we force a new root tag if we'll end up with no key:value in the list
 455  			// so: key:[string_val, bool:true] --> <doc><key>string_val</key><bool>true</bool></doc>
 456  			switch value.(type) {
 457  			case []interface{}:
 458  				for _, v := range value.([]interface{}) {
 459  					switch v.(type) {
 460  					case map[string]interface{}: // noop
 461  					default: // anything else
 462  						err = mapToXmlSeqIndent(false, s, DefaultRootTag, m, p)
 463  						goto done
 464  					}
 465  				}
 466  			}
 467  			err = mapToXmlSeqIndent(false, s, key, value, p)
 468  		}
 469  	} else if len(rootTag) == 1 {
 470  		err = mapToXmlSeqIndent(false, s, rootTag[0], m, p)
 471  	} else {
 472  		err = mapToXmlSeqIndent(false, s, DefaultRootTag, m, p)
 473  	}
 474  done:
 475  	if xmlCheckIsValid {
 476  		d := xml.NewDecoder(bytes.NewReader([]byte(*s)))
 477  		for {
 478  			_, err = d.Token()
 479  			if err == io.EOF {
 480  				err = nil
 481  				break
 482  			} else if err != nil {
 483  				return nil, err
 484  			}
 485  		}
 486  	}
 487  	return []byte(*s), err
 488  }
 489  
 490  // The following implementation is provided only for symmetry with NewMapXmlReader[Raw]
 491  // The names will also provide a key for the number of return arguments.
 492  
 493  // XmlWriter Writes the MapSeq value as  XML on the Writer.
 494  // See MapSeq.Xml() for encoding rules.
 495  func (mv MapSeq) XmlWriter(xmlWriter io.Writer, rootTag ...string) error {
 496  	x, err := mv.Xml(rootTag...)
 497  	if err != nil {
 498  		return err
 499  	}
 500  
 501  	_, err = xmlWriter.Write(x)
 502  	return err
 503  }
 504  
 505  // XmlWriteRaw writes the MapSeq value as XML on the Writer. []byte is the raw XML that was written.
 506  // See Map.XmlSeq() for encoding rules.
 507  /*
 508  func (mv MapSeq) XmlWriterRaw(xmlWriter io.Writer, rootTag ...string) ([]byte, error) {
 509  	x, err := mv.Xml(rootTag...)
 510  	if err != nil {
 511  		return x, err
 512  	}
 513  
 514  	_, err = xmlWriter.Write(x)
 515  	return x, err
 516  }
 517  */
 518  
 519  // XmlIndentWriter writes the MapSeq value as pretty XML on the Writer.
 520  // See MapSeq.Xml() for encoding rules.
 521  func (mv MapSeq) XmlIndentWriter(xmlWriter io.Writer, prefix, indent string, rootTag ...string) error {
 522  	x, err := mv.XmlIndent(prefix, indent, rootTag...)
 523  	if err != nil {
 524  		return err
 525  	}
 526  
 527  	_, err = xmlWriter.Write(x)
 528  	return err
 529  }
 530  
 531  // XmlIndentWriterRaw writes the Map as pretty XML on the Writer. []byte is the raw XML that was written.
 532  // See Map.XmlSeq() for encoding rules.
 533  /*
 534  func (mv MapSeq) XmlIndentWriterRaw(xmlWriter io.Writer, prefix, indent string, rootTag ...string) ([]byte, error) {
 535  	x, err := mv.XmlSeqIndent(prefix, indent, rootTag...)
 536  	if err != nil {
 537  		return x, err
 538  	}
 539  
 540  	_, err = xmlWriter.Write(x)
 541  	return x, err
 542  }
 543  */
 544  
 545  // -------------------- END: mv.Xml & mv.XmlWriter -------------------------------
 546  
 547  // ---------------------- XmlSeqIndent ----------------------------
 548  
 549  // XmlIndent encodes a map[string]interface{} as a pretty XML string.
 550  // See MapSeq.XmlSeq() for encoding rules.
 551  func (mv MapSeq) XmlIndent(prefix, indent string, rootTag ...string) ([]byte, error) {
 552  	m := map[string]interface{}(mv)
 553  
 554  	var err error
 555  	s := new(string)
 556  	p := new(pretty)
 557  	p.indent = indent
 558  	p.padding = prefix
 559  
 560  	if len(m) == 1 && len(rootTag) == 0 {
 561  		// this can extract the key for the single map element
 562  		// use it if it isn't a key for a list
 563  		for key, value := range m {
 564  			if _, ok := value.([]interface{}); ok {
 565  				err = mapToXmlSeqIndent(true, s, DefaultRootTag, m, p)
 566  			} else {
 567  				err = mapToXmlSeqIndent(true, s, key, value, p)
 568  			}
 569  		}
 570  	} else if len(rootTag) == 1 {
 571  		err = mapToXmlSeqIndent(true, s, rootTag[0], m, p)
 572  	} else {
 573  		err = mapToXmlSeqIndent(true, s, DefaultRootTag, m, p)
 574  	}
 575  	if xmlCheckIsValid {
 576  		if _, err = NewMapXml([]byte(*s)); err != nil {
 577  			return nil, err
 578  		}
 579  		d := xml.NewDecoder(bytes.NewReader([]byte(*s)))
 580  		for {
 581  			_, err = d.Token()
 582  			if err == io.EOF {
 583  				err = nil
 584  				break
 585  			} else if err != nil {
 586  				return nil, err
 587  			}
 588  		}
 589  	}
 590  	return []byte(*s), err
 591  }
 592  
 593  // where the work actually happens
 594  // returns an error if an attribute is not atomic
 595  func mapToXmlSeqIndent(doIndent bool, s *string, key string, value interface{}, pp *pretty) error {
 596  	var endTag bool
 597  	var isSimple bool
 598  	var noEndTag bool
 599  	var elen int
 600  	var ss string
 601  	p := &pretty{pp.indent, pp.cnt, pp.padding, pp.mapDepth, pp.start}
 602  
 603  	switch value.(type) {
 604  	case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
 605  		if doIndent {
 606  			*s += p.padding
 607  		}
 608  		if key != commentK && key != directiveK && key != procinstK {
 609  			*s += `<` + key
 610  		}
 611  	}
 612  	switch value.(type) {
 613  	case map[string]interface{}:
 614  		val := value.(map[string]interface{})
 615  
 616  		if key == commentK {
 617  			*s += `<!--` + val[textK].(string) + `-->`
 618  			noEndTag = true
 619  			break
 620  		}
 621  
 622  		if key == directiveK {
 623  			*s += `<!` + val[textK].(string) + `>`
 624  			noEndTag = true
 625  			break
 626  		}
 627  
 628  		if key == procinstK {
 629  			*s += `<?` + val[targetK].(string) + ` ` + val[instK].(string) + `?>`
 630  			noEndTag = true
 631  			break
 632  		}
 633  
 634  		haveAttrs := false
 635  		// process attributes first
 636  		if v, ok := val[attrK].(map[string]interface{}); ok {
 637  			// First, unroll the map[string]interface{} into a []keyval array.
 638  			// Then sequence it.
 639  			kv := make([]keyval, len(v))
 640  			n := 0
 641  			for ak, av := range v {
 642  				kv[n] = keyval{ak, av}
 643  				n++
 644  			}
 645  			sort.Sort(elemListSeq(kv))
 646  			// Now encode the attributes in original decoding sequence, using keyval array.
 647  			for _, a := range kv {
 648  				vv := a.v.(map[string]interface{})
 649  				switch vv[textK].(type) {
 650  				case string:
 651  					if xmlEscapeChars {
 652  						ss = escapeChars(vv[textK].(string))
 653  					} else {
 654  						ss = vv[textK].(string)
 655  					}
 656  					*s += ` ` + a.k + `="` + ss + `"`
 657  				case float64, bool, int, int32, int64, float32:
 658  					*s += ` ` + a.k + `="` + fmt.Sprintf("%v", vv[textK]) + `"`
 659  				case []byte:
 660  					if xmlEscapeChars {
 661  						ss = escapeChars(string(vv[textK].([]byte)))
 662  					} else {
 663  						ss = string(vv[textK].([]byte))
 664  					}
 665  					*s += ` ` + a.k + `="` + ss + `"`
 666  				default:
 667  					return fmt.Errorf("invalid attribute value for: %s", a.k)
 668  				}
 669  			}
 670  			haveAttrs = true
 671  		}
 672  
 673  		// simple element?
 674  		// every map value has, at least, "#seq" and, perhaps, "#text" and/or "#attr"
 675  		_, seqOK := val[seqK] // have key
 676  		if v, ok := val[textK]; ok && ((len(val) == 3 && haveAttrs) || (len(val) == 2 && !haveAttrs)) && seqOK {
 677  			if stmp, ok := v.(string); ok && stmp != "" {
 678  				if xmlEscapeChars {
 679  					stmp = escapeChars(stmp)
 680  				}
 681  				*s += ">" + stmp
 682  				endTag = true
 683  				elen = 1
 684  			}
 685  			isSimple = true
 686  			break
 687  		} else if !ok && ((len(val) == 2 && haveAttrs) || (len(val) == 1 && !haveAttrs)) && seqOK {
 688  			// here no #text but have #seq or #seq+#attr
 689  			endTag = false
 690  			break
 691  		}
 692  
 693  		// we now need to sequence everything except attributes
 694  		// 'kv' will hold everything that needs to be written
 695  		kv := make([]keyval, 0)
 696  		for k, v := range val {
 697  			if k == attrK { // already processed
 698  				continue
 699  			}
 700  			if k == seqK { // ignore - just for sorting
 701  				continue
 702  			}
 703  			switch v.(type) {
 704  			case []interface{}:
 705  				// unwind the array as separate entries
 706  				for _, vv := range v.([]interface{}) {
 707  					kv = append(kv, keyval{k, vv})
 708  				}
 709  			default:
 710  				kv = append(kv, keyval{k, v})
 711  			}
 712  		}
 713  
 714  		// close tag with possible attributes
 715  		*s += ">"
 716  		if doIndent {
 717  			*s += "\n"
 718  		}
 719  		// something more complex
 720  		p.mapDepth++
 721  		sort.Sort(elemListSeq(kv))
 722  		i := 0
 723  		for _, v := range kv {
 724  			switch v.v.(type) {
 725  			case []interface{}:
 726  			default:
 727  				if i == 0 && doIndent {
 728  					p.Indent()
 729  				}
 730  			}
 731  			i++
 732  			if err := mapToXmlSeqIndent(doIndent, s, v.k, v.v, p); err != nil {
 733  				return err
 734  			}
 735  			switch v.v.(type) {
 736  			case []interface{}: // handled in []interface{} case
 737  			default:
 738  				if doIndent {
 739  					p.Outdent()
 740  				}
 741  			}
 742  			i--
 743  		}
 744  		p.mapDepth--
 745  		endTag = true
 746  		elen = 1 // we do have some content other than attrs
 747  	case []interface{}:
 748  		for _, v := range value.([]interface{}) {
 749  			if doIndent {
 750  				p.Indent()
 751  			}
 752  			if err := mapToXmlSeqIndent(doIndent, s, key, v, p); err != nil {
 753  				return err
 754  			}
 755  			if doIndent {
 756  				p.Outdent()
 757  			}
 758  		}
 759  		return nil
 760  	case nil:
 761  		// terminate the tag
 762  		if doIndent {
 763  			*s += p.padding
 764  		}
 765  		*s += "<" + key
 766  		endTag, isSimple = true, true
 767  		break
 768  	default: // handle anything - even goofy stuff
 769  		elen = 0
 770  		switch value.(type) {
 771  		case string:
 772  			if xmlEscapeChars {
 773  				ss = escapeChars(value.(string))
 774  			} else {
 775  				ss = value.(string)
 776  			}
 777  			elen = len(ss)
 778  			if elen > 0 {
 779  				*s += ">" + ss
 780  			}
 781  		case float64, bool, int, int32, int64, float32:
 782  			v := fmt.Sprintf("%v", value)
 783  			elen = len(v)
 784  			if elen > 0 {
 785  				*s += ">" + v
 786  			}
 787  		case []byte: // NOTE: byte is just an alias for uint8
 788  			// similar to how xml.Marshal handles []byte structure members
 789  			if xmlEscapeChars {
 790  				ss = escapeChars(string(value.([]byte)))
 791  			} else {
 792  				ss = string(value.([]byte))
 793  			}
 794  			elen = len(ss)
 795  			if elen > 0 {
 796  				*s += ">" + ss
 797  			}
 798  		default:
 799  			var v []byte
 800  			var err error
 801  			if doIndent {
 802  				v, err = xml.MarshalIndent(value, p.padding, p.indent)
 803  			} else {
 804  				v, err = xml.Marshal(value)
 805  			}
 806  			if err != nil {
 807  				*s += ">UNKNOWN"
 808  			} else {
 809  				elen = len(v)
 810  				if elen > 0 {
 811  					*s += string(v)
 812  				}
 813  			}
 814  		}
 815  		isSimple = true
 816  		endTag = true
 817  	}
 818  	if endTag && !noEndTag {
 819  		if doIndent {
 820  			if !isSimple {
 821  				*s += p.padding
 822  			}
 823  		}
 824  		switch value.(type) {
 825  		case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
 826  			if elen > 0 || useGoXmlEmptyElemSyntax {
 827  				if elen == 0 {
 828  					*s += ">"
 829  				}
 830  				*s += `</` + key + ">"
 831  			} else {
 832  				*s += `/>`
 833  			}
 834  		}
 835  	} else if !noEndTag {
 836  		if useGoXmlEmptyElemSyntax {
 837  			*s += `</` + key + ">"
 838  			// *s += "></" + key + ">"
 839  		} else {
 840  			*s += "/>"
 841  		}
 842  	}
 843  	if doIndent {
 844  		if p.cnt > p.start {
 845  			*s += "\n"
 846  		}
 847  		p.Outdent()
 848  	}
 849  
 850  	return nil
 851  }
 852  
 853  // the element sort implementation
 854  
 855  type keyval struct {
 856  	k string
 857  	v interface{}
 858  }
 859  type elemListSeq []keyval
 860  
 861  func (e elemListSeq) Len() int {
 862  	return len(e)
 863  }
 864  
 865  func (e elemListSeq) Swap(i, j int) {
 866  	e[i], e[j] = e[j], e[i]
 867  }
 868  
 869  func (e elemListSeq) Less(i, j int) bool {
 870  	var iseq, jseq int
 871  	var fiseq, fjseq float64
 872  	var ok bool
 873  	if iseq, ok = e[i].v.(map[string]interface{})[seqK].(int); !ok {
 874  		if fiseq, ok = e[i].v.(map[string]interface{})[seqK].(float64); ok {
 875  			iseq = int(fiseq)
 876  		} else {
 877  			iseq = 9999999
 878  		}
 879  	}
 880  
 881  	if jseq, ok = e[j].v.(map[string]interface{})[seqK].(int); !ok {
 882  		if fjseq, ok = e[j].v.(map[string]interface{})[seqK].(float64); ok {
 883  			jseq = int(fjseq)
 884  		} else {
 885  			jseq = 9999999
 886  		}
 887  	}
 888  
 889  	return iseq <= jseq
 890  }
 891  
 892  // =============== https://groups.google.com/forum/#!topic/golang-nuts/lHPOHD-8qio
 893  
 894  // BeautifyXml (re)formats an XML doc similar to Map.XmlIndent().
 895  // It preserves comments, directives and process instructions,
 896  func BeautifyXml(b []byte, prefix, indent string) ([]byte, error) {
 897  	x, err := NewMapXmlSeq(b)
 898  	if err != nil {
 899  		return nil, err
 900  	}
 901  	return x.XmlIndent(prefix, indent)
 902  }
 903