1 // Copyright 2012-2016, 2019 Charles Banning. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file
4 5 // xmlseq.go - version of xml.go with sequence # injection on Decoding and sorting on Encoding.
6 // Also, handles comments, directives and process instructions.
7 8 package mxj
9 10 import (
11 "bytes"
12 "encoding/xml"
13 "errors"
14 "fmt"
15 "io"
16 "regexp"
17 "sort"
18 "strings"
19 )
// MapSeq is like Map but contains sequencing indices to allow recovering the original order of
// the XML elements when the map[string]interface{} is marshaled. Element attributes are
// stored as a map["#attr"]map[<attr_key>]map[string]interface{}{"#text":"<value>", "#seq":<attr_index>}
// value instead of denoting the keys with a prefix character. Also, comments, directives and
// process instructions are preserved.
type MapSeq map[string]interface{}
// Errors reported when the XML stream has no root element.
var (
	// NoRoot is returned by the MapSeq decoding functions (NewMapXmlSeq, etc.) when a
	// comment, directive or process instruction is parsed in the XML data stream and
	// that element is not contained in an XML object with a root element.
	NoRoot = errors.New("no root key")

	// NO_ROOT is the same error value as NoRoot; it is kept to maintain backwards compatibility.
	NO_ROOT = NoRoot
)
32 33 // ------------------- NewMapXmlSeq & NewMapXmlSeqReader ... -------------------------
34 35 // NewMapXmlSeq converts a XML doc into a MapSeq value with elements id'd with decoding sequence key represented
36 // as map["#seq"]<int value>.
37 // If the optional argument 'cast' is 'true', then values will be converted to boolean or float64 if possible.
38 // NOTE: "#seq" key/value pairs are removed on encoding with msv.Xml() / msv.XmlIndent().
39 // • attributes are a map - map["#attr"]map["attr_key"]map[string]interface{}{"#text":<aval>, "#seq":<num>}
40 // • all simple elements are decoded as map["#text"]interface{} with a "#seq" k:v pair, as well.
41 // • lists always decode as map["list_tag"][]map[string]interface{} where the array elements are maps that
42 // include a "#seq" k:v pair based on sequence they are decoded. Thus, XML like:
43 // <doc>
44 // <ltag>value 1</ltag>
45 // <newtag>value 2</newtag>
46 // <ltag>value 3</ltag>
47 // </doc>
48 // is decoded as:
49 // doc :
50 // ltag :[[]interface{}]
51 // [item: 0]
52 // #seq :[int] 0
53 // #text :[string] value 1
54 // [item: 1]
55 // #seq :[int] 2
56 // #text :[string] value 3
57 // newtag :
58 // #seq :[int] 1
59 // #text :[string] value 2
60 // It will encode in proper sequence even though the MapSeq representation merges all "ltag" elements in an array.
61 // • comments - "<!--comment-->" - are decoded as map["#comment"]map["#text"]"cmnt_text" with a "#seq" k:v pair.
62 // • directives - "<!text>" - are decoded as map["#directive"]map[#text"]"directive_text" with a "#seq" k:v pair.
63 // • process instructions - "<?instr?>" - are decoded as map["#procinst"]interface{} where the #procinst value
64 // is of map[string]interface{} type with the following keys: #target, #inst, and #seq.
65 // • comments, directives, and procinsts that are NOT part of a document with a root key will be returned as
66 // map[string]interface{} and the error value 'NoRoot'.
67 // • note: "<![CDATA[" syntax is lost in xml.Decode parser - and is not handled here, either.
68 // and: "\r\n" is converted to "\n"
69 //
70 // NOTES:
71 // 1. The 'xmlVal' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
72 // extraneous xml.CharData will be ignored unless io.EOF is reached first.
73 // 2. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
74 // re-encode the message in its original structure.
75 // 3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
76 //
77 // NAME SPACES:
78 // 1. Keys in the MapSeq value that are parsed from a <name space prefix>:<local name> tag preserve the
79 // "<prefix>:" notation rather than stripping it as with NewMapXml().
80 // 2. Attribute keys for name space prefix declarations preserve "xmlns:<prefix>" notation.
81 //
82 // ERRORS:
83 // 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
84 // "#directive" or #procinst" key.
85 // 2. Unmarshaling an XML doc that is formatted using the whitespace character, " ", will error, since
86 // Decoder.RawToken treats such occurances as significant. See NewMapFormattedXmlSeq().
87 func NewMapXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) {
88 var r bool
89 if len(cast) == 1 {
90 r = cast[0]
91 }
92 return xmlSeqToMap(xmlVal, r)
93 }
94 95 // NewMapFormattedXmlSeq performs the same as NewMapXmlSeq but is useful for processing XML objects that
96 // are formatted using the whitespace character, " ". (The stdlib xml.Decoder, by default, treats all
97 // whitespace as significant; Decoder.Token() and Decoder.RawToken() will return strings of one or more
98 // whitespace characters and without alphanumeric or punctuation characters as xml.CharData values.)
99 //
100 // If you're processing such XML, then this will convert all occurrences of whitespace-only strings
101 // into an empty string, "", prior to parsing the XML - irrespective of whether the occurrence is
102 // formatting or is a actual element value.
103 func NewMapFormattedXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) {
104 var c bool
105 if len(cast) == 1 {
106 c = cast[0]
107 }
108 109 // Per PR #104 - clean out formatting characters so they don't show up in Decoder.RawToken() stream.
110 // NOTE: Also replaces element values that are solely comprised of formatting/whitespace characters
111 // with empty string, "".
112 r := regexp.MustCompile(`>[\n\t\r ]*<`)
113 xmlVal = r.ReplaceAll(xmlVal, []byte("><"))
114 return xmlSeqToMap(xmlVal, c)
115 }
116 117 // NewMpaXmlSeqReader returns next XML doc from an io.Reader as a MapSeq value.
118 // NOTES:
119 // 1. The 'xmlReader' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
120 // extraneous xml.CharData will be ignored unless io.EOF is reached first.
121 // 2. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
122 // re-encode the message in its original structure.
123 // 3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
124 //
125 // ERRORS:
126 // 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
127 // "#directive" or #procinst" key.
128 func NewMapXmlSeqReader(xmlReader io.Reader, cast ...bool) (MapSeq, error) {
129 var r bool
130 if len(cast) == 1 {
131 r = cast[0]
132 }
133 134 // We need to put an *os.File reader in a ByteReader or the xml.NewDecoder
135 // will wrap it in a bufio.Reader and seek on the file beyond where the
136 // xml.Decoder parses!
137 if _, ok := xmlReader.(io.ByteReader); !ok {
138 xmlReader = myByteReader(xmlReader) // see code at EOF
139 }
140 141 // build the map
142 return xmlSeqReaderToMap(xmlReader, r)
143 }
144 145 // NewMapXmlSeqReaderRaw returns the next XML doc from an io.Reader as a MapSeq value.
146 // Returns MapSeq value, slice with the raw XML, and any error.
147 // NOTES:
148 // 1. Due to the implementation of xml.Decoder, the raw XML off the reader is buffered to []byte
149 // using a ByteReader. If the io.Reader is an os.File, there may be significant performance impact.
150 // See the examples - getmetrics1.go through getmetrics4.go - for comparative use cases on a large
151 // data set. If the io.Reader is wrapping a []byte value in-memory, however, such as http.Request.Body
152 // you CAN use it to efficiently unmarshal a XML doc and retrieve the raw XML in a single call.
153 // 2. The 'raw' return value may be larger than the XML text value.
154 // 3. The 'xmlReader' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
155 // extraneous xml.CharData will be ignored unless io.EOF is reached first.
156 // 4. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
157 // re-encode the message in its original structure.
158 // 5. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
159 //
160 // ERRORS:
161 // 1. If a NoRoot error, "no root key," is returned, check if the initial map key is "#comment",
162 // "#directive" or #procinst" key.
163 func NewMapXmlSeqReaderRaw(xmlReader io.Reader, cast ...bool) (MapSeq, []byte, error) {
164 var r bool
165 if len(cast) == 1 {
166 r = cast[0]
167 }
168 // create TeeReader so we can retrieve raw XML
169 buf := make([]byte, 0)
170 wb := bytes.NewBuffer(buf)
171 trdr := myTeeReader(xmlReader, wb)
172 173 m, err := xmlSeqReaderToMap(trdr, r)
174 175 // retrieve the raw XML that was decoded
176 b := wb.Bytes()
177 178 // err may be NoRoot
179 return m, b, err
180 }
181 182 // xmlSeqReaderToMap() - parse a XML io.Reader to a map[string]interface{} value
183 func xmlSeqReaderToMap(rdr io.Reader, r bool) (map[string]interface{}, error) {
184 // parse the Reader
185 p := xml.NewDecoder(rdr)
186 if CustomDecoder != nil {
187 useCustomDecoder(p)
188 } else {
189 p.CharsetReader = XmlCharsetReader
190 }
191 return xmlSeqToMapParser("", nil, p, r)
192 }
193 194 // xmlSeqToMap - convert a XML doc into map[string]interface{} value
195 func xmlSeqToMap(doc []byte, r bool) (map[string]interface{}, error) {
196 b := bytes.NewReader(doc)
197 p := xml.NewDecoder(b)
198 if CustomDecoder != nil {
199 useCustomDecoder(p)
200 } else {
201 p.CharsetReader = XmlCharsetReader
202 }
203 return xmlSeqToMapParser("", nil, p, r)
204 }
205 206 // ===================================== where the work happens =============================
207 208 // xmlSeqToMapParser - load a 'clean' XML doc into a map[string]interface{} directly.
209 // Add #seq tag value for each element decoded - to be used for Encoding later.
210 func xmlSeqToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) {
211 if snakeCaseKeys {
212 skey = strings.Replace(skey, "-", "_", -1)
213 }
214 215 // NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey' in 'n'.
216 var n, na map[string]interface{}
217 var seq int // for including seq num when decoding
218 219 // Allocate maps and load attributes, if any.
220 // NOTE: on entry from NewMapXml(), etc., skey=="", and we fall through
221 // to get StartElement then recurse with skey==xml.StartElement.Name.Local
222 // where we begin allocating map[string]interface{} values 'n' and 'na'.
223 if skey != "" {
224 // 'n' only needs one slot - save call to runtime•hashGrow()
225 // 'na' we don't know
226 n = make(map[string]interface{}, 1)
227 na = make(map[string]interface{})
228 if len(a) > 0 {
229 // xml.Attr is decoded into: map["#attr"]map[<attr_label>]interface{}
230 // where interface{} is map[string]interface{}{"#text":<attr_val>, "#seq":<attr_seq>}
231 aa := make(map[string]interface{}, len(a))
232 for i, v := range a {
233 if snakeCaseKeys {
234 v.Name.Local = strings.Replace(v.Name.Local, "-", "_", -1)
235 }
236 if xmlEscapeCharsDecoder { // per issue#84
237 v.Value = escapeChars(v.Value)
238 }
239 if len(v.Name.Space) > 0 {
240 aa[v.Name.Space+`:`+v.Name.Local] = map[string]interface{}{textK: cast(v.Value, r, ""), seqK: i}
241 } else {
242 aa[v.Name.Local] = map[string]interface{}{textK: cast(v.Value, r, ""), seqK: i}
243 }
244 }
245 na[attrK] = aa
246 }
247 }
248 249 // Return XMPP <stream:stream> message.
250 if handleXMPPStreamTag && skey == "stream:stream" {
251 n[skey] = na
252 return n, nil
253 }
254 255 for {
256 t, err := p.RawToken()
257 if err != nil {
258 if err != io.EOF {
259 return nil, errors.New("xml.Decoder.Token() - " + err.Error())
260 }
261 return nil, err
262 }
263 switch t.(type) {
264 case xml.StartElement:
265 tt := t.(xml.StartElement)
266 267 // First call to xmlSeqToMapParser() doesn't pass xml.StartElement - the map key.
268 // So when the loop is first entered, the first token is the root tag along
269 // with any attributes, which we process here.
270 //
271 // Subsequent calls to xmlSeqToMapParser() will pass in tag+attributes for
272 // processing before getting the next token which is the element value,
273 // which is done above.
274 if skey == "" {
275 if len(tt.Name.Space) > 0 {
276 return xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
277 } else {
278 return xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
279 }
280 }
281 282 // If not initializing the map, parse the element.
283 // len(nn) == 1, necessarily - it is just an 'n'.
284 var nn map[string]interface{}
285 if len(tt.Name.Space) > 0 {
286 nn, err = xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
287 } else {
288 nn, err = xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
289 }
290 if err != nil {
291 return nil, err
292 }
293 294 // The nn map[string]interface{} value is a na[nn_key] value.
295 // We need to see if nn_key already exists - means we're parsing a list.
296 // This may require converting na[nn_key] value into []interface{} type.
297 // First, extract the key:val for the map - it's a singleton.
298 var key string
299 var val interface{}
300 for key, val = range nn {
301 break
302 }
303 304 // add "#seq" k:v pair -
305 // Sequence number included even in list elements - this should allow us
306 // to properly resequence even something goofy like:
307 // <list>item 1</list>
308 // <subelement>item 2</subelement>
309 // <list>item 3</list>
310 // where all the "list" subelements are decoded into an array.
311 switch val.(type) {
312 case map[string]interface{}:
313 val.(map[string]interface{})[seqK] = seq
314 seq++
315 case interface{}: // a non-nil simple element: string, float64, bool
316 v := map[string]interface{}{textK: val, seqK: seq}
317 seq++
318 val = v
319 }
320 321 // 'na' holding sub-elements of n.
322 // See if 'key' already exists.
323 // If 'key' exists, then this is a list, if not just add key:val to na.
324 if v, ok := na[key]; ok {
325 var a []interface{}
326 switch v.(type) {
327 case []interface{}:
328 a = v.([]interface{})
329 default: // anything else - note: v.(type) != nil
330 a = []interface{}{v}
331 }
332 a = append(a, val)
333 na[key] = a
334 } else {
335 na[key] = val // save it as a singleton
336 }
337 case xml.EndElement:
338 if skey != "" {
339 tt := t.(xml.EndElement)
340 if snakeCaseKeys {
341 tt.Name.Local = strings.Replace(tt.Name.Local, "-", "_", -1)
342 }
343 var name string
344 if len(tt.Name.Space) > 0 {
345 name = tt.Name.Space + `:` + tt.Name.Local
346 } else {
347 name = tt.Name.Local
348 }
349 if skey != name {
350 return nil, fmt.Errorf("element %s not properly terminated, got %s at #%d",
351 skey, name, p.InputOffset())
352 }
353 }
354 // len(n) > 0 if this is a simple element w/o xml.Attrs - see xml.CharData case.
355 if len(n) == 0 {
356 // If len(na)==0 we have an empty element == "";
357 // it has no xml.Attr nor xml.CharData.
358 // Empty element content will be map["etag"]map["#text"]""
359 // after #seq injection - map["etag"]map["#seq"]seq - after return.
360 if len(na) > 0 {
361 n[skey] = na
362 } else {
363 n[skey] = "" // empty element
364 }
365 }
366 return n, nil
367 case xml.CharData:
368 // clean up possible noise
369 tt := strings.Trim(string(t.(xml.CharData)), trimRunes)
370 if xmlEscapeCharsDecoder { // issue#84
371 tt = escapeChars(tt)
372 }
373 if skey == "" {
374 // per Adrian (http://www.adrianlungu.com/) catch stray text
375 // in decoder stream -
376 // https://github.com/clbanning/mxj/pull/14#issuecomment-182816374
377 // NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get
378 // a p.Token() decoding error when the BOM is UTF-16 or UTF-32.
379 continue
380 }
381 if len(tt) > 0 {
382 // every simple element is a #text and has #seq associated with it
383 na[textK] = cast(tt, r, "")
384 na[seqK] = seq
385 seq++
386 }
387 case xml.Comment:
388 if n == nil { // no root 'key'
389 n = map[string]interface{}{commentK: string(t.(xml.Comment))}
390 return n, NoRoot
391 }
392 cm := make(map[string]interface{}, 2)
393 cm[textK] = string(t.(xml.Comment))
394 cm[seqK] = seq
395 seq++
396 na[commentK] = cm
397 case xml.Directive:
398 if n == nil { // no root 'key'
399 n = map[string]interface{}{directiveK: string(t.(xml.Directive))}
400 return n, NoRoot
401 }
402 dm := make(map[string]interface{}, 2)
403 dm[textK] = string(t.(xml.Directive))
404 dm[seqK] = seq
405 seq++
406 na[directiveK] = dm
407 case xml.ProcInst:
408 if n == nil {
409 na = map[string]interface{}{targetK: t.(xml.ProcInst).Target, instK: string(t.(xml.ProcInst).Inst)}
410 n = map[string]interface{}{procinstK: na}
411 return n, NoRoot
412 }
413 pm := make(map[string]interface{}, 3)
414 pm[targetK] = t.(xml.ProcInst).Target
415 pm[instK] = string(t.(xml.ProcInst).Inst)
416 pm[seqK] = seq
417 seq++
418 na[procinstK] = pm
419 default:
420 // noop - shouldn't ever get here, now, since we handle all token types
421 }
422 }
423 }
424 425 // ------------------ END: NewMapXml & NewMapXmlReader -------------------------
426 427 // --------------------- mv.XmlSeq & mv.XmlSeqWriter -------------------------
428 429 // Xml encodes a MapSeq as XML with elements sorted on #seq. The companion of NewMapXmlSeq().
430 // The following rules apply.
431 // - The "#seq" key value is used to seqence the subelements or attributes only.
432 // - The "#attr" map key identifies the map of attribute map[string]interface{} values with "#text" key.
433 // - The "#comment" map key identifies a comment in the value "#text" map entry - <!--comment-->.
434 // - The "#directive" map key identifies a directive in the value "#text" map entry - <!directive>.
435 // - The "#procinst" map key identifies a process instruction in the value "#target" and "#inst"
436 // map entries - <?target inst?>.
437 // - Value type encoding:
438 // > string, bool, float64, int, int32, int64, float32: per "%v" formating
439 // > []bool, []uint8: by casting to string
440 // > structures, etc.: handed to xml.Marshal() - if there is an error, the element
441 // value is "UNKNOWN"
442 // - Elements with only attribute values or are null are terminated using "/>" unless XmlGoEmptyElemSystax() called.
443 // - If len(mv) == 1 and no rootTag is provided, then the map key is used as the root tag, possible.
444 // Thus, `{ "key":"value" }` encodes as "<key>value</key>".
445 func (mv MapSeq) Xml(rootTag ...string) ([]byte, error) {
446 m := map[string]interface{}(mv)
447 var err error
448 s := new(string)
449 p := new(pretty) // just a stub
450 451 if len(m) == 1 && len(rootTag) == 0 {
452 for key, value := range m {
453 // if it's an array, see if all values are map[string]interface{}
454 // we force a new root tag if we'll end up with no key:value in the list
455 // so: key:[string_val, bool:true] --> <doc><key>string_val</key><bool>true</bool></doc>
456 switch value.(type) {
457 case []interface{}:
458 for _, v := range value.([]interface{}) {
459 switch v.(type) {
460 case map[string]interface{}: // noop
461 default: // anything else
462 err = mapToXmlSeqIndent(false, s, DefaultRootTag, m, p)
463 goto done
464 }
465 }
466 }
467 err = mapToXmlSeqIndent(false, s, key, value, p)
468 }
469 } else if len(rootTag) == 1 {
470 err = mapToXmlSeqIndent(false, s, rootTag[0], m, p)
471 } else {
472 err = mapToXmlSeqIndent(false, s, DefaultRootTag, m, p)
473 }
474 done:
475 if xmlCheckIsValid {
476 d := xml.NewDecoder(bytes.NewReader([]byte(*s)))
477 for {
478 _, err = d.Token()
479 if err == io.EOF {
480 err = nil
481 break
482 } else if err != nil {
483 return nil, err
484 }
485 }
486 }
487 return []byte(*s), err
488 }
489 490 // The following implementation is provided only for symmetry with NewMapXmlReader[Raw]
491 // The names will also provide a key for the number of return arguments.
492 493 // XmlWriter Writes the MapSeq value as XML on the Writer.
494 // See MapSeq.Xml() for encoding rules.
495 func (mv MapSeq) XmlWriter(xmlWriter io.Writer, rootTag ...string) error {
496 x, err := mv.Xml(rootTag...)
497 if err != nil {
498 return err
499 }
500 501 _, err = xmlWriter.Write(x)
502 return err
503 }
504 505 // XmlWriteRaw writes the MapSeq value as XML on the Writer. []byte is the raw XML that was written.
506 // See Map.XmlSeq() for encoding rules.
507 /*
508 func (mv MapSeq) XmlWriterRaw(xmlWriter io.Writer, rootTag ...string) ([]byte, error) {
509 x, err := mv.Xml(rootTag...)
510 if err != nil {
511 return x, err
512 }
513 514 _, err = xmlWriter.Write(x)
515 return x, err
516 }
517 */
518 519 // XmlIndentWriter writes the MapSeq value as pretty XML on the Writer.
520 // See MapSeq.Xml() for encoding rules.
521 func (mv MapSeq) XmlIndentWriter(xmlWriter io.Writer, prefix, indent string, rootTag ...string) error {
522 x, err := mv.XmlIndent(prefix, indent, rootTag...)
523 if err != nil {
524 return err
525 }
526 527 _, err = xmlWriter.Write(x)
528 return err
529 }
530 531 // XmlIndentWriterRaw writes the Map as pretty XML on the Writer. []byte is the raw XML that was written.
532 // See Map.XmlSeq() for encoding rules.
533 /*
534 func (mv MapSeq) XmlIndentWriterRaw(xmlWriter io.Writer, prefix, indent string, rootTag ...string) ([]byte, error) {
535 x, err := mv.XmlSeqIndent(prefix, indent, rootTag...)
536 if err != nil {
537 return x, err
538 }
539 540 _, err = xmlWriter.Write(x)
541 return x, err
542 }
543 */
544 545 // -------------------- END: mv.Xml & mv.XmlWriter -------------------------------
546 547 // ---------------------- XmlSeqIndent ----------------------------
548 549 // XmlIndent encodes a map[string]interface{} as a pretty XML string.
550 // See MapSeq.XmlSeq() for encoding rules.
551 func (mv MapSeq) XmlIndent(prefix, indent string, rootTag ...string) ([]byte, error) {
552 m := map[string]interface{}(mv)
553 554 var err error
555 s := new(string)
556 p := new(pretty)
557 p.indent = indent
558 p.padding = prefix
559 560 if len(m) == 1 && len(rootTag) == 0 {
561 // this can extract the key for the single map element
562 // use it if it isn't a key for a list
563 for key, value := range m {
564 if _, ok := value.([]interface{}); ok {
565 err = mapToXmlSeqIndent(true, s, DefaultRootTag, m, p)
566 } else {
567 err = mapToXmlSeqIndent(true, s, key, value, p)
568 }
569 }
570 } else if len(rootTag) == 1 {
571 err = mapToXmlSeqIndent(true, s, rootTag[0], m, p)
572 } else {
573 err = mapToXmlSeqIndent(true, s, DefaultRootTag, m, p)
574 }
575 if xmlCheckIsValid {
576 if _, err = NewMapXml([]byte(*s)); err != nil {
577 return nil, err
578 }
579 d := xml.NewDecoder(bytes.NewReader([]byte(*s)))
580 for {
581 _, err = d.Token()
582 if err == io.EOF {
583 err = nil
584 break
585 } else if err != nil {
586 return nil, err
587 }
588 }
589 }
590 return []byte(*s), err
591 }
592 593 // where the work actually happens
594 // returns an error if an attribute is not atomic
595 func mapToXmlSeqIndent(doIndent bool, s *string, key string, value interface{}, pp *pretty) error {
596 var endTag bool
597 var isSimple bool
598 var noEndTag bool
599 var elen int
600 var ss string
601 p := &pretty{pp.indent, pp.cnt, pp.padding, pp.mapDepth, pp.start}
602 603 switch value.(type) {
604 case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
605 if doIndent {
606 *s += p.padding
607 }
608 if key != commentK && key != directiveK && key != procinstK {
609 *s += `<` + key
610 }
611 }
612 switch value.(type) {
613 case map[string]interface{}:
614 val := value.(map[string]interface{})
615 616 if key == commentK {
617 *s += `<!--` + val[textK].(string) + `-->`
618 noEndTag = true
619 break
620 }
621 622 if key == directiveK {
623 *s += `<!` + val[textK].(string) + `>`
624 noEndTag = true
625 break
626 }
627 628 if key == procinstK {
629 *s += `<?` + val[targetK].(string) + ` ` + val[instK].(string) + `?>`
630 noEndTag = true
631 break
632 }
633 634 haveAttrs := false
635 // process attributes first
636 if v, ok := val[attrK].(map[string]interface{}); ok {
637 // First, unroll the map[string]interface{} into a []keyval array.
638 // Then sequence it.
639 kv := make([]keyval, len(v))
640 n := 0
641 for ak, av := range v {
642 kv[n] = keyval{ak, av}
643 n++
644 }
645 sort.Sort(elemListSeq(kv))
646 // Now encode the attributes in original decoding sequence, using keyval array.
647 for _, a := range kv {
648 vv := a.v.(map[string]interface{})
649 switch vv[textK].(type) {
650 case string:
651 if xmlEscapeChars {
652 ss = escapeChars(vv[textK].(string))
653 } else {
654 ss = vv[textK].(string)
655 }
656 *s += ` ` + a.k + `="` + ss + `"`
657 case float64, bool, int, int32, int64, float32:
658 *s += ` ` + a.k + `="` + fmt.Sprintf("%v", vv[textK]) + `"`
659 case []byte:
660 if xmlEscapeChars {
661 ss = escapeChars(string(vv[textK].([]byte)))
662 } else {
663 ss = string(vv[textK].([]byte))
664 }
665 *s += ` ` + a.k + `="` + ss + `"`
666 default:
667 return fmt.Errorf("invalid attribute value for: %s", a.k)
668 }
669 }
670 haveAttrs = true
671 }
672 673 // simple element?
674 // every map value has, at least, "#seq" and, perhaps, "#text" and/or "#attr"
675 _, seqOK := val[seqK] // have key
676 if v, ok := val[textK]; ok && ((len(val) == 3 && haveAttrs) || (len(val) == 2 && !haveAttrs)) && seqOK {
677 if stmp, ok := v.(string); ok && stmp != "" {
678 if xmlEscapeChars {
679 stmp = escapeChars(stmp)
680 }
681 *s += ">" + stmp
682 endTag = true
683 elen = 1
684 }
685 isSimple = true
686 break
687 } else if !ok && ((len(val) == 2 && haveAttrs) || (len(val) == 1 && !haveAttrs)) && seqOK {
688 // here no #text but have #seq or #seq+#attr
689 endTag = false
690 break
691 }
692 693 // we now need to sequence everything except attributes
694 // 'kv' will hold everything that needs to be written
695 kv := make([]keyval, 0)
696 for k, v := range val {
697 if k == attrK { // already processed
698 continue
699 }
700 if k == seqK { // ignore - just for sorting
701 continue
702 }
703 switch v.(type) {
704 case []interface{}:
705 // unwind the array as separate entries
706 for _, vv := range v.([]interface{}) {
707 kv = append(kv, keyval{k, vv})
708 }
709 default:
710 kv = append(kv, keyval{k, v})
711 }
712 }
713 714 // close tag with possible attributes
715 *s += ">"
716 if doIndent {
717 *s += "\n"
718 }
719 // something more complex
720 p.mapDepth++
721 sort.Sort(elemListSeq(kv))
722 i := 0
723 for _, v := range kv {
724 switch v.v.(type) {
725 case []interface{}:
726 default:
727 if i == 0 && doIndent {
728 p.Indent()
729 }
730 }
731 i++
732 if err := mapToXmlSeqIndent(doIndent, s, v.k, v.v, p); err != nil {
733 return err
734 }
735 switch v.v.(type) {
736 case []interface{}: // handled in []interface{} case
737 default:
738 if doIndent {
739 p.Outdent()
740 }
741 }
742 i--
743 }
744 p.mapDepth--
745 endTag = true
746 elen = 1 // we do have some content other than attrs
747 case []interface{}:
748 for _, v := range value.([]interface{}) {
749 if doIndent {
750 p.Indent()
751 }
752 if err := mapToXmlSeqIndent(doIndent, s, key, v, p); err != nil {
753 return err
754 }
755 if doIndent {
756 p.Outdent()
757 }
758 }
759 return nil
760 case nil:
761 // terminate the tag
762 if doIndent {
763 *s += p.padding
764 }
765 *s += "<" + key
766 endTag, isSimple = true, true
767 break
768 default: // handle anything - even goofy stuff
769 elen = 0
770 switch value.(type) {
771 case string:
772 if xmlEscapeChars {
773 ss = escapeChars(value.(string))
774 } else {
775 ss = value.(string)
776 }
777 elen = len(ss)
778 if elen > 0 {
779 *s += ">" + ss
780 }
781 case float64, bool, int, int32, int64, float32:
782 v := fmt.Sprintf("%v", value)
783 elen = len(v)
784 if elen > 0 {
785 *s += ">" + v
786 }
787 case []byte: // NOTE: byte is just an alias for uint8
788 // similar to how xml.Marshal handles []byte structure members
789 if xmlEscapeChars {
790 ss = escapeChars(string(value.([]byte)))
791 } else {
792 ss = string(value.([]byte))
793 }
794 elen = len(ss)
795 if elen > 0 {
796 *s += ">" + ss
797 }
798 default:
799 var v []byte
800 var err error
801 if doIndent {
802 v, err = xml.MarshalIndent(value, p.padding, p.indent)
803 } else {
804 v, err = xml.Marshal(value)
805 }
806 if err != nil {
807 *s += ">UNKNOWN"
808 } else {
809 elen = len(v)
810 if elen > 0 {
811 *s += string(v)
812 }
813 }
814 }
815 isSimple = true
816 endTag = true
817 }
818 if endTag && !noEndTag {
819 if doIndent {
820 if !isSimple {
821 *s += p.padding
822 }
823 }
824 switch value.(type) {
825 case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
826 if elen > 0 || useGoXmlEmptyElemSyntax {
827 if elen == 0 {
828 *s += ">"
829 }
830 *s += `</` + key + ">"
831 } else {
832 *s += `/>`
833 }
834 }
835 } else if !noEndTag {
836 if useGoXmlEmptyElemSyntax {
837 *s += `</` + key + ">"
838 // *s += "></" + key + ">"
839 } else {
840 *s += "/>"
841 }
842 }
843 if doIndent {
844 if p.cnt > p.start {
845 *s += "\n"
846 }
847 p.Outdent()
848 }
849 850 return nil
851 }
852 853 // the element sort implementation
854 855 type keyval struct {
856 k string
857 v interface{}
858 }
859 type elemListSeq []keyval
860 861 func (e elemListSeq) Len() int {
862 return len(e)
863 }
864 865 func (e elemListSeq) Swap(i, j int) {
866 e[i], e[j] = e[j], e[i]
867 }
868 869 func (e elemListSeq) Less(i, j int) bool {
870 var iseq, jseq int
871 var fiseq, fjseq float64
872 var ok bool
873 if iseq, ok = e[i].v.(map[string]interface{})[seqK].(int); !ok {
874 if fiseq, ok = e[i].v.(map[string]interface{})[seqK].(float64); ok {
875 iseq = int(fiseq)
876 } else {
877 iseq = 9999999
878 }
879 }
880 881 if jseq, ok = e[j].v.(map[string]interface{})[seqK].(int); !ok {
882 if fjseq, ok = e[j].v.(map[string]interface{})[seqK].(float64); ok {
883 jseq = int(fjseq)
884 } else {
885 jseq = 9999999
886 }
887 }
888 889 return iseq <= jseq
890 }
891 892 // =============== https://groups.google.com/forum/#!topic/golang-nuts/lHPOHD-8qio
893 894 // BeautifyXml (re)formats an XML doc similar to Map.XmlIndent().
895 // It preserves comments, directives and process instructions,
896 func BeautifyXml(b []byte, prefix, indent string) ([]byte, error) {
897 x, err := NewMapXmlSeq(b)
898 if err != nil {
899 return nil, err
900 }
901 return x.XmlIndent(prefix, indent)
902 }
903