1 // Copyright 2012-2014 Charles Banning. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file
4 5 // keyvalues.go: Extract values from an arbitrary XML doc. Tag path can include wildcard characters.
6 7 package mxj
8 9 import (
10 "errors"
11 "fmt"
12 "strconv"
13 "strings"
14 )
15 16 // ----------------------------- get everything FOR a single key -------------------------
const (
	// minArraySize is the smallest initial capacity ever used for result slices.
	minArraySize = 32
)

// defaultArraySize is the initial capacity of the result slices allocated by
// ValuesForKey() and the legacy path walker. It is changed through SetArraySize().
var defaultArraySize = minArraySize

// SetArraySize adjusts the initial buffer capacity used for the values returned by
// ValuesForKey() and ValuesForPath(). Choosing a capacity close to the expected number
// of results can significantly reduce allocate-and-copy work on large data sets.
// Any 'size' that is not larger than the built-in minimum (32) selects the minimum.
// The capacity that is now in effect is returned.
func SetArraySize(size int) int {
	n := minArraySize
	if size > n {
		n = size
	}
	defaultArraySize = n
	return n
}
35 36 // ValuesForKey return all values in Map, 'mv', associated with a 'key'. If len(returned_values) == 0, then no match.
37 // On error, the returned slice is 'nil'. NOTE: 'key' can be wildcard, "*".
38 // 'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list.
39 // - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them.
40 // - For attributes prefix the label with the attribute prefix character, by default a
41 // hyphen, '-', e.g., "-seq:3". (See SetAttrPrefix function.)
42 // - If the 'key' refers to a list, then "key:value" could select a list member of the list.
43 // - The subkey can be wildcarded - "key:*" - to require that it's there with some value.
44 // - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an
45 // exclusion critera - e.g., "!author:William T. Gaddis".
46 // - If val contains ":" symbol, use SetFieldSeparator to a unused symbol, perhaps "|".
47 func (mv Map) ValuesForKey(key string, subkeys ...string) ([]interface{}, error) {
48 m := map[string]interface{}(mv)
49 var subKeyMap map[string]interface{}
50 if len(subkeys) > 0 {
51 var err error
52 subKeyMap, err = getSubKeyMap(subkeys...)
53 if err != nil {
54 return nil, err
55 }
56 }
57 58 ret := make([]interface{}, 0, defaultArraySize)
59 var cnt int
60 hasKey(m, key, &ret, &cnt, subKeyMap)
61 return ret[:cnt], nil
62 }
63 64 var KeyNotExistError = errors.New("Key does not exist")
65 66 // ValueForKey is a wrapper on ValuesForKey. It returns the first member of []interface{}, if any.
67 // If there is no value, "nil, nil" is returned.
68 func (mv Map) ValueForKey(key string, subkeys ...string) (interface{}, error) {
69 vals, err := mv.ValuesForKey(key, subkeys...)
70 if err != nil {
71 return nil, err
72 }
73 if len(vals) == 0 {
74 return nil, KeyNotExistError
75 }
76 return vals[0], nil
77 }
78 79 // hasKey - if the map 'key' exists append it to array
80 // if it doesn't do nothing except scan array and map values
81 func hasKey(iv interface{}, key string, ret *[]interface{}, cnt *int, subkeys map[string]interface{}) {
82 // func hasKey(iv interface{}, key string, ret *[]interface{}, subkeys map[string]interface{}) {
83 switch iv.(type) {
84 case map[string]interface{}:
85 vv := iv.(map[string]interface{})
86 // see if the current value is of interest
87 if v, ok := vv[key]; ok {
88 switch v.(type) {
89 case map[string]interface{}:
90 if hasSubKeys(v, subkeys) {
91 *ret = append(*ret, v)
92 *cnt++
93 }
94 case []interface{}:
95 for _, av := range v.([]interface{}) {
96 if hasSubKeys(av, subkeys) {
97 *ret = append(*ret, av)
98 *cnt++
99 }
100 }
101 default:
102 if len(subkeys) == 0 {
103 *ret = append(*ret, v)
104 *cnt++
105 }
106 }
107 }
108 109 // wildcard case
110 if key == "*" {
111 for _, v := range vv {
112 switch v.(type) {
113 case map[string]interface{}:
114 if hasSubKeys(v, subkeys) {
115 *ret = append(*ret, v)
116 *cnt++
117 }
118 case []interface{}:
119 for _, av := range v.([]interface{}) {
120 if hasSubKeys(av, subkeys) {
121 *ret = append(*ret, av)
122 *cnt++
123 }
124 }
125 default:
126 if len(subkeys) == 0 {
127 *ret = append(*ret, v)
128 *cnt++
129 }
130 }
131 }
132 }
133 134 // scan the rest
135 for _, v := range vv {
136 hasKey(v, key, ret, cnt, subkeys)
137 }
138 case []interface{}:
139 for _, v := range iv.([]interface{}) {
140 hasKey(v, key, ret, cnt, subkeys)
141 }
142 }
143 }
144 145 // ----------------------- get everything for a node in the Map ---------------------------
146 147 // Allow indexed arrays in "path" specification. (Request from Abhijit Kadam - abhijitk100@gmail.com.)
148 // 2014.04.28 - implementation note.
149 // Implemented as a wrapper of (old)ValuesForPath() because we need look-ahead logic to handle expansion
150 // of wildcards and unindexed arrays. Embedding such logic into valuesForKeyPath() would have made the
151 // code much more complicated; this wrapper is straightforward, easy to debug, and doesn't add significant overhead.
152 153 // ValuesForPatb retrieves all values for a path from the Map. If len(returned_values) == 0, then no match.
154 // On error, the returned array is 'nil'.
155 // 'path' is a dot-separated path of key values.
156 // - If a node in the path is '*', then everything beyond is walked.
157 // - 'path' can contain indexed array references, such as, "*.data[1]" and "msgs[2].data[0].field" -
158 // even "*[2].*[0].field".
159 // 'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list.
160 // - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them.
161 // - For attributes prefix the label with the attribute prefix character, by default a
162 // hyphen, '-', e.g., "-seq:3". (See SetAttrPrefix function.)
163 // - If the 'path' refers to a list, then "tag:value" would return member of the list.
164 // - The subkey can be wildcarded - "key:*" - to require that it's there with some value.
165 // - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an
166 // exclusion critera - e.g., "!author:William T. Gaddis".
167 // - If val contains ":" symbol, use SetFieldSeparator to a unused symbol, perhaps "|".
168 func (mv Map) ValuesForPath(path string, subkeys ...string) ([]interface{}, error) {
169 // If there are no array indexes in path, use legacy ValuesForPath() logic.
170 if strings.Index(path, "[") < 0 {
171 return mv.oldValuesForPath(path, subkeys...)
172 }
173 174 var subKeyMap map[string]interface{}
175 if len(subkeys) > 0 {
176 var err error
177 subKeyMap, err = getSubKeyMap(subkeys...)
178 if err != nil {
179 return nil, err
180 }
181 }
182 183 keys, kerr := parsePath(path)
184 if kerr != nil {
185 return nil, kerr
186 }
187 188 vals, verr := valuesForArray(keys, mv)
189 if verr != nil {
190 return nil, verr // Vals may be nil, but return empty array.
191 }
192 193 // Need to handle subkeys ... only return members of vals that satisfy conditions.
194 retvals := make([]interface{}, 0)
195 for _, v := range vals {
196 if hasSubKeys(v, subKeyMap) {
197 retvals = append(retvals, v)
198 }
199 }
200 return retvals, nil
201 }
202 203 func valuesForArray(keys []*key, m Map) ([]interface{}, error) {
204 var tmppath string
205 var haveFirst bool
206 var vals []interface{}
207 var verr error
208 209 lastkey := len(keys) - 1
210 for i := 0; i <= lastkey; i++ {
211 if !haveFirst {
212 tmppath = keys[i].name
213 haveFirst = true
214 } else {
215 tmppath += "." + keys[i].name
216 }
217 218 // Look-ahead: explode wildcards and unindexed arrays.
219 // Need to handle un-indexed list recursively:
220 // e.g., path is "stuff.data[0]" rather than "stuff[0].data[0]".
221 // Need to treat it as "stuff[0].data[0]", "stuff[1].data[0]", ...
222 if !keys[i].isArray && i < lastkey && keys[i+1].isArray {
223 // Can't pass subkeys because we may not be at literal end of path.
224 vv, vverr := m.oldValuesForPath(tmppath)
225 if vverr != nil {
226 return nil, vverr
227 }
228 for _, v := range vv {
229 // See if we can walk the value.
230 am, ok := v.(map[string]interface{})
231 if !ok {
232 continue
233 }
234 // Work the backend.
235 nvals, nvalserr := valuesForArray(keys[i+1:], Map(am))
236 if nvalserr != nil {
237 return nil, nvalserr
238 }
239 vals = append(vals, nvals...)
240 }
241 break // have recursed the whole path - return
242 }
243 244 if keys[i].isArray || i == lastkey {
245 // Don't pass subkeys because may not be at literal end of path.
246 vals, verr = m.oldValuesForPath(tmppath)
247 } else {
248 continue
249 }
250 if verr != nil {
251 return nil, verr
252 }
253 254 if i == lastkey && !keys[i].isArray {
255 break
256 }
257 258 // Now we're looking at an array - supposedly.
259 // Is index in range of vals?
260 if len(vals) <= keys[i].position {
261 vals = nil
262 break
263 }
264 265 // Return the array member of interest, if at end of path.
266 if i == lastkey {
267 vals = vals[keys[i].position:(keys[i].position + 1)]
268 break
269 }
270 271 // Extract the array member of interest.
272 am := vals[keys[i].position:(keys[i].position + 1)]
273 274 // must be a map[string]interface{} value so we can keep walking the path
275 amm, ok := am[0].(map[string]interface{})
276 if !ok {
277 vals = nil
278 break
279 }
280 281 m = Map(amm)
282 haveFirst = false
283 }
284 285 return vals, nil
286 }
// key is one element of a parsed, dot-separated path.
type key struct {
	name     string // the element's name, without any "[n]" suffix
	isArray  bool   // true if the element carried an index, "name[n]"
	position int    // the index, n; meaningful only if isArray is true
}

// parsePath splits the dot-separated path 's' into its elements. Empty elements -
// e.g., from a leading, trailing or doubled '.' - are skipped. An element of the form
// "name[n]" is returned with isArray set and position == n; anything following the
// first ']' in an element is ignored.
//
// An error is returned if an element has a '[' without a following ']', if the index
// is empty or is not a base-10 integer that fits in 32 bits, or if the index is
// negative.
func parsePath(s string) ([]*key, error) {
	elems := strings.Split(s, ".")
	ret := make([]*key, 0, len(elems))

	for _, elem := range elems {
		if elem == "" {
			continue
		}

		open := strings.Index(elem, "[")
		if open < 0 {
			ret = append(ret, &key{name: elem})
			continue
		}

		// The index is whatever sits between the first '[' and the next ']'.
		rest := elem[open+1:]
		end := strings.Index(rest, "]")
		if end < 0 {
			return nil, fmt.Errorf("no right bracket on key index: %s", elem)
		}
		idx := rest[:end]
		if idx == "" {
			return nil, fmt.Errorf("no index value on key: %s", elem)
		}

		// convert idx to an int value
		pos, nerr := strconv.ParseInt(idx, 10, 32)
		if nerr != nil {
			return nil, fmt.Errorf("cannot convert index to int value: %s", idx)
		}
		// A negative index can never select a list member and would make the
		// slice expressions that consume 'position' panic.
		if pos < 0 {
			return nil, fmt.Errorf("key index cannot be negative: %s", elem)
		}

		ret = append(ret, &key{name: elem[:open], isArray: true, position: int(pos)})
	}

	return ret, nil
}
329 330 // legacy ValuesForPath() - now wrapped to handle special case of indexed arrays in 'path'.
331 func (mv Map) oldValuesForPath(path string, subkeys ...string) ([]interface{}, error) {
332 m := map[string]interface{}(mv)
333 var subKeyMap map[string]interface{}
334 if len(subkeys) > 0 {
335 var err error
336 subKeyMap, err = getSubKeyMap(subkeys...)
337 if err != nil {
338 return nil, err
339 }
340 }
341 342 keys := strings.Split(path, ".")
343 if keys[len(keys)-1] == "" {
344 keys = keys[:len(keys)-1]
345 }
346 ivals := make([]interface{}, 0, defaultArraySize)
347 var cnt int
348 valuesForKeyPath(&ivals, &cnt, m, keys, subKeyMap)
349 return ivals[:cnt], nil
350 }
351 352 func valuesForKeyPath(ret *[]interface{}, cnt *int, m interface{}, keys []string, subkeys map[string]interface{}) {
353 lenKeys := len(keys)
354 355 // load 'm' values into 'ret'
356 // expand any lists
357 if lenKeys == 0 {
358 switch m.(type) {
359 case map[string]interface{}:
360 if subkeys != nil {
361 if ok := hasSubKeys(m, subkeys); !ok {
362 return
363 }
364 }
365 *ret = append(*ret, m)
366 *cnt++
367 case []interface{}:
368 for i, v := range m.([]interface{}) {
369 if subkeys != nil {
370 if ok := hasSubKeys(v, subkeys); !ok {
371 continue // only load list members with subkeys
372 }
373 }
374 *ret = append(*ret, (m.([]interface{}))[i])
375 *cnt++
376 }
377 default:
378 if subkeys != nil {
379 return // must be map[string]interface{} if there are subkeys
380 }
381 *ret = append(*ret, m)
382 *cnt++
383 }
384 return
385 }
386 387 // key of interest
388 key := keys[0]
389 switch key {
390 case "*": // wildcard - scan all values
391 switch m.(type) {
392 case map[string]interface{}:
393 for _, v := range m.(map[string]interface{}) {
394 // valuesForKeyPath(ret, v, keys[1:], subkeys)
395 valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
396 }
397 case []interface{}:
398 for _, v := range m.([]interface{}) {
399 switch v.(type) {
400 // flatten out a list of maps - keys are processed
401 case map[string]interface{}:
402 for _, vv := range v.(map[string]interface{}) {
403 // valuesForKeyPath(ret, vv, keys[1:], subkeys)
404 valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys)
405 }
406 default:
407 // valuesForKeyPath(ret, v, keys[1:], subkeys)
408 valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
409 }
410 }
411 }
412 default: // key - must be map[string]interface{}
413 switch m.(type) {
414 case map[string]interface{}:
415 if v, ok := m.(map[string]interface{})[key]; ok {
416 // valuesForKeyPath(ret, v, keys[1:], subkeys)
417 valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
418 }
419 case []interface{}: // may be buried in list
420 for _, v := range m.([]interface{}) {
421 switch v.(type) {
422 case map[string]interface{}:
423 if vv, ok := v.(map[string]interface{})[key]; ok {
424 // valuesForKeyPath(ret, vv, keys[1:], subkeys)
425 valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys)
426 }
427 }
428 }
429 }
430 }
431 }
// hasSubKeys reports whether 'v' satisfies every name:value condition in 'subkeys'.
//   - With no subkeys every 'v' qualifies.
//   - Otherwise 'v' must be a map[string]interface{} value; anything else fails.
//   - A condition matches if the map has the name and its value has the same type
//     (string, bool or float64) and is equal to the condition's value.
//   - A condition value that is the string "*" is a wildcard: the name only has to
//     be present.
//   - A name preceded by '!' inverts the condition: "!name:*" requires that the name is
//     absent, and "!name:value" requires that the name is present with some other
//     value.
//
// An empty name is looked up like any other name; it is never treated as a NOT-key.
func hasSubKeys(v interface{}, subkeys map[string]interface{}) bool {
	if len(subkeys) == 0 {
		return true
	}

	// not a map[string]interface{} value, can't have subkeys
	mv, isMap := v.(map[string]interface{})
	if !isMap {
		return false
	}

	// do all subKey name:value pairs match?
	for skey, sval := range subkeys {
		// Check the length before looking at the first byte: an empty name, e.g.
		// from a ":val" subkey spec, must not cause an out-of-range panic.
		isNotKey := len(skey) > 0 && skey[0] == '!'
		if isNotKey {
			skey = skey[1:]
		}

		wildcard := false
		if s, ok := sval.(string); ok && s == "*" {
			wildcard = true
		}

		vv, present := mv[skey]
		if !present {
			// key not there; that's acceptable only for "!name:*"
			if isNotKey && wildcard {
				continue
			}
			return false
		}

		if wildcard {
			if isNotKey { // key is there, and we don't want it
				return false
			}
			continue
		}

		// interface{} values are equal only if type and value agree
		matched := false
		switch want := sval.(type) {
		case string:
			got, ok := vv.(string)
			matched = ok && got == want
		case bool:
			got, ok := vv.(bool)
			matched = ok && got == want
		case float64:
			got, ok := vv.(float64)
			matched = ok && got == want
		}

		// A plain condition fails when the value differs; a NOT-key condition
		// fails when the value is the same.
		if matched == isNotKey {
			return false
		}
	}

	// all subkeys matched
	return true
}
503 504 // Generate map of key:value entries as map[string]string.
505 // 'kv' arguments are "name:value" pairs: attribute keys are designated with prepended hyphen, '-'.
506 // If len(kv) == 0, the return is (nil, nil).
507 func getSubKeyMap(kv ...string) (map[string]interface{}, error) {
508 if len(kv) == 0 {
509 return nil, nil
510 }
511 m := make(map[string]interface{}, 0)
512 for _, v := range kv {
513 vv := strings.Split(v, fieldSep)
514 switch len(vv) {
515 case 2:
516 m[vv[0]] = interface{}(vv[1])
517 case 3:
518 switch vv[2] {
519 case "string", "char", "text":
520 m[vv[0]] = interface{}(vv[1])
521 case "bool", "boolean":
522 // ParseBool treats "1"==true & "0"==false
523 b, err := strconv.ParseBool(vv[1])
524 if err != nil {
525 return nil, fmt.Errorf("can't convert subkey value to bool: %s", vv[1])
526 }
527 m[vv[0]] = interface{}(b)
528 case "float", "float64", "num", "number", "numeric":
529 f, err := strconv.ParseFloat(vv[1], 64)
530 if err != nil {
531 return nil, fmt.Errorf("can't convert subkey value to float: %s", vv[1])
532 }
533 m[vv[0]] = interface{}(f)
534 default:
535 return nil, fmt.Errorf("unknown subkey conversion spec: %s", v)
536 }
537 default:
538 return nil, fmt.Errorf("unknown subkey spec: %s", v)
539 }
540 }
541 return m, nil
542 }
543 544 // ------------------------------- END of valuesFor ... ----------------------------
545 546 // ----------------------- locate where a key value is in the tree -------------------
547 548 //----------------------------- find all paths to a key --------------------------------
549 550 // PathsForKey returns all paths through Map, 'mv', (in dot-notation) that terminate with the specified key.
551 // Results can be used with ValuesForPath.
552 func (mv Map) PathsForKey(key string) []string {
553 m := map[string]interface{}(mv)
554 breadbasket := make(map[string]bool, 0)
555 breadcrumbs := ""
556 557 hasKeyPath(breadcrumbs, m, key, breadbasket)
558 if len(breadbasket) == 0 {
559 return nil
560 }
561 562 // unpack map keys to return
563 res := make([]string, len(breadbasket))
564 var i int
565 for k := range breadbasket {
566 res[i] = k
567 i++
568 }
569 570 return res
571 }
572 573 // PathForKeyShortest extracts the shortest path from all possible paths - from PathsForKey() - in Map, 'mv'..
574 // Paths are strings using dot-notation.
575 func (mv Map) PathForKeyShortest(key string) string {
576 paths := mv.PathsForKey(key)
577 578 lp := len(paths)
579 if lp == 0 {
580 return ""
581 }
582 if lp == 1 {
583 return paths[0]
584 }
585 586 shortest := paths[0]
587 shortestLen := len(strings.Split(shortest, "."))
588 589 for i := 1; i < len(paths); i++ {
590 vlen := len(strings.Split(paths[i], "."))
591 if vlen < shortestLen {
592 shortest = paths[i]
593 shortestLen = vlen
594 }
595 }
596 597 return shortest
598 }
// hasKeyPath is a breadcrumber: it walks 'iv' and saves in 'basket' every trail, in
// dot-notation, that ends at the prescribed 'key'. 'crumbs' is the trail that leads
// to 'iv'; it is empty at the root.
//   - For a map, the trail to 'key' is saved if the map has that key, and then every
//     member is walked with its own name appended to the trail.
//   - For a list, each member is walked with the trail unchanged.
//   - Any other value ends the walk.
func hasKeyPath(crumbs string, iv interface{}, key string, basket map[string]bool) {
	switch node := iv.(type) {
	case []interface{}:
		// crumb-trail doesn't change, pass it on
		for _, member := range node {
			hasKeyPath(crumbs, member, key, basket)
		}
	case map[string]interface{}:
		// Every new breadcrumb starts with the trail we have; the separator is
		// needed only if that trail is not empty.
		prefix := crumbs
		if prefix != "" {
			prefix += "."
		}

		if _, ok := node[key]; ok {
			basket[prefix+key] = true
		}

		// walk on down the path, key could occur again at deeper node
		for name, child := range node {
			hasKeyPath(prefix+name, child, key, basket)
		}
	}
}
634 635 var PathNotExistError = errors.New("Path does not exist")
636 637 // ValueForPath wraps ValuesFor Path and returns the first value returned.
638 // If no value is found it returns 'nil' and PathNotExistError.
639 func (mv Map) ValueForPath(path string) (interface{}, error) {
640 vals, err := mv.ValuesForPath(path)
641 if err != nil {
642 return nil, err
643 }
644 if len(vals) == 0 {
645 return nil, PathNotExistError
646 }
647 return vals[0], nil
648 }
649 650 // ValuesForPathString returns the first found value for the path as a string.
651 func (mv Map) ValueForPathString(path string) (string, error) {
652 vals, err := mv.ValuesForPath(path)
653 if err != nil {
654 return "", err
655 }
656 if len(vals) == 0 {
657 return "", errors.New("ValueForPath: path not found")
658 }
659 val := vals[0]
660 return fmt.Sprintf("%v", val), nil
661 }
662 663 // ValueOrEmptyForPathString returns the first found value for the path as a string.
664 // If the path is not found then it returns an empty string.
665 func (mv Map) ValueOrEmptyForPathString(path string) string {
666 str, _ := mv.ValueForPathString(path)
667 return str
668 }
669