escape.go raw

   1  package text
   2  
   3  // NostrEscape for JSON encoding according to RFC8259.
   4  //
   5  // This is the efficient implementation based on the NIP-01 specification:
   6  //
   7  // To prevent implementation differences from creating a different event ID for
   8  // the same event, the following rules MUST be followed while serializing:
   9  //
  10  //	No whitespace, line breaks or other unnecessary formatting should be included
  11  //	in the output JSON. No characters except the following should be escaped, and
  12  //	instead should be included verbatim:
  13  //
  14  //	- A line break, 0x0A, as \n
  15  //	- A double quote, 0x22, as \"
  16  //	- A backslash, 0x5C, as \\
  17  //	- A carriage return, 0x0D, as \r
  18  //	- A tab character, 0x09, as \t
  19  //	- A backspace, 0x08, as \b
  20  //	- A form feed, 0x0C, as \f
  21  //
  22  //	UTF-8 should be used for encoding.
  23  //
  24  // NOTE: We also escape all other control characters (0x00-0x1F excluding those above)
  25  // to ensure valid JSON, even though NIP-01 doesn't require it. This prevents
  26  // JSON parsing errors when events with binary data in content are sent to relays.
  27  func NostrEscape(dst, src []byte) []byte {
  28  	l := len(src)
  29  	// Pre-allocate buffer if nil to reduce reallocations
  30  	// Estimate: worst case is all control chars which expand to 6 bytes each (\u00XX)
  31  	// but most strings have few escapes, so estimate len(src) * 1.5 as a safe middle ground
  32  	if dst == nil && l > 0 {
  33  		estimatedSize := l * 3 / 2
  34  		if estimatedSize < l {
  35  			estimatedSize = l
  36  		}
  37  		dst = make([]byte, 0, estimatedSize)
  38  	}
  39  	for i := 0; i < l; i++ {
  40  		c := src[i]
  41  		if c == '"' {
  42  			dst = append(dst, '\\', '"')
  43  		} else if c == '\\' {
  44  			// if i+1 < l && src[i+1] == 'u' || i+1 < l && src[i+1] == '/' {
  45  			if i+1 < l && src[i+1] == 'u' {
  46  				dst = append(dst, '\\')
  47  			} else {
  48  				dst = append(dst, '\\', '\\')
  49  			}
  50  		} else if c == '\b' {
  51  			dst = append(dst, '\\', 'b')
  52  		} else if c == '\t' {
  53  			dst = append(dst, '\\', 't')
  54  		} else if c == '\n' {
  55  			dst = append(dst, '\\', 'n')
  56  		} else if c == '\f' {
  57  			dst = append(dst, '\\', 'f')
  58  		} else if c == '\r' {
  59  			dst = append(dst, '\\', 'r')
  60  		} else if c < 32 {
  61  			// Escape all other control characters (0x00-0x1F except those handled above) as \uXXXX
  62  			// This ensures valid JSON even when content contains binary data
  63  			dst = append(dst, '\\', 'u', '0', '0')
  64  			hexHigh := (c >> 4) & 0x0F
  65  			hexLow := c & 0x0F
  66  			if hexHigh < 10 {
  67  				dst = append(dst, byte('0'+hexHigh))
  68  			} else {
  69  				dst = append(dst, byte('a'+(hexHigh-10)))
  70  			}
  71  			if hexLow < 10 {
  72  				dst = append(dst, byte('0'+hexLow))
  73  			} else {
  74  				dst = append(dst, byte('a'+(hexLow-10)))
  75  			}
  76  		} else {
  77  			dst = append(dst, c)
  78  		}
  79  	}
  80  	return dst
  81  }
  82  
  83  // NostrUnescape reverses the operation of NostrEscape except instead of
  84  // appending it to the provided slice, it rewrites it, eliminating a memory
  85  // copy. Keep in mind that the original JSON will be mangled by this operation,
  86  // but the resultant slices will cost zero allocations.
  87  func NostrUnescape(dst []byte) (b []byte) {
  88  	var r, w int
  89  	for ; r < len(dst); r++ {
  90  		if dst[r] == '\\' {
  91  			r++
  92  			c := dst[r]
  93  			switch {
  94  
  95  			// nip-01 specifies the following single letter C-style escapes for
  96  			// control codes under 0x20.
  97  			//
  98  			// no others are specified but must be preserved, so only these can
  99  			// be safely decoded at runtime as they must be re-encoded when
 100  			// marshalled.
 101  			case c == '"':
 102  				dst[w] = '"'
 103  				w++
 104  			case c == '\\':
 105  				dst[w] = '\\'
 106  				w++
 107  			case c == 'b':
 108  				dst[w] = '\b'
 109  				w++
 110  			case c == 't':
 111  				dst[w] = '\t'
 112  				w++
 113  			case c == 'n':
 114  				dst[w] = '\n'
 115  				w++
 116  			case c == 'f':
 117  				dst[w] = '\f'
 118  				w++
 119  			case c == 'r':
 120  				dst[w] = '\r'
 121  				w++
 122  
 123  			// special cases for non-nip-01 specified json escapes (must be
 124  			// preserved for ID generation).
 125  		case c == 'u':
 126  			// Check if this is a \u0000-\u001F sequence we generated
 127  			if r+4 < len(dst) && dst[r+1] == '0' && dst[r+2] == '0' {
 128  				// Extract hex digits
 129  				hexHigh := dst[r+3]
 130  				hexLow := dst[r+4]
 131  				
 132  				var val byte
 133  				if hexHigh >= '0' && hexHigh <= '9' {
 134  					val = (hexHigh - '0') << 4
 135  				} else if hexHigh >= 'a' && hexHigh <= 'f' {
 136  					val = (hexHigh - 'a' + 10) << 4
 137  				} else if hexHigh >= 'A' && hexHigh <= 'F' {
 138  					val = (hexHigh - 'A' + 10) << 4
 139  				}
 140  				
 141  				if hexLow >= '0' && hexLow <= '9' {
 142  					val |= hexLow - '0'
 143  				} else if hexLow >= 'a' && hexLow <= 'f' {
 144  					val |= hexLow - 'a' + 10
 145  				} else if hexLow >= 'A' && hexLow <= 'F' {
 146  					val |= hexLow - 'A' + 10
 147  				}
 148  				
 149  				// Only decode if it's a control character (0x00-0x1F)
 150  				if val < 32 {
 151  					dst[w] = val
 152  					w++
 153  					r += 4 // Skip the u00XX part
 154  					continue
 155  				}
 156  			}
 157  			// Not our generated \u0000-\u001F, preserve as-is
 158  			dst[w] = '\\'
 159  			w++
 160  			dst[w] = 'u'
 161  			w++
 162  		case c == '/':
 163  				dst[w] = '\\'
 164  				w++
 165  				dst[w] = '/'
 166  				w++
 167  
 168  			// special case for octal escapes (must be preserved for ID
 169  			// generation).
 170  			case c >= '0' && c <= '9':
 171  				dst[w] = '\\'
 172  				w++
 173  				dst[w] = c
 174  				w++
 175  
 176  				// anything else after a reverse solidus just preserve it.
 177  			default:
 178  				dst[w] = dst[r]
 179  				w++
 180  				dst[w] = c
 181  				w++
 182  			}
 183  		} else {
 184  			dst[w] = dst[r]
 185  			w++
 186  		}
 187  	}
 188  	b = dst[:w]
 189  	return
 190  }
 191