1 package text
// NostrEscape appends src to dst, escaped as a JSON string body according to RFC8259.
4 //
5 // This is the efficient implementation based on the NIP-01 specification:
6 //
7 // To prevent implementation differences from creating a different event ID for
8 // the same event, the following rules MUST be followed while serializing:
9 //
10 // No whitespace, line breaks or other unnecessary formatting should be included
11 // in the output JSON. No characters except the following should be escaped, and
12 // instead should be included verbatim:
13 //
14 // - A line break, 0x0A, as \n
15 // - A double quote, 0x22, as \"
16 // - A backslash, 0x5C, as \\
17 // - A carriage return, 0x0D, as \r
18 // - A tab character, 0x09, as \t
19 // - A backspace, 0x08, as \b
20 // - A form feed, 0x0C, as \f
21 //
22 // UTF-8 should be used for encoding.
23 //
24 // NOTE: We also escape all other control characters (0x00-0x1F excluding those above)
25 // to ensure valid JSON, even though NIP-01 doesn't require it. This prevents
26 // JSON parsing errors when events with binary data in content are sent to relays.
func NostrEscape(dst, src []byte) []byte {
	// Lower-case hex digits used for the \u00XX form.
	const hexDigits = "0123456789abcdef"

	n := len(src)

	// With no caller-supplied buffer, reserve about 1.5x the input up front.
	// The worst case (every byte a control character) is 6x, but escapes are
	// rare in practice and append grows the buffer when needed. The
	// comparison guards against the multiplication wrapping around.
	if dst == nil && n > 0 {
		capHint := n * 3 / 2
		if capHint < n {
			capHint = n
		}
		dst = make([]byte, 0, capHint)
	}

	for i, c := range src {
		switch c {
		case '"':
			dst = append(dst, '\\', '"')
		case '\\':
			// A backslash directly followed by 'u' is emitted as a single
			// backslash; any other backslash is doubled.
			if i+1 < n && src[i+1] == 'u' {
				dst = append(dst, '\\')
			} else {
				dst = append(dst, '\\', '\\')
			}
		case '\b':
			dst = append(dst, '\\', 'b')
		case '\t':
			dst = append(dst, '\\', 't')
		case '\n':
			dst = append(dst, '\\', 'n')
		case '\f':
			dst = append(dst, '\\', 'f')
		case '\r':
			dst = append(dst, '\\', 'r')
		default:
			if c < 0x20 {
				// Remaining control bytes have no short escape; write them
				// as \u00XX so the output stays valid JSON.
				dst = append(dst, '\\', 'u', '0', '0', hexDigits[c>>4], hexDigits[c&0x0F])
			} else {
				// Everything else, including UTF-8 multi-byte sequences, is
				// copied verbatim.
				dst = append(dst, c)
			}
		}
	}
	return dst
}
82 83 // NostrUnescape reverses the operation of NostrEscape except instead of
84 // appending it to the provided slice, it rewrites it, eliminating a memory
85 // copy. Keep in mind that the original JSON will be mangled by this operation,
86 // but the resultant slices will cost zero allocations.
//
// The escapes \" \\ \b \t \n \f \r are replaced by the byte they denote, and
// \u00XX with two valid hex digits and a value below 0x20 is decoded to that
// control byte. Every other backslash sequence (\/, \<digit>, any other \u
// form, unknown letters) and a lone trailing backslash are copied through
// unchanged, so they are still present when the text is escaped again.
//
// The returned slice aliases dst and is never longer than it.
func NostrUnescape(dst []byte) (b []byte) {
	// r is the read cursor and w the write cursor. w never passes r, so
	// writing in place cannot clobber bytes that have not been read yet.
	var r, w int
	for ; r < len(dst); r++ {
		c := dst[r]
		if c != '\\' {
			dst[w] = c
			w++
			continue
		}

		// A backslash that is the last byte has nothing to escape; keep it
		// rather than reading past the end of the slice.
		if r+1 >= len(dst) {
			dst[w] = '\\'
			w++
			break
		}

		r++
		c = dst[r]
		switch c {
		// NIP-01 single-letter escapes: decode to the raw byte.
		case '"', '\\':
			dst[w] = c
			w++
		case 'b':
			dst[w] = '\b'
			w++
		case 't':
			dst[w] = '\t'
			w++
		case 'n':
			dst[w] = '\n'
			w++
		case 'f':
			dst[w] = '\f'
			w++
		case 'r':
			dst[w] = '\r'
			w++

		case 'u':
			// Decode only \u0000-\u001F. Both trailing digits must be valid
			// hex; otherwise the sequence is not one of ours and is kept.
			if r+4 < len(dst) && dst[r+1] == '0' && dst[r+2] == '0' {
				hi, okHi := nostrHexNibble(dst[r+3])
				lo, okLo := nostrHexNibble(dst[r+4])
				if okHi && okLo {
					if v := hi<<4 | lo; v < 0x20 {
						dst[w] = v
						w++
						r += 4 // skip the "00XX" that followed the 'u'
						continue
					}
				}
			}
			// Any other \u sequence: keep the "\u" prefix; the digits that
			// follow are copied by later iterations.
			dst[w] = '\\'
			w++
			dst[w] = 'u'
			w++

		default:
			// \/, \<digit> and every unrecognised escape are preserved as
			// backslash + character so ID generation sees the same bytes.
			dst[w] = '\\'
			w++
			dst[w] = c
			w++
		}
	}
	return dst[:w]
}

// nostrHexNibble converts one ASCII hex digit (either case) to its value.
// ok is false when c is not a hex digit.
func nostrHexNibble(c byte) (v byte, ok bool) {
	switch {
	case c >= '0' && c <= '9':
		return c - '0', true
	case c >= 'a' && c <= 'f':
		return c - 'a' + 10, true
	case c >= 'A' && c <= 'F':
		return c - 'A' + 10, true
	}
	return 0, false
}
191