punycode.mx raw
1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package cookiejar
6
7 // This file implements the Punycode algorithm from RFC 3492.
8
9 import (
10 "fmt"
11 "net/http/internal/ascii"
12 "bytes"
13 "unicode/utf8"
14 )
15
// These parameter values are specified in section 5.
//
// All computation is done with int32s, so that overflow behavior is identical
// regardless of whether int is 32-bit or 64-bit.
const (
	// base is the number of distinct digit values; encodeDigit accepts
	// digits in the range [0, base). It is also the step by which k
	// advances in encode's digit loop and in adapt.
	base int32 = 36
	// damp is the divisor applied to delta by adapt on the first adaptation.
	damp int32 = 700
	// initialBias is the value bias starts at in encode.
	initialBias int32 = 72
	// initialN is the value n starts at in encode.
	initialN int32 = 128
	// skew is added to the denominator in adapt's final bias formula.
	skew int32 = 38
	// tmax is the upper clamp for the per-digit threshold t in encode.
	tmax int32 = 26
	// tmin is the lower clamp for the per-digit threshold t in encode.
	tmin int32 = 1
)
29
30 // encode encodes a string as specified in section 6.3 and prepends prefix to
31 // the result.
32 //
33 // The "while h < length(input)" line in the specification becomes "for
34 // remaining != 0" in the Go code, because len(s) in Go is in bytes, not runes.
35 func encode(prefix, s string) (string, error) {
36 output := []byte{:len(prefix):len(prefix)+1+2*len(s)}
37 copy(output, prefix)
38 delta, n, bias := int32(0), initialN, initialBias
39 b, remaining := int32(0), int32(0)
40 for _, r := range s {
41 if r < utf8.RuneSelf {
42 b++
43 output = append(output, byte(r))
44 } else {
45 remaining++
46 }
47 }
48 h := b
49 if b > 0 {
50 output = append(output, '-')
51 }
52 for remaining != 0 {
53 m := int32(0x7fffffff)
54 for _, r := range s {
55 if m > r && r >= n {
56 m = r
57 }
58 }
59 delta += (m - n) * (h + 1)
60 if delta < 0 {
61 return "", fmt.Errorf("cookiejar: invalid label %q", s)
62 }
63 n = m
64 for _, r := range s {
65 if r < n {
66 delta++
67 if delta < 0 {
68 return "", fmt.Errorf("cookiejar: invalid label %q", s)
69 }
70 continue
71 }
72 if r > n {
73 continue
74 }
75 q := delta
76 for k := base; ; k += base {
77 t := k - bias
78 if t < tmin {
79 t = tmin
80 } else if t > tmax {
81 t = tmax
82 }
83 if q < t {
84 break
85 }
86 output = append(output, encodeDigit(t+(q-t)%(base-t)))
87 q = (q - t) / (base - t)
88 }
89 output = append(output, encodeDigit(q))
90 bias = adapt(delta, h+1, h == b)
91 delta = 0
92 h++
93 remaining--
94 }
95 delta++
96 n++
97 }
98 return string(output), nil
99 }
100
// encodeDigit maps a digit value to its ASCII representation: 0 through 25
// become 'a' through 'z' and 26 through 35 become '0' through '9'.
//
// It panics for any other value; callers are expected to pass only values
// in that range.
func encodeDigit(digit int32) byte {
	if digit >= 0 && digit < 26 {
		return byte('a' + digit)
	}
	if digit >= 26 && digit < 36 {
		return byte('0' + (digit - 26))
	}
	panic("cookiejar: internal error in punycode encoding")
}
110
111 // adapt is the bias adaptation function specified in section 6.1.
112 func adapt(delta, numPoints int32, firstTime bool) int32 {
113 if firstTime {
114 delta /= damp
115 } else {
116 delta /= 2
117 }
118 delta += delta / numPoints
119 k := int32(0)
120 for delta > ((base-tmin)*tmax)/2 {
121 delta /= base - tmin
122 k += base
123 }
124 return k + (base-tmin+1)*delta/(delta+skew)
125 }
126
127 // Strictly speaking, the remaining code below deals with IDNA (RFC 5890 and
128 // friends) and not Punycode (RFC 3492) per se.
129
// acePrefix is the ASCII Compatible Encoding prefix. toASCII passes it to
// encode so that every encoded (non-ASCII) label starts with it.
const acePrefix = "xn--"
132
133 // toASCII converts a domain or domain label to its ASCII form. For example,
134 // toASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
135 // toASCII("golang") is "golang".
136 func toASCII(s string) (string, error) {
137 if ascii.Is(s) {
138 return s, nil
139 }
140 labels := bytes.Split(s, ".")
141 for i, label := range labels {
142 if !ascii.Is(label) {
143 a, err := encode(acePrefix, label)
144 if err != nil {
145 return "", err
146 }
147 labels[i] = a
148 }
149 }
150 return bytes.Join(labels, "."), nil
151 }
152