1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 //go:generate go run gen.go gen_trieval.go gen_ranges.go
6 7 // Package bidi contains functionality for bidirectional text support.
8 //
9 // See https://www.unicode.org/reports/tr9.
10 //
11 // NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
12 // and without notice.
13 package bidi // import "golang.org/x/text/unicode/bidi"
14 15 // TODO
16 // - Transformer for reordering?
17 // - Transformer (validator, really) for Bidi Rule.
18 19 import (
20 "bytes"
21 )
22 23 // This API tries to avoid dealing with embedding levels for now. Under the hood
24 // these will be computed, but the question is to which extent the user should
25 // know they exist. We should at some point allow the user to specify an
26 // embedding hierarchy, though.

// Direction describes the overall flow of a piece of text.
type Direction int

// The possible Direction values. LeftToRight is the zero value.
const (
	// LeftToRight indicates that the text has no right-to-left characters
	// and that it either has some left-to-right characters or the option
	// DefaultDirection(LeftToRight) was passed.
	LeftToRight = Direction(iota)

	// RightToLeft indicates that the text has no left-to-right characters
	// and that it either has some right-to-left characters or the option
	// DefaultDirection(RightToLeft) was passed.
	RightToLeft

	// Mixed indicates that the text has both left-to-right and
	// right-to-left characters.
	Mixed

	// Neutral indicates that the text has neither left-to-right nor
	// right-to-left characters and that no default direction has been set.
	Neutral
)
50 51 type options struct {
52 defaultDirection Direction
53 }
54 55 // An Option is an option for Bidi processing.
56 type Option func(*options)
57 58 // ICU allows the user to define embedding levels. This may be used, for example,
59 // to use hierarchical structure of markup languages to define embeddings.
60 // The following option may be a way to expose this functionality in this API.
61 // // LevelFunc sets a function that associates nesting levels with the given text.
62 // // The levels function will be called with monotonically increasing values for p.
63 // func LevelFunc(levels func(p int) int) Option {
64 // panic("unimplemented")
65 // }
66 67 // DefaultDirection sets the default direction for a Paragraph. The direction is
68 // overridden if the text contains directional characters.
69 func DefaultDirection(d Direction) Option {
70 return func(opts *options) {
71 opts.defaultDirection = d
72 }
73 }
74 75 // A Paragraph holds a single Paragraph for Bidi processing.
76 type Paragraph struct {
77 p []byte
78 o Ordering
79 opts []Option
80 types []Class
81 pairTypes []bracketType
82 pairValues []rune
83 runes []rune
84 options options
85 }
86 87 // Initialize the p.pairTypes, p.pairValues and p.types from the input previously
88 // set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph
89 // separator (bidi class B).
90 //
91 // The function p.Order() needs these values to be set, so this preparation could be postponed.
92 // But since the SetBytes and SetStrings functions return the length of the input up to the paragraph
93 // separator, the whole input needs to be processed anyway and should not be done twice.
94 //
95 // The function has the same return values as SetBytes() / SetString()
96 func (p *Paragraph) prepareInput() (n int, err error) {
97 p.runes = bytes.Runes(p.p)
98 bytecount := 0
99 // clear slices from previous SetString or SetBytes
100 p.pairTypes = nil
101 p.pairValues = nil
102 p.types = nil
103 104 for _, r := range p.runes {
105 props, i := LookupRune(r)
106 bytecount += i
107 cls := props.Class()
108 if cls == B {
109 return bytecount, nil
110 }
111 p.types = append(p.types, cls)
112 if props.IsOpeningBracket() {
113 p.pairTypes = append(p.pairTypes, bpOpen)
114 p.pairValues = append(p.pairValues, r)
115 } else if props.IsBracket() {
116 // this must be a closing bracket,
117 // since IsOpeningBracket is not true
118 p.pairTypes = append(p.pairTypes, bpClose)
119 p.pairValues = append(p.pairValues, r)
120 } else {
121 p.pairTypes = append(p.pairTypes, bpNone)
122 p.pairValues = append(p.pairValues, 0)
123 }
124 }
125 return bytecount, nil
126 }
127 128 // SetBytes configures p for the given paragraph text. It replaces text
129 // previously set by SetBytes or SetString. If b contains a paragraph separator
130 // it will only process the first paragraph and report the number of bytes
131 // consumed from b including this separator. Error may be non-nil if options are
132 // given.
133 func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
134 p.p = b
135 p.opts = opts
136 return p.prepareInput()
137 }
138 139 // SetString configures s for the given paragraph text. It replaces text
140 // previously set by SetBytes or SetString. If s contains a paragraph separator
141 // it will only process the first paragraph and report the number of bytes
142 // consumed from s including this separator. Error may be non-nil if options are
143 // given.
144 func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
145 p.p = []byte(s)
146 p.opts = opts
147 return p.prepareInput()
148 }
149 150 // IsLeftToRight reports whether the principle direction of rendering for this
151 // paragraphs is left-to-right. If this returns false, the principle direction
152 // of rendering is right-to-left.
153 func (p *Paragraph) IsLeftToRight() bool {
154 return p.Direction() == LeftToRight
155 }
156 157 // Direction returns the direction of the text of this paragraph.
158 //
159 // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
160 func (p *Paragraph) Direction() Direction {
161 return p.o.Direction()
162 }
163 164 // TODO: what happens if the position is > len(input)? This should return an error.
165 166 // RunAt reports the Run at the given position of the input text.
167 //
168 // This method can be used for computing line breaks on paragraphs.
169 func (p *Paragraph) RunAt(pos int) Run {
170 c := 0
171 runNumber := 0
172 for i, r := range p.o.runes {
173 c += len(r)
174 if pos < c {
175 runNumber = i
176 }
177 }
178 return p.o.Run(runNumber)
179 }
180 181 func calculateOrdering(levels []level, runes []rune) Ordering {
182 var curDir Direction
183 184 prevDir := Neutral
185 prevI := 0
186 187 o := Ordering{}
188 // lvl = 0,2,4,...: left to right
189 // lvl = 1,3,5,...: right to left
190 for i, lvl := range levels {
191 if lvl%2 == 0 {
192 curDir = LeftToRight
193 } else {
194 curDir = RightToLeft
195 }
196 if curDir != prevDir {
197 if i > 0 {
198 o.runes = append(o.runes, runes[prevI:i])
199 o.directions = append(o.directions, prevDir)
200 o.startpos = append(o.startpos, prevI)
201 }
202 prevI = i
203 prevDir = curDir
204 }
205 }
206 o.runes = append(o.runes, runes[prevI:])
207 o.directions = append(o.directions, prevDir)
208 o.startpos = append(o.startpos, prevI)
209 return o
210 }
211 212 // Order computes the visual ordering of all the runs in a Paragraph.
213 func (p *Paragraph) Order() (Ordering, error) {
214 if len(p.types) == 0 {
215 return Ordering{}, nil
216 }
217 218 for _, fn := range p.opts {
219 fn(&p.options)
220 }
221 lvl := level(-1)
222 if p.options.defaultDirection == RightToLeft {
223 lvl = 1
224 }
225 para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl)
226 if err != nil {
227 return Ordering{}, err
228 }
229 230 levels := para.getLevels([]int{len(p.types)})
231 232 p.o = calculateOrdering(levels, p.runes)
233 return p.o, nil
234 }
235 236 // Line computes the visual ordering of runs for a single line starting and
237 // ending at the given positions in the original text.
238 func (p *Paragraph) Line(start, end int) (Ordering, error) {
239 lineTypes := p.types[start:end]
240 para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1)
241 if err != nil {
242 return Ordering{}, err
243 }
244 levels := para.getLevels([]int{len(lineTypes)})
245 o := calculateOrdering(levels, p.runes[start:end])
246 return o, nil
247 }
248 249 // An Ordering holds the computed visual order of runs of a Paragraph. Calling
250 // SetBytes or SetString on the originating Paragraph invalidates an Ordering.
251 // The methods of an Ordering should only be called by one goroutine at a time.
252 type Ordering struct {
253 runes [][]rune
254 directions []Direction
255 startpos []int
256 }
257 258 // Direction reports the directionality of the runs.
259 //
260 // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
261 func (o *Ordering) Direction() Direction {
262 return o.directions[0]
263 }
264 265 // NumRuns returns the number of runs.
266 func (o *Ordering) NumRuns() int {
267 return len(o.runes)
268 }
269 270 // Run returns the ith run within the ordering.
271 func (o *Ordering) Run(i int) Run {
272 r := Run{
273 runes: o.runes[i],
274 direction: o.directions[i],
275 startpos: o.startpos[i],
276 }
277 return r
278 }
279 280 // TODO: perhaps with options.
281 // // Reorder creates a reader that reads the runes in visual order per character.
282 // // Modifiers remain after the runes they modify.
283 // func (l *Runs) Reorder() io.Reader {
284 // panic("unimplemented")
285 // }
286 287 // A Run is a continuous sequence of characters of a single direction.
288 type Run struct {
289 runes []rune
290 direction Direction
291 startpos int
292 }
293 294 // String returns the text of the run in its original order.
295 func (r *Run) String() string {
296 return string(r.runes)
297 }
298 299 // Bytes returns the text of the run in its original order.
300 func (r *Run) Bytes() []byte {
301 return []byte(r.String())
302 }
303 304 // TODO: methods for
305 // - Display order
306 // - headers and footers
307 // - bracket replacement.
308 309 // Direction reports the direction of the run.
310 func (r *Run) Direction() Direction {
311 return r.direction
312 }
313 314 // Pos returns the position of the Run within the text passed to SetBytes or SetString of the
315 // originating Paragraph value.
316 func (r *Run) Pos() (start, end int) {
317 return r.startpos, r.startpos + len(r.runes) - 1
318 }
319 320 // AppendReverse reverses the order of characters of in, appends them to out,
321 // and returns the result. Modifiers will still follow the runes they modify.
322 // Brackets are replaced with their counterparts.
323 func AppendReverse(out, in []byte) []byte {
324 ret := []byte{:len(in)+len(out)}
325 copy(ret, out)
326 inRunes := bytes.Runes(in)
327 328 for i, r := range inRunes {
329 prop, _ := LookupRune(r)
330 if prop.IsBracket() {
331 inRunes[i] = prop.reverseBracket(r)
332 }
333 }
334 335 for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
336 inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
337 }
338 copy(ret[len(out):], string(inRunes))
339 340 return ret
341 }
342 343 // ReverseString reverses the order of characters in s and returns a new string.
344 // Modifiers will still follow the runes they modify. Brackets are replaced with
345 // their counterparts.
346 func ReverseString(s string) string {
347 input := []rune(s)
348 li := len(input)
349 ret := []rune{:li}
350 for i, r := range input {
351 prop, _ := LookupRune(r)
352 if prop.IsBracket() {
353 ret[li-i-1] = prop.reverseBracket(r)
354 } else {
355 ret[li-i-1] = r
356 }
357 }
358 return string(ret)
359 }
360