bidi.mx raw

   1  // Copyright 2015 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:generate go run gen.go gen_trieval.go gen_ranges.go
   6  
   7  // Package bidi contains functionality for bidirectional text support.
   8  //
   9  // See https://www.unicode.org/reports/tr9.
  10  //
  11  // NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
  12  // and without notice.
  13  package bidi // import "golang.org/x/text/unicode/bidi"
  14  
  15  // TODO
  16  // - Transformer for reordering?
  17  // - Transformer (validator, really) for Bidi Rule.
  18  
  19  import (
  20  	"bytes"
  21  )
  22  
// This API tries to avoid dealing with embedding levels for now. Under the hood
// these will be computed, but the question is to what extent the user should
// know they exist. We should at some point allow the user to specify an
// embedding hierarchy, though.
  27  
  28  // A Direction indicates the overall flow of text.
  29  type Direction int
  30  
  31  const (
  32  	// LeftToRight indicates the text contains no right-to-left characters and
  33  	// that either there are some left-to-right characters or the option
  34  	// DefaultDirection(LeftToRight) was passed.
  35  	LeftToRight Direction = iota
  36  
  37  	// RightToLeft indicates the text contains no left-to-right characters and
  38  	// that either there are some right-to-left characters or the option
  39  	// DefaultDirection(RightToLeft) was passed.
  40  	RightToLeft
  41  
  42  	// Mixed indicates text contains both left-to-right and right-to-left
  43  	// characters.
  44  	Mixed
  45  
  46  	// Neutral means that text contains no left-to-right and right-to-left
  47  	// characters and that no default direction has been set.
  48  	Neutral
  49  )
  50  
// options collects the settings that Option values can modify.
type options struct {
	defaultDirection Direction // set by DefaultDirection; the zero value is LeftToRight
}

// An Option is an option for Bidi processing. It is a function that updates
// the options value it is handed.
type Option func(*options)
  57  
  58  // ICU allows the user to define embedding levels. This may be used, for example,
  59  // to use hierarchical structure of markup languages to define embeddings.
  60  // The following option may be a way to expose this functionality in this API.
  61  // // LevelFunc sets a function that associates nesting levels with the given text.
  62  // // The levels function will be called with monotonically increasing values for p.
  63  // func LevelFunc(levels func(p int) int) Option {
  64  // 	panic("unimplemented")
  65  // }
  66  
  67  // DefaultDirection sets the default direction for a Paragraph. The direction is
  68  // overridden if the text contains directional characters.
  69  func DefaultDirection(d Direction) Option {
  70  	return func(opts *options) {
  71  		opts.defaultDirection = d
  72  	}
  73  }
  74  
// A Paragraph holds a single Paragraph for Bidi processing.
//
// The input is installed with SetBytes or SetString, which also fill the
// per-rune slices below; Order and Line then compute orderings from them.
type Paragraph struct {
	p          []byte        // text installed by SetBytes or SetString
	o          Ordering      // ordering stored by the most recent successful Order call
	opts       []Option      // options passed to SetBytes or SetString; applied by Order
	types      []Class       // bidi class of each rune before the first paragraph separator
	pairTypes  []bracketType // bpOpen, bpClose or bpNone for each entry of types
	pairValues []rune        // the rune itself for bracket entries, 0 otherwise
	runes      []rune        // runes decoded from p by prepareInput
	options    options       // settings produced by applying opts in Order
}
  86  
  87  // Initialize the p.pairTypes, p.pairValues and p.types from the input previously
  88  // set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph
  89  // separator (bidi class B).
  90  //
  91  // The function p.Order() needs these values to be set, so this preparation could be postponed.
  92  // But since the SetBytes and SetStrings functions return the length of the input up to the paragraph
  93  // separator, the whole input needs to be processed anyway and should not be done twice.
  94  //
  95  // The function has the same return values as SetBytes() / SetString()
  96  func (p *Paragraph) prepareInput() (n int, err error) {
  97  	p.runes = bytes.Runes(p.p)
  98  	bytecount := 0
  99  	// clear slices from previous SetString or SetBytes
 100  	p.pairTypes = nil
 101  	p.pairValues = nil
 102  	p.types = nil
 103  
 104  	for _, r := range p.runes {
 105  		props, i := LookupRune(r)
 106  		bytecount += i
 107  		cls := props.Class()
 108  		if cls == B {
 109  			return bytecount, nil
 110  		}
 111  		p.types = append(p.types, cls)
 112  		if props.IsOpeningBracket() {
 113  			p.pairTypes = append(p.pairTypes, bpOpen)
 114  			p.pairValues = append(p.pairValues, r)
 115  		} else if props.IsBracket() {
 116  			// this must be a closing bracket,
 117  			// since IsOpeningBracket is not true
 118  			p.pairTypes = append(p.pairTypes, bpClose)
 119  			p.pairValues = append(p.pairValues, r)
 120  		} else {
 121  			p.pairTypes = append(p.pairTypes, bpNone)
 122  			p.pairValues = append(p.pairValues, 0)
 123  		}
 124  	}
 125  	return bytecount, nil
 126  }
 127  
 128  // SetBytes configures p for the given paragraph text. It replaces text
 129  // previously set by SetBytes or SetString. If b contains a paragraph separator
 130  // it will only process the first paragraph and report the number of bytes
 131  // consumed from b including this separator. Error may be non-nil if options are
 132  // given.
 133  func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
 134  	p.p = b
 135  	p.opts = opts
 136  	return p.prepareInput()
 137  }
 138  
 139  // SetString configures s for the given paragraph text. It replaces text
 140  // previously set by SetBytes or SetString. If s contains a paragraph separator
 141  // it will only process the first paragraph and report the number of bytes
 142  // consumed from s including this separator. Error may be non-nil if options are
 143  // given.
 144  func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
 145  	p.p = []byte(s)
 146  	p.opts = opts
 147  	return p.prepareInput()
 148  }
 149  
 150  // IsLeftToRight reports whether the principle direction of rendering for this
 151  // paragraphs is left-to-right. If this returns false, the principle direction
 152  // of rendering is right-to-left.
 153  func (p *Paragraph) IsLeftToRight() bool {
 154  	return p.Direction() == LeftToRight
 155  }
 156  
 157  // Direction returns the direction of the text of this paragraph.
 158  //
 159  // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
 160  func (p *Paragraph) Direction() Direction {
 161  	return p.o.Direction()
 162  }
 163  
 164  // TODO: what happens if the position is > len(input)? This should return an error.
 165  
 166  // RunAt reports the Run at the given position of the input text.
 167  //
 168  // This method can be used for computing line breaks on paragraphs.
 169  func (p *Paragraph) RunAt(pos int) Run {
 170  	c := 0
 171  	runNumber := 0
 172  	for i, r := range p.o.runes {
 173  		c += len(r)
 174  		if pos < c {
 175  			runNumber = i
 176  		}
 177  	}
 178  	return p.o.Run(runNumber)
 179  }
 180  
 181  func calculateOrdering(levels []level, runes []rune) Ordering {
 182  	var curDir Direction
 183  
 184  	prevDir := Neutral
 185  	prevI := 0
 186  
 187  	o := Ordering{}
 188  	// lvl = 0,2,4,...: left to right
 189  	// lvl = 1,3,5,...: right to left
 190  	for i, lvl := range levels {
 191  		if lvl%2 == 0 {
 192  			curDir = LeftToRight
 193  		} else {
 194  			curDir = RightToLeft
 195  		}
 196  		if curDir != prevDir {
 197  			if i > 0 {
 198  				o.runes = append(o.runes, runes[prevI:i])
 199  				o.directions = append(o.directions, prevDir)
 200  				o.startpos = append(o.startpos, prevI)
 201  			}
 202  			prevI = i
 203  			prevDir = curDir
 204  		}
 205  	}
 206  	o.runes = append(o.runes, runes[prevI:])
 207  	o.directions = append(o.directions, prevDir)
 208  	o.startpos = append(o.startpos, prevI)
 209  	return o
 210  }
 211  
 212  // Order computes the visual ordering of all the runs in a Paragraph.
 213  func (p *Paragraph) Order() (Ordering, error) {
 214  	if len(p.types) == 0 {
 215  		return Ordering{}, nil
 216  	}
 217  
 218  	for _, fn := range p.opts {
 219  		fn(&p.options)
 220  	}
 221  	lvl := level(-1)
 222  	if p.options.defaultDirection == RightToLeft {
 223  		lvl = 1
 224  	}
 225  	para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl)
 226  	if err != nil {
 227  		return Ordering{}, err
 228  	}
 229  
 230  	levels := para.getLevels([]int{len(p.types)})
 231  
 232  	p.o = calculateOrdering(levels, p.runes)
 233  	return p.o, nil
 234  }
 235  
 236  // Line computes the visual ordering of runs for a single line starting and
 237  // ending at the given positions in the original text.
 238  func (p *Paragraph) Line(start, end int) (Ordering, error) {
 239  	lineTypes := p.types[start:end]
 240  	para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1)
 241  	if err != nil {
 242  		return Ordering{}, err
 243  	}
 244  	levels := para.getLevels([]int{len(lineTypes)})
 245  	o := calculateOrdering(levels, p.runes[start:end])
 246  	return o, nil
 247  }
 248  
// An Ordering holds the computed visual order of runs of a Paragraph. Calling
// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
// The methods of an Ordering should only be called by one goroutine at a time.
//
// The three slices are parallel: index i of each describes run i.
type Ordering struct {
	runes      [][]rune    // text of each run
	directions []Direction // direction of each run
	startpos   []int       // index of each run's first rune in the text that was ordered
}
 257  
 258  // Direction reports the directionality of the runs.
 259  //
 260  // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
 261  func (o *Ordering) Direction() Direction {
 262  	return o.directions[0]
 263  }
 264  
 265  // NumRuns returns the number of runs.
 266  func (o *Ordering) NumRuns() int {
 267  	return len(o.runes)
 268  }
 269  
 270  // Run returns the ith run within the ordering.
 271  func (o *Ordering) Run(i int) Run {
 272  	r := Run{
 273  		runes:     o.runes[i],
 274  		direction: o.directions[i],
 275  		startpos:  o.startpos[i],
 276  	}
 277  	return r
 278  }
 279  
 280  // TODO: perhaps with options.
 281  // // Reorder creates a reader that reads the runes in visual order per character.
 282  // // Modifiers remain after the runes they modify.
 283  // func (l *Runs) Reorder() io.Reader {
 284  // 	panic("unimplemented")
 285  // }
 286  
// A Run is a continuous sequence of characters of a single direction.
type Run struct {
	runes     []rune    // text of the run, in its original order
	direction Direction // direction of the run
	startpos  int       // index of the run's first rune, as recorded in the Ordering
}
 293  
 294  // String returns the text of the run in its original order.
 295  func (r *Run) String() string {
 296  	return string(r.runes)
 297  }
 298  
 299  // Bytes returns the text of the run in its original order.
 300  func (r *Run) Bytes() []byte {
 301  	return []byte(r.String())
 302  }
 303  
 304  // TODO: methods for
 305  // - Display order
 306  // - headers and footers
 307  // - bracket replacement.
 308  
 309  // Direction reports the direction of the run.
 310  func (r *Run) Direction() Direction {
 311  	return r.direction
 312  }
 313  
 314  // Pos returns the position of the Run within the text passed to SetBytes or SetString of the
 315  // originating Paragraph value.
 316  func (r *Run) Pos() (start, end int) {
 317  	return r.startpos, r.startpos + len(r.runes) - 1
 318  }
 319  
 320  // AppendReverse reverses the order of characters of in, appends them to out,
 321  // and returns the result. Modifiers will still follow the runes they modify.
 322  // Brackets are replaced with their counterparts.
 323  func AppendReverse(out, in []byte) []byte {
 324  	ret := []byte{:len(in)+len(out)}
 325  	copy(ret, out)
 326  	inRunes := bytes.Runes(in)
 327  
 328  	for i, r := range inRunes {
 329  		prop, _ := LookupRune(r)
 330  		if prop.IsBracket() {
 331  			inRunes[i] = prop.reverseBracket(r)
 332  		}
 333  	}
 334  
 335  	for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
 336  		inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
 337  	}
 338  	copy(ret[len(out):], string(inRunes))
 339  
 340  	return ret
 341  }
 342  
 343  // ReverseString reverses the order of characters in s and returns a new string.
 344  // Modifiers will still follow the runes they modify. Brackets are replaced with
 345  // their counterparts.
 346  func ReverseString(s string) string {
 347  	input := []rune(s)
 348  	li := len(input)
 349  	ret := []rune{:li}
 350  	for i, r := range input {
 351  		prop, _ := LookupRune(r)
 352  		if prop.IsBracket() {
 353  			ret[li-i-1] = prop.reverseBracket(r)
 354  		} else {
 355  			ret[li-i-1] = r
 356  		}
 357  	}
 358  	return string(ret)
 359  }
 360