cordbscs.c raw
1 /*
2 * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved.
3 *
4 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
5 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
6 *
7 * Permission is hereby granted to use or copy this program
8 * for any purpose, provided the above notices are retained on all copies.
9 * Permission to modify the code and to distribute modified code is granted,
10 * provided the above notices are retained, and a notice that the code was
11 * modified is included with the above copyright notice.
12 */
13
14 #ifdef HAVE_CONFIG_H
15 # include "config.h"
16 #endif
17 #ifndef CORD_BUILD
18 # define CORD_BUILD
19 #endif
20
21 #include "gc/gc.h"
22
23 #include <stdlib.h>
24 #include <string.h>
25
26 #include "gc/cord.h"
27
28 /*
29 * An implementation of the `cord` primitives. These are the only
30 * functions that understand the representation. We perform only
31 * minimal checks on arguments to these functions. Out of bounds
32 * arguments to the iteration functions may result in client functions
33 * invoked on garbage data. In most cases, client functions should be
34 * programmed defensively enough that this does not result in memory
35 * smashes.
36 */
37
38 CORD_oom_fn_t CORD_oom_fn = 0;
39
40 void
41 CORD_set_oom_fn(CORD_oom_fn_t fn)
42 {
43 CORD_oom_fn = fn;
44 }
45
46 CORD_oom_fn_t
47 CORD_get_oom_fn(void)
48 {
49 return CORD_oom_fn;
50 }
51
52 void
53 CORD__call_oom_fn(void)
54 {
55 if (CORD_oom_fn != 0)
56 (*CORD_oom_fn)();
57 }
58
59 #define OUT_OF_MEMORY \
60 { \
61 CORD__call_oom_fn(); \
62 ABORT("Out of memory"); \
63 }
64
65 #define ABORT(msg) \
66 { \
67 fprintf(stderr, "%s\n", msg); \
68 abort(); \
69 }
70
71 /* A structure representing a concatenation of two strings.
72 * It is assumed that both of them are not empty.
73 */
74 struct Concatenation {
75 CORD left;
76 CORD right;
77 };
78
79 struct Function {
80 CORD_fn fn;
81 void *client_data;
82 };
83
84 struct Generic {
85 char nul;
86 char header;
87 /* Concatenation nesting depth; 0 for function. */
88 char depth;
89 /*
90 * Length of left-concatenated child if it is sufficiently short;
91 * 0 otherwise.
92 */
93 unsigned char left_len;
94 unsigned long len;
95 };
96
97 union ConcatOrFunc {
98 struct Concatenation concat;
99 struct Function function;
100 };
101
102 typedef struct {
103 struct Generic generic;
104 union ConcatOrFunc data;
105 } CordRep;
106
107 #define CONCAT_HDR 1
108
109 #define FN_HDR 4
110
111 /*
112 * Substring nodes are a special case of function nodes.
113 * The `client_data` field is known to point to a `substr_args`
114 * structure, and the function is either `CORD_apply_access_fn`
115 * or `CORD_index_access_fn`.
116 */
117 #define SUBSTR_HDR 6
118
119 /* The following may be applied only to function and concatenation nodes: */
120 #define IS_CONCATENATION(s) \
121 (((const CordRep *)s)->generic.header == CONCAT_HDR)
122
123 #define IS_FUNCTION(s) ((((const CordRep *)s)->generic.header & FN_HDR) != 0)
124
125 #define IS_SUBSTR(s) (((const CordRep *)s)->generic.header == SUBSTR_HDR)
126
127 #define LEN(s) (((const CordRep *)s)->generic.len)
128 #define DEPTH(s) (((const CordRep *)s)->generic.depth)
129 #define GEN_LEN(s) (CORD_IS_STRING(s) ? strlen(s) : LEN(s))
130
131 #define MAX_LEFT_LEN 255
132
133 #define LEFT_LEN(s) \
134 (((const CordRep *)s)->generic.left_len != 0 \
135 ? ((const CordRep *)s)->generic.left_len \
136 : (CORD_IS_STRING(((const CordRep *)s)->data.concat.left) \
137 ? ((const CordRep *)s)->generic.len \
138 - GEN_LEN(((const CordRep *)s)->data.concat.right) \
139 : LEN(((const CordRep *)s)->data.concat.left)))
140
141 /* Cords shorter than this are C strings. */
142 #define SHORT_LIMIT (sizeof(CordRep) - 1)
143
144 /*
145 * Dump the internal representation of `x` to `stdout`, with initial
146 * indentation level `n`.
147 */
148 static void
149 CORD_dump_inner(CORD x, unsigned n)
150 {
151 size_t i;
152
153 for (i = 0; i < (size_t)n; i++) {
154 fputs(" ", stdout);
155 }
156 if (x == 0) {
157 fputs("NIL\n", stdout);
158 } else if (CORD_IS_STRING(x)) {
159 for (i = 0; i <= SHORT_LIMIT; i++) {
160 if (x[i] == '\0')
161 break;
162 putchar(x[i]);
163 }
164 if (x[i] != '\0')
165 fputs("...", stdout);
166 putchar('\n');
167 } else if (IS_CONCATENATION(x)) {
168 const struct Concatenation *conc = &((const CordRep *)x)->data.concat;
169
170 printf("Concatenation: %p (len: %d, depth: %d)\n", (const void *)x,
171 (int)LEN(x), (int)DEPTH(x));
172 CORD_dump_inner(conc->left, n + 1);
173 CORD_dump_inner(conc->right, n + 1);
174 } else /* function */ {
175 const struct Function *f = &((const CordRep *)x)->data.function;
176 size_t lim = (size_t)LEN(x);
177
178 if (IS_SUBSTR(x))
179 printf("(Substring) ");
180 printf("Function: %p (len: %d): ", (const void *)x, (int)lim);
181 for (i = 0; i < 20 && i < lim; i++) {
182 putchar(f->fn(i, f->client_data));
183 }
184 if (i < lim)
185 fputs("...", stdout);
186 putchar('\n');
187 }
188 }
189
190 void
191 CORD_dump(CORD x)
192 {
193 CORD_dump_inner(x, 0);
194 fflush(stdout);
195 }
196
197 CORD
198 CORD_cat_char_star(CORD x, const char *y, size_t leny)
199 {
200 size_t result_len;
201 size_t lenx;
202 int depth;
203
204 if (x == CORD_EMPTY)
205 return y;
206 if (leny == 0)
207 return x;
208 if (CORD_IS_STRING(x)) {
209 lenx = strlen(x);
210 result_len = lenx + leny;
211 if (result_len <= SHORT_LIMIT) {
212 char *result = (char *)GC_MALLOC_ATOMIC(result_len + 1);
213
214 if (NULL == result)
215 OUT_OF_MEMORY;
216 #ifdef LINT2
217 memcpy(result, x, lenx + 1);
218 #else
219 /*
220 * No need to copy the terminating zero as `result[lenx]` is
221 * written below.
222 */
223 memcpy(result, x, lenx);
224 #endif
225 memcpy(result + lenx, y, leny);
226 result[result_len] = '\0';
227 return (CORD)result;
228 } else {
229 depth = 1;
230 }
231 } else {
232 CORD right;
233 CORD left;
234 char *new_right;
235
236 lenx = LEN(x);
237
238 if (leny <= SHORT_LIMIT / 2 && IS_CONCATENATION(x)
239 && CORD_IS_STRING(right = ((const CordRep *)x)->data.concat.right)) {
240 size_t right_len;
241
242 /* Merge `y` into right part of `x`. */
243 left = ((const CordRep *)x)->data.concat.left;
244 if (!CORD_IS_STRING(left)) {
245 right_len = lenx - LEN(left);
246 } else if (((const CordRep *)x)->generic.left_len != 0) {
247 right_len = lenx - ((const CordRep *)x)->generic.left_len;
248 } else {
249 right_len = strlen(right);
250 }
251 result_len = right_len + leny; /*< length of `new_right` */
252 if (result_len <= SHORT_LIMIT) {
253 new_right = (char *)GC_MALLOC_ATOMIC(result_len + 1);
254 if (new_right == 0)
255 OUT_OF_MEMORY;
256 memcpy(new_right, right, right_len);
257 memcpy(new_right + right_len, y, leny);
258 new_right[result_len] = '\0';
259 y = new_right;
260 leny = result_len;
261 x = left;
262 lenx -= right_len;
263 /* Now fall through to concatenate the two pieces: */
264 }
265 if (CORD_IS_STRING(x)) {
266 depth = 1;
267 } else {
268 depth = DEPTH(x) + 1;
269 }
270 } else {
271 depth = DEPTH(x) + 1;
272 }
273 result_len = lenx + leny;
274 }
275 {
276 /* The general case; `lenx` and `result_len` are known. */
277 CordRep *result = GC_NEW(CordRep);
278
279 if (NULL == result)
280 OUT_OF_MEMORY;
281 result->generic.header = CONCAT_HDR;
282 result->generic.depth = (char)depth;
283 if (lenx <= MAX_LEFT_LEN)
284 result->generic.left_len = (unsigned char)lenx;
285 result->generic.len = (unsigned long)result_len;
286 result->data.concat.left = x;
287 GC_PTR_STORE_AND_DIRTY((/* no const */ void *)&result->data.concat.right,
288 y);
289 GC_reachable_here(x);
290 if (depth >= CORD_MAX_DEPTH) {
291 return CORD_balance((CORD)result);
292 } else {
293 return (CORD)result;
294 }
295 }
296 }
297
298 CORD
299 CORD_cat(CORD x, CORD y)
300 {
301 size_t result_len;
302 int depth;
303 size_t lenx;
304
305 if (x == CORD_EMPTY)
306 return y;
307 if (y == CORD_EMPTY)
308 return x;
309 if (CORD_IS_STRING(y)) {
310 return CORD_cat_char_star(x, y, strlen(y));
311 } else if (CORD_IS_STRING(x)) {
312 lenx = strlen(x);
313 depth = DEPTH(y) + 1;
314 } else {
315 int depthy = DEPTH(y);
316
317 lenx = LEN(x);
318 depth = DEPTH(x) + 1;
319 if (depthy >= depth)
320 depth = depthy + 1;
321 }
322 result_len = lenx + LEN(y);
323 {
324 CordRep *result = GC_NEW(CordRep);
325
326 if (NULL == result)
327 OUT_OF_MEMORY;
328 result->generic.header = CONCAT_HDR;
329 result->generic.depth = (char)depth;
330 if (lenx <= MAX_LEFT_LEN)
331 result->generic.left_len = (unsigned char)lenx;
332 result->generic.len = (unsigned long)result_len;
333 result->data.concat.left = x;
334 GC_PTR_STORE_AND_DIRTY((/* no const */ void *)&result->data.concat.right,
335 y);
336 GC_reachable_here(x);
337 if (depth >= CORD_MAX_DEPTH) {
338 return CORD_balance((CORD)result);
339 } else {
340 return (CORD)result;
341 }
342 }
343 }
344
345 static CordRep *
346 CORD_from_fn_inner(CORD_fn fn, void *client_data, size_t len)
347 {
348 if (0 == len)
349 return NULL;
350 if (len <= SHORT_LIMIT) {
351 char *result;
352 size_t i;
353 char buf[SHORT_LIMIT + 1];
354
355 for (i = 0; i < len; i++) {
356 char c = fn(i, client_data);
357
358 if (c == '\0')
359 goto gen_case;
360 buf[i] = c;
361 }
362
363 result = (char *)GC_MALLOC_ATOMIC(len + 1);
364 if (NULL == result)
365 OUT_OF_MEMORY;
366 memcpy(result, buf, len);
367 result[len] = '\0';
368 return (CordRep *)result;
369 }
370 gen_case:
371 {
372 CordRep *result = GC_NEW(CordRep);
373
374 if (NULL == result)
375 OUT_OF_MEMORY;
376 result->generic.header = FN_HDR;
377 /* The depth is already zero. */
378 result->generic.len = (unsigned long)len;
379 result->data.function.fn = fn;
380 GC_PTR_STORE_AND_DIRTY(&result->data.function.client_data, client_data);
381 return result;
382 }
383 }
384
385 CORD
386 CORD_from_fn(CORD_fn fn, void *client_data, size_t len)
387 {
388 return (/* const */ CORD)CORD_from_fn_inner(fn, client_data, len);
389 }
390
391 size_t
392 CORD_len(CORD x)
393 {
394 return x == 0 ? 0 : GEN_LEN(x);
395 }
396
397 struct substr_args {
398 CordRep *sa_cord;
399 size_t sa_index;
400 };
401
402 static char
403 CORD_index_access_fn(size_t i, void *client_data)
404 {
405 struct substr_args *descr = (struct substr_args *)client_data;
406
407 return ((char *)descr->sa_cord)[i + descr->sa_index];
408 }
409
410 static char
411 CORD_apply_access_fn(size_t i, void *client_data)
412 {
413 struct substr_args *descr = (struct substr_args *)client_data;
414 struct Function *fn_cord = &descr->sa_cord->data.function;
415
416 return fn_cord->fn(i + descr->sa_index, fn_cord->client_data);
417 }
418
419 /*
420 * A variant of `CORD_substr` that simply returns a function node,
421 * thus postponing its work. `f` argument is a function that may
422 * be used for efficient access to the `i`-th character.
423 * Assumes `i >= 0` and `i + n < CORD_len(x)`.
424 */
425 static CORD
426 CORD_substr_closure(CORD x, size_t i, size_t n, CORD_fn f)
427 {
428 struct substr_args *sa = GC_NEW(struct substr_args);
429 CordRep *result;
430
431 if (sa == 0)
432 OUT_OF_MEMORY;
433 sa->sa_index = i;
434 GC_PTR_STORE_AND_DIRTY(&sa->sa_cord, x);
435 result = CORD_from_fn_inner(f, sa, n);
436 if ((CORD)result != CORD_EMPTY && 0 == result->generic.nul)
437 result->generic.header = SUBSTR_HDR;
438 return (CORD)result;
439 }
440
441 /*
442 * Substrings of function nodes and flat strings shorter than
443 * this are flat strings. Otherwise we use a functional
444 * representation, which is significantly slower to access.
445 */
446 #define SUBSTR_LIMIT (10 * SHORT_LIMIT)
447
448 /*
449 * A variant of `CORD_substr` that assumes `i >= 0`, `n > 0` and
450 * `i + n < CORD_len(x)`.
451 */
452 static CORD
453 CORD_substr_checked(CORD x, size_t i, size_t n)
454 {
455 if (CORD_IS_STRING(x)) {
456 if (n > SUBSTR_LIMIT) {
457 return CORD_substr_closure(x, i, n, CORD_index_access_fn);
458 } else {
459 char *result = (char *)GC_MALLOC_ATOMIC(n + 1);
460
461 if (NULL == result)
462 OUT_OF_MEMORY;
463 strncpy(result, x + i, n);
464 result[n] = '\0';
465 return result;
466 }
467 } else if (IS_CONCATENATION(x)) {
468 const struct Concatenation *conc = &((const CordRep *)x)->data.concat;
469 size_t left_len = LEFT_LEN(x);
470 size_t right_len = (size_t)LEN(x) - left_len;
471
472 if (i >= left_len) {
473 if (n == right_len)
474 return conc->right;
475 return CORD_substr_checked(conc->right, i - left_len, n);
476 } else if (i + n <= left_len) {
477 if (n == left_len)
478 return conc->left;
479 return CORD_substr_checked(conc->left, i, n);
480 } else {
481 /* Need at least one character from each side. */
482 CORD left_part;
483 CORD right_part;
484 size_t left_part_len = left_len - i;
485
486 if (i == 0) {
487 left_part = conc->left;
488 } else {
489 left_part = CORD_substr_checked(conc->left, i, left_part_len);
490 }
491 if (i + n == right_len + left_len) {
492 right_part = conc->right;
493 } else {
494 right_part = CORD_substr_checked(conc->right, 0, n - left_part_len);
495 }
496 return CORD_cat(left_part, right_part);
497 }
498 } else /* function */ {
499 if (n > SUBSTR_LIMIT) {
500 if (IS_SUBSTR(x)) {
501 /* Avoid nesting substring nodes. */
502 const struct Function *f = &((const CordRep *)x)->data.function;
503 const struct substr_args *descr = (struct substr_args *)f->client_data;
504
505 return CORD_substr_closure((CORD)descr->sa_cord, i + descr->sa_index,
506 n, f->fn);
507 } else {
508 return CORD_substr_closure(x, i, n, CORD_apply_access_fn);
509 }
510 } else {
511 char *result;
512 const struct Function *f = &((const CordRep *)x)->data.function;
513 char buf[SUBSTR_LIMIT + 1];
514 char *p = buf;
515 size_t j;
516 size_t lim = i + n;
517
518 for (j = i; j < lim; j++) {
519 char c = f->fn(j, f->client_data);
520
521 if (c == '\0') {
522 return CORD_substr_closure(x, i, n, CORD_apply_access_fn);
523 }
524 *p++ = c;
525 }
526 result = (char *)GC_MALLOC_ATOMIC(n + 1);
527 if (NULL == result)
528 OUT_OF_MEMORY;
529 memcpy(result, buf, n);
530 result[n] = '\0';
531 return result;
532 }
533 }
534 }
535
536 CORD
537 CORD_substr(CORD x, size_t i, size_t n)
538 {
539 size_t len = CORD_len(x);
540
541 if (i >= len || 0 == n)
542 return 0;
543 if (i + n > len)
544 n = len - i;
545 return CORD_substr_checked(x, i, n);
546 }
547
548 int
549 CORD_iter5(CORD x, size_t i, CORD_iter_fn f1, CORD_batched_iter_fn f2,
550 void *client_data)
551 {
552 if (0 == x)
553 return 0;
554 if (CORD_IS_STRING(x)) {
555 const char *p = x + i;
556
557 if (*p == '\0')
558 ABORT("2nd arg to CORD_iter5 too big");
559 if (f2 != CORD_NO_FN) {
560 return f2(p, client_data);
561 } else {
562 while (*p) {
563 if (f1(*p, client_data))
564 return 1;
565 p++;
566 }
567 return 0;
568 }
569 } else if (IS_CONCATENATION(x)) {
570 const struct Concatenation *conc = &((const CordRep *)x)->data.concat;
571
572 if (i > 0) {
573 size_t left_len = LEFT_LEN(x);
574
575 if (i >= left_len) {
576 return CORD_iter5(conc->right, i - left_len, f1, f2, client_data);
577 }
578 }
579 if (CORD_iter5(conc->left, i, f1, f2, client_data)) {
580 return 1;
581 }
582 return CORD_iter5(conc->right, 0, f1, f2, client_data);
583 } else /* function */ {
584 const struct Function *f = &((const CordRep *)x)->data.function;
585 size_t j;
586 size_t lim = (size_t)LEN(x);
587
588 for (j = i; j < lim; j++) {
589 if (f1(f->fn(j, f->client_data), client_data)) {
590 return 1;
591 }
592 }
593 return 0;
594 }
595 }
596
597 #undef CORD_iter
598 int
599 CORD_iter(CORD x, CORD_iter_fn f1, void *client_data)
600 {
601 return CORD_iter5(x, 0, f1, CORD_NO_FN, client_data);
602 }
603
604 int
605 CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void *client_data)
606 {
607 if (0 == x)
608 return 0;
609 if (CORD_IS_STRING(x)) {
610 const char *p = x + i;
611
612 for (;;) {
613 char c = *p;
614
615 if (c == '\0')
616 ABORT("2nd arg to CORD_riter4 too big");
617 if (f1(c, client_data))
618 return 1;
619 if (p == x)
620 break;
621 p--;
622 }
623 } else if (IS_CONCATENATION(x)) {
624 const struct Concatenation *conc = &((const CordRep *)x)->data.concat;
625 CORD left_part = conc->left;
626 size_t left_len = LEFT_LEN(x);
627
628 if (i >= left_len) {
629 if (CORD_riter4(conc->right, i - left_len, f1, client_data)) {
630 return 1;
631 }
632 return CORD_riter4(left_part, left_len - 1, f1, client_data);
633 } else {
634 return CORD_riter4(left_part, i, f1, client_data);
635 }
636 } else /* function */ {
637 const struct Function *f = &((const CordRep *)x)->data.function;
638 size_t j;
639
640 for (j = i;; j--) {
641 if (f1(f->fn(j, f->client_data), client_data)) {
642 return 1;
643 }
644 if (0 == j)
645 break;
646 }
647 }
648 return 0;
649 }
650
651 int
652 CORD_riter(CORD x, CORD_iter_fn f1, void *client_data)
653 {
654 size_t len = CORD_len(x);
655 if (0 == len)
656 return 0;
657 return CORD_riter4(x, len - 1, f1, client_data);
658 }
659
660 /*
661 * The following functions are concerned with balancing cords.
662 * Strategy:
663 * Scan the cord from left to right, keeping the cord scanned so far
664 * as a forest of balanced trees of exponentially decreasing length.
665 * When a new subtree needs to be added to the forest, we concatenate all
666 * shorter ones to the new tree in the appropriate order, and then insert
667 * the result into the forest.
668 * Crucial invariants:
669 * 1. The concatenation of the forest (in decreasing order) with the
670 * unscanned part of the rope is equal to the rope being balanced;
671 * 2. All trees in the forest are balanced;
672 * 3. `forest[i]` has depth at most `i`.
673 */
674
675 typedef struct {
676 CORD c;
677 /* The actual length of `c`. */
678 size_t len;
679 } ForestElement;
680
681 static size_t min_len[CORD_MAX_DEPTH];
682
683 static int min_len_init = 0;
684
685 /*
686 * The string is the concatenation of `forest` in order of decreasing indices.
687 * `forest[i].len >= fib(i + 1)` is assumed.
688 */
689 typedef ForestElement Forest[CORD_MAX_DEPTH];
690
691 static void
692 CORD_init_min_len(void)
693 {
694 int i;
695 size_t last, previous;
696
697 min_len[0] = previous = 1;
698 min_len[1] = last = 2;
699 for (i = 2; i < CORD_MAX_DEPTH; i++) {
700 size_t current = last < (~(size_t)0) - previous ? last + previous
701 : last /* overflow */;
702
703 min_len[i] = current;
704 previous = last;
705 last = current;
706 }
707 min_len_init = 1;
708 }
709
710 static void
711 CORD_init_forest(ForestElement *forest, size_t max_len)
712 {
713 int i;
714
715 for (i = 0; i < CORD_MAX_DEPTH; i++) {
716 forest[i].c = 0;
717 if (min_len[i] > max_len)
718 return;
719 }
720 ABORT("Cord too long");
721 }
722
723 /*
724 * Add a leaf to the appropriate level in `forest`, cleaning out lower
725 * levels as necessary. Also works if `x` is a balanced tree of
726 * concatenations; however in this case an extra concatenation node
727 * may be inserted above `x`; this node should not be counted in the
728 * statement of the invariants.
729 */
730 static void
731 CORD_add_forest(ForestElement *forest, CORD x, size_t len)
732 {
733 int i = 0;
734 CORD sum = CORD_EMPTY;
735 size_t sum_len = 0;
736
737 while (len > min_len[i + 1]) {
738 if (forest[i].c != 0) {
739 sum = CORD_cat(forest[i].c, sum);
740 sum_len += forest[i].len;
741 forest[i].c = 0;
742 }
743 i++;
744 }
745 /*
746 * `sum` has depth at most 1 greater than what would be required for the
747 * balance.
748 */
749 sum = CORD_cat(sum, x);
750 sum_len += len;
751 /*
752 * If `x` was a leaf, then `sum` is now balanced. To see this, consider
753 * the two cases in which `forest[i - 1]` either is or is not empty.
754 */
755 while (sum_len >= min_len[i]) {
756 if (forest[i].c != 0) {
757 sum = CORD_cat(forest[i].c, sum);
758 sum_len += forest[i].len;
759 /*
760 * This is again balanced, since `sum` was balanced, and has
761 * allowable depth that differs from `i` by at most 1.
762 */
763 forest[i].c = 0;
764 }
765 i++;
766 }
767 i--;
768 forest[i].c = sum;
769 forest[i].len = sum_len;
770 }
771
772 static CORD
773 CORD_concat_forest(ForestElement *forest, size_t expected_len)
774 {
775 int i = 0;
776 CORD sum = 0;
777 size_t sum_len = 0;
778
779 while (sum_len != expected_len) {
780 if (forest[i].c != 0) {
781 sum = CORD_cat(forest[i].c, sum);
782 sum_len += forest[i].len;
783 }
784 i++;
785 }
786 return sum;
787 }
788
789 /*
790 * Insert the frontier of `x` into `forest`. Balanced subtrees are treated
791 * as leaves. This potentially adds one to the depth of the final tree.
792 */
793 static void
794 CORD_balance_insert(CORD x, size_t len, ForestElement *forest)
795 {
796 int depth;
797
798 if (CORD_IS_STRING(x)) {
799 CORD_add_forest(forest, x, len);
800 } else if (IS_CONCATENATION(x)
801 && ((depth = DEPTH(x)) >= CORD_MAX_DEPTH
802 || len < min_len[depth])) {
803 const struct Concatenation *conc = &((const CordRep *)x)->data.concat;
804 size_t left_len = LEFT_LEN(x);
805
806 CORD_balance_insert(conc->left, left_len, forest);
807 CORD_balance_insert(conc->right, len - left_len, forest);
808 } else /* function or balanced */ {
809 CORD_add_forest(forest, x, len);
810 }
811 }
812
813 CORD
814 CORD_balance(CORD x)
815 {
816 Forest forest;
817 size_t len;
818
819 if (0 == x)
820 return 0;
821 if (CORD_IS_STRING(x))
822 return x;
823 if (!min_len_init)
824 CORD_init_min_len();
825 len = LEN(x);
826 CORD_init_forest(forest, len);
827 CORD_balance_insert(x, len, forest);
828 return CORD_concat_forest(forest, len);
829 }
830
831 /* The position primitives. */
832
833 /* Private routines to deal with the hard cases only: */
834
835 /*
836 * `p` contains a prefix of the path to `cur_pos`. Extend it to a full path
837 * and set up leaf info.
838 */
839 static void
840 CORD_extend_path(CORD_pos p)
841 {
842 struct CORD_pe *current_pe = &p[0].path[p[0].path_len];
843 CORD top = current_pe->pe_cord;
844 size_t pos = p[0].cur_pos;
845 size_t top_pos = current_pe->pe_start_pos;
846 size_t top_len = GEN_LEN(top);
847
848 /* Fill in the rest of the path. */
849 while (!CORD_IS_STRING(top) && IS_CONCATENATION(top)) {
850 const struct Concatenation *conc = &((const CordRep *)top)->data.concat;
851 size_t left_len;
852
853 left_len = LEFT_LEN(top);
854 current_pe++;
855 if (pos >= top_pos + left_len) {
856 current_pe->pe_cord = top = conc->right;
857 current_pe->pe_start_pos = top_pos = top_pos + left_len;
858 top_len -= left_len;
859 } else {
860 current_pe->pe_cord = top = conc->left;
861 current_pe->pe_start_pos = top_pos;
862 top_len = left_len;
863 }
864 p[0].path_len++;
865 }
866 /* Fill in leaf description for fast access. */
867 if (CORD_IS_STRING(top)) {
868 p[0].cur_leaf = top;
869 p[0].cur_start = top_pos;
870 p[0].cur_end = top_pos + top_len;
871 } else {
872 p[0].cur_end = 0;
873 }
874 if (pos >= top_pos + top_len)
875 p[0].path_len = CORD_POS_INVALID;
876 }
877
878 char
879 CORD__pos_fetch(CORD_pos p)
880 {
881 /* Leaf is a function node. */
882 const struct CORD_pe *pe;
883 CORD leaf;
884 const struct Function *f;
885
886 if (!CORD_pos_valid(p))
887 ABORT("CORD_pos_fetch: invalid argument");
888 pe = &p[0].path[p[0].path_len];
889 leaf = pe->pe_cord;
890 if (!IS_FUNCTION(leaf))
891 ABORT("CORD_pos_fetch: bad leaf");
892 f = &((const CordRep *)leaf)->data.function;
893 return f->fn(p[0].cur_pos - pe->pe_start_pos, f->client_data);
894 }
895
896 void
897 CORD__next(CORD_pos p)
898 {
899 size_t cur_pos = p[0].cur_pos + 1;
900 const struct CORD_pe *current_pe;
901 CORD leaf;
902
903 if (!CORD_pos_valid(p))
904 ABORT("CORD_next: invalid argument");
905 current_pe = &p[0].path[p[0].path_len];
906 leaf = current_pe->pe_cord;
907
908 /* Leaf is not a string or we are at end of leaf. */
909 p[0].cur_pos = cur_pos;
910 if (!CORD_IS_STRING(leaf)) {
911 /* Function leaf. */
912 const struct Function *f = &((const CordRep *)leaf)->data.function;
913 size_t start_pos = current_pe->pe_start_pos;
914 size_t end_pos = start_pos + (size_t)LEN(leaf);
915
916 if (cur_pos < end_pos) {
917 /* Fill cache and return. */
918 size_t i;
919 size_t limit = CORD_FUNCTION_BUF_SZ;
920 CORD_fn fn = f->fn;
921 void *client_data = f->client_data;
922
923 if (end_pos - cur_pos < CORD_FUNCTION_BUF_SZ) {
924 limit = end_pos - cur_pos;
925 }
926 for (i = 0; i < limit; i++) {
927 p[0].function_buf[i] = fn(i + cur_pos - start_pos, client_data);
928 }
929 p[0].cur_start = cur_pos;
930 p[0].cur_leaf = p[0].function_buf;
931 p[0].cur_end = cur_pos + limit;
932 return;
933 }
934 }
935 /*
936 * End of leaf. Pop the stack until we find two concatenation nodes with
937 * the same start position: this implies we were in left part.
938 */
939 {
940 while (p[0].path_len > 0
941 && current_pe[0].pe_start_pos != current_pe[-1].pe_start_pos) {
942 p[0].path_len--;
943 current_pe--;
944 }
945 if (p[0].path_len == 0) {
946 p[0].path_len = CORD_POS_INVALID;
947 return;
948 }
949 }
950 p[0].path_len--;
951 CORD_extend_path(p);
952 }
953
954 void
955 CORD__prev(CORD_pos p)
956 {
957 const struct CORD_pe *pe = &p[0].path[p[0].path_len];
958
959 if (p[0].cur_pos == 0) {
960 p[0].path_len = CORD_POS_INVALID;
961 return;
962 }
963 p[0].cur_pos--;
964 if (p[0].cur_pos >= pe->pe_start_pos)
965 return;
966
967 /* Beginning of leaf. */
968
969 /*
970 * Pop the stack until we find two concatenation nodes with the
971 * different start position: this implies we were in right part.
972 */
973 {
974 const struct CORD_pe *current_pe = &p[0].path[p[0].path_len];
975
976 while (p[0].path_len > 0
977 && current_pe[0].pe_start_pos == current_pe[-1].pe_start_pos) {
978 p[0].path_len--;
979 current_pe--;
980 }
981 }
982 p[0].path_len--;
983 CORD_extend_path(p);
984 }
985
986 #undef CORD_pos_fetch
987 #undef CORD_next
988 #undef CORD_prev
989 #undef CORD_pos_to_index
990 #undef CORD_pos_to_cord
991 #undef CORD_pos_valid
992
993 char
994 CORD_pos_fetch(CORD_pos p)
995 {
996 if (p[0].cur_end != 0) {
997 return p[0].cur_leaf[p[0].cur_pos - p[0].cur_start];
998 } else {
999 return CORD__pos_fetch(p);
1000 }
1001 }
1002
1003 void
1004 CORD_next(CORD_pos p)
1005 {
1006 if (p[0].cur_pos + 1 < p[0].cur_end) {
1007 p[0].cur_pos++;
1008 } else {
1009 CORD__next(p);
1010 }
1011 }
1012
1013 void
1014 CORD_prev(CORD_pos p)
1015 {
1016 if (p[0].cur_end != 0 && p[0].cur_pos > p[0].cur_start) {
1017 p[0].cur_pos--;
1018 } else {
1019 CORD__prev(p);
1020 }
1021 }
1022
1023 size_t
1024 CORD_pos_to_index(CORD_pos p)
1025 {
1026 return p[0].cur_pos;
1027 }
1028
1029 CORD
1030 CORD_pos_to_cord(CORD_pos p)
1031 {
1032 return p[0].path[0].pe_cord;
1033 }
1034
1035 int
1036 CORD_pos_valid(CORD_pos p)
1037 {
1038 return p[0].path_len != CORD_POS_INVALID;
1039 }
1040
1041 void
1042 CORD_set_pos(CORD_pos p, CORD x, size_t i)
1043 {
1044 if (x == CORD_EMPTY) {
1045 p[0].path_len = CORD_POS_INVALID;
1046 return;
1047 }
1048 p[0].path[0].pe_cord = x;
1049 p[0].path[0].pe_start_pos = 0;
1050 p[0].path_len = 0;
1051 p[0].cur_pos = i;
1052 CORD_extend_path(p);
1053 }
1054