cordbscs.c raw

   1  /*
   2   * Copyright (c) 1993-1994 by Xerox Corporation.  All rights reserved.
   3   *
   4   * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
   5   * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
   6   *
   7   * Permission is hereby granted to use or copy this program
   8   * for any purpose, provided the above notices are retained on all copies.
   9   * Permission to modify the code and to distribute modified code is granted,
  10   * provided the above notices are retained, and a notice that the code was
  11   * modified is included with the above copyright notice.
  12   */
  13  
  14  #ifdef HAVE_CONFIG_H
  15  #  include "config.h"
  16  #endif
  17  #ifndef CORD_BUILD
  18  #  define CORD_BUILD
  19  #endif
  20  
  21  #include "gc/gc.h"
  22  
  23  #include <stdlib.h>
  24  #include <string.h>
  25  
  26  #include "gc/cord.h"
  27  
  28  /*
  29   * An implementation of the `cord` primitives.  These are the only
  30   * functions that understand the representation.  We perform only
  31   * minimal checks on arguments to these functions.  Out of bounds
  32   * arguments to the iteration functions may result in client functions
  33   * invoked on garbage data.  In most cases, client functions should be
  34   * programmed defensively enough that this does not result in memory
  35   * smashes.
  36   */
  37  
  38  CORD_oom_fn_t CORD_oom_fn = 0;
  39  
  40  void
  41  CORD_set_oom_fn(CORD_oom_fn_t fn)
  42  {
  43    CORD_oom_fn = fn;
  44  }
  45  
  46  CORD_oom_fn_t
  47  CORD_get_oom_fn(void)
  48  {
  49    return CORD_oom_fn;
  50  }
  51  
  52  void
  53  CORD__call_oom_fn(void)
  54  {
  55    if (CORD_oom_fn != 0)
  56      (*CORD_oom_fn)();
  57  }
  58  
  59  #define OUT_OF_MEMORY       \
  60    {                         \
  61      CORD__call_oom_fn();    \
  62      ABORT("Out of memory"); \
  63    }
  64  
  65  #define ABORT(msg)                \
  66    {                               \
  67      fprintf(stderr, "%s\n", msg); \
  68      abort();                      \
  69    }
  70  
  71  /* A structure representing a concatenation of two strings.
  72   * It is assumed that both of them are not empty.
  73   */
  74  struct Concatenation {
  75    CORD left;
  76    CORD right;
  77  };
  78  
  79  struct Function {
  80    CORD_fn fn;
  81    void *client_data;
  82  };
  83  
  84  struct Generic {
  85    char nul;
  86    char header;
  87    /* Concatenation nesting depth; 0 for function. */
  88    char depth;
  89    /*
  90     * Length of left-concatenated child if it is sufficiently short;
  91     * 0 otherwise.
  92     */
  93    unsigned char left_len;
  94    unsigned long len;
  95  };
  96  
  97  union ConcatOrFunc {
  98    struct Concatenation concat;
  99    struct Function function;
 100  };
 101  
 102  typedef struct {
 103    struct Generic generic;
 104    union ConcatOrFunc data;
 105  } CordRep;
 106  
 107  #define CONCAT_HDR 1
 108  
 109  #define FN_HDR 4
 110  
 111  /*
 112   * Substring nodes are a special case of function nodes.
 113   * The `client_data` field is known to point to a `substr_args`
 114   * structure, and the function is either `CORD_apply_access_fn`
 115   * or `CORD_index_access_fn`.
 116   */
 117  #define SUBSTR_HDR 6
 118  
 119  /* The following may be applied only to function and concatenation nodes: */
 120  #define IS_CONCATENATION(s) \
 121    (((const CordRep *)s)->generic.header == CONCAT_HDR)
 122  
 123  #define IS_FUNCTION(s) ((((const CordRep *)s)->generic.header & FN_HDR) != 0)
 124  
 125  #define IS_SUBSTR(s) (((const CordRep *)s)->generic.header == SUBSTR_HDR)
 126  
 127  #define LEN(s) (((const CordRep *)s)->generic.len)
 128  #define DEPTH(s) (((const CordRep *)s)->generic.depth)
 129  #define GEN_LEN(s) (CORD_IS_STRING(s) ? strlen(s) : LEN(s))
 130  
 131  #define MAX_LEFT_LEN 255
 132  
 133  #define LEFT_LEN(s)                                                    \
 134    (((const CordRep *)s)->generic.left_len != 0                         \
 135         ? ((const CordRep *)s)->generic.left_len                        \
 136         : (CORD_IS_STRING(((const CordRep *)s)->data.concat.left)       \
 137                ? ((const CordRep *)s)->generic.len                      \
 138                      - GEN_LEN(((const CordRep *)s)->data.concat.right) \
 139                : LEN(((const CordRep *)s)->data.concat.left)))
 140  
 141  /* Cords shorter than this are C strings. */
 142  #define SHORT_LIMIT (sizeof(CordRep) - 1)
 143  
 144  /*
 145   * Dump the internal representation of `x` to `stdout`, with initial
 146   * indentation level `n`.
 147   */
 148  static void
 149  CORD_dump_inner(CORD x, unsigned n)
 150  {
 151    size_t i;
 152  
 153    for (i = 0; i < (size_t)n; i++) {
 154      fputs("  ", stdout);
 155    }
 156    if (x == 0) {
 157      fputs("NIL\n", stdout);
 158    } else if (CORD_IS_STRING(x)) {
 159      for (i = 0; i <= SHORT_LIMIT; i++) {
 160        if (x[i] == '\0')
 161          break;
 162        putchar(x[i]);
 163      }
 164      if (x[i] != '\0')
 165        fputs("...", stdout);
 166      putchar('\n');
 167    } else if (IS_CONCATENATION(x)) {
 168      const struct Concatenation *conc = &((const CordRep *)x)->data.concat;
 169  
 170      printf("Concatenation: %p (len: %d, depth: %d)\n", (const void *)x,
 171             (int)LEN(x), (int)DEPTH(x));
 172      CORD_dump_inner(conc->left, n + 1);
 173      CORD_dump_inner(conc->right, n + 1);
 174    } else /* function */ {
 175      const struct Function *f = &((const CordRep *)x)->data.function;
 176      size_t lim = (size_t)LEN(x);
 177  
 178      if (IS_SUBSTR(x))
 179        printf("(Substring) ");
 180      printf("Function: %p (len: %d): ", (const void *)x, (int)lim);
 181      for (i = 0; i < 20 && i < lim; i++) {
 182        putchar(f->fn(i, f->client_data));
 183      }
 184      if (i < lim)
 185        fputs("...", stdout);
 186      putchar('\n');
 187    }
 188  }
 189  
 190  void
 191  CORD_dump(CORD x)
 192  {
 193    CORD_dump_inner(x, 0);
 194    fflush(stdout);
 195  }
 196  
 197  CORD
 198  CORD_cat_char_star(CORD x, const char *y, size_t leny)
 199  {
 200    size_t result_len;
 201    size_t lenx;
 202    int depth;
 203  
 204    if (x == CORD_EMPTY)
 205      return y;
 206    if (leny == 0)
 207      return x;
 208    if (CORD_IS_STRING(x)) {
 209      lenx = strlen(x);
 210      result_len = lenx + leny;
 211      if (result_len <= SHORT_LIMIT) {
 212        char *result = (char *)GC_MALLOC_ATOMIC(result_len + 1);
 213  
 214        if (NULL == result)
 215          OUT_OF_MEMORY;
 216  #ifdef LINT2
 217        memcpy(result, x, lenx + 1);
 218  #else
 219        /*
 220         * No need to copy the terminating zero as `result[lenx]` is
 221         * written below.
 222         */
 223        memcpy(result, x, lenx);
 224  #endif
 225        memcpy(result + lenx, y, leny);
 226        result[result_len] = '\0';
 227        return (CORD)result;
 228      } else {
 229        depth = 1;
 230      }
 231    } else {
 232      CORD right;
 233      CORD left;
 234      char *new_right;
 235  
 236      lenx = LEN(x);
 237  
 238      if (leny <= SHORT_LIMIT / 2 && IS_CONCATENATION(x)
 239          && CORD_IS_STRING(right = ((const CordRep *)x)->data.concat.right)) {
 240        size_t right_len;
 241  
 242        /* Merge `y` into right part of `x`. */
 243        left = ((const CordRep *)x)->data.concat.left;
 244        if (!CORD_IS_STRING(left)) {
 245          right_len = lenx - LEN(left);
 246        } else if (((const CordRep *)x)->generic.left_len != 0) {
 247          right_len = lenx - ((const CordRep *)x)->generic.left_len;
 248        } else {
 249          right_len = strlen(right);
 250        }
 251        result_len = right_len + leny; /*< length of `new_right` */
 252        if (result_len <= SHORT_LIMIT) {
 253          new_right = (char *)GC_MALLOC_ATOMIC(result_len + 1);
 254          if (new_right == 0)
 255            OUT_OF_MEMORY;
 256          memcpy(new_right, right, right_len);
 257          memcpy(new_right + right_len, y, leny);
 258          new_right[result_len] = '\0';
 259          y = new_right;
 260          leny = result_len;
 261          x = left;
 262          lenx -= right_len;
 263          /* Now fall through to concatenate the two pieces: */
 264        }
 265        if (CORD_IS_STRING(x)) {
 266          depth = 1;
 267        } else {
 268          depth = DEPTH(x) + 1;
 269        }
 270      } else {
 271        depth = DEPTH(x) + 1;
 272      }
 273      result_len = lenx + leny;
 274    }
 275    {
 276      /* The general case; `lenx` and `result_len` are known. */
 277      CordRep *result = GC_NEW(CordRep);
 278  
 279      if (NULL == result)
 280        OUT_OF_MEMORY;
 281      result->generic.header = CONCAT_HDR;
 282      result->generic.depth = (char)depth;
 283      if (lenx <= MAX_LEFT_LEN)
 284        result->generic.left_len = (unsigned char)lenx;
 285      result->generic.len = (unsigned long)result_len;
 286      result->data.concat.left = x;
 287      GC_PTR_STORE_AND_DIRTY((/* no const */ void *)&result->data.concat.right,
 288                             y);
 289      GC_reachable_here(x);
 290      if (depth >= CORD_MAX_DEPTH) {
 291        return CORD_balance((CORD)result);
 292      } else {
 293        return (CORD)result;
 294      }
 295    }
 296  }
 297  
 298  CORD
 299  CORD_cat(CORD x, CORD y)
 300  {
 301    size_t result_len;
 302    int depth;
 303    size_t lenx;
 304  
 305    if (x == CORD_EMPTY)
 306      return y;
 307    if (y == CORD_EMPTY)
 308      return x;
 309    if (CORD_IS_STRING(y)) {
 310      return CORD_cat_char_star(x, y, strlen(y));
 311    } else if (CORD_IS_STRING(x)) {
 312      lenx = strlen(x);
 313      depth = DEPTH(y) + 1;
 314    } else {
 315      int depthy = DEPTH(y);
 316  
 317      lenx = LEN(x);
 318      depth = DEPTH(x) + 1;
 319      if (depthy >= depth)
 320        depth = depthy + 1;
 321    }
 322    result_len = lenx + LEN(y);
 323    {
 324      CordRep *result = GC_NEW(CordRep);
 325  
 326      if (NULL == result)
 327        OUT_OF_MEMORY;
 328      result->generic.header = CONCAT_HDR;
 329      result->generic.depth = (char)depth;
 330      if (lenx <= MAX_LEFT_LEN)
 331        result->generic.left_len = (unsigned char)lenx;
 332      result->generic.len = (unsigned long)result_len;
 333      result->data.concat.left = x;
 334      GC_PTR_STORE_AND_DIRTY((/* no const */ void *)&result->data.concat.right,
 335                             y);
 336      GC_reachable_here(x);
 337      if (depth >= CORD_MAX_DEPTH) {
 338        return CORD_balance((CORD)result);
 339      } else {
 340        return (CORD)result;
 341      }
 342    }
 343  }
 344  
 345  static CordRep *
 346  CORD_from_fn_inner(CORD_fn fn, void *client_data, size_t len)
 347  {
 348    if (0 == len)
 349      return NULL;
 350    if (len <= SHORT_LIMIT) {
 351      char *result;
 352      size_t i;
 353      char buf[SHORT_LIMIT + 1];
 354  
 355      for (i = 0; i < len; i++) {
 356        char c = fn(i, client_data);
 357  
 358        if (c == '\0')
 359          goto gen_case;
 360        buf[i] = c;
 361      }
 362  
 363      result = (char *)GC_MALLOC_ATOMIC(len + 1);
 364      if (NULL == result)
 365        OUT_OF_MEMORY;
 366      memcpy(result, buf, len);
 367      result[len] = '\0';
 368      return (CordRep *)result;
 369    }
 370  gen_case:
 371    {
 372      CordRep *result = GC_NEW(CordRep);
 373  
 374      if (NULL == result)
 375        OUT_OF_MEMORY;
 376      result->generic.header = FN_HDR;
 377      /* The depth is already zero. */
 378      result->generic.len = (unsigned long)len;
 379      result->data.function.fn = fn;
 380      GC_PTR_STORE_AND_DIRTY(&result->data.function.client_data, client_data);
 381      return result;
 382    }
 383  }
 384  
 385  CORD
 386  CORD_from_fn(CORD_fn fn, void *client_data, size_t len)
 387  {
 388    return (/* const */ CORD)CORD_from_fn_inner(fn, client_data, len);
 389  }
 390  
 391  size_t
 392  CORD_len(CORD x)
 393  {
 394    return x == 0 ? 0 : GEN_LEN(x);
 395  }
 396  
 397  struct substr_args {
 398    CordRep *sa_cord;
 399    size_t sa_index;
 400  };
 401  
 402  static char
 403  CORD_index_access_fn(size_t i, void *client_data)
 404  {
 405    struct substr_args *descr = (struct substr_args *)client_data;
 406  
 407    return ((char *)descr->sa_cord)[i + descr->sa_index];
 408  }
 409  
 410  static char
 411  CORD_apply_access_fn(size_t i, void *client_data)
 412  {
 413    struct substr_args *descr = (struct substr_args *)client_data;
 414    struct Function *fn_cord = &descr->sa_cord->data.function;
 415  
 416    return fn_cord->fn(i + descr->sa_index, fn_cord->client_data);
 417  }
 418  
 419  /*
 420   * A variant of `CORD_substr` that simply returns a function node,
 421   * thus postponing its work.  `f` argument is a function that may
 422   * be used for efficient access to the `i`-th character.
 423   * Assumes `i >= 0` and `i + n < CORD_len(x)`.
 424   */
 425  static CORD
 426  CORD_substr_closure(CORD x, size_t i, size_t n, CORD_fn f)
 427  {
 428    struct substr_args *sa = GC_NEW(struct substr_args);
 429    CordRep *result;
 430  
 431    if (sa == 0)
 432      OUT_OF_MEMORY;
 433    sa->sa_index = i;
 434    GC_PTR_STORE_AND_DIRTY(&sa->sa_cord, x);
 435    result = CORD_from_fn_inner(f, sa, n);
 436    if ((CORD)result != CORD_EMPTY && 0 == result->generic.nul)
 437      result->generic.header = SUBSTR_HDR;
 438    return (CORD)result;
 439  }
 440  
 441  /*
 442   * Substrings of function nodes and flat strings shorter than
 443   * this are flat strings.  Otherwise we use a functional
 444   * representation, which is significantly slower to access.
 445   */
 446  #define SUBSTR_LIMIT (10 * SHORT_LIMIT)
 447  
 448  /*
 449   * A variant of `CORD_substr` that assumes `i >= 0`, `n > 0` and
 450   * `i + n < CORD_len(x)`.
 451   */
 452  static CORD
 453  CORD_substr_checked(CORD x, size_t i, size_t n)
 454  {
 455    if (CORD_IS_STRING(x)) {
 456      if (n > SUBSTR_LIMIT) {
 457        return CORD_substr_closure(x, i, n, CORD_index_access_fn);
 458      } else {
 459        char *result = (char *)GC_MALLOC_ATOMIC(n + 1);
 460  
 461        if (NULL == result)
 462          OUT_OF_MEMORY;
 463        strncpy(result, x + i, n);
 464        result[n] = '\0';
 465        return result;
 466      }
 467    } else if (IS_CONCATENATION(x)) {
 468      const struct Concatenation *conc = &((const CordRep *)x)->data.concat;
 469      size_t left_len = LEFT_LEN(x);
 470      size_t right_len = (size_t)LEN(x) - left_len;
 471  
 472      if (i >= left_len) {
 473        if (n == right_len)
 474          return conc->right;
 475        return CORD_substr_checked(conc->right, i - left_len, n);
 476      } else if (i + n <= left_len) {
 477        if (n == left_len)
 478          return conc->left;
 479        return CORD_substr_checked(conc->left, i, n);
 480      } else {
 481        /* Need at least one character from each side. */
 482        CORD left_part;
 483        CORD right_part;
 484        size_t left_part_len = left_len - i;
 485  
 486        if (i == 0) {
 487          left_part = conc->left;
 488        } else {
 489          left_part = CORD_substr_checked(conc->left, i, left_part_len);
 490        }
 491        if (i + n == right_len + left_len) {
 492          right_part = conc->right;
 493        } else {
 494          right_part = CORD_substr_checked(conc->right, 0, n - left_part_len);
 495        }
 496        return CORD_cat(left_part, right_part);
 497      }
 498    } else /* function */ {
 499      if (n > SUBSTR_LIMIT) {
 500        if (IS_SUBSTR(x)) {
 501          /* Avoid nesting substring nodes. */
 502          const struct Function *f = &((const CordRep *)x)->data.function;
 503          const struct substr_args *descr = (struct substr_args *)f->client_data;
 504  
 505          return CORD_substr_closure((CORD)descr->sa_cord, i + descr->sa_index,
 506                                     n, f->fn);
 507        } else {
 508          return CORD_substr_closure(x, i, n, CORD_apply_access_fn);
 509        }
 510      } else {
 511        char *result;
 512        const struct Function *f = &((const CordRep *)x)->data.function;
 513        char buf[SUBSTR_LIMIT + 1];
 514        char *p = buf;
 515        size_t j;
 516        size_t lim = i + n;
 517  
 518        for (j = i; j < lim; j++) {
 519          char c = f->fn(j, f->client_data);
 520  
 521          if (c == '\0') {
 522            return CORD_substr_closure(x, i, n, CORD_apply_access_fn);
 523          }
 524          *p++ = c;
 525        }
 526        result = (char *)GC_MALLOC_ATOMIC(n + 1);
 527        if (NULL == result)
 528          OUT_OF_MEMORY;
 529        memcpy(result, buf, n);
 530        result[n] = '\0';
 531        return result;
 532      }
 533    }
 534  }
 535  
 536  CORD
 537  CORD_substr(CORD x, size_t i, size_t n)
 538  {
 539    size_t len = CORD_len(x);
 540  
 541    if (i >= len || 0 == n)
 542      return 0;
 543    if (i + n > len)
 544      n = len - i;
 545    return CORD_substr_checked(x, i, n);
 546  }
 547  
 548  int
 549  CORD_iter5(CORD x, size_t i, CORD_iter_fn f1, CORD_batched_iter_fn f2,
 550             void *client_data)
 551  {
 552    if (0 == x)
 553      return 0;
 554    if (CORD_IS_STRING(x)) {
 555      const char *p = x + i;
 556  
 557      if (*p == '\0')
 558        ABORT("2nd arg to CORD_iter5 too big");
 559      if (f2 != CORD_NO_FN) {
 560        return f2(p, client_data);
 561      } else {
 562        while (*p) {
 563          if (f1(*p, client_data))
 564            return 1;
 565          p++;
 566        }
 567        return 0;
 568      }
 569    } else if (IS_CONCATENATION(x)) {
 570      const struct Concatenation *conc = &((const CordRep *)x)->data.concat;
 571  
 572      if (i > 0) {
 573        size_t left_len = LEFT_LEN(x);
 574  
 575        if (i >= left_len) {
 576          return CORD_iter5(conc->right, i - left_len, f1, f2, client_data);
 577        }
 578      }
 579      if (CORD_iter5(conc->left, i, f1, f2, client_data)) {
 580        return 1;
 581      }
 582      return CORD_iter5(conc->right, 0, f1, f2, client_data);
 583    } else /* function */ {
 584      const struct Function *f = &((const CordRep *)x)->data.function;
 585      size_t j;
 586      size_t lim = (size_t)LEN(x);
 587  
 588      for (j = i; j < lim; j++) {
 589        if (f1(f->fn(j, f->client_data), client_data)) {
 590          return 1;
 591        }
 592      }
 593      return 0;
 594    }
 595  }
 596  
 597  #undef CORD_iter
 598  int
 599  CORD_iter(CORD x, CORD_iter_fn f1, void *client_data)
 600  {
 601    return CORD_iter5(x, 0, f1, CORD_NO_FN, client_data);
 602  }
 603  
 604  int
 605  CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void *client_data)
 606  {
 607    if (0 == x)
 608      return 0;
 609    if (CORD_IS_STRING(x)) {
 610      const char *p = x + i;
 611  
 612      for (;;) {
 613        char c = *p;
 614  
 615        if (c == '\0')
 616          ABORT("2nd arg to CORD_riter4 too big");
 617        if (f1(c, client_data))
 618          return 1;
 619        if (p == x)
 620          break;
 621        p--;
 622      }
 623    } else if (IS_CONCATENATION(x)) {
 624      const struct Concatenation *conc = &((const CordRep *)x)->data.concat;
 625      CORD left_part = conc->left;
 626      size_t left_len = LEFT_LEN(x);
 627  
 628      if (i >= left_len) {
 629        if (CORD_riter4(conc->right, i - left_len, f1, client_data)) {
 630          return 1;
 631        }
 632        return CORD_riter4(left_part, left_len - 1, f1, client_data);
 633      } else {
 634        return CORD_riter4(left_part, i, f1, client_data);
 635      }
 636    } else /* function */ {
 637      const struct Function *f = &((const CordRep *)x)->data.function;
 638      size_t j;
 639  
 640      for (j = i;; j--) {
 641        if (f1(f->fn(j, f->client_data), client_data)) {
 642          return 1;
 643        }
 644        if (0 == j)
 645          break;
 646      }
 647    }
 648    return 0;
 649  }
 650  
 651  int
 652  CORD_riter(CORD x, CORD_iter_fn f1, void *client_data)
 653  {
 654    size_t len = CORD_len(x);
 655    if (0 == len)
 656      return 0;
 657    return CORD_riter4(x, len - 1, f1, client_data);
 658  }
 659  
 660  /*
 661   * The following functions are concerned with balancing cords.
 662   * Strategy:
 663   * Scan the cord from left to right, keeping the cord scanned so far
 664   * as a forest of balanced trees of exponentially decreasing length.
 665   * When a new subtree needs to be added to the forest, we concatenate all
 666   * shorter ones to the new tree in the appropriate order, and then insert
 667   * the result into the forest.
 668   * Crucial invariants:
 669   *   1. The concatenation of the forest (in decreasing order) with the
 670   *      unscanned part of the rope is equal to the rope being balanced;
 671   *   2. All trees in the forest are balanced;
 672   *   3. `forest[i]` has depth at most `i`.
 673   */
 674  
 675  typedef struct {
 676    CORD c;
 677    /* The actual length of `c`. */
 678    size_t len;
 679  } ForestElement;
 680  
 681  static size_t min_len[CORD_MAX_DEPTH];
 682  
 683  static int min_len_init = 0;
 684  
 685  /*
 686   * The string is the concatenation of `forest` in order of decreasing indices.
 687   * `forest[i].len >= fib(i + 1)` is assumed.
 688   */
 689  typedef ForestElement Forest[CORD_MAX_DEPTH];
 690  
 691  static void
 692  CORD_init_min_len(void)
 693  {
 694    int i;
 695    size_t last, previous;
 696  
 697    min_len[0] = previous = 1;
 698    min_len[1] = last = 2;
 699    for (i = 2; i < CORD_MAX_DEPTH; i++) {
 700      size_t current = last < (~(size_t)0) - previous ? last + previous
 701                                                      : last /* overflow */;
 702  
 703      min_len[i] = current;
 704      previous = last;
 705      last = current;
 706    }
 707    min_len_init = 1;
 708  }
 709  
 710  static void
 711  CORD_init_forest(ForestElement *forest, size_t max_len)
 712  {
 713    int i;
 714  
 715    for (i = 0; i < CORD_MAX_DEPTH; i++) {
 716      forest[i].c = 0;
 717      if (min_len[i] > max_len)
 718        return;
 719    }
 720    ABORT("Cord too long");
 721  }
 722  
 723  /*
 724   * Add a leaf to the appropriate level in `forest`, cleaning out lower
 725   * levels as necessary.  Also works if `x` is a balanced tree of
 726   * concatenations; however in this case an extra concatenation node
 727   * may be inserted above `x`; this node should not be counted in the
 728   * statement of the invariants.
 729   */
 730  static void
 731  CORD_add_forest(ForestElement *forest, CORD x, size_t len)
 732  {
 733    int i = 0;
 734    CORD sum = CORD_EMPTY;
 735    size_t sum_len = 0;
 736  
 737    while (len > min_len[i + 1]) {
 738      if (forest[i].c != 0) {
 739        sum = CORD_cat(forest[i].c, sum);
 740        sum_len += forest[i].len;
 741        forest[i].c = 0;
 742      }
 743      i++;
 744    }
 745    /*
 746     * `sum` has depth at most 1 greater than what would be required for the
 747     * balance.
 748     */
 749    sum = CORD_cat(sum, x);
 750    sum_len += len;
 751    /*
 752     * If `x` was a leaf, then `sum` is now balanced.  To see this, consider
 753     * the two cases in which `forest[i - 1]` either is or is not empty.
 754     */
 755    while (sum_len >= min_len[i]) {
 756      if (forest[i].c != 0) {
 757        sum = CORD_cat(forest[i].c, sum);
 758        sum_len += forest[i].len;
 759        /*
 760         * This is again balanced, since `sum` was balanced, and has
 761         * allowable depth that differs from `i` by at most 1.
 762         */
 763        forest[i].c = 0;
 764      }
 765      i++;
 766    }
 767    i--;
 768    forest[i].c = sum;
 769    forest[i].len = sum_len;
 770  }
 771  
 772  static CORD
 773  CORD_concat_forest(ForestElement *forest, size_t expected_len)
 774  {
 775    int i = 0;
 776    CORD sum = 0;
 777    size_t sum_len = 0;
 778  
 779    while (sum_len != expected_len) {
 780      if (forest[i].c != 0) {
 781        sum = CORD_cat(forest[i].c, sum);
 782        sum_len += forest[i].len;
 783      }
 784      i++;
 785    }
 786    return sum;
 787  }
 788  
 789  /*
 790   * Insert the frontier of `x` into `forest`.  Balanced subtrees are treated
 791   * as leaves.  This potentially adds one to the depth of the final tree.
 792   */
 793  static void
 794  CORD_balance_insert(CORD x, size_t len, ForestElement *forest)
 795  {
 796    int depth;
 797  
 798    if (CORD_IS_STRING(x)) {
 799      CORD_add_forest(forest, x, len);
 800    } else if (IS_CONCATENATION(x)
 801               && ((depth = DEPTH(x)) >= CORD_MAX_DEPTH
 802                   || len < min_len[depth])) {
 803      const struct Concatenation *conc = &((const CordRep *)x)->data.concat;
 804      size_t left_len = LEFT_LEN(x);
 805  
 806      CORD_balance_insert(conc->left, left_len, forest);
 807      CORD_balance_insert(conc->right, len - left_len, forest);
 808    } else /* function or balanced */ {
 809      CORD_add_forest(forest, x, len);
 810    }
 811  }
 812  
 813  CORD
 814  CORD_balance(CORD x)
 815  {
 816    Forest forest;
 817    size_t len;
 818  
 819    if (0 == x)
 820      return 0;
 821    if (CORD_IS_STRING(x))
 822      return x;
 823    if (!min_len_init)
 824      CORD_init_min_len();
 825    len = LEN(x);
 826    CORD_init_forest(forest, len);
 827    CORD_balance_insert(x, len, forest);
 828    return CORD_concat_forest(forest, len);
 829  }
 830  
 831  /* The position primitives. */
 832  
 833  /* Private routines to deal with the hard cases only: */
 834  
 835  /*
 836   * `p` contains a prefix of the path to `cur_pos`. Extend it to a full path
 837   * and set up leaf info.
 838   */
 839  static void
 840  CORD_extend_path(CORD_pos p)
 841  {
 842    struct CORD_pe *current_pe = &p[0].path[p[0].path_len];
 843    CORD top = current_pe->pe_cord;
 844    size_t pos = p[0].cur_pos;
 845    size_t top_pos = current_pe->pe_start_pos;
 846    size_t top_len = GEN_LEN(top);
 847  
 848    /* Fill in the rest of the path. */
 849    while (!CORD_IS_STRING(top) && IS_CONCATENATION(top)) {
 850      const struct Concatenation *conc = &((const CordRep *)top)->data.concat;
 851      size_t left_len;
 852  
 853      left_len = LEFT_LEN(top);
 854      current_pe++;
 855      if (pos >= top_pos + left_len) {
 856        current_pe->pe_cord = top = conc->right;
 857        current_pe->pe_start_pos = top_pos = top_pos + left_len;
 858        top_len -= left_len;
 859      } else {
 860        current_pe->pe_cord = top = conc->left;
 861        current_pe->pe_start_pos = top_pos;
 862        top_len = left_len;
 863      }
 864      p[0].path_len++;
 865    }
 866    /* Fill in leaf description for fast access. */
 867    if (CORD_IS_STRING(top)) {
 868      p[0].cur_leaf = top;
 869      p[0].cur_start = top_pos;
 870      p[0].cur_end = top_pos + top_len;
 871    } else {
 872      p[0].cur_end = 0;
 873    }
 874    if (pos >= top_pos + top_len)
 875      p[0].path_len = CORD_POS_INVALID;
 876  }
 877  
 878  char
 879  CORD__pos_fetch(CORD_pos p)
 880  {
 881    /* Leaf is a function node. */
 882    const struct CORD_pe *pe;
 883    CORD leaf;
 884    const struct Function *f;
 885  
 886    if (!CORD_pos_valid(p))
 887      ABORT("CORD_pos_fetch: invalid argument");
 888    pe = &p[0].path[p[0].path_len];
 889    leaf = pe->pe_cord;
 890    if (!IS_FUNCTION(leaf))
 891      ABORT("CORD_pos_fetch: bad leaf");
 892    f = &((const CordRep *)leaf)->data.function;
 893    return f->fn(p[0].cur_pos - pe->pe_start_pos, f->client_data);
 894  }
 895  
 896  void
 897  CORD__next(CORD_pos p)
 898  {
 899    size_t cur_pos = p[0].cur_pos + 1;
 900    const struct CORD_pe *current_pe;
 901    CORD leaf;
 902  
 903    if (!CORD_pos_valid(p))
 904      ABORT("CORD_next: invalid argument");
 905    current_pe = &p[0].path[p[0].path_len];
 906    leaf = current_pe->pe_cord;
 907  
 908    /* Leaf is not a string or we are at end of leaf. */
 909    p[0].cur_pos = cur_pos;
 910    if (!CORD_IS_STRING(leaf)) {
 911      /* Function leaf. */
 912      const struct Function *f = &((const CordRep *)leaf)->data.function;
 913      size_t start_pos = current_pe->pe_start_pos;
 914      size_t end_pos = start_pos + (size_t)LEN(leaf);
 915  
 916      if (cur_pos < end_pos) {
 917        /* Fill cache and return. */
 918        size_t i;
 919        size_t limit = CORD_FUNCTION_BUF_SZ;
 920        CORD_fn fn = f->fn;
 921        void *client_data = f->client_data;
 922  
 923        if (end_pos - cur_pos < CORD_FUNCTION_BUF_SZ) {
 924          limit = end_pos - cur_pos;
 925        }
 926        for (i = 0; i < limit; i++) {
 927          p[0].function_buf[i] = fn(i + cur_pos - start_pos, client_data);
 928        }
 929        p[0].cur_start = cur_pos;
 930        p[0].cur_leaf = p[0].function_buf;
 931        p[0].cur_end = cur_pos + limit;
 932        return;
 933      }
 934    }
 935    /*
 936     * End of leaf.  Pop the stack until we find two concatenation nodes with
 937     * the same start position: this implies we were in left part.
 938     */
 939    {
 940      while (p[0].path_len > 0
 941             && current_pe[0].pe_start_pos != current_pe[-1].pe_start_pos) {
 942        p[0].path_len--;
 943        current_pe--;
 944      }
 945      if (p[0].path_len == 0) {
 946        p[0].path_len = CORD_POS_INVALID;
 947        return;
 948      }
 949    }
 950    p[0].path_len--;
 951    CORD_extend_path(p);
 952  }
 953  
 954  void
 955  CORD__prev(CORD_pos p)
 956  {
 957    const struct CORD_pe *pe = &p[0].path[p[0].path_len];
 958  
 959    if (p[0].cur_pos == 0) {
 960      p[0].path_len = CORD_POS_INVALID;
 961      return;
 962    }
 963    p[0].cur_pos--;
 964    if (p[0].cur_pos >= pe->pe_start_pos)
 965      return;
 966  
 967    /* Beginning of leaf. */
 968  
 969    /*
 970     * Pop the stack until we find two concatenation nodes with the
 971     * different start position: this implies we were in right part.
 972     */
 973    {
 974      const struct CORD_pe *current_pe = &p[0].path[p[0].path_len];
 975  
 976      while (p[0].path_len > 0
 977             && current_pe[0].pe_start_pos == current_pe[-1].pe_start_pos) {
 978        p[0].path_len--;
 979        current_pe--;
 980      }
 981    }
 982    p[0].path_len--;
 983    CORD_extend_path(p);
 984  }
 985  
 986  #undef CORD_pos_fetch
 987  #undef CORD_next
 988  #undef CORD_prev
 989  #undef CORD_pos_to_index
 990  #undef CORD_pos_to_cord
 991  #undef CORD_pos_valid
 992  
 993  char
 994  CORD_pos_fetch(CORD_pos p)
 995  {
 996    if (p[0].cur_end != 0) {
 997      return p[0].cur_leaf[p[0].cur_pos - p[0].cur_start];
 998    } else {
 999      return CORD__pos_fetch(p);
1000    }
1001  }
1002  
1003  void
1004  CORD_next(CORD_pos p)
1005  {
1006    if (p[0].cur_pos + 1 < p[0].cur_end) {
1007      p[0].cur_pos++;
1008    } else {
1009      CORD__next(p);
1010    }
1011  }
1012  
1013  void
1014  CORD_prev(CORD_pos p)
1015  {
1016    if (p[0].cur_end != 0 && p[0].cur_pos > p[0].cur_start) {
1017      p[0].cur_pos--;
1018    } else {
1019      CORD__prev(p);
1020    }
1021  }
1022  
1023  size_t
1024  CORD_pos_to_index(CORD_pos p)
1025  {
1026    return p[0].cur_pos;
1027  }
1028  
1029  CORD
1030  CORD_pos_to_cord(CORD_pos p)
1031  {
1032    return p[0].path[0].pe_cord;
1033  }
1034  
1035  int
1036  CORD_pos_valid(CORD_pos p)
1037  {
1038    return p[0].path_len != CORD_POS_INVALID;
1039  }
1040  
1041  void
1042  CORD_set_pos(CORD_pos p, CORD x, size_t i)
1043  {
1044    if (x == CORD_EMPTY) {
1045      p[0].path_len = CORD_POS_INVALID;
1046      return;
1047    }
1048    p[0].path[0].pe_cord = x;
1049    p[0].path[0].pe_start_pos = 0;
1050    p[0].path_len = 0;
1051    p[0].cur_pos = i;
1052    CORD_extend_path(p);
1053  }
1054