qsort.c raw

   1  /* Copyright (C) 2011 by Valentin Ochs
   2   *
   3   * Permission is hereby granted, free of charge, to any person obtaining a copy
   4   * of this software and associated documentation files (the "Software"), to
   5   * deal in the Software without restriction, including without limitation the
   6   * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
   7   * sell copies of the Software, and to permit persons to whom the Software is
   8   * furnished to do so, subject to the following conditions:
   9   *
  10   * The above copyright notice and this permission notice shall be included in
  11   * all copies or substantial portions of the Software.
  12   *
  13   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  18   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  19   * IN THE SOFTWARE.
  20   */
  21  
  22  /* Minor changes by Rich Felker for integration in musl, 2011-04-27. */
  23  
  24  /* Smoothsort, an adaptive variant of Heapsort.  Memory usage: O(1).
  25     Run time: Worst case O(n log n), close to O(n) in the mostly-sorted case. */
  26  
  27  #define _BSD_SOURCE
  28  #include <stdint.h>
  29  #include <stdlib.h>
  30  #include <string.h>
  31  
  32  #include "atomic.h"
  33  #define ntz(x) a_ctz_l((x))
  34  
  35  typedef int (*cmpfun)(const void *, const void *, void *);
  36  
  37  static inline int pntz(size_t p[2]) {
  38  	int r = ntz(p[0] - 1);
  39  	if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) {
  40  		return r;
  41  	}
  42  	return 0;
  43  }
  44  
  45  static void cycle(size_t width, unsigned char* ar[], int n)
  46  {
  47  	unsigned char tmp[256];
  48  	size_t l;
  49  	int i;
  50  
  51  	if(n < 2) {
  52  		return;
  53  	}
  54  
  55  	ar[n] = tmp;
  56  	while(width) {
  57  		l = sizeof(tmp) < width ? sizeof(tmp) : width;
  58  		memcpy(ar[n], ar[0], l);
  59  		for(i = 0; i < n; i++) {
  60  			memcpy(ar[i], ar[i + 1], l);
  61  			ar[i] += l;
  62  		}
  63  		width -= l;
  64  	}
  65  }
  66  
  67  /* shl() and shr() need n > 0 */
  68  static inline void shl(size_t p[2], int n)
  69  {
  70  	if(n >= 8 * sizeof(size_t)) {
  71  		n -= 8 * sizeof(size_t);
  72  		p[1] = p[0];
  73  		p[0] = 0;
  74  	}
  75  	p[1] <<= n;
  76  	p[1] |= p[0] >> (sizeof(size_t) * 8 - n);
  77  	p[0] <<= n;
  78  }
  79  
  80  static inline void shr(size_t p[2], int n)
  81  {
  82  	if(n >= 8 * sizeof(size_t)) {
  83  		n -= 8 * sizeof(size_t);
  84  		p[0] = p[1];
  85  		p[1] = 0;
  86  	}
  87  	p[0] >>= n;
  88  	p[0] |= p[1] << (sizeof(size_t) * 8 - n);
  89  	p[1] >>= n;
  90  }
  91  
  92  static void sift(unsigned char *head, size_t width, cmpfun cmp, void *arg, int pshift, size_t lp[])
  93  {
  94  	unsigned char *rt, *lf;
  95  	unsigned char *ar[14 * sizeof(size_t) + 1];
  96  	int i = 1;
  97  
  98  	ar[0] = head;
  99  	while(pshift > 1) {
 100  		rt = head - width;
 101  		lf = head - width - lp[pshift - 2];
 102  
 103  		if(cmp(ar[0], lf, arg) >= 0 && cmp(ar[0], rt, arg) >= 0) {
 104  			break;
 105  		}
 106  		if(cmp(lf, rt, arg) >= 0) {
 107  			ar[i++] = lf;
 108  			head = lf;
 109  			pshift -= 1;
 110  		} else {
 111  			ar[i++] = rt;
 112  			head = rt;
 113  			pshift -= 2;
 114  		}
 115  	}
 116  	cycle(width, ar, i);
 117  }
 118  
 119  static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[])
 120  {
 121  	unsigned char *stepson,
 122  	              *rt, *lf;
 123  	size_t p[2];
 124  	unsigned char *ar[14 * sizeof(size_t) + 1];
 125  	int i = 1;
 126  	int trail;
 127  
 128  	p[0] = pp[0];
 129  	p[1] = pp[1];
 130  
 131  	ar[0] = head;
 132  	while(p[0] != 1 || p[1] != 0) {
 133  		stepson = head - lp[pshift];
 134  		if(cmp(stepson, ar[0], arg) <= 0) {
 135  			break;
 136  		}
 137  		if(!trusty && pshift > 1) {
 138  			rt = head - width;
 139  			lf = head - width - lp[pshift - 2];
 140  			if(cmp(rt, stepson, arg) >= 0 || cmp(lf, stepson, arg) >= 0) {
 141  				break;
 142  			}
 143  		}
 144  
 145  		ar[i++] = stepson;
 146  		head = stepson;
 147  		trail = pntz(p);
 148  		shr(p, trail);
 149  		pshift += trail;
 150  		trusty = 0;
 151  	}
 152  	if(!trusty) {
 153  		cycle(width, ar, i);
 154  		sift(head, width, cmp, arg, pshift, lp);
 155  	}
 156  }
 157  
 158  void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 159  {
 160  	size_t lp[12*sizeof(size_t)];
 161  	size_t i, size = width * nel;
 162  	unsigned char *head, *high;
 163  	size_t p[2] = {1, 0};
 164  	int pshift = 1;
 165  	int trail;
 166  
 167  	if (!size) return;
 168  
 169  	head = base;
 170  	high = head + size - width;
 171  
 172  	/* Precompute Leonardo numbers, scaled by element width */
 173  	for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++);
 174  
 175  	while(head < high) {
 176  		if((p[0] & 3) == 3) {
 177  			sift(head, width, cmp, arg, pshift, lp);
 178  			shr(p, 2);
 179  			pshift += 2;
 180  		} else {
 181  			if(lp[pshift - 1] >= high - head) {
 182  				trinkle(head, width, cmp, arg, p, pshift, 0, lp);
 183  			} else {
 184  				sift(head, width, cmp, arg, pshift, lp);
 185  			}
 186  
 187  			if(pshift == 1) {
 188  				shl(p, 1);
 189  				pshift = 0;
 190  			} else {
 191  				shl(p, pshift - 1);
 192  				pshift = 1;
 193  			}
 194  		}
 195  
 196  		p[0] |= 1;
 197  		head += width;
 198  	}
 199  
 200  	trinkle(head, width, cmp, arg, p, pshift, 0, lp);
 201  
 202  	while(pshift != 1 || p[0] != 1 || p[1] != 0) {
 203  		if(pshift <= 1) {
 204  			trail = pntz(p);
 205  			shr(p, trail);
 206  			pshift += trail;
 207  		} else {
 208  			shl(p, 2);
 209  			pshift -= 2;
 210  			p[0] ^= 7;
 211  			shr(p, 1);
 212  			trinkle(head - lp[pshift] - width, width, cmp, arg, p, pshift + 1, 1, lp);
 213  			shl(p, 1);
 214  			p[0] |= 1;
 215  			trinkle(head - width, width, cmp, arg, p, pshift, 1, lp);
 216  		}
 217  		head -= width;
 218  	}
 219  }
 220  
 221  weak_alias(__qsort_r, qsort_r);
 222