| OLD | NEW |
| 1 /* Copyright (C) 2011 by Valentin Ochs | 1 /* Copyright (C) 2011 by Valentin Ochs |
| 2 * | 2 * |
| 3 * Permission is hereby granted, free of charge, to any person obtaining a copy | 3 * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 4 * of this software and associated documentation files (the "Software"), to | 4 * of this software and associated documentation files (the "Software"), to |
| 5 * deal in the Software without restriction, including without limitation the | 5 * deal in the Software without restriction, including without limitation the |
| 6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | 6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| 7 * sell copies of the Software, and to permit persons to whom the Software is | 7 * sell copies of the Software, and to permit persons to whom the Software is |
| 8 * furnished to do so, subject to the following conditions: | 8 * furnished to do so, subject to the following conditions: |
| 9 * | 9 * |
| 10 * The above copyright notice and this permission notice shall be included in | 10 * The above copyright notice and this permission notice shall be included in |
| (...skipping 10 matching lines...) Expand all Loading... |
| 21 | 21 |
| 22 /* Minor changes by Rich Felker for integration in musl, 2011-04-27. */ | 22 /* Minor changes by Rich Felker for integration in musl, 2011-04-27. */ |
| 23 | 23 |
| 24 #include <stdint.h> | 24 #include <stdint.h> |
| 25 #include <stdlib.h> | 25 #include <stdlib.h> |
| 26 #include <string.h> | 26 #include <string.h> |
| 27 | 27 |
| 28 #include "atomic.h" | 28 #include "atomic.h" |
| 29 #define ntz(x) a_ctz_l((x)) | 29 #define ntz(x) a_ctz_l((x)) |
| 30 | 30 |
| 31 typedef int (*cmpfun)(const void *, const void *); | 31 typedef int (*cmpfun)(const void*, const void*); |
| 32 | 32 |
| 33 static inline int pntz(size_t p[2]) { | 33 static inline int pntz(size_t p[2]) { |
| 34 » int r = ntz(p[0] - 1); | 34 int r = ntz(p[0] - 1); |
| 35 » if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) { | 35 if (r != 0 || (r = 8 * sizeof(size_t) + ntz(p[1])) != 8 * sizeof(size_t)) { |
| 36 » » return r; | 36 return r; |
| 37 » } | 37 } |
| 38 » return 0; | 38 return 0; |
| 39 } | 39 } |
| 40 | 40 |
| 41 static void cycle(size_t width, unsigned char* ar[], int n) | 41 static void cycle(size_t width, unsigned char* ar[], int n) { |
| 42 { | 42 unsigned char tmp[256]; |
| 43 » unsigned char tmp[256]; | 43 size_t l; |
| 44 » size_t l; | 44 int i; |
| 45 » int i; | |
| 46 | 45 |
| 47 » if(n < 2) { | 46 if (n < 2) { |
| 48 » » return; | 47 return; |
| 49 » } | 48 } |
| 50 | 49 |
| 51 » ar[n] = tmp; | 50 ar[n] = tmp; |
| 52 » while(width) { | 51 while (width) { |
| 53 » » l = sizeof(tmp) < width ? sizeof(tmp) : width; | 52 l = sizeof(tmp) < width ? sizeof(tmp) : width; |
| 54 » » memcpy(ar[n], ar[0], l); | 53 memcpy(ar[n], ar[0], l); |
| 55 » » for(i = 0; i < n; i++) { | 54 for (i = 0; i < n; i++) { |
| 56 » » » memcpy(ar[i], ar[i + 1], l); | 55 memcpy(ar[i], ar[i + 1], l); |
| 57 » » » ar[i] += l; | 56 ar[i] += l; |
| 58 » » } | 57 } |
| 59 » » width -= l; | 58 width -= l; |
| 60 » } | 59 } |
| 61 } | 60 } |
| 62 | 61 |
| 63 /* shl() and shr() need n > 0 */ | 62 /* shl() and shr() need n > 0 */ |
| 64 static inline void shl(size_t p[2], int n) | 63 static inline void shl(size_t p[2], int n) { |
| 65 { | 64 if (n >= 8 * sizeof(size_t)) { |
| 66 » if(n >= 8 * sizeof(size_t)) { | 65 n -= 8 * sizeof(size_t); |
| 67 » » n -= 8 * sizeof(size_t); | 66 p[1] = p[0]; |
| 68 » » p[1] = p[0]; | 67 p[0] = 0; |
| 69 » » p[0] = 0; | 68 } |
| 70 » } | 69 p[1] <<= n; |
| 71 » p[1] <<= n; | 70 p[1] |= p[0] >> (sizeof(size_t) * 8 - n); |
| 72 » p[1] |= p[0] >> (sizeof(size_t) * 8 - n); | 71 p[0] <<= n; |
| 73 » p[0] <<= n; | |
| 74 } | 72 } |
| 75 | 73 |
| 76 static inline void shr(size_t p[2], int n) | 74 static inline void shr(size_t p[2], int n) { |
| 77 { | 75 if (n >= 8 * sizeof(size_t)) { |
| 78 » if(n >= 8 * sizeof(size_t)) { | 76 n -= 8 * sizeof(size_t); |
| 79 » » n -= 8 * sizeof(size_t); | 77 p[0] = p[1]; |
| 80 » » p[0] = p[1]; | 78 p[1] = 0; |
| 81 » » p[1] = 0; | 79 } |
| 82 » } | 80 p[0] >>= n; |
| 83 » p[0] >>= n; | 81 p[0] |= p[1] << (sizeof(size_t) * 8 - n); |
| 84 » p[0] |= p[1] << (sizeof(size_t) * 8 - n); | 82 p[1] >>= n; |
| 85 » p[1] >>= n; | |
| 86 } | 83 } |
| 87 | 84 |
| 88 static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size
_t lp[]) | 85 static void sift(unsigned char* head, |
| 89 { | 86 size_t width, |
| 90 » unsigned char *rt, *lf; | 87 cmpfun cmp, |
| 91 » unsigned char *ar[14 * sizeof(size_t) + 1]; | 88 int pshift, |
| 92 » int i = 1; | 89 size_t lp[]) { |
| 90 unsigned char *rt, *lf; |
| 91 unsigned char* ar[14 * sizeof(size_t) + 1]; |
| 92 int i = 1; |
| 93 | 93 |
| 94 » ar[0] = head; | 94 ar[0] = head; |
| 95 » while(pshift > 1) { | 95 while (pshift > 1) { |
| 96 » » rt = head - width; | 96 rt = head - width; |
| 97 » » lf = head - width - lp[pshift - 2]; | 97 lf = head - width - lp[pshift - 2]; |
| 98 | 98 |
| 99 » » if((*cmp)(ar[0], lf) >= 0 && (*cmp)(ar[0], rt) >= 0) { | 99 if ((*cmp)(ar[0], lf) >= 0 && (*cmp)(ar[0], rt) >= 0) { |
| 100 » » » break; | 100 break; |
| 101 » » } | 101 } |
| 102 » » if((*cmp)(lf, rt) >= 0) { | 102 if ((*cmp)(lf, rt) >= 0) { |
| 103 » » » ar[i++] = lf; | 103 ar[i++] = lf; |
| 104 » » » head = lf; | 104 head = lf; |
| 105 » » » pshift -= 1; | 105 pshift -= 1; |
| 106 » » } else { | 106 } else { |
| 107 » » » ar[i++] = rt; | 107 ar[i++] = rt; |
| 108 » » » head = rt; | 108 head = rt; |
| 109 » » » pshift -= 2; | 109 pshift -= 2; |
| 110 » » } | 110 } |
| 111 » } | 111 } |
| 112 » cycle(width, ar, i); | 112 cycle(width, ar, i); |
| 113 } | 113 } |
| 114 | 114 |
| 115 static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2],
int pshift, int trusty, size_t lp[]) | 115 static void trinkle(unsigned char* head, |
| 116 { | 116 size_t width, |
| 117 » unsigned char *stepson, | 117 cmpfun cmp, |
| 118 » *rt, *lf; | 118 size_t pp[2], |
| 119 » size_t p[2]; | 119 int pshift, |
| 120 » unsigned char *ar[14 * sizeof(size_t) + 1]; | 120 int trusty, |
| 121 » int i = 1; | 121 size_t lp[]) { |
| 122 » int trail; | 122 unsigned char *stepson, *rt, *lf; |
| 123 size_t p[2]; |
| 124 unsigned char* ar[14 * sizeof(size_t) + 1]; |
| 125 int i = 1; |
| 126 int trail; |
| 123 | 127 |
| 124 » p[0] = pp[0]; | 128 p[0] = pp[0]; |
| 125 » p[1] = pp[1]; | 129 p[1] = pp[1]; |
| 126 | 130 |
| 127 » ar[0] = head; | 131 ar[0] = head; |
| 128 » while(p[0] != 1 || p[1] != 0) { | 132 while (p[0] != 1 || p[1] != 0) { |
| 129 » » stepson = head - lp[pshift]; | 133 stepson = head - lp[pshift]; |
| 130 » » if((*cmp)(stepson, ar[0]) <= 0) { | 134 if ((*cmp)(stepson, ar[0]) <= 0) { |
| 131 » » » break; | 135 break; |
| 132 » » } | 136 } |
| 133 » » if(!trusty && pshift > 1) { | 137 if (!trusty && pshift > 1) { |
| 134 » » » rt = head - width; | 138 rt = head - width; |
| 135 » » » lf = head - width - lp[pshift - 2]; | 139 lf = head - width - lp[pshift - 2]; |
| 136 » » » if((*cmp)(rt, stepson) >= 0 || (*cmp)(lf, stepson) >= 0)
{ | 140 if ((*cmp)(rt, stepson) >= 0 || (*cmp)(lf, stepson) >= 0) { |
| 137 » » » » break; | 141 break; |
| 138 » » » } | 142 } |
| 139 » » } | 143 } |
| 140 | 144 |
| 141 » » ar[i++] = stepson; | 145 ar[i++] = stepson; |
| 142 » » head = stepson; | 146 head = stepson; |
| 143 » » trail = pntz(p); | 147 trail = pntz(p); |
| 144 » » shr(p, trail); | 148 shr(p, trail); |
| 145 » » pshift += trail; | 149 pshift += trail; |
| 146 » » trusty = 0; | 150 trusty = 0; |
| 147 » } | 151 } |
| 148 » if(!trusty) { | 152 if (!trusty) { |
| 149 » » cycle(width, ar, i); | 153 cycle(width, ar, i); |
| 150 » » sift(head, width, cmp, pshift, lp); | 154 sift(head, width, cmp, pshift, lp); |
| 151 » } | 155 } |
| 152 } | 156 } |
| 153 | 157 |
| 154 void qsort(void *base, size_t nel, size_t width, cmpfun cmp) | 158 void qsort(void* base, size_t nel, size_t width, cmpfun cmp) { |
| 155 { | 159 size_t lp[12 * sizeof(size_t)]; |
| 156 » size_t lp[12*sizeof(size_t)]; | 160 size_t i, size = width * nel; |
| 157 » size_t i, size = width * nel; | 161 unsigned char *head, *high; |
| 158 » unsigned char *head, *high; | 162 size_t p[2] = {1, 0}; |
| 159 » size_t p[2] = {1, 0}; | 163 int pshift = 1; |
| 160 » int pshift = 1; | 164 int trail; |
| 161 » int trail; | |
| 162 | 165 |
| 163 » if (!size) return; | 166 if (!size) |
| 167 return; |
| 164 | 168 |
| 165 » head = base; | 169 head = base; |
| 166 » high = head + size - width; | 170 high = head + size - width; |
| 167 | 171 |
| 168 » /* Precompute Leonardo numbers, scaled by element width */ | 172 /* Precompute Leonardo numbers, scaled by element width */ |
| 169 » for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++); | 173 for (lp[0] = lp[1] = width, i = 2; |
| 174 (lp[i] = lp[i - 2] + lp[i - 1] + width) < size; i++) |
| 175 ; |
| 170 | 176 |
| 171 » while(head < high) { | 177 while (head < high) { |
| 172 » » if((p[0] & 3) == 3) { | 178 if ((p[0] & 3) == 3) { |
| 173 » » » sift(head, width, cmp, pshift, lp); | 179 sift(head, width, cmp, pshift, lp); |
| 174 » » » shr(p, 2); | 180 shr(p, 2); |
| 175 » » » pshift += 2; | 181 pshift += 2; |
| 176 » » } else { | 182 } else { |
| 177 » » » if(lp[pshift - 1] >= high - head) { | 183 if (lp[pshift - 1] >= high - head) { |
| 178 » » » » trinkle(head, width, cmp, p, pshift, 0, lp); | 184 trinkle(head, width, cmp, p, pshift, 0, lp); |
| 179 » » » } else { | 185 } else { |
| 180 » » » » sift(head, width, cmp, pshift, lp); | 186 sift(head, width, cmp, pshift, lp); |
| 181 » » » } | 187 } |
| 182 » » » | |
| 183 » » » if(pshift == 1) { | |
| 184 » » » » shl(p, 1); | |
| 185 » » » » pshift = 0; | |
| 186 » » » } else { | |
| 187 » » » » shl(p, pshift - 1); | |
| 188 » » » » pshift = 1; | |
| 189 » » » } | |
| 190 » » } | |
| 191 » » | |
| 192 » » p[0] |= 1; | |
| 193 » » head += width; | |
| 194 » } | |
| 195 | 188 |
| 196 » trinkle(head, width, cmp, p, pshift, 0, lp); | 189 if (pshift == 1) { |
| 190 shl(p, 1); |
| 191 pshift = 0; |
| 192 } else { |
| 193 shl(p, pshift - 1); |
| 194 pshift = 1; |
| 195 } |
| 196 } |
| 197 | 197 |
| 198 » while(pshift != 1 || p[0] != 1 || p[1] != 0) { | 198 p[0] |= 1; |
| 199 » » if(pshift <= 1) { | 199 head += width; |
| 200 » » » trail = pntz(p); | 200 } |
| 201 » » » shr(p, trail); | 201 |
| 202 » » » pshift += trail; | 202 trinkle(head, width, cmp, p, pshift, 0, lp); |
| 203 » » } else { | 203 |
| 204 » » » shl(p, 2); | 204 while (pshift != 1 || p[0] != 1 || p[1] != 0) { |
| 205 » » » pshift -= 2; | 205 if (pshift <= 1) { |
| 206 » » » p[0] ^= 7; | 206 trail = pntz(p); |
| 207 » » » shr(p, 1); | 207 shr(p, trail); |
| 208 » » » trinkle(head - lp[pshift] - width, width, cmp, p, pshift
+ 1, 1, lp); | 208 pshift += trail; |
| 209 » » » shl(p, 1); | 209 } else { |
| 210 » » » p[0] |= 1; | 210 shl(p, 2); |
| 211 » » » trinkle(head - width, width, cmp, p, pshift, 1, lp); | 211 pshift -= 2; |
| 212 » » } | 212 p[0] ^= 7; |
| 213 » » head -= width; | 213 shr(p, 1); |
| 214 » } | 214 trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp); |
| 215 shl(p, 1); |
| 216 p[0] |= 1; |
| 217 trinkle(head - width, width, cmp, p, pshift, 1, lp); |
| 218 } |
| 219 head -= width; |
| 220 } |
| 215 } | 221 } |
| OLD | NEW |