OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ** 2013-05-28 |
| 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: |
| 6 ** |
| 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. |
| 10 ** |
| 11 ****************************************************************************** |
| 12 ** |
| 13 ** This file contains code to implement the percentile(Y,P) SQL function |
| 14 ** as described below: |
| 15 ** |
| 16 ** (1) The percentile(Y,P) function is an aggregate function taking |
| 17 ** exactly two arguments. |
| 18 ** |
| 19 ** (2) If the P argument to percentile(Y,P) is not the same for every |
| 20 ** row in the aggregate then an error is thrown. The word "same" |
| 21 ** in the previous sentence means that the value differ by less |
| 22 ** than 0.001. |
| 23 ** |
| 24 ** (3) If the P argument to percentile(Y,P) evaluates to anything other |
| 25 ** than a number in the range of 0.0 to 100.0 inclusive then an |
| 26 ** error is thrown. |
| 27 ** |
| 28 ** (4) If any Y argument to percentile(Y,P) evaluates to a value that |
| 29 ** is not NULL and is not numeric then an error is thrown. |
| 30 ** |
| 31 ** (5) If any Y argument to percentile(Y,P) evaluates to plus or minus |
| 32 ** infinity then an error is thrown. (SQLite always interprets NaN |
| 33 ** values as NULL.) |
| 34 ** |
| 35 ** (6) Both Y and P in percentile(Y,P) can be arbitrary expressions, |
| 36 ** including CASE WHEN expressions. |
| 37 ** |
| 38 ** (7) The percentile(Y,P) aggregate is able to handle inputs of at least |
| 39 ** one million (1,000,000) rows. |
| 40 ** |
| 41 ** (8) If there are no non-NULL values for Y, then percentile(Y,P) |
| 42 ** returns NULL. |
| 43 ** |
| 44 ** (9) If there is exactly one non-NULL value for Y, the percentile(Y,P) |
| 45 ** returns the one Y value. |
| 46 ** |
| 47 ** (10) If there N non-NULL values of Y where N is two or more and |
| 48 ** the Y values are ordered from least to greatest and a graph is |
| 49 ** drawn from 0 to N-1 such that the height of the graph at J is |
| 50 ** the J-th Y value and such that straight lines are drawn between |
| 51 ** adjacent Y values, then the percentile(Y,P) function returns |
| 52 ** the height of the graph at P*(N-1)/100. |
| 53 ** |
| 54 ** (11) The percentile(Y,P) function always returns either a floating |
| 55 ** point number or NULL. |
| 56 ** |
| 57 ** (12) The percentile(Y,P) is implemented as a single C99 source-code |
| 58 ** file that compiles into a shared-library or DLL that can be loaded |
| 59 ** into SQLite using the sqlite3_load_extension() interface. |
| 60 */ |
| 61 #include "sqlite3ext.h" |
| 62 SQLITE_EXTENSION_INIT1 |
| 63 #include <assert.h> |
| 64 #include <string.h> |
| 65 #include <stdlib.h> |
| 66 |
| 67 /* The following object is the session context for a single percentile() |
| 68 ** function. We have to remember all input Y values until the very end. |
| 69 ** Those values are accumulated in the Percentile.a[] array. |
| 70 */ |
| 71 typedef struct Percentile Percentile; |
| 72 struct Percentile { |
| 73 unsigned nAlloc; /* Number of slots allocated for a[] */ |
| 74 unsigned nUsed; /* Number of slots actually used in a[] */ |
| 75 double rPct; /* 1.0 more than the value for P */ |
| 76 double *a; /* Array of Y values */ |
| 77 }; |
| 78 |
| 79 /* |
| 80 ** Return TRUE if the input floating-point number is an infinity. |
| 81 */ |
| 82 static int isInfinity(double r){ |
| 83 sqlite3_uint64 u; |
| 84 assert( sizeof(u)==sizeof(r) ); |
| 85 memcpy(&u, &r, sizeof(u)); |
| 86 return ((u>>52)&0x7ff)==0x7ff; |
| 87 } |
| 88 |
| 89 /* |
| 90 ** Return TRUE if two doubles differ by 0.001 or less |
| 91 */ |
| 92 static int sameValue(double a, double b){ |
| 93 a -= b; |
| 94 return a>=-0.001 && a<=0.001; |
| 95 } |
| 96 |
| 97 /* |
| 98 ** The "step" function for percentile(Y,P) is called once for each |
| 99 ** input row. |
| 100 */ |
| 101 static void percentStep(sqlite3_context *pCtx, int argc, sqlite3_value **argv){ |
| 102 Percentile *p; |
| 103 double rPct; |
| 104 int eType; |
| 105 double y; |
| 106 assert( argc==2 ); |
| 107 |
| 108 /* Requirement 3: P must be a number between 0 and 100 */ |
| 109 eType = sqlite3_value_numeric_type(argv[1]); |
| 110 rPct = sqlite3_value_double(argv[1]); |
| 111 if( (eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT) || |
| 112 ((rPct = sqlite3_value_double(argv[1]))<0.0 || rPct>100.0) ){ |
| 113 sqlite3_result_error(pCtx, "2nd argument to percentile() is not " |
| 114 "a number between 0.0 and 100.0", -1); |
| 115 return; |
| 116 } |
| 117 |
| 118 /* Allocate the session context. */ |
| 119 p = (Percentile*)sqlite3_aggregate_context(pCtx, sizeof(*p)); |
| 120 if( p==0 ) return; |
| 121 |
| 122 /* Remember the P value. Throw an error if the P value is different |
| 123 ** from any prior row, per Requirement (2). */ |
| 124 if( p->rPct==0.0 ){ |
| 125 p->rPct = rPct+1.0; |
| 126 }else if( !sameValue(p->rPct,rPct+1.0) ){ |
| 127 sqlite3_result_error(pCtx, "2nd argument to percentile() is not the " |
| 128 "same for all input rows", -1); |
| 129 return; |
| 130 } |
| 131 |
| 132 /* Ignore rows for which Y is NULL */ |
| 133 eType = sqlite3_value_type(argv[0]); |
| 134 if( eType==SQLITE_NULL ) return; |
| 135 |
| 136 /* If not NULL, then Y must be numeric. Otherwise throw an error. |
| 137 ** Requirement 4 */ |
| 138 if( eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT ){ |
| 139 sqlite3_result_error(pCtx, "1st argument to percentile() is not " |
| 140 "numeric", -1); |
| 141 return; |
| 142 } |
| 143 |
| 144 /* Throw an error if the Y value is infinity or NaN */ |
| 145 y = sqlite3_value_double(argv[0]); |
| 146 if( isInfinity(y) ){ |
| 147 sqlite3_result_error(pCtx, "Inf input to percentile()", -1); |
| 148 return; |
| 149 } |
| 150 |
| 151 /* Allocate and store the Y */ |
| 152 if( p->nUsed>=p->nAlloc ){ |
| 153 unsigned n = p->nAlloc*2 + 250; |
| 154 double *a = sqlite3_realloc(p->a, sizeof(double)*n); |
| 155 if( a==0 ){ |
| 156 sqlite3_free(p->a); |
| 157 memset(p, 0, sizeof(*p)); |
| 158 sqlite3_result_error_nomem(pCtx); |
| 159 return; |
| 160 } |
| 161 p->nAlloc = n; |
| 162 p->a = a; |
| 163 } |
| 164 p->a[p->nUsed++] = y; |
| 165 } |
| 166 |
| 167 /* |
| 168 ** Compare to doubles for sorting using qsort() |
| 169 */ |
| 170 static int doubleCmp(const void *pA, const void *pB){ |
| 171 double a = *(double*)pA; |
| 172 double b = *(double*)pB; |
| 173 if( a==b ) return 0; |
| 174 if( a<b ) return -1; |
| 175 return +1; |
| 176 } |
| 177 |
| 178 /* |
| 179 ** Called to compute the final output of percentile() and to clean |
| 180 ** up all allocated memory. |
| 181 */ |
| 182 static void percentFinal(sqlite3_context *pCtx){ |
| 183 Percentile *p; |
| 184 unsigned i1, i2; |
| 185 double v1, v2; |
| 186 double ix, vx; |
| 187 p = (Percentile*)sqlite3_aggregate_context(pCtx, 0); |
| 188 if( p==0 ) return; |
| 189 if( p->a==0 ) return; |
| 190 if( p->nUsed ){ |
| 191 qsort(p->a, p->nUsed, sizeof(double), doubleCmp); |
| 192 ix = (p->rPct-1.0)*(p->nUsed-1)*0.01; |
| 193 i1 = (unsigned)ix; |
| 194 i2 = ix==(double)i1 || i1==p->nUsed-1 ? i1 : i1+1; |
| 195 v1 = p->a[i1]; |
| 196 v2 = p->a[i2]; |
| 197 vx = v1 + (v2-v1)*(ix-i1); |
| 198 sqlite3_result_double(pCtx, vx); |
| 199 } |
| 200 sqlite3_free(p->a); |
| 201 memset(p, 0, sizeof(*p)); |
| 202 } |
| 203 |
| 204 |
| 205 #ifdef _WIN32 |
| 206 __declspec(dllexport) |
| 207 #endif |
| 208 int sqlite3_percentile_init( |
| 209 sqlite3 *db, |
| 210 char **pzErrMsg, |
| 211 const sqlite3_api_routines *pApi |
| 212 ){ |
| 213 int rc = SQLITE_OK; |
| 214 SQLITE_EXTENSION_INIT2(pApi); |
| 215 (void)pzErrMsg; /* Unused parameter */ |
| 216 rc = sqlite3_create_function(db, "percentile", 2, SQLITE_UTF8, 0, |
| 217 0, percentStep, percentFinal); |
| 218 return rc; |
| 219 } |
OLD | NEW |