OLD | NEW |
| (Empty) |
1 /* | |
2 ** 2013-05-28 | |
3 ** | |
4 ** The author disclaims copyright to this source code. In place of | |
5 ** a legal notice, here is a blessing: | |
6 ** | |
7 ** May you do good and not evil. | |
8 ** May you find forgiveness for yourself and forgive others. | |
9 ** May you share freely, never taking more than you give. | |
10 ** | |
11 ****************************************************************************** | |
12 ** | |
13 ** This file contains code to implement the percentile(Y,P) SQL function | |
14 ** as described below: | |
15 ** | |
16 ** (1) The percentile(Y,P) function is an aggregate function taking | |
17 ** exactly two arguments. | |
18 ** | |
19 ** (2) If the P argument to percentile(Y,P) is not the same for every | |
20 ** row in the aggregate then an error is thrown. The word "same" | |
21 ** in the previous sentence means that the value differ by less | |
22 ** than 0.001. | |
23 ** | |
24 ** (3) If the P argument to percentile(Y,P) evaluates to anything other | |
25 ** than a number in the range of 0.0 to 100.0 inclusive then an | |
26 ** error is thrown. | |
27 ** | |
28 ** (4) If any Y argument to percentile(Y,P) evaluates to a value that | |
29 ** is not NULL and is not numeric then an error is thrown. | |
30 ** | |
31 ** (5) If any Y argument to percentile(Y,P) evaluates to plus or minus | |
32 ** infinity then an error is thrown. (SQLite always interprets NaN | |
33 ** values as NULL.) | |
34 ** | |
35 ** (6) Both Y and P in percentile(Y,P) can be arbitrary expressions, | |
36 ** including CASE WHEN expressions. | |
37 ** | |
38 ** (7) The percentile(Y,P) aggregate is able to handle inputs of at least | |
39 ** one million (1,000,000) rows. | |
40 ** | |
41 ** (8) If there are no non-NULL values for Y, then percentile(Y,P) | |
42 ** returns NULL. | |
43 ** | |
44 ** (9) If there is exactly one non-NULL value for Y, the percentile(Y,P) | |
45 ** returns the one Y value. | |
46 ** | |
47 ** (10) If there N non-NULL values of Y where N is two or more and | |
48 ** the Y values are ordered from least to greatest and a graph is | |
49 ** drawn from 0 to N-1 such that the height of the graph at J is | |
50 ** the J-th Y value and such that straight lines are drawn between | |
51 ** adjacent Y values, then the percentile(Y,P) function returns | |
52 ** the height of the graph at P*(N-1)/100. | |
53 ** | |
54 ** (11) The percentile(Y,P) function always returns either a floating | |
55 ** point number or NULL. | |
56 ** | |
57 ** (12) The percentile(Y,P) is implemented as a single C99 source-code | |
58 ** file that compiles into a shared-library or DLL that can be loaded | |
59 ** into SQLite using the sqlite3_load_extension() interface. | |
60 */ | |
61 #include "sqlite3ext.h" | |
62 SQLITE_EXTENSION_INIT1 | |
63 #include <assert.h> | |
64 #include <string.h> | |
65 #include <stdlib.h> | |
66 | |
67 /* The following object is the session context for a single percentile() | |
68 ** function. We have to remember all input Y values until the very end. | |
69 ** Those values are accumulated in the Percentile.a[] array. | |
70 */ | |
71 typedef struct Percentile Percentile; | |
72 struct Percentile { | |
73 unsigned nAlloc; /* Number of slots allocated for a[] */ | |
74 unsigned nUsed; /* Number of slots actually used in a[] */ | |
75 double rPct; /* 1.0 more than the value for P */ | |
76 double *a; /* Array of Y values */ | |
77 }; | |
78 | |
79 /* | |
80 ** Return TRUE if the input floating-point number is an infinity. | |
81 */ | |
82 static int isInfinity(double r){ | |
83 sqlite3_uint64 u; | |
84 assert( sizeof(u)==sizeof(r) ); | |
85 memcpy(&u, &r, sizeof(u)); | |
86 return ((u>>52)&0x7ff)==0x7ff; | |
87 } | |
88 | |
89 /* | |
90 ** Return TRUE if two doubles differ by 0.001 or less | |
91 */ | |
92 static int sameValue(double a, double b){ | |
93 a -= b; | |
94 return a>=-0.001 && a<=0.001; | |
95 } | |
96 | |
97 /* | |
98 ** The "step" function for percentile(Y,P) is called once for each | |
99 ** input row. | |
100 */ | |
101 static void percentStep(sqlite3_context *pCtx, int argc, sqlite3_value **argv){ | |
102 Percentile *p; | |
103 double rPct; | |
104 int eType; | |
105 double y; | |
106 assert( argc==2 ); | |
107 | |
108 /* Requirement 3: P must be a number between 0 and 100 */ | |
109 eType = sqlite3_value_numeric_type(argv[1]); | |
110 rPct = sqlite3_value_double(argv[1]); | |
111 if( (eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT) || | |
112 ((rPct = sqlite3_value_double(argv[1]))<0.0 || rPct>100.0) ){ | |
113 sqlite3_result_error(pCtx, "2nd argument to percentile() is not " | |
114 "a number between 0.0 and 100.0", -1); | |
115 return; | |
116 } | |
117 | |
118 /* Allocate the session context. */ | |
119 p = (Percentile*)sqlite3_aggregate_context(pCtx, sizeof(*p)); | |
120 if( p==0 ) return; | |
121 | |
122 /* Remember the P value. Throw an error if the P value is different | |
123 ** from any prior row, per Requirement (2). */ | |
124 if( p->rPct==0.0 ){ | |
125 p->rPct = rPct+1.0; | |
126 }else if( !sameValue(p->rPct,rPct+1.0) ){ | |
127 sqlite3_result_error(pCtx, "2nd argument to percentile() is not the " | |
128 "same for all input rows", -1); | |
129 return; | |
130 } | |
131 | |
132 /* Ignore rows for which Y is NULL */ | |
133 eType = sqlite3_value_type(argv[0]); | |
134 if( eType==SQLITE_NULL ) return; | |
135 | |
136 /* If not NULL, then Y must be numeric. Otherwise throw an error. | |
137 ** Requirement 4 */ | |
138 if( eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT ){ | |
139 sqlite3_result_error(pCtx, "1st argument to percentile() is not " | |
140 "numeric", -1); | |
141 return; | |
142 } | |
143 | |
144 /* Throw an error if the Y value is infinity or NaN */ | |
145 y = sqlite3_value_double(argv[0]); | |
146 if( isInfinity(y) ){ | |
147 sqlite3_result_error(pCtx, "Inf input to percentile()", -1); | |
148 return; | |
149 } | |
150 | |
151 /* Allocate and store the Y */ | |
152 if( p->nUsed>=p->nAlloc ){ | |
153 unsigned n = p->nAlloc*2 + 250; | |
154 double *a = sqlite3_realloc(p->a, sizeof(double)*n); | |
155 if( a==0 ){ | |
156 sqlite3_free(p->a); | |
157 memset(p, 0, sizeof(*p)); | |
158 sqlite3_result_error_nomem(pCtx); | |
159 return; | |
160 } | |
161 p->nAlloc = n; | |
162 p->a = a; | |
163 } | |
164 p->a[p->nUsed++] = y; | |
165 } | |
166 | |
167 /* | |
168 ** Compare to doubles for sorting using qsort() | |
169 */ | |
170 static int doubleCmp(const void *pA, const void *pB){ | |
171 double a = *(double*)pA; | |
172 double b = *(double*)pB; | |
173 if( a==b ) return 0; | |
174 if( a<b ) return -1; | |
175 return +1; | |
176 } | |
177 | |
178 /* | |
179 ** Called to compute the final output of percentile() and to clean | |
180 ** up all allocated memory. | |
181 */ | |
182 static void percentFinal(sqlite3_context *pCtx){ | |
183 Percentile *p; | |
184 unsigned i1, i2; | |
185 double v1, v2; | |
186 double ix, vx; | |
187 p = (Percentile*)sqlite3_aggregate_context(pCtx, 0); | |
188 if( p==0 ) return; | |
189 if( p->a==0 ) return; | |
190 if( p->nUsed ){ | |
191 qsort(p->a, p->nUsed, sizeof(double), doubleCmp); | |
192 ix = (p->rPct-1.0)*(p->nUsed-1)*0.01; | |
193 i1 = (unsigned)ix; | |
194 i2 = ix==(double)i1 || i1==p->nUsed-1 ? i1 : i1+1; | |
195 v1 = p->a[i1]; | |
196 v2 = p->a[i2]; | |
197 vx = v1 + (v2-v1)*(ix-i1); | |
198 sqlite3_result_double(pCtx, vx); | |
199 } | |
200 sqlite3_free(p->a); | |
201 memset(p, 0, sizeof(*p)); | |
202 } | |
203 | |
204 | |
205 #ifdef _WIN32 | |
206 __declspec(dllexport) | |
207 #endif | |
208 int sqlite3_percentile_init( | |
209 sqlite3 *db, | |
210 char **pzErrMsg, | |
211 const sqlite3_api_routines *pApi | |
212 ){ | |
213 int rc = SQLITE_OK; | |
214 SQLITE_EXTENSION_INIT2(pApi); | |
215 (void)pzErrMsg; /* Unused parameter */ | |
216 rc = sqlite3_create_function(db, "percentile", 2, SQLITE_UTF8, 0, | |
217 0, percentStep, percentFinal); | |
218 return rc; | |
219 } | |
OLD | NEW |