Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(139)

Side by Side Diff: third_party/protobuf/ruby/ext/google/protobuf_c/upb.c

Issue 1322483002: Revert https://codereview.chromium.org/1291903002 (protobuf roll). (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Amalgamated source file
2 #include "upb.h"
3
4
5 #include <stdlib.h>
6 #include <string.h>
7
/* Heap-allocated byte string with an explicit length.  The one-element array
 * is trailing storage: newstr() over-allocates so that |len| bytes plus a
 * terminating NUL fit in |str|. */
typedef struct {
  size_t len;
  char str[1];  /* Null-terminated string data follows. */
} str_t;

/* Returns a newly allocated str_t holding a copy of the first |len| bytes of
 * |data| followed by a NUL terminator, or NULL if the size would overflow or
 * the allocation fails.  |data| may be NULL only when |len| is 0.  The result
 * is released with freestr(). */
static str_t *newstr(const char *data, size_t len) {
  str_t *ret;

  /* sizeof(*ret) already accounts for the terminator byte (str[1]); reject
   * lengths for which the addition below would wrap around. */
  if (len > (size_t)-1 - sizeof(*ret)) return NULL;

  ret = malloc(sizeof(*ret) + len);
  if (!ret) return NULL;
  ret->len = len;
  /* memcpy() must not be handed a NULL source, even for zero bytes. */
  if (len > 0) memcpy(ret->str, data, len);
  ret->str[len] = '\0';
  return ret;
}

/* Releases a string created by newstr().  NULL is accepted. */
static void freestr(str_t *s) { free(s); }
23
/* Character classification helpers.  isalpha() etc. from <ctype.h> are
 * locale-dependent, which we don't want, so plain character-range comparisons
 * are used instead. */

/* True when |c| lies in the inclusive range [low, high]. */
static bool upb_isbetween(char c, char low, char high) {
  if (c < low) return false;
  return c <= high;
}

/* True for 'A'-'Z', 'a'-'z' and underscore. */
static bool upb_isletter(char c) {
  if (c == '_') return true;
  return upb_isbetween(c, 'a', 'z') || upb_isbetween(c, 'A', 'Z');
}

/* True for anything upb_isletter() accepts, plus the digits '0'-'9'. */
static bool upb_isalphanum(char c) {
  return upb_isbetween(c, '0', '9') || upb_isletter(c);
}
36
37 static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) {
38 bool start = true;
39 size_t i;
40 for (i = 0; i < len; i++) {
41 char c = str[i];
42 if (c == '.') {
43 if (start || !full) {
44 upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
45 return false;
46 }
47 start = true;
48 } else if (start) {
49 if (!upb_isletter(c)) {
50 upb_status_seterrf(
51 s, "invalid name: path components must start with a letter (%s)",
52 str);
53 return false;
54 }
55 start = false;
56 } else {
57 if (!upb_isalphanum(c)) {
58 upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
59 str);
60 return false;
61 }
62 }
63 }
64 return !start;
65 }
66
67
68 /* upb_def ********************************************************************/
69
70 upb_deftype_t upb_def_type(const upb_def *d) { return d->type; }
71
72 const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
73
74 bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) {
75 assert(!upb_def_isfrozen(def));
76 if (!upb_isident(fullname, strlen(fullname), true, s)) return false;
77 free((void*)def->fullname);
78 def->fullname = upb_strdup(fullname);
79 return true;
80 }
81
82 upb_def *upb_def_dup(const upb_def *def, const void *o) {
83 switch (def->type) {
84 case UPB_DEF_MSG:
85 return upb_msgdef_upcast_mutable(
86 upb_msgdef_dup(upb_downcast_msgdef(def), o));
87 case UPB_DEF_FIELD:
88 return upb_fielddef_upcast_mutable(
89 upb_fielddef_dup(upb_downcast_fielddef(def), o));
90 case UPB_DEF_ENUM:
91 return upb_enumdef_upcast_mutable(
92 upb_enumdef_dup(upb_downcast_enumdef(def), o));
93 default: assert(false); return NULL;
94 }
95 }
96
97 static bool upb_def_init(upb_def *def, upb_deftype_t type,
98 const struct upb_refcounted_vtbl *vtbl,
99 const void *owner) {
100 if (!upb_refcounted_init(upb_def_upcast_mutable(def), vtbl, owner)) return fal se;
101 def->type = type;
102 def->fullname = NULL;
103 def->came_from_user = false;
104 return true;
105 }
106
107 static void upb_def_uninit(upb_def *def) {
108 free((void*)def->fullname);
109 }
110
111 static const char *msgdef_name(const upb_msgdef *m) {
112 const char *name = upb_def_fullname(upb_msgdef_upcast(m));
113 return name ? name : "(anonymous)";
114 }
115
116 static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
117 if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
118 upb_status_seterrmsg(s, "fielddef must have name and number set");
119 return false;
120 }
121
122 if (!f->type_is_set_) {
123 upb_status_seterrmsg(s, "fielddef type was not initialized");
124 return false;
125 }
126
127 if (upb_fielddef_lazy(f) &&
128 upb_fielddef_descriptortype(f) != UPB_DESCRIPTOR_TYPE_MESSAGE) {
129 upb_status_seterrmsg(s,
130 "only length-delimited submessage fields may be lazy");
131 return false;
132 }
133
134 if (upb_fielddef_hassubdef(f)) {
135 const upb_def *subdef;
136
137 if (f->subdef_is_symbolic) {
138 upb_status_seterrf(s, "field '%s.%s' has not been resolved",
139 msgdef_name(f->msg.def), upb_fielddef_name(f));
140 return false;
141 }
142
143 subdef = upb_fielddef_subdef(f);
144 if (subdef == NULL) {
145 upb_status_seterrf(s, "field %s.%s is missing required subdef",
146 msgdef_name(f->msg.def), upb_fielddef_name(f));
147 return false;
148 }
149
150 if (!upb_def_isfrozen(subdef) && !subdef->came_from_user) {
151 upb_status_seterrf(s,
152 "subdef of field %s.%s is not frozen or being frozen",
153 msgdef_name(f->msg.def), upb_fielddef_name(f));
154 return false;
155 }
156 }
157
158 if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
159 bool has_default_name = upb_fielddef_enumhasdefaultstr(f);
160 bool has_default_number = upb_fielddef_enumhasdefaultint32(f);
161
162 /* Previously verified by upb_validate_enumdef(). */
163 assert(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0);
164
165 /* We've already validated that we have an associated enumdef and that it
166 * has at least one member, so at least one of these should be true.
167 * Because if the user didn't set anything, we'll pick up the enum's
168 * default, but if the user *did* set something we should at least pick up
169 * the one they set (int32 or string). */
170 assert(has_default_name || has_default_number);
171
172 if (!has_default_name) {
173 upb_status_seterrf(s,
174 "enum default for field %s.%s (%d) is not in the enum",
175 msgdef_name(f->msg.def), upb_fielddef_name(f),
176 upb_fielddef_defaultint32(f));
177 return false;
178 }
179
180 if (!has_default_number) {
181 upb_status_seterrf(s,
182 "enum default for field %s.%s (%s) is not in the enum",
183 msgdef_name(f->msg.def), upb_fielddef_name(f),
184 upb_fielddef_defaultstr(f, NULL));
185 return false;
186 }
187
188 /* Lift the effective numeric default into the field's default slot, in case
189 * we were only getting it "by reference" from the enumdef. */
190 upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
191 }
192
193 /* Ensure that MapEntry submessages only appear as repeated fields, not
194 * optional/required (singular) fields. */
195 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
196 upb_fielddef_msgsubdef(f) != NULL) {
197 const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
198 if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) {
199 upb_status_seterrf(s,
200 "Field %s refers to mapentry message but is not "
201 "a repeated field",
202 upb_fielddef_name(f) ? upb_fielddef_name(f) :
203 "(unnamed)");
204 return false;
205 }
206 }
207
208 return true;
209 }
210
211 static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) {
212 if (upb_enumdef_numvals(e) == 0) {
213 upb_status_seterrf(s, "enum %s has no members (must have at least one)",
214 upb_enumdef_fullname(e));
215 return false;
216 }
217
218 return true;
219 }
220
221 /* All submessage fields are lower than all other fields.
222 * Secondly, fields are increasing in order. */
223 uint32_t field_rank(const upb_fielddef *f) {
224 uint32_t ret = upb_fielddef_number(f);
225 const uint32_t high_bit = 1 << 30;
226 assert(ret < high_bit);
227 if (!upb_fielddef_issubmsg(f))
228 ret |= high_bit;
229 return ret;
230 }
231
232 int cmp_fields(const void *p1, const void *p2) {
233 const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
234 const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
235 return field_rank(f1) - field_rank(f2);
236 }
237
238 static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
239 /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the
240 * lowest indexes, but we do not publicly guarantee this. */
241 upb_msg_field_iter j;
242 int i;
243 uint32_t selector;
244 int n = upb_msgdef_numfields(m);
245 upb_fielddef **fields = malloc(n * sizeof(*fields));
246 if (!fields) return false;
247
248 m->submsg_field_count = 0;
249 for(i = 0, upb_msg_field_begin(&j, m);
250 !upb_msg_field_done(&j);
251 upb_msg_field_next(&j), i++) {
252 upb_fielddef *f = upb_msg_iter_field(&j);
253 assert(f->msg.def == m);
254 if (!upb_validate_field(f, s)) {
255 free(fields);
256 return false;
257 }
258 if (upb_fielddef_issubmsg(f)) {
259 m->submsg_field_count++;
260 }
261 fields[i] = f;
262 }
263
264 qsort(fields, n, sizeof(*fields), cmp_fields);
265
266 selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
267 for (i = 0; i < n; i++) {
268 upb_fielddef *f = fields[i];
269 f->index_ = i;
270 f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
271 selector += upb_handlers_selectorcount(f);
272 }
273 m->selector_count = selector;
274
275 #ifndef NDEBUG
276 {
277 /* Verify that all selectors for the message are distinct. */
278 #define TRY(type) \
279 if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v) ;
280
281 upb_inttable t;
282 upb_value v;
283 upb_selector_t sel;
284
285 upb_inttable_init(&t, UPB_CTYPE_BOOL);
286 v = upb_value_bool(true);
287 upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
288 upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
289 for(upb_msg_field_begin(&j, m);
290 !upb_msg_field_done(&j);
291 upb_msg_field_next(&j)) {
292 upb_fielddef *f = upb_msg_iter_field(&j);
293 /* These calls will assert-fail in upb_table if the value already
294 * exists. */
295 TRY(UPB_HANDLER_INT32);
296 TRY(UPB_HANDLER_INT64)
297 TRY(UPB_HANDLER_UINT32)
298 TRY(UPB_HANDLER_UINT64)
299 TRY(UPB_HANDLER_FLOAT)
300 TRY(UPB_HANDLER_DOUBLE)
301 TRY(UPB_HANDLER_BOOL)
302 TRY(UPB_HANDLER_STARTSTR)
303 TRY(UPB_HANDLER_STRING)
304 TRY(UPB_HANDLER_ENDSTR)
305 TRY(UPB_HANDLER_STARTSUBMSG)
306 TRY(UPB_HANDLER_ENDSUBMSG)
307 TRY(UPB_HANDLER_STARTSEQ)
308 TRY(UPB_HANDLER_ENDSEQ)
309 }
310 upb_inttable_uninit(&t);
311 }
312 #undef TRY
313 #endif
314
315 free(fields);
316 return true;
317 }
318
319 bool upb_def_freeze(upb_def *const* defs, int n, upb_status *s) {
320 int i;
321 int maxdepth;
322 bool ret;
323 upb_status_clear(s);
324
325 /* First perform validation, in two passes so we can check that we have a
326 * transitive closure without needing to search. */
327 for (i = 0; i < n; i++) {
328 upb_def *def = defs[i];
329 if (upb_def_isfrozen(def)) {
330 /* Could relax this requirement if it's annoying. */
331 upb_status_seterrmsg(s, "def is already frozen");
332 goto err;
333 } else if (def->type == UPB_DEF_FIELD) {
334 upb_status_seterrmsg(s, "standalone fielddefs can not be frozen");
335 goto err;
336 } else if (def->type == UPB_DEF_ENUM) {
337 if (!upb_validate_enumdef(upb_dyncast_enumdef(def), s)) {
338 goto err;
339 }
340 } else {
341 /* Set now to detect transitive closure in the second pass. */
342 def->came_from_user = true;
343 }
344 }
345
346 /* Second pass of validation. Also assign selector bases and indexes, and
347 * compact tables. */
348 for (i = 0; i < n; i++) {
349 upb_msgdef *m = upb_dyncast_msgdef_mutable(defs[i]);
350 upb_enumdef *e = upb_dyncast_enumdef_mutable(defs[i]);
351 if (m) {
352 upb_inttable_compact(&m->itof);
353 if (!assign_msg_indices(m, s)) {
354 goto err;
355 }
356 } else if (e) {
357 upb_inttable_compact(&e->iton);
358 }
359 }
360
361 /* Def graph contains FieldDefs between each MessageDef, so double the
362 * limit. */
363 maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;
364
365 /* Validation all passed; freeze the defs. */
366 ret = upb_refcounted_freeze((upb_refcounted * const *)defs, n, s, maxdepth);
367 assert(!(s && ret != upb_ok(s)));
368 return ret;
369
370 err:
371 for (i = 0; i < n; i++) {
372 defs[i]->came_from_user = false;
373 }
374 assert(!(s && upb_ok(s)));
375 return false;
376 }
377
378
379 /* upb_enumdef ****************************************************************/
380
381 static void upb_enumdef_free(upb_refcounted *r) {
382 upb_enumdef *e = (upb_enumdef*)r;
383 upb_inttable_iter i;
384 upb_inttable_begin(&i, &e->iton);
385 for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) {
386 /* To clean up the upb_strdup() from upb_enumdef_addval(). */
387 free(upb_value_getcstr(upb_inttable_iter_value(&i)));
388 }
389 upb_strtable_uninit(&e->ntoi);
390 upb_inttable_uninit(&e->iton);
391 upb_def_uninit(upb_enumdef_upcast_mutable(e));
392 free(e);
393 }
394
395 upb_enumdef *upb_enumdef_new(const void *owner) {
396 static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_enumdef_free};
397 upb_enumdef *e = malloc(sizeof(*e));
398 if (!e) return NULL;
399 if (!upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM, &vtbl, owner))
400 goto err2;
401 if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2;
402 if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1;
403 return e;
404
405 err1:
406 upb_strtable_uninit(&e->ntoi);
407 err2:
408 free(e);
409 return NULL;
410 }
411
412 upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) {
413 upb_enum_iter i;
414 upb_enumdef *new_e = upb_enumdef_new(owner);
415 if (!new_e) return NULL;
416 for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
417 bool success = upb_enumdef_addval(
418 new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL);
419 if (!success) {
420 upb_enumdef_unref(new_e, owner);
421 return NULL;
422 }
423 }
424 return new_e;
425 }
426
427 bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
428 upb_def *d = upb_enumdef_upcast_mutable(e);
429 return upb_def_freeze(&d, 1, status);
430 }
431
432 const char *upb_enumdef_fullname(const upb_enumdef *e) {
433 return upb_def_fullname(upb_enumdef_upcast(e));
434 }
435
436 bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
437 upb_status *s) {
438 return upb_def_setfullname(upb_enumdef_upcast_mutable(e), fullname, s);
439 }
440
441 bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
442 upb_status *status) {
443 if (!upb_isident(name, strlen(name), false, status)) {
444 return false;
445 }
446 if (upb_enumdef_ntoiz(e, name, NULL)) {
447 upb_status_seterrf(status, "name '%s' is already defined", name);
448 return false;
449 }
450 if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) {
451 upb_status_seterrmsg(status, "out of memory");
452 return false;
453 }
454 if (!upb_inttable_lookup(&e->iton, num, NULL) &&
455 !upb_inttable_insert(&e->iton, num, upb_value_cstr(upb_strdup(name)))) {
456 upb_status_seterrmsg(status, "out of memory");
457 upb_strtable_remove(&e->ntoi, name, NULL);
458 return false;
459 }
460 if (upb_enumdef_numvals(e) == 1) {
461 bool ok = upb_enumdef_setdefault(e, num, NULL);
462 UPB_ASSERT_VAR(ok, ok);
463 }
464 return true;
465 }
466
467 int32_t upb_enumdef_default(const upb_enumdef *e) {
468 assert(upb_enumdef_iton(e, e->defaultval));
469 return e->defaultval;
470 }
471
472 bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s) {
473 assert(!upb_enumdef_isfrozen(e));
474 if (!upb_enumdef_iton(e, val)) {
475 upb_status_seterrf(s, "number '%d' is not in the enum.", val);
476 return false;
477 }
478 e->defaultval = val;
479 return true;
480 }
481
482 int upb_enumdef_numvals(const upb_enumdef *e) {
483 return upb_strtable_count(&e->ntoi);
484 }
485
486 void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
487 /* We iterate over the ntoi table, to account for duplicate numbers. */
488 upb_strtable_begin(i, &e->ntoi);
489 }
490
491 void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
492 bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
493
494 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
495 size_t len, int32_t *num) {
496 upb_value v;
497 if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
498 return false;
499 }
500 if (num) *num = upb_value_getint32(v);
501 return true;
502 }
503
504 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
505 upb_value v;
506 return upb_inttable_lookup32(&def->iton, num, &v) ?
507 upb_value_getcstr(v) : NULL;
508 }
509
510 const char *upb_enum_iter_name(upb_enum_iter *iter) {
511 return upb_strtable_iter_key(iter);
512 }
513
514 int32_t upb_enum_iter_number(upb_enum_iter *iter) {
515 return upb_value_getint32(upb_strtable_iter_value(iter));
516 }
517
518
519 /* upb_fielddef ***************************************************************/
520
521 static void upb_fielddef_init_default(upb_fielddef *f);
522
523 static void upb_fielddef_uninit_default(upb_fielddef *f) {
524 if (f->type_is_set_ && f->default_is_string && f->defaultval.bytes)
525 freestr(f->defaultval.bytes);
526 }
527
528 static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
529 void *closure) {
530 const upb_fielddef *f = (const upb_fielddef*)r;
531 if (upb_fielddef_containingtype(f)) {
532 visit(r, upb_msgdef_upcast2(upb_fielddef_containingtype(f)), closure);
533 }
534 if (upb_fielddef_containingoneof(f)) {
535 visit(r, upb_oneofdef_upcast2(upb_fielddef_containingoneof(f)), closure);
536 }
537 if (upb_fielddef_subdef(f)) {
538 visit(r, upb_def_upcast(upb_fielddef_subdef(f)), closure);
539 }
540 }
541
542 static void freefield(upb_refcounted *r) {
543 upb_fielddef *f = (upb_fielddef*)r;
544 upb_fielddef_uninit_default(f);
545 if (f->subdef_is_symbolic)
546 free(f->sub.name);
547 upb_def_uninit(upb_fielddef_upcast_mutable(f));
548 free(f);
549 }
550
551 static const char *enumdefaultstr(const upb_fielddef *f) {
552 const upb_enumdef *e;
553 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
554 e = upb_fielddef_enumsubdef(f);
555 if (f->default_is_string && f->defaultval.bytes) {
556 /* Default was explicitly set as a string. */
557 str_t *s = f->defaultval.bytes;
558 return s->str;
559 } else if (e) {
560 if (!f->default_is_string) {
561 /* Default was explicitly set as an integer; look it up in enumdef. */
562 const char *name = upb_enumdef_iton(e, f->defaultval.sint);
563 if (name) {
564 return name;
565 }
566 } else {
567 /* Default is completely unset; pull enumdef default. */
568 if (upb_enumdef_numvals(e) > 0) {
569 const char *name = upb_enumdef_iton(e, upb_enumdef_default(e));
570 assert(name);
571 return name;
572 }
573 }
574 }
575 return NULL;
576 }
577
578 static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
579 const upb_enumdef *e;
580 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
581 e = upb_fielddef_enumsubdef(f);
582 if (!f->default_is_string) {
583 /* Default was explicitly set as an integer. */
584 *val = f->defaultval.sint;
585 return true;
586 } else if (e) {
587 if (f->defaultval.bytes) {
588 /* Default was explicitly set as a str; try to lookup corresponding int. * /
589 str_t *s = f->defaultval.bytes;
590 if (upb_enumdef_ntoiz(e, s->str, val)) {
591 return true;
592 }
593 } else {
594 /* Default is unset; try to pull in enumdef default. */
595 if (upb_enumdef_numvals(e) > 0) {
596 *val = upb_enumdef_default(e);
597 return true;
598 }
599 }
600 }
601 return false;
602 }
603
604 upb_fielddef *upb_fielddef_new(const void *o) {
605 static const struct upb_refcounted_vtbl vtbl = {visitfield, freefield};
606 upb_fielddef *f = malloc(sizeof(*f));
607 if (!f) return NULL;
608 if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD, &vtbl, o)) {
609 free(f);
610 return NULL;
611 }
612 f->msg.def = NULL;
613 f->sub.def = NULL;
614 f->oneof = NULL;
615 f->subdef_is_symbolic = false;
616 f->msg_is_symbolic = false;
617 f->label_ = UPB_LABEL_OPTIONAL;
618 f->type_ = UPB_TYPE_INT32;
619 f->number_ = 0;
620 f->type_is_set_ = false;
621 f->tagdelim = false;
622 f->is_extension_ = false;
623 f->lazy_ = false;
624 f->packed_ = true;
625
626 /* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
627 * with all integer types and is in some since more "default" since the most
628 * normal-looking proto2 types int32/int64/uint32/uint64 use variable.
629 *
630 * Other options to consider:
631 * - there is no default; users must set this manually (like type).
632 * - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
633 * be an optimal default for signed integers. */
634 f->intfmt = UPB_INTFMT_VARIABLE;
635 return f;
636 }
637
638 upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) {
639 const char *srcname;
640 upb_fielddef *newf = upb_fielddef_new(owner);
641 if (!newf) return NULL;
642 upb_fielddef_settype(newf, upb_fielddef_type(f));
643 upb_fielddef_setlabel(newf, upb_fielddef_label(f));
644 upb_fielddef_setnumber(newf, upb_fielddef_number(f), NULL);
645 upb_fielddef_setname(newf, upb_fielddef_name(f), NULL);
646 if (f->default_is_string && f->defaultval.bytes) {
647 str_t *s = f->defaultval.bytes;
648 upb_fielddef_setdefaultstr(newf, s->str, s->len, NULL);
649 } else {
650 newf->default_is_string = f->default_is_string;
651 newf->defaultval = f->defaultval;
652 }
653
654 if (f->subdef_is_symbolic) {
655 srcname = f->sub.name; /* Might be NULL. */
656 } else {
657 srcname = f->sub.def ? upb_def_fullname(f->sub.def) : NULL;
658 }
659 if (srcname) {
660 char *newname = malloc(strlen(f->sub.def->fullname) + 2);
661 if (!newname) {
662 upb_fielddef_unref(newf, owner);
663 return NULL;
664 }
665 strcpy(newname, ".");
666 strcat(newname, f->sub.def->fullname);
667 upb_fielddef_setsubdefname(newf, newname, NULL);
668 free(newname);
669 }
670
671 return newf;
672 }
673
674 bool upb_fielddef_typeisset(const upb_fielddef *f) {
675 return f->type_is_set_;
676 }
677
678 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
679 assert(f->type_is_set_);
680 return f->type_;
681 }
682
683 uint32_t upb_fielddef_index(const upb_fielddef *f) {
684 return f->index_;
685 }
686
687 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
688 return f->label_;
689 }
690
691 upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) {
692 return f->intfmt;
693 }
694
695 bool upb_fielddef_istagdelim(const upb_fielddef *f) {
696 return f->tagdelim;
697 }
698
699 uint32_t upb_fielddef_number(const upb_fielddef *f) {
700 return f->number_;
701 }
702
703 bool upb_fielddef_isextension(const upb_fielddef *f) {
704 return f->is_extension_;
705 }
706
707 bool upb_fielddef_lazy(const upb_fielddef *f) {
708 return f->lazy_;
709 }
710
711 bool upb_fielddef_packed(const upb_fielddef *f) {
712 return f->packed_;
713 }
714
715 const char *upb_fielddef_name(const upb_fielddef *f) {
716 return upb_def_fullname(upb_fielddef_upcast(f));
717 }
718
719 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
720 return f->msg_is_symbolic ? NULL : f->msg.def;
721 }
722
723 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
724 return f->oneof;
725 }
726
727 upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) {
728 return (upb_msgdef*)upb_fielddef_containingtype(f);
729 }
730
731 const char *upb_fielddef_containingtypename(upb_fielddef *f) {
732 return f->msg_is_symbolic ? f->msg.name : NULL;
733 }
734
735 static void release_containingtype(upb_fielddef *f) {
736 if (f->msg_is_symbolic) free(f->msg.name);
737 }
738
739 bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
740 upb_status *s) {
741 assert(!upb_fielddef_isfrozen(f));
742 if (upb_fielddef_containingtype(f)) {
743 upb_status_seterrmsg(s, "field has already been added to a message.");
744 return false;
745 }
746 /* TODO: validate name (upb_isident() doesn't quite work atm because this name
747 * may have a leading "."). */
748 release_containingtype(f);
749 f->msg.name = upb_strdup(name);
750 f->msg_is_symbolic = true;
751 return true;
752 }
753
754 bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
755 if (upb_fielddef_containingtype(f) || upb_fielddef_containingoneof(f)) {
756 upb_status_seterrmsg(s, "Already added to message or oneof");
757 return false;
758 }
759 return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s);
760 }
761
762 static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) {
763 UPB_UNUSED(f);
764 UPB_UNUSED(type);
765 assert(f->type_is_set_ && upb_fielddef_type(f) == type);
766 }
767
768 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
769 chkdefaulttype(f, UPB_TYPE_INT64);
770 return f->defaultval.sint;
771 }
772
773 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
774 if (f->type_is_set_ && upb_fielddef_type(f) == UPB_TYPE_ENUM) {
775 int32_t val;
776 bool ok = enumdefaultint32(f, &val);
777 UPB_ASSERT_VAR(ok, ok);
778 return val;
779 } else {
780 chkdefaulttype(f, UPB_TYPE_INT32);
781 return f->defaultval.sint;
782 }
783 }
784
785 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
786 chkdefaulttype(f, UPB_TYPE_UINT64);
787 return f->defaultval.uint;
788 }
789
790 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
791 chkdefaulttype(f, UPB_TYPE_UINT32);
792 return f->defaultval.uint;
793 }
794
795 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
796 chkdefaulttype(f, UPB_TYPE_BOOL);
797 return f->defaultval.uint;
798 }
799
800 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
801 chkdefaulttype(f, UPB_TYPE_FLOAT);
802 return f->defaultval.flt;
803 }
804
805 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
806 chkdefaulttype(f, UPB_TYPE_DOUBLE);
807 return f->defaultval.dbl;
808 }
809
810 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
811 assert(f->type_is_set_);
812 assert(upb_fielddef_type(f) == UPB_TYPE_STRING ||
813 upb_fielddef_type(f) == UPB_TYPE_BYTES ||
814 upb_fielddef_type(f) == UPB_TYPE_ENUM);
815
816 if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
817 const char *ret = enumdefaultstr(f);
818 assert(ret);
819 /* Enum defaults can't have embedded NULLs. */
820 if (len) *len = strlen(ret);
821 return ret;
822 }
823
824 if (f->default_is_string) {
825 str_t *str = f->defaultval.bytes;
826 if (len) *len = str->len;
827 return str->str;
828 }
829
830 return NULL;
831 }
832
833 static void upb_fielddef_init_default(upb_fielddef *f) {
834 f->default_is_string = false;
835 switch (upb_fielddef_type(f)) {
836 case UPB_TYPE_DOUBLE: f->defaultval.dbl = 0; break;
837 case UPB_TYPE_FLOAT: f->defaultval.flt = 0; break;
838 case UPB_TYPE_INT32:
839 case UPB_TYPE_INT64: f->defaultval.sint = 0; break;
840 case UPB_TYPE_UINT64:
841 case UPB_TYPE_UINT32:
842 case UPB_TYPE_BOOL: f->defaultval.uint = 0; break;
843 case UPB_TYPE_STRING:
844 case UPB_TYPE_BYTES:
845 f->defaultval.bytes = newstr("", 0);
846 f->default_is_string = true;
847 break;
848 case UPB_TYPE_MESSAGE: break;
849 case UPB_TYPE_ENUM:
850 /* This is our special sentinel that indicates "not set" for an enum. */
851 f->default_is_string = true;
852 f->defaultval.bytes = NULL;
853 break;
854 }
855 }
856
857 const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
858 return f->subdef_is_symbolic ? NULL : f->sub.def;
859 }
860
861 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
862 const upb_def *def = upb_fielddef_subdef(f);
863 return def ? upb_dyncast_msgdef(def) : NULL;
864 }
865
866 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
867 const upb_def *def = upb_fielddef_subdef(f);
868 return def ? upb_dyncast_enumdef(def) : NULL;
869 }
870
871 upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
872 return (upb_def*)upb_fielddef_subdef(f);
873 }
874
875 const char *upb_fielddef_subdefname(const upb_fielddef *f) {
876 if (f->subdef_is_symbolic) {
877 return f->sub.name;
878 } else if (f->sub.def) {
879 return upb_def_fullname(f->sub.def);
880 } else {
881 return NULL;
882 }
883 }
884
885 bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s) {
886 if (upb_fielddef_containingtype(f)) {
887 upb_status_seterrmsg(
888 s, "cannot change field number after adding to a message");
889 return false;
890 }
891 if (number == 0 || number > UPB_MAX_FIELDNUMBER) {
892 upb_status_seterrf(s, "invalid field number (%u)", number);
893 return false;
894 }
895 f->number_ = number;
896 return true;
897 }
898
899 void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
900 assert(!upb_fielddef_isfrozen(f));
901 assert(upb_fielddef_checktype(type));
902 upb_fielddef_uninit_default(f);
903 f->type_ = type;
904 f->type_is_set_ = true;
905 upb_fielddef_init_default(f);
906 }
907
908 void upb_fielddef_setdescriptortype(upb_fielddef *f, int type) {
909 assert(!upb_fielddef_isfrozen(f));
910 switch (type) {
911 case UPB_DESCRIPTOR_TYPE_DOUBLE:
912 upb_fielddef_settype(f, UPB_TYPE_DOUBLE);
913 break;
914 case UPB_DESCRIPTOR_TYPE_FLOAT:
915 upb_fielddef_settype(f, UPB_TYPE_FLOAT);
916 break;
917 case UPB_DESCRIPTOR_TYPE_INT64:
918 case UPB_DESCRIPTOR_TYPE_SFIXED64:
919 case UPB_DESCRIPTOR_TYPE_SINT64:
920 upb_fielddef_settype(f, UPB_TYPE_INT64);
921 break;
922 case UPB_DESCRIPTOR_TYPE_UINT64:
923 case UPB_DESCRIPTOR_TYPE_FIXED64:
924 upb_fielddef_settype(f, UPB_TYPE_UINT64);
925 break;
926 case UPB_DESCRIPTOR_TYPE_INT32:
927 case UPB_DESCRIPTOR_TYPE_SFIXED32:
928 case UPB_DESCRIPTOR_TYPE_SINT32:
929 upb_fielddef_settype(f, UPB_TYPE_INT32);
930 break;
931 case UPB_DESCRIPTOR_TYPE_UINT32:
932 case UPB_DESCRIPTOR_TYPE_FIXED32:
933 upb_fielddef_settype(f, UPB_TYPE_UINT32);
934 break;
935 case UPB_DESCRIPTOR_TYPE_BOOL:
936 upb_fielddef_settype(f, UPB_TYPE_BOOL);
937 break;
938 case UPB_DESCRIPTOR_TYPE_STRING:
939 upb_fielddef_settype(f, UPB_TYPE_STRING);
940 break;
941 case UPB_DESCRIPTOR_TYPE_BYTES:
942 upb_fielddef_settype(f, UPB_TYPE_BYTES);
943 break;
944 case UPB_DESCRIPTOR_TYPE_GROUP:
945 case UPB_DESCRIPTOR_TYPE_MESSAGE:
946 upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
947 break;
948 case UPB_DESCRIPTOR_TYPE_ENUM:
949 upb_fielddef_settype(f, UPB_TYPE_ENUM);
950 break;
951 default: assert(false);
952 }
953
954 if (type == UPB_DESCRIPTOR_TYPE_FIXED64 ||
955 type == UPB_DESCRIPTOR_TYPE_FIXED32 ||
956 type == UPB_DESCRIPTOR_TYPE_SFIXED64 ||
957 type == UPB_DESCRIPTOR_TYPE_SFIXED32) {
958 upb_fielddef_setintfmt(f, UPB_INTFMT_FIXED);
959 } else if (type == UPB_DESCRIPTOR_TYPE_SINT64 ||
960 type == UPB_DESCRIPTOR_TYPE_SINT32) {
961 upb_fielddef_setintfmt(f, UPB_INTFMT_ZIGZAG);
962 } else {
963 upb_fielddef_setintfmt(f, UPB_INTFMT_VARIABLE);
964 }
965
966 upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP);
967 }
968
969 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
970 switch (upb_fielddef_type(f)) {
971 case UPB_TYPE_FLOAT: return UPB_DESCRIPTOR_TYPE_FLOAT;
972 case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE;
973 case UPB_TYPE_BOOL: return UPB_DESCRIPTOR_TYPE_BOOL;
974 case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING;
975 case UPB_TYPE_BYTES: return UPB_DESCRIPTOR_TYPE_BYTES;
976 case UPB_TYPE_ENUM: return UPB_DESCRIPTOR_TYPE_ENUM;
977 case UPB_TYPE_INT32:
978 switch (upb_fielddef_intfmt(f)) {
979 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32;
980 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED32;
981 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT32;
982 }
983 case UPB_TYPE_INT64:
984 switch (upb_fielddef_intfmt(f)) {
985 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64;
986 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED64;
987 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT64;
988 }
989 case UPB_TYPE_UINT32:
990 switch (upb_fielddef_intfmt(f)) {
991 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32;
992 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED32;
993 case UPB_INTFMT_ZIGZAG: return -1;
994 }
995 case UPB_TYPE_UINT64:
996 switch (upb_fielddef_intfmt(f)) {
997 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64;
998 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED64;
999 case UPB_INTFMT_ZIGZAG: return -1;
1000 }
1001 case UPB_TYPE_MESSAGE:
1002 return upb_fielddef_istagdelim(f) ?
1003 UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE;
1004 }
1005 return 0;
1006 }
1007
1008 void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension) {
1009 assert(!upb_fielddef_isfrozen(f));
1010 f->is_extension_ = is_extension;
1011 }
1012
1013 void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) {
1014 assert(!upb_fielddef_isfrozen(f));
1015 f->lazy_ = lazy;
1016 }
1017
1018 void upb_fielddef_setpacked(upb_fielddef *f, bool packed) {
1019 assert(!upb_fielddef_isfrozen(f));
1020 f->packed_ = packed;
1021 }
1022
1023 void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
1024 assert(!upb_fielddef_isfrozen(f));
1025 assert(upb_fielddef_checklabel(label));
1026 f->label_ = label;
1027 }
1028
1029 void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) {
1030 assert(!upb_fielddef_isfrozen(f));
1031 assert(upb_fielddef_checkintfmt(fmt));
1032 f->intfmt = fmt;
1033 }
1034
1035 void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) {
1036 assert(!upb_fielddef_isfrozen(f));
1037 f->tagdelim = tag_delim;
1038 f->tagdelim = tag_delim;
1039 }
1040
1041 static bool checksetdefault(upb_fielddef *f, upb_fieldtype_t type) {
1042 if (!f->type_is_set_ || upb_fielddef_isfrozen(f) ||
1043 upb_fielddef_type(f) != type) {
1044 assert(false);
1045 return false;
1046 }
1047 if (f->default_is_string) {
1048 str_t *s = f->defaultval.bytes;
1049 assert(s || type == UPB_TYPE_ENUM);
1050 if (s) freestr(s);
1051 }
1052 f->default_is_string = false;
1053 return true;
1054 }
1055
1056 void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t value) {
1057 if (checksetdefault(f, UPB_TYPE_INT64))
1058 f->defaultval.sint = value;
1059 }
1060
1061 void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t value) {
1062 if ((upb_fielddef_type(f) == UPB_TYPE_ENUM &&
1063 checksetdefault(f, UPB_TYPE_ENUM)) ||
1064 checksetdefault(f, UPB_TYPE_INT32)) {
1065 f->defaultval.sint = value;
1066 }
1067 }
1068
1069 void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t value) {
1070 if (checksetdefault(f, UPB_TYPE_UINT64))
1071 f->defaultval.uint = value;
1072 }
1073
1074 void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t value) {
1075 if (checksetdefault(f, UPB_TYPE_UINT32))
1076 f->defaultval.uint = value;
1077 }
1078
1079 void upb_fielddef_setdefaultbool(upb_fielddef *f, bool value) {
1080 if (checksetdefault(f, UPB_TYPE_BOOL))
1081 f->defaultval.uint = value;
1082 }
1083
1084 void upb_fielddef_setdefaultfloat(upb_fielddef *f, float value) {
1085 if (checksetdefault(f, UPB_TYPE_FLOAT))
1086 f->defaultval.flt = value;
1087 }
1088
1089 void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) {
1090 if (checksetdefault(f, UPB_TYPE_DOUBLE))
1091 f->defaultval.dbl = value;
1092 }
1093
1094 bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
1095 upb_status *s) {
1096 str_t *str2;
1097 assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM);
1098 if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false, s))
1099 return false;
1100
1101 if (f->default_is_string) {
1102 str_t *s = f->defaultval.bytes;
1103 assert(s || f->type_ == UPB_TYPE_ENUM);
1104 if (s) freestr(s);
1105 } else {
1106 assert(f->type_ == UPB_TYPE_ENUM);
1107 }
1108
1109 str2 = newstr(str, len);
1110 f->defaultval.bytes = str2;
1111 f->default_is_string = true;
1112 return true;
1113 }
1114
1115 void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
1116 upb_status *s) {
1117 assert(f->type_is_set_);
1118 upb_fielddef_setdefaultstr(f, str, str ? strlen(str) : 0, s);
1119 }
1120
1121 bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) {
1122 int32_t val;
1123 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1124 return enumdefaultint32(f, &val);
1125 }
1126
1127 bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f) {
1128 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1129 return enumdefaultstr(f) != NULL;
1130 }
1131
1132 static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef,
1133 upb_status *s) {
1134 if (f->type_ == UPB_TYPE_MESSAGE) {
1135 if (upb_dyncast_msgdef(subdef)) return true;
1136 upb_status_seterrmsg(s, "invalid subdef type for this submessage field");
1137 return false;
1138 } else if (f->type_ == UPB_TYPE_ENUM) {
1139 if (upb_dyncast_enumdef(subdef)) return true;
1140 upb_status_seterrmsg(s, "invalid subdef type for this enum field");
1141 return false;
1142 } else {
1143 upb_status_seterrmsg(s, "only message and enum fields can have a subdef");
1144 return false;
1145 }
1146 }
1147
1148 static void release_subdef(upb_fielddef *f) {
1149 if (f->subdef_is_symbolic) {
1150 free(f->sub.name);
1151 } else if (f->sub.def) {
1152 upb_unref2(f->sub.def, f);
1153 }
1154 }
1155
1156 bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
1157 upb_status *s) {
1158 assert(!upb_fielddef_isfrozen(f));
1159 assert(upb_fielddef_hassubdef(f));
1160 if (subdef && !upb_subdef_typecheck(f, subdef, s)) return false;
1161 release_subdef(f);
1162 f->sub.def = subdef;
1163 f->subdef_is_symbolic = false;
1164 if (f->sub.def) upb_ref2(f->sub.def, f);
1165 return true;
1166 }
1167
1168 bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
1169 upb_status *s) {
1170 return upb_fielddef_setsubdef(f, upb_msgdef_upcast(subdef), s);
1171 }
1172
1173 bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
1174 upb_status *s) {
1175 return upb_fielddef_setsubdef(f, upb_enumdef_upcast(subdef), s);
1176 }
1177
1178 bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
1179 upb_status *s) {
1180 assert(!upb_fielddef_isfrozen(f));
1181 if (!upb_fielddef_hassubdef(f)) {
1182 upb_status_seterrmsg(s, "field type does not accept a subdef");
1183 return false;
1184 }
1185 /* TODO: validate name (upb_isident() doesn't quite work atm because this name
1186 * may have a leading "."). */
1187 release_subdef(f);
1188 f->sub.name = upb_strdup(name);
1189 f->subdef_is_symbolic = true;
1190 return true;
1191 }
1192
1193 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
1194 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
1195 }
1196
1197 bool upb_fielddef_isstring(const upb_fielddef *f) {
1198 return upb_fielddef_type(f) == UPB_TYPE_STRING ||
1199 upb_fielddef_type(f) == UPB_TYPE_BYTES;
1200 }
1201
1202 bool upb_fielddef_isseq(const upb_fielddef *f) {
1203 return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
1204 }
1205
1206 bool upb_fielddef_isprimitive(const upb_fielddef *f) {
1207 return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f);
1208 }
1209
1210 bool upb_fielddef_ismap(const upb_fielddef *f) {
1211 return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
1212 upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
1213 }
1214
1215 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
1216 return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
1217 }
1218
/* Inclusive range test: true iff low <= x <= high. */
static bool between(int32_t x, int32_t low, int32_t high) {
  if (x < low) return false;
  return x <= high;
}
1222
/* Accepts label values in the inclusive range [1, 3]. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}
/* Accepts field type values in the inclusive range [1, 11]. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}
/* Accepts integer format values in the inclusive range [1, 3]. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
1226
/* Accepts descriptor type values in the inclusive range [1, 18]. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  const int32_t lowest = 1;
  const int32_t highest = 18;
  return between(type, lowest, highest);
}
1230
1231 /* upb_msgdef *****************************************************************/
1232
1233 static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
1234 void *closure) {
1235 upb_msg_oneof_iter o;
1236 const upb_msgdef *m = (const upb_msgdef*)r;
1237 upb_msg_field_iter i;
1238 for(upb_msg_field_begin(&i, m);
1239 !upb_msg_field_done(&i);
1240 upb_msg_field_next(&i)) {
1241 upb_fielddef *f = upb_msg_iter_field(&i);
1242 visit(r, upb_fielddef_upcast2(f), closure);
1243 }
1244 for(upb_msg_oneof_begin(&o, m);
1245 !upb_msg_oneof_done(&o);
1246 upb_msg_oneof_next(&o)) {
1247 upb_oneofdef *f = upb_msg_iter_oneof(&o);
1248 visit(r, upb_oneofdef_upcast2(f), closure);
1249 }
1250 }
1251
1252 static void freemsg(upb_refcounted *r) {
1253 upb_msgdef *m = (upb_msgdef*)r;
1254 upb_strtable_uninit(&m->ntoo);
1255 upb_strtable_uninit(&m->ntof);
1256 upb_inttable_uninit(&m->itof);
1257 upb_def_uninit(upb_msgdef_upcast_mutable(m));
1258 free(m);
1259 }
1260
1261 upb_msgdef *upb_msgdef_new(const void *owner) {
1262 static const struct upb_refcounted_vtbl vtbl = {visitmsg, freemsg};
1263 upb_msgdef *m = malloc(sizeof(*m));
1264 if (!m) return NULL;
1265 if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &vtbl, owner))
1266 goto err2;
1267 if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err3;
1268 if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err2;
1269 if (!upb_strtable_init(&m->ntoo, UPB_CTYPE_PTR)) goto err1;
1270 m->map_entry = false;
1271 return m;
1272
1273 err1:
1274 upb_strtable_uninit(&m->ntof);
1275 err2:
1276 upb_inttable_uninit(&m->itof);
1277 err3:
1278 free(m);
1279 return NULL;
1280 }
1281
1282 upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
1283 bool ok;
1284 upb_msg_field_iter i;
1285 upb_msg_oneof_iter o;
1286
1287 upb_msgdef *newm = upb_msgdef_new(owner);
1288 if (!newm) return NULL;
1289 ok = upb_def_setfullname(upb_msgdef_upcast_mutable(newm),
1290 upb_def_fullname(upb_msgdef_upcast(m)),
1291 NULL);
1292 newm->map_entry = m->map_entry;
1293 UPB_ASSERT_VAR(ok, ok);
1294 for(upb_msg_field_begin(&i, m);
1295 !upb_msg_field_done(&i);
1296 upb_msg_field_next(&i)) {
1297 upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f);
1298 /* Fields in oneofs are dup'd below. */
1299 if (upb_fielddef_containingoneof(f)) continue;
1300 if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) {
1301 upb_msgdef_unref(newm, owner);
1302 return NULL;
1303 }
1304 }
1305 for(upb_msg_oneof_begin(&o, m);
1306 !upb_msg_oneof_done(&o);
1307 upb_msg_oneof_next(&o)) {
1308 upb_oneofdef *f = upb_oneofdef_dup(upb_msg_iter_oneof(&o), &f);
1309 if (!f || !upb_msgdef_addoneof(newm, f, &f, NULL)) {
1310 upb_msgdef_unref(newm, owner);
1311 return NULL;
1312 }
1313 }
1314 return newm;
1315 }
1316
1317 bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) {
1318 upb_def *d = upb_msgdef_upcast_mutable(m);
1319 return upb_def_freeze(&d, 1, status);
1320 }
1321
1322 const char *upb_msgdef_fullname(const upb_msgdef *m) {
1323 return upb_def_fullname(upb_msgdef_upcast(m));
1324 }
1325
1326 bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
1327 upb_status *s) {
1328 return upb_def_setfullname(upb_msgdef_upcast_mutable(m), fullname, s);
1329 }
1330
1331 /* Helper: check that the field |f| is safe to add to msgdef |m|. Set an error
1332 * on status |s| and return false if not. */
1333 static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
1334 upb_status *s) {
1335 if (upb_fielddef_containingtype(f) != NULL) {
1336 upb_status_seterrmsg(s, "fielddef already belongs to a message");
1337 return false;
1338 } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
1339 upb_status_seterrmsg(s, "field name or number were not set");
1340 return false;
1341 } else if (upb_msgdef_ntofz(m, upb_fielddef_name(f)) ||
1342 upb_msgdef_itof(m, upb_fielddef_number(f))) {
1343 upb_status_seterrmsg(s, "duplicate field name or number for field");
1344 return false;
1345 }
1346 return true;
1347 }
1348
1349 static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
1350 release_containingtype(f);
1351 f->msg.def = m;
1352 f->msg_is_symbolic = false;
1353 upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
1354 upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));
1355 upb_ref2(f, m);
1356 upb_ref2(m, f);
1357 if (ref_donor) upb_fielddef_unref(f, ref_donor);
1358 }
1359
1360 bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
1361 upb_status *s) {
1362 /* TODO: extensions need to have a separate namespace, because proto2 allows a
1363 * top-level extension (ie. one not in any package) to have the same name as a
1364 * field from the message.
1365 *
1366 * This also implies that there needs to be a separate lookup-by-name method
1367 * for extensions. It seems desirable for iteration to return both extensions
1368 * and non-extensions though.
1369 *
1370 * We also need to validate that the field number is in an extension range iff
1371 * it is an extension.
1372 *
1373 * This method is idempotent. Check if |f| is already part of this msgdef and
1374 * return immediately if so. */
1375 if (upb_fielddef_containingtype(f) == m) {
1376 return true;
1377 }
1378
1379 /* Check constraints for all fields before performing any action. */
1380 if (!check_field_add(m, f, s)) {
1381 return false;
1382 } else if (upb_fielddef_containingoneof(f) != NULL) {
1383 /* Fields in a oneof can only be added by adding the oneof to the msgdef. */
1384 upb_status_seterrmsg(s, "fielddef is part of a oneof");
1385 return false;
1386 }
1387
1388 /* Constraint checks ok, perform the action. */
1389 add_field(m, f, ref_donor);
1390 return true;
1391 }
1392
1393 bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
1394 upb_status *s) {
1395 upb_oneof_iter it;
1396
1397 /* Check various conditions that would prevent this oneof from being added. */
1398 if (upb_oneofdef_containingtype(o)) {
1399 upb_status_seterrmsg(s, "oneofdef already belongs to a message");
1400 return false;
1401 } else if (upb_oneofdef_name(o) == NULL) {
1402 upb_status_seterrmsg(s, "oneofdef name was not set");
1403 return false;
1404 } else if (upb_msgdef_ntooz(m, upb_oneofdef_name(o))) {
1405 upb_status_seterrmsg(s, "duplicate oneof name");
1406 return false;
1407 }
1408
1409 /* Check that all of the oneof's fields do not conflict with names or numbers
1410 * of fields already in the message. */
1411 for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
1412 const upb_fielddef *f = upb_oneof_iter_field(&it);
1413 if (!check_field_add(m, f, s)) {
1414 return false;
1415 }
1416 }
1417
1418 /* Everything checks out -- commit now. */
1419
1420 /* Add oneof itself first. */
1421 o->parent = m;
1422 upb_strtable_insert(&m->ntoo, upb_oneofdef_name(o), upb_value_ptr(o));
1423 upb_ref2(o, m);
1424 upb_ref2(m, o);
1425
1426 /* Add each field of the oneof directly to the msgdef. */
1427 for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
1428 upb_fielddef *f = upb_oneof_iter_field(&it);
1429 add_field(m, f, NULL);
1430 }
1431
1432 if (ref_donor) upb_oneofdef_unref(o, ref_donor);
1433
1434 return true;
1435 }
1436
1437 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
1438 upb_value val;
1439 return upb_inttable_lookup32(&m->itof, i, &val) ?
1440 upb_value_getptr(val) : NULL;
1441 }
1442
1443 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
1444 size_t len) {
1445 upb_value val;
1446 return upb_strtable_lookup2(&m->ntof, name, len, &val) ?
1447 upb_value_getptr(val) : NULL;
1448 }
1449
1450 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
1451 size_t len) {
1452 upb_value val;
1453 return upb_strtable_lookup2(&m->ntoo, name, len, &val) ?
1454 upb_value_getptr(val) : NULL;
1455 }
1456
1457 int upb_msgdef_numfields(const upb_msgdef *m) {
1458 return upb_strtable_count(&m->ntof);
1459 }
1460
1461 int upb_msgdef_numoneofs(const upb_msgdef *m) {
1462 return upb_strtable_count(&m->ntoo);
1463 }
1464
1465 void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
1466 assert(!upb_msgdef_isfrozen(m));
1467 m->map_entry = map_entry;
1468 }
1469
1470 bool upb_msgdef_mapentry(const upb_msgdef *m) {
1471 return m->map_entry;
1472 }
1473
1474 void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
1475 upb_inttable_begin(iter, &m->itof);
1476 }
1477
1478 void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); }
1479
1480 bool upb_msg_field_done(const upb_msg_field_iter *iter) {
1481 return upb_inttable_done(iter);
1482 }
1483
1484 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
1485 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
1486 }
1487
1488 void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
1489 upb_inttable_iter_setdone(iter);
1490 }
1491
1492 void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
1493 upb_strtable_begin(iter, &m->ntoo);
1494 }
1495
1496 void upb_msg_oneof_next(upb_msg_oneof_iter *iter) { upb_strtable_next(iter); }
1497
1498 bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
1499 return upb_strtable_done(iter);
1500 }
1501
1502 upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
1503 return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter));
1504 }
1505
1506 void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
1507 upb_strtable_iter_setdone(iter);
1508 }
1509
1510 /* upb_oneofdef ***************************************************************/
1511
1512 static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
1513 void *closure) {
1514 const upb_oneofdef *o = (const upb_oneofdef*)r;
1515 upb_oneof_iter i;
1516 for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
1517 const upb_fielddef *f = upb_oneof_iter_field(&i);
1518 visit(r, upb_fielddef_upcast2(f), closure);
1519 }
1520 if (o->parent) {
1521 visit(r, upb_msgdef_upcast2(o->parent), closure);
1522 }
1523 }
1524
1525 static void freeoneof(upb_refcounted *r) {
1526 upb_oneofdef *o = (upb_oneofdef*)r;
1527 upb_strtable_uninit(&o->ntof);
1528 upb_inttable_uninit(&o->itof);
1529 upb_def_uninit(upb_oneofdef_upcast_mutable(o));
1530 free(o);
1531 }
1532
1533 upb_oneofdef *upb_oneofdef_new(const void *owner) {
1534 static const struct upb_refcounted_vtbl vtbl = {visitoneof, freeoneof};
1535 upb_oneofdef *o = malloc(sizeof(*o));
1536 o->parent = NULL;
1537 if (!o) return NULL;
1538 if (!upb_def_init(upb_oneofdef_upcast_mutable(o), UPB_DEF_ONEOF, &vtbl,
1539 owner))
1540 goto err2;
1541 if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
1542 if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
1543 return o;
1544
1545 err1:
1546 upb_inttable_uninit(&o->itof);
1547 err2:
1548 free(o);
1549 return NULL;
1550 }
1551
1552 upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) {
1553 bool ok;
1554 upb_oneof_iter i;
1555 upb_oneofdef *newo = upb_oneofdef_new(owner);
1556 if (!newo) return NULL;
1557 ok = upb_def_setfullname(upb_oneofdef_upcast_mutable(newo),
1558 upb_def_fullname(upb_oneofdef_upcast(o)), NULL);
1559 UPB_ASSERT_VAR(ok, ok);
1560 for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
1561 upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f);
1562 if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) {
1563 upb_oneofdef_unref(newo, owner);
1564 return NULL;
1565 }
1566 }
1567 return newo;
1568 }
1569
1570 const char *upb_oneofdef_name(const upb_oneofdef *o) {
1571 return upb_def_fullname(upb_oneofdef_upcast(o));
1572 }
1573
1574 bool upb_oneofdef_setname(upb_oneofdef *o, const char *fullname,
1575 upb_status *s) {
1576 if (upb_oneofdef_containingtype(o)) {
1577 upb_status_seterrmsg(s, "oneof already added to a message");
1578 return false;
1579 }
1580 return upb_def_setfullname(upb_oneofdef_upcast_mutable(o), fullname, s);
1581 }
1582
1583 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
1584 return o->parent;
1585 }
1586
1587 int upb_oneofdef_numfields(const upb_oneofdef *o) {
1588 return upb_strtable_count(&o->ntof);
1589 }
1590
1591 bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
1592 const void *ref_donor,
1593 upb_status *s) {
1594 assert(!upb_oneofdef_isfrozen(o));
1595 assert(!o->parent || !upb_msgdef_isfrozen(o->parent));
1596
1597 /* This method is idempotent. Check if |f| is already part of this oneofdef
1598 * and return immediately if so. */
1599 if (upb_fielddef_containingoneof(f) == o) {
1600 return true;
1601 }
1602
1603 /* The field must have an OPTIONAL label. */
1604 if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
1605 upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
1606 return false;
1607 }
1608
1609 /* Check that no field with this name or number exists already in the oneof.
1610 * Also check that the field is not already part of a oneof. */
1611 if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
1612 upb_status_seterrmsg(s, "field name or number were not set");
1613 return false;
1614 } else if (upb_oneofdef_itof(o, upb_fielddef_number(f)) ||
1615 upb_oneofdef_ntofz(o, upb_fielddef_name(f))) {
1616 upb_status_seterrmsg(s, "duplicate field name or number");
1617 return false;
1618 } else if (upb_fielddef_containingoneof(f) != NULL) {
1619 upb_status_seterrmsg(s, "fielddef already belongs to a oneof");
1620 return false;
1621 }
1622
1623 /* We allow adding a field to the oneof either if the field is not part of a
1624 * msgdef, or if it is and we are also part of the same msgdef. */
1625 if (o->parent == NULL) {
1626 /* If we're not in a msgdef, the field cannot be either. Otherwise we would
1627 * need to magically add this oneof to a msgdef to remain consistent, which
1628 * is surprising behavior. */
1629 if (upb_fielddef_containingtype(f) != NULL) {
1630 upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
1631 "oneof does not");
1632 return false;
1633 }
1634 } else {
1635 /* If we're in a msgdef, the user can add fields that either aren't in any
1636 * msgdef (in which case they're added to our msgdef) or already a part of
1637 * our msgdef. */
1638 if (upb_fielddef_containingtype(f) != NULL &&
1639 upb_fielddef_containingtype(f) != o->parent) {
1640 upb_status_seterrmsg(s, "fielddef belongs to a different message "
1641 "than oneof");
1642 return false;
1643 }
1644 }
1645
1646 /* Commit phase. First add the field to our parent msgdef, if any, because
1647 * that may fail; then add the field to our own tables. */
1648
1649 if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL) {
1650 if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {
1651 return false;
1652 }
1653 }
1654
1655 release_containingtype(f);
1656 f->oneof = o;
1657 upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f));
1658 upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f));
1659 upb_ref2(f, o);
1660 upb_ref2(o, f);
1661 if (ref_donor) upb_fielddef_unref(f, ref_donor);
1662
1663 return true;
1664 }
1665
1666 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
1667 const char *name, size_t length) {
1668 upb_value val;
1669 return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
1670 upb_value_getptr(val) : NULL;
1671 }
1672
1673 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
1674 upb_value val;
1675 return upb_inttable_lookup32(&o->itof, num, &val) ?
1676 upb_value_getptr(val) : NULL;
1677 }
1678
1679 void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
1680 upb_inttable_begin(iter, &o->itof);
1681 }
1682
1683 void upb_oneof_next(upb_oneof_iter *iter) {
1684 upb_inttable_next(iter);
1685 }
1686
1687 bool upb_oneof_done(upb_oneof_iter *iter) {
1688 return upb_inttable_done(iter);
1689 }
1690
1691 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
1692 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
1693 }
1694
1695 void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
1696 upb_inttable_iter_setdone(iter);
1697 }
1698
1699
1700 #include <stdlib.h>
1701 #include <stdio.h>
1702 #include <string.h>
1703
1704 typedef struct cleanup_ent {
1705 upb_cleanup_func *cleanup;
1706 void *ud;
1707 struct cleanup_ent *next;
1708 } cleanup_ent;
1709
/* Forward declaration: upb_env_malloc() compares against and calls
 * seeded_alloc() before its definition appears further down. */
static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size);
1711
1712 /* Default allocator **********************************************************/
1713
1714 /* Just use realloc, keeping all allocated blocks in a linked list to destroy at
1715 * the end. */
1716
/* Header placed in front of every allocation made by default_alloc(). */
typedef struct mem_block {
  /* Blocks form a doubly-linked list so that, when realloc() moves a block,
   * its neighbours can be re-pointed at the new location efficiently. */
  struct mem_block *prev;
  struct mem_block *next;
#ifndef NDEBUG
  size_t size;  /* Payload size only; excludes this header. */
#endif
} mem_block;
1726
1727 typedef struct {
1728 mem_block *head;
1729 } default_alloc_ud;
1730
1731 static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
1732 default_alloc_ud *ud = _ud;
1733 mem_block *from, *block;
1734 void *ret;
1735 UPB_UNUSED(oldsize);
1736
1737 from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL;
1738
1739 #ifndef NDEBUG
1740 if (from) {
1741 assert(oldsize <= from->size);
1742 }
1743 #endif
1744
1745 /* TODO(haberman): we probably need to provide even better alignment here,
1746 * like 16-byte alignment of the returned data pointer. */
1747 block = realloc(from, size + sizeof(mem_block));
1748 if (!block) return NULL;
1749 ret = (char*)block + sizeof(*block);
1750
1751 #ifndef NDEBUG
1752 block->size = size;
1753 #endif
1754
1755 if (from) {
1756 if (block != from) {
1757 /* The block was moved, so pointers in next and prev blocks must be
1758 * updated to its new location. */
1759 if (block->next) block->next->prev = block;
1760 if (block->prev) block->prev->next = block;
1761 if (ud->head == from) ud->head = block;
1762 }
1763 } else {
1764 /* Insert at head of linked list. */
1765 block->prev = NULL;
1766 block->next = ud->head;
1767 if (block->next) block->next->prev = block;
1768 ud->head = block;
1769 }
1770
1771 return ret;
1772 }
1773
1774 static void default_alloc_cleanup(void *_ud) {
1775 default_alloc_ud *ud = _ud;
1776 mem_block *block = ud->head;
1777
1778 while (block) {
1779 void *to_free = block;
1780 block = block->next;
1781 free(to_free);
1782 }
1783 }
1784
1785
1786 /* Standard error functions ***************************************************/
1787
1788 static bool default_err(void *ud, const upb_status *status) {
1789 UPB_UNUSED(ud);
1790 UPB_UNUSED(status);
1791 return false;
1792 }
1793
1794 static bool write_err_to(void *ud, const upb_status *status) {
1795 upb_status *copy_to = ud;
1796 upb_status_copy(copy_to, status);
1797 return false;
1798 }
1799
1800
1801 /* upb_env ********************************************************************/
1802
1803 void upb_env_init(upb_env *e) {
1804 default_alloc_ud *ud = (default_alloc_ud*)&e->default_alloc_ud;
1805 e->ok_ = true;
1806 e->bytes_allocated = 0;
1807 e->cleanup_head = NULL;
1808
1809 ud->head = NULL;
1810
1811 /* Set default functions. */
1812 upb_env_setallocfunc(e, default_alloc, ud);
1813 upb_env_seterrorfunc(e, default_err, NULL);
1814 }
1815
1816 void upb_env_uninit(upb_env *e) {
1817 cleanup_ent *ent = e->cleanup_head;
1818
1819 while (ent) {
1820 ent->cleanup(ent->ud);
1821 ent = ent->next;
1822 }
1823
1824 /* Must do this after running cleanup functions, because this will delete
1825 the memory we store our cleanup entries in! */
1826 if (e->alloc == default_alloc) {
1827 default_alloc_cleanup(e->alloc_ud);
1828 }
1829 }
1830
1831 UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc,
1832 void *ud) {
1833 e->alloc = alloc;
1834 e->alloc_ud = ud;
1835 }
1836
1837 UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func,
1838 void *ud) {
1839 e->err = func;
1840 e->err_ud = ud;
1841 }
1842
1843 void upb_env_reporterrorsto(upb_env *e, upb_status *status) {
1844 e->err = write_err_to;
1845 e->err_ud = status;
1846 }
1847
1848 bool upb_env_ok(const upb_env *e) {
1849 return e->ok_;
1850 }
1851
1852 bool upb_env_reporterror(upb_env *e, const upb_status *status) {
1853 e->ok_ = false;
1854 return e->err(e->err_ud, status);
1855 }
1856
1857 bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
1858 cleanup_ent *ent = upb_env_malloc(e, sizeof(cleanup_ent));
1859 if (!ent) return false;
1860
1861 ent->cleanup = func;
1862 ent->ud = ud;
1863 ent->next = e->cleanup_head;
1864 e->cleanup_head = ent;
1865
1866 return true;
1867 }
1868
1869 void *upb_env_malloc(upb_env *e, size_t size) {
1870 e->bytes_allocated += size;
1871 if (e->alloc == seeded_alloc) {
1872 /* This is equivalent to the next branch, but allows inlining for a
1873 * measurable perf benefit. */
1874 return seeded_alloc(e->alloc_ud, NULL, 0, size);
1875 } else {
1876 return e->alloc(e->alloc_ud, NULL, 0, size);
1877 }
1878 }
1879
1880 void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
1881 char *ret;
1882 assert(oldsize <= size);
1883 ret = e->alloc(e->alloc_ud, ptr, oldsize, size);
1884
1885 #ifndef NDEBUG
1886 /* Overwrite non-preserved memory to ensure callers are passing the oldsize
1887 * that they truly require. */
1888 memset(ret + oldsize, 0xff, size - oldsize);
1889 #endif
1890
1891 return ret;
1892 }
1893
1894 size_t upb_env_bytesallocated(const upb_env *e) {
1895 return e->bytes_allocated;
1896 }
1897
1898
1899 /* upb_seededalloc ************************************************************/
1900
/* Alignment granted to every seeded allocation.  16 is a conservative choice
 * in case anyone is using SSE. */
static const size_t maxalign = 16;

/* Rounds |size| up to the next multiple of maxalign. */
static size_t align_up(size_t size) {
  size_t padded = size + maxalign - 1;
  return padded - (padded % maxalign);
}
1907
1908 UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize,
1909 size_t size) {
1910 upb_seededalloc *a = ud;
1911
1912 size = align_up(size);
1913
1914 assert(a->mem_limit >= a->mem_ptr);
1915
1916 if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) {
1917 /* Fast path: we can satisfy from the initial allocation. */
1918 void *ret = a->mem_ptr;
1919 a->mem_ptr += size;
1920 return ret;
1921 } else {
1922 char *chptr = ptr;
1923 /* Slow path: fallback to other allocator. */
1924 a->need_cleanup = true;
1925 /* Is `ptr` part of the user-provided initial block? Don't pass it to the
1926 * default allocator if so; otherwise, it may try to realloc() the block. */
1927 if (chptr >= a->mem_base && chptr < a->mem_limit) {
1928 void *ret;
1929 assert(chptr + oldsize <= a->mem_limit);
1930 ret = a->alloc(a->alloc_ud, NULL, 0, size);
1931 if (ret) memcpy(ret, ptr, oldsize);
1932 return ret;
1933 } else {
1934 return a->alloc(a->alloc_ud, ptr, oldsize, size);
1935 }
1936 }
1937 }
1938
1939 void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) {
1940 default_alloc_ud *ud = (default_alloc_ud*)&a->default_alloc_ud;
1941 a->mem_base = mem;
1942 a->mem_ptr = mem;
1943 a->mem_limit = (char*)mem + len;
1944 a->need_cleanup = false;
1945 a->returned_allocfunc = false;
1946
1947 ud->head = NULL;
1948
1949 upb_seededalloc_setfallbackalloc(a, default_alloc, ud);
1950 }
1951
1952 void upb_seededalloc_uninit(upb_seededalloc *a) {
1953 if (a->alloc == default_alloc && a->need_cleanup) {
1954 default_alloc_cleanup(a->alloc_ud);
1955 }
1956 }
1957
1958 UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a,
1959 upb_alloc_func *alloc,
1960 void *ud) {
1961 assert(!a->returned_allocfunc);
1962 a->alloc = alloc;
1963 a->alloc_ud = ud;
1964 }
1965
1966 upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) {
1967 a->returned_allocfunc = true;
1968 return seeded_alloc;
1969 }
1970 /*
1971 ** TODO(haberman): it's unclear whether a lot of the consistency checks should
1972 ** assert() or return false.
1973 */
1974
1975
1976 #include <stdlib.h>
1977 #include <string.h>
1978
1979
1980
/* Exists only so that its address can serve as the unique pointer value
 * behind UPB_NO_CLOSURE; the contents are never meaningful. */
char _upb_noclosure;
1984
/* "free" entry of the handlers vtbl.  Runs every cleanup function registered
 * in h->cleanup_ on its associated pointer, then releases the cleanup table,
 * the msgdef reference owned by `h`, the subhandlers array and `h` itself. */
1985 static void freehandlers(upb_refcounted *r) {
1986 upb_handlers *h = (upb_handlers*)r;
1987
1988 upb_inttable_iter i;
1989 upb_inttable_begin(&i, &h->cleanup_);
1990 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
1991 void *val = (void*)upb_inttable_iter_key(&i); /* pointer to clean up. */
1992 upb_value func_val = upb_inttable_iter_value(&i);
1993 upb_handlerfree *func = upb_value_getfptr(func_val);
1994 func(val);
1995 }
1996
1997 upb_inttable_uninit(&h->cleanup_);
1998 upb_msgdef_unref(h->msg, h);
1999 free(h->sub);
2000 free(h);
2001 }
2002
/* "visit" entry of the handlers vtbl.  For every submessage field of h->msg
 * that has subhandlers set, reports the subhandlers object to `visit`,
 * passing `closure` through unchanged. */
2003 static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
2004 void *closure) {
2005 const upb_handlers *h = (const upb_handlers*)r;
2006 upb_msg_field_iter i;
2007 for(upb_msg_field_begin(&i, h->msg);
2008 !upb_msg_field_done(&i);
2009 upb_msg_field_next(&i)) {
2010 upb_fielddef *f = upb_msg_iter_field(&i);
2011 const upb_handlers *sub;
2012 if (!upb_fielddef_issubmsg(f)) continue;
2013 sub = upb_handlers_getsubhandlers(h, f);
2014 if (sub) visit(r, upb_handlers_upcast(sub), closure);
2015 }
2016 }
2017
/* Refcounting vtable for upb_handlers objects: {visit, free}.  Passed to
 * upb_refcounted_init() by upb_handlers_new(). */
2018 static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
2019
/* State threaded through the recursive newformsg() calls. */
2020 typedef struct {
2021 upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */
2022 upb_handlers_callback *callback; /* called on each new handlers object. */
2023 const void *closure; /* first argument passed to callback. */
2024 } dfs_state;
2025
2026 /* TODO(haberman): discard upb_handlers* objects that do not actually have any
2027 * handlers set and cannot reach any upb_handlers* object that does. This is
2028 * slightly tricky to do correctly. */
/* Creates a handlers object for message `m` owned by `owner`, records it in
 * s->tab, runs the user callback on it, and then sets subhandlers for every
 * submessage field, recursing for message types that are not in the table
 * yet.  Returns NULL (after dropping the ref on the new object) if creating
 * or registering a handlers object fails. */
2029 static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
2030 dfs_state *s) {
2031 upb_msg_field_iter i;
2032 upb_handlers *h = upb_handlers_new(m, owner);
2033 if (!h) return NULL;
2034 if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;
2035
2036 s->callback(s->closure, h);
2037
2038 /* For each submessage field, get or create a handlers object and set it as
2039 * the subhandlers. */
2040 for(upb_msg_field_begin(&i, m);
2041 !upb_msg_field_done(&i);
2042 upb_msg_field_next(&i)) {
2043 upb_fielddef *f = upb_msg_iter_field(&i);
2044 const upb_msgdef *subdef;
2045 upb_value subm_ent;
2046
2047 if (!upb_fielddef_issubmsg(f)) continue;
2048
2049 subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
2050 if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) {
2051 upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent));
2052 } else {
/* The address of the local serves as a temporary owner; that ref is dropped
 * again once setsubhandlers() has taken its own ref from `h`. */
2053 upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s);
2054 if (!sub_mh) goto oom;
2055 upb_handlers_setsubhandlers(h, f, sub_mh);
2056 upb_handlers_unref(sub_mh, &sub_mh);
2057 }
2058 }
2059 return h;
2060
2061 oom:
2062 upb_handlers_unref(h, owner);
2063 return NULL;
2064 }
2065
2066 /* Resolves to the slot of the "sub" array that holds the subhandlers for a
2067 * submessage field.  The argument is an index into h->sub: callers pass the
 * field index (SUBH_F) or a STARTSUBMSG selector with
 * UPB_STATIC_SELECTOR_COUNT already subtracted. */
2068 #define SUBH(h, selector) (h->sub[selector])
2069
2070 /* The selector for a submessage field is the field index. */
2071 #define SUBH_F(h, f) SUBH(h, f->index_)
2072
/* Looks up the selector for handler `type` on field `f`.  `h` must not be
 * frozen.  Returns the selector, or -1 after recording an error in h->status_
 * when `f` does not belong to the message `h` was created for or when `type`
 * is not valid for `f`. */
2073 static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
2074 upb_handlertype_t type) {
2075 upb_selector_t sel;
2076 assert(!upb_handlers_isfrozen(h));
2077 if (upb_handlers_msgdef(h) != upb_fielddef_containingtype(f)) {
2078 upb_status_seterrf(
2079 &h->status_, "type mismatch: field %s does not belong to message %s",
2080 upb_fielddef_name(f), upb_msgdef_fullname(upb_handlers_msgdef(h)));
2081 return -1;
2082 }
2083 if (!upb_handlers_getselector(f, type, &sel)) {
2084 upb_status_seterrf(
2085 &h->status_,
2086 "type mismatch: cannot register handler type %d for field %s",
2087 type, upb_fielddef_name(f));
2088 return -1;
2089 }
2090 return sel;
2091 }
2092
/* Like trygetsel(), for callers that know the lookup cannot fail; a failed
 * lookup trips the assert. */
2093 static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
2094 upb_handlertype_t type) {
2095 int32_t sel = trygetsel(h, f, type);
2096 assert(sel >= 0);
2097 return sel;
2098 }
2099
/* Returns the address of the return-closure-type slot stored in the handler
 * table entry for (f, type), so that callers can read or update it. */
2100 static const void **returntype(upb_handlers *h, const upb_fielddef *f,
2101 upb_handlertype_t type) {
2102 return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type_;
2103 }
2104
/* Shared implementation of the handler setters.  Stores `func` and a copy of
 * `attr` (default attributes when attr is NULL) in table slot `sel`.
 *
 * Fails, recording a message in h->status_, when `sel` is negative, when the
 * slot already holds a handler, or when the closure type / return closure
 * type conflicts with a type established earlier.  `f` is NULL for
 * message-level handlers.  `h` must not be frozen. */
2105 static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
2106 upb_handlertype_t type, upb_func *func,
2107 upb_handlerattr *attr) {
2108 upb_handlerattr set_attr = UPB_HANDLERATTR_INITIALIZER;
2109 const void *closure_type;
2110 const void **context_closure_type;
2111
2112 assert(!upb_handlers_isfrozen(h));
2113
2114 if (sel < 0) {
2115 upb_status_seterrmsg(&h->status_,
2116 "incorrect handler type for this field.");
2117 return false;
2118 }
2119
2120 if (h->table[sel].func) {
2121 upb_status_seterrmsg(&h->status_,
2122 "cannot change handler once it has been set.");
2123 return false;
2124 }
2125
2126 if (attr) {
2127 set_attr = *attr;
2128 }
2129
2130 /* Check that the given closure type matches the closure type that has been
2131 * established for this context (if any). */
2132 closure_type = upb_handlerattr_closuretype(&set_attr);
2133
/* Pick the slot that records the closure type for this handler's context:
 * the STARTSTR return type for STRING handlers, the STARTSEQ return type for
 * handlers inside a repeated field, otherwise the top-level closure type. */
2134 if (type == UPB_HANDLER_STRING) {
2135 context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR);
2136 } else if (f && upb_fielddef_isseq(f) &&
2137 type != UPB_HANDLER_STARTSEQ &&
2138 type != UPB_HANDLER_ENDSEQ) {
2139 context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ);
2140 } else {
2141 context_closure_type = &h->top_closure_type;
2142 }
2143
2144 if (closure_type && *context_closure_type &&
2145 closure_type != *context_closure_type) {
2146 /* TODO(haberman): better message for debugging. */
2147 if (f) {
2148 upb_status_seterrf(&h->status_,
2149 "closure type does not match for field %s",
2150 upb_fielddef_name(f));
2151 } else {
2152 upb_status_seterrmsg(
2153 &h->status_, "closure type does not match for message-level handler");
2154 }
2155 return false;
2156 }
2157
/* Remember the closure type for later setters in the same context. */
2158 if (closure_type)
2159 *context_closure_type = closure_type;
2160
2161 /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer
2162 * matches any pre-existing expectations about what type is expected. */
2163 if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
2164 const void *return_type = upb_handlerattr_returnclosuretype(&set_attr);
2165 const void *table_return_type =
2166 upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2167 if (return_type && table_return_type && return_type != table_return_type) {
2168 upb_status_seterrmsg(&h->status_, "closure return type does not match");
2169 return false;
2170 }
2171
/* Carry over an expectation recorded before this handler was set, since
 * the whole attr is overwritten below. */
2172 if (table_return_type && !return_type)
2173 upb_handlerattr_setreturnclosuretype(&set_attr, table_return_type);
2174 }
2175
2176 h->table[sel].func = (upb_func*)func;
2177 h->table[sel].attr = set_attr;
2178 return true;
2179 }
2180
2181 /* Returns the effective closure type for this handler (which will propagate
2182 * from outer frames if this frame has no START* handler). Not implemented for
2183 * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL if
2184 * the effective closure type is unspecified (either no handler was registered
2185 * to specify it or the handler that was registered did not specify the closure
2186 * type). */
2187 const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
2188 upb_handlertype_t type) {
2189 const void *ret;
2190 upb_selector_t sel;
2191
2192 assert(type != UPB_HANDLER_STRING);
2193 ret = h->top_closure_type;
2194
/* Inside a repeated field, a registered STARTSEQ handler's return closure
 * type replaces the top-level type. */
2195 if (upb_fielddef_isseq(f) &&
2196 type != UPB_HANDLER_STARTSEQ &&
2197 type != UPB_HANDLER_ENDSEQ &&
2198 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
2199 ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2200 }
2201
/* NOTE(review): this branch cannot be taken while the assert above holds;
 * presumably kept for when UPB_HANDLER_STRING becomes supported. */
2202 if (type == UPB_HANDLER_STRING &&
2203 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
2204 ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2205 }
2206
2207 /* The effective type of the submessage; not used yet.
2208 * if (type == SUBMESSAGE &&
2209 * h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
2210 * ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2211 * } */
2212
2213 return ret;
2214 }
2215
2216 /* Checks whether the START* handler specified by f & type is missing even
2217 * though it is required to convert the established type of an outer frame
2218 * ("closure_type") into the established type of an inner frame (represented in
2219 * the return closure type of this handler's attr).
 *
 * Returns true when the handler is registered, or when the two closure types
 * are unspecified or equal.  Otherwise records an error naming the field in
 * `status` and returns false. */
2220 bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
2221 upb_status *status) {
2222 const void *closure_type;
2223 const upb_handlerattr *attr;
2224 const void *return_closure_type;
2225
2226 upb_selector_t sel = handlers_getsel(h, f, type);
2227 if (h->table[sel].func) return true;
2228 closure_type = effective_closure_type(h, f, type);
2229 attr = &h->table[sel].attr;
2230 return_closure_type = upb_handlerattr_returnclosuretype(attr);
2231 if (closure_type && return_closure_type &&
2232 closure_type != return_closure_type) {
/* The argument is the field name (a string), so the conversion must be %s. */
2233 upb_status_seterrf(status,
2234 "expected start handler to return sub type for field %s",
2235 upb_fielddef_name(f));
2236 return false;
2237 }
2238 return true;
2239 }
2240
2241 /* Public interface ***********************************************************/
2242
/* Creates a new, mutable handlers object for the frozen message def `md`,
 * with an initial ref owned by `owner`.  The object is allocated with room
 * for md->selector_count table entries and takes its own ref on `md`.
 * Returns NULL if any allocation or initialization step fails.
 *
 * NOTE(review): on the first two failure paths freehandlers() runs on a
 * cleanup_ table that was only zero-filled by calloc() and never passed to
 * upb_inttable_init() -- confirm the inttable functions tolerate that.
 * NOTE(review): calloc() may return NULL for a zero element count on some
 * platforms, which would be treated as OOM when submsg_field_count is 0 --
 * confirm. */
2243 upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
2244 int extra;
2245 upb_handlers *h;
2246
2247 assert(upb_msgdef_isfrozen(md));
2248
/* sizeof(*h) already accounts for one table entry, hence the "- 1". */
2249 extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
2250 h = calloc(sizeof(*h) + extra, 1);
2251 if (!h) return NULL;
2252
2253 h->msg = md;
2254 upb_msgdef_ref(h->msg, h);
2255 upb_status_clear(&h->status_);
2256 h->sub = calloc(md->submsg_field_count, sizeof(*h->sub));
2257 if (!h->sub) goto oom;
2258 if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner))
2259 goto oom;
2260 if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom;
2261
2262 /* calloc() above initialized all handlers to NULL. */
2263 return h;
2264
2265 oom:
2266 freehandlers(upb_handlers_upcast_mutable(h));
2267 return NULL;
2268 }
2269
/* Builds handlers for `m` and every message reachable from it through
 * submessage fields, calling `callback(closure, h)` on each new handlers
 * object so the caller can register handlers, then freezes the result.
 * Returns the frozen handlers for `m`, owned by `owner`, or NULL if the table
 * or any handlers object could not be created.  The freeze itself is expected
 * to succeed; its result is only asserted. */
2270 const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
2271 const void *owner,
2272 upb_handlers_callback *callback,
2273 const void *closure) {
2274 dfs_state state;
2275 upb_handlers *ret;
2276 bool ok;
2277 upb_refcounted *r;
2278
2279 state.callback = callback;
2280 state.closure = closure;
2281 if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
2282
2283 ret = newformsg(m, owner, &state);
2284
/* The msgdef -> handlers table is only needed while the graph is built. */
2285 upb_inttable_uninit(&state.tab);
2286 if (!ret) return NULL;
2287
2288 r = upb_handlers_upcast_mutable(ret);
2289 ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH);
2290 UPB_ASSERT_VAR(ok, ok);
2291
2292 return ret;
2293 }
2294
/* Returns the status object in which the setters record errors.  `h` must not
 * be frozen. */
2295 const upb_status *upb_handlers_status(upb_handlers *h) {
2296 assert(!upb_handlers_isfrozen(h));
2297 return &h->status_;
2298 }
2299
/* Clears any error recorded in the status of `h`.  `h` must not be frozen. */
2300 void upb_handlers_clearerr(upb_handlers *h) {
2301 assert(!upb_handlers_isfrozen(h));
2302 upb_status_clear(&h->status_);
2303 }
2304
/* Generates upb_handlers_set<name>(h, f, func, attr) for one handler type.
 * Each setter resolves the selector with trygetsel() and hands it to doset();
 * a failed lookup (-1) is turned into a false return by doset(). */
2305 #define SETTER(name, handlerctype, handlertype) \
2306 bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \
2307 handlerctype func, upb_handlerattr *attr) { \
2308 int32_t sel = trygetsel(h, f, handlertype); \
2309 return doset(h, sel, f, handlertype, (upb_func*)func, attr); \
2310 }
2311
2312 SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32)
2313 SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64)
2314 SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32)
2315 SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64)
2316 SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT)
2317 SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE)
2318 SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL)
2319 SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR)
2320 SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING)
2321 SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR)
2322 SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ)
2323 SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG)
2324 SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG)
2325 SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ)
2326
2327 #undef SETTER
2328
/* Registers the message-level "start message" handler in the
 * UPB_STARTMSG_SELECTOR slot.  No field is involved, so NULL is passed for it;
 * UPB_HANDLER_INT32 only serves as a handler type that triggers none of the
 * STRING / START* special cases in doset().  Returns what doset() returns. */
2329 bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
2330 upb_handlerattr *attr) {
2331 return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
2332 (upb_func *)func, attr);
2333 }
2334
/* Registers the message-level "end message" handler in the
 * UPB_ENDMSG_SELECTOR slot.  As no field is involved, NULL is passed for it
 * and UPB_HANDLER_INT32 is used as a neutral handler type for doset().
 * `h` must not be frozen.  Returns what doset() returns. */
2335 bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
2336 upb_handlerattr *attr) {
2337 assert(!upb_handlers_isfrozen(h));
2338 return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
2339 (upb_func *)func, attr);
2340 }
2341
/* Sets `sub` as the handlers used for submessage field `f` of `h`, taking a
 * ref on `sub` on behalf of `h`.  Returns false, changing nothing, if
 * subhandlers were already set for `f` or if `sub` was created for a message
 * type other than the field's subdef.  `h` must not be frozen, `sub` must not
 * be NULL and `f` must be a submessage field (all asserted). */
2342 bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
2343 const upb_handlers *sub) {
2344 assert(sub);
2345 assert(!upb_handlers_isfrozen(h));
2346 assert(upb_fielddef_issubmsg(f));
2347 if (SUBH_F(h, f)) return false; /* Can't reset. */
2348 if (upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
2349 return false;
2350 }
2351 SUBH_F(h, f) = sub;
2352 upb_ref2(sub, h);
2353 return true;
2354 }
2355
/* Returns the subhandlers stored for submessage field `f`.  The slot starts
 * out NULL (zero-filled by calloc() in upb_handlers_new()) until
 * upb_handlers_setsubhandlers() fills it. */
2356 const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
2357 const upb_fielddef *f) {
2358 assert(upb_fielddef_issubmsg(f));
2359 return SUBH_F(h, f);
2360 }
2361
/* Copies the attributes stored for selector `sel` into *attr and returns
 * true.  Returns false, leaving *attr untouched, when
 * upb_handlers_gethandler() reports no handler for `sel`. */
2362 bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
2363 upb_handlerattr *attr) {
2364 if (!upb_handlers_gethandler(h, sel))
2365 return false;
2366 *attr = h->table[sel].attr;
2367 return true;
2368 }
2369
/* Returns the subhandlers for the submessage field whose STARTSUBMSG selector
 * is `sel`.  Subtracting UPB_STATIC_SELECTOR_COUNT turns that selector into
 * the index used for the "sub" array. */
2370 const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
2371 upb_selector_t sel) {
2372 /* STARTSUBMSG selector in sel is the field's selector base. */
2373 return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
2374 }
2375
/* Returns the message definition these handlers were created for. */
2376 const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
2377
/* Registers `func` to be called on `p` when `h` is freed (see
 * freehandlers()).  Returns false if a cleanup is already registered for `p`,
 * or if the registration could not be stored; in either case `func` will not
 * be run for this call. */
2378 bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
2379 bool ok;
2380 if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) {
2381 return false;
2382 }
2383 ok = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
2384 UPB_ASSERT_VAR(ok, ok);
/* Report the insertion result instead of unconditionally claiming success,
 * so a failed insert is visible even when asserts are compiled out. */
2385 return ok;
2386 }
2387
2388
2389 /* "Static" methods ***********************************************************/
2390
/* Validates and freezes the `n` handlers objects in `handlers`.
 *
 * For each object this fails if an earlier setter left an error in its
 * status, or if a required START* handler is missing (see checkstart()).
 * Submessage fields that have start/end handlers but no subhandlers get an
 * empty subhandlers object.  On any failure an error is recorded in `s` and
 * false is returned; otherwise the result of upb_refcounted_freeze()
 * decides. */
2391 bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
2392 /* TODO: verify we have a transitive closure. */
2393 int i;
2394 for (i = 0; i < n; i++) {
2395 upb_msg_field_iter j;
2396 upb_handlers *h = handlers[i];
2397
2398 if (!upb_ok(&h->status_)) {
2399 upb_status_seterrf(s, "handlers for message %s had error status: %s",
2400 upb_msgdef_fullname(upb_handlers_msgdef(h)),
2401 upb_status_errmsg(&h->status_));
2402 return false;
2403 }
2404
2405 /* Check that there are no closure mismatches due to missing Start* handlers
2406 * or subhandlers with different type-level types. */
2407 for(upb_msg_field_begin(&j, h->msg);
2408 !upb_msg_field_done(&j);
2409 upb_msg_field_next(&j)) {
2410
2411 const upb_fielddef *f = upb_msg_iter_field(&j);
2412 if (upb_fielddef_isseq(f)) {
2413 if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s))
2414 return false;
2415 }
2416
2417 if (upb_fielddef_isstring(f)) {
2418 if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s))
2419 return false;
2420 }
2421
2422 if (upb_fielddef_issubmsg(f)) {
2423 bool hashandler = false;
2424 if (upb_handlers_gethandler(
2425 h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) ||
2426 upb_handlers_gethandler(
2427 h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) {
2428 hashandler = true;
2429 }
2430
2431 if (upb_fielddef_isseq(f) &&
2432 (upb_handlers_gethandler(
2433 h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) ||
2434 upb_handlers_gethandler(
2435 h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) {
2436 hashandler = true;
2437 }
2438
2439 if (hashandler && !upb_handlers_getsubhandlers(h, f)) {
2440 /* For now we add an empty subhandlers in this case. It makes the
2441 * decoder code generator simpler, because it only has to handle two
2442 * cases (submessage has handlers or not) as opposed to three
2443 * (submessage has handlers in enclosing message but no subhandlers).
2444 *
2445 * This makes parsing less efficient in the case that we want to
2446 * notice a submessage but skip its contents (like if we're testing
2447 * for submessage presence or counting the number of repeated
2448 * submessages). In this case we will end up parsing the submessage
2449 * field by field and throwing away the results for each, instead of
2450 * skipping the whole delimited thing at once. If this is an issue we
2451 * can revisit it, but do remember that this only arises when you have
2452 * handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
2453 * submessage but no subhandlers. The use cases for this are
2454 * limited. */
2455 upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub);
/* upb_handlers_new() returns NULL when it cannot allocate; bail out
 * instead of handing NULL to setsubhandlers(), which asserts on it. */
if (!sub) {
upb_status_seterrmsg(s, "out of memory");
return false;
}
2456 upb_handlers_setsubhandlers(h, f, sub);
2457 upb_handlers_unref(sub, &sub);
2458 }
2459
2460 /* TODO(haberman): check type of submessage.
2461 * This is slightly tricky; also consider whether we should check that
2462 * they match at setsubhandlers time. */
2463 }
2464 }
2465 }
2466
2467 if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s,
2468 UPB_MAX_HANDLER_DEPTH)) {
2469 return false;
2470 }
2471
2472 return true;
2473 }
2474
/* Maps the type of primitive field `f` to the handler type used for its
 * values; enums share the int32 handler.  Any other field type is invalid
 * input: it trips the assert and yields -1. */
2475 upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
2476 switch (upb_fielddef_type(f)) {
2477 case UPB_TYPE_INT32:
2478 case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
2479 case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
2480 case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
2481 case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
2482 case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
2483 case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
2484 case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
2485 default: assert(false); return -1; /* Invalid input. */
2486 }
2487 }
2488
/* Computes the selector (index into the handler table) for handler `type` on
 * field `f` and stores it in *s.  Returns false without writing *s when
 * `type` does not apply to `f`.  Layout relative to f->selector_base, as
 * coded below:
 *   STARTSEQ = base - 2, ENDSEQ = base - 1,
 *   primitive value / STRING (string fields) / ENDSUBMSG = base,
 *   STARTSTR = base + 1, ENDSTR = base + 2, STRING (lazy fields) = base + 3,
 *   STARTSUBMSG = f->index_ + UPB_STATIC_SELECTOR_COUNT.
 *
 * NOTE(review): the switch has no default case, so a `type` outside the
 * listed values would reach the final assert with *s unset. */
2489 bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
2490 upb_selector_t *s) {
2491 switch (type) {
2492 case UPB_HANDLER_INT32:
2493 case UPB_HANDLER_INT64:
2494 case UPB_HANDLER_UINT32:
2495 case UPB_HANDLER_UINT64:
2496 case UPB_HANDLER_FLOAT:
2497 case UPB_HANDLER_DOUBLE:
2498 case UPB_HANDLER_BOOL:
2499 if (!upb_fielddef_isprimitive(f) ||
2500 upb_handlers_getprimitivehandlertype(f) != type)
2501 return false;
2502 *s = f->selector_base;
2503 break;
2504 case UPB_HANDLER_STRING:
2505 if (upb_fielddef_isstring(f)) {
2506 *s = f->selector_base;
2507 } else if (upb_fielddef_lazy(f)) {
2508 *s = f->selector_base + 3;
2509 } else {
2510 return false;
2511 }
2512 break;
2513 case UPB_HANDLER_STARTSTR:
2514 if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
2515 *s = f->selector_base + 1;
2516 } else {
2517 return false;
2518 }
2519 break;
2520 case UPB_HANDLER_ENDSTR:
2521 if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
2522 *s = f->selector_base + 2;
2523 } else {
2524 return false;
2525 }
2526 break;
2527 case UPB_HANDLER_STARTSEQ:
2528 if (!upb_fielddef_isseq(f)) return false;
2529 *s = f->selector_base - 2;
2530 break;
2531 case UPB_HANDLER_ENDSEQ:
2532 if (!upb_fielddef_isseq(f)) return false;
2533 *s = f->selector_base - 1;
2534 break;
2535 case UPB_HANDLER_STARTSUBMSG:
2536 if (!upb_fielddef_issubmsg(f)) return false;
2537 /* Selectors for STARTSUBMSG are at the beginning of the table so that the
2538 * selector can also be used as an index into the "sub" array of
2539 * subhandlers. The indexes for the two into these two tables are the
2540 * same, except that in the handler table the static selectors come first. */
2541 *s = f->index_ + UPB_STATIC_SELECTOR_COUNT;
2542 break;
2543 case UPB_HANDLER_ENDSUBMSG:
2544 if (!upb_fielddef_issubmsg(f)) return false;
2545 *s = f->selector_base;
2546 break;
2547 }
/* Every selector must fall inside the containing message's table. */
2548 assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
2549 return true;
2550 }
2551
/* Returns 2 for repeated fields and 0 otherwise.  This matches
 * upb_handlers_getselector(), which places STARTSEQ and ENDSEQ at
 * selector_base - 2 and selector_base - 1. */
2552 uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
2553 return upb_fielddef_isseq(f) ? 2 : 0;
2554 }
2555
/* Returns the number of selectors counted for field `f`: one, plus two for a
 * repeated field, plus two for a string field, plus three for a lazy
 * submessage field. */
2556 uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
2557 uint32_t ret = 1;
2558 if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */
2559 if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */
2560 if (upb_fielddef_issubmsg(f)) {
2561 /* ENDSUBMSG (STARTSUBMSG is at table beginning) */
2562 ret += 0;
2563 if (upb_fielddef_lazy(f)) {
2564 /* STARTSTR/ENDSTR/STRING (for lazy) */
2565 ret += 3;
2566 }
2567 }
2568 return ret;
2569 }
2570
2571
2572 /* upb_handlerattr ************************************************************/
2573
/* Resets *attr to the values of UPB_HANDLERATTR_INITIALIZER. */
2574 void upb_handlerattr_init(upb_handlerattr *attr) {
2575 upb_handlerattr from = UPB_HANDLERATTR_INITIALIZER;
2576 memcpy(attr, &from, sizeof(*attr));
2577 }
2578
/* Counterpart of upb_handlerattr_init(); currently does nothing. */
2579 void upb_handlerattr_uninit(upb_handlerattr *attr) {
2580 UPB_UNUSED(attr);
2581 }
2582
/* Stores `hd` as the handler data of `attr`.  Always returns true. */
2583 bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) {
2584 attr->handler_data_ = hd;
2585 return true;
2586 }
2587
/* Stores `type` as the closure type of `attr`.  Always returns true. */
2588 bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) {
2589 attr->closure_type_ = type;
2590 return true;
2591 }
2592
/* Returns the closure type stored in `attr`. */
2593 const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) {
2594 return attr->closure_type_;
2595 }
2596
/* Stores `type` as the return closure type of `attr`.  Always returns true. */
2597 bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr,
2598 const void *type) {
2599 attr->return_closure_type_ = type;
2600 return true;
2601 }
2602
/* Returns the return closure type stored in `attr`. */
2603 const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) {
2604 return attr->return_closure_type_;
2605 }
2606
/* Stores the `alwaysok` flag in `attr`.  Always returns true. */
2607 bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) {
2608 attr->alwaysok_ = alwaysok;
2609 return true;
2610 }
2611
/* Returns the `alwaysok` flag stored in `attr`. */
2612 bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) {
2613 return attr->alwaysok_;
2614 }
2615
2616 /* upb_bufhandle **************************************************************/
2617
/* Returns the object offset stored in the buffer handle. */
2618 size_t upb_bufhandle_objofs(const upb_bufhandle *h) {
2619 return h->objofs_;
2620 }
2621
2622 /* upb_byteshandler ***********************************************************/
2623
/* Zero-fills *h, leaving every table entry without a handler function. */
2624 void upb_byteshandler_init(upb_byteshandler* h) {
2625 memset(h, 0, sizeof(*h));
2626 }
2627
2628 /* For when we support handlerfree callbacks.  Currently does nothing. */
2629 void upb_byteshandler_uninit(upb_byteshandler* h) {
2630 UPB_UNUSED(h);
2631 }
2632
/* Stores `func` and its handler data `d` in the UPB_STARTSTR_SELECTOR slot,
 * overwriting any previous values.  Always returns true. */
2633 bool upb_byteshandler_setstartstr(upb_byteshandler *h,
2634 upb_startstr_handlerfunc *func, void *d) {
2635 h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func;
2636 h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d;
2637 return true;
2638 }
2639
/* Stores `func` and its handler data `d` in the UPB_STRING_SELECTOR slot,
 * overwriting any previous values.  Always returns true. */
2640 bool upb_byteshandler_setstring(upb_byteshandler *h,
2641 upb_string_handlerfunc *func, void *d) {
2642 h->table[UPB_STRING_SELECTOR].func = (upb_func*)func;
2643 h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d;
2644 return true;
2645 }
2646
/* Stores `func` and its handler data `d` in the UPB_ENDSTR_SELECTOR slot,
 * overwriting any previous values.  Always returns true. */
2647 bool upb_byteshandler_setendstr(upb_byteshandler *h,
2648 upb_endfield_handlerfunc *func, void *d) {
2649 h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func;
2650 h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d;
2651 return true;
2652 }
2653 /*
2654 ** upb::RefCounted Implementation
2655 **
2656 ** Our key invariants are:
2657 ** 1. reference cycles never span groups
2658 ** 2. for ref2(to, from), we increment to's count iff group(from) != group(to)
2659 **
2660 ** The previous two are how we avoid leaking cycles. Other important
2661 ** invariants are:
2662 ** 3. for mutable objects "from" and "to", if there exists a ref2(to, from)
2663 ** this implies group(from) == group(to). (In practice, what we implement
2664 ** is even stronger; "from" and "to" will share a group if there has *ever*
2665 ** been a ref2(to, from), but all that is necessary for correctness is the
2666 ** weaker one).
2667 ** 4. mutable and immutable objects are never in the same group.
2668 */
2669
2670
2671 #include <setjmp.h>
2672 #include <stdlib.h>
2673
/* Forward declaration; the definition is not in this part of the file. */
2674 static void freeobj(upb_refcounted *o);
2675
/* Sentinel owner value.  In UPB_DEBUG_REFS builds track() and untrack()
 * return immediately for refs whose owner equals this pointer.  Only the
 * address of untracked_val is used. */
2676 const char untracked_val;
2677 const void *UPB_UNTRACKED_REF = &untracked_val;
2678
2679 /* arch-specific atomic primitives *******************************************/
2680
/* atomic_inc(a) adds one to *a; atomic_dec(a) subtracts one from *a and
 * returns true iff the result is zero.  Every variant takes a uint32_t*,
 * which is what refgroup()/unrefgroup() pass in. */
2681 #ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/
2682
2683 static void atomic_inc(uint32_t *a) { (*a)++; }
2684 static bool atomic_dec(uint32_t *a) { return --(*a) == 0; }
2685
2686 #elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/
2687
2688 static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
2689 static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; }
2690
2691 #elif defined(WIN32) /*-------------------------------------------------------*/
2692
2693 #include <Windows.h>
2694
/* The Interlocked functions operate on a 32-bit LONG, hence the casts. */
2695 static void atomic_inc(uint32_t *a) { InterlockedIncrement((volatile LONG*)a); }
2696 static bool atomic_dec(uint32_t *a) {
2697 return InterlockedDecrement((volatile LONG*)a) == 0;
2698 }
2699
2700 #else
2701 #error Atomic primitives not defined for your platform/CPU. \
2702 Implement them or compile with UPB_THREAD_UNSAFE.
2703 #endif
2704
2705 /* All static objects point to this refcount.
2706 * It is special-cased in ref/unref below: refgroup() and unrefgroup() compare
 * against its address and never modify it. */
2707 uint32_t static_refcount = -1;
2708
2709 /* We can avoid atomic ops for statically-declared objects.
2710 * This is a minor optimization but nice since we can avoid degrading under
2711 * contention in this case. */
2712
/* Adds one reference to `group`, unless it is the shared static refcount. */
2713 static void refgroup(uint32_t *group) {
2714 if (group != &static_refcount)
2715 atomic_inc(group);
2716 }
2717
/* Drops one reference from `group`.  Returns true iff the count reached zero;
 * the shared static refcount is never decremented and always yields false. */
2718 static bool unrefgroup(uint32_t *group) {
2719 if (group == &static_refcount) {
2720 return false;
2721 } else {
2722 return atomic_dec(group);
2723 }
2724 }
2725
2726
2727 /* Reference tracking (debug only) ********************************************/
2728
2729 #ifdef UPB_DEBUG_REFS
2730
/* Lock taken around every access to the ref-tracking tables below.  With
 * UPB_THREAD_UNSAFE both functions are empty. */
2731 #ifdef UPB_THREAD_UNSAFE
2732
2733 static void upb_lock() {}
2734 static void upb_unlock() {}
2735
2736 #else
2737
2738 /* User must define functions that lock/unlock a global mutex and link this
2739 * file against them. */
2740 void upb_lock();
2741 void upb_unlock();
2742
2743 #endif
2744
2745 /* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
2746 * code-paths that can normally never fail, like upb_refcounted_ref(). Since
2747 * we have no way to propagate out-of-memory errors back to the user, and since
2748 * these errors can only occur in UPB_DEBUG_REFS mode, we immediately fail:
 * the assert fires when asserts are enabled, otherwise the process exits
 * with status 1. */
2749 #define CHECK_OOM(predicate) if (!(predicate)) { assert(predicate); exit(1); }
2750
/* Debug bookkeeping for the refs one owner holds on one object; stored as the
 * value in the object's `refs` table, keyed by owner. */
2751 typedef struct {
2752 int count; /* How many refs there are (duplicates only allowed for ref2). */
2753 bool is_ref2; /* Whether the refs were taken as ref2s. */
2754 } trackedref;
2755
/* Allocates a trackedref with a count of one.  Fails through CHECK_OOM
 * instead of returning NULL when malloc() fails. */
2756 static trackedref *trackedref_new(bool is_ref2) {
2757 trackedref *ret = malloc(sizeof(*ret));
2758 CHECK_OOM(ret);
2759 ret->count = 1;
2760 ret->is_ref2 = is_ref2;
2761 return ret;
2762 }
2763
/* Records that `owner` holds a ref on `r` (a ref2 when `ref2` is true).
 * Refs owned by UPB_UNTRACKED_REF are ignored.  A repeated (r, owner) pair is
 * only legal for ref2s and bumps the existing count; otherwise a new
 * trackedref is inserted into r->refs and, for ref2s, `r` is also added to
 * the owner's ref2s table.  Runs under upb_lock(). */
2764 static void track(const upb_refcounted *r, const void *owner, bool ref2) {
2765 upb_value v;
2766
2767 assert(owner);
2768 if (owner == UPB_UNTRACKED_REF) return;
2769
2770 upb_lock();
2771 if (upb_inttable_lookupptr(r->refs, owner, &v)) {
2772 trackedref *ref = upb_value_getptr(v);
2773 /* Since we allow multiple ref2's for the same to/from pair without
2774 * allocating separate memory for each one, we lose the fine-grained
2775 * tracking behavior we get with regular refs. Since ref2s only happen
2776 * inside upb, we'll accept this limitation until/unless there is a really
2777 * difficult upb-internal bug that can't be figured out without it. */
2778 assert(ref2);
2779 assert(ref->is_ref2);
2780 ref->count++;
2781 } else {
2782 trackedref *ref = trackedref_new(ref2);
2783 bool ok = upb_inttable_insertptr(r->refs, owner, upb_value_ptr(ref));
2784 CHECK_OOM(ok);
2785 if (ref2) {
2786 /* We know this cast is safe when it is a ref2, because it's coming from
2787 * another refcounted object. */
2788 const upb_refcounted *from = owner;
2789 assert(!upb_inttable_lookupptr(from->ref2s, r, NULL));
2790 ok = upb_inttable_insertptr(from->ref2s, r, upb_value_ptr(NULL));
2791 CHECK_OOM(ok);
2792 }
2793 }
2794 upb_unlock();
2795 }
2796
/* Reverses one track() call for (r, owner).  Refs owned by UPB_UNTRACKED_REF
 * are ignored.  When the count for the pair drops to zero the trackedref is
 * freed and removed from r->refs, and for ref2s `r` is removed from the
 * owner's ref2s table.  Runs under upb_lock(). */
2797 static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
2798 upb_value v;
2799 bool found;
2800 trackedref *ref;
2801
2802 assert(owner);
2803 if (owner == UPB_UNTRACKED_REF) return;
2804
2805 upb_lock();
2806 found = upb_inttable_lookupptr(r->refs, owner, &v);
2807 /* This assert will fail if an owner attempts to release a ref it didn't have. */
2808 UPB_ASSERT_VAR(found, found);
2809 ref = upb_value_getptr(v);
2810 assert(ref->is_ref2 == ref2);
2811 if (--ref->count == 0) {
2812 free(ref);
2813 upb_inttable_removeptr(r->refs, owner, NULL);
2814 if (ref2) {
2815 /* We know this cast is safe when it is a ref2, because it's coming from
2816 * another refcounted object. */
2817 const upb_refcounted *from = owner;
2818 bool removed = upb_inttable_removeptr(from->ref2s, r, NULL);
2819 assert(removed);
2820 }
2821 }
2822 upb_unlock();
2823 }
2824
/* Asserts that `owner` currently holds a tracked ref on `r` and that its kind
 * (ref vs. ref2) matches `ref2`.  Runs under upb_lock(). */
2825 static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
2826 upb_value v;
2827 bool found;
2828 trackedref *ref;
2829
2830 upb_lock();
2831 found = upb_inttable_lookupptr(r->refs, owner, &v);
2832 UPB_ASSERT_VAR(found, found);
2833 ref = upb_value_getptr(v);
2834 assert(ref->is_ref2 == ref2);
2835 upb_unlock();
2836 }
2837
2838 /* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
2839 * originate from the given owner.  Keys are the target objects; each count is
 * read from the target's own `refs` table.  Runs under upb_lock(). */
2840 static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
2841 upb_inttable_iter i;
2842
2843 upb_lock();
2844 upb_inttable_begin(&i, owner->ref2s);
2845 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
2846 upb_value v;
2847 upb_value count;
2848 trackedref *ref;
2849 bool ok;
2850 bool found;
2851
2852 upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i);
2853
2854 /* To get the count we need to look in the target's table. */
2855 found = upb_inttable_lookupptr(to->refs, owner, &v);
2856 assert(found);
2857 ref = upb_value_getptr(v);
2858 count = upb_value_int32(ref->count);
2859
2860 ok = upb_inttable_insertptr(tab, to, count);
2861 CHECK_OOM(ok);
2862 }
2863 upb_unlock();
2864 }
2865
/* Closure passed to visit_check() while a visit function is being verified. */
2866 typedef struct {
2867 upb_inttable ref2; /* target object -> number of ref2s still to be visited. */
2868 const upb_refcounted *obj; /* the object whose visit function is checked. */
2869 } check_state;
2870
/* Visit callback used to verify an object's visit function.  Each reported
 * `subobj` consumes one expected ref2 from the table in the check_state passed
 * as `closure`: the entry is removed and, if refs remain, re-inserted with
 * the count decremented. */
2871 static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
2872 void *closure) {
2873 check_state *s = closure;
2874 upb_inttable *ref2 = &s->ref2;
2875 upb_value v;
2876 bool removed;
2877 int32_t newcount;
2878
2879 assert(obj == s->obj);
2880 assert(subobj);
2881 removed = upb_inttable_removeptr(ref2, subobj, &v);
2882 /* The following assertion will fail if the visit() function visits a subobj
2883 * that it did not have a ref2 on, or visits the same subobj too many times. */
2884 assert(removed);
2885 newcount = upb_value_getint32(v) - 1;
2886 if (newcount > 0) {
2887 upb_inttable_insert(ref2, (uintptr_t)subobj, upb_value_int32(newcount));
2888 }
2889 }
2890
/* UPB_DEBUG_REFS version of visit(): first runs the object's visit function
 * with visit_check() to verify that it reports exactly the tracked ref2s of
 * `r`, then runs it again with the caller's callback `v` and `closure`.
 * Objects whose vtbl has no visit function are skipped in both passes. */
2891 static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
2892 void *closure) {
2893 bool ok;
2894
2895 /* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
2896 * exactly the set of nodes that visit() should visit. So we verify visit()'s
2897 * correctness here. */
2898 check_state state;
2899 state.obj = r;
2900 ok = upb_inttable_init(&state.ref2, UPB_CTYPE_INT32);
2901 CHECK_OOM(ok);
2902 getref2s(r, &state.ref2);
2903
2904 /* This should visit any children in the ref2 table. */
2905 if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &state);
2906
2907 /* This assertion will fail if the visit() function missed any children. */
2908 assert(upb_inttable_count(&state.ref2) == 0);
2909 upb_inttable_uninit(&state.ref2);
2910 if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
2911 }
2912
/* Allocates and initializes the `refs` and `ref2s` tracking tables of `r`.
 * Returns false if an allocation or table initialization fails; in that case
 * both allocations have been freed again (r->refs and r->ref2s are not reset
 * to NULL). */
2913 static bool trackinit(upb_refcounted *r) {
2914 r->refs = malloc(sizeof(*r->refs));
2915 r->ref2s = malloc(sizeof(*r->ref2s));
2916 if (!r->refs || !r->ref2s) goto err1;
2917
2918 if (!upb_inttable_init(r->refs, UPB_CTYPE_PTR)) goto err1;
2919 if (!upb_inttable_init(r->ref2s, UPB_CTYPE_PTR)) goto err2;
2920 return true;
2921
2922 err2:
2923 upb_inttable_uninit(r->refs);
2924 err1:
2925 free(r->refs);
2926 free(r->ref2s);
2927 return false;
2928 }
2929
2930 static void trackfree(const upb_refcounted *r) {
2931 upb_inttable_uninit(r->refs);
2932 upb_inttable_uninit(r->ref2s);
2933 free(r->refs);
2934 free(r->ref2s);
2935 }
2936
2937 #else
2938
2939 static void track(const upb_refcounted *r, const void *owner, bool ref2) {
2940 UPB_UNUSED(r);
2941 UPB_UNUSED(owner);
2942 UPB_UNUSED(ref2);
2943 }
2944
2945 static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
2946 UPB_UNUSED(r);
2947 UPB_UNUSED(owner);
2948 UPB_UNUSED(ref2);
2949 }
2950
2951 static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
2952 UPB_UNUSED(r);
2953 UPB_UNUSED(owner);
2954 UPB_UNUSED(ref2);
2955 }
2956
2957 static bool trackinit(upb_refcounted *r) {
2958 UPB_UNUSED(r);
2959 return true;
2960 }
2961
2962 static void trackfree(const upb_refcounted *r) {
2963 UPB_UNUSED(r);
2964 }
2965
2966 static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
2967 void *closure) {
2968 if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
2969 }
2970
2971 #endif /* UPB_DEBUG_REFS */
2972
2973
2974 /* freeze() *******************************************************************/
2975
2976 /* The freeze() operation is by far the most complicated part of this scheme.
2977 * We compute strongly-connected components and then mutate the graph such that
2978 * we preserve the invariants documented at the top of this file. And we must
2979 * handle out-of-memory errors gracefully (without leaving the graph
2980 * inconsistent), which adds to the fun. */
2981
2982 /* The state used by the freeze operation (shared across many functions). */
2983 typedef struct {
2984 int depth;
2985 int maxdepth;
2986 uint64_t index;
2987 /* Maps upb_refcounted* -> attributes (color, etc). attr layout varies by
2988 * color. */
2989 upb_inttable objattr;
2990 upb_inttable stack; /* stack of upb_refcounted* for Tarjan's algorithm. */
2991 upb_inttable groups; /* array of uint32_t*, malloc'd refcounts for new groups */
2992 upb_status *status;
2993 jmp_buf err;
2994 } tarjan;
2995
2996 static void release_ref2(const upb_refcounted *obj,
2997 const upb_refcounted *subobj,
2998 void *closure);
2999
3000 /* Node attributes -----------------------------------------------------------*/
3001
3002 /* After our analysis phase all nodes will be either GRAY or WHITE. */
3003
/* Node colors used during freeze().  The numeric order matters: callers
 * compare with "< GREEN" and "> BLACK", and the value must fit in the two low
 * bits of a node's attribute word. */
typedef enum {
  /* Object has not been seen. */
  BLACK = 0,
  /* Object has been found via a refgroup but may not be reachable. */
  GRAY = 1,
  /* Object is reachable and is currently on the Tarjan stack. */
  GREEN = 2,
  /* Object is reachable and has been assigned a group (SCC). */
  WHITE = 3
} color_t;
3010
3011 UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); }
3012 UPB_NORETURN static void oom(tarjan *t) {
3013 upb_status_seterrmsg(t->status, "out of memory");
3014 err(t);
3015 }
3016
3017 static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) {
3018 upb_value v;
3019 return upb_inttable_lookupptr(&t->objattr, r, &v) ?
3020 upb_value_getuint64(v) : 0;
3021 }
3022
3023 static uint64_t getattr(const tarjan *t, const upb_refcounted *r) {
3024 upb_value v;
3025 bool found = upb_inttable_lookupptr(&t->objattr, r, &v);
3026 UPB_ASSERT_VAR(found, found);
3027 return upb_value_getuint64(v);
3028 }
3029
3030 static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
3031 upb_inttable_removeptr(&t->objattr, r, NULL);
3032 upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr));
3033 }
3034
3035 static color_t color(tarjan *t, const upb_refcounted *r) {
3036 return trygetattr(t, r) & 0x3; /* Color is always stored in the low 2 bits. * /
3037 }
3038
3039 static void set_gray(tarjan *t, const upb_refcounted *r) {
3040 assert(color(t, r) == BLACK);
3041 setattr(t, r, GRAY);
3042 }
3043
3044 /* Pushes an obj onto the Tarjan stack and sets it to GREEN. */
3045 static void push(tarjan *t, const upb_refcounted *r) {
3046 assert(color(t, r) == BLACK || color(t, r) == GRAY);
3047 /* This defines the attr layout for the GREEN state. "index" and "lowlink"
3048 * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */
3049 setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
3050 if (++t->index == 0x80000000) {
3051 upb_status_seterrmsg(t->status, "too many objects to freeze");
3052 err(t);
3053 }
3054 upb_inttable_push(&t->stack, upb_value_ptr((void*)r));
3055 }
3056
3057 /* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
3058 * SCC group. */
3059 static upb_refcounted *pop(tarjan *t) {
3060 upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack));
3061 assert(color(t, r) == GREEN);
3062 /* This defines the attr layout for nodes in the WHITE state.
3063 * Top of group stack is [group, NULL]; we point at group. */
3064 setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8);
3065 return r;
3066 }
3067
3068 static void tarjan_newgroup(tarjan *t) {
3069 uint32_t *group = malloc(sizeof(*group));
3070 if (!group) oom(t);
3071 /* Push group and empty group leader (we'll fill in leader later). */
3072 if (!upb_inttable_push(&t->groups, upb_value_ptr(group)) ||
3073 !upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
3074 free(group);
3075 oom(t);
3076 }
3077 *group = 0;
3078 }
3079
3080 static uint32_t idx(tarjan *t, const upb_refcounted *r) {
3081 assert(color(t, r) == GREEN);
3082 return (getattr(t, r) >> 2) & 0x7FFFFFFF;
3083 }
3084
3085 static uint32_t lowlink(tarjan *t, const upb_refcounted *r) {
3086 if (color(t, r) == GREEN) {
3087 return getattr(t, r) >> 33;
3088 } else {
3089 return UINT32_MAX;
3090 }
3091 }
3092
3093 static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
3094 assert(color(t, r) == GREEN);
3095 setattr(t, r, ((uint64_t)lowlink << 33) | (getattr(t, r) & 0x1FFFFFFFF));
3096 }
3097
3098 static uint32_t *group(tarjan *t, upb_refcounted *r) {
3099 uint64_t groupnum;
3100 upb_value v;
3101 bool found;
3102
3103 assert(color(t, r) == WHITE);
3104 groupnum = getattr(t, r) >> 8;
3105 found = upb_inttable_lookup(&t->groups, groupnum, &v);
3106 UPB_ASSERT_VAR(found, found);
3107 return upb_value_getptr(v);
3108 }
3109
3110 /* If the group leader for this object's group has not previously been set,
3111 * the given object is assigned to be its leader. */
3112 static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
3113 uint64_t leader_slot;
3114 upb_value v;
3115 bool found;
3116
3117 assert(color(t, r) == WHITE);
3118 leader_slot = (getattr(t, r) >> 8) + 1;
3119 found = upb_inttable_lookup(&t->groups, leader_slot, &v);
3120 UPB_ASSERT_VAR(found, found);
3121 if (upb_value_getptr(v)) {
3122 return upb_value_getptr(v);
3123 } else {
3124 upb_inttable_remove(&t->groups, leader_slot, NULL);
3125 upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r));
3126 return r;
3127 }
3128 }
3129
3130
3131 /* Tarjan's algorithm --------------------------------------------------------*/
3132
/* See:
 * http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */
3135 static void do_tarjan(const upb_refcounted *obj, tarjan *t);
3136
3137 static void tarjan_visit(const upb_refcounted *obj,
3138 const upb_refcounted *subobj,
3139 void *closure) {
3140 tarjan *t = closure;
3141 if (++t->depth > t->maxdepth) {
3142 upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
3143 err(t);
3144 } else if (subobj->is_frozen || color(t, subobj) == WHITE) {
3145 /* Do nothing: we don't want to visit or color already-frozen nodes,
3146 * and WHITE nodes have already been assigned a SCC. */
3147 } else if (color(t, subobj) < GREEN) {
3148 /* Subdef has not yet been visited; recurse on it. */
3149 do_tarjan(subobj, t);
3150 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
3151 } else if (color(t, subobj) == GREEN) {
3152 /* Subdef is in the stack and hence in the current SCC. */
3153 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
3154 }
3155 --t->depth;
3156 }
3157
3158 static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
3159 if (color(t, obj) == BLACK) {
3160 /* We haven't seen this object's group; mark the whole group GRAY. */
3161 const upb_refcounted *o = obj;
3162 do { set_gray(t, o); } while ((o = o->next) != obj);
3163 }
3164
3165 push(t, obj);
3166 visit(obj, tarjan_visit, t);
3167 if (lowlink(t, obj) == idx(t, obj)) {
3168 tarjan_newgroup(t);
3169 while (pop(t) != obj)
3170 ;
3171 }
3172 }
3173
3174
3175 /* freeze() ------------------------------------------------------------------*/
3176
3177 static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
3178 void *_t) {
3179 tarjan *t = _t;
3180 assert(color(t, r) > BLACK);
3181 if (color(t, subobj) > BLACK && r->group != subobj->group) {
3182 /* Previously this ref was not reflected in subobj->group because they
3183 * were in the same group; now that they are split a ref must be taken. */
3184 refgroup(subobj->group);
3185 }
3186 }
3187
3188 static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
3189 int maxdepth) {
3190 volatile bool ret = false;
3191 int i;
3192 upb_inttable_iter iter;
3193
3194 /* We run in two passes so that we can allocate all memory before performing
3195 * any mutation of the input -- this allows us to leave the input unchanged
3196 * in the case of memory allocation failure. */
3197 tarjan t;
3198 t.index = 0;
3199 t.depth = 0;
3200 t.maxdepth = maxdepth;
3201 t.status = s;
3202 if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
3203 if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
3204 if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
3205 if (setjmp(t.err) != 0) goto err4;
3206
3207
3208 for (i = 0; i < n; i++) {
3209 if (color(&t, roots[i]) < GREEN) {
3210 do_tarjan(roots[i], &t);
3211 }
3212 }
3213
3214 /* If we've made it this far, no further errors are possible so it's safe to
3215 * mutate the objects without risk of leaving them in an inconsistent state. * /
3216 ret = true;
3217
3218 /* The transformation that follows requires care. The preconditions are:
3219 * - all objects in attr map are WHITE or GRAY, and are in mutable groups
3220 * (groups of all mutable objs)
3221 * - no ref2(to, from) refs have incremented count(to) if both "to" and
3222 * "from" are in our attr map (this follows from invariants (2) and (3)) */
3223
3224 /* Pass 1: we remove WHITE objects from their mutable groups, and add them to
3225 * new groups according to the SCC's we computed. These new groups will
3226 * consist of only frozen objects. None will be immediately collectible,
3227 * because WHITE objects are by definition reachable from one of "roots",
3228 * which the caller must own refs on. */
3229 upb_inttable_begin(&iter, &t.objattr);
3230 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
3231 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
3232 /* Since removal from a singly-linked list requires access to the object's
3233 * predecessor, we consider obj->next instead of obj for moving. With the
3234 * while() loop we guarantee that we will visit every node's predecessor.
3235 * Proof:
3236 * 1. every node's predecessor is in our attr map.
3237 * 2. though the loop body may change a node's predecessor, it will only
3238 * change it to be the node we are currently operating on, so with a
3239 * while() loop we guarantee ourselves the chance to remove each node. * /
3240 while (color(&t, obj->next) == WHITE &&
3241 group(&t, obj->next) != obj->next->group) {
3242 upb_refcounted *leader;
3243
3244 /* Remove from old group. */
3245 upb_refcounted *move = obj->next;
3246 if (obj == move) {
3247 /* Removing the last object from a group. */
3248 assert(*obj->group == obj->individual_count);
3249 free(obj->group);
3250 } else {
3251 obj->next = move->next;
3252 /* This may decrease to zero; we'll collect GRAY objects (if any) that
3253 * remain in the group in the third pass. */
3254 assert(*move->group >= move->individual_count);
3255 *move->group -= move->individual_count;
3256 }
3257
3258 /* Add to new group. */
3259 leader = groupleader(&t, move);
3260 if (move == leader) {
3261 /* First object added to new group is its leader. */
3262 move->group = group(&t, move);
3263 move->next = move;
3264 *move->group = move->individual_count;
3265 } else {
3266 /* Group already has at least one object in it. */
3267 assert(leader->group == group(&t, move));
3268 move->group = group(&t, move);
3269 move->next = leader->next;
3270 leader->next = move;
3271 *move->group += move->individual_count;
3272 }
3273
3274 move->is_frozen = true;
3275 }
3276 }
3277
3278 /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
3279 * increment count(to) if group(obj) != group(to) (which could now be the
3280 * case if "to" was just frozen). */
3281 upb_inttable_begin(&iter, &t.objattr);
3282 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
3283 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
3284 visit(obj, crossref, &t);
3285 }
3286
3287 /* Pass 3: GRAY objects are collected if their group's refcount dropped to
3288 * zero when we removed its white nodes. This can happen if they had only
3289 * been kept alive by virtue of sharing a group with an object that was just
3290 * frozen.
3291 *
3292 * It is important that we do this last, since the GRAY object's free()
3293 * function could call unref2() on just-frozen objects, which will decrement
3294 * refs that were added in pass 2. */
3295 upb_inttable_begin(&iter, &t.objattr);
3296 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
3297 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
3298 if (obj->group == NULL || *obj->group == 0) {
3299 if (obj->group) {
3300 upb_refcounted *o;
3301
3302 /* We eagerly free() the group's count (since we can't easily determine
3303 * the group's remaining size it's the easiest way to ensure it gets
3304 * done). */
3305 free(obj->group);
3306
3307 /* Visit to release ref2's (done in a separate pass since release_ref2
3308 * depends on o->group being unmodified so it can test merged()). */
3309 o = obj;
3310 do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);
3311
3312 /* Mark "group" fields as NULL so we know to free the objects later in
3313 * this loop, but also don't try to delete the group twice. */
3314 o = obj;
3315 do { o->group = NULL; } while ((o = o->next) != obj);
3316 }
3317 freeobj(obj);
3318 }
3319 }
3320
3321 err4:
3322 if (!ret) {
3323 upb_inttable_begin(&iter, &t.groups);
3324 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter))
3325 free(upb_value_getptr(upb_inttable_iter_value(&iter)));
3326 }
3327 upb_inttable_uninit(&t.groups);
3328 err3:
3329 upb_inttable_uninit(&t.stack);
3330 err2:
3331 upb_inttable_uninit(&t.objattr);
3332 err1:
3333 return ret;
3334 }
3335
3336
3337 /* Misc internal functions ***************************************************/
3338
3339 static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
3340 return r->group == r2->group;
3341 }
3342
3343 static void merge(upb_refcounted *r, upb_refcounted *from) {
3344 upb_refcounted *base;
3345 upb_refcounted *tmp;
3346
3347 if (merged(r, from)) return;
3348 *r->group += *from->group;
3349 free(from->group);
3350 base = from;
3351
3352 /* Set all refcount pointers in the "from" chain to the merged refcount.
3353 *
3354 * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
3355 * if the user continuously extends a group by one object. Prevent this by
3356 * using one of the techniques in this paper:
3357 * ftp://www.ncedc.org/outgoing/geomorph/dino/orals/p245-tarjan.pdf */
3358 do { from->group = r->group; } while ((from = from->next) != base);
3359
3360 /* Merge the two circularly linked lists by swapping their next pointers. */
3361 tmp = r->next;
3362 r->next = base->next;
3363 base->next = tmp;
3364 }
3365
3366 static void unref(const upb_refcounted *r);
3367
3368 static void release_ref2(const upb_refcounted *obj,
3369 const upb_refcounted *subobj,
3370 void *closure) {
3371 UPB_UNUSED(closure);
3372 untrack(subobj, obj, true);
3373 if (!merged(obj, subobj)) {
3374 assert(subobj->is_frozen);
3375 unref(subobj);
3376 }
3377 }
3378
3379 static void unref(const upb_refcounted *r) {
3380 if (unrefgroup(r->group)) {
3381 const upb_refcounted *o;
3382
3383 free(r->group);
3384
3385 /* In two passes, since release_ref2 needs a guarantee that any subobjs
3386 * are alive. */
3387 o = r;
3388 do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
3389
3390 o = r;
3391 do {
3392 const upb_refcounted *next = o->next;
3393 assert(o->is_frozen || o->individual_count == 0);
3394 freeobj((upb_refcounted*)o);
3395 o = next;
3396 } while(o != r);
3397 }
3398 }
3399
3400 static void freeobj(upb_refcounted *o) {
3401 trackfree(o);
3402 o->vtbl->free((upb_refcounted*)o);
3403 }
3404
3405
3406 /* Public interface ***********************************************************/
3407
3408 bool upb_refcounted_init(upb_refcounted *r,
3409 const struct upb_refcounted_vtbl *vtbl,
3410 const void *owner) {
3411 #ifndef NDEBUG
3412 /* Endianness check. This is unrelated to upb_refcounted, it's just a
3413 * convenient place to put the check that we can be assured will run for
3414 * basically every program using upb. */
3415 const int x = 1;
3416 #ifdef UPB_BIG_ENDIAN
3417 assert(*(char*)&x != 1);
3418 #else
3419 assert(*(char*)&x == 1);
3420 #endif
3421 #endif
3422
3423 r->next = r;
3424 r->vtbl = vtbl;
3425 r->individual_count = 0;
3426 r->is_frozen = false;
3427 r->group = malloc(sizeof(*r->group));
3428 if (!r->group) return false;
3429 *r->group = 0;
3430 if (!trackinit(r)) {
3431 free(r->group);
3432 return false;
3433 }
3434 upb_refcounted_ref(r, owner);
3435 return true;
3436 }
3437
3438 bool upb_refcounted_isfrozen(const upb_refcounted *r) {
3439 return r->is_frozen;
3440 }
3441
3442 void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
3443 track(r, owner, false);
3444 if (!r->is_frozen)
3445 ((upb_refcounted*)r)->individual_count++;
3446 refgroup(r->group);
3447 }
3448
3449 void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
3450 untrack(r, owner, false);
3451 if (!r->is_frozen)
3452 ((upb_refcounted*)r)->individual_count--;
3453 unref(r);
3454 }
3455
3456 void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
3457 assert(!from->is_frozen); /* Non-const pointer implies this. */
3458 track(r, from, true);
3459 if (r->is_frozen) {
3460 refgroup(r->group);
3461 } else {
3462 merge((upb_refcounted*)r, from);
3463 }
3464 }
3465
3466 void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
3467 assert(!from->is_frozen); /* Non-const pointer implies this. */
3468 untrack(r, from, true);
3469 if (r->is_frozen) {
3470 unref(r);
3471 } else {
3472 assert(merged(r, from));
3473 }
3474 }
3475
3476 void upb_refcounted_donateref(
3477 const upb_refcounted *r, const void *from, const void *to) {
3478 assert(from != to);
3479 if (to != NULL)
3480 upb_refcounted_ref(r, to);
3481 if (from != NULL)
3482 upb_refcounted_unref(r, from);
3483 }
3484
3485 void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
3486 checkref(r, owner, false);
3487 }
3488
3489 bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
3490 int maxdepth) {
3491 int i;
3492 for (i = 0; i < n; i++) {
3493 assert(!roots[i]->is_frozen);
3494 }
3495 return freeze(roots, n, s, maxdepth);
3496 }
3497
3498
3499 #include <stdlib.h>
3500
3501 /* Fallback implementation if the shim is not specialized by the JIT. */
3502 #define SHIM_WRITER(type, ctype) \
3503 bool upb_shim_set ## type (void *c, const void *hd, ctype val) { \
3504 uint8_t *m = c; \
3505 const upb_shim_data *d = hd; \
3506 if (d->hasbit > 0) \
3507 *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8); \
3508 *(ctype*)&m[d->offset] = val; \
3509 return true; \
3510 } \
3511
3512 SHIM_WRITER(double, double)
3513 SHIM_WRITER(float, float)
3514 SHIM_WRITER(int32, int32_t)
3515 SHIM_WRITER(int64, int64_t)
3516 SHIM_WRITER(uint32, uint32_t)
3517 SHIM_WRITER(uint64, uint64_t)
3518 SHIM_WRITER(bool, bool)
3519 #undef SHIM_WRITER
3520
3521 bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
3522 int32_t hasbit) {
3523 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
3524 bool ok;
3525
3526 upb_shim_data *d = malloc(sizeof(*d));
3527 if (!d) return false;
3528 d->offset = offset;
3529 d->hasbit = hasbit;
3530
3531 upb_handlerattr_sethandlerdata(&attr, d);
3532 upb_handlerattr_setalwaysok(&attr, true);
3533 upb_handlers_addcleanup(h, d, free);
3534
3535 #define TYPE(u, l) \
3536 case UPB_TYPE_##u: \
3537 ok = upb_handlers_set##l(h, f, upb_shim_set##l, &attr); break;
3538
3539 ok = false;
3540
3541 switch (upb_fielddef_type(f)) {
3542 TYPE(INT64, int64);
3543 TYPE(INT32, int32);
3544 TYPE(ENUM, int32);
3545 TYPE(UINT64, uint64);
3546 TYPE(UINT32, uint32);
3547 TYPE(DOUBLE, double);
3548 TYPE(FLOAT, float);
3549 TYPE(BOOL, bool);
3550 default: assert(false); break;
3551 }
3552 #undef TYPE
3553
3554 upb_handlerattr_uninit(&attr);
3555 return ok;
3556 }
3557
3558 const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s,
3559 upb_fieldtype_t *type) {
3560 upb_func *f = upb_handlers_gethandler(h, s);
3561
3562 if ((upb_int64_handlerfunc*)f == upb_shim_setint64) {
3563 *type = UPB_TYPE_INT64;
3564 } else if ((upb_int32_handlerfunc*)f == upb_shim_setint32) {
3565 *type = UPB_TYPE_INT32;
3566 } else if ((upb_uint64_handlerfunc*)f == upb_shim_setuint64) {
3567 *type = UPB_TYPE_UINT64;
3568 } else if ((upb_uint32_handlerfunc*)f == upb_shim_setuint32) {
3569 *type = UPB_TYPE_UINT32;
3570 } else if ((upb_double_handlerfunc*)f == upb_shim_setdouble) {
3571 *type = UPB_TYPE_DOUBLE;
3572 } else if ((upb_float_handlerfunc*)f == upb_shim_setfloat) {
3573 *type = UPB_TYPE_FLOAT;
3574 } else if ((upb_bool_handlerfunc*)f == upb_shim_setbool) {
3575 *type = UPB_TYPE_BOOL;
3576 } else {
3577 return NULL;
3578 }
3579
3580 return (const upb_shim_data*)upb_handlers_gethandlerdata(h, s);
3581 }
3582
3583
3584 #include <stdlib.h>
3585 #include <string.h>
3586
3587 static void upb_symtab_free(upb_refcounted *r) {
3588 upb_symtab *s = (upb_symtab*)r;
3589 upb_strtable_iter i;
3590 upb_strtable_begin(&i, &s->symtab);
3591 for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
3592 const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
3593 upb_def_unref(def, s);
3594 }
3595 upb_strtable_uninit(&s->symtab);
3596 free(s);
3597 }
3598
3599
3600 upb_symtab *upb_symtab_new(const void *owner) {
3601 static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free};
3602 upb_symtab *s = malloc(sizeof(*s));
3603 upb_refcounted_init(upb_symtab_upcast_mutable(s), &vtbl, owner);
3604 upb_strtable_init(&s->symtab, UPB_CTYPE_PTR);
3605 return s;
3606 }
3607
3608 void upb_symtab_freeze(upb_symtab *s) {
3609 upb_refcounted *r;
3610 bool ok;
3611
3612 assert(!upb_symtab_isfrozen(s));
3613 r = upb_symtab_upcast_mutable(s);
3614 /* The symtab does not take ref2's (see refcounted.h) on the defs, because
3615 * defs cannot refer back to the table and therefore cannot create cycles. So
3616 * 0 will suffice for maxdepth here. */
3617 ok = upb_refcounted_freeze(&r, 1, NULL, 0);
3618 UPB_ASSERT_VAR(ok, ok);
3619 }
3620
3621 const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
3622 upb_value v;
3623 upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ?
3624 upb_value_getptr(v) : NULL;
3625 return ret;
3626 }
3627
3628 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
3629 upb_value v;
3630 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3631 upb_value_getptr(v) : NULL;
3632 return def ? upb_dyncast_msgdef(def) : NULL;
3633 }
3634
3635 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
3636 upb_value v;
3637 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3638 upb_value_getptr(v) : NULL;
3639 return def ? upb_dyncast_enumdef(def) : NULL;
3640 }
3641
3642 /* Given a symbol and the base symbol inside which it is defined, find the
3643 * symbol's definition in t. */
3644 static upb_def *upb_resolvename(const upb_strtable *t,
3645 const char *base, const char *sym) {
3646 if(strlen(sym) == 0) return NULL;
3647 if(sym[0] == '.') {
3648 /* Symbols starting with '.' are absolute, so we do a single lookup.
3649 * Slice to omit the leading '.' */
3650 upb_value v;
3651 return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL;
3652 } else {
3653 /* Remove components from base until we find an entry or run out.
3654 * TODO: This branch is totally broken, but currently not used. */
3655 (void)base;
3656 assert(false);
3657 return NULL;
3658 }
3659 }
3660
3661 const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
3662 const char *sym) {
3663 upb_def *ret = upb_resolvename(&s->symtab, base, sym);
3664 return ret;
3665 }
3666
3667 /* Starts a depth-first traversal at "def", recursing into any subdefs
3668 * (ie. submessage types). Adds duplicates of existing defs to addtab
3669 * wherever necessary, so that the resulting symtab will be consistent once
3670 * addtab is added.
3671 *
3672 * More specifically, if any def D is found in the DFS that:
3673 *
3674 * 1. can reach a def that is being replaced by something in addtab, AND
3675 *
3676 * 2. is not itself being replaced already (ie. this name doesn't already
3677 * exist in addtab)
3678 *
3679 * ...then a duplicate (new copy) of D will be added to addtab.
3680 *
3681 * Returns true if this happened for any def reachable from "def."
3682 *
3683 * It is slightly tricky to do this correctly in the presence of cycles. If we
3684 * detect that our DFS has hit a cycle, we might not yet know if any SCCs on
3685 * our stack can reach a def in addtab or not. Once we figure this out, that
3686 * answer needs to apply to *all* defs in these SCCs, even if we visited them
3687 * already. So a straight up one-pass cycle-detecting DFS won't work.
3688 *
3689 * To work around this problem, we traverse each SCC (which we already
3690 * computed, since these defs are frozen) as a single node. We first compute
3691 * whether the SCC as a whole can reach any def in addtab, then we dup (or not)
3692 * the entire SCC. This requires breaking the encapsulation of upb_refcounted,
3693 * since that is where we get the data about what SCC we are in. */
3694 static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
3695 const void *new_owner, upb_inttable *seen,
3696 upb_status *s) {
3697 upb_value v;
3698 bool need_dup;
3699 const upb_def *base;
3700 const void* memoize_key;
3701
3702 /* Memoize results of this function for efficiency (since we're traversing a
3703 * DAG this is not needed to limit the depth of the search).
3704 *
3705 * We memoize by SCC instead of by individual def. */
3706 memoize_key = def->base.group;
3707
3708 if (upb_inttable_lookupptr(seen, memoize_key, &v))
3709 return upb_value_getbool(v);
3710
3711 /* Visit submessages for all messages in the SCC. */
3712 need_dup = false;
3713 base = def;
3714 do {
3715 upb_value v;
3716 const upb_msgdef *m;
3717
3718 assert(upb_def_isfrozen(def));
3719 if (def->type == UPB_DEF_FIELD) continue;
3720 if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) {
3721 need_dup = true;
3722 }
3723
3724 /* For messages, continue the recursion by visiting all subdefs, but only
3725 * ones in different SCCs. */
3726 m = upb_dyncast_msgdef(def);
3727 if (m) {
3728 upb_msg_field_iter i;
3729 for(upb_msg_field_begin(&i, m);
3730 !upb_msg_field_done(&i);
3731 upb_msg_field_next(&i)) {
3732 upb_fielddef *f = upb_msg_iter_field(&i);
3733 const upb_def *subdef;
3734
3735 if (!upb_fielddef_hassubdef(f)) continue;
3736 subdef = upb_fielddef_subdef(f);
3737
3738 /* Skip subdefs in this SCC. */
3739 if (def->base.group == subdef->base.group) continue;
3740
3741 /* |= to avoid short-circuit; we need its side-effects. */
3742 need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s);
3743 if (!upb_ok(s)) return false;
3744 }
3745 }
3746 } while ((def = (upb_def*)def->base.next) != base);
3747
3748 if (need_dup) {
3749 /* Dup all defs in this SCC that don't already have entries in addtab. */
3750 def = base;
3751 do {
3752 const char *name;
3753
3754 if (def->type == UPB_DEF_FIELD) continue;
3755 name = upb_def_fullname(def);
3756 if (!upb_strtable_lookup(addtab, name, NULL)) {
3757 upb_def *newdef = upb_def_dup(def, new_owner);
3758 if (!newdef) goto oom;
3759 newdef->came_from_user = false;
3760 if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef)))
3761 goto oom;
3762 }
3763 } while ((def = (upb_def*)def->base.next) != base);
3764 }
3765
3766 upb_inttable_insertptr(seen, memoize_key, upb_value_bool(need_dup));
3767 return need_dup;
3768
3769 oom:
3770 upb_status_seterrmsg(s, "out of memory");
3771 return false;
3772 }
3773
3774 /* TODO(haberman): we need a lot more testing of error conditions.
3775 * The came_from_user stuff in particular is not tested. */
3776 bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
3777 upb_status *status) {
3778 int i;
3779 upb_strtable_iter iter;
3780 upb_def **add_defs = NULL;
3781 upb_strtable addtab;
3782 upb_inttable seen;
3783
3784 assert(!upb_symtab_isfrozen(s));
3785 if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
3786 upb_status_seterrmsg(status, "out of memory");
3787 return false;
3788 }
3789
3790 /* Add new defs to our "add" set. */
3791 for (i = 0; i < n; i++) {
3792 upb_def *def = defs[i];
3793 const char *fullname;
3794 upb_fielddef *f;
3795
3796 if (upb_def_isfrozen(def)) {
3797 upb_status_seterrmsg(status, "added defs must be mutable");
3798 goto err;
3799 }
3800 assert(!upb_def_isfrozen(def));
3801 fullname = upb_def_fullname(def);
3802 if (!fullname) {
3803 upb_status_seterrmsg(
3804 status, "Anonymous defs cannot be added to a symtab");
3805 goto err;
3806 }
3807
3808 f = upb_dyncast_fielddef_mutable(def);
3809
3810 if (f) {
3811 if (!upb_fielddef_containingtypename(f)) {
3812 upb_status_seterrmsg(status,
3813 "Standalone fielddefs must have a containing type "
3814 "(extendee) name set");
3815 goto err;
3816 }
3817 } else {
3818 if (upb_strtable_lookup(&addtab, fullname, NULL)) {
3819 upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
3820 goto err;
3821 }
3822 /* We need this to back out properly, because if there is a failure we
3823 * need to donate the ref back to the caller. */
3824 def->came_from_user = true;
3825 upb_def_donateref(def, ref_donor, s);
3826 if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
3827 goto oom_err;
3828 }
3829 }
3830
3831 /* Add standalone fielddefs (ie. extensions) to the appropriate messages.
3832 * If the appropriate message only exists in the existing symtab, duplicate
3833 * it so we have a mutable copy we can add the fields to. */
3834 for (i = 0; i < n; i++) {
3835 upb_def *def = defs[i];
3836 upb_fielddef *f = upb_dyncast_fielddef_mutable(def);
3837 const char *msgname;
3838 upb_value v;
3839 upb_msgdef *m;
3840
3841 if (!f) continue;
3842 msgname = upb_fielddef_containingtypename(f);
3843 /* We validated this earlier in this function. */
3844 assert(msgname);
3845
3846 /* If the extendee name is absolutely qualified, move past the initial ".".
3847 * TODO(haberman): it is not obvious what it would mean if this was not
3848 * absolutely qualified. */
3849 if (msgname[0] == '.') {
3850 msgname++;
3851 }
3852
3853 if (upb_strtable_lookup(&addtab, msgname, &v)) {
3854 /* Extendee is in the set of defs the user asked us to add. */
3855 m = upb_value_getptr(v);
3856 } else {
3857 /* Need to find and dup the extendee from the existing symtab. */
3858 const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname);
3859 if (!frozen_m) {
3860 upb_status_seterrf(status,
3861 "Tried to extend message %s that does not exist "
3862 "in this SymbolTable.",
3863 msgname);
3864 goto err;
3865 }
3866 m = upb_msgdef_dup(frozen_m, s);
3867 if (!m) goto oom_err;
3868 if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) {
3869 upb_msgdef_unref(m, s);
3870 goto oom_err;
3871 }
3872 }
3873
3874 if (!upb_msgdef_addfield(m, f, ref_donor, status)) {
3875 goto err;
3876 }
3877 }
3878
3879 /* Add dups of any existing def that can reach a def with the same name as
3880 * anything in our "add" set. */
3881 if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err;
3882 upb_strtable_begin(&iter, &s->symtab);
3883 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3884 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3885 upb_resolve_dfs(def, &addtab, s, &seen, status);
3886 if (!upb_ok(status)) goto err;
3887 }
3888 upb_inttable_uninit(&seen);
3889
3890 /* Now using the table, resolve symbolic references for subdefs. */
3891 upb_strtable_begin(&iter, &addtab);
3892 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3893 const char *base;
3894 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3895 upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
3896 upb_msg_field_iter j;
3897
3898 if (!m) continue;
3899 /* Type names are resolved relative to the message in which they appear. */
3900 base = upb_msgdef_fullname(m);
3901
3902 for(upb_msg_field_begin(&j, m);
3903 !upb_msg_field_done(&j);
3904 upb_msg_field_next(&j)) {
3905 upb_fielddef *f = upb_msg_iter_field(&j);
3906 const char *name = upb_fielddef_subdefname(f);
3907 if (name && !upb_fielddef_subdef(f)) {
3908 /* Try the lookup in the current set of to-be-added defs first. If not
3909 * there, try existing defs. */
3910 upb_def *subdef = upb_resolvename(&addtab, base, name);
3911 if (subdef == NULL) {
3912 subdef = upb_resolvename(&s->symtab, base, name);
3913 }
3914 if (subdef == NULL) {
3915 upb_status_seterrf(
3916 status, "couldn't resolve name '%s' in message '%s'", name, base);
3917 goto err;
3918 } else if (!upb_fielddef_setsubdef(f, subdef, status)) {
3919 goto err;
3920 }
3921 }
3922 }
3923 }
3924
3925 /* We need an array of the defs in addtab, for passing to upb_def_freeze. */
3926 add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab));
3927 if (add_defs == NULL) goto oom_err;
3928 upb_strtable_begin(&iter, &addtab);
3929 for (n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3930 add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
3931 }
3932
3933 if (!upb_def_freeze(add_defs, n, status)) goto err;
3934
3935 /* This must be delayed until all errors have been detected, since error
3936 * recovery code uses this table to cleanup defs. */
3937 upb_strtable_uninit(&addtab);
3938
3939 /* TODO(haberman) we don't properly handle errors after this point (like
3940 * OOM in upb_strtable_insert() below). */
3941 for (i = 0; i < n; i++) {
3942 upb_def *def = add_defs[i];
3943 const char *name = upb_def_fullname(def);
3944 upb_value v;
3945 bool success;
3946
3947 if (upb_strtable_remove(&s->symtab, name, &v)) {
3948 const upb_def *def = upb_value_getptr(v);
3949 upb_def_unref(def, s);
3950 }
3951 success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
3952 UPB_ASSERT_VAR(success, success == true);
3953 }
3954 free(add_defs);
3955 return true;
3956
3957 oom_err:
3958 upb_status_seterrmsg(status, "out of memory");
3959 err: {
3960 /* For defs the user passed in, we need to donate the refs back. For defs
3961 * we dup'd, we need to just unref them. */
3962 upb_strtable_begin(&iter, &addtab);
3963 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3964 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3965 bool came_from_user = def->came_from_user;
3966 def->came_from_user = false;
3967 if (came_from_user) {
3968 upb_def_donateref(def, s, ref_donor);
3969 } else {
3970 upb_def_unref(def, s);
3971 }
3972 }
3973 }
3974 upb_strtable_uninit(&addtab);
3975 free(add_defs);
3976 assert(!upb_ok(status));
3977 return false;
3978 }
3979
3980 /* Iteration. */
3981
3982 static void advance_to_matching(upb_symtab_iter *iter) {
3983 if (iter->type == UPB_DEF_ANY)
3984 return;
3985
3986 while (!upb_strtable_done(&iter->iter) &&
3987 iter->type != upb_symtab_iter_def(iter)->type) {
3988 upb_strtable_next(&iter->iter);
3989 }
3990 }
3991
3992 void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
3993 upb_deftype_t type) {
3994 upb_strtable_begin(&iter->iter, &s->symtab);
3995 iter->type = type;
3996 advance_to_matching(iter);
3997 }
3998
3999 void upb_symtab_next(upb_symtab_iter *iter) {
4000 upb_strtable_next(&iter->iter);
4001 advance_to_matching(iter);
4002 }
4003
4004 bool upb_symtab_done(const upb_symtab_iter *iter) {
4005 return upb_strtable_done(&iter->iter);
4006 }
4007
4008 const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
4009 return upb_value_getptr(upb_strtable_iter_value(&iter->iter));
4010 }
4011 /*
4012 ** upb_table Implementation
4013 **
4014 ** Implementation is heavily inspired by Lua's ltable.c.
4015 */
4016
4017
4018 #include <stdlib.h>
4019 #include <string.h>
4020
4021 #define UPB_MAXARRSIZE 16 /* 64k. */
4022
4023 /* From Chromium. */
4024 #define ARRAY_SIZE(x) \
4025 ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
4026
4027 static const double MAX_LOAD = 0.85;
4028
4029 /* The minimum utilization of the array part of a mixed hash/array table. This
4030 * is a speed/memory-usage tradeoff (though it's not straightforward because of
4031 * cache effects). The lower this is, the more memory we'll use. */
4032 static const double MIN_DENSITY = 0.1;
4033
/* Returns true when |v| is zero or has exactly one bit set. */
bool is_pow2(uint64_t v) {
  /* Clearing the lowest set bit leaves nothing behind for 0 and for exact
   * powers of two (for 0, v - 1 wraps to all-ones and the AND is still 0). */
  uint64_t without_lowest_bit = v & (v - 1);
  return without_lowest_bit == 0;
}
4035
4036 int log2ceil(uint64_t v) {
4037 int ret = 0;
4038 bool pow2 = is_pow2(v);
4039 while (v >>= 1) ret++;
4040 ret = pow2 ? ret : ret + 1; /* Ceiling. */
4041 return UPB_MIN(UPB_MAXARRSIZE, ret);
4042 }
4043
/* Duplicates the NUL-terminated string |s| via upb_strdup2(); returns whatever
 * upb_strdup2() returns for (s, strlen(s)). */
char *upb_strdup(const char *s) {
  size_t len = strlen(s);
  return upb_strdup2(s, len);
}
4047
/* Returns a freshly malloc()ed copy of the first |len| bytes of |s| with a
 * terminating NUL byte appended.  The input is treated as a raw buffer and is
 * not required to be NUL-terminated.  Returns NULL if len == SIZE_MAX or if
 * the allocation fails. */
char *upb_strdup2(const char *s, size_t len) {
  char *copy;

  /* len + 1 below would wrap around to 0 for SIZE_MAX. */
  if (len == SIZE_MAX) return NULL;

  copy = malloc(len + 1);
  if (copy == NULL) return NULL;

  memcpy(copy, s, len);
  copy[len] = 0;
  return copy;
}
4064
/* Lookup key handed to the shared table code.  Exactly one member is
 * meaningful: |num| for an inttable, |str| (pointer + byte length) for a
 * strtable. */
typedef union {
  uintptr_t num;
  struct {
    const char *str;
    size_t len;
  } str;
} lookupkey_t;

/* Builds a string lookup key that refers to (does not copy) |len| bytes at
 * |str|. */
static lookupkey_t strkey2(const char *str, size_t len) {
  lookupkey_t result;
  result.str.len = len;
  result.str.str = str;
  return result;
}

/* Builds an integer lookup key. */
static lookupkey_t intkey(uintptr_t key) {
  lookupkey_t result;
  result.num = key;
  return result;
}
4086
4087 typedef uint32_t hashfunc_t(upb_tabkey key);
4088 typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
4089
4090 /* Base table (shared code) ***************************************************/
4091
4092 /* For when we need to cast away const. */
4093 static upb_tabent *mutable_entries(upb_table *t) {
4094 return (upb_tabent*)t->entries;
4095 }
4096
4097 static bool isfull(upb_table *t) {
4098 return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD;
4099 }
4100
4101 static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2) {
4102 size_t bytes;
4103
4104 t->count = 0;
4105 t->ctype = ctype;
4106 t->size_lg2 = size_lg2;
4107 t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
4108 bytes = upb_table_size(t) * sizeof(upb_tabent);
4109 if (bytes > 0) {
4110 t->entries = malloc(bytes);
4111 if (!t->entries) return false;
4112 memset(mutable_entries(t), 0, bytes);
4113 } else {
4114 t->entries = NULL;
4115 }
4116 return true;
4117 }
4118
4119 static void uninit(upb_table *t) { free(mutable_entries(t)); }
4120
4121 static upb_tabent *emptyent(upb_table *t) {
4122 upb_tabent *e = mutable_entries(t) + upb_table_size(t);
4123 while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
4124 }
4125
4126 static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
4127 return (upb_tabent*)upb_getentry(t, hash);
4128 }
4129
4130 static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
4131 uint32_t hash, eqlfunc_t *eql) {
4132 const upb_tabent *e;
4133
4134 if (t->size_lg2 == 0) return NULL;
4135 e = upb_getentry(t, hash);
4136 if (upb_tabent_isempty(e)) return NULL;
4137 while (1) {
4138 if (eql(e->key, key)) return e;
4139 if ((e = e->next) == NULL) return NULL;
4140 }
4141 }
4142
4143 static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
4144 uint32_t hash, eqlfunc_t *eql) {
4145 return (upb_tabent*)findentry(t, key, hash, eql);
4146 }
4147
4148 static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
4149 uint32_t hash, eqlfunc_t *eql) {
4150 const upb_tabent *e = findentry(t, key, hash, eql);
4151 if (e) {
4152 if (v) {
4153 _upb_value_setval(v, e->val.val, t->ctype);
4154 }
4155 return true;
4156 } else {
4157 return false;
4158 }
4159 }
4160
4161 /* The given key must not already exist in the table. */
4162 static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
4163 upb_value val, uint32_t hash,
4164 hashfunc_t *hashfunc, eqlfunc_t *eql) {
4165 upb_tabent *mainpos_e;
4166 upb_tabent *our_e;
4167
4168 UPB_UNUSED(eql);
4169 UPB_UNUSED(key);
4170 assert(findentry(t, key, hash, eql) == NULL);
4171 assert(val.ctype == t->ctype);
4172
4173 t->count++;
4174 mainpos_e = getentry_mutable(t, hash);
4175 our_e = mainpos_e;
4176
4177 if (upb_tabent_isempty(mainpos_e)) {
4178 /* Our main position is empty; use it. */
4179 our_e->next = NULL;
4180 } else {
4181 /* Collision. */
4182 upb_tabent *new_e = emptyent(t);
4183 /* Head of collider's chain. */
4184 upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
4185 if (chain == mainpos_e) {
4186 /* Existing ent is in its main posisiton (it has the same hash as us, and
4187 * is the head of our chain). Insert to new ent and append to this chain. */
4188 new_e->next = mainpos_e->next;
4189 mainpos_e->next = new_e;
4190 our_e = new_e;
4191 } else {
4192 /* Existing ent is not in its main position (it is a node in some other
4193 * chain). This implies that no existing ent in the table has our hash.
4194 * Evict it (updating its chain) and use its ent for head of our chain. */
4195 *new_e = *mainpos_e; /* copies next. */
4196 while (chain->next != mainpos_e) {
4197 chain = (upb_tabent*)chain->next;
4198 assert(chain);
4199 }
4200 chain->next = new_e;
4201 our_e = mainpos_e;
4202 our_e->next = NULL;
4203 }
4204 }
4205 our_e->key = tabkey;
4206 our_e->val.val = val.val;
4207 assert(findentry(t, key, hash, eql) == our_e);
4208 }
4209
4210 static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
4211 upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
4212 upb_tabent *chain = getentry_mutable(t, hash);
4213 if (upb_tabent_isempty(chain)) return false;
4214 if (eql(chain->key, key)) {
4215 /* Element to remove is at the head of its chain. */
4216 t->count--;
4217 if (val) {
4218 _upb_value_setval(val, chain->val.val, t->ctype);
4219 }
4220 if (chain->next) {
4221 upb_tabent *move = (upb_tabent*)chain->next;
4222 *chain = *move;
4223 if (removed) *removed = move->key;
4224 move->key = 0; /* Make the slot empty. */
4225 } else {
4226 if (removed) *removed = chain->key;
4227 chain->key = 0; /* Make the slot empty. */
4228 }
4229 return true;
4230 } else {
4231 /* Element to remove is either in a non-head position or not in the
4232 * table. */
4233 while (chain->next && !eql(chain->next->key, key))
4234 chain = (upb_tabent*)chain->next;
4235 if (chain->next) {
4236 /* Found element to remove. */
4237 upb_tabent *rm;
4238
4239 if (val) {
4240 _upb_value_setval(val, chain->next->val.val, t->ctype);
4241 }
4242 rm = (upb_tabent*)chain->next;
4243 if (removed) *removed = rm->key;
4244 rm->key = 0;
4245 chain->next = rm->next;
4246 t->count--;
4247 return true;
4248 } else {
4249 return false;
4250 }
4251 }
4252 }
4253
4254 static size_t next(const upb_table *t, size_t i) {
4255 do {
4256 if (++i >= upb_table_size(t))
4257 return SIZE_MAX;
4258 } while(upb_tabent_isempty(&t->entries[i]));
4259
4260 return i;
4261 }
4262
4263 static size_t begin(const upb_table *t) {
4264 return next(t, -1);
4265 }
4266
4267
4268 /* upb_strtable ***************************************************************/
4269
4270 /* A simple "subclass" of upb_table that only adds a hash function for strings. */
4271
4272 static upb_tabkey strcopy(lookupkey_t k2) {
4273 char *str = malloc(k2.str.len + sizeof(uint32_t) + 1);
4274 if (str == NULL) return 0;
4275 memcpy(str, &k2.str.len, sizeof(uint32_t));
4276 memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1);
4277 return (uintptr_t)str;
4278 }
4279
4280 static uint32_t strhash(upb_tabkey key) {
4281 uint32_t len;
4282 char *str = upb_tabstr(key, &len);
4283 return MurmurHash2(str, len, 0);
4284 }
4285
4286 static bool streql(upb_tabkey k1, lookupkey_t k2) {
4287 uint32_t len;
4288 char *str = upb_tabstr(k1, &len);
4289 return len == k2.str.len && memcmp(str, k2.str.str, len) == 0;
4290 }
4291
4292 bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
4293 return init(&t->t, ctype, 2);
4294 }
4295
4296 void upb_strtable_uninit(upb_strtable *t) {
4297 size_t i;
4298 for (i = 0; i < upb_table_size(&t->t); i++)
4299 free((void*)t->t.entries[i].key);
4300 uninit(&t->t);
4301 }
4302
4303 bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
4304 upb_strtable new_table;
4305 upb_strtable_iter i;
4306
4307 if (!init(&new_table.t, t->t.ctype, size_lg2))
4308 return false;
4309 upb_strtable_begin(&i, t);
4310 for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
4311 upb_strtable_insert2(
4312 &new_table,
4313 upb_strtable_iter_key(&i),
4314 upb_strtable_iter_keylength(&i),
4315 upb_strtable_iter_value(&i));
4316 }
4317 upb_strtable_uninit(t);
4318 *t = new_table;
4319 return true;
4320 }
4321
4322 bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
4323 upb_value v) {
4324 lookupkey_t key;
4325 upb_tabkey tabkey;
4326 uint32_t hash;
4327
4328 if (isfull(&t->t)) {
4329 /* Need to resize. New table of double the size, add old elements to it. */
4330 if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) {
4331 return false;
4332 }
4333 }
4334
4335 key = strkey2(k, len);
4336 tabkey = strcopy(key);
4337 if (tabkey == 0) return false;
4338
4339 hash = MurmurHash2(key.str.str, key.str.len, 0);
4340 insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
4341 return true;
4342 }
4343
4344 bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
4345 upb_value *v) {
4346 uint32_t hash = MurmurHash2(key, len, 0);
4347 return lookup(&t->t, strkey2(key, len), v, hash, &streql);
4348 }
4349
4350 bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
4351 upb_value *val) {
4352 uint32_t hash = MurmurHash2(key, strlen(key), 0);
4353 upb_tabkey tabkey;
4354 if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
4355 free((void*)tabkey);
4356 return true;
4357 } else {
4358 return false;
4359 }
4360 }
4361
4362 /* Iteration */
4363
4364 static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
4365 return &i->t->t.entries[i->index];
4366 }
4367
4368 void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
4369 i->t = t;
4370 i->index = begin(&t->t);
4371 }
4372
4373 void upb_strtable_next(upb_strtable_iter *i) {
4374 i->index = next(&i->t->t, i->index);
4375 }
4376
4377 bool upb_strtable_done(const upb_strtable_iter *i) {
4378 return i->index >= upb_table_size(&i->t->t) ||
4379 upb_tabent_isempty(str_tabent(i));
4380 }
4381
4382 const char *upb_strtable_iter_key(upb_strtable_iter *i) {
4383 assert(!upb_strtable_done(i));
4384 return upb_tabstr(str_tabent(i)->key, NULL);
4385 }
4386
4387 size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
4388 uint32_t len;
4389 assert(!upb_strtable_done(i));
4390 upb_tabstr(str_tabent(i)->key, &len);
4391 return len;
4392 }
4393
4394 upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
4395 assert(!upb_strtable_done(i));
4396 return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype);
4397 }
4398
4399 void upb_strtable_iter_setdone(upb_strtable_iter *i) {
4400 i->index = SIZE_MAX;
4401 }
4402
4403 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
4404 const upb_strtable_iter *i2) {
4405 if (upb_strtable_done(i1) && upb_strtable_done(i2))
4406 return true;
4407 return i1->t == i2->t && i1->index == i2->index;
4408 }
4409
4410
4411 /* upb_inttable ***************************************************************/
4412
4413 /* For inttables we use a hybrid structure where small keys are kept in an
4414 * array and large keys are put in the hash table. */
4415
4416 static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }
4417
4418 static bool inteql(upb_tabkey k1, lookupkey_t k2) {
4419 return k1 == k2.num;
4420 }
4421
4422 static upb_tabval *mutable_array(upb_inttable *t) {
4423 return (upb_tabval*)t->array;
4424 }
4425
4426 static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
4427 if (key < t->array_size) {
4428 return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
4429 } else {
4430 upb_tabent *e =
4431 findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
4432 return e ? &e->val : NULL;
4433 }
4434 }
4435
4436 static const upb_tabval *inttable_val_const(const upb_inttable *t,
4437 uintptr_t key) {
4438 return inttable_val((upb_inttable*)t, key);
4439 }
4440
4441 size_t upb_inttable_count(const upb_inttable *t) {
4442 return t->t.count + t->array_count;
4443 }
4444
4445 static void check(upb_inttable *t) {
4446 UPB_UNUSED(t);
4447 #if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
4448 {
4449 /* This check is very expensive (makes inserts/deletes O(N)). */
4450 size_t count = 0;
4451 upb_inttable_iter i;
4452 upb_inttable_begin(&i, t);
4453 for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) {
4454 assert(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL));
4455 }
4456 assert(count == upb_inttable_count(t));
4457 }
4458 #endif
4459 }
4460
4461 bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
4462 size_t asize, int hsize_lg2) {
4463 size_t array_bytes;
4464
4465 if (!init(&t->t, ctype, hsize_lg2)) return false;
4466 /* Always make the array part at least 1 long, so that we know key 0
4467 * won't be in the hash part, which simplifies things. */
4468 t->array_size = UPB_MAX(1, asize);
4469 t->array_count = 0;
4470 array_bytes = t->array_size * sizeof(upb_value);
4471 t->array = malloc(array_bytes);
4472 if (!t->array) {
4473 uninit(&t->t);
4474 return false;
4475 }
4476 memset(mutable_array(t), 0xff, array_bytes);
4477 check(t);
4478 return true;
4479 }
4480
4481 bool upb_inttable_init(upb_inttable *t, upb_ctype_t ctype) {
4482 return upb_inttable_sizedinit(t, ctype, 0, 4);
4483 }
4484
4485 void upb_inttable_uninit(upb_inttable *t) {
4486 uninit(&t->t);
4487 free(mutable_array(t));
4488 }
4489
4490 bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
4491 /* XXX: Table can't store value (uint64_t)-1. Need to somehow statically
4492 * guarantee that this is not necessary, or fix the limitation. */
4493 upb_tabval tabval;
4494 tabval.val = val.val;
4495 UPB_UNUSED(tabval);
4496 assert(upb_arrhas(tabval));
4497
4498 if (key < t->array_size) {
4499 assert(!upb_arrhas(t->array[key]));
4500 t->array_count++;
4501 mutable_array(t)[key].val = val.val;
4502 } else {
4503 if (isfull(&t->t)) {
4504 /* Need to resize the hash part, but we re-use the array part. */
4505 size_t i;
4506 upb_table new_table;
4507 if (!init(&new_table, t->t.ctype, t->t.size_lg2 + 1))
4508 return false;
4509 for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
4510 const upb_tabent *e = &t->t.entries[i];
4511 uint32_t hash;
4512 upb_value v;
4513
4514 _upb_value_setval(&v, e->val.val, t->t.ctype);
4515 hash = upb_inthash(e->key);
4516 insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
4517 }
4518
4519 assert(t->t.count == new_table.count);
4520
4521 uninit(&t->t);
4522 t->t = new_table;
4523 }
4524 insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
4525 }
4526 check(t);
4527 return true;
4528 }
4529
4530 bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
4531 const upb_tabval *table_v = inttable_val_const(t, key);
4532 if (!table_v) return false;
4533 if (v) _upb_value_setval(v, table_v->val, t->t.ctype);
4534 return true;
4535 }
4536
4537 bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
4538 upb_tabval *table_v = inttable_val(t, key);
4539 if (!table_v) return false;
4540 table_v->val = val.val;
4541 return true;
4542 }
4543
4544 bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
4545 bool success;
4546 if (key < t->array_size) {
4547 if (upb_arrhas(t->array[key])) {
4548 upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
4549 t->array_count--;
4550 if (val) {
4551 _upb_value_setval(val, t->array[key].val, t->t.ctype);
4552 }
4553 mutable_array(t)[key] = empty;
4554 success = true;
4555 } else {
4556 success = false;
4557 }
4558 } else {
4559 upb_tabkey removed;
4560 uint32_t hash = upb_inthash(key);
4561 success = rm(&t->t, intkey(key), val, &removed, hash, &inteql);
4562 }
4563 check(t);
4564 return success;
4565 }
4566
4567 bool upb_inttable_push(upb_inttable *t, upb_value val) {
4568 return upb_inttable_insert(t, upb_inttable_count(t), val);
4569 }
4570
4571 upb_value upb_inttable_pop(upb_inttable *t) {
4572 upb_value val;
4573 bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
4574 UPB_ASSERT_VAR(ok, ok);
4575 return val;
4576 }
4577
4578 bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) {
4579 return upb_inttable_insert(t, (uintptr_t)key, val);
4580 }
4581
4582 bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
4583 upb_value *v) {
4584 return upb_inttable_lookup(t, (uintptr_t)key, v);
4585 }
4586
4587 bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
4588 return upb_inttable_remove(t, (uintptr_t)key, val);
4589 }
4590
4591 void upb_inttable_compact(upb_inttable *t) {
4592 /* Create a power-of-two histogram of the table keys. */
4593 int counts[UPB_MAXARRSIZE + 1] = {0};
4594 uintptr_t max_key = 0;
4595 upb_inttable_iter i;
4596 size_t arr_size;
4597 int arr_count;
4598 upb_inttable new_t;
4599
4600 upb_inttable_begin(&i, t);
4601 for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
4602 uintptr_t key = upb_inttable_iter_key(&i);
4603 if (key > max_key) {
4604 max_key = key;
4605 }
4606 counts[log2ceil(key)]++;
4607 }
4608
4609 arr_size = 1;
4610 arr_count = upb_inttable_count(t);
4611
4612 if (upb_inttable_count(t) >= max_key * MIN_DENSITY) {
4613 /* We can put 100% of the entries in the array part. */
4614 arr_size = max_key + 1;
4615 } else {
4616 /* Find the largest power of two that satisfies the MIN_DENSITY
4617 * definition. */
4618 int size_lg2;
4619 for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 1; size_lg2--) {
4620 arr_size = 1 << size_lg2;
4621 arr_count -= counts[size_lg2];
4622 if (arr_count >= arr_size * MIN_DENSITY) {
4623 break;
4624 }
4625 }
4626 }
4627
4628 /* Array part must always be at least 1 entry large to catch lookups of key
4629 * 0. Key 0 must always be in the array part because "0" in the hash part
4630 * denotes an empty entry. */
4631 arr_size = UPB_MAX(arr_size, 1);
4632
4633 {
4634 /* Insert all elements into new, perfectly-sized table. */
4635 int hash_count = upb_inttable_count(t) - arr_count;
4636 int hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
4637 int hashsize_lg2 = log2ceil(hash_size);
4638
4639 assert(hash_count >= 0);
4640 upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2);
4641 upb_inttable_begin(&i, t);
4642 for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
4643 uintptr_t k = upb_inttable_iter_key(&i);
4644 upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i));
4645 }
4646 assert(new_t.array_size == arr_size);
4647 assert(new_t.t.size_lg2 == hashsize_lg2);
4648 }
4649 upb_inttable_uninit(t);
4650 *t = new_t;
4651 }
4652
4653 /* Iteration. */
4654
4655 static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
4656 assert(!i->array_part);
4657 return &i->t->t.entries[i->index];
4658 }
4659
4660 static upb_tabval int_arrent(const upb_inttable_iter *i) {
4661 assert(i->array_part);
4662 return i->t->array[i->index];
4663 }
4664
4665 void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
4666 i->t = t;
4667 i->index = -1;
4668 i->array_part = true;
4669 upb_inttable_next(i);
4670 }
4671
4672 void upb_inttable_next(upb_inttable_iter *iter) {
4673 const upb_inttable *t = iter->t;
4674 if (iter->array_part) {
4675 while (++iter->index < t->array_size) {
4676 if (upb_arrhas(int_arrent(iter))) {
4677 return;
4678 }
4679 }
4680 iter->array_part = false;
4681 iter->index = begin(&t->t);
4682 } else {
4683 iter->index = next(&t->t, iter->index);
4684 }
4685 }
4686
4687 bool upb_inttable_done(const upb_inttable_iter *i) {
4688 if (i->array_part) {
4689 return i->index >= i->t->array_size ||
4690 !upb_arrhas(int_arrent(i));
4691 } else {
4692 return i->index >= upb_table_size(&i->t->t) ||
4693 upb_tabent_isempty(int_tabent(i));
4694 }
4695 }
4696
4697 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
4698 assert(!upb_inttable_done(i));
4699 return i->array_part ? i->index : int_tabent(i)->key;
4700 }
4701
4702 upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
4703 assert(!upb_inttable_done(i));
4704 return _upb_value_val(
4705 i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val,
4706 i->t->t.ctype);
4707 }
4708
4709 void upb_inttable_iter_setdone(upb_inttable_iter *i) {
4710 i->index = SIZE_MAX;
4711 i->array_part = false;
4712 }
4713
4714 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
4715 const upb_inttable_iter *i2) {
4716 if (upb_inttable_done(i1) && upb_inttable_done(i2))
4717 return true;
4718 return i1->t == i2->t && i1->index == i2->index &&
4719 i1->array_part == i2->array_part;
4720 }
4721
4722 #ifdef UPB_UNALIGNED_READS_OK
/* -----------------------------------------------------------------------------
 * MurmurHash2, by Austin Appleby (released as public domain).
 * Reformatted and C99-ified by Joshua Haberman.
 *
 * Hashes the |len| bytes at |key|, mixed with |seed|, to a 32-bit value.
 *
 * 4-byte words are loaded with memcpy(), so the input may have any alignment
 * and no pointer-type punning is involved.  sizeof(int) is irrelevant because
 * uint32_t is used throughout.
 *
 * Limitations:
 *   1. It will not work incrementally.
 *   2. It will not produce the same results on little-endian and big-endian
 *      machines. */
uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
  /* 'm' and 'r' are mixing constants generated offline.
   * They're not really 'magic', they just happen to work well. */
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;

  /* Initialize the hash to a 'random' value */
  uint32_t h = seed ^ (uint32_t)len;

  /* Mix 4 bytes at a time into the hash */
  const uint8_t * data = (const uint8_t *)key;
  while(len >= 4) {
    uint32_t k;
    memcpy(&k, data, sizeof(k));  /* Native-endian load of the next word. */

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  /* Handle the last few bytes of the input array */
  switch(len) {
    case 3: h ^= data[2] << 16;  /* fallthrough */
    case 2: h ^= data[1] << 8;   /* fallthrough */
    case 1: h ^= data[0]; h *= m;
  }

  /* Do a few final mixes of the hash to ensure the last few
   * bytes are well-incorporated. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
4773
4774 #else /* !UPB_UNALIGNED_READS_OK */
4775
4776 /* -----------------------------------------------------------------------------
4777 * MurmurHashAligned2, by Austin Appleby
4778 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
4779 * on certain platforms.
4780 * Performance will be lower than MurmurHash2 */
4781
/* One mixing round: folds the 32-bit word |k| into the hash |h|.  Uses the
 * shift constant |r| from the enclosing scope.  Wrapped in do/while so it
 * behaves as a single statement. */
#define MIX(h,k,m) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)

/* Hashes the |len| bytes at |key|, mixed with |seed|, to a 32-bit value.
 *
 * When the input is not 4-byte aligned (and is at least 4 bytes long), the
 * leading bytes are read one at a time and subsequent words are loaded only
 * from 4-byte-aligned addresses, then shifted and combined to rebuild the
 * unaligned words.  Word loads go through memcpy() so no pointer-type punning
 * is involved. */
uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t * data = (const uint8_t *)key;
  uint32_t h = seed ^ (uint32_t)len;
  uint8_t align = (uintptr_t)data & 3;

  if(align && (len >= 4)) {
    /* Pre-load the temp registers */
    uint32_t t = 0, d = 0;
    int32_t sl;
    int32_t sr;

    switch(align) {
      case 1: t |= data[2] << 16;  /* fallthrough */
      case 2: t |= data[1] << 8;   /* fallthrough */
      case 3: t |= data[0];
    }

    t <<= (8 * align);

    data += 4-align;
    len -= 4-align;

    sl = 8 * (4-align);
    sr = 8 * align;

    /* Mix */

    while(len >= 4) {
      uint32_t k;

      memcpy(&d, data, sizeof(d));  /* data is 4-byte aligned here. */
      t = (t >> sr) | (d << sl);

      k = t;

      MIX(h,k,m);

      t = d;

      data += 4;
      len -= 4;
    }

    /* Handle leftover data in temp registers */

    d = 0;

    if(len >= align) {
      uint32_t k;

      switch(align) {
        case 3: d |= data[2] << 16;  /* fallthrough */
        case 2: d |= data[1] << 8;   /* fallthrough */
        case 1: d |= data[0];
      }

      k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      /* ----------
       * Handle tail bytes */

      switch(len) {
        case 3: h ^= data[2] << 16;  /* fallthrough */
        case 2: h ^= data[1] << 8;   /* fallthrough */
        case 1: h ^= data[0]; h *= m;
      }
    } else {
      switch(len) {
        case 3: d |= data[2] << 16;  /* fallthrough */
        case 2: d |= data[1] << 8;   /* fallthrough */
        case 1: d |= data[0];        /* fallthrough */
        case 0: h ^= (t >> sr) | (d << sl); h *= m;
      }
    }

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  } else {
    while(len >= 4) {
      uint32_t k;
      memcpy(&k, data, sizeof(k));

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    /* ----------
     * Handle tail bytes */

    switch(len) {
      case 3: h ^= data[2] << 16;  /* fallthrough */
      case 2: h ^= data[1] << 8;   /* fallthrough */
      case 1: h ^= data[0]; h *= m;
    }

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
}
#undef MIX
4897
4898 #endif /* UPB_UNALIGNED_READS_OK */
4899
4900 #include <errno.h>
4901 #include <stdarg.h>
4902 #include <stddef.h>
4903 #include <stdint.h>
4904 #include <stdio.h>
4905 #include <stdlib.h>
4906 #include <string.h>
4907
4908 bool upb_dumptostderr(void *closure, const upb_status* status) {
4909 UPB_UNUSED(closure);
4910 fprintf(stderr, "%s\n", upb_status_errmsg(status));
4911 return false;
4912 }
4913
4914 /* Guarantee null-termination and provide ellipsis truncation.
4915 * It may be tempting to "optimize" this by initializing these final
4916 * four bytes up-front and then being careful never to overwrite them,
4917 * this is safer and simpler. */
4918 static void nullz(upb_status *status) {
4919 const char *ellipsis = "...";
4920 size_t len = strlen(ellipsis);
4921 assert(sizeof(status->msg) > len);
4922 memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len);
4923 }
4924
4925 void upb_status_clear(upb_status *status) {
4926 if (!status) return;
4927 status->ok_ = true;
4928 status->code_ = 0;
4929 status->msg[0] = '\0';
4930 }
4931
4932 bool upb_ok(const upb_status *status) { return status->ok_; }
4933
4934 upb_errorspace *upb_status_errspace(const upb_status *status) {
4935 return status->error_space_;
4936 }
4937
4938 int upb_status_errcode(const upb_status *status) { return status->code_; }
4939
4940 const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
4941
4942 void upb_status_seterrmsg(upb_status *status, const char *msg) {
4943 if (!status) return;
4944 status->ok_ = false;
4945 strncpy(status->msg, msg, sizeof(status->msg));
4946 nullz(status);
4947 }
4948
4949 void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
4950 va_list args;
4951 va_start(args, fmt);
4952 upb_status_vseterrf(status, fmt, args);
4953 va_end(args);
4954 }
4955
4956 void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
4957 if (!status) return;
4958 status->ok_ = false;
4959 _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
4960 nullz(status);
4961 }
4962
4963 void upb_status_seterrcode(upb_status *status, upb_errorspace *space,
4964 int code) {
4965 if (!status) return;
4966 status->ok_ = false;
4967 status->error_space_ = space;
4968 status->code_ = code;
4969 space->set_message(status, code);
4970 }
4971
4972 void upb_status_copy(upb_status *to, const upb_status *from) {
4973 if (!to) return;
4974 *to = *from;
4975 }
4976 /* This file was generated by upbc (the upb compiler).
4977 * Do not edit -- your changes will be discarded when the file is
4978 * regenerated. */
4979
4980
4981 static const upb_msgdef msgs[20];
4982 static const upb_fielddef fields[81];
4983 static const upb_enumdef enums[4];
4984 static const upb_tabent strentries[236];
4985 static const upb_tabent intentries[14];
4986 static const upb_tabval arrays[232];
4987
4988 #ifdef UPB_DEBUG_REFS
4989 static upb_inttable reftables[212];
4990 #endif
4991
4992 static const upb_msgdef msgs[20] = {
4993 UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", 27, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[0], 8, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYP E_PTR, 4, &strentries[0]),&reftables[0], &reftables[1]),
4994 UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", 4, 0, UPB_IN TTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[8], 3, 2), UPB_STRTABLE_INIT(2 , 3, UPB_CTYPE_PTR, 2, &strentries[16]),&reftables[2], &reftables[3]),
4995 UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", 11, 2, UPB_INTTABLE_INI T(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[11], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_ CTYPE_PTR, 2, &strentries[20]),&reftables[4], &reftables[5]),
4996 UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 7, 1, UPB_INTTABLE_INIT(1, 1, U PB_CTYPE_PTR, 1, &intentries[0], &arrays[15], 8, 1), UPB_STRTABLE_INIT(2, 3, UPB _CTYPE_PTR, 2, &strentries[24]),&reftables[6], &reftables[7]),
4997 UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", 8, 1, UPB_INTTABLE _INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[23], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[28]),&reftables[8], &reftables[9]),
4998 UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[2], &arrays[27], 4, 0), UPB_STRTABLE_INIT(1, 3 , UPB_CTYPE_PTR, 2, &strentries[32]),&reftables[10], &reftables[11]),
4999 UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", 19, 1, UPB_INTTABLE_IN IT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[31], 9, 8), UPB_STRTABLE_INIT(8, 15, UP B_CTYPE_PTR, 4, &strentries[36]),&reftables[12], &reftables[13]),
5000 UPB_MSGDEF_INIT("google.protobuf.FieldOptions", 14, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[4], &arrays[40], 32, 6), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[52]),&reftables[14], &reftables[15]),
5001 UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", 39, 6, UPB_INTTABLE_INI T(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[72], 12, 11), UPB_STRTABLE_INIT(11, 15, UPB_CTYPE_PTR, 4, &strentries[68]),&reftables[16], &reftables[17]),
5002 UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", 6, 1, UPB_INTTABLE_INIT(0 , 0, UPB_CTYPE_PTR, 0, NULL, &arrays[84], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTY PE_PTR, 2, &strentries[84]),&reftables[18], &reftables[19]),
5003 UPB_MSGDEF_INIT("google.protobuf.FileOptions", 21, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[6], &arrays[86], 64, 9), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[88]),&reftables[20], &reftables[21]),
5004 UPB_MSGDEF_INIT("google.protobuf.MessageOptions", 8, 1, UPB_INTTABLE_INIT(1, 1 , UPB_CTYPE_PTR, 1, &intentries[8], &arrays[150], 16, 2), UPB_STRTABLE_INIT(3, 3 , UPB_CTYPE_PTR, 2, &strentries[104]),&reftables[22], &reftables[23]),
5005 UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", 13, 1, UPB_INTTABLE_I NIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[166], 5, 4), UPB_STRTABLE_INIT(4, 7, U PB_CTYPE_PTR, 3, &strentries[108]),&reftables[24], &reftables[25]),
5006 UPB_MSGDEF_INIT("google.protobuf.MethodOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[10], &arrays[171], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[116]),&reftables[26], &reftables[27]),
5007 UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", 11, 2, UPB_INTTABLE_ INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[175], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[120]),&reftables[28], &reftables[29]),
5008 UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", 6, 1, UPB_INTTABLE_INIT(1, 1 , UPB_CTYPE_PTR, 1, &intentries[12], &arrays[179], 4, 0), UPB_STRTABLE_INIT(1, 3 , UPB_CTYPE_PTR, 2, &strentries[124]),&reftables[30], &reftables[31]),
5009 UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", 6, 1, UPB_INTTABLE_INIT(0, 0 , UPB_CTYPE_PTR, 0, NULL, &arrays[183], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE _PTR, 2, &strentries[128]),&reftables[32], &reftables[33]),
5010 UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", 14, 0, UPB_INTTABLE _INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[185], 5, 4), UPB_STRTABLE_INIT(4, 7, UPB_CTYPE_PTR, 3, &strentries[132]),&reftables[34], &reftables[35]),
5011 UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", 18, 1, UPB_INTTABLE_INI T(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[190], 9, 7), UPB_STRTABLE_INIT(7, 15, UP B_CTYPE_PTR, 4, &strentries[140]),&reftables[36], &reftables[37]),
5012 UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", 6, 0, UPB_INTT ABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[199], 3, 2), UPB_STRTABLE_INIT(2 , 3, UPB_CTYPE_PTR, 2, &strentries[156]),&reftables[38], &reftables[39]),
5013 };
5014
5015 static const upb_fielddef fields[81] = {
5016 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[18], NULL, 15, 6, {0},&reftables[40], &refta bles[41]),
5017 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, f alse, "allow_alias", 2, &msgs[3], NULL, 6, 1, {0},&reftables[42], &reftables[43] ),
5018 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, f alse, "cc_generic_services", 16, &msgs[10], NULL, 17, 6, {0},&reftables[44], &re ftables[45]),
5019 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, f alse, "ctype", 1, &msgs[7], (const upb_def*)(&enums[2]), 6, 1, {0},&reftables[46 ], &reftables[47]),
5020 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[6], NULL, 16, 7, {0},&reftables[48], &reftable s[49]),
5021 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[8], NULL, 30, 8, {0},&reftables[50], &reftables[5 1]),
5022 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, f alse, "deprecated", 3, &msgs[7], NULL, 8, 3, {0},&reftables[52], &reftables[53]) ,
5023 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[18], NULL, 11, 4, {0},&reftables[54], &reftable s[55]),
5024 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, fal se, false, false, false, "end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[56], &re ftables[57]),
5025 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "enum_type", 4, &msgs[0], (const upb_def*)(&msgs[2]), 16, 2, {0},&refta bles[58], &reftables[59]),
5026 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "enum_type", 5, &msgs[8], (const upb_def*)(&msgs[2]), 13, 1, {0},&refta bles[60], &reftables[61]),
5027 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "experimental_map_key", 9, &msgs[7], NULL, 10, 5, {0},&reftables[62], &r eftables[63]),
5028 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[6], NULL, 7, 2, {0},&reftables[64], &reftables[65]) ,
5029 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "extension", 7, &msgs[8], (const upb_def*)(&msgs[6]), 19, 3, {0},&refta bles[66], &reftables[67]),
5030 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "extension", 6, &msgs[0], (const upb_def*)(&msgs[6]), 22, 4, {0},&refta bles[68], &reftables[69]),
5031 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "extension_range", 5, &msgs[0], (const upb_def*)(&msgs[1]), 19, 3, {0}, &reftables[70], &reftables[71]),
5032 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "field", 2, &msgs[0], (const upb_def*)(&msgs[6]), 10, 0, {0},&reftables [72], &reftables[73]),
5033 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "file", 1, &msgs[9], (const upb_def*)(&msgs[8]), 5, 0, {0},&reftables[7 4], &reftables[75]),
5034 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[10], NULL, 14, 5, {0},&reftables[76], &reftables [77]),
5035 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[18], NULL, 6, 1, {0},&reftables[78], &refta bles[79]),
5036 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[12], NULL, 7, 2, {0},&reftables[80], &reftables[8 1]),
5037 UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, f alse, "is_extension", 2, &msgs[19], NULL, 5, 1, {0},&reftables[82], &reftables[8 3]),
5038 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, f alse, "java_generate_equals_and_hash", 20, &msgs[10], NULL, 20, 9, {0},&reftable s[84], &reftables[85]),
5039 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, f alse, "java_generic_services", 17, &msgs[10], NULL, 18, 7, {0},&reftables[86], & reftables[87]),
5040 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, f alse, "java_multiple_files", 10, &msgs[10], NULL, 13, 4, {0},&reftables[88], &re ftables[89]),
5041 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[10], NULL, 9, 2, {0},&reftables[90], &r eftables[91]),
5042 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[10], NULL, 6, 1, {0},&reftables[92], &reftables [93]),
5043 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, f alse, "label", 4, &msgs[6], (const upb_def*)(&enums[0]), 11, 4, {0},&reftables[9 4], &reftables[95]),
5044 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, f alse, "lazy", 5, &msgs[7], NULL, 9, 4, {0},&reftables[96], &reftables[97]),
5045 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[17], NULL, 8, 2, {0},&reftables[98], &refta bles[99]),
5046 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "location", 1, &msgs[16], (const upb_def*)(&msgs[17]), 5, 0, {0},&refta bles[100], &reftables[101]),
5047 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, f alse, "message_set_wire_format", 1, &msgs[11], NULL, 6, 1, {0},&reftables[102], &reftables[103]),
5048 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "message_type", 4, &msgs[8], (const upb_def*)(&msgs[0]), 10, 0, {0},&re ftables[104], &reftables[105]),
5049 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "method", 2, &msgs[14], (const upb_def*)(&msgs[12]), 6, 0, {0},&reftabl es[106], &reftables[107]),
5050 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[8], NULL, 22, 6, {0},&reftables[108], &reftables[109]),
5051 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[14], NULL, 8, 2, {0},&reftables[110], &reftables[111]),
5052 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "name", 2, &msgs[18], (const upb_def*)(&msgs[19]), 5, 0, {0},&reftables [112], &reftables[113]),
5053 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[4], NULL, 4, 1, {0},&reftables[114], &reftables[115]),
5054 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 24, 6, {0},&reftables[116], &reftables[117]),
5055 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[12], NULL, 4, 1, {0},&reftables[118], &reftables[119]),
5056 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[2], NULL, 8, 2, {0},&reftables[120], &reftables[121]),
5057 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[6], NULL, 4, 1, {0},&reftables[122], &reftables[123]),
5058 UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[19], NULL, 2, 0, {0},&reftables[124], &reftables[1 25]),
5059 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, fal se, false, false, false, "negative_int_value", 5, &msgs[18], NULL, 10, 3, {0},&r eftables[126], &reftables[127]),
5060 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "nested_type", 3, &msgs[0], (const upb_def*)(&msgs[0]), 13, 1, {0},&ref tables[128], &reftables[129]),
5061 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, f alse, "no_standard_descriptor_accessor", 2, &msgs[11], NULL, 7, 2, {0},&reftable s[130], &reftables[131]),
5062 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, fal se, false, false, false, "number", 3, &msgs[6], NULL, 10, 3, {0},&reftables[132] , &reftables[133]),
5063 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, fal se, false, false, false, "number", 2, &msgs[4], NULL, 7, 2, {0},&reftables[134], &reftables[135]),
5064 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, f alse, "optimize_for", 9, &msgs[10], (const upb_def*)(&enums[3]), 12, 3, {0},&ref tables[136], &reftables[137]),
5065 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false , false, "options", 7, &msgs[0], (const upb_def*)(&msgs[11]), 23, 5, {0},&reftab les[138], &reftables[139]),
5066 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false , false, "options", 3, &msgs[2], (const upb_def*)(&msgs[3]), 7, 1, {0},&reftable s[140], &reftables[141]),
5067 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false , false, "options", 8, &msgs[6], (const upb_def*)(&msgs[7]), 3, 0, {0},&reftable s[142], &reftables[143]),
5068 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false , false, "options", 3, &msgs[4], (const upb_def*)(&msgs[5]), 3, 0, {0},&reftable s[144], &reftables[145]),
5069 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false , false, "options", 8, &msgs[8], (const upb_def*)(&msgs[10]), 20, 4, {0},&reftab les[146], &reftables[147]),
5070 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false , false, "options", 3, &msgs[14], (const upb_def*)(&msgs[15]), 7, 1, {0},&reftab les[148], &reftables[149]),
5071 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false , false, "options", 4, &msgs[12], (const upb_def*)(&msgs[13]), 3, 0, {0},&reftab les[150], &reftables[151]),
5072 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[12], NULL, 10, 3, {0},&reftables[152], &reftable s[153]),
5073 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[8], NULL, 25, 7, {0},&reftables[154], &reftables[155 ]),
5074 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, f alse, "packed", 2, &msgs[7], NULL, 7, 2, {0},&reftables[156], &reftables[157]),
5075 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, fal se, false, false, true, "path", 1, &msgs[17], NULL, 4, 0, {0},&reftables[158], & reftables[159]),
5076 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, fa lse, false, false, false, "positive_int_value", 4, &msgs[18], NULL, 9, 2, {0},&r eftables[160], &reftables[161]),
5077 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, fal se, false, false, false, "public_dependency", 10, &msgs[8], NULL, 35, 9, {0},&re ftables[162], &reftables[163]),
5078 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, f alse, "py_generic_services", 18, &msgs[10], NULL, 19, 8, {0},&reftables[164], &r eftables[165]),
5079 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "service", 6, &msgs[8], (const upb_def*)(&msgs[14]), 16, 2, {0},&reftab les[166], &reftables[167]),
5080 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false , false, "source_code_info", 9, &msgs[8], (const upb_def*)(&msgs[16]), 21, 5, {0 },&reftables[168], &reftables[169]),
5081 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, fal se, false, false, true, "span", 2, &msgs[17], NULL, 7, 1, {0},&reftables[170], & reftables[171]),
5082 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, fal se, false, false, false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[172], &reftables[173]),
5083 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[18], NULL, 12, 5, {0},&reftables[174], &reftable s[175]),
5084 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[17], NULL, 11, 3, {0},&reftables[176], &re ftables[177]),
5085 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, f alse, "type", 5, &msgs[6], (const upb_def*)(&enums[1]), 12, 5, {0},&reftables[17 8], &reftables[179]),
5086 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[6], NULL, 13, 6, {0},&reftables[180], &reftables[1 81]),
5087 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "uninterpreted_option", 999, &msgs[5], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[182], &reftables[183]),
5088 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "uninterpreted_option", 999, &msgs[15], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[184], &reftables[185]),
5089 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "uninterpreted_option", 999, &msgs[3], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[186], &reftables[187]),
5090 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "uninterpreted_option", 999, &msgs[13], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[188], &reftables[189]),
5091 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "uninterpreted_option", 999, &msgs[10], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[190], &reftables[191]),
5092 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "uninterpreted_option", 999, &msgs[11], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[192], &reftables[193]),
5093 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "uninterpreted_option", 999, &msgs[7], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[194], &reftables[195]),
5094 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false , false, "value", 2, &msgs[2], (const upb_def*)(&msgs[4]), 6, 0, {0},&reftables[ 196], &reftables[197]),
5095 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, f alse, "weak", 10, &msgs[7], NULL, 13, 6, {0},&reftables[198], &reftables[199]),
5096 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, fal se, false, false, false, "weak_dependency", 11, &msgs[8], NULL, 38, 10, {0},&ref tables[200], &reftables[201]),
5097 };
5098
5099 static const upb_enumdef enums[4] = {
5100 UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_IN IT(3, 3, UPB_CTYPE_INT32, 2, &strentries[160]), UPB_INTTABLE_INIT(0, 0, UPB_CTYP E_CSTR, 0, NULL, &arrays[202], 4, 3), 0, &reftables[202], &reftables[203]),
5101 UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INI T(18, 31, UPB_CTYPE_INT32, 5, &strentries[164]), UPB_INTTABLE_INIT(0, 0, UPB_CTY PE_CSTR, 0, NULL, &arrays[206], 19, 18), 0, &reftables[204], &reftables[205]),
5102 UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[196]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[225], 3, 3), 0, &reftables[206], &reftables[207]),
5103 UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT (3, 3, UPB_CTYPE_INT32, 2, &strentries[200]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_ CSTR, 0, NULL, &arrays[228], 4, 3), 0, &reftables[208], &reftables[209]),
5104 };
5105
5106 static const upb_tabent strentries[236] = {
5107 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR _INIT(&fields[14]), NULL},
5108 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5109 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5110 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT (&fields[38]), NULL},
5111 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5112 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5113 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5114 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "field"), UPB_TABVALUE_PTR_INI T(&fields[16]), NULL},
5115 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "extension_range"), UPB_TABVAL UE_PTR_INIT(&fields[15]), NULL},
5116 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5117 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "nested_type"), UPB_TABVALUE_P TR_INIT(&fields[44]), NULL},
5118 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5119 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5120 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5121 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_I NIT(&fields[49]), NULL},
5122 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR _INIT(&fields[9]), &strentries[14]},
5123 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INI T(&fields[66]), NULL},
5124 {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT( &fields[8]), NULL},
5125 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5126 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5127 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5128 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "value"), UPB_TABVALUE_PTR_INI T(&fields[78]), NULL},
5129 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_I NIT(&fields[50]), NULL},
5130 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT (&fields[40]), &strentries[22]},
5131 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_T ABVALUE_PTR_INIT(&fields[73]), NULL},
5132 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5133 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "allow_alias"), UPB_TABVALUE_P TR_INIT(&fields[1]), NULL},
5134 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5135 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_IN IT(&fields[47]), NULL},
5136 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5137 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_I NIT(&fields[52]), NULL},
5138 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT (&fields[37]), &strentries[30]},
5139 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_T ABVALUE_PTR_INIT(&fields[71]), NULL},
5140 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5141 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5142 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5143 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5144 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "label"), UPB_TABVALUE_PTR_INI T(&fields[27]), NULL},
5145 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5146 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT (&fields[41]), NULL},
5147 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5148 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5149 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5150 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5151 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_IN IT(&fields[46]), &strentries[49]},
5152 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5153 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5154 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "type_name"), UPB_TABVALUE_PTR _INIT(&fields[70]), NULL},
5155 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "extendee"), UPB_TABVALUE_PTR_ INIT(&fields[12]), NULL},
5156 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "type"), UPB_TABVALUE_PTR_INIT (&fields[69]), &strentries[48]},
5157 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "default_value"), UPB_TABVALUE _PTR_INIT(&fields[4]), NULL},
5158 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_I NIT(&fields[51]), NULL},
5159 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "experimental_map_key"), UPB_T ABVALUE_PTR_INIT(&fields[11]), &strentries[67]},
5160 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5161 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "weak"), UPB_TABVALUE_PTR_INIT (&fields[79]), NULL},
5162 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5163 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5164 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5165 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5166 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "packed"), UPB_TABVALUE_PTR_IN IT(&fields[58]), NULL},
5167 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "lazy"), UPB_TABVALUE_PTR_INIT (&fields[28]), NULL},
5168 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5169 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "ctype"), UPB_TABVALUE_PTR_INI T(&fields[3]), NULL},
5170 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5171 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5172 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PT R_INIT(&fields[6]), NULL},
5173 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5174 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_T ABVALUE_PTR_INIT(&fields[77]), NULL},
5175 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR _INIT(&fields[13]), NULL},
5176 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "weak_dependency"), UPB_TABVAL UE_PTR_INIT(&fields[80]), NULL},
5177 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5178 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT (&fields[34]), NULL},
5179 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "service"), UPB_TABVALUE_PTR_I NIT(&fields[63]), NULL},
5180 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5181 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "source_code_info"), UPB_TABVA LUE_PTR_INIT(&fields[64]), NULL},
5182 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5183 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5184 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5185 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "dependency"), UPB_TABVALUE_PT R_INIT(&fields[5]), NULL},
5186 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "message_type"), UPB_TABVALUE_ PTR_INIT(&fields[32]), NULL},
5187 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "package"), UPB_TABVALUE_PTR_I NIT(&fields[57]), NULL},
5188 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_I NIT(&fields[53]), &strentries[82]},
5189 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR _INIT(&fields[10]), NULL},
5190 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "public_dependency"), UPB_TABV ALUE_PTR_INIT(&fields[61]), &strentries[81]},
5191 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5192 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "file"), UPB_TABVALUE_PTR_INIT (&fields[17]), NULL},
5193 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5194 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5195 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_T ABVALUE_PTR_INIT(&fields[75]), NULL},
5196 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5197 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "cc_generic_services"), UPB_TA BVALUE_PTR_INIT(&fields[2]), NULL},
5198 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5199 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "java_multiple_files"), UPB_TA BVALUE_PTR_INIT(&fields[24]), NULL},
5200 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5201 {UPB_TABKEY_STR("\025", "\000", "\000", "\000", "java_generic_services"), UPB_ TABVALUE_PTR_INIT(&fields[23]), &strentries[102]},
5202 {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "java_generate_equals_and_hash "), UPB_TABVALUE_PTR_INIT(&fields[22]), NULL},
5203 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5204 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5205 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5206 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "go_package"), UPB_TABVALUE_PT R_INIT(&fields[18]), NULL},
5207 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "java_package"), UPB_TABVALUE_ PTR_INIT(&fields[26]), NULL},
5208 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "optimize_for"), UPB_TABVALUE_ PTR_INIT(&fields[48]), NULL},
5209 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "py_generic_services"), UPB_TA BVALUE_PTR_INIT(&fields[62]), NULL},
5210 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "java_outer_classname"), UPB_T ABVALUE_PTR_INIT(&fields[25]), NULL},
5211 {UPB_TABKEY_STR("\027", "\000", "\000", "\000", "message_set_wire_format"), UP B_TABVALUE_PTR_INIT(&fields[31]), &strentries[106]},
5212 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5213 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_T ABVALUE_PTR_INIT(&fields[76]), NULL},
5214 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "no_standard_descriptor_access or"), UPB_TABVALUE_PTR_INIT(&fields[45]), NULL},
5215 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5216 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5217 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5218 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT (&fields[39]), NULL},
5219 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "input_type"), UPB_TABVALUE_PT R_INIT(&fields[20]), NULL},
5220 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5221 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "output_type"), UPB_TABVALUE_P TR_INIT(&fields[56]), NULL},
5222 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_I NIT(&fields[55]), NULL},
5223 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_T ABVALUE_PTR_INIT(&fields[74]), NULL},
5224 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5225 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5226 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5227 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5228 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_I NIT(&fields[54]), &strentries[122]},
5229 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "method"), UPB_TABVALUE_PTR_IN IT(&fields[33]), NULL},
5230 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT (&fields[35]), &strentries[121]},
5231 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_T ABVALUE_PTR_INIT(&fields[72]), NULL},
5232 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5233 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5234 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5235 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5236 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5237 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "location"), UPB_TABVALUE_PTR_ INIT(&fields[30]), NULL},
5238 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5239 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5240 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5241 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5242 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "span"), UPB_TABVALUE_PTR_INIT (&fields[65]), &strentries[139]},
5243 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5244 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "trailing_comments"), UPB_TABV ALUE_PTR_INIT(&fields[68]), NULL},
5245 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "leading_comments"), UPB_TABVA LUE_PTR_INIT(&fields[29]), &strentries[137]},
5246 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "path"), UPB_TABVALUE_PTR_INIT (&fields[59]), NULL},
5247 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "double_value"), UPB_TABVALUE_ PTR_INIT(&fields[7]), NULL},
5248 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5249 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5250 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT (&fields[36]), NULL},
5251 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5252 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5253 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5254 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "negative_int_value"), UPB_TAB VALUE_PTR_INIT(&fields[43]), NULL},
5255 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "aggregate_value"), UPB_TABVAL UE_PTR_INIT(&fields[0]), NULL},
5256 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5257 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5258 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5259 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5260 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "positive_int_value"), UPB_TAB VALUE_PTR_INIT(&fields[60]), NULL},
5261 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "identifier_value"), UPB_TABVA LUE_PTR_INIT(&fields[19]), NULL},
5262 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "string_value"), UPB_TABVALUE_ PTR_INIT(&fields[67]), &strentries[154]},
5263 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5264 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5265 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "is_extension"), UPB_TABVALUE_ PTR_INIT(&fields[21]), NULL},
5266 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "name_part"), UPB_TABVALUE_PTR _INIT(&fields[42]), NULL},
5267 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REQUIRED"), UPB_TABVALU E_INT_INIT(2), &strentries[162]},
5268 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5269 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REPEATED"), UPB_TABVALU E_INT_INIT(3), NULL},
5270 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_OPTIONAL"), UPB_TABVALU E_INT_INIT(1), NULL},
5271 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED64"), UPB_TABVALUE_ INT_INIT(6), NULL},
5272 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5273 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5274 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5275 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5276 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_STRING"), UPB_TABVALUE_I NT_INIT(9), NULL},
5277 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_FLOAT"), UPB_TABVALUE_IN T_INIT(2), &strentries[193]},
5278 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_DOUBLE"), UPB_TABVALUE_I NT_INIT(1), NULL},
5279 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5280 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT32"), UPB_TABVALUE_IN T_INIT(5), NULL},
5281 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED32"), UPB_TABVALUE _INT_INIT(15), NULL},
5282 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED32"), UPB_TABVALUE_ INT_INIT(7), NULL},
5283 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5284 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_MESSAGE"), UPB_TABVALUE_ INT_INIT(11), &strentries[194]},
5285 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5286 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5287 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT64"), UPB_TABVALUE_IN T_INIT(3), &strentries[191]},
5288 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5289 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5290 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5291 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5292 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_ENUM"), UPB_TABVALUE_INT _INIT(14), NULL},
5293 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT32"), UPB_TABVALUE_I NT_INIT(13), NULL},
5294 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5295 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT64"), UPB_TABVALUE_I NT_INIT(4), &strentries[190]},
5296 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5297 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED64"), UPB_TABVALUE _INT_INIT(16), NULL},
5298 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_BYTES"), UPB_TABVALUE_IN T_INIT(12), NULL},
5299 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT64"), UPB_TABVALUE_I NT_INIT(18), NULL},
5300 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_BOOL"), UPB_TABVALUE_INT _INIT(8), NULL},
5301 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_GROUP"), UPB_TABVALUE_IN T_INIT(10), NULL},
5302 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT32"), UPB_TABVALUE_I NT_INIT(17), NULL},
5303 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5304 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "CORD"), UPB_TABVALUE_INT_INIT (1), NULL},
5305 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "STRING"), UPB_TABVALUE_INT_IN IT(0), &strentries[197]},
5306 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "STRING_PIECE"), UPB_TABVALUE_ INT_INIT(2), NULL},
5307 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "CODE_SIZE"), UPB_TABVALUE_INT _INIT(2), NULL},
5308 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "SPEED"), UPB_TABVALUE_INT_INI T(1), &strentries[203]},
5309 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5310 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "LITE_RUNTIME"), UPB_TABVALUE_ INT_INIT(3), NULL},
5311 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5312 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5313 {UPB_TABKEY_STR("\047", "\000", "\000", "\000", "google.protobuf.SourceCodeInf o.Location"), UPB_TABVALUE_PTR_INIT(&msgs[17]), NULL},
5314 {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.Uninterpreted Option"), UPB_TABVALUE_PTR_INIT(&msgs[18]), NULL},
5315 {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.FileDescripto rProto"), UPB_TABVALUE_PTR_INIT(&msgs[8]), NULL},
5316 {UPB_TABKEY_STR("\045", "\000", "\000", "\000", "google.protobuf.MethodDescrip torProto"), UPB_TABVALUE_PTR_INIT(&msgs[12]), NULL},
5317 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5318 {UPB_TABKEY_STR("\040", "\000", "\000", "\000", "google.protobuf.EnumValueOpti ons"), UPB_TABVALUE_PTR_INIT(&msgs[5]), NULL},
5319 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5320 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5321 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5322 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "google.protobuf.DescriptorPro to"), UPB_TABVALUE_PTR_INIT(&msgs[0]), &strentries[228]},
5323 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5324 {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.SourceCodeInf o"), UPB_TABVALUE_PTR_INIT(&msgs[16]), NULL},
5325 {UPB_TABKEY_STR("\051", "\000", "\000", "\000", "google.protobuf.FieldDescript orProto.Type"), UPB_TABVALUE_PTR_INIT(&enums[1]), NULL},
5326 {UPB_TABKEY_STR("\056", "\000", "\000", "\000", "google.protobuf.DescriptorPro to.ExtensionRange"), UPB_TABVALUE_PTR_INIT(&msgs[1]), NULL},
5327 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5328 {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.EnumValueDesc riptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[4]), NULL},
5329 {UPB_TABKEY_STR("\034", "\000", "\000", "\000", "google.protobuf.FieldOptions" ), UPB_TABVALUE_PTR_INIT(&msgs[7]), NULL},
5330 {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.FileOptions") , UPB_TABVALUE_PTR_INIT(&msgs[10]), NULL},
5331 {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.EnumDescripto rProto"), UPB_TABVALUE_PTR_INIT(&msgs[2]), &strentries[233]},
5332 {UPB_TABKEY_STR("\052", "\000", "\000", "\000", "google.protobuf.FieldDescript orProto.Label"), UPB_TABVALUE_PTR_INIT(&enums[0]), NULL},
5333 {UPB_TABKEY_STR("\046", "\000", "\000", "\000", "google.protobuf.ServiceDescri ptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[14]), NULL},
5334 {UPB_TABKEY_STR("\042", "\000", "\000", "\000", "google.protobuf.FieldOptions. CType"), UPB_TABVALUE_PTR_INIT(&enums[2]), &strentries[229]},
5335 {UPB_TABKEY_STR("\041", "\000", "\000", "\000", "google.protobuf.FileDescripto rSet"), UPB_TABVALUE_PTR_INIT(&msgs[9]), &strentries[235]},
5336 {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.EnumOptions") , UPB_TABVALUE_PTR_INIT(&msgs[3]), NULL},
5337 {UPB_TABKEY_STR("\044", "\000", "\000", "\000", "google.protobuf.FieldDescript orProto"), UPB_TABVALUE_PTR_INIT(&msgs[6]), NULL},
5338 {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.FileOptions.O ptimizeMode"), UPB_TABVALUE_PTR_INIT(&enums[3]), &strentries[221]},
5339 {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.ServiceOption s"), UPB_TABVALUE_PTR_INIT(&msgs[15]), NULL},
5340 {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.MessageOption s"), UPB_TABVALUE_PTR_INIT(&msgs[11]), NULL},
5341 {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "google.protobuf.MethodOptions "), UPB_TABVALUE_PTR_INIT(&msgs[13]), &strentries[226]},
5342 {UPB_TABKEY_STR("\054", "\000", "\000", "\000", "google.protobuf.Uninterpreted Option.NamePart"), UPB_TABVALUE_PTR_INIT(&msgs[19]), NULL},
5343 };
5344
/* Generated table of 14 upb_tabent slots laid out as seven (empty, occupied)
 * pairs.  Every occupied slot is keyed by the number 999 and holds a pointer
 * to an element of fields[]; three of those elements (fields[72], fields[74],
 * fields[76]) are the same ones keyed by the name "uninterpreted_option" in
 * strentries[].  The third member of every entry is NULL.
 * NOTE(review): which tables index into this array is not visible in this
 * part of the file -- do not reorder or edit entries by hand. */
static const upb_tabent intentries[14] = {
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[72]), NULL},
};
5361
/* Generated table of 232 upb_tabval slots.  Each slot is either empty or
 * holds a pointer: slots 0-201 point at elements of fields[], and slots
 * 202-231 point at string literals whose text matches names that strentries[]
 * keys with integer values (e.g. "LABEL_OPTIONAL", "TYPE_DOUBLE", "SPEED").
 * NOTE(review): the tables that slice this array, and therefore the meaning
 * of each slot's position, are defined outside this part of the file -- the
 * order of entries must be preserved exactly. */
static const upb_tabval arrays[232] = {
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[38]),
  UPB_TABVALUE_PTR_INIT(&fields[16]),
  UPB_TABVALUE_PTR_INIT(&fields[44]),
  UPB_TABVALUE_PTR_INIT(&fields[9]),
  UPB_TABVALUE_PTR_INIT(&fields[15]),
  UPB_TABVALUE_PTR_INIT(&fields[14]),
  UPB_TABVALUE_PTR_INIT(&fields[49]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[66]),
  UPB_TABVALUE_PTR_INIT(&fields[8]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[40]),
  UPB_TABVALUE_PTR_INIT(&fields[78]),
  UPB_TABVALUE_PTR_INIT(&fields[50]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[1]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[37]),
  UPB_TABVALUE_PTR_INIT(&fields[47]),
  UPB_TABVALUE_PTR_INIT(&fields[52]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[41]),
  UPB_TABVALUE_PTR_INIT(&fields[12]),
  UPB_TABVALUE_PTR_INIT(&fields[46]),
  UPB_TABVALUE_PTR_INIT(&fields[27]),
  UPB_TABVALUE_PTR_INIT(&fields[69]),
  UPB_TABVALUE_PTR_INIT(&fields[70]),
  UPB_TABVALUE_PTR_INIT(&fields[4]),
  UPB_TABVALUE_PTR_INIT(&fields[51]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[3]),
  UPB_TABVALUE_PTR_INIT(&fields[58]),
  UPB_TABVALUE_PTR_INIT(&fields[6]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[28]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[11]),
  UPB_TABVALUE_PTR_INIT(&fields[79]),
  /* Slots 51-72: 22 consecutive empty slots. */
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[34]),
  UPB_TABVALUE_PTR_INIT(&fields[57]),
  UPB_TABVALUE_PTR_INIT(&fields[5]),
  UPB_TABVALUE_PTR_INIT(&fields[32]),
  UPB_TABVALUE_PTR_INIT(&fields[10]),
  UPB_TABVALUE_PTR_INIT(&fields[63]),
  UPB_TABVALUE_PTR_INIT(&fields[13]),
  UPB_TABVALUE_PTR_INIT(&fields[53]),
  UPB_TABVALUE_PTR_INIT(&fields[64]),
  UPB_TABVALUE_PTR_INIT(&fields[61]),
  UPB_TABVALUE_PTR_INIT(&fields[80]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[17]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[26]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[25]),
  UPB_TABVALUE_PTR_INIT(&fields[48]),
  UPB_TABVALUE_PTR_INIT(&fields[24]),
  UPB_TABVALUE_PTR_INIT(&fields[18]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[2]),
  UPB_TABVALUE_PTR_INIT(&fields[23]),
  UPB_TABVALUE_PTR_INIT(&fields[62]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[22]),
  /* Slots 107-150: 44 consecutive empty slots. */
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[31]),
  UPB_TABVALUE_PTR_INIT(&fields[45]),
  /* Slots 153-166: 14 consecutive empty slots. */
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[39]),
  UPB_TABVALUE_PTR_INIT(&fields[20]),
  UPB_TABVALUE_PTR_INIT(&fields[56]),
  UPB_TABVALUE_PTR_INIT(&fields[55]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[35]),
  UPB_TABVALUE_PTR_INIT(&fields[33]),
  UPB_TABVALUE_PTR_INIT(&fields[54]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[30]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[59]),
  UPB_TABVALUE_PTR_INIT(&fields[65]),
  UPB_TABVALUE_PTR_INIT(&fields[29]),
  UPB_TABVALUE_PTR_INIT(&fields[68]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[36]),
  UPB_TABVALUE_PTR_INIT(&fields[19]),
  UPB_TABVALUE_PTR_INIT(&fields[60]),
  UPB_TABVALUE_PTR_INIT(&fields[43]),
  UPB_TABVALUE_PTR_INIT(&fields[7]),
  UPB_TABVALUE_PTR_INIT(&fields[67]),
  UPB_TABVALUE_PTR_INIT(&fields[0]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[42]),
  UPB_TABVALUE_PTR_INIT(&fields[21]),
  /* Slots 202-231: string-literal entries (with interleaved empty slots). */
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT("LABEL_OPTIONAL"),
  UPB_TABVALUE_PTR_INIT("LABEL_REQUIRED"),
  UPB_TABVALUE_PTR_INIT("LABEL_REPEATED"),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT("TYPE_DOUBLE"),
  UPB_TABVALUE_PTR_INIT("TYPE_FLOAT"),
  UPB_TABVALUE_PTR_INIT("TYPE_INT64"),
  UPB_TABVALUE_PTR_INIT("TYPE_UINT64"),
  UPB_TABVALUE_PTR_INIT("TYPE_INT32"),
  UPB_TABVALUE_PTR_INIT("TYPE_FIXED64"),
  UPB_TABVALUE_PTR_INIT("TYPE_FIXED32"),
  UPB_TABVALUE_PTR_INIT("TYPE_BOOL"),
  UPB_TABVALUE_PTR_INIT("TYPE_STRING"),
  UPB_TABVALUE_PTR_INIT("TYPE_GROUP"),
  UPB_TABVALUE_PTR_INIT("TYPE_MESSAGE"),
  UPB_TABVALUE_PTR_INIT("TYPE_BYTES"),
  UPB_TABVALUE_PTR_INIT("TYPE_UINT32"),
  UPB_TABVALUE_PTR_INIT("TYPE_ENUM"),
  UPB_TABVALUE_PTR_INIT("TYPE_SFIXED32"),
  UPB_TABVALUE_PTR_INIT("TYPE_SFIXED64"),
  UPB_TABVALUE_PTR_INIT("TYPE_SINT32"),
  UPB_TABVALUE_PTR_INIT("TYPE_SINT64"),
  UPB_TABVALUE_PTR_INIT("STRING"),
  UPB_TABVALUE_PTR_INIT("CORD"),
  UPB_TABVALUE_PTR_INIT("STRING_PIECE"),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT("SPEED"),
  UPB_TABVALUE_PTR_INIT("CODE_SIZE"),
  UPB_TABVALUE_PTR_INIT("LITE_RUNTIME"),
};
5596
5597 static const upb_symtab symtab = UPB_SYMTAB_INIT(UPB_STRTABLE_INIT(24, 31, UPB_C TYPE_PTR, 5, &strentries[204]), &reftables[210], &reftables[211]);
5598
5599 const upb_symtab *upbdefs_google_protobuf_descriptor(const void *owner) {
5600 upb_symtab_ref(&symtab, owner);
5601 return &symtab;
5602 }
5603
#ifdef UPB_DEBUG_REFS
/* Storage for 212 int tables, compiled only when UPB_DEBUG_REFS is defined.
 * Every slot starts out as an empty table of UPB_CTYPE_PTR values.  The
 * symtab initializer earlier in this file takes the addresses of slots 210
 * and 211.
 * NOTE(review): the users of slots 0-209 are not visible in this part of the
 * file; presumably the other statically defined objects reference them.  This
 * definition also follows its first use, so a forward declaration presumably
 * exists earlier in the file -- confirm.
 * The bracketed markers below give the index range of each group of ten. */
static upb_inttable reftables[212] = {
  /* [0..9] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [10..19] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [20..29] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [30..39] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [40..49] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [50..59] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [60..69] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [70..79] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [80..89] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [90..99] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [100..109] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [110..119] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [120..129] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [130..139] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [140..149] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [150..159] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [160..169] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [170..179] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [180..189] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [190..199] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [200..209] */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  /* [210..211]: the two slots whose addresses symtab's initializer takes. */
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
};
#endif
5820
/*
** XXX: The routines in this file that consume a string do not currently
** support a string that spans multiple buffers.  In the future, as upb_sink
** and its buffering/sharing functionality evolve, there should be an easy and
** idiomatic way of correctly handling this case.  For now, we accept this
** limitation since we currently only parse descriptors from single strings.
*/
5828
5829
5830 #include <errno.h>
5831 #include <stdlib.h>
5832 #include <string.h>
5833
/* upb_deflist is an internal-only dynamic array for storing a growing list of
 * upb_defs. */
typedef struct {
  upb_def **defs;  /* Heap array of def pointers; NULL after upb_deflist_init(). */
  size_t len;      /* Number of defs currently stored in "defs". */
  size_t size;     /* Allocated capacity of "defs", counted in elements. */
  bool owned;      /* When true, upb_deflist_uninit() unrefs every stored def
                    * (with the list itself as the owner). */
} upb_deflist;
5842
/* We keep a stack of all the message scopes we are currently in, as well as
 * the top-level file scope.  This is necessary to correctly qualify the
 * definitions that are contained inside.  "name" tracks the name of the
 * message or package (a bare name -- not qualified by any enclosing scopes). */
typedef struct {
  /* Bare (unqualified) name of this scope's message or package. */
  char *name;
  /* Index of the first def that is under this scope.  For msgdefs, the
   * msgdef itself is at start-1. */
  int start;
} upb_descreader_frame;
5853
/* The maximum number of nested declarations that are allowed, i.e.
 *   message Foo {
 *     message Bar {
 *       message Baz {
 *       }
 *     }
 *   }
 *
 * This is a resource limit that affects how big our runtime stack can grow;
 * it is the fixed length of the "stack" array in struct upb_descreader.
 * TODO: make this a runtime-settable property of the Reader instance. */
#define UPB_MAX_MESSAGE_NESTING 64
5865
/* Parsing state of a descriptor reader: the defs built so far, the stack of
 * enclosing scopes, and scratch state for the enum value / field currently
 * being parsed. */
5866 struct upb_descreader {
5867 upb_sink sink; /* Input sink; exposed through upb_descreader_input(). */
5868 upb_deflist defs; /* Every def created so far, in creation order. */
5869 upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING]; /* Scope stack. */
5870 int stack_len; /* Number of frames of "stack" currently in use. */
5871
5872 uint32_t number; /* Number of the enum value being parsed. */
5873 char *name; /* Name of the enum value being parsed (owned), or NULL. */
5874 bool saw_number; /* True once the current enum value supplied a number. */
5875 bool saw_name; /* True once the current enum value supplied a name. */
5876
5877 char *default_string; /* Unparsed default of the current field (owned), or NULL. */
5878
5879 upb_fielddef *f; /* Field currently being built. */
5880 };
5881
/* Copies the first "n" bytes of "buf" into a freshly malloc()ed buffer and
 * appends a NUL terminator.  The caller owns the result and must free() it.
 * Returns NULL if the allocation fails. */
static char *upb_strndup(const char *buf, size_t n) {
  char *copy = malloc(n + 1);
  if (copy != NULL) {
    memcpy(copy, buf, n);
    copy[n] = '\0';
  }
  return copy;
}
5889
/* Returns a newly allocated string that joins the input strings with a '.',
 * for example:
 *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 *   join("", "Baz") -> "Baz"
 *   join(NULL, "Baz") -> "Baz"
 * "name" must be non-NULL.  The caller owns the returned string and must
 * free() it.  Returns NULL if memory cannot be allocated. */
static char *upb_join(const char *base, const char *name) {
  size_t base_len = base ? strlen(base) : 0;
  size_t name_len = strlen(name);
  size_t sep_len = base_len ? 1 : 0; /* No separator when "base" is empty. */
  char *ret = malloc(base_len + sep_len + name_len + 1);
  char *p = ret;

  if (!ret) return NULL;
  if (base_len) {
    memcpy(p, base, base_len);
    p += base_len;
    *p++ = '.';
  }
  /* Copies the terminating NUL of "name" as well. */
  memcpy(p, name, name_len + 1);
  return ret;
}
5907
5908
5909 /* upb_deflist ****************************************************************/
5910
5911 void upb_deflist_init(upb_deflist *l) {
5912 l->size = 0;
5913 l->defs = NULL;
5914 l->len = 0;
5915 l->owned = true;
5916 }
5917
5918 void upb_deflist_uninit(upb_deflist *l) {
5919 size_t i;
5920 if (l->owned)
5921 for(i = 0; i < l->len; i++)
5922 upb_def_unref(l->defs[i], l);
5923 free(l->defs);
5924 }
5925
5926 bool upb_deflist_push(upb_deflist *l, upb_def *d) {
5927 if(++l->len >= l->size) {
5928 size_t new_size = UPB_MAX(l->size, 4);
5929 new_size *= 2;
5930 l->defs = realloc(l->defs, new_size * sizeof(void *));
5931 if (!l->defs) return false;
5932 l->size = new_size;
5933 }
5934 l->defs[l->len - 1] = d;
5935 return true;
5936 }
5937
5938 void upb_deflist_donaterefs(upb_deflist *l, void *owner) {
5939 size_t i;
5940 assert(l->owned);
5941 for (i = 0; i < l->len; i++)
5942 upb_def_donateref(l->defs[i], l, owner);
5943 l->owned = false;
5944 }
5945
5946 static upb_def *upb_deflist_last(upb_deflist *l) {
5947 return l->defs[l->len-1];
5948 }
5949
5950 /* Qualify the defname for all defs starting with offset "start" with "str". */
5951 static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
5952 uint32_t i;
5953 for (i = start; i < l->len; i++) {
5954 upb_def *def = l->defs[i];
5955 char *name = upb_join(str, upb_def_fullname(def));
5956 upb_def_setfullname(def, name, NULL);
5957 free(name);
5958 }
5959 }
5960
5961
5962 /* upb_descreader ************************************************************/
5963
5964 static upb_msgdef *upb_descreader_top(upb_descreader *r) {
5965 int index;
5966 assert(r->stack_len > 1);
5967 index = r->stack[r->stack_len-1].start - 1;
5968 assert(index >= 0);
5969 return upb_downcast_msgdef_mutable(r->defs.defs[index]);
5970 }
5971
5972 static upb_def *upb_descreader_last(upb_descreader *r) {
5973 return upb_deflist_last(&r->defs);
5974 }
5975
5976 /* Start/end handlers for FileDescriptorProto and DescriptorProto (the two
5977 * entities that have names and can contain sub-definitions. */
5978 void upb_descreader_startcontainer(upb_descreader *r) {
5979 upb_descreader_frame *f = &r->stack[r->stack_len++];
5980 f->start = r->defs.len;
5981 f->name = NULL;
5982 }
5983
5984 void upb_descreader_endcontainer(upb_descreader *r) {
5985 upb_descreader_frame *f = &r->stack[--r->stack_len];
5986 upb_deflist_qualify(&r->defs, f->name, f->start);
5987 free(f->name);
5988 f->name = NULL;
5989 }
5990
5991 void upb_descreader_setscopename(upb_descreader *r, char *str) {
5992 upb_descreader_frame *f = &r->stack[r->stack_len-1];
5993 free(f->name);
5994 f->name = str;
5995 }
5996
5997 /* Handlers for google.protobuf.FileDescriptorProto. */
5998 static bool file_startmsg(void *r, const void *hd) {
5999 UPB_UNUSED(hd);
6000 upb_descreader_startcontainer(r);
6001 return true;
6002 }
6003
6004 static bool file_endmsg(void *closure, const void *hd, upb_status *status) {
6005 upb_descreader *r = closure;
6006 UPB_UNUSED(hd);
6007 UPB_UNUSED(status);
6008 upb_descreader_endcontainer(r);
6009 return true;
6010 }
6011
6012 static size_t file_onpackage(void *closure, const void *hd, const char *buf,
6013 size_t n, const upb_bufhandle *handle) {
6014 upb_descreader *r = closure;
6015 UPB_UNUSED(hd);
6016 UPB_UNUSED(handle);
6017 /* XXX: see comment at the top of the file. */
6018 upb_descreader_setscopename(r, upb_strndup(buf, n));
6019 return n;
6020 }
6021
6022 /* Handlers for google.protobuf.EnumValueDescriptorProto. */
6023 static bool enumval_startmsg(void *closure, const void *hd) {
6024 upb_descreader *r = closure;
6025 UPB_UNUSED(hd);
6026 r->saw_number = false;
6027 r->saw_name = false;
6028 return true;
6029 }
6030
6031 static size_t enumval_onname(void *closure, const void *hd, const char *buf,
6032 size_t n, const upb_bufhandle *handle) {
6033 upb_descreader *r = closure;
6034 UPB_UNUSED(hd);
6035 UPB_UNUSED(handle);
6036 /* XXX: see comment at the top of the file. */
6037 free(r->name);
6038 r->name = upb_strndup(buf, n);
6039 r->saw_name = true;
6040 return n;
6041 }
6042
6043 static bool enumval_onnumber(void *closure, const void *hd, int32_t val) {
6044 upb_descreader *r = closure;
6045 UPB_UNUSED(hd);
6046 r->number = val;
6047 r->saw_number = true;
6048 return true;
6049 }
6050
6051 static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
6052 upb_descreader *r = closure;
6053 upb_enumdef *e;
6054 UPB_UNUSED(hd);
6055
6056 if(!r->saw_number || !r->saw_name) {
6057 upb_status_seterrmsg(status, "Enum value missing name or number.");
6058 return false;
6059 }
6060 e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
6061 upb_enumdef_addval(e, r->name, r->number, status);
6062 free(r->name);
6063 r->name = NULL;
6064 return true;
6065 }
6066
6067
6068 /* Handlers for google.protobuf.EnumDescriptorProto. */
6069 static bool enum_startmsg(void *closure, const void *hd) {
6070 upb_descreader *r = closure;
6071 UPB_UNUSED(hd);
6072 upb_deflist_push(&r->defs,
6073 upb_enumdef_upcast_mutable(upb_enumdef_new(&r->defs)));
6074 return true;
6075 }
6076
6077 static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
6078 upb_descreader *r = closure;
6079 upb_enumdef *e;
6080 UPB_UNUSED(hd);
6081
6082 e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
6083 if (upb_def_fullname(upb_descreader_last(r)) == NULL) {
6084 upb_status_seterrmsg(status, "Enum had no name.");
6085 return false;
6086 }
6087 if (upb_enumdef_numvals(e) == 0) {
6088 upb_status_seterrmsg(status, "Enum had no values.");
6089 return false;
6090 }
6091 return true;
6092 }
6093
6094 static size_t enum_onname(void *closure, const void *hd, const char *buf,
6095 size_t n, const upb_bufhandle *handle) {
6096 upb_descreader *r = closure;
6097 char *fullname = upb_strndup(buf, n);
6098 UPB_UNUSED(hd);
6099 UPB_UNUSED(handle);
6100 /* XXX: see comment at the top of the file. */
6101 upb_def_setfullname(upb_descreader_last(r), fullname, NULL);
6102 free(fullname);
6103 return n;
6104 }
6105
6106 /* Handlers for google.protobuf.FieldDescriptorProto */
6107 static bool field_startmsg(void *closure, const void *hd) {
6108 upb_descreader *r = closure;
6109 UPB_UNUSED(hd);
6110 r->f = upb_fielddef_new(&r->defs);
6111 free(r->default_string);
6112 r->default_string = NULL;
6113
6114 /* fielddefs default to packed, but descriptors default to non-packed. */
6115 upb_fielddef_setpacked(r->f, false);
6116 return true;
6117 }
6118
6119 /* Converts the default value in string "str" into "d". Passes a ref on str.
6120 * Returns true on success. */
6121 static bool parse_default(char *str, upb_fielddef *f) {
6122 bool success = true;
6123 char *end;
6124 switch (upb_fielddef_type(f)) {
6125 case UPB_TYPE_INT32: {
6126 long val = strtol(str, &end, 0);
6127 if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
6128 success = false;
6129 else
6130 upb_fielddef_setdefaultint32(f, val);
6131 break;
6132 }
6133 case UPB_TYPE_INT64: {
6134 /* XXX: Need to write our own strtoll, since it's not available in c89. */
6135 long long val = strtol(str, &end, 0);
6136 if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end)
6137 success = false;
6138 else
6139 upb_fielddef_setdefaultint64(f, val);
6140 break;
6141 }
6142 case UPB_TYPE_UINT32: {
6143 unsigned long val = strtoul(str, &end, 0);
6144 if (val > UINT32_MAX || errno == ERANGE || *end)
6145 success = false;
6146 else
6147 upb_fielddef_setdefaultuint32(f, val);
6148 break;
6149 }
6150 case UPB_TYPE_UINT64: {
6151 /* XXX: Need to write our own strtoull, since it's not available in c89. * /
6152 unsigned long long val = strtoul(str, &end, 0);
6153 if (val > UINT64_MAX || errno == ERANGE || *end)
6154 success = false;
6155 else
6156 upb_fielddef_setdefaultuint64(f, val);
6157 break;
6158 }
6159 case UPB_TYPE_DOUBLE: {
6160 double val = strtod(str, &end);
6161 if (errno == ERANGE || *end)
6162 success = false;
6163 else
6164 upb_fielddef_setdefaultdouble(f, val);
6165 break;
6166 }
6167 case UPB_TYPE_FLOAT: {
6168 /* XXX: Need to write our own strtof, since it's not available in c89. */
6169 float val = strtod(str, &end);
6170 if (errno == ERANGE || *end)
6171 success = false;
6172 else
6173 upb_fielddef_setdefaultfloat(f, val);
6174 break;
6175 }
6176 case UPB_TYPE_BOOL: {
6177 if (strcmp(str, "false") == 0)
6178 upb_fielddef_setdefaultbool(f, false);
6179 else if (strcmp(str, "true") == 0)
6180 upb_fielddef_setdefaultbool(f, true);
6181 else
6182 success = false;
6183 break;
6184 }
6185 default: abort();
6186 }
6187 return success;
6188 }
6189
6190 static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
6191 upb_descreader *r = closure;
6192 upb_fielddef *f = r->f;
6193 UPB_UNUSED(hd);
6194
6195 /* TODO: verify that all required fields were present. */
6196 assert(upb_fielddef_number(f) != 0);
6197 assert(upb_fielddef_name(f) != NULL);
6198 assert((upb_fielddef_subdefname(f) != NULL) == upb_fielddef_hassubdef(f));
6199
6200 if (r->default_string) {
6201 if (upb_fielddef_issubmsg(f)) {
6202 upb_status_seterrmsg(status, "Submessages cannot have defaults.");
6203 return false;
6204 }
6205 if (upb_fielddef_isstring(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM) {
6206 upb_fielddef_setdefaultcstr(f, r->default_string, NULL);
6207 } else {
6208 if (r->default_string && !parse_default(r->default_string, f)) {
6209 /* We don't worry too much about giving a great error message since the
6210 * compiler should have ensured this was correct. */
6211 upb_status_seterrmsg(status, "Error converting default value.");
6212 return false;
6213 }
6214 }
6215 }
6216 return true;
6217 }
6218
6219 static bool field_onlazy(void *closure, const void *hd, bool val) {
6220 upb_descreader *r = closure;
6221 UPB_UNUSED(hd);
6222
6223 upb_fielddef_setlazy(r->f, val);
6224 return true;
6225 }
6226
6227 static bool field_onpacked(void *closure, const void *hd, bool val) {
6228 upb_descreader *r = closure;
6229 UPB_UNUSED(hd);
6230
6231 upb_fielddef_setpacked(r->f, val);
6232 return true;
6233 }
6234
6235 static bool field_ontype(void *closure, const void *hd, int32_t val) {
6236 upb_descreader *r = closure;
6237 UPB_UNUSED(hd);
6238
6239 upb_fielddef_setdescriptortype(r->f, val);
6240 return true;
6241 }
6242
6243 static bool field_onlabel(void *closure, const void *hd, int32_t val) {
6244 upb_descreader *r = closure;
6245 UPB_UNUSED(hd);
6246
6247 upb_fielddef_setlabel(r->f, val);
6248 return true;
6249 }
6250
6251 static bool field_onnumber(void *closure, const void *hd, int32_t val) {
6252 upb_descreader *r = closure;
6253 bool ok = upb_fielddef_setnumber(r->f, val, NULL);
6254 UPB_UNUSED(hd);
6255
6256 UPB_ASSERT_VAR(ok, ok);
6257 return true;
6258 }
6259
6260 static size_t field_onname(void *closure, const void *hd, const char *buf,
6261 size_t n, const upb_bufhandle *handle) {
6262 upb_descreader *r = closure;
6263 char *name = upb_strndup(buf, n);
6264 UPB_UNUSED(hd);
6265 UPB_UNUSED(handle);
6266
6267 /* XXX: see comment at the top of the file. */
6268 upb_fielddef_setname(r->f, name, NULL);
6269 free(name);
6270 return n;
6271 }
6272
6273 static size_t field_ontypename(void *closure, const void *hd, const char *buf,
6274 size_t n, const upb_bufhandle *handle) {
6275 upb_descreader *r = closure;
6276 char *name = upb_strndup(buf, n);
6277 UPB_UNUSED(hd);
6278 UPB_UNUSED(handle);
6279
6280 /* XXX: see comment at the top of the file. */
6281 upb_fielddef_setsubdefname(r->f, name, NULL);
6282 free(name);
6283 return n;
6284 }
6285
6286 static size_t field_onextendee(void *closure, const void *hd, const char *buf,
6287 size_t n, const upb_bufhandle *handle) {
6288 upb_descreader *r = closure;
6289 char *name = upb_strndup(buf, n);
6290 UPB_UNUSED(hd);
6291 UPB_UNUSED(handle);
6292
6293 /* XXX: see comment at the top of the file. */
6294 upb_fielddef_setcontainingtypename(r->f, name, NULL);
6295 free(name);
6296 return n;
6297 }
6298
6299 static size_t field_ondefaultval(void *closure, const void *hd, const char *buf,
6300 size_t n, const upb_bufhandle *handle) {
6301 upb_descreader *r = closure;
6302 UPB_UNUSED(hd);
6303 UPB_UNUSED(handle);
6304
6305 /* Have to convert from string to the correct type, but we might not know the
6306 * type yet, so we save it as a string until the end of the field.
6307 * XXX: see comment at the top of the file. */
6308 free(r->default_string);
6309 r->default_string = upb_strndup(buf, n);
6310 return n;
6311 }
6312
6313 /* Handlers for google.protobuf.DescriptorProto (representing a message). */
6314 static bool msg_startmsg(void *closure, const void *hd) {
6315 upb_descreader *r = closure;
6316 UPB_UNUSED(hd);
6317
6318 upb_deflist_push(&r->defs,
6319 upb_msgdef_upcast_mutable(upb_msgdef_new(&r->defs)));
6320 upb_descreader_startcontainer(r);
6321 return true;
6322 }
6323
6324 static bool msg_endmsg(void *closure, const void *hd, upb_status *status) {
6325 upb_descreader *r = closure;
6326 upb_msgdef *m = upb_descreader_top(r);
6327 UPB_UNUSED(hd);
6328
6329 if(!upb_def_fullname(upb_msgdef_upcast_mutable(m))) {
6330 upb_status_seterrmsg(status, "Encountered message with no name.");
6331 return false;
6332 }
6333 upb_descreader_endcontainer(r);
6334 return true;
6335 }
6336
6337 static size_t msg_onname(void *closure, const void *hd, const char *buf,
6338 size_t n, const upb_bufhandle *handle) {
6339 upb_descreader *r = closure;
6340 upb_msgdef *m = upb_descreader_top(r);
6341 /* XXX: see comment at the top of the file. */
6342 char *name = upb_strndup(buf, n);
6343 UPB_UNUSED(hd);
6344 UPB_UNUSED(handle);
6345
6346 upb_def_setfullname(upb_msgdef_upcast_mutable(m), name, NULL);
6347 upb_descreader_setscopename(r, name); /* Passes ownership of name. */
6348 return n;
6349 }
6350
6351 static bool msg_onendfield(void *closure, const void *hd) {
6352 upb_descreader *r = closure;
6353 upb_msgdef *m = upb_descreader_top(r);
6354 UPB_UNUSED(hd);
6355
6356 upb_msgdef_addfield(m, r->f, &r->defs, NULL);
6357 r->f = NULL;
6358 return true;
6359 }
6360
6361 static bool pushextension(void *closure, const void *hd) {
6362 upb_descreader *r = closure;
6363 UPB_UNUSED(hd);
6364
6365 assert(upb_fielddef_containingtypename(r->f));
6366 upb_fielddef_setisextension(r->f, true);
6367 upb_deflist_push(&r->defs, upb_fielddef_upcast_mutable(r->f));
6368 r->f = NULL;
6369 return true;
6370 }
6371
6372 #define D(name) upbdefs_google_protobuf_ ## name(s)
6373
6374 static void reghandlers(const void *closure, upb_handlers *h) {
6375 const upb_symtab *s = closure;
6376 const upb_msgdef *m = upb_handlers_msgdef(h);
6377
6378 if (m == D(DescriptorProto)) {
6379 upb_handlers_setstartmsg(h, &msg_startmsg, NULL);
6380 upb_handlers_setendmsg(h, &msg_endmsg, NULL);
6381 upb_handlers_setstring(h, D(DescriptorProto_name), &msg_onname, NULL);
6382 upb_handlers_setendsubmsg(h, D(DescriptorProto_field), &msg_onendfield,
6383 NULL);
6384 upb_handlers_setendsubmsg(h, D(DescriptorProto_extension), &pushextension,
6385 NULL);
6386 } else if (m == D(FileDescriptorProto)) {
6387 upb_handlers_setstartmsg(h, &file_startmsg, NULL);
6388 upb_handlers_setendmsg(h, &file_endmsg, NULL);
6389 upb_handlers_setstring(h, D(FileDescriptorProto_package), &file_onpackage,
6390 NULL);
6391 upb_handlers_setendsubmsg(h, D(FileDescriptorProto_extension), &pushextensio n,
6392 NULL);
6393 } else if (m == D(EnumValueDescriptorProto)) {
6394 upb_handlers_setstartmsg(h, &enumval_startmsg, NULL);
6395 upb_handlers_setendmsg(h, &enumval_endmsg, NULL);
6396 upb_handlers_setstring(h, D(EnumValueDescriptorProto_name), &enumval_onname, NULL);
6397 upb_handlers_setint32(h, D(EnumValueDescriptorProto_number), &enumval_onnumb er,
6398 NULL);
6399 } else if (m == D(EnumDescriptorProto)) {
6400 upb_handlers_setstartmsg(h, &enum_startmsg, NULL);
6401 upb_handlers_setendmsg(h, &enum_endmsg, NULL);
6402 upb_handlers_setstring(h, D(EnumDescriptorProto_name), &enum_onname, NULL);
6403 } else if (m == D(FieldDescriptorProto)) {
6404 upb_handlers_setstartmsg(h, &field_startmsg, NULL);
6405 upb_handlers_setendmsg(h, &field_endmsg, NULL);
6406 upb_handlers_setint32(h, D(FieldDescriptorProto_type), &field_ontype,
6407 NULL);
6408 upb_handlers_setint32(h, D(FieldDescriptorProto_label), &field_onlabel,
6409 NULL);
6410 upb_handlers_setint32(h, D(FieldDescriptorProto_number), &field_onnumber,
6411 NULL);
6412 upb_handlers_setstring(h, D(FieldDescriptorProto_name), &field_onname,
6413 NULL);
6414 upb_handlers_setstring(h, D(FieldDescriptorProto_type_name),
6415 &field_ontypename, NULL);
6416 upb_handlers_setstring(h, D(FieldDescriptorProto_extendee),
6417 &field_onextendee, NULL);
6418 upb_handlers_setstring(h, D(FieldDescriptorProto_default_value),
6419 &field_ondefaultval, NULL);
6420 } else if (m == D(FieldOptions)) {
6421 upb_handlers_setbool(h, D(FieldOptions_lazy), &field_onlazy, NULL);
6422 upb_handlers_setbool(h, D(FieldOptions_packed), &field_onpacked, NULL);
6423 }
6424 }
6425
6426 #undef D
6427
6428 void descreader_cleanup(void *_r) {
6429 upb_descreader *r = _r;
6430 free(r->name);
6431 upb_deflist_uninit(&r->defs);
6432 free(r->default_string);
6433 while (r->stack_len > 0) {
6434 upb_descreader_frame *f = &r->stack[--r->stack_len];
6435 free(f->name);
6436 }
6437 }
6438
6439
6440 /* Public API ****************************************************************/
6441
6442 upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
6443 upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader));
6444 if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) {
6445 return NULL;
6446 }
6447
6448 upb_deflist_init(&r->defs);
6449 upb_sink_reset(upb_descreader_input(r), h, r);
6450 r->stack_len = 0;
6451 r->name = NULL;
6452 r->default_string = NULL;
6453
6454 return r;
6455 }
6456
6457 upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
6458 *n = r->defs.len;
6459 upb_deflist_donaterefs(&r->defs, owner);
6460 return r->defs.defs;
6461 }
6462
6463 upb_sink *upb_descreader_input(upb_descreader *r) {
6464 return &r->sink;
6465 }
6466
6467 const upb_handlers *upb_descreader_newhandlers(const void *owner) {
6468 const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s);
6469 const upb_handlers *h = upb_handlers_newfrozen(
6470 upbdefs_google_protobuf_FileDescriptorSet(s), owner, reghandlers, s);
6471 upb_symtab_unref(s, &s);
6472 return h;
6473 }
6474 /*
6475 ** protobuf decoder bytecode compiler
6476 **
6477 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
6478 ** according to that specific schema and destination handlers.
6479 **
6480 ** Compiling to bytecode is always the first step. If we are using the
6481 ** interpreted decoder we leave it as bytecode and interpret that. If we are
6482 ** using a JIT decoder we use a code generator to turn the bytecode into native
6483 ** code, LLVM IR, etc.
6484 **
6485 ** Bytecode definition is in decoder.int.h.
6486 */
6487
6488 #include <stdarg.h>
6489
6490 #ifdef UPB_DUMP_BYTECODE
6491 #include <stdio.h>
6492 #endif
6493
6494 #define MAXLABEL 5
6495 #define EMPTYLABEL -1
6496
6497 /* mgroup *********************************************************************/
6498
6499 static void freegroup(upb_refcounted *r) {
6500 mgroup *g = (mgroup*)r;
6501 upb_inttable_uninit(&g->methods);
6502 #ifdef UPB_USE_JIT_X64
6503 upb_pbdecoder_freejit(g);
6504 #endif
6505 free(g->bytecode);
6506 free(g);
6507 }
6508
6509 static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
6510 void *closure) {
6511 const mgroup *g = (const mgroup*)r;
6512 upb_inttable_iter i;
6513 upb_inttable_begin(&i, &g->methods);
6514 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
6515 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
6516 visit(r, upb_pbdecodermethod_upcast(method), closure);
6517 }
6518 }
6519
6520 mgroup *newgroup(const void *owner) {
6521 mgroup *g = malloc(sizeof(*g));
6522 static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
6523 upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner);
6524 upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
6525 g->bytecode = NULL;
6526 g->bytecode_end = NULL;
6527 return g;
6528 }
6529
6530
6531 /* upb_pbdecodermethod ********************************************************/
6532
6533 static void freemethod(upb_refcounted *r) {
6534 upb_pbdecodermethod *method = (upb_pbdecodermethod*)r;
6535
6536 if (method->dest_handlers_) {
6537 upb_handlers_unref(method->dest_handlers_, method);
6538 }
6539
6540 upb_inttable_uninit(&method->dispatch);
6541 free(method);
6542 }
6543
6544 static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
6545 void *closure) {
6546 const upb_pbdecodermethod *m = (const upb_pbdecodermethod*)r;
6547 visit(r, m->group, closure);
6548 }
6549
6550 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
6551 mgroup *group) {
6552 static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
6553 upb_pbdecodermethod *ret = malloc(sizeof(*ret));
6554 upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
6555 upb_byteshandler_init(&ret->input_handler_);
6556
6557 /* The method references the group and vice-versa, in a circular reference. */
6558 upb_ref2(ret, group);
6559 upb_ref2(group, ret);
6560 upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
6561 upb_pbdecodermethod_unref(ret, &ret);
6562
6563 ret->group = mgroup_upcast_mutable(group);
6564 ret->dest_handlers_ = dest_handlers;
6565 ret->is_native_ = false; /* If we JIT, it will update this later. */
6566 upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
6567
6568 if (ret->dest_handlers_) {
6569 upb_handlers_ref(ret->dest_handlers_, ret);
6570 }
6571 return ret;
6572 }
6573
6574 const upb_handlers *upb_pbdecodermethod_desthandlers(
6575 const upb_pbdecodermethod *m) {
6576 return m->dest_handlers_;
6577 }
6578
6579 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
6580 const upb_pbdecodermethod *m) {
6581 return &m->input_handler_;
6582 }
6583
6584 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
6585 return m->is_native_;
6586 }
6587
6588 const upb_pbdecodermethod *upb_pbdecodermethod_new(
6589 const upb_pbdecodermethodopts *opts, const void *owner) {
6590 const upb_pbdecodermethod *ret;
6591 upb_pbcodecache cache;
6592
6593 upb_pbcodecache_init(&cache);
6594 ret = upb_pbcodecache_getdecodermethod(&cache, opts);
6595 upb_pbdecodermethod_ref(ret, owner);
6596 upb_pbcodecache_uninit(&cache);
6597 return ret;
6598 }
6599
6600
6601 /* bytecode compiler **********************************************************/
6602
6603 /* Data used only at compilation time. */
6604 typedef struct {
6605 mgroup *group; /* Group whose bytecode buffer is being written. */
6606
6607 uint32_t *pc; /* Next write position inside the group's bytecode buffer. */
6608 int fwd_labels[MAXLABEL]; /* Per label: bytecode offset of the latest unresolved forward reference, or EMPTYLABEL. */
6609 int back_labels[MAXLABEL]; /* Per label: bytecode offset where the label was last defined, or EMPTYLABEL. */
6610
6611 /* For fields marked "lazy", parse them lazily or eagerly? */
6612 bool lazy;
6613 } compiler;
6614
6615 static compiler *newcompiler(mgroup *group, bool lazy) {
6616 compiler *ret = malloc(sizeof(*ret));
6617 int i;
6618
6619 ret->group = group;
6620 ret->lazy = lazy;
6621 for (i = 0; i < MAXLABEL; i++) {
6622 ret->fwd_labels[i] = EMPTYLABEL;
6623 ret->back_labels[i] = EMPTYLABEL;
6624 }
6625 return ret;
6626 }
6627
6628 static void freecompiler(compiler *c) {
6629 free(c);
6630 }
6631
6632 const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
6633
6634 /* How many words an instruction is. */
6635 static int instruction_len(uint32_t instr) {
6636 switch (getop(instr)) {
6637 case OP_SETDISPATCH: return 1 + ptr_words;
6638 case OP_TAGN: return 3;
6639 case OP_SETBIGGROUPNUM: return 2;
6640 default: return 1;
6641 }
6642 }
6643
6644 bool op_has_longofs(int32_t instruction) {
6645 switch (getop(instruction)) {
6646 case OP_CALL:
6647 case OP_BRANCH:
6648 case OP_CHECKDELIM:
6649 return true;
6650 /* The "tag" instructions only have 8 bytes available for the jump target,
6651 * but that is ok because these opcodes only require short jumps. */
6652 case OP_TAG1:
6653 case OP_TAG2:
6654 case OP_TAGN:
6655 return false;
6656 default:
6657 assert(false);
6658 return false;
6659 }
6660 }
6661
/* Extracts the signed jump offset stored in "instruction": a sign-extended
 * byte for short-offset opcodes, otherwise all bits above the opcode byte. */
static int32_t getofs(uint32_t instruction) {
  if (!op_has_longofs(instruction)) {
    return (int8_t)(instruction >> 8);
  }
  return (int32_t)instruction >> 8;
}
6669
/* Stores jump offset "ofs" into "*instruction", keeping the opcode.
 *
 * For long-offset opcodes the offset replaces everything above the opcode
 * byte; for short-offset opcodes only bits 8-15 are replaced and the rest of
 * the word is preserved.  Asserts that the offset survived the encoding. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  if (op_has_longofs(*instruction)) {
    /* Shift as unsigned: left-shifting a negative signed value (a backward
     * jump) is undefined behaviour.  The resulting bits are the same. */
    *instruction = getop(*instruction) | ((uint32_t)ofs << 8);
  } else {
    *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
  }
  assert(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
}
6678
6679 static uint32_t pcofs(compiler *c) { return c->pc - c->group->bytecode; }
6680
6681 /* Defines a local label at the current PC location. All previous forward
6682 * references are updated to point to this location. The location is noted
6683 * for any future backward references. */
6684 static void label(compiler *c, unsigned int label) {
6685 int val;
6686 uint32_t *codep;
6687
6688 assert(label < MAXLABEL);
6689 val = c->fwd_labels[label];
6690 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
6691 while (codep) {
6692 int ofs = getofs(*codep);
6693 setofs(codep, c->pc - codep - instruction_len(*codep));
6694 codep = ofs ? codep + ofs : NULL;
6695 }
6696 c->fwd_labels[label] = EMPTYLABEL;
6697 c->back_labels[label] = pcofs(c);
6698 }
6699
6700 /* Creates a reference to a numbered label; either a forward reference
6701 * (positive arg) or backward reference (negative arg). For forward references
6702 * the value returned now is actually a "next" pointer into a linked list of all
6703 * instructions that use this label and will be patched later when the label is
6704 * defined with label().
6705 *
6706 * The returned value is the offset that should be written into the instruction.
6707 */
6708 static int32_t labelref(compiler *c, int label) {
6709 assert(label < MAXLABEL);
6710 if (label == LABEL_DISPATCH) {
6711 /* No resolving required. */
6712 return 0;
6713 } else if (label < 0) {
6714 /* Backward local label. Relative to the next instruction. */
6715 uint32_t from = (c->pc + 1) - c->group->bytecode;
6716 return c->back_labels[-label] - from;
6717 } else {
6718 /* Forward local label: prepend to (possibly-empty) linked list. */
6719 int *lptr = &c->fwd_labels[label];
6720 int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
6721 *lptr = pcofs(c);
6722 return ret;
6723 }
6724 }
6725
6726 static void put32(compiler *c, uint32_t v) {
6727 mgroup *g = c->group;
6728 if (c->pc == g->bytecode_end) {
6729 int ofs = pcofs(c);
6730 size_t oldsize = g->bytecode_end - g->bytecode;
6731 size_t newsize = UPB_MAX(oldsize * 2, 64);
6732 /* TODO(haberman): handle OOM. */
6733 g->bytecode = realloc(g->bytecode, newsize * sizeof(uint32_t));
6734 g->bytecode_end = g->bytecode + newsize;
6735 c->pc = g->bytecode + ofs;
6736 }
6737 *c->pc++ = v;
6738 }
6739
/* Emits one bytecode instruction for opcode "op" at the current PC.  The
 * variadic arguments depend on the opcode:
 *   OP_SETDISPATCH              a pointer (void*), stored inline after the opcode
 *   OP_PARSE_*, OP_START*/END*
 *     for seq/submsg/str,
 *     OP_STRING, OP_PUSHTAGDELIM a upb_selector_t
 *   OP_SETBIGGROUPNUM           an int, stored in a second word
 *   OP_CALL                     the upb_pbdecodermethod to call
 *   OP_CHECKDELIM, OP_BRANCH    a label (int, see labelref())
 *   OP_TAG1, OP_TAG2, OP_TAGN   a label (int) followed by the tag (uint64_t)
 *   all others handled here     no arguments
 * Opcodes without a case below emit nothing. */
6740 static void putop(compiler *c, opcode op, ...) {
6741 va_list ap;
6742 va_start(ap, op);
6743
6744 switch (op) {
6745 case OP_SETDISPATCH: {
6746 uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
6747 put32(c, OP_SETDISPATCH);
6748 put32(c, ptr); /* Low 32 bits of the pointer. */
6749 if (sizeof(uintptr_t) > sizeof(uint32_t))
6750 put32(c, (uint64_t)ptr >> 32); /* High 32 bits, only when pointers are wider than a word. */
6751 break;
6752 }
6753 case OP_STARTMSG:
6754 case OP_ENDMSG:
6755 case OP_PUSHLENDELIM:
6756 case OP_POP:
6757 case OP_SETDELIM:
6758 case OP_HALT:
6759 case OP_RET:
6760 case OP_DISPATCH:
6761 put32(c, op); /* Opcode only, no operand. */
6762 break;
6763 case OP_PARSE_DOUBLE:
6764 case OP_PARSE_FLOAT:
6765 case OP_PARSE_INT64:
6766 case OP_PARSE_UINT64:
6767 case OP_PARSE_INT32:
6768 case OP_PARSE_FIXED64:
6769 case OP_PARSE_FIXED32:
6770 case OP_PARSE_BOOL:
6771 case OP_PARSE_UINT32:
6772 case OP_PARSE_SFIXED32:
6773 case OP_PARSE_SFIXED64:
6774 case OP_PARSE_SINT32:
6775 case OP_PARSE_SINT64:
6776 case OP_STARTSEQ:
6777 case OP_ENDSEQ:
6778 case OP_STARTSUBMSG:
6779 case OP_ENDSUBMSG:
6780 case OP_STARTSTR:
6781 case OP_STRING:
6782 case OP_ENDSTR:
6783 case OP_PUSHTAGDELIM:
6784 put32(c, op | va_arg(ap, upb_selector_t) << 8); /* Selector sits above the opcode byte. */
6785 break;
6786 case OP_SETBIGGROUPNUM:
6787 put32(c, op);
6788 put32(c, va_arg(ap, int)); /* Group number in its own word. */
6789 break;
6790 case OP_CALL: {
6791 const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
6792 put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8); /* Target relative to the next instruction. */
6793 break;
6794 }
6795 case OP_CHECKDELIM:
6796 case OP_BRANCH: {
6797 uint32_t instruction = op;
6798 int label = va_arg(ap, int);
6799 setofs(&instruction, labelref(c, label));
6800 put32(c, instruction);
6801 break;
6802 }
6803 case OP_TAG1:
6804 case OP_TAG2: {
6805 int label = va_arg(ap, int);
6806 uint64_t tag = va_arg(ap, uint64_t);
6807 uint32_t instruction = op | (tag << 16); /* Tag is packed into the upper 16 bits. */
6808 assert(tag <= 0xffff);
6809 setofs(&instruction, labelref(c, label));
6810 put32(c, instruction);
6811 break;
6812 }
6813 case OP_TAGN: {
6814 int label = va_arg(ap, int);
6815 uint64_t tag = va_arg(ap, uint64_t);
6816 uint32_t instruction = op | (upb_value_size(tag) << 16);
6817 setofs(&instruction, labelref(c, label));
6818 put32(c, instruction);
6819 put32(c, tag); /* Low 32 bits of the tag. */
6820 put32(c, tag >> 32); /* High 32 bits of the tag. */
6821 break;
6822 }
6823 }
6824
6825 va_end(ap);
6826 }
6827
/* Only built for the x64 JIT or when bytecode dumping is enabled. */
6828 #if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)
6829
/* Returns a string literal naming opcode "op", produced by stringizing the
 * OP_xxx identifier (presumably "OP_STARTMSG" etc., assuming the opcodes are
 * enum constants rather than macros -- confirm in decoder.int.h), or
 * "<unknown op>" for a value that matches no case.
 * Note that QUOTE, EXPAND_AND_QUOTE and OPNAME stay defined after this
 * function; only OP and T are #undef'd. */
6830 const char *upb_pbdecoder_getopname(unsigned int op) {
6831 #define QUOTE(x) #x
6832 #define EXPAND_AND_QUOTE(x) QUOTE(x)
6833 #define OPNAME(x) OP_##x
6834 #define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
6835 #define T(x) OP(PARSE_##x)
6836 /* Keep in sync with list in decoder.int.h. */
6837 switch ((opcode)op) {
6838 T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
6839 T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
6840 OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
6841 OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
6842 OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
6843 OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
6844 OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
6845 }
6846 return "<unknown op>";
6847 #undef OP
6848 #undef T
6849 }
6850
6851 #endif
6852
#ifdef UPB_DUMP_BYTECODE

/* Writes a human-readable listing of the bytecode in [p, end) to "f", one
 * instruction per line: address, word offset from the start, opcode name and
 * the operands that apply to the opcode.  Jump targets are printed as word
 * offsets from the start of the listing. */
static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {

  uint32_t *begin = p;

  while (p < end) {
    uint32_t instr;
    uint8_t op;

    /* %p is only defined for void pointers, hence the cast. */
    fprintf(f, "%p %8tx", (void *)p, p - begin);
    instr = *p++;
    op = getop(instr);
    fprintf(f, " %s", upb_pbdecoder_getopname(op));
    switch ((opcode)op) {
      case OP_SETDISPATCH: {
        const upb_inttable *dispatch;
        const upb_pbdecodermethod *method;
        memcpy(&dispatch, p, sizeof(void*));
        p += ptr_words;
        /* The inline pointer addresses the "dispatch" member of a method;
         * step back to the enclosing method. */
        method =
            (void *)((char *)dispatch -
                     offsetof(upb_pbdecodermethod, dispatch));
        fprintf(f, " %s", upb_msgdef_fullname(
                              upb_handlers_msgdef(method->dest_handlers_)));
        break;
      }
      case OP_DISPATCH:
      case OP_STARTMSG:
      case OP_ENDMSG:
      case OP_PUSHLENDELIM:
      case OP_POP:
      case OP_SETDELIM:
      case OP_HALT:
      case OP_RET:
        break;
      case OP_PARSE_DOUBLE:
      case OP_PARSE_FLOAT:
      case OP_PARSE_INT64:
      case OP_PARSE_UINT64:
      case OP_PARSE_INT32:
      case OP_PARSE_FIXED64:
      case OP_PARSE_FIXED32:
      case OP_PARSE_BOOL:
      case OP_PARSE_UINT32:
      case OP_PARSE_SFIXED32:
      case OP_PARSE_SFIXED64:
      case OP_PARSE_SINT32:
      case OP_PARSE_SINT64:
      case OP_STARTSEQ:
      case OP_ENDSEQ:
      case OP_STARTSUBMSG:
      case OP_ENDSUBMSG:
      case OP_STARTSTR:
      case OP_STRING:
      case OP_ENDSTR:
      case OP_PUSHTAGDELIM:
        /* Operands are unsigned words; print them with a matching format. */
        fprintf(f, " %u", (unsigned)(instr >> 8));
        break;
      case OP_SETBIGGROUPNUM:
        fprintf(f, " %u", (unsigned)*p++);
        break;
      case OP_CHECKDELIM:
      case OP_CALL:
      case OP_BRANCH:
        fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
        break;
      case OP_TAG1:
      case OP_TAG2: {
        fprintf(f, " tag:0x%x", (unsigned)(instr >> 16));
        if (getofs(instr)) {
          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
        }
        break;
      }
      case OP_TAGN: {
        uint64_t tag = *p++;
        tag |= (uint64_t)*p++ << 32;
        fprintf(f, " tag:0x%llx", (long long)tag);
        fprintf(f, " n:%u", (unsigned)(instr >> 16));
        if (getofs(instr)) {
          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
        }
        break;
      }
    }
    fputs("\n", f);
  }
}

#endif
6940
6941 static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
6942 uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type;
6943 uint64_t encoded_tag = upb_vencode32(tag);
6944 /* No tag should be greater than 5 bytes. */
6945 assert(encoded_tag <= 0xffffffffff);
6946 return encoded_tag;
6947 }
6948
6949 static void putchecktag(compiler *c, const upb_fielddef *f,
6950 int wire_type, int dest) {
6951 uint64_t tag = get_encoded_tag(f, wire_type);
6952 switch (upb_value_size(tag)) {
6953 case 1:
6954 putop(c, OP_TAG1, dest, tag);
6955 break;
6956 case 2:
6957 putop(c, OP_TAG2, dest, tag);
6958 break;
6959 default:
6960 putop(c, OP_TAGN, dest, tag);
6961 break;
6962 }
6963 }
6964
6965 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
6966 upb_selector_t selector;
6967 bool ok = upb_handlers_getselector(f, type, &selector);
6968 UPB_ASSERT_VAR(ok, ok);
6969 return selector;
6970 }
6971
6972 /* Takes an existing, primary dispatch table entry and repacks it with a
6973 * different alternate wire type. Called when we are inserting a secondary
6974 * dispatch table entry for an alternate wire type. */
6975 static uint64_t repack(uint64_t dispatch, int new_wt2) {
6976 uint64_t ofs;
6977 uint8_t wt1;
6978 uint8_t old_wt2;
6979 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
6980 assert(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */
6981 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
6982 }
6983
6984 /* Marks the current bytecode position as the dispatch target for this message,
6985 * field, and wire type. */
6986 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
6987 const upb_fielddef *f, int wire_type) {
6988 /* Offset is relative to msg base. */
6989 uint64_t ofs = pcofs(c) - method->code_base.ofs;
6990 uint32_t fn = upb_fielddef_number(f);
6991 upb_inttable *d = &method->dispatch;
6992 upb_value v;
6993 if (upb_inttable_remove(d, fn, &v)) {
6994 /* TODO: prioritize based on packed setting in .proto file. */
6995 uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
6996 upb_inttable_insert(d, fn, upb_value_uint64(repacked));
6997 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
6998 } else {
6999 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
7000 upb_inttable_insert(d, fn, upb_value_uint64(val));
7001 }
7002 }
7003
7004 static void putpush(compiler *c, const upb_fielddef *f) {
7005 if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
7006 putop(c, OP_PUSHLENDELIM);
7007 } else {
7008 uint32_t fn = upb_fielddef_number(f);
7009 if (fn >= 1 << 24) {
7010 putop(c, OP_PUSHTAGDELIM, 0);
7011 putop(c, OP_SETBIGGROUPNUM, fn);
7012 } else {
7013 putop(c, OP_PUSHTAGDELIM, fn);
7014 }
7015 }
7016 }
7017
7018 static upb_pbdecodermethod *find_submethod(const compiler *c,
7019 const upb_pbdecodermethod *method,
7020 const upb_fielddef *f) {
7021 const upb_handlers *sub =
7022 upb_handlers_getsubhandlers(method->dest_handlers_, f);
7023 upb_value v;
7024 return upb_inttable_lookupptr(&c->group->methods, sub, &v)
7025 ? upb_value_getptr(v)
7026 : NULL;
7027 }
7028
7029 static void putsel(compiler *c, opcode op, upb_selector_t sel,
7030 const upb_handlers *h) {
7031 if (upb_handlers_gethandler(h, sel)) {
7032 putop(c, op, sel);
7033 }
7034 }
7035
7036 /* Puts an opcode to call a callback, but only if a callback actually exists for
7037 * this field and handler type. */
7038 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
7039 const upb_fielddef *f, upb_handlertype_t type) {
7040 putsel(c, op, getsel(f, type), h);
7041 }
7042
7043 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
7044 if (!upb_fielddef_lazy(f))
7045 return false;
7046
7047 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) ||
7048 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) ||
7049 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR));
7050 }
7051
7052
7053 /* bytecode compiler code generation ******************************************/
7054
7055 /* Symbolic names for our local labels. */
7056 #define LABEL_LOOPSTART 1 /* Top of a repeated field loop. */
7057 #define LABEL_LOOPBREAK 2 /* To jump out of a repeated loop */
7058 #define LABEL_FIELD 3 /* Jump backward to find the most recent field. */
7059 #define LABEL_ENDMSG 4 /* To reach the OP_ENDMSG instr for this msg. */
7060
7061 /* Generates bytecode to parse a single non-lazy message field. */
7062 static void generate_msgfield(compiler *c, const upb_fielddef *f,
7063 upb_pbdecodermethod *method) {
7064 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7065 const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
7066 int wire_type;
7067
7068 if (!sub_m) {
7069 /* Don't emit any code for this field at all; it will be parsed as an
7070 * unknown field. */
7071 return;
7072 }
7073
7074 label(c, LABEL_FIELD);
7075
7076 wire_type =
7077 (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
7078 ? UPB_WIRE_TYPE_DELIMITED
7079 : UPB_WIRE_TYPE_START_GROUP;
7080
7081 if (upb_fielddef_isseq(f)) {
7082 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7083 putchecktag(c, f, wire_type, LABEL_DISPATCH);
7084 dispatchtarget(c, method, f, wire_type);
7085 putop(c, OP_PUSHTAGDELIM, 0);
7086 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
7087 label(c, LABEL_LOOPSTART);
7088 putpush(c, f);
7089 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
7090 putop(c, OP_CALL, sub_m);
7091 putop(c, OP_POP);
7092 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
7093 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
7094 putop(c, OP_SETDELIM);
7095 }
7096 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7097 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
7098 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7099 label(c, LABEL_LOOPBREAK);
7100 putop(c, OP_POP);
7101 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
7102 } else {
7103 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7104 putchecktag(c, f, wire_type, LABEL_DISPATCH);
7105 dispatchtarget(c, method, f, wire_type);
7106 putpush(c, f);
7107 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
7108 putop(c, OP_CALL, sub_m);
7109 putop(c, OP_POP);
7110 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
7111 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
7112 putop(c, OP_SETDELIM);
7113 }
7114 }
7115 }
7116
7117 /* Generates bytecode to parse a single string or lazy submessage field. */
7118 static void generate_delimfield(compiler *c, const upb_fielddef *f,
7119 upb_pbdecodermethod *method) {
7120 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7121
7122 label(c, LABEL_FIELD);
7123 if (upb_fielddef_isseq(f)) {
7124 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7125 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7126 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7127 putop(c, OP_PUSHTAGDELIM, 0);
7128 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
7129 label(c, LABEL_LOOPSTART);
7130 putop(c, OP_PUSHLENDELIM);
7131 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
7132 /* Need to emit even if no handler to skip past the string. */
7133 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
7134 putop(c, OP_POP);
7135 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
7136 putop(c, OP_SETDELIM);
7137 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7138 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
7139 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7140 label(c, LABEL_LOOPBREAK);
7141 putop(c, OP_POP);
7142 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
7143 } else {
7144 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7145 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7146 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7147 putop(c, OP_PUSHLENDELIM);
7148 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
7149 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
7150 putop(c, OP_POP);
7151 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
7152 putop(c, OP_SETDELIM);
7153 }
7154 }
7155
7156 /* Generates bytecode to parse a single primitive field. */
7157 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
7158 upb_pbdecodermethod *method) {
7159 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7160 upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
7161 opcode parse_type;
7162 upb_selector_t sel;
7163 int wire_type;
7164
7165 label(c, LABEL_FIELD);
7166
7167 /* From a decoding perspective, ENUM is the same as INT32. */
7168 if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
7169 descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
7170
7171 parse_type = (opcode)descriptor_type;
7172
7173 /* TODO(haberman): generate packed or non-packed first depending on "packed"
7174 * setting in the fielddef. This will favor (in speed) whichever was
7175 * specified. */
7176
7177 assert((int)parse_type >= 0 && parse_type <= OP_MAX);
7178 sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
7179 wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
7180 if (upb_fielddef_isseq(f)) {
7181 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7182 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7183 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7184 putop(c, OP_PUSHLENDELIM);
7185 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */
7186 label(c, LABEL_LOOPSTART);
7187 putop(c, parse_type, sel);
7188 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7189 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7190 dispatchtarget(c, method, f, wire_type);
7191 putop(c, OP_PUSHTAGDELIM, 0);
7192 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */
7193 label(c, LABEL_LOOPSTART);
7194 putop(c, parse_type, sel);
7195 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7196 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
7197 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7198 label(c, LABEL_LOOPBREAK);
7199 putop(c, OP_POP); /* Packed and non-packed join. */
7200 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
7201 putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */
7202 } else {
7203 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7204 putchecktag(c, f, wire_type, LABEL_DISPATCH);
7205 dispatchtarget(c, method, f, wire_type);
7206 putop(c, parse_type, sel);
7207 }
7208 }
7209
7210 /* Adds bytecode for parsing the given message to the given decoderplan,
7211 * while adding all dispatch targets to this message's dispatch table. */
7212 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
7213 const upb_handlers *h;
7214 const upb_msgdef *md;
7215 uint32_t* start_pc;
7216 upb_msg_field_iter i;
7217 upb_value val;
7218
7219 assert(method);
7220
7221 /* Clear all entries in the dispatch table. */
7222 upb_inttable_uninit(&method->dispatch);
7223 upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
7224
7225 h = upb_pbdecodermethod_desthandlers(method);
7226 md = upb_handlers_msgdef(h);
7227
7228 method->code_base.ofs = pcofs(c);
7229 putop(c, OP_SETDISPATCH, &method->dispatch);
7230 putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
7231 label(c, LABEL_FIELD);
7232 start_pc = c->pc;
7233 for(upb_msg_field_begin(&i, md);
7234 !upb_msg_field_done(&i);
7235 upb_msg_field_next(&i)) {
7236 const upb_fielddef *f = upb_msg_iter_field(&i);
7237 upb_fieldtype_t type = upb_fielddef_type(f);
7238
7239 if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
7240 generate_msgfield(c, f, method);
7241 } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
7242 type == UPB_TYPE_MESSAGE) {
7243 generate_delimfield(c, f, method);
7244 } else {
7245 generate_primitivefield(c, f, method);
7246 }
7247 }
7248
7249 /* If there were no fields, or if no handlers were defined, we need to
7250 * generate a non-empty loop body so that we can at least dispatch for unknown
7251 * fields and check for the end of the message. */
7252 if (c->pc == start_pc) {
7253 /* Check for end-of-message. */
7254 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7255 /* Unconditionally dispatch. */
7256 putop(c, OP_DISPATCH, 0);
7257 }
7258
7259 /* For now we just loop back to the last field of the message (or if none,
7260 * the DISPATCH opcode for the message). */
7261 putop(c, OP_BRANCH, -LABEL_FIELD);
7262
7263 /* Insert both a label and a dispatch table entry for this end-of-msg. */
7264 label(c, LABEL_ENDMSG);
7265 val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
7266 upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
7267
7268 putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
7269 putop(c, OP_RET);
7270
7271 upb_inttable_compact(&method->dispatch);
7272 }
7273
7274 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
7275 * Returns the method for these handlers.
7276 *
7277 * Generates a new method for every destination handlers reachable from "h". */
7278 static void find_methods(compiler *c, const upb_handlers *h) {
7279 upb_value v;
7280 upb_msg_field_iter i;
7281 const upb_msgdef *md;
7282
7283 if (upb_inttable_lookupptr(&c->group->methods, h, &v))
7284 return;
7285 newmethod(h, c->group);
7286
7287 /* Find submethods. */
7288 md = upb_handlers_msgdef(h);
7289 for(upb_msg_field_begin(&i, md);
7290 !upb_msg_field_done(&i);
7291 upb_msg_field_next(&i)) {
7292 const upb_fielddef *f = upb_msg_iter_field(&i);
7293 const upb_handlers *sub_h;
7294 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
7295 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
7296 /* We only generate a decoder method for submessages with handlers.
7297 * Others will be parsed as unknown fields. */
7298 find_methods(c, sub_h);
7299 }
7300 }
7301 }
7302
7303 /* (Re-)compile bytecode for all messages in "msgs."
7304 * Overwrites any existing bytecode in "c". */
7305 static void compile_methods(compiler *c) {
7306 upb_inttable_iter i;
7307
7308 /* Start over at the beginning of the bytecode. */
7309 c->pc = c->group->bytecode;
7310
7311 upb_inttable_begin(&i, &c->group->methods);
7312 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7313 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
7314 compile_method(c, method);
7315 }
7316 }
7317
7318 static void set_bytecode_handlers(mgroup *g) {
7319 upb_inttable_iter i;
7320 upb_inttable_begin(&i, &g->methods);
7321 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7322 upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i));
7323 upb_byteshandler *h = &m->input_handler_;
7324
7325 m->code_base.ptr = g->bytecode + m->code_base.ofs;
7326
7327 upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr);
7328 upb_byteshandler_setstring(h, upb_pbdecoder_decode, g);
7329 upb_byteshandler_setendstr(h, upb_pbdecoder_end, m);
7330 }
7331 }
7332
7333
7334 /* JIT setup. *****************************************************************/
7335
7336 #ifdef UPB_USE_JIT_X64
7337
7338 static void sethandlers(mgroup *g, bool allowjit) {
7339 g->jit_code = NULL;
7340 if (allowjit) {
7341 /* Compile byte-code into machine code, create handlers. */
7342 upb_pbdecoder_jit(g);
7343 } else {
7344 set_bytecode_handlers(g);
7345 }
7346 }
7347
7348 #else /* UPB_USE_JIT_X64 */
7349
7350 static void sethandlers(mgroup *g, bool allowjit) {
7351 /* No JIT compiled in; use bytecode handlers unconditionally. */
7352 UPB_UNUSED(allowjit);
7353 set_bytecode_handlers(g);
7354 }
7355
7356 #endif /* UPB_USE_JIT_X64 */
7357
7358
7359 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
7360 * handlers and other mgroups (but verify we have a transitive closure). */
7361 const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
7362 const void *owner) {
7363 mgroup *g;
7364 compiler *c;
7365
7366 UPB_UNUSED(allowjit);
7367 assert(upb_handlers_isfrozen(dest));
7368
7369 g = newgroup(owner);
7370 c = newcompiler(g, lazy);
7371 find_methods(c, dest);
7372
7373 /* We compile in two passes:
7374 * 1. all messages are assigned relative offsets from the beginning of the
7375 * bytecode (saved in method->code_base).
7376 * 2. forwards OP_CALL instructions can be correctly linked since message
7377 * offsets have been previously assigned.
7378 *
7379 * Could avoid the second pass by linking OP_CALL instructions somehow. */
7380 compile_methods(c);
7381 compile_methods(c);
7382 g->bytecode_end = c->pc;
7383 freecompiler(c);
7384
7385 #ifdef UPB_DUMP_BYTECODE
7386 {
7387 FILE *f = fopen("/tmp/upb-bytecode", "wb");
7388 assert(f);
7389 dumpbc(g->bytecode, g->bytecode_end, stderr);
7390 dumpbc(g->bytecode, g->bytecode_end, f);
7391 fclose(f);
7392 }
7393 #endif
7394
7395 sethandlers(g, allowjit);
7396 return g;
7397 }
7398
7399
7400 /* upb_pbcodecache ************************************************************/
7401
7402 void upb_pbcodecache_init(upb_pbcodecache *c) {
7403 upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR);
7404 c->allow_jit_ = true;
7405 }
7406
7407 void upb_pbcodecache_uninit(upb_pbcodecache *c) {
7408 upb_inttable_iter i;
7409 upb_inttable_begin(&i, &c->groups);
7410 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7411 const mgroup *group = upb_value_getconstptr(upb_inttable_iter_value(&i));
7412 mgroup_unref(group, c);
7413 }
7414 upb_inttable_uninit(&c->groups);
7415 }
7416
7417 bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
7418 return c->allow_jit_;
7419 }
7420
7421 bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
7422 if (upb_inttable_count(&c->groups) > 0)
7423 return false;
7424 c->allow_jit_ = allow;
7425 return true;
7426 }
7427
7428 const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
7429 upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
7430 upb_value v;
7431 bool ok;
7432
7433 /* Right now we build a new DecoderMethod every time.
7434 * TODO(haberman): properly cache methods by their true key. */
7435 const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
7436 upb_inttable_push(&c->groups, upb_value_constptr(g));
7437
7438 ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
7439 UPB_ASSERT_VAR(ok, ok);
7440 return upb_value_getptr(v);
7441 }
7442
7443
7444 /* upb_pbdecodermethodopts ****************************************************/
7445
7446 void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
7447 const upb_handlers *h) {
7448 opts->handlers = h;
7449 opts->lazy = false;
7450 }
7451
7452 void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) {
7453 opts->lazy = lazy;
7454 }
/*
** upb::Decoder (Bytecode Decoder VM)
**
** Bytecode must previously have been generated using the bytecode compiler in
** compile_decoder.c.  This decoder then walks through the bytecode op-by-op to
** parse the input.
**
** Decoding is fully resumable; we just keep a pointer to the current bytecode
** instruction and resume from there.  A fair amount of the logic here is to
** handle the fact that values can span buffer seams and we have to be capable
** of suspending/resuming from any byte in the stream.  This sometimes requires
** keeping a few trailing bytes from the last buffer around in the "residual"
** buffer.
*/
7469
7470 #include <inttypes.h>
7471 #include <stddef.h>
7472
7473 #ifdef UPB_DUMP_BYTECODE
7474 #include <stdio.h>
7475 #endif
7476
7477 #define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
7478
7479 /* Error messages that are shared between the bytecode and JIT decoders. */
7480 const char *kPbDecoderStackOverflow = "Nesting too deep.";
7481
7482 /* Error messages shared within this file. */
7483 static const char *kUnterminatedVarint = "Unterminated varint.";
7484
7485 /* upb_pbdecoder **************************************************************/
7486
7487 static opcode halt = OP_HALT;
7488
7489 /* Whether an op consumes any of the input buffer. */
7490 static bool consumes_input(opcode op) {
7491 switch (op) {
7492 case OP_SETDISPATCH:
7493 case OP_STARTMSG:
7494 case OP_ENDMSG:
7495 case OP_STARTSEQ:
7496 case OP_ENDSEQ:
7497 case OP_STARTSUBMSG:
7498 case OP_ENDSUBMSG:
7499 case OP_STARTSTR:
7500 case OP_ENDSTR:
7501 case OP_PUSHTAGDELIM:
7502 case OP_POP:
7503 case OP_SETDELIM:
7504 case OP_SETBIGGROUPNUM:
7505 case OP_CHECKDELIM:
7506 case OP_CALL:
7507 case OP_RET:
7508 case OP_BRANCH:
7509 return false;
7510 default:
7511 return true;
7512 }
7513 }
7514
7515 static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
7516
7517 /* It's unfortunate that we have to micro-manage the compiler with
7518 * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
7519 * specific to one hardware configuration. But empirically on a Core i7,
7520 * performance increases 30-50% with these annotations. Every instance where
7521 * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
7522 * benchmarks. */
7523
7524 static void seterr(upb_pbdecoder *d, const char *msg) {
7525 upb_status status = UPB_STATUS_INIT;
7526 upb_status_seterrmsg(&status, msg);
7527 upb_env_reporterror(d->env, &status);
7528 }
7529
7530 void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
7531 seterr(d, msg);
7532 }
7533
7534
7535 /* Buffering ******************************************************************/
7536
7537 /* We operate on one buffer at a time, which is either the user's buffer passed
7538 * to our "decode" callback or some residual bytes from the previous buffer. */
7539
7540 /* How many bytes can be safely read from d->ptr without reading past end-of-buf
7541 * or past the current delimited end. */
7542 static size_t curbufleft(const upb_pbdecoder *d) {
7543 assert(d->data_end >= d->ptr);
7544 return d->data_end - d->ptr;
7545 }
7546
7547 /* Overall stream offset of d->ptr. */
7548 uint64_t offset(const upb_pbdecoder *d) {
7549 return d->bufstart_ofs + (d->ptr - d->buf);
7550 }
7551
7552 /* Advances d->ptr. */
7553 static void advance(upb_pbdecoder *d, size_t len) {
7554 assert(curbufleft(d) >= len);
7555 d->ptr += len;
7556 }
7557
/* Returns true if "p" lies within [buf, end].  Note that "end" itself counts
 * as inside. */
static bool in_buf(const char *p, const char *buf, const char *end) {
  if (p < buf) {
    return false;
  }
  return !(p > end);
}
7561
7562 static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
7563 return in_buf(p, d->residual, d->residual_end);
7564 }
7565
7566 /* Calculates the delim_end value, which is affected by both the current buffer
7567 * and the parsing stack, so must be called whenever either is updated. */
7568 static void set_delim_end(upb_pbdecoder *d) {
7569 size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
7570 if (delim_ofs <= (size_t)(d->end - d->buf)) {
7571 d->delim_end = d->buf + delim_ofs;
7572 d->data_end = d->delim_end;
7573 } else {
7574 d->data_end = d->end;
7575 d->delim_end = NULL;
7576 }
7577 }
7578
7579 static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
7580 d->ptr = buf;
7581 d->buf = buf;
7582 d->end = end;
7583 set_delim_end(d);
7584 }
7585
7586 static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
7587 assert(curbufleft(d) == 0);
7588 d->bufstart_ofs += (d->end - d->buf);
7589 switchtobuf(d, buf, buf + len);
7590 }
7591
7592 static void checkpoint(upb_pbdecoder *d) {
7593 /* The assertion here is in the interests of efficiency, not correctness.
7594 * We are trying to ensure that we don't checkpoint() more often than
7595 * necessary. */
7596 assert(d->checkpoint != d->ptr);
7597 d->checkpoint = d->ptr;
7598 }
7599
7600 /* Resumes the decoder from an initial state or from a previous suspend. */
7601 int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
7602 size_t size, const upb_bufhandle *handle) {
7603 UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */
7604 d->buf_param = buf;
7605 d->size_param = size;
7606 d->handle = handle;
7607 if (d->residual_end > d->residual) {
7608 /* We have residual bytes from the last buffer. */
7609 assert(d->ptr == d->residual);
7610 } else {
7611 switchtobuf(d, buf, buf + size);
7612 }
7613 d->checkpoint = d->ptr;
7614 if (d->top->groupnum < 0) {
7615 CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
7616 d->checkpoint = d->ptr;
7617 }
7618 return DECODE_OK;
7619 }
7620
7621 /* Suspends the decoder at the last checkpoint, without saving any residual
7622 * bytes. If there are any unconsumed bytes, returns a short byte count. */
7623 size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
7624 d->pc = d->last;
7625 if (d->checkpoint == d->residual) {
7626 /* Checkpoint was in residual buf; no user bytes were consumed. */
7627 d->ptr = d->residual;
7628 return 0;
7629 } else {
7630 size_t consumed;
7631 assert(!in_residual_buf(d, d->checkpoint));
7632 assert(d->buf == d->buf_param);
7633
7634 consumed = d->checkpoint - d->buf;
7635 d->bufstart_ofs += consumed;
7636 d->residual_end = d->residual;
7637 switchtobuf(d, d->residual, d->residual_end);
7638 return consumed;
7639 }
7640 }
7641
7642 /* Suspends the decoder at the last checkpoint, and saves any unconsumed
7643 * bytes in our residual buffer. This is necessary if we need more user
7644 * bytes to form a complete value, which might not be contiguous in the
7645 * user's buffers. Always consumes all user bytes. */
7646 static size_t suspend_save(upb_pbdecoder *d) {
7647 /* We hit end-of-buffer before we could parse a full value.
7648 * Save any unconsumed bytes (if any) to the residual buffer. */
7649 d->pc = d->last;
7650
7651 if (d->checkpoint == d->residual) {
7652 /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
7653 assert((d->residual_end - d->residual) + d->size_param <=
7654 sizeof(d->residual));
7655 if (!in_residual_buf(d, d->ptr)) {
7656 d->bufstart_ofs -= (d->residual_end - d->residual);
7657 }
7658 memcpy(d->residual_end, d->buf_param, d->size_param);
7659 d->residual_end += d->size_param;
7660 } else {
7661 /* Checkpoint was in user buf; old residual bytes not needed. */
7662 size_t save;
7663 assert(!in_residual_buf(d, d->checkpoint));
7664
7665 d->ptr = d->checkpoint;
7666 save = curbufleft(d);
7667 assert(save <= sizeof(d->residual));
7668 memcpy(d->residual, d->ptr, save);
7669 d->residual_end = d->residual + save;
7670 d->bufstart_ofs = offset(d);
7671 }
7672
7673 switchtobuf(d, d->residual, d->residual_end);
7674 return d->size_param;
7675 }
7676
7677 /* Skips "bytes" bytes in the stream, which may be more than available. If we
7678 * skip more bytes than are available, we return a long read count to the caller
7679 * indicating how many bytes the caller should skip before passing a new buffer.
7680 */
7681 static int32_t skip(upb_pbdecoder *d, size_t bytes) {
7682 assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
7683 if (curbufleft(d) >= bytes) {
7684 /* Skipped data is all in current buffer. */
7685 advance(d, bytes);
7686 return DECODE_OK;
7687 } else {
7688 /* Skipped data extends beyond currently available buffers. */
7689 size_t skip;
7690 d->pc = d->last;
7691 skip = bytes - curbufleft(d);
7692 d->bufstart_ofs += (d->end - d->buf) + skip;
7693 d->residual_end = d->residual;
7694 switchtobuf(d, d->residual, d->residual_end);
7695 return d->size_param + skip;
7696 }
7697 }
7698
7699 /* Copies the next "bytes" bytes into "buf" and advances the stream.
7700 * Requires that this many bytes are available in the current buffer. */
7701 UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
7702 size_t bytes) {
7703 assert(bytes <= curbufleft(d));
7704 memcpy(buf, d->ptr, bytes);
7705 advance(d, bytes);
7706 }
7707
7708 /* Slow path for getting the next "bytes" bytes, regardless of whether they are
7709 * available in the current buffer or not. Returns a status code as described
7710 * in decoder.int.h. */
7711 UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7712 size_t bytes) {
7713 const size_t avail = curbufleft(d);
7714 consumebytes(d, buf, avail);
7715 bytes -= avail;
7716 assert(bytes > 0);
7717 if (in_residual_buf(d, d->ptr)) {
7718 advancetobuf(d, d->buf_param, d->size_param);
7719 }
7720 if (curbufleft(d) >= bytes) {
7721 consumebytes(d, (char *)buf + avail, bytes);
7722 return DECODE_OK;
7723 } else if (d->data_end == d->delim_end) {
7724 seterr(d, "Submessage ended in the middle of a value or group");
7725 return upb_pbdecoder_suspend(d);
7726 } else {
7727 return suspend_save(d);
7728 }
7729 }
7730
7731 /* Gets the next "bytes" bytes, regardless of whether they are available in the
7732 * current buffer or not. Returns a status code as described in decoder.int.h.
7733 */
7734 UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
7735 size_t bytes) {
7736 if (curbufleft(d) >= bytes) {
7737 /* Buffer has enough data to satisfy. */
7738 consumebytes(d, buf, bytes);
7739 return DECODE_OK;
7740 } else {
7741 return getbytes_slow(d, buf, bytes);
7742 }
7743 }
7744
7745 UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
7746 size_t bytes) {
7747 size_t ret = curbufleft(d);
7748 memcpy(buf, d->ptr, ret);
7749 if (in_residual_buf(d, d->ptr)) {
7750 size_t copy = UPB_MIN(bytes - ret, d->size_param);
7751 memcpy((char *)buf + ret, d->buf_param, copy);
7752 ret += copy;
7753 }
7754 return ret;
7755 }
7756
7757 UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
7758 size_t bytes) {
7759 if (curbufleft(d) >= bytes) {
7760 memcpy(buf, d->ptr, bytes);
7761 return bytes;
7762 } else {
7763 return peekbytes_slow(d, buf, bytes);
7764 }
7765 }
7766
7767
7768 /* Decoding of wire types *****************************************************/
7769
7770 /* Slow path for decoding a varint from the current buffer position.
7771 * Returns a status code as described in decoder.int.h. */
7772 UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
7773 uint64_t *u64) {
7774 uint8_t byte = 0x80;
7775 int bitpos;
7776 *u64 = 0;
7777 for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
7778 int32_t ret = getbytes(d, &byte, 1);
7779 if (ret >= 0) return ret;
7780 *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
7781 }
7782 if(bitpos == 70 && (byte & 0x80)) {
7783 seterr(d, kUnterminatedVarint);
7784 return upb_pbdecoder_suspend(d);
7785 }
7786 return DECODE_OK;
7787 }
7788
7789 /* Decodes a varint from the current buffer position.
7790 * Returns a status code as described in decoder.int.h. */
7791 UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7792 if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
7793 *u64 = *d->ptr;
7794 advance(d, 1);
7795 return DECODE_OK;
7796 } else if (curbufleft(d) >= 10) {
7797 /* Fast case. */
7798 upb_decoderet r = upb_vdecode_fast(d->ptr);
7799 if (r.p == NULL) {
7800 seterr(d, kUnterminatedVarint);
7801 return upb_pbdecoder_suspend(d);
7802 }
7803 advance(d, r.p - d->ptr);
7804 *u64 = r.val;
7805 return DECODE_OK;
7806 } else {
7807 /* Slow case -- varint spans buffer seam. */
7808 return upb_pbdecoder_decode_varint_slow(d, u64);
7809 }
7810 }
7811
7812 /* Decodes a 32-bit varint from the current buffer position.
7813 * Returns a status code as described in decoder.int.h. */
7814 UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7815 uint64_t u64;
7816 int32_t ret = decode_varint(d, &u64);
7817 if (ret >= 0) return ret;
7818 if (u64 > UINT32_MAX) {
7819 seterr(d, "Unterminated 32-bit varint");
7820 /* TODO(haberman) guarantee that this function return is >= 0 somehow,
7821 * so we know this path will always be treated as error by our caller.
7822 * Right now the size_t -> int32_t can overflow and produce negative values.
7823 */
7824 *u32 = 0;
7825 return upb_pbdecoder_suspend(d);
7826 }
7827 *u32 = u64;
7828 return DECODE_OK;
7829 }
7830
7831 /* Decodes a fixed32 from the current buffer position.
7832 * Returns a status code as described in decoder.int.h.
7833 * TODO: proper byte swapping for big-endian machines. */
7834 UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
7835 return getbytes(d, u32, 4);
7836 }
7837
7838 /* Decodes a fixed64 from the current buffer position.
7839 * Returns a status code as described in decoder.int.h.
7840 * TODO: proper byte swapping for big-endian machines. */
7841 UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
7842 return getbytes(d, u64, 8);
7843 }
7844
7845 /* Non-static versions of the above functions.
7846 * These are called by the JIT for fallback paths. */
7847 int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
7848 return decode_fixed32(d, u32);
7849 }
7850
7851 int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
7852 return decode_fixed64(d, u64);
7853 }
7854
/* Reinterprets the 8 bytes of "n" as a double (bit copy, no conversion). */
static double as_double(uint64_t n) {
  double value;
  memcpy(&value, &n, 8);
  return value;
}
/* Reinterprets the 4 bytes of "n" as a float (bit copy, no conversion). */
static float as_float(uint32_t n) {
  float value;
  memcpy(&value, &n, 4);
  return value;
}
7857
7858 /* Pushes a frame onto the decoder stack. */
7859 static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
7860 upb_pbdecoder_frame *fr = d->top;
7861
7862 if (end > fr->end_ofs) {
7863 seterr(d, "Submessage end extends past enclosing submessage.");
7864 return false;
7865 } else if (fr == d->limit) {
7866 seterr(d, kPbDecoderStackOverflow);
7867 return false;
7868 }
7869
7870 fr++;
7871 fr->end_ofs = end;
7872 fr->dispatch = NULL;
7873 fr->groupnum = 0;
7874 d->top = fr;
7875 return true;
7876 }
7877
7878 static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
7879 /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
7880 * field number) prior to hitting any enclosing submessage end, pushing our
7881 * existing delim end prevents us from continuing to parse values from a
7882 * corrupt proto that doesn't give us an END tag in time. */
7883 if (!decoder_push(d, d->top->end_ofs))
7884 return false;
7885 d->top->groupnum = arg;
7886 return true;
7887 }
7888
7889 /* Pops a frame from the decoder stack. */
7890 static void decoder_pop(upb_pbdecoder *d) { d->top--; }
7891
7892 UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
7893 uint64_t expected) {
7894 uint64_t data = 0;
7895 size_t bytes = upb_value_size(expected);
7896 size_t read = peekbytes(d, &data, bytes);
7897 if (read == bytes && data == expected) {
7898 /* Advance past matched bytes. */
7899 int32_t ok = getbytes(d, &data, read);
7900 UPB_ASSERT_VAR(ok, ok < 0);
7901 return DECODE_OK;
7902 } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
7903 return suspend_save(d);
7904 } else {
7905 return DECODE_MISMATCH;
7906 }
7907 }
7908
7909 int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
7910 uint8_t wire_type) {
7911 if (fieldnum >= 0)
7912 goto have_tag;
7913
7914 while (true) {
7915 uint32_t tag;
7916 CHECK_RETURN(decode_v32(d, &tag));
7917 wire_type = tag & 0x7;
7918 fieldnum = tag >> 3;
7919
7920 have_tag:
7921 if (fieldnum == 0) {
7922 seterr(d, "Saw invalid field number (0)");
7923 return upb_pbdecoder_suspend(d);
7924 }
7925
7926 /* TODO: deliver to unknown field callback. */
7927 switch (wire_type) {
7928 case UPB_WIRE_TYPE_32BIT:
7929 CHECK_RETURN(skip(d, 4));
7930 break;
7931 case UPB_WIRE_TYPE_64BIT:
7932 CHECK_RETURN(skip(d, 8));
7933 break;
7934 case UPB_WIRE_TYPE_VARINT: {
7935 uint64_t u64;
7936 CHECK_RETURN(decode_varint(d, &u64));
7937 break;
7938 }
7939 case UPB_WIRE_TYPE_DELIMITED: {
7940 uint32_t len;
7941 CHECK_RETURN(decode_v32(d, &len));
7942 CHECK_RETURN(skip(d, len));
7943 break;
7944 }
7945 case UPB_WIRE_TYPE_START_GROUP:
7946 CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
7947 break;
7948 case UPB_WIRE_TYPE_END_GROUP:
7949 if (fieldnum == -d->top->groupnum) {
7950 decoder_pop(d);
7951 } else if (fieldnum == d->top->groupnum) {
7952 return DECODE_ENDGROUP;
7953 } else {
7954 seterr(d, "Unmatched ENDGROUP tag.");
7955 return upb_pbdecoder_suspend(d);
7956 }
7957 break;
7958 default:
7959 seterr(d, "Invalid wire type");
7960 return upb_pbdecoder_suspend(d);
7961 }
7962
7963 if (d->top->groupnum >= 0) {
7964 return DECODE_OK;
7965 }
7966
7967 if (d->ptr == d->delim_end) {
7968 seterr(d, "Enclosing submessage ended in the middle of value or group");
7969 /* Unlike most errors we notice during parsing, right now we have consumed
7970 * all of the user's input.
7971 *
7972 * There are three different options for how to handle this case:
7973 *
7974 * 1. decode() = short count, error = set
7975 * 2. decode() = full count, error = set
7976 * 3. decode() = full count, error NOT set, short count and error will
7977 * be reported on next call to decode() (or end())
7978 *
7979 * (1) and (3) have the advantage that they preserve the invariant that an
7980 * error occurs iff decode() returns a short count.
7981 *
7982 * (2) and (3) have the advantage of reflecting the fact that all of the
7983 * bytes were in fact parsed (and possibly delivered to the unknown field
7984 * handler, in the future when that is supported).
7985 *
7986 * (3) requires extra state in the decode (a place to store the "permanent
7987 * error" that we should return for all subsequent attempts to decode).
7988 * But we likely want this anyway.
7989 *
7990 * Right now we do (1), thanks to the fact that we checkpoint *after* this
7991 * check. (3) may be a better choice long term; unclear at the moment. */
7992 return upb_pbdecoder_suspend(d);
7993 }
7994
7995 checkpoint(d);
7996 }
7997 }
7998
7999 static void goto_endmsg(upb_pbdecoder *d) {
8000 upb_value v;
8001 bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v);
8002 UPB_ASSERT_VAR(found, found);
8003 d->pc = d->top->base + upb_value_getuint64(v);
8004 }
8005
8006 /* Parses a tag and jumps to the corresponding bytecode instruction for this
8007 * field.
8008 *
8009 * If the tag is unknown (or the wire type doesn't match), parses the field as
8010 * unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode
8011 * instruction for the end of message. */
8012 static int32_t dispatch(upb_pbdecoder *d) {
8013 upb_inttable *dispatch = d->top->dispatch;
8014 uint32_t tag;
8015 uint8_t wire_type;
8016 uint32_t fieldnum;
8017 upb_value val;
8018 int32_t ret;
8019
8020 /* Decode tag. */
8021 CHECK_RETURN(decode_v32(d, &tag));
8022 wire_type = tag & 0x7;
8023 fieldnum = tag >> 3;
8024
8025 /* Lookup tag. Because of packed/non-packed compatibility, we have to
8026 * check the wire type against two possibilities. */
8027 if (fieldnum != DISPATCH_ENDMSG &&
8028 upb_inttable_lookup32(dispatch, fieldnum, &val)) {
8029 uint64_t v = upb_value_getuint64(val);
8030 if (wire_type == (v & 0xff)) {
8031 d->pc = d->top->base + (v >> 16);
8032 return DECODE_OK;
8033 } else if (wire_type == ((v >> 8) & 0xff)) {
8034 bool found =
8035 upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
8036 UPB_ASSERT_VAR(found, found);
8037 d->pc = d->top->base + upb_value_getuint64(val);
8038 return DECODE_OK;
8039 }
8040 }
8041
8042 /* Unknown field or ENDGROUP. */
8043 ret = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
8044
8045 if (ret == DECODE_ENDGROUP) {
8046 goto_endmsg(d);
8047 return DECODE_OK;
8048 } else if (ret == DECODE_OK) {
8049 /* We just consumed some input, so we might now have consumed all the data
8050 * in the delmited region. Since every opcode that can trigger dispatch is
8051 * directly preceded by OP_CHECKDELIM, rewind to it now to re-check the
8052 * delimited end. */
8053 d->pc = d->last - 1;
8054 assert(getop(*d->pc) == OP_CHECKDELIM);
8055 return DECODE_OK;
8056 }
8057
8058 return ret;
8059 }
8060
8061 /* Callers know that the stack is more than one deep because the opcodes that
8062 * call this only occur after PUSH operations. */
8063 upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
8064 assert(d->top != d->stack);
8065 return d->top - 1;
8066 }
8067
8068
8069 /* The main decoding loop *****************************************************/
8070
8071 /* The main decoder VM function. Uses traditional bytecode dispatch loop with a
8072 * switch() statement. */
8073 size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
8074 size_t size, const upb_bufhandle *handle) {
8075 upb_pbdecoder *d = closure;
8076 const mgroup *group = hd;
8077 int32_t result;
8078 assert(buf);
8079 result = upb_pbdecoder_resume(d, NULL, buf, size, handle);
8080 if (result == DECODE_ENDGROUP) {
8081 goto_endmsg(d);
8082 }
8083 CHECK_RETURN(result);
8084 UPB_UNUSED(group);
8085
8086 #define VMCASE(op, code) \
8087 case op: { code; if (consumes_input(op)) checkpoint(d); break; }
8088 #define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
8089 VMCASE(OP_PARSE_ ## type, { \
8090 ctype val; \
8091 CHECK_RETURN(decode_ ## wt(d, &val)); \
8092 upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \
8093 })
8094
8095 while(1) {
8096 int32_t instruction;
8097 opcode op;
8098 uint32_t arg;
8099 int32_t longofs;
8100
8101 d->last = d->pc;
8102 instruction = *d->pc++;
8103 op = getop(instruction);
8104 arg = instruction >> 8;
8105 longofs = arg;
8106 assert(d->ptr != d->residual_end);
8107 #ifdef UPB_DUMP_BYTECODE
8108 fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
8109 "%x %s (%d)\n",
8110 (int)offset(d),
8111 (int)(d->ptr - d->buf),
8112 (int)(d->data_end - d->ptr),
8113 (int)(d->end - d->ptr),
8114 (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
8115 (int)(d->pc - 1 - group->bytecode),
8116 upb_pbdecoder_getopname(op),
8117 arg);
8118 #endif
8119 switch (op) {
8120 /* Technically, we are losing data if we see a 32-bit varint that is not
8121 * properly sign-extended. We could detect this and error about the data
8122 * loss, but proto2 does not do this, so we pass. */
8123 PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t)
8124 PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t)
8125 PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t)
8126 PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t)
8127 PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t)
8128 PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t)
8129 PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t)
8130 PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t)
8131 PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t)
8132 PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t)
8133 PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t)
8134 PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t)
8135 PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t)
8136
8137 VMCASE(OP_SETDISPATCH,
8138 d->top->base = d->pc - 1;
8139 memcpy(&d->top->dispatch, d->pc, sizeof(void*));
8140 d->pc += sizeof(void*) / sizeof(uint32_t);
8141 )
8142 VMCASE(OP_STARTMSG,
8143 CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink));
8144 )
8145 VMCASE(OP_ENDMSG,
8146 CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
8147 )
8148 VMCASE(OP_STARTSEQ,
8149 upb_pbdecoder_frame *outer = outer_frame(d);
8150 CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink));
8151 )
8152 VMCASE(OP_ENDSEQ,
8153 CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg));
8154 )
8155 VMCASE(OP_STARTSUBMSG,
8156 upb_pbdecoder_frame *outer = outer_frame(d);
8157 CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink));
8158 )
8159 VMCASE(OP_ENDSUBMSG,
8160 CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
8161 )
8162 VMCASE(OP_STARTSTR,
8163 uint32_t len = d->top->end_ofs - offset(d);
8164 upb_pbdecoder_frame *outer = outer_frame(d);
8165 CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
8166 if (len == 0) {
8167 d->pc++; /* Skip OP_STRING. */
8168 }
8169 )
8170 VMCASE(OP_STRING,
8171 uint32_t len = curbufleft(d);
8172 size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle);
8173 if (n > len) {
8174 if (n > d->top->end_ofs - offset(d)) {
8175 seterr(d, "Tried to skip past end of string.");
8176 return upb_pbdecoder_suspend(d);
8177 } else {
8178 int32_t ret = skip(d, n);
8179 /* This shouldn't return DECODE_OK, because n > len. */
8180 assert(ret >= 0);
8181 return ret;
8182 }
8183 }
8184 advance(d, n);
8185 if (n < len || d->delim_end == NULL) {
8186 /* We aren't finished with this string yet. */
8187 d->pc--; /* Repeat OP_STRING. */
8188 if (n > 0) checkpoint(d);
8189 return upb_pbdecoder_suspend(d);
8190 }
8191 )
8192 VMCASE(OP_ENDSTR,
8193 CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
8194 )
8195 VMCASE(OP_PUSHTAGDELIM,
8196 CHECK_SUSPEND(pushtagdelim(d, arg));
8197 )
8198 VMCASE(OP_SETBIGGROUPNUM,
8199 d->top->groupnum = *d->pc++;
8200 )
8201 VMCASE(OP_POP,
8202 assert(d->top > d->stack);
8203 decoder_pop(d);
8204 )
8205 VMCASE(OP_PUSHLENDELIM,
8206 uint32_t len;
8207 CHECK_RETURN(decode_v32(d, &len));
8208 CHECK_SUSPEND(decoder_push(d, offset(d) + len));
8209 set_delim_end(d);
8210 )
8211 VMCASE(OP_SETDELIM,
8212 set_delim_end(d);
8213 )
8214 VMCASE(OP_CHECKDELIM,
8215 /* We are guaranteed of this assert because we never allow ourselves to
8216 * consume bytes beyond data_end, which covers delim_end when non-NULL.
8217 */
8218 assert(!(d->delim_end && d->ptr > d->delim_end));
8219 if (d->ptr == d->delim_end)
8220 d->pc += longofs;
8221 )
8222 VMCASE(OP_CALL,
8223 d->callstack[d->call_len++] = d->pc;
8224 d->pc += longofs;
8225 )
8226 VMCASE(OP_RET,
8227 assert(d->call_len > 0);
8228 d->pc = d->callstack[--d->call_len];
8229 )
8230 VMCASE(OP_BRANCH,
8231 d->pc += longofs;
8232 )
8233 VMCASE(OP_TAG1,
8234 uint8_t expected;
8235 CHECK_SUSPEND(curbufleft(d) > 0);
8236 expected = (arg >> 8) & 0xff;
8237 if (*d->ptr == expected) {
8238 advance(d, 1);
8239 } else {
8240 int8_t shortofs;
8241 badtag:
8242 shortofs = arg;
8243 if (shortofs == LABEL_DISPATCH) {
8244 CHECK_RETURN(dispatch(d));
8245 } else {
8246 d->pc += shortofs;
8247 break; /* Avoid checkpoint(). */
8248 }
8249 }
8250 )
8251 VMCASE(OP_TAG2,
8252 uint16_t expected;
8253 CHECK_SUSPEND(curbufleft(d) > 0);
8254 expected = (arg >> 8) & 0xffff;
8255 if (curbufleft(d) >= 2) {
8256 uint16_t actual;
8257 memcpy(&actual, d->ptr, 2);
8258 if (expected == actual) {
8259 advance(d, 2);
8260 } else {
8261 goto badtag;
8262 }
8263 } else {
8264 int32_t result = upb_pbdecoder_checktag_slow(d, expected);
8265 if (result == DECODE_MISMATCH) goto badtag;
8266 if (result >= 0) return result;
8267 }
8268 )
8269 VMCASE(OP_TAGN, {
8270 uint64_t expected;
8271 int32_t result;
8272 memcpy(&expected, d->pc, 8);
8273 d->pc += 2;
8274 result = upb_pbdecoder_checktag_slow(d, expected);
8275 if (result == DECODE_MISMATCH) goto badtag;
8276 if (result >= 0) return result;
8277 })
8278 VMCASE(OP_DISPATCH, {
8279 CHECK_RETURN(dispatch(d));
8280 })
8281 VMCASE(OP_HALT, {
8282 return size;
8283 })
8284 }
8285 }
8286 }
8287
8288 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
8289 upb_pbdecoder *d = closure;
8290 UPB_UNUSED(size_hint);
8291 d->top->end_ofs = UINT64_MAX;
8292 d->bufstart_ofs = 0;
8293 d->call_len = 1;
8294 d->callstack[0] = &halt;
8295 d->pc = pc;
8296 return d;
8297 }
8298
8299 void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
8300 upb_pbdecoder *d = closure;
8301 UPB_UNUSED(hd);
8302 UPB_UNUSED(size_hint);
8303 d->top->end_ofs = UINT64_MAX;
8304 d->bufstart_ofs = 0;
8305 d->call_len = 0;
8306 return d;
8307 }
8308
8309 bool upb_pbdecoder_end(void *closure, const void *handler_data) {
8310 upb_pbdecoder *d = closure;
8311 const upb_pbdecodermethod *method = handler_data;
8312 uint64_t end;
8313 char dummy;
8314 #ifdef UPB_USE_JIT_X64
8315 const mgroup *group = (const mgroup*)method->group;
8316 #endif
8317
8318 if (d->residual_end > d->residual) {
8319 seterr(d, "Unexpected EOF");
8320 return false;
8321 }
8322
8323 if (d->top->end_ofs != UINT64_MAX) {
8324 seterr(d, "Unexpected EOF inside delimited string");
8325 return false;
8326 }
8327
8328 /* Message ends here. */
8329 end = offset(d);
8330 d->top->end_ofs = end;
8331
8332 #ifdef UPB_USE_JIT_X64
8333 if (group->jit_code) {
8334 if (d->top != d->stack)
8335 d->stack->end_ofs = 0;
8336 group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
8337 } else
8338 #endif
8339 {
8340 const uint32_t *p = d->pc;
8341 d->stack->end_ofs = end;
8342 /* Check the previous bytecode, but guard against beginning. */
8343 if (p != method->code_base.ptr) p--;
8344 if (getop(*p) == OP_CHECKDELIM) {
8345 /* Rewind from OP_TAG* to OP_CHECKDELIM. */
8346 assert(getop(*d->pc) == OP_TAG1 ||
8347 getop(*d->pc) == OP_TAG2 ||
8348 getop(*d->pc) == OP_TAGN ||
8349 getop(*d->pc) == OP_DISPATCH);
8350 d->pc = p;
8351 }
8352 upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
8353 }
8354
8355 if (d->call_len != 0) {
8356 seterr(d, "Unexpected EOF");
8357 return false;
8358 }
8359
8360 return true;
8361 }
8362
8363 void upb_pbdecoder_reset(upb_pbdecoder *d) {
8364 d->top = d->stack;
8365 d->top->groupnum = 0;
8366 d->ptr = d->residual;
8367 d->buf = d->residual;
8368 d->end = d->residual;
8369 d->residual_end = d->residual;
8370 }
8371
8372 static size_t stacksize(upb_pbdecoder *d, size_t entries) {
8373 UPB_UNUSED(d);
8374 return entries * sizeof(upb_pbdecoder_frame);
8375 }
8376
8377 static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
8378 UPB_UNUSED(d);
8379
8380 #ifdef UPB_USE_JIT_X64
8381 if (d->method_->is_native_) {
8382 /* Each native stack frame needs two pointers, plus we need a few frames for
8383 * the enter/exit trampolines. */
8384 size_t ret = entries * sizeof(void*) * 2;
8385 ret += sizeof(void*) * 10;
8386 return ret;
8387 }
8388 #endif
8389
8390 return entries * sizeof(uint32_t*);
8391 }
8392
8393 upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
8394 upb_sink *sink) {
8395 const size_t default_max_nesting = 64;
8396 #ifndef NDEBUG
8397 size_t size_before = upb_env_bytesallocated(e);
8398 #endif
8399
8400 upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
8401 if (!d) return NULL;
8402
8403 d->method_ = m;
8404 d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
8405 d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
8406 if (!d->stack || !d->callstack) {
8407 return NULL;
8408 }
8409
8410 d->env = e;
8411 d->limit = d->stack + default_max_nesting - 1;
8412 d->stack_size = default_max_nesting;
8413
8414 upb_pbdecoder_reset(d);
8415 upb_bytessink_reset(&d->input_, &m->input_handler_, d);
8416
8417 assert(sink);
8418 if (d->method_->dest_handlers_) {
8419 if (sink->handlers != d->method_->dest_handlers_)
8420 return NULL;
8421 }
8422 upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
8423
8424 /* If this fails, increase the value in decoder.h. */
8425 assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
8426 return d;
8427 }
8428
8429 uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
8430 return offset(d);
8431 }
8432
8433 const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
8434 return d->method_;
8435 }
8436
8437 upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
8438 return &d->input_;
8439 }
8440
8441 size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
8442 return d->stack_size;
8443 }
8444
8445 bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
8446 assert(d->top >= d->stack);
8447
8448 if (max < (size_t)(d->top - d->stack)) {
8449 /* Can't set a limit smaller than what we are currently at. */
8450 return false;
8451 }
8452
8453 if (max > d->stack_size) {
8454 /* Need to reallocate stack and callstack to accommodate. */
8455 size_t old_size = stacksize(d, d->stack_size);
8456 size_t new_size = stacksize(d, max);
8457 void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
8458 if (!p) {
8459 return false;
8460 }
8461 d->stack = p;
8462
8463 old_size = callstacksize(d, d->stack_size);
8464 new_size = callstacksize(d, max);
8465 p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
8466 if (!p) {
8467 return false;
8468 }
8469 d->callstack = p;
8470
8471 d->stack_size = max;
8472 }
8473
8474 d->limit = d->stack + max - 1;
8475 return true;
8476 }
8477 /*
8478 ** upb::Encoder
8479 **
8480 ** Since we are implementing pure handlers (ie. without any out-of-band access
8481 ** to pre-computed lengths), we have to buffer all submessages before we can
8482 ** emit even their first byte.
8483 **
8484 ** Not knowing the size of submessages also means we can't write a perfect
8485 ** zero-copy implementation, even with buffering. Lengths are stored as
8486 ** varints, which means that we don't know how many bytes to reserve for the
8487 ** length until we know what the length is.
8488 **
8489 ** This leaves us with three main choices:
8490 **
8491 ** 1. buffer all submessage data in a temporary buffer, then copy it exactly
8492 ** once into the output buffer.
8493 **
8494 ** 2. attempt to buffer data directly into the output buffer, estimating how
8495 ** many bytes each length will take. When our guesses are wrong, use
8496 ** memmove() to grow or shrink the allotted space.
8497 **
8498 ** 3. buffer directly into the output buffer, allocating a max length
8499 ** ahead-of-time for each submessage length. If we overallocated, we waste
8500 ** space, but no memcpy() or memmove() is required. This approach requires
8501 ** defining a maximum size for submessages and rejecting submessages that
8502 ** exceed that size.
8503 **
8504 ** (2) and (3) have the potential to have better performance, but they are more
8505 ** complicated and subtle to implement:
8506 **
8507 ** (3) requires making an arbitrary choice of the maximum message size; it
8508 ** wastes space when submessages are shorter than this and fails
8509 ** completely when they are longer. This makes it more finicky and
8510 ** requires configuration based on the input. It also makes it impossible
8511 ** to perfectly match the output of reference encoders that always use the
8512 ** optimal amount of space for each length.
8513 **
8514 ** (2) requires guessing the size upfront, and if multiple lengths are
8515 ** guessed wrong the minimum required number of memmove() operations may
8516 ** be complicated to compute correctly. Implemented properly, it may have
8517 ** a useful amortized or average cost, but more investigation is required
8518 ** to determine this and what the optimal algorithm is to achieve it.
8519 **
8520 ** (1) makes you always pay for exactly one copy, but its implementation is
8521 ** the simplest and its performance is predictable.
8522 **
8523 ** So for now, we implement (1) only. If we wish to optimize later, we should
8524 ** be able to do it without affecting users.
8525 **
8526 ** The strategy is to buffer the segments of data that do *not* depend on
8527 ** unknown lengths in one buffer, and keep a separate buffer of segment pointers
8528 ** and lengths. When the top-level submessage ends, we can go beginning to end,
8529 ** alternating the writing of lengths with memcpy() of the rest of the data.
8530 ** At the top level though, no buffering is required.
8531 */
8532
8533
8534 #include <stdlib.h>
8535
8536 /* The output buffer is divided into segments; a segment is a string of data
8537 * that is "ready to go" -- it does not need any varint lengths inserted into
8538 * the middle. The seams between segments are where varints will be inserted
8539 * once they are known.
8540 *
8541 * We also use the concept of a "run", which is a range of encoded bytes that
8542 * occur at a single submessage level. Every segment contains one or more runs.
8543 *
8544 * A segment can span messages. Consider:
8545 *
8546 * .--Submessage lengths---------.
8547 * | | |
8548 * | V V
8549 * V | |--------------- | |-----------------
8550 * Submessages: | |-----------------------------------------------
8551 * Top-level msg: ------------------------------------------------------------
8552 *
8553 * Segments: ----- ------------------- -----------------
8554 * Runs: *---- *--------------*--- *----------------
8555 * (* marks the start)
8556 *
8557 * Note that the top-level message is not in any segment because it does not
8558 * have any length preceding it.
8559 *
8560 * A segment is only interrupted when another length needs to be inserted. So
8561 * observe how the second segment spans both the inner submessage and part of
8562 * the next enclosing message. */
/* Bookkeeping for one segment of buffered output: the submessage length to
 * emit as a varint in front of the segment, and the number of buffered bytes
 * that make up the segment itself. */
typedef struct {
  uint32_t msglen;  /* The length to varint-encode before this segment. */
  uint32_t seglen;  /* Length of the segment. */
} upb_pb_encoder_segment;
8567
8568 struct upb_pb_encoder {
8569 upb_env *env;
8570
8571 /* Our input and output. */
8572 upb_sink input_;
8573 upb_bytessink *output_;
8574
8575 /* The "subclosure" -- used as the inner closure as part of the bytessink
8576 * protocol. */
8577 void *subc;
8578
8579 /* The output buffer and limit, and our current write position. "buf"
8580 * initially points to "initbuf", but is dynamically allocated if we need to
8581 * grow beyond the initial size. */
8582 char *buf, *ptr, *limit;
8583
8584 /* The beginning of the current run, or undefined if we are at the top
8585 * level. */
8586 char *runbegin;
8587
8588 /* The list of segments we are accumulating. */
8589 upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
8590
8591 /* The stack of enclosing submessages. Each entry in the stack points to the
8592 * segment where this submessage's length is being accumulated. */
8593 int *stack, *top, *stacklimit;
8594
8595 /* Depth of startmsg/endmsg calls. */
8596 int depth;
8597 };
8598
8599 /* low-level buffering ********************************************************/
8600
8601 /* Low-level functions for interacting with the output buffer. */
8602
8603 /* TODO(haberman): handle pushback */
8604 static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
8605 size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
8606 UPB_ASSERT_VAR(n, n == len);
8607 }
8608
8609 static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
8610 return &e->segbuf[*e->top];
8611 }
8612
8613 /* Call to ensure that at least "bytes" bytes are available for writing at
8614 * e->ptr. Returns false if the bytes could not be allocated. */
8615 static bool reserve(upb_pb_encoder *e, size_t bytes) {
8616 if ((size_t)(e->limit - e->ptr) < bytes) {
8617 /* Grow buffer. */
8618 char *new_buf;
8619 size_t needed = bytes + (e->ptr - e->buf);
8620 size_t old_size = e->limit - e->buf;
8621
8622 size_t new_size = old_size;
8623
8624 while (new_size < needed) {
8625 new_size *= 2;
8626 }
8627
8628 new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
8629
8630 if (new_buf == NULL) {
8631 return false;
8632 }
8633
8634 e->ptr = new_buf + (e->ptr - e->buf);
8635 e->runbegin = new_buf + (e->runbegin - e->buf);
8636 e->limit = new_buf + new_size;
8637 e->buf = new_buf;
8638 }
8639
8640 return true;
8641 }
8642
8643 /* Call when "bytes" bytes have been writte at e->ptr. The caller *must* have
8644 * previously called reserve() with at least this many bytes. */
8645 static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
8646 assert((size_t)(e->limit - e->ptr) >= bytes);
8647 e->ptr += bytes;
8648 }
8649
8650 /* Call when all of the bytes for a handler have been written. Flushes the
8651 * bytes if possible and necessary, returning false if this failed. */
8652 static bool commit(upb_pb_encoder *e) {
8653 if (!e->top) {
8654 /* We aren't inside a delimited region. Flush our accumulated bytes to
8655 * the output.
8656 *
8657 * TODO(haberman): in the future we may want to delay flushing for
8658 * efficiency reasons. */
8659 putbuf(e, e->buf, e->ptr - e->buf);
8660 e->ptr = e->buf;
8661 }
8662
8663 return true;
8664 }
8665
8666 /* Writes the given bytes to the buffer, handling reserve/advance. */
8667 static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
8668 if (!reserve(e, len)) {
8669 return false;
8670 }
8671
8672 memcpy(e->ptr, data, len);
8673 encoder_advance(e, len);
8674 return true;
8675 }
8676
8677 /* Finish the current run by adding the run totals to the segment and message
8678 * length. */
8679 static void accumulate(upb_pb_encoder *e) {
8680 size_t run_len;
8681 assert(e->ptr >= e->runbegin);
8682 run_len = e->ptr - e->runbegin;
8683 e->segptr->seglen += run_len;
8684 top(e)->msglen += run_len;
8685 e->runbegin = e->ptr;
8686 }
8687
8688 /* Call to indicate the start of delimited region for which the full length is
8689 * not yet known. All data will be buffered until the length is known.
8690 * Delimited regions may be nested; their lengths will all be tracked properly. */
8691 static bool start_delim(upb_pb_encoder *e) {
8692 if (e->top) {
8693 /* We are already buffering, advance to the next segment and push it on the
8694 * stack. */
8695 accumulate(e);
8696
8697 if (++e->top == e->stacklimit) {
8698 /* TODO(haberman): grow stack? */
8699 return false;
8700 }
8701
8702 if (++e->segptr == e->seglimit) {
8703 /* Grow segment buffer. */
8704 size_t old_size =
8705 (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
8706 size_t new_size = old_size * 2;
8707 upb_pb_encoder_segment *new_buf =
8708 upb_env_realloc(e->env, e->segbuf, old_size, new_size);
8709
8710 if (new_buf == NULL) {
8711 return false;
8712 }
8713
8714 e->segptr = new_buf + (e->segptr - e->segbuf);
8715 e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
8716 e->segbuf = new_buf;
8717 }
8718 } else {
8719 /* We were previously at the top level, start buffering. */
8720 e->segptr = e->segbuf;
8721 e->top = e->stack;
8722 e->runbegin = e->ptr;
8723 }
8724
8725 *e->top = e->segptr - e->segbuf;
8726 e->segptr->seglen = 0;
8727 e->segptr->msglen = 0;
8728
8729 return true;
8730 }
8731
8732 /* Call to indicate the end of a delimited region. We now know the length of
8733 * the delimited region. If we are not nested inside any other delimited
8734 * regions, we can now emit all of the buffered data we accumulated. */
static bool end_delim(upb_pb_encoder *e) {
  size_t msglen;
  accumulate(e);
  /* Length of the region that is being closed. */
  msglen = top(e)->msglen;

  if (e->top == e->stack) {
    /* All lengths are now available, emit all buffered data. */
    char buf[UPB_PB_VARINT_MAX_LEN];
    upb_pb_encoder_segment *s;
    const char *ptr = e->buf;
    /* For every segment up to and including the current one: emit its msglen
     * as a varint, then seglen bytes of buffered data. */
    for (s = e->segbuf; s <= e->segptr; s++) {
      size_t lenbytes = upb_vencode64(s->msglen, buf);
      putbuf(e, buf, lenbytes);
      putbuf(e, ptr, s->seglen);
      ptr += s->seglen;
    }

    /* Rewind the data buffer and leave buffering mode (top == NULL). */
    e->ptr = e->buf;
    e->top = NULL;
  } else {
    /* Need to keep buffering; propagate length info into enclosing
     * submessages. */
    --e->top;
    /* The enclosing region grows by the closed region's payload plus the
     * size of the varint that will encode its length. */
    top(e)->msglen += msglen + upb_varint_size(msglen);
  }

  return true;
}
8763
8764
8765 /* tag_t **********************************************************************/
8766
/* A precomputed (pre-encoded) tag and length. */

typedef struct {
  uint8_t bytes;  /* Number of valid bytes in tag[] (set from the return value
                   * of upb_vencode64() in new_tag()). */
  char tag[7];    /* The varint-encoded tag bytes. */
} tag_t;
8773
8774 /* Allocates a new tag for this field, and sets it in these handlerattr. */
8775 static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
8776 upb_handlerattr *attr) {
8777 uint32_t n = upb_fielddef_number(f);
8778
8779 tag_t *tag = malloc(sizeof(tag_t));
8780 tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
8781
8782 upb_handlerattr_init(attr);
8783 upb_handlerattr_sethandlerdata(attr, tag);
8784 upb_handlers_addcleanup(h, tag, free);
8785 }
8786
8787 static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
8788 return encode_bytes(e, tag->tag, tag->bytes);
8789 }
8790
8791
8792 /* encoding of wire types *****************************************************/
8793
8794 static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
8795 /* TODO(haberman): byte-swap for big endian. */
8796 return encode_bytes(e, &val, sizeof(uint64_t));
8797 }
8798
8799 static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
8800 /* TODO(haberman): byte-swap for big endian. */
8801 return encode_bytes(e, &val, sizeof(uint32_t));
8802 }
8803
8804 static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
8805 if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) {
8806 return false;
8807 }
8808
8809 encoder_advance(e, upb_vencode64(val, e->ptr));
8810 return true;
8811 }
8812
/* Returns the object representation of |d| as a 64-bit integer (a bit copy,
 * not a numeric conversion). */
static uint64_t dbl2uint64(double d) {
  uint64_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
8818
/* Returns the object representation of |d| as a 32-bit integer (a bit copy,
 * not a numeric conversion). */
static uint32_t flt2uint32(float d) {
  uint32_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
8824
8825
8826 /* encoding of proto types ****************************************************/
8827
8828 static bool startmsg(void *c, const void *hd) {
8829 upb_pb_encoder *e = c;
8830 UPB_UNUSED(hd);
8831 if (e->depth++ == 0) {
8832 upb_bytessink_start(e->output_, 0, &e->subc);
8833 }
8834 return true;
8835 }
8836
8837 static bool endmsg(void *c, const void *hd, upb_status *status) {
8838 upb_pb_encoder *e = c;
8839 UPB_UNUSED(hd);
8840 UPB_UNUSED(status);
8841 if (--e->depth == 0) {
8842 upb_bytessink_end(e->output_);
8843 }
8844 return true;
8845 }
8846
8847 static void *encode_startdelimfield(void *c, const void *hd) {
8848 bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
8849 return ok ? c : UPB_BREAK;
8850 }
8851
/* Closes the delimited region opened by the matching start handler. */
static bool encode_enddelimfield(void *c, const void *hd) {
  bool closed;
  UPB_UNUSED(hd);
  closed = end_delim(c);
  return closed;
}
8856
8857 static void *encode_startgroup(void *c, const void *hd) {
8858 return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
8859 }
8860
/* Writes and commits the tag supplied as handler data (the END_GROUP tag
 * registered for group fields). */
static bool encode_endgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) return false;
  return commit(c);
}
8864
/* Start-of-string handler: a string is written as an ordinary delimited
 * field; the size hint is not used. */
static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
  void *subclosure;
  UPB_UNUSED(size_hint);
  subclosure = encode_startdelimfield(c, hd);
  return subclosure;
}
8869
8870 static size_t encode_strbuf(void *c, const void *hd, const char *buf,
8871 size_t len, const upb_bufhandle *h) {
8872 UPB_UNUSED(hd);
8873 UPB_UNUSED(h);
8874 return encode_bytes(c, buf, len) ? len : 0;
8875 }
8876
8877 #define T(type, ctype, convert, encode) \
8878 static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
8879 return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e); \
8880 } \
8881 static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
8882 UPB_UNUSED(hd); \
8883 return encode(e, (convert)(val)); \
8884 }
8885
8886 T(double, double, dbl2uint64, encode_fixed64)
8887 T(float, float, flt2uint32, encode_fixed32)
8888 T(int64, int64_t, uint64_t, encode_varint)
8889 T(int32, int32_t, uint32_t, encode_varint)
8890 T(fixed64, uint64_t, uint64_t, encode_fixed64)
8891 T(fixed32, uint32_t, uint32_t, encode_fixed32)
8892 T(bool, bool, bool, encode_varint)
8893 T(uint32, uint32_t, uint32_t, encode_varint)
8894 T(uint64, uint64_t, uint64_t, encode_varint)
8895 T(enum, int32_t, uint32_t, encode_varint)
8896 T(sfixed32, int32_t, uint32_t, encode_fixed32)
8897 T(sfixed64, int64_t, uint64_t, encode_fixed64)
8898 T(sint32, int32_t, upb_zzenc_32, encode_varint)
8899 T(sint64, int64_t, upb_zzenc_64, encode_varint)
8900
8901 #undef T
8902
8903
8904 /* code to build the handlers *************************************************/
8905
/* Handlers-construction callback: registers the encoder's start/end message
 * handlers on |h| and, for every field of the message definition behind |h|,
 * a value handler whose handler data is that field's pre-encoded tag.
 * |closure| is unused. */
static void newhandlers_callback(const void *closure, upb_handlers *h) {
  const upb_msgdef *m;
  upb_msg_field_iter i;

  UPB_UNUSED(closure);

  upb_handlers_setstartmsg(h, startmsg, NULL);
  upb_handlers_setendmsg(h, endmsg, NULL);

  m = upb_handlers_msgdef(h);
  for(upb_msg_field_begin(&i, m);
      !upb_msg_field_done(&i);
      upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);
    /* Only repeated primitive fields that are marked packed use the packed
     * encoding. */
    bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
                  upb_fielddef_packed(f);
    upb_handlerattr attr;
    /* A packed field is written as one delimited region; any other field uses
     * the native wire type of its descriptor type. */
    upb_wiretype_t wt =
        packed ? UPB_WIRE_TYPE_DELIMITED
               : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];

    /* Pre-encode the tag for this field. */
    new_tag(h, f, wt, &attr);

    if (packed) {
      upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
      upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
    }

/* Expands to the switch case for one scalar descriptor type, selecting the
 * packed or the scalar (tag + value) handler. */
#define T(upper, lower, upbtype)                                     \
  case UPB_DESCRIPTOR_TYPE_##upper:                                  \
    if (packed) {                                                    \
      upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
    } else {                                                         \
      upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
    }                                                                \
    break;

    switch (upb_fielddef_descriptortype(f)) {
      T(DOUBLE,   double,   double);
      T(FLOAT,    float,    float);
      T(INT64,    int64,    int64);
      T(INT32,    int32,    int32);
      T(FIXED64,  fixed64,  uint64);
      T(FIXED32,  fixed32,  uint32);
      T(BOOL,     bool,     bool);
      T(UINT32,   uint32,   uint32);
      T(UINT64,   uint64,   uint64);
      T(ENUM,     enum,     int32);
      T(SFIXED32, sfixed32, int32);
      T(SFIXED64, sfixed64, int64);
      T(SINT32,   sint32,   int32);
      T(SINT64,   sint64,   int64);
      case UPB_DESCRIPTOR_TYPE_STRING:
      case UPB_DESCRIPTOR_TYPE_BYTES:
        upb_handlers_setstartstr(h, f, encode_startstr, &attr);
        upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
        upb_handlers_setstring(h, f, encode_strbuf, &attr);
        break;
      case UPB_DESCRIPTOR_TYPE_MESSAGE:
        upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
        upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
        break;
      case UPB_DESCRIPTOR_TYPE_GROUP: {
        /* Endgroup takes a different tag (wire_type = END_GROUP). */
        upb_handlerattr attr2;
        new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);

        upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
        upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);

        upb_handlerattr_uninit(&attr2);
        break;
      }
    }

#undef T

    upb_handlerattr_uninit(&attr);
  }
}
8987
8988 void upb_pb_encoder_reset(upb_pb_encoder *e) {
8989 e->segptr = NULL;
8990 e->top = NULL;
8991 e->depth = 0;
8992 }
8993
8994
8995 /* public API *****************************************************************/
8996
8997 const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
8998 const void *owner) {
8999 return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
9000 }
9001
9002 upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
9003 upb_bytessink *output) {
9004 const size_t initial_bufsize = 256;
9005 const size_t initial_segbufsize = 16;
9006 /* TODO(haberman): make this configurable. */
9007 const size_t stack_size = 64;
9008 #ifndef NDEBUG
9009 const size_t size_before = upb_env_bytesallocated(env);
9010 #endif
9011
9012 upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
9013 if (!e) return NULL;
9014
9015 e->buf = upb_env_malloc(env, initial_bufsize);
9016 e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
9017 e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
9018
9019 if (!e->buf || !e->segbuf || !e->stack) {
9020 return NULL;
9021 }
9022
9023 e->limit = e->buf + initial_bufsize;
9024 e->seglimit = e->segbuf + initial_segbufsize;
9025 e->stacklimit = e->stack + stack_size;
9026
9027 upb_pb_encoder_reset(e);
9028 upb_sink_reset(&e->input_, h, e);
9029
9030 e->env = env;
9031 e->output_ = output;
9032 e->subc = output->closure;
9033 e->ptr = e->buf;
9034
9035 /* If this fails, increase the value in encoder.h. */
9036 assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
9037 return e;
9038 }
9039
9040 upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
9041
9042
9043 #include <stdio.h>
9044 #include <stdlib.h>
9045 #include <string.h>
9046
9047 upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
9048 void *owner, upb_status *status) {
9049 /* Create handlers. */
9050 const upb_pbdecodermethod *decoder_m;
9051 const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
9052 upb_env env;
9053 upb_pbdecodermethodopts opts;
9054 upb_pbdecoder *decoder;
9055 upb_descreader *reader;
9056 bool ok;
9057 upb_def **ret = NULL;
9058 upb_def **defs;
9059
9060 upb_pbdecodermethodopts_init(&opts, reader_h);
9061 decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
9062
9063 upb_env_init(&env);
9064 upb_env_reporterrorsto(&env, status);
9065
9066 reader = upb_descreader_create(&env, reader_h);
9067 decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
9068
9069 /* Push input data. */
9070 ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
9071
9072 if (!ok) goto cleanup;
9073 defs = upb_descreader_getdefs(reader, owner, n);
9074 ret = malloc(sizeof(upb_def*) * (*n));
9075 memcpy(ret, defs, sizeof(upb_def*) * (*n));
9076
9077 cleanup:
9078 upb_env_uninit(&env);
9079 upb_handlers_unref(reader_h, &reader_h);
9080 upb_pbdecodermethod_unref(decoder_m, &decoder_m);
9081 return ret;
9082 }
9083
9084 bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len,
9085 upb_status *status) {
9086 int n;
9087 bool success;
9088 upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, &defs, status);
9089 if (!defs) return false;
9090 success = upb_symtab_add(s, defs, n, &defs, status);
9091 free(defs);
9092 return success;
9093 }
9094
/* Reads the whole file |filename| into a newly malloc()ed buffer.
 *
 * On success returns the buffer, which the caller releases with free(), and
 * stores the file size in *len when |len| is non-NULL.  The buffer holds one
 * extra byte beyond the file contents, which is set to '\0'.
 *
 * Returns NULL if the file cannot be opened, sized or read, or if memory
 * cannot be allocated; nothing is leaked in that case. */
char *upb_readfile(const char *filename, size_t *len) {
  long size;
  char *buf = NULL;
  FILE *f = fopen(filename, "rb");
  if (!f) return NULL;
  if (fseek(f, 0, SEEK_END) != 0) goto error;
  size = ftell(f);
  if (size < 0) goto error;
  if (fseek(f, 0, SEEK_SET) != 0) goto error;
  buf = malloc((size_t)size + 1);
  if (!buf) goto error;
  if (size && fread(buf, (size_t)size, 1, f) != 1) goto error;
  buf[size] = '\0';
  fclose(f);
  if (len) *len = (size_t)size;
  return buf;

error:
  free(buf);  /* free(NULL) is a no-op. */
  fclose(f);
  return NULL;
}
9114
9115 bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
9116 upb_status *status) {
9117 size_t len;
9118 bool success;
9119 char *data = upb_readfile(fname, &len);
9120 if (!data) {
9121 if (status) upb_status_seterrf(status, "Couldn't read file: %s", fname);
9122 return false;
9123 }
9124 success = upb_load_descriptor_into_symtab(symtab, data, len, status);
9125 free(data);
9126 return success;
9127 }
9128 /*
9129 * upb::pb::TextPrinter
9130 *
9131 * OPT: This is not optimized at all. It uses printf() which parses the format
9132 * string every time, and it allocates memory for every put.
9133 */
9134
9135
9136 #include <ctype.h>
9137 #include <float.h>
9138 #include <inttypes.h>
9139 #include <stdarg.h>
9140 #include <stdio.h>
9141 #include <stdlib.h>
9142 #include <string.h>
9143
9144
/* State of one text-format printer. */
struct upb_textprinter {
  upb_sink input_;         /* Sink handed out by upb_textprinter_input(). */
  upb_bytessink *output_;  /* Where the generated text is written. */
  int indent_depth_;       /* Current submessage nesting level. */
  bool single_line_;       /* If true, fields are separated by spaces rather
                            * than newlines and no indentation is written. */
  void *subc;              /* Filled in by upb_bytessink_start() and passed to
                            * every upb_bytessink_putbuf() call. */
};
9152
/* Jumps to the enclosing function's "err" label when |x| evaluates to a
 * negative value.  The expansion already ends in ';' and is a bare "if", so
 * it must not be used as the body of an unbraced if/else. */
#define CHECK(x) if ((x) < 0) goto err;
9154
/* Returns the part of |longname| after its last '.', or |longname| itself if
 * it contains no '.'.  The result points into |longname|. */
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
9159
9160 static int indent(upb_textprinter *p) {
9161 int i;
9162 if (!p->single_line_)
9163 for (i = 0; i < p->indent_depth_; i++)
9164 upb_bytessink_putbuf(p->output_, p->subc, " ", 2, NULL);
9165 return 0;
9166 }
9167
9168 static int endfield(upb_textprinter *p) {
9169 const char ch = (p->single_line_ ? ' ' : '\n');
9170 upb_bytessink_putbuf(p->output_, p->subc, &ch, 1, NULL);
9171 return 0;
9172 }
9173
9174 static int putescaped(upb_textprinter *p, const char *buf, size_t len,
9175 bool preserve_utf8) {
9176 /* Based on CEscapeInternal() from Google's protobuf release. */
9177 char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
9178 const char *end = buf + len;
9179
9180 /* I think hex is prettier and more useful, but proto2 uses octal; should
9181 * investigate whether it can parse hex also. */
9182 const bool use_hex = false;
9183 bool last_hex_escape = false; /* true if last output char was \xNN */
9184
9185 for (; buf < end; buf++) {
9186 bool is_hex_escape;
9187
9188 if (dstend - dst < 4) {
9189 upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
9190 dst = dstbuf;
9191 }
9192
9193 is_hex_escape = false;
9194 switch (*buf) {
9195 case '\n': *(dst++) = '\\'; *(dst++) = 'n'; break;
9196 case '\r': *(dst++) = '\\'; *(dst++) = 'r'; break;
9197 case '\t': *(dst++) = '\\'; *(dst++) = 't'; break;
9198 case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
9199 case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
9200 case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
9201 default:
9202 /* Note that if we emit \xNN and the buf character after that is a hex
9203 * digit then that digit must be escaped too to prevent it being
9204 * interpreted as part of the character code by C. */
9205 if ((!preserve_utf8 || (uint8_t)*buf < 0x80) &&
9206 (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) {
9207 sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf);
9208 is_hex_escape = use_hex;
9209 dst += 4;
9210 } else {
9211 *(dst++) = *buf; break;
9212 }
9213 }
9214 last_hex_escape = is_hex_escape;
9215 }
9216 /* Flush remaining data. */
9217 upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
9218 return 0;
9219 }
9220
9221 bool putf(upb_textprinter *p, const char *fmt, ...) {
9222 va_list args;
9223 va_list args_copy;
9224 char *str;
9225 int written;
9226 int len;
9227 bool ok;
9228
9229 va_start(args, fmt);
9230
9231 /* Run once to get the length of the string. */
9232 _upb_va_copy(args_copy, args);
9233 len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
9234 va_end(args_copy);
9235
9236 /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */
9237 str = malloc(len + 1);
9238 if (!str) return false;
9239 written = vsprintf(str, fmt, args);
9240 va_end(args);
9241 UPB_ASSERT_VAR(written, written == len);
9242
9243 ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
9244 free(str);
9245 return ok;
9246 }
9247
9248
9249 /* handlers *******************************************************************/
9250
9251 static bool textprinter_startmsg(void *c, const void *hd) {
9252 upb_textprinter *p = c;
9253 UPB_UNUSED(hd);
9254 if (p->indent_depth_ == 0) {
9255 upb_bytessink_start(p->output_, 0, &p->subc);
9256 }
9257 return true;
9258 }
9259
9260 static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
9261 upb_textprinter *p = c;
9262 UPB_UNUSED(hd);
9263 UPB_UNUSED(s);
9264 if (p->indent_depth_ == 0) {
9265 upb_bytessink_end(p->output_);
9266 }
9267 return true;
9268 }
9269
/* Generates textprinter_put<name>(): a value handler that writes one line of
 * the form "<field name>: <value>", where the value is formatted with the
 * printf conversion |fmt|.  The handler data is the field's upb_fielddef.
 * The return value of putf() is not checked. */
#define TYPE(name, ctype, fmt) \
  static bool textprinter_put ## name(void *closure, const void *handler_data, \
                                      ctype val) {                             \
    upb_textprinter *p = closure;                                              \
    const upb_fielddef *f = handler_data;                                      \
    CHECK(indent(p));                                                          \
    putf(p, "%s: " fmt, upb_fielddef_name(f), val);                            \
    CHECK(endfield(p));                                                        \
    return true;                                                               \
  err:                                                                         \
    return false;                                                              \
}
9282
9283 static bool textprinter_putbool(void *closure, const void *handler_data,
9284 bool val) {
9285 upb_textprinter *p = closure;
9286 const upb_fielddef *f = handler_data;
9287 CHECK(indent(p));
9288 putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
9289 CHECK(endfield(p));
9290 return true;
9291 err:
9292 return false;
9293 }
9294
/* Two-level stringification: the extra level lets the argument be
 * macro-expanded first, so STRINGIFY_MACROVAL(FLT_DIG) yields the digits of
 * FLT_DIG's value rather than the text "FLT_DIG". */
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)

/* Instantiate the numeric value handlers.  The floating-point formats use
 * FLT_DIG / DBL_DIG from <float.h> as the "%g" precision. */
TYPE(int32,  int32_t,  "%" PRId32)
TYPE(int64,  int64_t,  "%" PRId64)
TYPE(uint32, uint32_t, "%" PRIu32)
TYPE(uint64, uint64_t, "%" PRIu64)
TYPE(float,  float,    "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
TYPE(double, double,   "%." STRINGIFY_MACROVAL(DBL_DIG) "g")

#undef TYPE
9306
9307 /* Output a symbolic value from the enum if found, else just print as int32. */
9308 static bool textprinter_putenum(void *closure, const void *handler_data,
9309 int32_t val) {
9310 upb_textprinter *p = closure;
9311 const upb_fielddef *f = handler_data;
9312 const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
9313 const char *label = upb_enumdef_iton(enum_def, val);
9314 if (label) {
9315 indent(p);
9316 putf(p, "%s: %s", upb_fielddef_name(f), label);
9317 endfield(p);
9318 } else {
9319 if (!textprinter_putint32(closure, handler_data, val))
9320 return false;
9321 }
9322 return true;
9323 }
9324
9325 static void *textprinter_startstr(void *closure, const void *handler_data,
9326 size_t size_hint) {
9327 upb_textprinter *p = closure;
9328 const upb_fielddef *f = handler_data;
9329 UPB_UNUSED(size_hint);
9330 indent(p);
9331 putf(p, "%s: \"", upb_fielddef_name(f));
9332 return p;
9333 }
9334
9335 static bool textprinter_endstr(void *closure, const void *handler_data) {
9336 upb_textprinter *p = closure;
9337 UPB_UNUSED(handler_data);
9338 putf(p, "\"");
9339 endfield(p);
9340 return true;
9341 }
9342
9343 static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
9344 size_t len, const upb_bufhandle *handle) {
9345 upb_textprinter *p = closure;
9346 const upb_fielddef *f = hd;
9347 UPB_UNUSED(handle);
9348 CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
9349 return len;
9350 err:
9351 return 0;
9352 }
9353
9354 static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
9355 upb_textprinter *p = closure;
9356 const char *name = handler_data;
9357 CHECK(indent(p));
9358 putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
9359 p->indent_depth_++;
9360 return p;
9361 err:
9362 return UPB_BREAK;
9363 }
9364
9365 static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
9366 upb_textprinter *p = closure;
9367 UPB_UNUSED(handler_data);
9368 p->indent_depth_--;
9369 CHECK(indent(p));
9370 upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
9371 CHECK(endfield(p));
9372 return true;
9373 err:
9374 return false;
9375 }
9376
/* Handlers-construction callback for the text printer: registers the
 * start/end message handlers on |h| and, for each field of the message
 * definition behind |h|, the handler(s) matching the field's type.  |c| is
 * unused. */
static void onmreg(const void *c, upb_handlers *h) {
  const upb_msgdef *m = upb_handlers_msgdef(h);
  upb_msg_field_iter i;
  UPB_UNUSED(c);

  upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
  upb_handlers_setendmsg(h, textprinter_endmsg, NULL);

  for(upb_msg_field_begin(&i, m);
      !upb_msg_field_done(&i);
      upb_msg_field_next(&i)) {
    upb_fielddef *f = upb_msg_iter_field(&i);
    upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    /* By default each handler receives the field definition as its handler
     * data; the MESSAGE case below replaces it with a name string. */
    upb_handlerattr_sethandlerdata(&attr, f);
    switch (upb_fielddef_type(f)) {
      case UPB_TYPE_INT32:
        upb_handlers_setint32(h, f, textprinter_putint32, &attr);
        break;
      case UPB_TYPE_INT64:
        upb_handlers_setint64(h, f, textprinter_putint64, &attr);
        break;
      case UPB_TYPE_UINT32:
        upb_handlers_setuint32(h, f, textprinter_putuint32, &attr);
        break;
      case UPB_TYPE_UINT64:
        upb_handlers_setuint64(h, f, textprinter_putuint64, &attr);
        break;
      case UPB_TYPE_FLOAT:
        upb_handlers_setfloat(h, f, textprinter_putfloat, &attr);
        break;
      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, f, textprinter_putdouble, &attr);
        break;
      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, f, textprinter_putbool, &attr);
        break;
      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES:
        upb_handlers_setstartstr(h, f, textprinter_startstr, &attr);
        upb_handlers_setstring(h, f, textprinter_putstr, &attr);
        upb_handlers_setendstr(h, f, textprinter_endstr, &attr);
        break;
      case UPB_TYPE_MESSAGE: {
        /* Tag-delimited fields are printed under the short name of their
         * message type; all other submessages under the field name. */
        const char *name =
            upb_fielddef_istagdelim(f)
                ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f)))
                : upb_fielddef_name(f);
        upb_handlerattr_sethandlerdata(&attr, name);
        upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr);
        upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr);
        break;
      }
      case UPB_TYPE_ENUM:
        upb_handlers_setint32(h, f, textprinter_putenum, &attr);
        break;
    }
  }
}
9435
9436 static void textprinter_reset(upb_textprinter *p, bool single_line) {
9437 p->single_line_ = single_line;
9438 p->indent_depth_ = 0;
9439 }
9440
9441
9442 /* Public API *****************************************************************/
9443
9444 upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
9445 upb_bytessink *output) {
9446 upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
9447 if (!p) return NULL;
9448
9449 p->output_ = output;
9450 upb_sink_reset(&p->input_, h, p);
9451 textprinter_reset(p, false);
9452
9453 return p;
9454 }
9455
9456 const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
9457 const void *owner) {
9458 return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
9459 }
9460
9461 upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
9462
9463 void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
9464 p->single_line_ = single_line;
9465 }
9466
9467
/* Index is descriptor type. */
/* Maps each descriptor type to the wire type used for a non-packed value of
 * that type.  Slot 0 is labelled ENDGROUP; the remaining slots are labelled
 * with the descriptor type they correspond to. */
const uint8_t upb_pb_native_wire_types[] = {
  UPB_WIRE_TYPE_END_GROUP,     /* ENDGROUP */
  UPB_WIRE_TYPE_64BIT,         /* DOUBLE */
  UPB_WIRE_TYPE_32BIT,         /* FLOAT */
  UPB_WIRE_TYPE_VARINT,        /* INT64 */
  UPB_WIRE_TYPE_VARINT,        /* UINT64 */
  UPB_WIRE_TYPE_VARINT,        /* INT32 */
  UPB_WIRE_TYPE_64BIT,         /* FIXED64 */
  UPB_WIRE_TYPE_32BIT,         /* FIXED32 */
  UPB_WIRE_TYPE_VARINT,        /* BOOL */
  UPB_WIRE_TYPE_DELIMITED,     /* STRING */
  UPB_WIRE_TYPE_START_GROUP,   /* GROUP */
  UPB_WIRE_TYPE_DELIMITED,     /* MESSAGE */
  UPB_WIRE_TYPE_DELIMITED,     /* BYTES */
  UPB_WIRE_TYPE_VARINT,        /* UINT32 */
  UPB_WIRE_TYPE_VARINT,        /* ENUM */
  UPB_WIRE_TYPE_32BIT,         /* SFIXED32 */
  UPB_WIRE_TYPE_64BIT,         /* SFIXED64 */
  UPB_WIRE_TYPE_VARINT,        /* SINT32 */
  UPB_WIRE_TYPE_VARINT,        /* SINT64 */
};
9490
/* A basic branch-based decoder, uses 32-bit values to get good performance
 * on 32-bit architectures (but performs well on 64-bits also).
 * This scheme comes from the original Google Protobuf implementation
 * (proto2).
 *
 * Consumes up to 8 further bytes starting at r.p and ORs their 7-bit payloads
 * into r.val at bit offsets 14, 21, ...  Returns the updated value with the
 * pointer just past the terminating byte, or {NULL, 0} if none of the 8 bytes
 * has its continuation bit (0x80) clear.
 * NOTE(review): the starting shift of 14 implies r.val already holds the
 * first two 7-bit groups and r.p points at the third byte -- confirm against
 * the caller. */
upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
  upb_decoderet err = {NULL, 0};
  const char *p = r.p;
  uint32_t low = (uint32_t)r.val;
  uint32_t high = 0;
  uint32_t b;
  b = *(p++); low  |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
  b = *(p++); low  |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
  /* This group straddles the 32-bit boundary: its low 4 bits go into |low|
   * and its upper bits into |high|. */
  b = *(p++); low  |= (b & 0x7fU) << 28;
              high  = (b & 0x7fU) >>  4; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) <<  3; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
  return err;

done:
  r.val = ((uint64_t)high << 32) | low;
  r.p = p;
  return r;
}
9517
/* Like the previous, but uses 64-bit values.
 *
 * Consumes up to 8 further bytes starting at r.p, ORing each 7-bit payload
 * into r.val at bit offsets 14 through 63.  Returns {NULL, 0} if none of the
 * 8 bytes has its continuation bit (0x80) clear. */
upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
  const char *p = r.p;
  uint64_t val = r.val;
  uint64_t b;
  upb_decoderet err = {NULL, 0};
  b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
  return err;

done:
  r.val = val;
  r.p = p;
  return r;
}
9539
/* For the encoded varint bytes packed in |v|, returns a value with exactly one
 * bit set: the continuation-bit position (bit 7) of the lowest-order byte
 * whose continuation bit is clear, i.e. the byte that ends the varint.
 * Subtracting one from the result gives a mask covering only the bits that
 * belong to the varint.  Returns 0 when every byte has its continuation bit
 * set (unterminated varint). */
static uint64_t upb_get_vstopbit(uint64_t v) {
  const uint64_t payload_bits = 0x7f7f7f7f7f7f7f7fULL;
  /* With every payload bit forced to 1, adding 1 carries through all bytes
   * whose continuation bit is set and stops in the first one where it is
   * clear. */
  const uint64_t filled = v | payload_bits;
  const uint64_t carried = filled + 1;
  return carried & ~filled;
}
9548
/* A branchless decoder.  Credit to Pascal Massimino for the bit-twiddling.
 *
 * Loads 8 bytes from r.p at once, locates the terminating byte with
 * upb_get_vstopbit(), squeezes the 7-bit payloads of the bytes up to it
 * together, and ORs the result, shifted left by 7, into r.val.  Returns
 * {NULL, 0} if none of the 8 bytes terminates the varint.
 * NOTE(review): treating the first byte in memory as the lowest-order byte of
 * |b| appears to assume a little-endian host -- confirm.  __builtin_ctzll is
 * a compiler builtin, not standard C. */
upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
  uint64_t b;
  uint64_t stop_bit;
  upb_decoderet my_r;
  memcpy(&b, r.p, sizeof(b));
  stop_bit = upb_get_vstopbit(b);
  /* Drop the continuation bits and everything at or above the stop bit. */
  b =  (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
  /* Each addition below merges adjacent groups (7-bit, then 14-bit, then
   * 28-bit) into one contiguous field. */
  b +=       b & 0x007f007f007f007fULL;
  b +=  3 * (b & 0x0000ffff0000ffffULL);
  b += 15 * (b & 0x00000000ffffffffULL);
  if (stop_bit == 0) {
    /* Error: unterminated varint. */
    upb_decoderet err_r = {(void*)0, 0};
    return err_r;
  }
  /* The stop bit is bit 7 of the terminating byte, so (ctz + 1) / 8 is the
   * number of bytes consumed. */
  my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
                            r.val | (b << 7));
  return my_r;
}
9569
/* A branchless decoder.  Credit to Daniel Wright for the bit-twiddling.
 *
 * Loads 8 bytes from r.p at once, locates the terminating byte with
 * upb_get_vstopbit(), compacts the 7-bit payloads with three shift-and-mask
 * steps, and ORs the result, shifted left by 14, into r.val.  Returns
 * {NULL, 0} if none of the 8 bytes terminates the varint.
 * NOTE(review): appears to assume a little-endian host, like the memcpy-based
 * load implies -- confirm.  __builtin_ctzll is a compiler builtin, not
 * standard C. */
upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
  uint64_t b;
  uint64_t stop_bit;
  upb_decoderet my_r;
  memcpy(&b, r.p, sizeof(b));
  stop_bit = upb_get_vstopbit(b);
  /* Keep only the bits below the stop bit. */
  b &= (stop_bit - 1);
  /* Close the gaps left by the continuation bits: first between byte pairs,
   * then between 16-bit pairs, then between the two 32-bit halves. */
  b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL);
  b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL);
  b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL);
  if (stop_bit == 0) {
    /* Error: unterminated varint. */
    upb_decoderet err_r = {(void*)0, 0};
    return err_r;
  }
  /* The stop bit is bit 7 of the terminating byte, so (ctz + 1) / 8 is the
   * number of bytes consumed. */
  my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
                            r.val | (b << 14));
  return my_r;
}
9590
9591 #line 1 "upb/json/parser.rl"
9592 /*
9593 ** upb::json::Parser (upb_json_parser)
9594 **
9595 ** A parser that uses the Ragel State Machine Compiler to generate
9596 ** the finite automata.
9597 **
9598 ** Ragel only natively handles regular languages, but we can manually
9599 ** program it a bit to handle context-free languages like JSON, by using
9600 ** the "fcall" and "fret" constructs.
9601 **
9602 ** This parser can handle the basics, but needs several things to be fleshed
9603 ** out:
9604 **
9605 ** - handling of unicode escape sequences (including high surrogate pairs).
9606 ** - properly check and report errors for unknown fields, stack overflow,
9607 ** improper array nesting (or lack of nesting).
9608 ** - handling of base64 sequences with padding characters.
9609 ** - handling of push-back (non-success returns from sink functions).
9610 ** - handling of keys/escape-sequences/etc that span input buffers.
9611 */
9612
9613 #include <stdio.h>
9614 #include <stdint.h>
9615 #include <assert.h>
9616 #include <string.h>
9617 #include <stdlib.h>
9618 #include <errno.h>
9619
9620
9621 #define UPB_JSON_MAX_DEPTH 64
9622
/* One entry of the JSON parser's scope stack. */
typedef struct {
  /* NOTE(review): presumably the sink that receives the values parsed in
   * this scope -- confirm against the code that pushes frames. */
  upb_sink sink;

  /* The current message in which we're parsing, and the field whose value we're
   * expecting next. */
  const upb_msgdef *m;
  const upb_fielddef *f;

  /* We are in a repeated-field context, ready to emit mapentries as
   * submessages. This flag alters the start-of-object (open-brace) behavior to
   * begin a sequence of mapentry messages rather than a single submessage. */
  bool is_map;

  /* We are in a map-entry message context. This flag is set when parsing the
   * value field of a single map entry and indicates to all value-field parsers
   * (subobjects, strings, numbers, and bools) that the map-entry submessage
   * should end as soon as the value is parsed. */
  bool is_mapentry;

  /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
   * message's map field that we're currently parsing. This differs from |f|
   * because |f| is the field in the *current* message (i.e., the map-entry
   * message itself), not the parent's field that leads to this map. */
  const upb_fielddef *mapfield;
} upb_jsonparser_frame;
9648
/* Complete state of one JSON parser instance. */
struct upb_json_parser {
  /* Environment used for error reporting (see check_stack()). */
  upb_env *env;
  upb_byteshandler input_handler_;
  upb_bytessink input_;

  /* Stack to track the JSON scopes we are in. */
  upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
  /* |top| is the current frame; check_stack() refuses to go deeper once
   * top + 1 reaches |limit|. */
  upb_jsonparser_frame *top;
  upb_jsonparser_frame *limit;

  upb_status status;

  /* Ragel's internal parsing stack for the parsing state machine. */
  int current_state;
  int parser_stack[UPB_JSON_MAX_DEPTH];
  int parser_top;

  /* The handle for the current buffer. */
  const upb_bufhandle *handle;

  /* Accumulate buffer.  See details in parser.rl. */
  const char *accumulated;
  size_t accumulated_len;
  char *accumulate_buf;
  size_t accumulate_buf_size;

  /* Multi-part text data.  See details in parser.rl. */
  int multipart_state;
  upb_selector_t string_selector;

  /* Input capture.  See details in parser.rl. */
  const char *capture;

  /* Intermediate result of parsing a unicode escape sequence. */
  uint32_t digit;
};
9685
/* Returns false from the enclosing function when |x| evaluates to false.
 * The expansion is a bare "if" without a trailing ';', so it must not be used
 * as the body of an unbraced if/else. */
#define PARSER_CHECK_RETURN(x) if (!(x)) return false

/* Used to signal that a capture has been suspended. */
/* NOTE(review): presumably only the address of this object is used, as a
 * sentinel value -- confirm against its users. */
static char suspend_capture;
9690
9691 static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
9692 upb_handlertype_t type) {
9693 upb_selector_t sel;
9694 bool ok = upb_handlers_getselector(p->top->f, type, &sel);
9695 UPB_ASSERT_VAR(ok, ok);
9696 return sel;
9697 }
9698
9699 static upb_selector_t parser_getsel(upb_json_parser *p) {
9700 return getsel_for_handlertype(
9701 p, upb_handlers_getprimitivehandlertype(p->top->f));
9702 }
9703
9704 static bool check_stack(upb_json_parser *p) {
9705 if ((p->top + 1) == p->limit) {
9706 upb_status_seterrmsg(&p->status, "Nesting too deep");
9707 upb_env_reporterror(p->env, &p->status);
9708 return false;
9709 }
9710
9711 return true;
9712 }
9713
/* Overflow-checked size_t addition: stores a + b in *c and returns true, or
 * returns false and leaves *c untouched if the sum does not fit in size_t.
 * (GCC/Clang have overflow-checking built-ins that could be used instead if
 * they brought a performance benefit.) */
static bool checked_add(size_t a, size_t b, size_t *c) {
  const size_t headroom = SIZE_MAX - a;
  if (b > headroom) {
    return false;
  }
  *c = a + b;
  return true;
}
9722
/* Multiplies two size_t values, returning SIZE_MAX instead of a wrapped
 * result when the product overflows. */
static size_t saturating_multiply(size_t a, size_t b) {
  /* Unsigned arithmetic wraps, so computing the product first is well
   * defined; dividing it back detects the wrap. */
  const size_t product = a * b;
  if (b == 0) {
    return product;
  }
  return (product / b == a) ? product : SIZE_MAX;
}
9731
9732
9733 /* Base64 decoding ************************************************************/
9734
9735 /* TODO(haberman): make this streaming. */
9736
/* Maps every possible byte value to its 6-bit base64 value, or -1 when the
 * byte is not part of the base64 alphabet ('=' padding is also -1 here).
 * Rows hold 16 entries each; the row comment gives the byte range. */
static const signed char b64table[] = {
  /* 0x00 - 0x0F */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x10 - 0x1F */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x20 - 0x2F: '+' is 62, '/' is 63 */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
  /* 0x30 - 0x3F: '0'..'9' are 52..61 */
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
  /* 0x40 - 0x4F: 'A'..'O' are 0..14 */
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  /* 0x50 - 0x5F: 'P'..'Z' are 15..25 */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
  /* 0x60 - 0x6F: 'a'..'o' are 26..40 */
  -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  /* 0x70 - 0x7F: 'p'..'z' are 41..51 */
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
  /* 0x80 - 0xFF: never valid */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

/* Returns the table entry for ch widened to int32_t: 0..63 for a base64
 * alphabet character, -1 (all bits set) for anything else. The all-ones
 * pattern for unrecognized characters is what lets callers detect an error
 * by testing the top bit of a combined value. */
int32_t b64lookup(unsigned char ch) {
  int32_t value = b64table[ch];
  return value;
}

/* Returns true if ch is neither a base64 alphabet character nor the '='
 * padding character. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;
  }
  return b64lookup(ch) == -1;
}
9780
9781 static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
9782 size_t len) {
9783 const char *limit = ptr + len;
9784 for (; ptr < limit; ptr += 4) {
9785 uint32_t val;
9786 char output[3];
9787
9788 if (limit - ptr < 4) {
9789 upb_status_seterrf(&p->status,
9790 "Base64 input for bytes field not a multiple of 4: %s",
9791 upb_fielddef_name(p->top->f));
9792 upb_env_reporterror(p->env, &p->status);
9793 return false;
9794 }
9795
9796 val = b64lookup(ptr[0]) << 18 |
9797 b64lookup(ptr[1]) << 12 |
9798 b64lookup(ptr[2]) << 6 |
9799 b64lookup(ptr[3]);
9800
9801 /* Test the upper bit; returns true if any of the characters returned -1. */
9802 if (val & 0x80000000) {
9803 goto otherchar;
9804 }
9805
9806 output[0] = val >> 16;
9807 output[1] = (val >> 8) & 0xff;
9808 output[2] = val & 0xff;
9809 upb_sink_putstring(&p->top->sink, sel, output, 3, NULL);
9810 }
9811 return true;
9812
9813 otherchar:
9814 if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
9815 nonbase64(ptr[3]) ) {
9816 upb_status_seterrf(&p->status,
9817 "Non-base64 characters in bytes field: %s",
9818 upb_fielddef_name(p->top->f));
9819 upb_env_reporterror(p->env, &p->status);
9820 return false;
9821 } if (ptr[2] == '=') {
9822 uint32_t val;
9823 char output;
9824
9825 /* Last group contains only two input bytes, one output byte. */
9826 if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
9827 goto badpadding;
9828 }
9829
9830 val = b64lookup(ptr[0]) << 18 |
9831 b64lookup(ptr[1]) << 12;
9832
9833 assert(!(val & 0x80000000));
9834 output = val >> 16;
9835 upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL);
9836 return true;
9837 } else {
9838 uint32_t val;
9839 char output[2];
9840
9841 /* Last group contains only three input bytes, two output bytes. */
9842 if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
9843 goto badpadding;
9844 }
9845
9846 val = b64lookup(ptr[0]) << 18 |
9847 b64lookup(ptr[1]) << 12 |
9848 b64lookup(ptr[2]) << 6;
9849
9850 output[0] = val >> 16;
9851 output[1] = (val >> 8) & 0xff;
9852 upb_sink_putstring(&p->top->sink, sel, output, 2, NULL);
9853 return true;
9854 }
9855
9856 badpadding:
9857 upb_status_seterrf(&p->status,
9858 "Incorrect base64 padding for field: %s (%.*s)",
9859 upb_fielddef_name(p->top->f),
9860 4, ptr);
9861 upb_env_reporterror(p->env, &p->status);
9862 return false;
9863 }
9864
9865
9866 /* Accumulate buffer **********************************************************/
9867
9868 /* Functionality for accumulating a buffer.
9869 *
9870 * Some parts of the parser need an entire value as a contiguous string. For
9871 * example, to look up a member name in a hash table, or to turn a string into
9872 * a number, the relevant library routines need the input string to be in
9873 * contiguous memory, even if the value spanned two or more buffers in the
9874 * input. These routines handle that.
9875 *
9876 * In the common case we can just point to the input buffer to get this
9877 * contiguous string and avoid any actual copy. So we optimistically begin
9878 * this way. But there are a few cases where we must instead copy into a
9879 * separate buffer:
9880 *
9881 * 1. The string was not contiguous in the input (it spanned buffers).
9882 *
9883 * 2. The string included escape sequences that need to be interpreted to get
9884 * the true value in a contiguous buffer. */
9885
9886 static void assert_accumulate_empty(upb_json_parser *p) {
9887 UPB_UNUSED(p);
9888 assert(p->accumulated == NULL);
9889 assert(p->accumulated_len == 0);
9890 }
9891
9892 static void accumulate_clear(upb_json_parser *p) {
9893 p->accumulated = NULL;
9894 p->accumulated_len = 0;
9895 }
9896
9897 /* Used internally by accumulate_append(). */
9898 static bool accumulate_realloc(upb_json_parser *p, size_t need) {
9899 void *mem;
9900 size_t old_size = p->accumulate_buf_size;
9901 size_t new_size = UPB_MAX(old_size, 128);
9902 while (new_size < need) {
9903 new_size = saturating_multiply(new_size, 2);
9904 }
9905
9906 mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
9907 if (!mem) {
9908 upb_status_seterrmsg(&p->status, "Out of memory allocating buffer.");
9909 upb_env_reporterror(p->env, &p->status);
9910 return false;
9911 }
9912
9913 p->accumulate_buf = mem;
9914 p->accumulate_buf_size = new_size;
9915 return true;
9916 }
9917
9918 /* Logically appends the given data to the append buffer.
9919 * If "can_alias" is true, we will try to avoid actually copying, but the buffer
9920 * must be valid until the next accumulate_append() call (if any). */
9921 static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
9922 bool can_alias) {
9923 size_t need;
9924
9925 if (!p->accumulated && can_alias) {
9926 p->accumulated = buf;
9927 p->accumulated_len = len;
9928 return true;
9929 }
9930
9931 if (!checked_add(p->accumulated_len, len, &need)) {
9932 upb_status_seterrmsg(&p->status, "Integer overflow.");
9933 upb_env_reporterror(p->env, &p->status);
9934 return false;
9935 }
9936
9937 if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
9938 return false;
9939 }
9940
9941 if (p->accumulated != p->accumulate_buf) {
9942 memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
9943 p->accumulated = p->accumulate_buf;
9944 }
9945
9946 memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
9947 p->accumulated_len += len;
9948 return true;
9949 }
9950
9951 /* Returns a pointer to the data accumulated since the last accumulate_clear()
9952 * call, and writes the length to *len. This with point either to the input
9953 * buffer or a temporary accumulate buffer. */
9954 static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
9955 assert(p->accumulated);
9956 *len = p->accumulated_len;
9957 return p->accumulated;
9958 }
9959
9960
9961 /* Multi-part text data *******************************************************/
9962
9963 /* When we have text data in the input, it can often come in multiple segments.
9964 * For example, there may be some raw string data followed by an escape
9965 * sequence. The two segments are processed with different logic. Also buffer
9966 * seams in the input can cause multiple segments.
9967 *
9968 * As we see segments, there are two main cases for how we want to process them:
9969 *
9970 * 1. we want to push the captured input directly to string handlers.
9971 *
9972 * 2. we need to accumulate all the parts into a contiguous buffer for further
9973 * processing (field name lookup, string->number conversion, etc). */
9974
/* The possible values of p->multipart_state. */
enum {
  /* No multipart data is being processed. */
  MULTIPART_INACTIVE = 0,

  /* Parts are being gathered into one contiguous buffer. */
  MULTIPART_ACCUMULATE,

  /* Each part is pushed straight to the current string handlers. */
  MULTIPART_PUSHEAGERLY
};
9988
9989 /* Start a multi-part text value where we accumulate the data for processing at
9990 * the end. */
9991 static void multipart_startaccum(upb_json_parser *p) {
9992 assert_accumulate_empty(p);
9993 assert(p->multipart_state == MULTIPART_INACTIVE);
9994 p->multipart_state = MULTIPART_ACCUMULATE;
9995 }
9996
9997 /* Start a multi-part text value where we immediately push text data to a string
9998 * value with the given selector. */
9999 static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
10000 assert_accumulate_empty(p);
10001 assert(p->multipart_state == MULTIPART_INACTIVE);
10002 p->multipart_state = MULTIPART_PUSHEAGERLY;
10003 p->string_selector = sel;
10004 }
10005
10006 static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
10007 bool can_alias) {
10008 switch (p->multipart_state) {
10009 case MULTIPART_INACTIVE:
10010 upb_status_seterrmsg(
10011 &p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
10012 upb_env_reporterror(p->env, &p->status);
10013 return false;
10014
10015 case MULTIPART_ACCUMULATE:
10016 if (!accumulate_append(p, buf, len, can_alias)) {
10017 return false;
10018 }
10019 break;
10020
10021 case MULTIPART_PUSHEAGERLY: {
10022 const upb_bufhandle *handle = can_alias ? p->handle : NULL;
10023 upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
10024 break;
10025 }
10026 }
10027
10028 return true;
10029 }
10030
10031 /* Note: this invalidates the accumulate buffer! Call only after reading its
10032 * contents. */
10033 static void multipart_end(upb_json_parser *p) {
10034 assert(p->multipart_state != MULTIPART_INACTIVE);
10035 p->multipart_state = MULTIPART_INACTIVE;
10036 accumulate_clear(p);
10037 }
10038
10039
10040 /* Input capture **************************************************************/
10041
10042 /* Functionality for capturing a region of the input as text. Gracefully
10043 * handles the case where a buffer seam occurs in the middle of the captured
10044 * region. */
10045
10046 static void capture_begin(upb_json_parser *p, const char *ptr) {
10047 assert(p->multipart_state != MULTIPART_INACTIVE);
10048 assert(p->capture == NULL);
10049 p->capture = ptr;
10050 }
10051
10052 static bool capture_end(upb_json_parser *p, const char *ptr) {
10053 assert(p->capture);
10054 if (multipart_text(p, p->capture, ptr - p->capture, true)) {
10055 p->capture = NULL;
10056 return true;
10057 } else {
10058 return false;
10059 }
10060 }
10061
10062 /* This is called at the end of each input buffer (ie. when we have hit a
10063 * buffer seam). If we are in the middle of capturing the input, this
10064 * processes the unprocessed capture region. */
10065 static void capture_suspend(upb_json_parser *p, const char **ptr) {
10066 if (!p->capture) return;
10067
10068 if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
10069 /* We use this as a signal that we were in the middle of capturing, and
10070 * that capturing should resume at the beginning of the next buffer.
10071 *
10072 * We can't use *ptr here, because we have no guarantee that this pointer
10073 * will be valid when we resume (if the underlying memory is freed, then
10074 * using the pointer at all, even to compare to NULL, is likely undefined
10075 * behavior). */
10076 p->capture = &suspend_capture;
10077 } else {
10078 /* Need to back up the pointer to the beginning of the capture, since
10079 * we were not able to actually preserve it. */
10080 *ptr = p->capture;
10081 }
10082 }
10083
10084 static void capture_resume(upb_json_parser *p, const char *ptr) {
10085 if (p->capture) {
10086 assert(p->capture == &suspend_capture);
10087 p->capture = ptr;
10088 }
10089 }
10090
10091
10092 /* Callbacks from the parser **************************************************/
10093
10094 /* These are the functions called directly from the parser itself.
10095 * We define these in the same order as their declarations in the parser. */
10096
/* Translates the character that follows a backslash in a JSON string escape
 * (r, t, n, f, b, '/', '"' or '\\') into the character it stands for. Any
 * other input trips an assert; with asserts disabled 'x' is returned. */
static char escape_char(char in) {
  /* codes[i] is the escape letter and values[i] the character it denotes. */
  static const char codes[] = { 'r', 't', 'n', 'f', 'b', '/', '"', '\\' };
  static const char values[] = { '\r', '\t', '\n', '\f', '\b', '/', '"', '\\' };
  size_t i;

  for (i = 0; i < sizeof(codes); i++) {
    if (codes[i] == in) {
      return values[i];
    }
  }

  assert(0);
  return 'x';
}
10112
10113 static bool escape(upb_json_parser *p, const char *ptr) {
10114 char ch = escape_char(*ptr);
10115 return multipart_text(p, &ch, 1, false);
10116 }
10117
10118 static void start_hex(upb_json_parser *p) {
10119 p->digit = 0;
10120 }
10121
10122 static void hexdigit(upb_json_parser *p, const char *ptr) {
10123 char ch = *ptr;
10124
10125 p->digit <<= 4;
10126
10127 if (ch >= '0' && ch <= '9') {
10128 p->digit += (ch - '0');
10129 } else if (ch >= 'a' && ch <= 'f') {
10130 p->digit += ((ch - 'a') + 10);
10131 } else {
10132 assert(ch >= 'A' && ch <= 'F');
10133 p->digit += ((ch - 'A') + 10);
10134 }
10135 }
10136
10137 static bool end_hex(upb_json_parser *p) {
10138 uint32_t codepoint = p->digit;
10139
10140 /* emit the codepoint as UTF-8. */
10141 char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */
10142 int length = 0;
10143 if (codepoint <= 0x7F) {
10144 utf8[0] = codepoint;
10145 length = 1;
10146 } else if (codepoint <= 0x07FF) {
10147 utf8[1] = (codepoint & 0x3F) | 0x80;
10148 codepoint >>= 6;
10149 utf8[0] = (codepoint & 0x1F) | 0xC0;
10150 length = 2;
10151 } else /* codepoint <= 0xFFFF */ {
10152 utf8[2] = (codepoint & 0x3F) | 0x80;
10153 codepoint >>= 6;
10154 utf8[1] = (codepoint & 0x3F) | 0x80;
10155 codepoint >>= 6;
10156 utf8[0] = (codepoint & 0x0F) | 0xE0;
10157 length = 3;
10158 }
10159 /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
10160 * we have to wait for the next escape to get the full code point). */
10161
10162 return multipart_text(p, utf8, length, false);
10163 }
10164
10165 static void start_text(upb_json_parser *p, const char *ptr) {
10166 capture_begin(p, ptr);
10167 }
10168
10169 static bool end_text(upb_json_parser *p, const char *ptr) {
10170 return capture_end(p, ptr);
10171 }
10172
10173 static void start_number(upb_json_parser *p, const char *ptr) {
10174 multipart_startaccum(p);
10175 capture_begin(p, ptr);
10176 }
10177
10178 static bool parse_number(upb_json_parser *p);
10179
10180 static bool end_number(upb_json_parser *p, const char *ptr) {
10181 if (!capture_end(p, ptr)) {
10182 return false;
10183 }
10184
10185 return parse_number(p);
10186 }
10187
10188 static bool parse_number(upb_json_parser *p) {
10189 size_t len;
10190 const char *buf;
10191 const char *myend;
10192 char *end;
10193
10194 /* strtol() and friends unfortunately do not support specifying the length of
10195 * the input string, so we need to force a copy into a NULL-terminated buffer. */
10196 if (!multipart_text(p, "\0", 1, false)) {
10197 return false;
10198 }
10199
10200 buf = accumulate_getptr(p, &len);
10201 myend = buf + len - 1; /* One for NULL. */
10202
10203 /* XXX: We are using strtol to parse integers, but this is wrong as even
10204 * integers can be represented as 1e6 (for example), which strtol can't
10205 * handle correctly.
10206 *
10207 * XXX: Also, we can't handle large integers properly because strto[u]ll
10208 * isn't in C89.
10209 *
10210 * XXX: Also, we don't properly check floats for overflow, since strtof
10211 * isn't in C89. */
10212 switch (upb_fielddef_type(p->top->f)) {
10213 case UPB_TYPE_ENUM:
10214 case UPB_TYPE_INT32: {
10215 long val = strtol(p->accumulated, &end, 0);
10216 if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
10217 goto err;
10218 else
10219 upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
10220 break;
10221 }
10222 case UPB_TYPE_INT64: {
10223 long long val = strtol(p->accumulated, &end, 0);
10224 if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
10225 goto err;
10226 else
10227 upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
10228 break;
10229 }
10230 case UPB_TYPE_UINT32: {
10231 unsigned long val = strtoul(p->accumulated, &end, 0);
10232 if (val > UINT32_MAX || errno == ERANGE || end != myend)
10233 goto err;
10234 else
10235 upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
10236 break;
10237 }
10238 case UPB_TYPE_UINT64: {
10239 unsigned long long val = strtoul(p->accumulated, &end, 0);
10240 if (val > UINT64_MAX || errno == ERANGE || end != myend)
10241 goto err;
10242 else
10243 upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
10244 break;
10245 }
10246 case UPB_TYPE_DOUBLE: {
10247 double val = strtod(p->accumulated, &end);
10248 if (errno == ERANGE || end != myend)
10249 goto err;
10250 else
10251 upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
10252 break;
10253 }
10254 case UPB_TYPE_FLOAT: {
10255 float val = strtod(p->accumulated, &end);
10256 if (errno == ERANGE || end != myend)
10257 goto err;
10258 else
10259 upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
10260 break;
10261 }
10262 default:
10263 assert(false);
10264 }
10265
10266 multipart_end(p);
10267
10268 return true;
10269
10270 err:
10271 upb_status_seterrf(&p->status, "error parsing number: %s", buf);
10272 upb_env_reporterror(p->env, &p->status);
10273 multipart_end(p);
10274 return false;
10275 }
10276
10277 static bool parser_putbool(upb_json_parser *p, bool val) {
10278 bool ok;
10279
10280 if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
10281 upb_status_seterrf(&p->status,
10282 "Boolean value specified for non-bool field: %s",
10283 upb_fielddef_name(p->top->f));
10284 upb_env_reporterror(p->env, &p->status);
10285 return false;
10286 }
10287
10288 ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
10289 UPB_ASSERT_VAR(ok, ok);
10290
10291 return true;
10292 }
10293
10294 static bool start_stringval(upb_json_parser *p) {
10295 assert(p->top->f);
10296
10297 if (upb_fielddef_isstring(p->top->f)) {
10298 upb_jsonparser_frame *inner;
10299 upb_selector_t sel;
10300
10301 if (!check_stack(p)) return false;
10302
10303 /* Start a new parser frame: parser frames correspond one-to-one with
10304 * handler frames, and string events occur in a sub-frame. */
10305 inner = p->top + 1;
10306 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
10307 upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
10308 inner->m = p->top->m;
10309 inner->f = p->top->f;
10310 inner->is_map = false;
10311 inner->is_mapentry = false;
10312 p->top = inner;
10313
10314 if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
10315 /* For STRING fields we push data directly to the handlers as it is
10316 * parsed. We don't do this yet for BYTES fields, because our base64
10317 * decoder is not streaming.
10318 *
10319 * TODO(haberman): make base64 decoding streaming also. */
10320 multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
10321 return true;
10322 } else {
10323 multipart_startaccum(p);
10324 return true;
10325 }
10326 } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
10327 /* No need to push a frame -- symbolic enum names in quotes remain in the
10328 * current parser frame.
10329 *
10330 * Enum string values must accumulate so we can look up the value in a table
10331 * once it is complete. */
10332 multipart_startaccum(p);
10333 return true;
10334 } else {
10335 upb_status_seterrf(&p->status,
10336 "String specified for non-string/non-enum field: %s",
10337 upb_fielddef_name(p->top->f));
10338 upb_env_reporterror(p->env, &p->status);
10339 return false;
10340 }
10341 }
10342
10343 static bool end_stringval(upb_json_parser *p) {
10344 bool ok = true;
10345
10346 switch (upb_fielddef_type(p->top->f)) {
10347 case UPB_TYPE_BYTES:
10348 if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
10349 p->accumulated, p->accumulated_len)) {
10350 return false;
10351 }
10352 /* Fall through. */
10353
10354 case UPB_TYPE_STRING: {
10355 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10356 upb_sink_endstr(&p->top->sink, sel);
10357 p->top--;
10358 break;
10359 }
10360
10361 case UPB_TYPE_ENUM: {
10362 /* Resolve enum symbolic name to integer value. */
10363 const upb_enumdef *enumdef =
10364 (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
10365
10366 size_t len;
10367 const char *buf = accumulate_getptr(p, &len);
10368
10369 int32_t int_val = 0;
10370 ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);
10371
10372 if (ok) {
10373 upb_selector_t sel = parser_getsel(p);
10374 upb_sink_putint32(&p->top->sink, sel, int_val);
10375 } else {
10376 upb_status_seterrf(&p->status, "Enum value unknown: '%.*s'", len, buf);
10377 upb_env_reporterror(p->env, &p->status);
10378 }
10379
10380 break;
10381 }
10382
10383 default:
10384 assert(false);
10385 upb_status_seterrmsg(&p->status, "Internal error in JSON decoder");
10386 upb_env_reporterror(p->env, &p->status);
10387 ok = false;
10388 break;
10389 }
10390
10391 multipart_end(p);
10392
10393 return ok;
10394 }
10395
10396 static void start_member(upb_json_parser *p) {
10397 assert(!p->top->f);
10398 multipart_startaccum(p);
10399 }
10400
10401 /* Helper: invoked during parse_mapentry() to emit the mapentry message's key
10402 * field based on the current contents of the accumulate buffer. */
10403 static bool parse_mapentry_key(upb_json_parser *p) {
10404
10405 size_t len;
10406 const char *buf = accumulate_getptr(p, &len);
10407
10408 /* Emit the key field. We do a bit of ad-hoc parsing here because the
10409 * parser state machine has already decided that this is a string field
10410 * name, and we are reinterpreting it as some arbitrary key type. In
10411 * particular, integer and bool keys are quoted, so we need to parse the
10412 * quoted string contents here. */
10413
10414 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
10415 if (p->top->f == NULL) {
10416 upb_status_seterrmsg(&p->status, "mapentry message has no key");
10417 upb_env_reporterror(p->env, &p->status);
10418 return false;
10419 }
10420 switch (upb_fielddef_type(p->top->f)) {
10421 case UPB_TYPE_INT32:
10422 case UPB_TYPE_INT64:
10423 case UPB_TYPE_UINT32:
10424 case UPB_TYPE_UINT64:
10425 /* Invoke end_number. The accum buffer has the number's text already. */
10426 if (!parse_number(p)) {
10427 return false;
10428 }
10429 break;
10430 case UPB_TYPE_BOOL:
10431 if (len == 4 && !strncmp(buf, "true", 4)) {
10432 if (!parser_putbool(p, true)) {
10433 return false;
10434 }
10435 } else if (len == 5 && !strncmp(buf, "false", 5)) {
10436 if (!parser_putbool(p, false)) {
10437 return false;
10438 }
10439 } else {
10440 upb_status_seterrmsg(&p->status,
10441 "Map bool key not 'true' or 'false'");
10442 upb_env_reporterror(p->env, &p->status);
10443 return false;
10444 }
10445 multipart_end(p);
10446 break;
10447 case UPB_TYPE_STRING:
10448 case UPB_TYPE_BYTES: {
10449 upb_sink subsink;
10450 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
10451 upb_sink_startstr(&p->top->sink, sel, len, &subsink);
10452 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
10453 upb_sink_putstring(&subsink, sel, buf, len, NULL);
10454 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10455 upb_sink_endstr(&subsink, sel);
10456 multipart_end(p);
10457 break;
10458 }
10459 default:
10460 upb_status_seterrmsg(&p->status, "Invalid field type for map key");
10461 upb_env_reporterror(p->env, &p->status);
10462 return false;
10463 }
10464
10465 return true;
10466 }
10467
10468 /* Helper: emit one map entry (as a submessage in the map field sequence). This
10469 * is invoked from end_membername(), at the end of the map entry's key string,
10470 * with the map key in the accumulate buffer. It parses the key from that
10471 * buffer, emits the handler calls to start the mapentry submessage (setting up
10472 * its subframe in the process), and sets up state in the subframe so that the
10473 * value parser (invoked next) will emit the mapentry's value field and then
10474 * end the mapentry message. */
10475
10476 static bool handle_mapentry(upb_json_parser *p) {
10477 const upb_fielddef *mapfield;
10478 const upb_msgdef *mapentrymsg;
10479 upb_jsonparser_frame *inner;
10480 upb_selector_t sel;
10481
10482 /* Map entry: p->top->sink is the seq frame, so we need to start a frame
10483 * for the mapentry itself, and then set |f| in that frame so that the map
10484 * value field is parsed, and also set a flag to end the frame after the
10485 * map-entry value is parsed. */
10486 if (!check_stack(p)) return false;
10487
10488 mapfield = p->top->mapfield;
10489 mapentrymsg = upb_fielddef_msgsubdef(mapfield);
10490
10491 inner = p->top + 1;
10492 p->top->f = mapfield;
10493 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
10494 upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
10495 inner->m = mapentrymsg;
10496 inner->mapfield = mapfield;
10497 inner->is_map = false;
10498
10499 /* Don't set this to true *yet* -- we reuse parsing handlers below to push
10500 * the key field value to the sink, and these handlers will pop the frame
10501 * if they see is_mapentry (when invoked by the parser state machine, they
10502 * would have just seen the map-entry value, not key). */
10503 inner->is_mapentry = false;
10504 p->top = inner;
10505
10506 /* send STARTMSG in submsg frame. */
10507 upb_sink_startmsg(&p->top->sink);
10508
10509 parse_mapentry_key(p);
10510
10511 /* Set up the value field to receive the map-entry value. */
10512 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
10513 p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */
10514 p->top->mapfield = mapfield;
10515 if (p->top->f == NULL) {
10516 upb_status_seterrmsg(&p->status, "mapentry message has no value");
10517 upb_env_reporterror(p->env, &p->status);
10518 return false;
10519 }
10520
10521 return true;
10522 }
10523
10524 static bool end_membername(upb_json_parser *p) {
10525 assert(!p->top->f);
10526
10527 if (p->top->is_map) {
10528 return handle_mapentry(p);
10529 } else {
10530 size_t len;
10531 const char *buf = accumulate_getptr(p, &len);
10532 const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
10533
10534 if (!f) {
10535 /* TODO(haberman): Ignore unknown fields if requested/configured to do
10536 * so. */
10537 upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf);
10538 upb_env_reporterror(p->env, &p->status);
10539 return false;
10540 }
10541
10542 p->top->f = f;
10543 multipart_end(p);
10544
10545 return true;
10546 }
10547 }
10548
10549 static void end_member(upb_json_parser *p) {
10550 /* If we just parsed a map-entry value, end that frame too. */
10551 if (p->top->is_mapentry) {
10552 upb_status s = UPB_STATUS_INIT;
10553 upb_selector_t sel;
10554 bool ok;
10555 const upb_fielddef *mapfield;
10556
10557 assert(p->top > p->stack);
10558 /* send ENDMSG on submsg. */
10559 upb_sink_endmsg(&p->top->sink, &s);
10560 mapfield = p->top->mapfield;
10561
10562 /* send ENDSUBMSG in repeated-field-of-mapentries frame. */
10563 p->top--;
10564 ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
10565 UPB_ASSERT_VAR(ok, ok);
10566 upb_sink_endsubmsg(&p->top->sink, sel);
10567 }
10568
10569 p->top->f = NULL;
10570 }
10571
10572 static bool start_subobject(upb_json_parser *p) {
10573 assert(p->top->f);
10574
10575 if (upb_fielddef_ismap(p->top->f)) {
10576 upb_jsonparser_frame *inner;
10577 upb_selector_t sel;
10578
10579 /* Beginning of a map. Start a new parser frame in a repeated-field
10580 * context. */
10581 if (!check_stack(p)) return false;
10582
10583 inner = p->top + 1;
10584 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
10585 upb_sink_startseq(&p->top->sink, sel, &inner->sink);
10586 inner->m = upb_fielddef_msgsubdef(p->top->f);
10587 inner->mapfield = p->top->f;
10588 inner->f = NULL;
10589 inner->is_map = true;
10590 inner->is_mapentry = false;
10591 p->top = inner;
10592
10593 return true;
10594 } else if (upb_fielddef_issubmsg(p->top->f)) {
10595 upb_jsonparser_frame *inner;
10596 upb_selector_t sel;
10597
10598 /* Beginning of a subobject. Start a new parser frame in the submsg
10599 * context. */
10600 if (!check_stack(p)) return false;
10601
10602 inner = p->top + 1;
10603
10604 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
10605 upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
10606 inner->m = upb_fielddef_msgsubdef(p->top->f);
10607 inner->f = NULL;
10608 inner->is_map = false;
10609 inner->is_mapentry = false;
10610 p->top = inner;
10611
10612 return true;
10613 } else {
10614 upb_status_seterrf(&p->status,
10615 "Object specified for non-message/group field: %s",
10616 upb_fielddef_name(p->top->f));
10617 upb_env_reporterror(p->env, &p->status);
10618 return false;
10619 }
10620 }
10621
10622 static void end_subobject(upb_json_parser *p) {
10623 if (p->top->is_map) {
10624 upb_selector_t sel;
10625 p->top--;
10626 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
10627 upb_sink_endseq(&p->top->sink, sel);
10628 } else {
10629 upb_selector_t sel;
10630 p->top--;
10631 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
10632 upb_sink_endsubmsg(&p->top->sink, sel);
10633 }
10634 }
10635
10636 static bool start_array(upb_json_parser *p) {
10637 upb_jsonparser_frame *inner;
10638 upb_selector_t sel;
10639
10640 assert(p->top->f);
10641
10642 if (!upb_fielddef_isseq(p->top->f)) {
10643 upb_status_seterrf(&p->status,
10644 "Array specified for non-repeated field: %s",
10645 upb_fielddef_name(p->top->f));
10646 upb_env_reporterror(p->env, &p->status);
10647 return false;
10648 }
10649
10650 if (!check_stack(p)) return false;
10651
10652 inner = p->top + 1;
10653 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
10654 upb_sink_startseq(&p->top->sink, sel, &inner->sink);
10655 inner->m = p->top->m;
10656 inner->f = p->top->f;
10657 inner->is_map = false;
10658 inner->is_mapentry = false;
10659 p->top = inner;
10660
10661 return true;
10662 }
10663
10664 static void end_array(upb_json_parser *p) {
10665 upb_selector_t sel;
10666
10667 assert(p->top > p->stack);
10668
10669 p->top--;
10670 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
10671 upb_sink_endseq(&p->top->sink, sel);
10672 }
10673
10674 static void start_object(upb_json_parser *p) {
10675 if (!p->top->is_map) {
10676 upb_sink_startmsg(&p->top->sink);
10677 }
10678 }
10679
10680 static void end_object(upb_json_parser *p) {
10681 if (!p->top->is_map) {
10682 upb_status status;
10683 upb_status_clear(&status);
10684 upb_sink_endmsg(&p->top->sink, &status);
10685 if (!upb_ok(&status)) {
10686 upb_env_reporterror(p->env, &status);
10687 }
10688 }
10689 }
10690
10691
/* Evaluates x; when the result is false (zero), jumps to a label named
 * `error`, which must therefore exist in every function that uses this macro.
 * The expansion is a bare `if` statement with no do/while wrapper, so use it
 * only as a complete statement terminated by ';' and never directly in front
 * of an `else`. */
10692 #define CHECK_RETURN_TOP(x) if (!(x)) goto error
10693
10694
10695 /* The actual parser **********************************************************/
10696
10697 /* What follows is the Ragel parser itself. The language is specified in Ragel
10698 * and the actions call our C functions above.
10699 *
10700 * Ragel has an extensive set of functionality, and we use only a small part of
10701 * it. There are many action types but we only use a few:
10702 *
10703 * ">" -- transition into a machine
10704 * "%" -- transition out of a machine
10705 * "@" -- transition into a final state of a machine.
10706 *
10707 * "@" transitions are tricky because a machine can transition into a final
10708 * state repeatedly. But in some cases we know this can't happen, for example
10709 * a string which is delimited by a final '"' can only transition into its
10710 * final state once, when the closing '"' is seen. */
10711
10712
10713 #line 1218 "upb/json/parser.rl"
10714
10715
10716
10717 #line 1130 "upb/json/parser.c"
10718 static const char _json_actions[] = {
10719 0, 1, 0, 1, 2, 1, 3, 1,
10720 5, 1, 6, 1, 7, 1, 8, 1,
10721 10, 1, 12, 1, 13, 1, 14, 1,
10722 15, 1, 16, 1, 17, 1, 21, 1,
10723 25, 1, 27, 2, 3, 8, 2, 4,
10724 5, 2, 6, 2, 2, 6, 8, 2,
10725 11, 9, 2, 13, 15, 2, 14, 15,
10726 2, 18, 1, 2, 19, 27, 2, 20,
10727 9, 2, 22, 27, 2, 23, 27, 2,
10728 24, 27, 2, 26, 27, 3, 14, 11,
10729 9
10730 };
10731
10732 static const unsigned char _json_key_offsets[] = {
10733 0, 0, 4, 9, 14, 15, 19, 24,
10734 29, 34, 38, 42, 45, 48, 50, 54,
10735 58, 60, 62, 67, 69, 71, 80, 86,
10736 92, 98, 104, 106, 115, 116, 116, 116,
10737 121, 126, 131, 132, 133, 134, 135, 135,
10738 136, 137, 138, 138, 139, 140, 141, 141,
10739 146, 151, 152, 156, 161, 166, 171, 175,
10740 175, 178, 178, 178
10741 };
10742
10743 static const char _json_trans_keys[] = {
10744 32, 123, 9, 13, 32, 34, 125, 9,
10745 13, 32, 34, 125, 9, 13, 34, 32,
10746 58, 9, 13, 32, 93, 125, 9, 13,
10747 32, 44, 125, 9, 13, 32, 44, 125,
10748 9, 13, 32, 34, 9, 13, 45, 48,
10749 49, 57, 48, 49, 57, 46, 69, 101,
10750 48, 57, 69, 101, 48, 57, 43, 45,
10751 48, 57, 48, 57, 48, 57, 46, 69,
10752 101, 48, 57, 34, 92, 34, 92, 34,
10753 47, 92, 98, 102, 110, 114, 116, 117,
10754 48, 57, 65, 70, 97, 102, 48, 57,
10755 65, 70, 97, 102, 48, 57, 65, 70,
10756 97, 102, 48, 57, 65, 70, 97, 102,
10757 34, 92, 34, 45, 91, 102, 110, 116,
10758 123, 48, 57, 34, 32, 93, 125, 9,
10759 13, 32, 44, 93, 9, 13, 32, 93,
10760 125, 9, 13, 97, 108, 115, 101, 117,
10761 108, 108, 114, 117, 101, 32, 34, 125,
10762 9, 13, 32, 34, 125, 9, 13, 34,
10763 32, 58, 9, 13, 32, 93, 125, 9,
10764 13, 32, 44, 125, 9, 13, 32, 44,
10765 125, 9, 13, 32, 34, 9, 13, 32,
10766 9, 13, 0
10767 };
10768
10769 static const char _json_single_lengths[] = {
10770 0, 2, 3, 3, 1, 2, 3, 3,
10771 3, 2, 2, 1, 3, 0, 2, 2,
10772 0, 0, 3, 2, 2, 9, 0, 0,
10773 0, 0, 2, 7, 1, 0, 0, 3,
10774 3, 3, 1, 1, 1, 1, 0, 1,
10775 1, 1, 0, 1, 1, 1, 0, 3,
10776 3, 1, 2, 3, 3, 3, 2, 0,
10777 1, 0, 0, 0
10778 };
10779
10780 static const char _json_range_lengths[] = {
10781 0, 1, 1, 1, 0, 1, 1, 1,
10782 1, 1, 1, 1, 0, 1, 1, 1,
10783 1, 1, 1, 0, 0, 0, 3, 3,
10784 3, 3, 0, 1, 0, 0, 0, 1,
10785 1, 1, 0, 0, 0, 0, 0, 0,
10786 0, 0, 0, 0, 0, 0, 0, 1,
10787 1, 0, 1, 1, 1, 1, 1, 0,
10788 1, 0, 0, 0
10789 };
10790
10791 static const short _json_index_offsets[] = {
10792 0, 0, 4, 9, 14, 16, 20, 25,
10793 30, 35, 39, 43, 46, 50, 52, 56,
10794 60, 62, 64, 69, 72, 75, 85, 89,
10795 93, 97, 101, 104, 113, 115, 116, 117,
10796 122, 127, 132, 134, 136, 138, 140, 141,
10797 143, 145, 147, 148, 150, 152, 154, 155,
10798 160, 165, 167, 171, 176, 181, 186, 190,
10799 191, 194, 195, 196
10800 };
10801
10802 static const char _json_indicies[] = {
10803 0, 2, 0, 1, 3, 4, 5, 3,
10804 1, 6, 7, 8, 6, 1, 9, 1,
10805 10, 11, 10, 1, 11, 1, 1, 11,
10806 12, 13, 14, 15, 13, 1, 16, 17,
10807 8, 16, 1, 17, 7, 17, 1, 18,
10808 19, 20, 1, 19, 20, 1, 22, 23,
10809 23, 21, 24, 1, 23, 23, 24, 21,
10810 25, 25, 26, 1, 26, 1, 26, 21,
10811 22, 23, 23, 20, 21, 28, 29, 27,
10812 31, 32, 30, 33, 33, 33, 33, 33,
10813 33, 33, 33, 34, 1, 35, 35, 35,
10814 1, 36, 36, 36, 1, 37, 37, 37,
10815 1, 38, 38, 38, 1, 40, 41, 39,
10816 42, 43, 44, 45, 46, 47, 48, 43,
10817 1, 49, 1, 50, 51, 53, 54, 1,
10818 53, 52, 55, 56, 54, 55, 1, 56,
10819 1, 1, 56, 52, 57, 1, 58, 1,
10820 59, 1, 60, 1, 61, 62, 1, 63,
10821 1, 64, 1, 65, 66, 1, 67, 1,
10822 68, 1, 69, 70, 71, 72, 70, 1,
10823 73, 74, 75, 73, 1, 76, 1, 77,
10824 78, 77, 1, 78, 1, 1, 78, 79,
10825 80, 81, 82, 80, 1, 83, 84, 75,
10826 83, 1, 84, 74, 84, 1, 85, 86,
10827 86, 1, 1, 1, 1, 0
10828 };
10829
10830 static const char _json_trans_targs[] = {
10831 1, 0, 2, 3, 4, 56, 3, 4,
10832 56, 5, 5, 6, 7, 8, 9, 56,
10833 8, 9, 11, 12, 18, 57, 13, 15,
10834 14, 16, 17, 20, 58, 21, 20, 58,
10835 21, 19, 22, 23, 24, 25, 26, 20,
10836 58, 21, 28, 30, 31, 34, 39, 43,
10837 47, 29, 59, 59, 32, 31, 29, 32,
10838 33, 35, 36, 37, 38, 59, 40, 41,
10839 42, 59, 44, 45, 46, 59, 48, 49,
10840 55, 48, 49, 55, 50, 50, 51, 52,
10841 53, 54, 55, 53, 54, 59, 56
10842 };
10843
10844 static const char _json_trans_actions[] = {
10845 0, 0, 0, 21, 77, 53, 0, 47,
10846 23, 17, 0, 0, 15, 19, 19, 50,
10847 0, 0, 0, 0, 0, 1, 0, 0,
10848 0, 0, 0, 3, 13, 0, 0, 35,
10849 5, 11, 0, 38, 7, 7, 7, 41,
10850 44, 9, 62, 56, 25, 0, 0, 0,
10851 31, 29, 33, 59, 15, 0, 27, 0,
10852 0, 0, 0, 0, 0, 68, 0, 0,
10853 0, 71, 0, 0, 0, 65, 21, 77,
10854 53, 0, 47, 23, 17, 0, 0, 15,
10855 19, 19, 50, 0, 0, 74, 0
10856 };
10857
10858 static const int json_start = 1;
10859
10860 static const int json_en_number_machine = 10;
10861 static const int json_en_string_machine = 19;
10862 static const int json_en_value_machine = 27;
10863 static const int json_en_main = 1;
10864
10865
10866 #line 1221 "upb/json/parser.rl"
10867
10868 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
10869 const upb_bufhandle *handle) {
10870 upb_json_parser *parser = closure;
10871
10872 /* Variables used by Ragel's generated code. */
10873 int cs = parser->current_state;
10874 int *stack = parser->parser_stack;
10875 int top = parser->parser_top;
10876
10877 const char *p = buf;
10878 const char *pe = buf + size;
10879
10880 parser->handle = handle;
10881
10882 UPB_UNUSED(hd);
10883 UPB_UNUSED(handle);
10884
10885 capture_resume(parser, buf);
10886
10887
10888 #line 1301 "upb/json/parser.c"
10889 {
10890 int _klen;
10891 unsigned int _trans;
10892 const char *_acts;
10893 unsigned int _nacts;
10894 const char *_keys;
10895
10896 if ( p == pe )
10897 goto _test_eof;
10898 if ( cs == 0 )
10899 goto _out;
10900 _resume:
10901 _keys = _json_trans_keys + _json_key_offsets[cs];
10902 _trans = _json_index_offsets[cs];
10903
10904 _klen = _json_single_lengths[cs];
10905 if ( _klen > 0 ) {
10906 const char *_lower = _keys;
10907 const char *_mid;
10908 const char *_upper = _keys + _klen - 1;
10909 while (1) {
10910 if ( _upper < _lower )
10911 break;
10912
10913 _mid = _lower + ((_upper-_lower) >> 1);
10914 if ( (*p) < *_mid )
10915 _upper = _mid - 1;
10916 else if ( (*p) > *_mid )
10917 _lower = _mid + 1;
10918 else {
10919 _trans += (unsigned int)(_mid - _keys);
10920 goto _match;
10921 }
10922 }
10923 _keys += _klen;
10924 _trans += _klen;
10925 }
10926
10927 _klen = _json_range_lengths[cs];
10928 if ( _klen > 0 ) {
10929 const char *_lower = _keys;
10930 const char *_mid;
10931 const char *_upper = _keys + (_klen<<1) - 2;
10932 while (1) {
10933 if ( _upper < _lower )
10934 break;
10935
10936 _mid = _lower + (((_upper-_lower) >> 1) & ~1);
10937 if ( (*p) < _mid[0] )
10938 _upper = _mid - 2;
10939 else if ( (*p) > _mid[1] )
10940 _lower = _mid + 2;
10941 else {
10942 _trans += (unsigned int)((_mid - _keys)>>1);
10943 goto _match;
10944 }
10945 }
10946 _trans += _klen;
10947 }
10948
10949 _match:
10950 _trans = _json_indicies[_trans];
10951 cs = _json_trans_targs[_trans];
10952
10953 if ( _json_trans_actions[_trans] == 0 )
10954 goto _again;
10955
10956 _acts = _json_actions + _json_trans_actions[_trans];
10957 _nacts = (unsigned int) *_acts++;
10958 while ( _nacts-- > 0 )
10959 {
10960 switch ( *_acts++ )
10961 {
10962 case 0:
10963 #line 1133 "upb/json/parser.rl"
10964 { p--; {cs = stack[--top]; goto _again;} }
10965 break;
10966 case 1:
10967 #line 1134 "upb/json/parser.rl"
10968 { p--; {stack[top++] = cs; cs = 10; goto _again;} }
10969 break;
10970 case 2:
10971 #line 1138 "upb/json/parser.rl"
10972 { start_text(parser, p); }
10973 break;
10974 case 3:
10975 #line 1139 "upb/json/parser.rl"
10976 { CHECK_RETURN_TOP(end_text(parser, p)); }
10977 break;
10978 case 4:
10979 #line 1145 "upb/json/parser.rl"
10980 { start_hex(parser); }
10981 break;
10982 case 5:
10983 #line 1146 "upb/json/parser.rl"
10984 { hexdigit(parser, p); }
10985 break;
10986 case 6:
10987 #line 1147 "upb/json/parser.rl"
10988 { CHECK_RETURN_TOP(end_hex(parser)); }
10989 break;
10990 case 7:
10991 #line 1153 "upb/json/parser.rl"
10992 { CHECK_RETURN_TOP(escape(parser, p)); }
10993 break;
10994 case 8:
10995 #line 1159 "upb/json/parser.rl"
10996 { p--; {cs = stack[--top]; goto _again;} }
10997 break;
10998 case 9:
10999 #line 1162 "upb/json/parser.rl"
11000 { {stack[top++] = cs; cs = 19; goto _again;} }
11001 break;
11002 case 10:
11003 #line 1164 "upb/json/parser.rl"
11004 { p--; {stack[top++] = cs; cs = 27; goto _again;} }
11005 break;
11006 case 11:
11007 #line 1169 "upb/json/parser.rl"
11008 { start_member(parser); }
11009 break;
11010 case 12:
11011 #line 1170 "upb/json/parser.rl"
11012 { CHECK_RETURN_TOP(end_membername(parser)); }
11013 break;
11014 case 13:
11015 #line 1173 "upb/json/parser.rl"
11016 { end_member(parser); }
11017 break;
11018 case 14:
11019 #line 1179 "upb/json/parser.rl"
11020 { start_object(parser); }
11021 break;
11022 case 15:
11023 #line 1182 "upb/json/parser.rl"
11024 { end_object(parser); }
11025 break;
11026 case 16:
11027 #line 1188 "upb/json/parser.rl"
11028 { CHECK_RETURN_TOP(start_array(parser)); }
11029 break;
11030 case 17:
11031 #line 1192 "upb/json/parser.rl"
11032 { end_array(parser); }
11033 break;
11034 case 18:
11035 #line 1197 "upb/json/parser.rl"
11036 { start_number(parser, p); }
11037 break;
11038 case 19:
11039 #line 1198 "upb/json/parser.rl"
11040 { CHECK_RETURN_TOP(end_number(parser, p)); }
11041 break;
11042 case 20:
11043 #line 1200 "upb/json/parser.rl"
11044 { CHECK_RETURN_TOP(start_stringval(parser)); }
11045 break;
11046 case 21:
11047 #line 1201 "upb/json/parser.rl"
11048 { CHECK_RETURN_TOP(end_stringval(parser)); }
11049 break;
11050 case 22:
11051 #line 1203 "upb/json/parser.rl"
11052 { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
11053 break;
11054 case 23:
11055 #line 1205 "upb/json/parser.rl"
11056 { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
11057 break;
11058 case 24:
11059 #line 1207 "upb/json/parser.rl"
11060 { /* null value */ }
11061 break;
11062 case 25:
11063 #line 1209 "upb/json/parser.rl"
11064 { CHECK_RETURN_TOP(start_subobject(parser)); }
11065 break;
11066 case 26:
11067 #line 1210 "upb/json/parser.rl"
11068 { end_subobject(parser); }
11069 break;
11070 case 27:
11071 #line 1215 "upb/json/parser.rl"
11072 { p--; {cs = stack[--top]; goto _again;} }
11073 break;
11074 #line 1487 "upb/json/parser.c"
11075 }
11076 }
11077
11078 _again:
11079 if ( cs == 0 )
11080 goto _out;
11081 if ( ++p != pe )
11082 goto _resume;
11083 _test_eof: {}
11084 _out: {}
11085 }
11086
11087 #line 1242 "upb/json/parser.rl"
11088
11089 if (p != pe) {
11090 upb_status_seterrf(&parser->status, "Parse error at %s\n", p);
11091 upb_env_reporterror(parser->env, &parser->status);
11092 } else {
11093 capture_suspend(parser, &p);
11094 }
11095
11096 error:
11097 /* Save parsing state back to parser. */
11098 parser->current_state = cs;
11099 parser->parser_top = top;
11100
11101 return p - buf;
11102 }
11103
11104 bool end(void *closure, const void *hd) {
11105 UPB_UNUSED(closure);
11106 UPB_UNUSED(hd);
11107
11108 /* Prevent compile warning on unused static constants. */
11109 UPB_UNUSED(json_start);
11110 UPB_UNUSED(json_en_number_machine);
11111 UPB_UNUSED(json_en_string_machine);
11112 UPB_UNUSED(json_en_value_machine);
11113 UPB_UNUSED(json_en_main);
11114 return true;
11115 }
11116
11117 static void json_parser_reset(upb_json_parser *p) {
11118 int cs;
11119 int top;
11120
11121 p->top = p->stack;
11122 p->top->f = NULL;
11123 p->top->is_map = false;
11124 p->top->is_mapentry = false;
11125
11126 /* Emit Ragel initialization of the parser. */
11127
11128 #line 1541 "upb/json/parser.c"
11129 {
11130 cs = json_start;
11131 top = 0;
11132 }
11133
11134 #line 1282 "upb/json/parser.rl"
11135 p->current_state = cs;
11136 p->parser_top = top;
11137 accumulate_clear(p);
11138 p->multipart_state = MULTIPART_INACTIVE;
11139 p->capture = NULL;
11140 p->accumulated = NULL;
11141 upb_status_clear(&p->status);
11142 }
11143
11144
11145 /* Public API *****************************************************************/
11146
11147 upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
11148 #ifndef NDEBUG
11149 const size_t size_before = upb_env_bytesallocated(env);
11150 #endif
11151 upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
11152 if (!p) return false;
11153
11154 p->env = env;
11155 p->limit = p->stack + UPB_JSON_MAX_DEPTH;
11156 p->accumulate_buf = NULL;
11157 p->accumulate_buf_size = 0;
11158 upb_byteshandler_init(&p->input_handler_);
11159 upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
11160 upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
11161 upb_bytessink_reset(&p->input_, &p->input_handler_, p);
11162
11163 json_parser_reset(p);
11164 upb_sink_reset(&p->top->sink, output->handlers, output->closure);
11165 p->top->m = upb_handlers_msgdef(output->handlers);
11166
11167 /* If this fails, uncomment and increase the value in parser.h. */
11168 /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
11169 assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
11170 return p;
11171 }
11172
11173 upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
11174 return &p->input_;
11175 }
11176 /*
11177 ** This currently uses snprintf() to format primitives, and could be optimized
11178 ** further.
11179 */
11180
11181
11182 #include <stdlib.h>
11183 #include <stdio.h>
11184 #include <string.h>
11185 #include <stdint.h>
11186
11187 struct upb_json_printer {
11188 upb_sink input_;
11189 /* BytesSink closure. */
11190 void *subc_;
11191 upb_bytessink *output_;
11192
11193 /* We track the depth so that we know when to emit startstr/endstr on the
11194 * output. */
11195 int depth_;
11196
11197 /* Have we emitted the first element? This state is necessary to emit commas
11198 * without leaving a trailing comma in arrays/maps. We keep this state per
11199 * frame depth.
11200 *
11201 * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
11202 * We count frames (contexts in which we separate elements by commas) as both
11203 * repeated fields and messages (maps), and the worst case is a
11204 * message->repeated field->submessage->repeated field->... nesting. */
11205 bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
11206 };
11207
/* StringPiece: a character pointer paired with an explicit length. */
typedef struct {
  const char *ptr;  /* First character. */
  size_t len;       /* Number of characters at ptr. */
} strpc;
11213
11214 strpc *newstrpc(upb_handlers *h, const upb_fielddef *f) {
11215 strpc *ret = malloc(sizeof(*ret));
11216 ret->ptr = upb_fielddef_name(f);
11217 ret->len = strlen(ret->ptr);
11218 upb_handlers_addcleanup(h, ret, free);
11219 return ret;
11220 }
11221
11222 /* ------------ JSON string printing: values, maps, arrays ------------------ */
11223
11224 static void print_data(
11225 upb_json_printer *p, const char *buf, unsigned int len) {
11226 /* TODO: Will need to change if we support pushback from the sink. */
11227 size_t n = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
11228 UPB_ASSERT_VAR(n, n == len);
11229 }
11230
11231 static void print_comma(upb_json_printer *p) {
11232 if (!p->first_elem_[p->depth_]) {
11233 print_data(p, ",", 1);
11234 }
11235 p->first_elem_[p->depth_] = false;
11236 }
11237
11238 /* Helpers that print properly formatted elements to the JSON output stream. */
11239
11240 /* Used for escaping control chars in strings. */
11241 static const char kControlCharLimit = 0x20;
11242
11243 UPB_INLINE bool is_json_escaped(char c) {
11244 /* See RFC 4627. */
11245 unsigned char uc = (unsigned char)c;
11246 return uc < kControlCharLimit || uc == '"' || uc == '\\';
11247 }
11248
11249 UPB_INLINE char* json_nice_escape(char c) {
11250 switch (c) {
11251 case '"': return "\\\"";
11252 case '\\': return "\\\\";
11253 case '\b': return "\\b";
11254 case '\f': return "\\f";
11255 case '\n': return "\\n";
11256 case '\r': return "\\r";
11257 case '\t': return "\\t";
11258 default: return NULL;
11259 }
11260 }
11261
11262 /* Write a properly escaped string chunk. The surrounding quotes are *not*
11263 * printed; this is so that the caller has the option of emitting the string
11264 * content in chunks. */
11265 static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
11266 const char* unescaped_run = NULL;
11267 unsigned int i;
11268 for (i = 0; i < len; i++) {
11269 char c = buf[i];
11270 /* Handle escaping. */
11271 if (is_json_escaped(c)) {
11272 /* Use a "nice" escape, like \n, if one exists for this character. */
11273 const char* escape = json_nice_escape(c);
11274 /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
11275 * escape. */
11276 char escape_buf[8];
11277 if (!escape) {
11278 unsigned char byte = (unsigned char)c;
11279 _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
11280 escape = escape_buf;
11281 }
11282
11283 /* N.B. that we assume that the input encoding is equal to the output
11284 * encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we
11285 * can simply pass the bytes through. */
11286
11287 /* If there's a current run of unescaped chars, print that run first. */
11288 if (unescaped_run) {
11289 print_data(p, unescaped_run, &buf[i] - unescaped_run);
11290 unescaped_run = NULL;
11291 }
11292 /* Then print the escape code. */
11293 print_data(p, escape, strlen(escape));
11294 } else {
11295 /* Add to the current unescaped run of characters. */
11296 if (unescaped_run == NULL) {
11297 unescaped_run = &buf[i];
11298 }
11299 }
11300 }
11301
11302 /* If the string ended in a run of unescaped characters, print that last run. */
11303 if (unescaped_run) {
11304 print_data(p, unescaped_run, &buf[len] - unescaped_run);
11305 }
11306 }
11307
/* Returns (size_t)-1 from the enclosing function when `x` is false; that is
 * the failure value CHKFMT tests for. Wrapped in do/while(0) so the macro is
 * a single statement; call sites supply the trailing ';'. */
#define CHKLENGTH(x) do { if (!(x)) return (size_t)-1; } while (0)
11309
11310 /* Helpers that format floating point values according to our custom formats.
11311 * Right now we use %.8g and %.17g for float/double, respectively, to match
11312 * proto2::util::JsonFormat's defaults. May want to change this later. */
11313
/* Formats `val` with "%.17g" into `buf` (capacity `length`). Returns the
 * number of characters written, or (size_t)-1 if nothing was written or the
 * text did not fit. */
static size_t fmt_double(double val, char* buf, size_t length) {
  const size_t written = _upb_snprintf(buf, length, "%.17g", val);
  if (written == 0 || written >= length) return -1;
  return written;
}
11319
/* Formats `val` with "%.8g" into `buf` (capacity `length`). Returns the
 * number of characters written, or (size_t)-1 if nothing was written or the
 * text did not fit. */
static size_t fmt_float(float val, char* buf, size_t length) {
  const size_t written = _upb_snprintf(buf, length, "%.8g", val);
  if (written == 0 || written >= length) return -1;
  return written;
}
11325
/* Writes "true" or "false" into `buf` (capacity `length`). Returns the
 * number of characters written, or (size_t)-1 if nothing was written or the
 * text did not fit. */
static size_t fmt_bool(bool val, char* buf, size_t length) {
  const size_t written =
      _upb_snprintf(buf, length, "%s", (val ? "true" : "false"));
  if (written == 0 || written >= length) return -1;
  return written;
}
11331
/* Formats a signed integer in decimal into `buf` (capacity `length`).
 *
 * The parameter is `long long` rather than `long`: this helper is
 * instantiated for int32_t, uint32_t and int64_t, and `long` is only 32 bits
 * wide on some platforms, which would truncate 64-bit values.
 *
 * Returns the number of characters written, or (size_t)-1 if nothing was
 * written or the text did not fit. */
static size_t fmt_int64(long long val, char* buf, size_t length) {
  size_t n = _upb_snprintf(buf, length, "%lld", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}
11337
/* Formats an unsigned integer in decimal ("%llu") into `buf` (capacity
 * `length`). Returns the number of characters written, or (size_t)-1 if
 * nothing was written or the text did not fit. */
static size_t fmt_uint64(unsigned long long val, char* buf, size_t length) {
  const size_t written = _upb_snprintf(buf, length, "%llu", val);
  if (written == 0 || written >= length) return -1;
  return written;
}
11343
11344 /* Print a map key given a field name. Called by scalar field handlers and by
11345 * startseq for repeated fields. */
11346 static bool putkey(void *closure, const void *handler_data) {
11347 upb_json_printer *p = closure;
11348 const strpc *key = handler_data;
11349 print_comma(p);
11350 print_data(p, "\"", 1);
11351 putstring(p, key->ptr, key->len);
11352 print_data(p, "\":", 2);
11353 return true;
11354 }
11355
/* CHKFMT: return false from the enclosing function when a fmt_* helper
 * reported failure, i.e. returned (size_t)-1.
 * CHK: return false from the enclosing function when `val` is false.
 *
 * Both are wrapped in do/while(0) so they act as single statements; call
 * sites must supply the trailing ';'. */
#define CHKFMT(val) do { if ((val) == (size_t)-1) return false; } while (0)
#define CHK(val) do { if (!(val)) return false; } while (0)
11358
/* Generates the three value handlers for one primitive type:
 *
 *   put<type>      - formats the value with fmt_func and prints it.
 *   scalar_<type>  - prints the field key, then the value.
 *   repeated_<type>- prints a separating comma if one is due, then the value.
 *
 * Each handler returns false when formatting (or, for scalar_, key printing)
 * fails. */
#define TYPE_HANDLERS(type, fmt_func)                                        \
  static bool put##type(void *closure, const void *handler_data, type val) { \
    char text[64];                                                           \
    const size_t text_len = fmt_func(val, text, sizeof(text));               \
    UPB_UNUSED(handler_data);                                                \
    if (text_len == (size_t)-1) return false;                                \
    print_data(closure, text, text_len);                                     \
    return true;                                                             \
  }                                                                          \
  static bool scalar_##type(void *closure, const void *handler_data,         \
                            type val) {                                      \
    if (!putkey(closure, handler_data)) return false;                        \
    if (!put##type(closure, handler_data, val)) return false;                \
    return true;                                                             \
  }                                                                          \
  static bool repeated_##type(void *closure, const void *handler_data,       \
                              type val) {                                    \
    print_comma(closure);                                                    \
    if (!put##type(closure, handler_data, val)) return false;                \
    return true;                                                             \
  }
11382
/* Generates putmapkey_<type>, which prints a primitive value as a quoted map
 * key followed by ':' (that is, `"<value>":`). The value itself is printed
 * by the put<type> handler that TYPE_HANDLERS generates for the same type.
 * Returns false when that handler fails. */
#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
  static bool putmapkey_##type(void *closure, const void *handler_data,      \
                               type val) {                                   \
    print_data(closure, "\"", 1);                                            \
    if (!put##type(closure, handler_data, val)) return false;                \
    print_data(closure, "\":", 2);                                           \
    return true;                                                             \
  }
11392
11393 TYPE_HANDLERS(double, fmt_double)
11394 TYPE_HANDLERS(float, fmt_float)
11395 TYPE_HANDLERS(bool, fmt_bool)
11396 TYPE_HANDLERS(int32_t, fmt_int64)
11397 TYPE_HANDLERS(uint32_t, fmt_int64)
11398 TYPE_HANDLERS(int64_t, fmt_int64)
11399 TYPE_HANDLERS(uint64_t, fmt_uint64)
11400
11401 /* double and float are not allowed to be map keys. */
11402 TYPE_HANDLERS_MAPKEY(bool, fmt_bool)
11403 TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64)
11404 TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64)
11405 TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64)
11406 TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64)
11407
11408 #undef TYPE_HANDLERS
11409 #undef TYPE_HANDLERS_MAPKEY
11410
11411 typedef struct {
11412 void *keyname;
11413 const upb_enumdef *enumdef;
11414 } EnumHandlerData;
11415
11416 static bool scalar_enum(void *closure, const void *handler_data,
11417 int32_t val) {
11418 const EnumHandlerData *hd = handler_data;
11419 upb_json_printer *p = closure;
11420 const char *symbolic_name;
11421
11422 CHK(putkey(closure, hd->keyname));
11423
11424 symbolic_name = upb_enumdef_iton(hd->enumdef, val);
11425 if (symbolic_name) {
11426 print_data(p, "\"", 1);
11427 putstring(p, symbolic_name, strlen(symbolic_name));
11428 print_data(p, "\"", 1);
11429 } else {
11430 putint32_t(closure, NULL, val);
11431 }
11432
11433 return true;
11434 }
11435
11436 static void print_enum_symbolic_name(upb_json_printer *p,
11437 const upb_enumdef *def,
11438 int32_t val) {
11439 const char *symbolic_name = upb_enumdef_iton(def, val);
11440 if (symbolic_name) {
11441 print_data(p, "\"", 1);
11442 putstring(p, symbolic_name, strlen(symbolic_name));
11443 print_data(p, "\"", 1);
11444 } else {
11445 putint32_t(p, NULL, val);
11446 }
11447 }
11448
11449 static bool repeated_enum(void *closure, const void *handler_data,
11450 int32_t val) {
11451 const EnumHandlerData *hd = handler_data;
11452 upb_json_printer *p = closure;
11453 print_comma(p);
11454
11455 print_enum_symbolic_name(p, hd->enumdef, val);
11456
11457 return true;
11458 }
11459
11460 static bool mapvalue_enum(void *closure, const void *handler_data,
11461 int32_t val) {
11462 const EnumHandlerData *hd = handler_data;
11463 upb_json_printer *p = closure;
11464
11465 print_enum_symbolic_name(p, hd->enumdef, val);
11466
11467 return true;
11468 }
11469
11470 static void *scalar_startsubmsg(void *closure, const void *handler_data) {
11471 return putkey(closure, handler_data) ? closure : UPB_BREAK;
11472 }
11473
11474 static void *repeated_startsubmsg(void *closure, const void *handler_data) {
11475 upb_json_printer *p = closure;
11476 UPB_UNUSED(handler_data);
11477 print_comma(p);
11478 return closure;
11479 }
11480
11481 static void start_frame(upb_json_printer *p) {
11482 p->depth_++;
11483 p->first_elem_[p->depth_] = true;
11484 print_data(p, "{", 1);
11485 }
11486
11487 static void end_frame(upb_json_printer *p) {
11488 print_data(p, "}", 1);
11489 p->depth_--;
11490 }
11491
11492 static bool printer_startmsg(void *closure, const void *handler_data) {
11493 upb_json_printer *p = closure;
11494 UPB_UNUSED(handler_data);
11495 if (p->depth_ == 0) {
11496 upb_bytessink_start(p->output_, 0, &p->subc_);
11497 }
11498 start_frame(p);
11499 return true;
11500 }
11501
11502 static bool printer_endmsg(void *closure, const void *handler_data, upb_status * s) {
11503 upb_json_printer *p = closure;
11504 UPB_UNUSED(handler_data);
11505 UPB_UNUSED(s);
11506 end_frame(p);
11507 if (p->depth_ == 0) {
11508 upb_bytessink_end(p->output_);
11509 }
11510 return true;
11511 }
11512
11513 static void *startseq(void *closure, const void *handler_data) {
11514 upb_json_printer *p = closure;
11515 CHK(putkey(closure, handler_data));
11516 p->depth_++;
11517 p->first_elem_[p->depth_] = true;
11518 print_data(p, "[", 1);
11519 return closure;
11520 }
11521
11522 static bool endseq(void *closure, const void *handler_data) {
11523 upb_json_printer *p = closure;
11524 UPB_UNUSED(handler_data);
11525 print_data(p, "]", 1);
11526 p->depth_--;
11527 return true;
11528 }
11529
11530 static void *startmap(void *closure, const void *handler_data) {
11531 upb_json_printer *p = closure;
11532 CHK(putkey(closure, handler_data));
11533 p->depth_++;
11534 p->first_elem_[p->depth_] = true;
11535 print_data(p, "{", 1);
11536 return closure;
11537 }
11538
11539 static bool endmap(void *closure, const void *handler_data) {
11540 upb_json_printer *p = closure;
11541 UPB_UNUSED(handler_data);
11542 print_data(p, "}", 1);
11543 p->depth_--;
11544 return true;
11545 }
11546
11547 static size_t putstr(void *closure, const void *handler_data, const char *str,
11548 size_t len, const upb_bufhandle *handle) {
11549 upb_json_printer *p = closure;
11550 UPB_UNUSED(handler_data);
11551 UPB_UNUSED(handle);
11552 putstring(p, str, len);
11553 return len;
11554 }
11555
11556 /* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
11557 static size_t putbytes(void *closure, const void *handler_data, const char *str,
11558 size_t len, const upb_bufhandle *handle) {
11559 upb_json_printer *p = closure;
11560
11561 /* This is the regular base64, not the "web-safe" version. */
11562 static const char base64[] =
11563 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
11564
11565 /* Base64-encode. */
11566 char data[16000];
11567 const char *limit = data + sizeof(data);
11568 const unsigned char *from = (const unsigned char*)str;
11569 char *to = data;
11570 size_t remaining = len;
11571 size_t bytes;
11572
11573 UPB_UNUSED(handler_data);
11574 UPB_UNUSED(handle);
11575
11576 while (remaining > 2) {
11577 /* TODO(haberman): handle encoded lengths > sizeof(data) */
11578 UPB_ASSERT_VAR(limit, (limit - to) >= 4);
11579
11580 to[0] = base64[from[0] >> 2];
11581 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
11582 to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
11583 to[3] = base64[from[2] & 0x3f];
11584
11585 remaining -= 3;
11586 to += 4;
11587 from += 3;
11588 }
11589
11590 switch (remaining) {
11591 case 2:
11592 to[0] = base64[from[0] >> 2];
11593 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
11594 to[2] = base64[(from[1] & 0xf) << 2];
11595 to[3] = '=';
11596 to += 4;
11597 from += 2;
11598 break;
11599 case 1:
11600 to[0] = base64[from[0] >> 2];
11601 to[1] = base64[((from[0] & 0x3) << 4)];
11602 to[2] = '=';
11603 to[3] = '=';
11604 to += 4;
11605 from += 1;
11606 break;
11607 }
11608
11609 bytes = to - data;
11610 print_data(p, "\"", 1);
11611 putstring(p, data, bytes);
11612 print_data(p, "\"", 1);
11613 return len;
11614 }
11615
11616 static void *scalar_startstr(void *closure, const void *handler_data,
11617 size_t size_hint) {
11618 upb_json_printer *p = closure;
11619 UPB_UNUSED(handler_data);
11620 UPB_UNUSED(size_hint);
11621 CHK(putkey(closure, handler_data));
11622 print_data(p, "\"", 1);
11623 return p;
11624 }
11625
11626 static size_t scalar_str(void *closure, const void *handler_data,
11627 const char *str, size_t len,
11628 const upb_bufhandle *handle) {
11629 CHK(putstr(closure, handler_data, str, len, handle));
11630 return len;
11631 }
11632
11633 static bool scalar_endstr(void *closure, const void *handler_data) {
11634 upb_json_printer *p = closure;
11635 UPB_UNUSED(handler_data);
11636 print_data(p, "\"", 1);
11637 return true;
11638 }
11639
11640 static void *repeated_startstr(void *closure, const void *handler_data,
11641 size_t size_hint) {
11642 upb_json_printer *p = closure;
11643 UPB_UNUSED(handler_data);
11644 UPB_UNUSED(size_hint);
11645 print_comma(p);
11646 print_data(p, "\"", 1);
11647 return p;
11648 }
11649
11650 static size_t repeated_str(void *closure, const void *handler_data,
11651 const char *str, size_t len,
11652 const upb_bufhandle *handle) {
11653 CHK(putstr(closure, handler_data, str, len, handle));
11654 return len;
11655 }
11656
11657 static bool repeated_endstr(void *closure, const void *handler_data) {
11658 upb_json_printer *p = closure;
11659 UPB_UNUSED(handler_data);
11660 print_data(p, "\"", 1);
11661 return true;
11662 }
11663
11664 static void *mapkeyval_startstr(void *closure, const void *handler_data,
11665 size_t size_hint) {
11666 upb_json_printer *p = closure;
11667 UPB_UNUSED(handler_data);
11668 UPB_UNUSED(size_hint);
11669 print_data(p, "\"", 1);
11670 return p;
11671 }
11672
11673 static size_t mapkey_str(void *closure, const void *handler_data,
11674 const char *str, size_t len,
11675 const upb_bufhandle *handle) {
11676 CHK(putstr(closure, handler_data, str, len, handle));
11677 return len;
11678 }
11679
11680 static bool mapkey_endstr(void *closure, const void *handler_data) {
11681 upb_json_printer *p = closure;
11682 UPB_UNUSED(handler_data);
11683 print_data(p, "\":", 2);
11684 return true;
11685 }
11686
11687 static bool mapvalue_endstr(void *closure, const void *handler_data) {
11688 upb_json_printer *p = closure;
11689 UPB_UNUSED(handler_data);
11690 print_data(p, "\"", 1);
11691 return true;
11692 }
11693
11694 static size_t scalar_bytes(void *closure, const void *handler_data,
11695 const char *str, size_t len,
11696 const upb_bufhandle *handle) {
11697 CHK(putkey(closure, handler_data));
11698 CHK(putbytes(closure, handler_data, str, len, handle));
11699 return len;
11700 }
11701
11702 static size_t repeated_bytes(void *closure, const void *handler_data,
11703 const char *str, size_t len,
11704 const upb_bufhandle *handle) {
11705 upb_json_printer *p = closure;
11706 print_comma(p);
11707 CHK(putbytes(closure, handler_data, str, len, handle));
11708 return len;
11709 }
11710
11711 static size_t mapkey_bytes(void *closure, const void *handler_data,
11712 const char *str, size_t len,
11713 const upb_bufhandle *handle) {
11714 upb_json_printer *p = closure;
11715 CHK(putbytes(closure, handler_data, str, len, handle));
11716 print_data(p, ":", 1);
11717 return len;
11718 }
11719
11720 static void set_enum_hd(upb_handlers *h,
11721 const upb_fielddef *f,
11722 upb_handlerattr *attr) {
11723 EnumHandlerData *hd = malloc(sizeof(EnumHandlerData));
11724 hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
11725 hd->keyname = newstrpc(h, f);
11726 upb_handlers_addcleanup(h, hd, free);
11727 upb_handlerattr_sethandlerdata(attr, hd);
11728 }
11729
11730 /* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
11731 * in a map).
11732 *
11733 * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
11734 * key or value cases properly. The right way to do this is to allocate a
11735 * temporary structure at the start of a mapentry submessage, store key and
11736 * value data in it as key and value handlers are called, and then print the
11737 * key/value pair once at the end of the submessage. If we don't do this, we
11738 * should at least detect the case and throw an error. However, so far all of
11739 * our sources that emit mapentry messages do so canonically (with one key
11740 * field, and then one value field), so this is not a pressing concern at the
11741 * moment. */
11742 void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
11743 const upb_msgdef *md = upb_handlers_msgdef(h);
11744
11745 /* A mapentry message is printed simply as '"key": value'. Rather than
11746 * special-case key and value for every type below, we just handle both
11747 * fields explicitly here. */
11748 const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
11749 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
11750
11751 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
11752
11753 UPB_UNUSED(closure);
11754
11755 switch (upb_fielddef_type(key_field)) {
11756 case UPB_TYPE_INT32:
11757 upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
11758 break;
11759 case UPB_TYPE_INT64:
11760 upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
11761 break;
11762 case UPB_TYPE_UINT32:
11763 upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
11764 break;
11765 case UPB_TYPE_UINT64:
11766 upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
11767 break;
11768 case UPB_TYPE_BOOL:
11769 upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
11770 break;
11771 case UPB_TYPE_STRING:
11772 upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
11773 upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
11774 upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
11775 break;
11776 case UPB_TYPE_BYTES:
11777 upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
11778 break;
11779 default:
11780 assert(false);
11781 break;
11782 }
11783
11784 switch (upb_fielddef_type(value_field)) {
11785 case UPB_TYPE_INT32:
11786 upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
11787 break;
11788 case UPB_TYPE_INT64:
11789 upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
11790 break;
11791 case UPB_TYPE_UINT32:
11792 upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
11793 break;
11794 case UPB_TYPE_UINT64:
11795 upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
11796 break;
11797 case UPB_TYPE_BOOL:
11798 upb_handlers_setbool(h, value_field, putbool, &empty_attr);
11799 break;
11800 case UPB_TYPE_FLOAT:
11801 upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
11802 break;
11803 case UPB_TYPE_DOUBLE:
11804 upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
11805 break;
11806 case UPB_TYPE_STRING:
11807 upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
11808 upb_handlers_setstring(h, value_field, putstr, &empty_attr);
11809 upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
11810 break;
11811 case UPB_TYPE_BYTES:
11812 upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
11813 break;
11814 case UPB_TYPE_ENUM: {
11815 upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
11816 set_enum_hd(h, value_field, &enum_attr);
11817 upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
11818 upb_handlerattr_uninit(&enum_attr);
11819 break;
11820 }
11821 case UPB_TYPE_MESSAGE:
11822 /* No handler necessary -- the submsg handlers will print the message
11823 * as appropriate. */
11824 break;
11825 }
11826
11827 upb_handlerattr_uninit(&empty_attr);
11828 }
11829
11830 void printer_sethandlers(const void *closure, upb_handlers *h) {
11831 const upb_msgdef *md = upb_handlers_msgdef(h);
11832 bool is_mapentry = upb_msgdef_mapentry(md);
11833 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
11834 upb_msg_field_iter i;
11835
11836 UPB_UNUSED(closure);
11837
11838 if (is_mapentry) {
11839 /* mapentry messages are sufficiently different that we handle them
11840 * separately. */
11841 printer_sethandlers_mapentry(closure, h);
11842 return;
11843 }
11844
11845 upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
11846 upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
11847
11848 #define TYPE(type, name, ctype) \
11849 case type: \
11850 if (upb_fielddef_isseq(f)) { \
11851 upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \
11852 } else { \
11853 upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \
11854 } \
11855 break;
11856
11857 upb_msg_field_begin(&i, md);
11858 for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
11859 const upb_fielddef *f = upb_msg_iter_field(&i);
11860
11861 upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
11862 upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f));
11863
11864 if (upb_fielddef_ismap(f)) {
11865 upb_handlers_setstartseq(h, f, startmap, &name_attr);
11866 upb_handlers_setendseq(h, f, endmap, &name_attr);
11867 } else if (upb_fielddef_isseq(f)) {
11868 upb_handlers_setstartseq(h, f, startseq, &name_attr);
11869 upb_handlers_setendseq(h, f, endseq, &empty_attr);
11870 }
11871
11872 switch (upb_fielddef_type(f)) {
11873 TYPE(UPB_TYPE_FLOAT, float, float);
11874 TYPE(UPB_TYPE_DOUBLE, double, double);
11875 TYPE(UPB_TYPE_BOOL, bool, bool);
11876 TYPE(UPB_TYPE_INT32, int32, int32_t);
11877 TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
11878 TYPE(UPB_TYPE_INT64, int64, int64_t);
11879 TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
11880 case UPB_TYPE_ENUM: {
11881 /* For now, we always emit symbolic names for enums. We may want an
11882 * option later to control this behavior, but we will wait for a real
11883 * need first. */
11884 upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
11885 set_enum_hd(h, f, &enum_attr);
11886
11887 if (upb_fielddef_isseq(f)) {
11888 upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
11889 } else {
11890 upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
11891 }
11892
11893 upb_handlerattr_uninit(&enum_attr);
11894 break;
11895 }
11896 case UPB_TYPE_STRING:
11897 if (upb_fielddef_isseq(f)) {
11898 upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
11899 upb_handlers_setstring(h, f, repeated_str, &empty_attr);
11900 upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
11901 } else {
11902 upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
11903 upb_handlers_setstring(h, f, scalar_str, &empty_attr);
11904 upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
11905 }
11906 break;
11907 case UPB_TYPE_BYTES:
11908 /* XXX: this doesn't support strings that span buffers yet. The base64
11909 * encoder will need to be made resumable for this to work properly. */
11910 if (upb_fielddef_isseq(f)) {
11911 upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
11912 } else {
11913 upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
11914 }
11915 break;
11916 case UPB_TYPE_MESSAGE:
11917 if (upb_fielddef_isseq(f)) {
11918 upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
11919 } else {
11920 upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
11921 }
11922 break;
11923 }
11924
11925 upb_handlerattr_uninit(&name_attr);
11926 }
11927
11928 upb_handlerattr_uninit(&empty_attr);
11929 #undef TYPE
11930 }
11931
11932 static void json_printer_reset(upb_json_printer *p) {
11933 p->depth_ = 0;
11934 }
11935
11936
11937 /* Public API *****************************************************************/
11938
11939 upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
11940 upb_bytessink *output) {
11941 #ifndef NDEBUG
11942 size_t size_before = upb_env_bytesallocated(e);
11943 #endif
11944
11945 upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
11946 if (!p) return NULL;
11947
11948 p->output_ = output;
11949 json_printer_reset(p);
11950 upb_sink_reset(&p->input_, h, p);
11951
11952 /* If this fails, increase the value in printer.h. */
11953 assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
11954 return p;
11955 }
11956
11957 upb_sink *upb_json_printer_input(upb_json_printer *p) {
11958 return &p->input_;
11959 }
11960
11961 const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
11962 const void *owner) {
11963 return upb_handlers_newfrozen(md, owner, printer_sethandlers, NULL);
11964 }
OLDNEW
« no previous file with comments | « third_party/protobuf/ruby/ext/google/protobuf_c/upb.h ('k') | third_party/protobuf/ruby/google-protobuf.gemspec » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698