OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2011 The Native Client Authors. All rights reserved. | |
3 * Use of this source code is governed by a BSD-style license that can be | |
4 * found in the LICENSE file. | |
5 */ | |
6 | |
7 /* | |
8 * ncdis.c - disassemble using NaCl decoder. | |
9 * Mostly for testing. | |
10 */ | |
11 | |
12 | |
13 #ifndef NACL_TRUSTED_BUT_NOT_TCB | |
14 #error("This file is not meant for use in the TCB") | |
15 #endif | |
16 | |
17 #include <errno.h> | |
18 #include <stdarg.h> | |
19 #include <stdio.h> | |
20 #include <stdlib.h> | |
21 #include <string.h> | |
22 | |
23 #include "native_client/src/shared/gio/gio.h" | |
24 #include "native_client/src/shared/utils/types.h" | |
25 #include "native_client/src/shared/utils/flags.h" | |
26 #include "native_client/src/shared/platform/nacl_log.h" | |
27 #include "native_client/src/trusted/validator/ncfileutil.h" | |
28 #include "native_client/src/trusted/validator/x86/decoder/nc_inst_state.h" | |
29 #include "native_client/src/trusted/validator/x86/decoder/ncopcode_desc.h" | |
30 #include "native_client/src/trusted/validator/x86/decoder/nc_decode_tables.h" | |
31 #include "native_client/src/trusted/validator/x86/ncval_seg_sfi/ncdecode_verbose
.h" | |
32 #include "native_client/src/trusted/validator/x86/ncval_seg_sfi/ncvalidate_inter
naltypes.h" | |
33 #include "native_client/src/trusted/validator_x86/nc_read_segment.h" | |
34 #include "native_client/src/trusted/validator_x86/ncdis_segments.h" | |
35 | |
36 /* True if we should use the full decoder when decoding. */ | |
37 /* TODO(karl): When the full_decoder is working for both the x86-32 and | |
38 * x86-64 platforms, change to use full decoder for both as default. | |
39 */ | |
40 static Bool NACL_FLAGS_full_decoder = | |
41 #if NACL_TARGET_SUBARCH == 64 | |
42 TRUE | |
43 #else | |
44 FALSE | |
45 #endif | |
46 ; | |
47 | |
48 /* True if we should use the validator decoder when decoding. */ | |
49 static Bool NACL_FLAGS_validator_decoder = | |
50 #if NACL_TARGET_SUBARCH == 64 | |
51 FALSE | |
52 #else | |
53 TRUE | |
54 #endif | |
55 ; | |
56 | |
57 /* True if we should print internal representations while decoding. */ | |
58 static Bool NACL_FLAGS_internal = FALSE; | |
59 | |
60 /* The name of the executable that is being run. */ | |
61 static const char* exec_name = "???"; | |
62 | |
/* Reports an unrecoverable error and terminates the process.
 *
 * Writes "Fatal: " followed by the printf-style formatted message to
 * stdout, then exits with status -1. Never returns.
 */
static void Fatal(const char *fmt, ...) {
  va_list args;
  FILE* out = stdout;

  fputs("Fatal: ", out);
  va_start(args, fmt);
  vfprintf(out, fmt, args);
  va_end(args);
  exit(-1);
}
72 | |
/* Writes "Info: " followed by the printf-style formatted message to stdout.
 * No newline is appended; the caller supplies one in fmt if wanted.
 */
void Info(const char *fmt, ...) {
  va_list args;
  FILE* out = stdout;

  fputs("Info: ", out);
  va_start(args, fmt);
  vfprintf(out, fmt, args);
  va_end(args);
}
81 | |
/* Prints the command line help text to stderr and exits with status 1.
 * Never returns.
 */
static void usage(void) {
  fprintf(stderr,
          "usage: ncdis [options] [file]\n"
          "\n"
          "Options are:\n"
          "--commands=<file>\n"
          "\tAdditional command line arguments are specified in the given\n"
          "\tfile ('#' acts as a comment character). Use '-' as its value to\n"
          "\tredirect command line arguments from standard input.\n"
          "--full_decoder\n"
          "\tDisassemble the elf executable using native client's\n"
          "\tfull decoder.\n"
          "--help\n"
          "\tPrint out this usage message\n"
          "--hex_text=<file>\n"
          "\tDefine code section as sequence of (textual) hexadecimal bytes\n"
          "\tdefined in the given file. Lines beginning with '#' will be\n"
          /* Note the double 't': a tab escape followed by "treated". */
          "\ttreated as comments. If the first non-comment line begins with\n"
          "\t'@' the following hexadecimal number will be used as the\n"
          "\tbeginning (RIP/EIP) instruction address of the code segment.\n"
          "\tUse '-' as its value to redirect standard input as the\n"
          "\ttext file to process.\n"
          "-i=XXXX\n"
          "\tXXXX specifies the sequence of hexadecimal digits that define\n"
          "\tan instruction to be decoded.\n"
          "--internal\n"
          "\tFor the iterator model (only), prints out each decoded\n"
          "\tinstruction, followed by the internals for the matched\n"
          "\tinstruction.\n"
          "--pc=XXX\n"
          "\tSet program counter (i.e. RIP or EIP) to XXX.\n"
          "--self_document\n"
          "\tProcess input hex_text file in such a way, that it also\n"
          "\trepresents the output that will be generated by ncdis.\n"
          "\tThat is, copy comment lines (i.e. lines beginning with\n"
          "\t'#') to stdout. In addition, it assumes that each line\n"
          "\tconsists of an '-i' command line argument (and possibly\n"
          "\ta '--pc' command line argument), followed by a '#',\n"
          "\tfollowed by the corresponding disassembled text. On such\n"
          "\tlines, the input is copied up to (and including) the '#',\n"
          "\tand then the disassembled instruction is printed.\n"
          "--validator_decoder\n"
          "\tDisassemble the file using the partial instruction decoder used\n"
          "\tby the validator.\n"
          );
  exit(1);
}
129 | |
130 /* Converts command line flags to corresponding disassemble flags. */ | |
131 static NaClDisassembleFlags NaClGetDisassembleFlags(void) { | |
132 NaClDisassembleFlags flags = 0; | |
133 if (NACL_FLAGS_validator_decoder) { | |
134 NaClAddBits(flags, NACL_DISASSEMBLE_FLAG(NaClDisassembleValidatorDecoder)); | |
135 } | |
136 if (NACL_FLAGS_full_decoder) { | |
137 NaClAddBits(flags, NACL_DISASSEMBLE_FLAG(NaClDisassembleFull)); | |
138 } | |
139 if (NACL_FLAGS_internal) { | |
140 NaClAddBits(flags, NACL_DISASSEMBLE_FLAG(NaClDisassembleAddInternals)); | |
141 } | |
142 return flags; | |
143 } | |
144 | |
145 static int AnalyzeSections(ncfile *ncf) { | |
146 int badsections = 0; | |
147 int ii; | |
148 const Elf_Shdr* shdr = ncf->sheaders; | |
149 | |
150 for (ii = 0; ii < ncf->shnum; ii++) { | |
151 Info("section %d sh_addr %x offset %x flags %x\n", | |
152 ii, (uint32_t)shdr[ii].sh_addr, | |
153 (uint32_t)shdr[ii].sh_offset, (uint32_t)shdr[ii].sh_flags); | |
154 if ((shdr[ii].sh_flags & SHF_EXECINSTR) != SHF_EXECINSTR) | |
155 continue; | |
156 Info("parsing section %d\n", ii); | |
157 NaClDisassembleSegment(ncf->data + (shdr[ii].sh_addr - ncf->vbase), | |
158 shdr[ii].sh_addr, shdr[ii].sh_size, | |
159 NaClGetDisassembleFlags()); | |
160 } | |
161 return -badsections; | |
162 } | |
163 | |
164 static void AnalyzeCodeSegments(ncfile *ncf, const char *fname) { | |
165 if (AnalyzeSections(ncf) < 0) { | |
166 fprintf(stderr, "%s: text validate failed\n", fname); | |
167 } | |
168 } | |
169 | |
170 /* Capture a sequence of bytes defining an instruction (up to a | |
171 * MAX_BYTES_PER_X86_INSTRUCTION). This sequence is used to run | |
172 * a (debug) test of the disassembler. | |
173 */ | |
174 static uint8_t FLAGS_decode_instruction[NACL_MAX_BYTES_PER_X86_INSTRUCTION]; | |
175 | |
176 /* Define the number of bytes supplied for a debug instruction. */ | |
177 static int FLAGS_decode_instruction_size = 0; | |
178 | |
179 /* Flag defining the value of the pc to use when decoding an instruction | |
180 * through decode_instruction. | |
181 */ | |
182 static uint32_t FLAGS_decode_pc = 0; | |
183 | |
184 /* Flag defining an input file to use as command line arguments | |
185 * (one per input line). When specified, run the disassembler | |
186 * on each command line. The empty string "" denotes that no command | |
187 * line file was specified. A dash ("-") denotes that standard input | |
188 * should be used to get command line arguments. | |
189 */ | |
190 static char* FLAGS_commands = ""; | |
191 | |
192 /* Flag defining the name of a hex text to be used as the code segment. Assumes | |
193 * that the pc associated with the code segment is defined by | |
194 * FLAGS_decode_pc. | |
195 */ | |
196 static char* FLAGS_hex_text = ""; | |
197 | |
198 /* Flag, when used in combination with the commands flag, will turn | |
199 * on input copy rules, making the genrated output contain comments | |
200 * and the command line arguments as part of the corresponding | |
201 * generated output. For more details on this, see ProcessInputFile | |
202 * below. | |
203 */ | |
204 static Bool FLAGS_self_document = FALSE; | |
205 | |
206 /* | |
207 * Store default values of flags on the first call. On subsequent | |
208 * calls, resets the flags to the default value. | |
209 * | |
210 * *WARNING* In order for this to work, this function must be | |
211 * called before GrokFlags | |
212 * | |
213 * NOTE: we only allow the specification of -use_iter at the top-level | |
214 * command line.. | |
215 */ | |
216 static void ResetFlags(void) { | |
217 int i; | |
218 static uint32_t DEFAULT_decode_pc; | |
219 static char* DEFAULT_commands; | |
220 static Bool DEFAULT_self_document; | |
221 static Bool is_first_call = TRUE; | |
222 if (is_first_call) { | |
223 DEFAULT_decode_pc = FLAGS_decode_pc; | |
224 DEFAULT_commands = FLAGS_commands; | |
225 DEFAULT_self_document = FLAGS_self_document; | |
226 is_first_call = FALSE; | |
227 } | |
228 | |
229 FLAGS_decode_pc = DEFAULT_decode_pc; | |
230 FLAGS_commands = DEFAULT_commands; | |
231 FLAGS_self_document = DEFAULT_self_document; | |
232 /* Always clear the decode instruction. */ | |
233 FLAGS_decode_instruction_size = 0; | |
234 for (i = 0; i < NACL_MAX_BYTES_PER_X86_INSTRUCTION; ++i) { | |
235 FLAGS_decode_instruction[i] = 0; | |
236 } | |
237 } | |
238 | |
239 /* Returns true if all characters in the string are zero. */ | |
240 static Bool IsZero(const char* arg) { | |
241 while (*arg) { | |
242 if ('0' != *arg) { | |
243 return FALSE; | |
244 } | |
245 ++arg; | |
246 } | |
247 return TRUE; | |
248 } | |
249 | |
/* Converts the hexadecimal text in hex_value to a byte.
 *
 * Calls Fatal (which does not return) when the text is not a valid
 * hexadecimal number:
 *   - when characters remain after the parsed number (e.g. "1g"), or
 *   - when the parsed value is zero but the text is not made up solely of
 *     '0' characters, in which case the zero is assumed to be an error.
 *
 * Returns the parsed value truncated to uint8_t.
 */
uint8_t HexToByte(const char* hex_value) {
  char* end = NULL;
  unsigned long value = strtoul(hex_value, &end, 16);

  /* strtoul stops at the first character it cannot use; anything left
   * over means the text was only partially hexadecimal.
   */
  if ('\0' != *end || (0UL == value && !IsZero(hex_value))) {
    Fatal("-i option specifies illegal hex value '%s'\n", hex_value);
  }
  return (uint8_t) value;
}
260 | |
261 /* Recognizes flags in argv, processes them, and then removes them. | |
262 * Returns the updated value for argc. | |
263 */ | |
264 int GrokFlags(int argc, const char *argv[]) { | |
265 int i; | |
266 int new_argc; | |
267 char* hex_instruction; | |
268 Bool help = FALSE; | |
269 if (argc == 0) return 0; | |
270 exec_name = argv[0]; | |
271 new_argc = 1; | |
272 for (i = 1; i < argc; ++i) { | |
273 const char* arg = argv[i]; | |
274 if (GrokUint32HexFlag("--pc", arg, &FLAGS_decode_pc) || | |
275 GrokCstringFlag("--commands", arg, &FLAGS_commands) || | |
276 GrokCstringFlag("--hex_text", arg, &FLAGS_hex_text) || | |
277 GrokBoolFlag("--self_document", arg, &FLAGS_self_document) || | |
278 GrokBoolFlag("--internal", arg, &NACL_FLAGS_internal) || | |
279 GrokBoolFlag("--help", arg, &help)) { | |
280 if (help) usage(); | |
281 } else if (GrokBoolFlag("--validator_decoder", arg, | |
282 &NACL_FLAGS_validator_decoder)) { | |
283 NACL_FLAGS_full_decoder = !NACL_FLAGS_validator_decoder; | |
284 } else if (GrokBoolFlag("--full_decoder", arg, | |
285 &NACL_FLAGS_full_decoder)) { | |
286 NACL_FLAGS_validator_decoder = !NACL_FLAGS_full_decoder; | |
287 } else if (GrokCstringFlag("-i", arg, &hex_instruction)) { | |
288 int i = 0; | |
289 char buffer[3]; | |
290 char* buf = &(hex_instruction[0]); | |
291 buffer[2] = '\0'; | |
292 while (*buf) { | |
293 buffer[i++] = *(buf++); | |
294 if (i == 2) { | |
295 uint8_t byte = HexToByte(buffer); | |
296 FLAGS_decode_instruction[FLAGS_decode_instruction_size++] = byte; | |
297 if (FLAGS_decode_instruction_size > | |
298 NACL_MAX_BYTES_PER_X86_INSTRUCTION) { | |
299 Fatal("-i=%s specifies too long of a hex value\n", hex_instruction); | |
300 } | |
301 i = 0; | |
302 } | |
303 } | |
304 if (i != 0) { | |
305 Fatal("-i=%s doesn't specify a sequence of bytes\n", hex_instruction); | |
306 } | |
307 } else { | |
308 argv[new_argc++] = argv[i]; | |
309 } | |
310 } | |
311 return new_argc; | |
312 } | |
313 | |
/* Extracts the file name from the arguments left over after flag
 * processing. Exactly one argument besides the executable name is
 * expected; any other count is reported through Fatal.
 */
static const char* GrokArgv(int argc, const char* argv[]) {
  const int last = argc - 1;

  if (1 != last) {
    Fatal("no filename specified\n");
  }
  return argv[last];
}
321 | |
/* Defined further down; declared here because ProcessInputFile calls it. */
static void ProcessCommandLine(int argc, const char* argv[]);

/* Size of the buffers used for a line of the text file named by the
 * commands command line option.
 */
#define MAX_INPUT_LINE 4096

/* Characters that separate one command line argument (token) from the
 * next, when a line of the commands text file is split into arguments.
 */
#define CL_SEPARATORS " \t\n"
334 | |
/* Copies the command line options found on an input line, leaving out any
 * trailing comment (i.e. everything from the first pound sign on).
 *   input_line - The line of text to process.
 *   token_text - Receives the extracted text, null terminated.
 *   max_length - The size of both input_line and token_text.
 *
 * Note: The last character of input_line (index max_length - 1) is
 * overwritten with a null terminator if the scan gets that far, so the
 * copy always terminates within max_length characters.
 */
static void CopyCommandLineTokens(char* input_line,
                                  char* token_text,
                                  size_t max_length) {
  size_t pos;
  for (pos = 0; pos < max_length; ++pos) {
    if (pos + 1 == max_length) {
      /* Out of room: force the input to end here. */
      input_line[pos] = '\0';
    }
    switch (input_line[pos]) {
      case '#':   /* Start of a comment: the options end here. */
      case '\0':  /* End of the line. */
        token_text[pos] = '\0';
        return;
      default:
        token_text[pos] = input_line[pos];
        break;
    }
  }
}
363 | |
364 /* Tokenize the given text to find command line arguments, and | |
365 * add them to the given list of command line arguments. | |
366 * | |
367 * *WARNING* This function will (destructively) modify the | |
368 * contents of token_text, by converting command line option | |
369 * separator characters into newlines. | |
370 */ | |
371 static void ExtractTokensAndAddToArgv( | |
372 char* token_text, | |
373 int* argc, | |
374 const char* argv[]) { | |
375 /* Note: Assume that each command line argument corresponds to | |
376 * non-blank text, which is a HACK, but should be sufficient for | |
377 * what we need. | |
378 */ | |
379 char* token = strtok(token_text, CL_SEPARATORS); | |
380 while (token != NULL) { | |
381 argv[(*argc)++] = token; | |
382 token = strtok(NULL, CL_SEPARATORS); | |
383 } | |
384 } | |
385 | |
386 /* Print out the contents of text, up to the first occurence of the | |
387 * pound sign. | |
388 */ | |
389 static void PrintUpToPound(const char text[]) { | |
390 int i; | |
391 struct Gio* g = NaClLogGetGio(); | |
392 for (i = 0; i < MAX_INPUT_LINE; ++i) { | |
393 char ch = text[i]; | |
394 switch (ch) { | |
395 case '#': | |
396 gprintf(g, "%c", ch); | |
397 return; | |
398 case '\0': | |
399 return; | |
400 default: | |
401 gprintf(g, "%c", ch); | |
402 break; | |
403 } | |
404 } | |
405 } | |
406 | |
407 /* Reads the given text file and processes the command line options specified | |
408 * inside of it. Each line specifies a separate sequence of command line | |
409 * arguments to process. | |
410 * | |
411 * Note: | |
412 * (a) The '#' is used as a comment delimiter. | |
413 * (b) whitespace lines are ignored. | |
414 * (c) If flag --self_document is specified, comment lines and whitespace | |
415 * lines will automatically be copied to stdout. In addition, command | |
416 * line arguments will be copied to stdout before processing them. | |
417 * Further, if the command line arguments are followed by a comment, | |
418 * only text up to (and including) the '#' will be copied. This allows | |
419 * the input file to contain the (hopefully single lined) output that | |
420 * would be generated by the given command line arguments. Therefore, | |
421 * if set up correctly, the output of the disassembler (in this case) | |
422 * should be the same as the input file (making it easy to use the | |
423 * input file as the the corresponding GOLD file to test against). | |
424 */ | |
425 static void ProcessInputFile(FILE* file) { | |
426 char input_line[MAX_INPUT_LINE]; | |
427 const Bool self_document = FLAGS_self_document; | |
428 while (fgets(input_line, MAX_INPUT_LINE, file) != NULL) { | |
429 char token_text[MAX_INPUT_LINE]; | |
430 const char* line_argv[MAX_INPUT_LINE]; | |
431 int line_argc = 0; | |
432 | |
433 /* Copy the input line (up to the first #) into token_text */ | |
434 CopyCommandLineTokens(input_line, token_text, MAX_INPUT_LINE); | |
435 | |
436 /* Tokenize the commands to build argv. | |
437 * Note: Since each token is separated by a blank, | |
438 * and the input is no more than MAX_INPUT_LINE, | |
439 * we know (without checking) that line_argc | |
440 * will not exceed MAX_INPUT_LINE. | |
441 */ | |
442 line_argv[line_argc++] = exec_name; | |
443 ExtractTokensAndAddToArgv(token_text, &line_argc, line_argv); | |
444 | |
445 /* Process the parsed input line. */ | |
446 if (1 == line_argc) { | |
447 /* No command line arguments. */ | |
448 if (self_document) { | |
449 printf("%s", input_line); | |
450 } | |
451 } else { | |
452 /* Process the tokenized command line. */ | |
453 if (self_document) { | |
454 PrintUpToPound(input_line); | |
455 } | |
456 ProcessCommandLine(line_argc, line_argv); | |
457 } | |
458 } | |
459 ResetFlags(); | |
460 } | |
461 | |
462 /* Run the disassembler using the given command line arguments. */ | |
463 static void ProcessCommandLine(int argc, const char* argv[]) { | |
464 int new_argc; | |
465 | |
466 ResetFlags(); | |
467 new_argc = GrokFlags(argc, argv); | |
468 if (FLAGS_decode_instruction_size > 0) { | |
469 /* Command line options specify an instruction to decode, run | |
470 * the disassembler on the instruction to print out the decoded | |
471 * results. | |
472 */ | |
473 if (new_argc > 1) { | |
474 Fatal("unrecognized option '%s'\n", argv[1]); | |
475 } | |
476 NaClDisassembleSegment(FLAGS_decode_instruction, FLAGS_decode_pc, | |
477 FLAGS_decode_instruction_size, | |
478 NaClGetDisassembleFlags()); | |
479 } else if (0 != strcmp(FLAGS_hex_text, "")) { | |
480 uint8_t bytes[MAX_INPUT_LINE]; | |
481 size_t num_bytes; | |
482 NaClPcAddress pc; | |
483 if (0 == strcmp(FLAGS_hex_text, "-")) { | |
484 num_bytes = NaClReadHexTextWithPc(stdin, &pc, bytes, MAX_INPUT_LINE); | |
485 NaClDisassembleSegment(bytes, pc, (NaClMemorySize) num_bytes, | |
486 NaClGetDisassembleFlags()); | |
487 } else { | |
488 FILE* input = fopen(FLAGS_hex_text, "r"); | |
489 if (NULL == input) { | |
490 Fatal("Can't open hex text file: %s\n", FLAGS_hex_text); | |
491 } | |
492 num_bytes = NaClReadHexTextWithPc(input, &pc, bytes, MAX_INPUT_LINE); | |
493 fclose(input); | |
494 NaClDisassembleSegment(bytes, pc, (NaClMemorySize) num_bytes, | |
495 NaClGetDisassembleFlags()); | |
496 } | |
497 } else if (0 != strcmp(FLAGS_commands, "")) { | |
498 /* Use the given input file to find command line arguments, | |
499 * and process. | |
500 */ | |
501 if (0 == strcmp(FLAGS_commands, "-")) { | |
502 ProcessInputFile(stdin); | |
503 } else { | |
504 FILE* input = fopen(FLAGS_commands, "r"); | |
505 if (NULL == input) { | |
506 Fatal("Can't open commands file: %s\n", FLAGS_commands); | |
507 } | |
508 ProcessInputFile(input); | |
509 fclose(input); | |
510 } | |
511 } else { | |
512 /* Command line should specify an executable to disassemble. | |
513 * Read the file and disassemble it. | |
514 */ | |
515 ncfile *ncf; | |
516 const char* filename = GrokArgv(new_argc, argv); | |
517 | |
518 Info("processing %s", filename); | |
519 ncf = nc_loadfile_depending(filename, NULL); | |
520 if (ncf == NULL) { | |
521 Fatal("nc_loadfile(%s): %s\n", filename, strerror(errno)); | |
522 } | |
523 | |
524 AnalyzeCodeSegments(ncf, filename); | |
525 | |
526 nc_freefile(ncf); | |
527 } | |
528 } | |
529 | |
530 int main(int argc, const char *argv[]) { | |
531 struct GioFile gout_file; | |
532 struct Gio* gout = (struct Gio*) &gout_file; | |
533 if (!GioFileRefCtor(&gout_file, stdout)) { | |
534 fprintf(stderr, "Unable to create gio file for stdout!\n"); | |
535 return 1; | |
536 } | |
537 NaClLogModuleInitExtended(LOG_INFO, gout); | |
538 ProcessCommandLine(argc, argv); | |
539 NaClLogModuleFini(); | |
540 GioFileDtor(gout); | |
541 return 0; | |
542 } | |
OLD | NEW |