Chromium Code Reviews| Index: samples/pdfium_test.cc |
| diff --git a/samples/pdfium_test.cc b/samples/pdfium_test.cc |
| index 5ee14642e4b7993f2d5de807f67f7427ca65ddfb..8276a70836556d11b78eed7364fa04ea4da25135 100644 |
| --- a/samples/pdfium_test.cc |
| +++ b/samples/pdfium_test.cc |
| @@ -42,6 +42,7 @@ |
| enum OutputFormat { |
| OUTPUT_NONE, |
| + OUTPUT_TEXT, |
| OUTPUT_PPM, |
| OUTPUT_PNG, |
| #ifdef _WIN32 |
| @@ -112,6 +113,37 @@ static void WritePpm(const char* pdf_name, int num, const void* buffer_void, |
| fclose(fp); |
| } |
| +// Writes the text of |page| to the file "<pdf_name>.<num>.txt". The output |
| +// starts with a UTF-32LE byte-order mark, followed by one 32-bit value per |
| +// character as returned by FPDFText_GetUnicode(). If the file name does not |
| +// fit the local buffer, or the file cannot be opened, an error is printed to |
| +// stderr and nothing is written. |
| +void WriteText(FPDF_PAGE page, const char* pdf_name, int num) { |
| + char filename[256]; |
| + int chars_formatted = |
| + snprintf(filename, sizeof(filename), "%s.%d.txt", pdf_name, num); |
| + if (chars_formatted < 0 || |
| + static_cast<size_t>(chars_formatted) >= sizeof(filename)) { |
| + fprintf(stderr, "Filename %s is too long\n", filename); |
| + return; |
| + } |
| + |
| + // Open in binary mode: the payload is raw 32-bit code units, so text-mode |
| + // newline translation (e.g. on Windows) must not rewrite 0x0A bytes. |
| + FILE* fp = fopen(filename, "wb"); |
| + if (!fp) { |
| + fprintf(stderr, "Failed to open %s for output\n", filename); |
| + return; |
| + } |
| + |
| + // Output in UTF32-LE. |
| + unsigned char bom[] = {0xFF, 0xFE, 0x00, 0x00}; |

hal.canary
2016/06/14 20:54:11
I would prefer:
uint32_t bom = 0x0000FEFF;
dsinclair
2016/06/14 21:00:59
Done.
|
| + fwrite(bom, sizeof(unsigned char), sizeof(bom), fp); |
| + |
| + // NOTE(review): each code unit below is written in host byte order and |
| + // with sizeof(unsigned int) bytes, so it matches the BOM above only on |
| + // little-endian hosts with a 4-byte unsigned int -- confirm for all targets. |
| + FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); |
| + if (textpage) { |
| + // Query the character count once instead of on every loop iteration. |
| + const int char_count = FPDFText_CountChars(textpage); |
| + for (int i = 0; i < char_count; i++) { |
| + unsigned int c = FPDFText_GetUnicode(textpage, i); |
| + fwrite(&c, sizeof(unsigned int), 1, fp); |
| + } |
| + |
| + FPDFText_ClosePage(textpage); |
| + } |
| + |
| + (void)fclose(fp); |
| +} |
| + |
| static void WritePng(const char* pdf_name, int num, const void* buffer_void, |
| int stride, int width, int height) { |
| if (!CheckDimensions(stride, width, height)) |
| @@ -354,6 +386,12 @@ bool ParseCommandLine(const std::vector<std::string>& args, |
| return false; |
| } |
| options->output_format = OUTPUT_PNG; |
| + } else if (cur_arg == "--txt") { |
| + if (options->output_format != OUTPUT_NONE) { |
| + fprintf(stderr, "Duplicate or conflicting --txt argument\n"); |
| + return false; |
| + } |
| + options->output_format = OUTPUT_TEXT; |
| #ifdef PDF_ENABLE_SKIA |
| } else if (cur_arg == "--skp") { |
| if (options->output_format != OUTPUT_NONE) { |
| @@ -528,6 +566,10 @@ bool RenderPage(const std::string& name, |
| WriteEmf(page, name.c_str(), page_index); |
| break; |
| #endif |
| + case OUTPUT_TEXT: |
| + WriteText(page, name.c_str(), page_index); |
| + break; |
| + |
| case OUTPUT_PNG: |
| WritePng(name.c_str(), page_index, buffer, stride, width, height); |
| break; |
| @@ -746,6 +788,7 @@ static const char usage_string[] = |
| " --bin-dir=<path> - override path to v8 external data\n" |
| " --font-dir=<path> - override path to external fonts\n" |
| " --scale=<number> - scale output size by number (e.g. 0.5)\n" |
| + " --txt - write page text in UTF32-LE <pdf-name.<page-number>.txt\n" |
| #ifdef _WIN32 |
| " --bmp - write page images <pdf-name>.<page-number>.bmp\n" |
| " --emf - write page meta files <pdf-name>.<page-number>.emf\n" |