From 799bacdc8667b4b0af0779bc0e06419842c525d3 Mon Sep 17 00:00:00 2001 From: David Morton Date: Fri, 26 Oct 2018 20:41:47 +0000 Subject: [PATCH 01/13] Adding the form field name onto the text input for reference --- src/HTMLRenderer/form.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/HTMLRenderer/form.cc b/src/HTMLRenderer/form.cc index 6b5162264..b75c03dd0 100644 --- a/src/HTMLRenderer/form.cc +++ b/src/HTMLRenderer/form.cc @@ -41,9 +41,13 @@ void HTMLRenderer::process_form(ofstream & out) if(w->getType() == formText) { - double font_size = height / 2; + double font_size = height / 2; + FormField *f = w->getField(); + Object *o = f->getObj(); + char *name = o->getDict()->lookup((char *)"T", o)->getString()->getCString(); - out << " Date: Fri, 26 Oct 2018 21:43:04 +0000 Subject: [PATCH 02/13] Fixing CSS issues with form fields during print --- share/base.css.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/share/base.css.in b/share/base.css.in index 515134dc1..88a19c959 100644 --- a/share/base.css.in +++ b/share/base.css.in @@ -70,6 +70,9 @@ background-color:transparent; } .@CSS_CSS_DRAW_CN@ { display:none; } + .@CSS_INPUT_TEXT_CN@ { + zoom: 1.775; + } } /* Part 2: Page Elements: Modify with caution * The followings are base classes, some of which are meant to be override by PDF specific classes From 3aff83ef6e4b20626881c28361d27f6b2d79add7 Mon Sep 17 00:00:00 2001 From: David Morton Date: Tue, 30 Oct 2018 14:43:59 +0000 Subject: [PATCH 03/13] Switching from an input to a div so to properly fill in the form fields. --- src/HTMLRenderer/form.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/HTMLRenderer/form.cc b/src/HTMLRenderer/form.cc index b75c03dd0..9b6020bee 100644 --- a/src/HTMLRenderer/form.cc +++ b/src/HTMLRenderer/form.cc @@ -46,15 +46,14 @@ void HTMLRenderer::process_form(ofstream & out) Object *o = f->getObj(); char *name = o->getDict()->lookup((char *)"T", o)->getString()->getCString(); - out << "" << endl; + << font_size << "px;\" >" << endl; } else if(w->getType() == formButton) { From 5ce58332c35ca1cd116d8ed2b8db2a56246e0086 Mon Sep 17 00:00:00 2001 From: David Morton Date: Wed, 31 Oct 2018 15:30:11 +0000 Subject: [PATCH 04/13] A couple small changes to support better alignment and font --- share/base.css.in | 2 +- src/HTMLRenderer/form.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/share/base.css.in b/share/base.css.in index 88a19c959..070abc82e 100644 --- a/share/base.css.in +++ b/share/base.css.in @@ -71,7 +71,7 @@ } .@CSS_CSS_DRAW_CN@ { display:none; } .@CSS_INPUT_TEXT_CN@ { - zoom: 1.775; + zoom: 1.778; } } /* Part 2: Page Elements: Modify with caution diff --git a/src/HTMLRenderer/form.cc b/src/HTMLRenderer/form.cc index 9b6020bee..6c7bd0545 100644 --- a/src/HTMLRenderer/form.cc +++ b/src/HTMLRenderer/form.cc @@ -49,7 +49,7 @@ void HTMLRenderer::process_form(ofstream & out) out << "
Date: Sat, 10 Nov 2018 12:25:58 +0000 Subject: [PATCH 05/13] Always type check lookup values. --- src/HTMLRenderer/form.cc | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/HTMLRenderer/form.cc b/src/HTMLRenderer/form.cc index 6c7bd0545..77fb2cdf7 100644 --- a/src/HTMLRenderer/form.cc +++ b/src/HTMLRenderer/form.cc @@ -41,19 +41,27 @@ void HTMLRenderer::process_form(ofstream & out) if(w->getType() == formText) { + double font_size = height / 2; FormField *f = w->getField(); Object *o = f->getObj(); - char *name = o->getDict()->lookup((char *)"T", o)->getString()->getCString(); - out << "
" << endl; + // the following line throws if the result of lookup doesnt return a string. + Object *lo = o->getDict()->lookup((char *)"T", o); + + if (lo->getType() == objString) { + char *name = lo->getString()->getCString(); + + out << "
" << endl; + } + } else if(w->getType() == formButton) { From 449e55fcfb2bf530c50bd7001fa7cf272ee7b5cc Mon Sep 17 00:00:00 2001 From: David Morton Date: Tue, 13 Nov 2018 04:40:07 +0000 Subject: [PATCH 06/13] Auto-detection of landscape --- src/HTMLRenderer/general.cc | 58 ++++++++++++++++++++----------------- src/StateManager.h | 30 +++++++++++++++++++ 2 files changed, 62 insertions(+), 26 deletions(-) diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 6a54194e5..c9fdd422a 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -190,6 +190,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) void HTMLRenderer::endPage() { long long wid = all_manager.width.install(html_text_page.get_width()); long long hid = all_manager.height.install(html_text_page.get_height()); + all_manager.landscape_manager.install(html_text_page.get_width(), html_text_page.get_height()); (*f_curpage) << "
> value_map; }; +class LandscapeManager +{ +public: + void install(double width, double height) { + value_map.insert(std::make_pair(width, height)); + } + + void dump_css(std::ostream & out) { + for (auto & p : value_map) + { + if (p.first > p.second) { + out << "@page{size:landscape;}"; + out << std::endl; + } + } + } + + void dump_print_css(std::ostream & out, double scale) { + for(auto & p : value_map) { + if (p.first > p.second) { + out << "@page{size:landscape;}"; + out << std::endl; + } + } + } +private: + std::unordered_map value_map; +}; + struct AllStateManager { TransformMatrixManager transform_matrix; @@ -423,6 +452,7 @@ struct AllStateManager WidthManager width; LeftManager left; BGImageSizeManager bgimage_size; + LandscapeManager landscape_manager; }; } // namespace pdf2htmlEX From 52e01083499d284a17d3db5f810434127bd95eff Mon Sep 17 00:00:00 2001 From: David Morton Date: Wed, 21 Nov 2018 12:56:01 -0600 Subject: [PATCH 07/13] New changes to allow options to be mapped to check boxes. --- src/HTMLRenderer/form.cc | 66 +++++++++++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 14 deletions(-) diff --git a/src/HTMLRenderer/form.cc b/src/HTMLRenderer/form.cc index 77fb2cdf7..bc6442612 100644 --- a/src/HTMLRenderer/form.cc +++ b/src/HTMLRenderer/form.cc @@ -38,11 +38,12 @@ void HTMLRenderer::process_form(ofstream & out) double width = x2 - x1; double height = y2 - y1; + double font_size = height / 2; + if(w->getType() == formText) { - double font_size = height / 2; FormField *f = w->getField(); Object *o = f->getObj(); @@ -61,21 +62,58 @@ void HTMLRenderer::process_form(ofstream & out) << "px; line-height: " << std::to_string(height) << "px; font-size: " << font_size << "px;\" >
" << endl; } - } - else if(w->getType() == formButton) + else if (w->getType() == formButton) { - //Ideally would check w->getButtonType() - //for more specific rendering - width += 3; - height += 3; - - out << "
" << endl; + FormWidgetButton *b = dynamic_cast(w); + + if (b->getButtonType() == formButtonCheck) { + // grab the export value of the form button type.... + FormField *f = w->getField(); + Object *o = f->getObj(); + + if (o->getType() == objDict) { + cerr << "Is Dictionary" << endl; + cerr << o->getType() << endl; + + //Object *widgeto = b->getObj(); + + Dict *od = o->getDict(); + + + // the following line throws if the result of lookup doesnt return a string. + Object *formfielddescription = od->lookup((char *)"Parent", o); + + if (formfielddescription->getType() == objDict) { + + Dict *formfielddictionary = formfielddescription->getDict(); + + Object *lo = formfielddictionary->lookup((char *)"T", formfielddescription); + + if (lo->getType() == objString) { + char *fieldname = lo->getString()->getCString(); + + cerr << fieldname << endl; + + char *name = b->getOnStr(); + + cerr << name << endl; + + out << "
" << endl; + } else { + cerr << lo->getType() << endl; + } + } + } + } } else { From 5ee8bd83c7d4afb4e2fe288d4d86a2986bb4ae57 Mon Sep 17 00:00:00 2001 From: David Morton Date: Wed, 16 Jan 2019 15:49:51 -0600 Subject: [PATCH 08/13] Another commit to try to rope in those pesky checkboxes --- src/HTMLRenderer/form.cc | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/HTMLRenderer/form.cc b/src/HTMLRenderer/form.cc index bc6442612..e39b6fad4 100644 --- a/src/HTMLRenderer/form.cc +++ b/src/HTMLRenderer/form.cc @@ -15,10 +15,10 @@ #include "util/namespace.h" #include "util/misc.h" +using namespace std; + namespace pdf2htmlEX { -using std::ofstream; -using std::cerr; void HTMLRenderer::process_form(ofstream & out) { @@ -65,6 +65,7 @@ void HTMLRenderer::process_form(ofstream & out) } else if (w->getType() == formButton) { + cerr << "Found a button" << endl; FormWidgetButton *b = dynamic_cast(w); if (b->getButtonType() == formButtonCheck) { @@ -84,6 +85,8 @@ void HTMLRenderer::process_form(ofstream & out) // the following line throws if the result of lookup doesnt return a string. Object *formfielddescription = od->lookup((char *)"Parent", o); + cerr << "didnt throw" << endl; + if (formfielddescription->getType() == objDict) { Dict *formfielddictionary = formfielddescription->getDict(); @@ -93,6 +96,7 @@ void HTMLRenderer::process_form(ofstream & out) if (lo->getType() == objString) { char *fieldname = lo->getString()->getCString(); + cerr << "first branch" << endl; cerr << fieldname << endl; char *name = b->getOnStr(); @@ -103,16 +107,31 @@ void HTMLRenderer::process_form(ofstream & out) << "\" form-field=\"" << fieldname << "\" export-value=\"" << name << "\" class=\"" << CSS::INPUT_TEXT_CN - << "\" style=\"position: absolute; font-family:arial; left: " << x1 + << "\" style=\"position: absolute; font -family:arial; left: " << x1 << "px; bottom: " << y1 << "px;" << " width: " << width << "px; height: " << std::to_string(height) << "px; line-height: " << std::to_string(height) << "px; font-size: " << font_size << "px;\" >
" << endl; - } else { - cerr << lo->getType() << endl; } + } else if (od->lookup((char *)"T", o)->getType() == objString) { + char *fieldname = od->lookup((char *)"T", o)->getString()->getCString(); + char *name = b->getOnStr(); + + cerr << "second branch" << endl; + cerr << fieldname << endl; + cerr << name << endl; + + out << "
" << endl; } - } + } } } else @@ -120,6 +139,7 @@ void HTMLRenderer::process_form(ofstream & out) cerr << "Unsupported form field detected" << endl; } } + cerr << flush; } } From ce83fd83a5574cc426ee704edd2d019161ff57dd Mon Sep 17 00:00:00 2001 From: David Morton Date: Tue, 26 Feb 2019 13:35:10 +0000 Subject: [PATCH 09/13] Allowing more than one form field to have the same value --- src/HTMLRenderer/form.cc | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/HTMLRenderer/form.cc b/src/HTMLRenderer/form.cc index e39b6fad4..e0b0a5520 100644 --- a/src/HTMLRenderer/form.cc +++ b/src/HTMLRenderer/form.cc @@ -25,6 +25,8 @@ void HTMLRenderer::process_form(ofstream & out) FormPageWidgets * widgets = cur_catalog->getPage(pageNum)->getFormWidgets(); int num = widgets->getNumWidgets(); + cerr << "number of widgets: " << num << endl; + for(int i = 0; i < num; i++) { FormWidget * w = widgets->getWidget(i); @@ -40,19 +42,33 @@ void HTMLRenderer::process_form(ofstream & out) double height = y2 - y1; double font_size = height / 2; + tm_transform(default_ctm, x1, y1); if(w->getType() == formText) { - + cerr << "Found a text box" << endl; FormField *f = w->getField(); Object *o = f->getObj(); // the following line throws if the result of lookup doesnt return a string. Object *lo = o->getDict()->lookup((char *)"T", o); + if (lo->getType() == objNull) { + // check the parent + Object *wObj = w->getObj(); + cerr << "lo is obj null, so looking for parent" << endl; + Dict *wDict = wObj->getDict(); + cerr << "Fetched parent dictionary" << endl; + Object *parent = wDict->lookup((char *)"Parent", wObj); + if (parent->getType() == objDict) { + cerr << "Fetching from parent dictionary" << endl; + lo = parent->getDict()->lookup((char *)"T", parent); + } + } + if (lo->getType() == objString) { char *name = lo->getString()->getCString(); - + cerr << "Writing text box for " << name << endl; out << "
" << endl; + } else if (lo->getType() == objNull) { + //Object *parentText = o->getDict()->lookup((char *)"Parent", o); + + cerr << "Found something else..." << o->getType() << endl; } } else if (w->getType() == formButton) From 3d25965b0bdcf226a4507472f9f5ad01feb52dc8 Mon Sep 17 00:00:00 2001 From: David Morton Date: Tue, 26 Feb 2019 15:32:22 +0000 Subject: [PATCH 10/13] Adding multiline class on multiline items --- src/HTMLRenderer/form.cc | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/HTMLRenderer/form.cc b/src/HTMLRenderer/form.cc index e0b0a5520..adb577989 100644 --- a/src/HTMLRenderer/form.cc +++ b/src/HTMLRenderer/form.cc @@ -51,7 +51,28 @@ void HTMLRenderer::process_form(ofstream & out) Object *o = f->getObj(); // the following line throws if the result of lookup doesnt return a string. - Object *lo = o->getDict()->lookup((char *)"T", o); + Object *lo = new Object(); + o->getDict()->lookup((char *)"T", lo); + Object *ff = new Object(); + o->getDict()->lookup((char *)"Ff", ff); + //Object *po = o->getDict()->lookup((char *)"T", o); + cerr << "Success" << endl; + //o = f->getObj(); + //Object *ffEntry = o->getDict()->lookup((char *)"T", o); + int ffvalue = 0; + int multiline_flag = 4096; + if (ff->getType() == objInt) { + // cerr << "FFlo stuff" << endl; + ffvalue = ff->getInt(); + cerr << "Found fflo" << ffvalue << endl; + } + std::string classes = ""; + classes = classes + CSS::INPUT_TEXT_CN; + + if (ffvalue & multiline_flag) { + cerr << "This is a multiline field" << endl; + classes = classes + " multiline"; + } if (lo->getType() == objNull) { // check the parent @@ -66,12 +87,14 @@ void HTMLRenderer::process_form(ofstream & out) } } - if (lo->getType() == objString) { + if (lo->getType() == objString) { + + //cerr << "Flags: " << fflo->getType() << endl; char *name = lo->getString()->getCString(); cerr << "Writing text box for " << name << endl; out << "
Date: Fri, 16 Aug 2019 13:04:28 -0500 Subject: [PATCH 11/13] Added blurb to readme --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 64e37d177..99e5d0dae 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,8 @@ +# David's Branch of pdf2htmlEX +This isn't really a maintained branch anymore. I've created this branch to fix a few issues with PDF forms, and how they might translate to HTML blobs. I'm not a pro at C++, so if you, in the great wild internet, are reading these words I'm shouting into the void right now, and you feel like criticizing my commits, feel free. I mostly just get into this code and then get out as quickly as I can before someone claims I know C++. + +# Original Readme content + pdf2htmlEX is no longer under active development. New maintainers are [wanted](http://pdf2htmlex.blogspot.ch/2016/12/looking-for-new-maintainer.html). #![](http://coolwanglu.github.io/pdf2htmlEX/images/pdf2htmlEX-64x64.png) pdf2htmlEX From 2ee45f1c23bb08bf9e31422d3fe1287559e863d3 Mon Sep 17 00:00:00 2001 From: David Morton Date: Fri, 16 Aug 2019 13:05:06 -0500 Subject: [PATCH 12/13] Readme formatting --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 99e5d0dae..f4c24a0db 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,15 @@ -# David's Branch of pdf2htmlEX +# David Morton's Personal Fork of pdf2htmlEX!!! This isn't really a maintained branch anymore. I've created this branch to fix a few issues with PDF forms, and how they might translate to HTML blobs. I'm not a pro at C++, so if you, in the great wild internet, are reading these words I'm shouting into the void right now, and you feel like criticizing my commits, feel free. I mostly just get into this code and then get out as quickly as I can before someone claims I know C++. + + + + + + + + + # Original Readme content pdf2htmlEX is no longer under active development. New maintainers are [wanted](http://pdf2htmlex.blogspot.ch/2016/12/looking-for-new-maintainer.html). From 7bf60118ad0f7e535ef5066274cda2a92e44d0ff Mon Sep 17 00:00:00 2001 From: Hunter Windham Date: Fri, 2 Jun 2023 15:22:44 -0500 Subject: [PATCH 13/13] Changes to fix fontsize and alignmnet issues --- src/HTMLRenderer/form.cc | 65 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/src/HTMLRenderer/form.cc b/src/HTMLRenderer/form.cc index adb577989..454947e46 100644 --- a/src/HTMLRenderer/form.cc +++ b/src/HTMLRenderer/form.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include "HTMLRenderer.h" #include "util/namespace.h" @@ -41,7 +42,8 @@ void HTMLRenderer::process_form(ofstream & out) double width = x2 - x1; double height = y2 - y1; double font_size = height / 2; - + // Define font_unit with a default of px this will change to pt if there is a defined font in the pdf template + std::string font_unit = "px"; tm_transform(default_ctm, x1, y1); if(w->getType() == formText) @@ -55,6 +57,18 @@ void HTMLRenderer::process_form(ofstream & out) o->getDict()->lookup((char *)"T", lo); Object *ff = new Object(); o->getDict()->lookup((char *)"Ff", ff); + + // DA is an null || string that represents a few things such font family, font size ect. + // Ex. looks like '/Helv 6 Tf 0 g' where Helv is the font family + // and 6 is the font size. + Object *da = new Object(); + o->getDict()->lookup((char *)"DA", da); + + // Q is an null || int that represents the text alignmnet if it is null or 0 it means + // left aligned which is default. 1 stands for center aligned and 2 stands for right aligned. + Object *q = new Object(); + o->getDict()->lookup((char *)"Q", q); + //Object *po = o->getDict()->lookup((char *)"T", o); cerr << "Success" << endl; //o = f->getObj(); @@ -83,9 +97,55 @@ void HTMLRenderer::process_form(ofstream & out) Object *parent = wDict->lookup((char *)"Parent", wObj); if (parent->getType() == objDict) { cerr << "Fetching from parent dictionary" << endl; + + // If lo was null this means that there is a parent with the + // missing info. The same is likely the case both da and q which + // live at the same level so we repeat the lookups if they are null. + if (da->getType() == objNull) { + parent->getDict()->lookup((char *)"DA", da); + } + + if (q->getType() == objNull) { + parent->getDict()->lookup((char *)"Q", q); + } + lo = parent->getDict()->lookup((char *)"T", parent); } } + + // If we have a value for da we want to use it to set the font size + // however, the font size is stored with other data so we must strip + // it down to just font size. Ex. looks like '/Helv 6 Tf 0 g' + // where Helv is the font family and 6 is the font size. + if (da->getType() == objString) { + // da is just a pointer so we need to output the value into a temporary variable. + std::string davalue = da->getString()->getCString(); + // Removes everything from string that is not a digit or space + // this will convert it from looking like '/Helv 6 Tf 0 g' + // to looking something like '6 0' + davalue.erase( + std::remove_if(davalue.begin(), davalue.end(), [](char ch) { return !std::isdigit(ch) && !std::isspace(ch);}), + davalue.end() + ); + + // Call stoi() passing the davalue this will grab the first number and stop at space character + // this will convert it from something like '6 0' to '6'. + font_size = std::stoi(davalue); + // If da has a value then we will have a font size that needs to be pt not px + font_unit = "pt"; + } + + // Default value for text_align is left + std::string text_align = "left"; + // If we have a q value this means that we have an alignmnet and must set it + // based off of 1 for center and 2 for right alignment. + if (q->getType() == objInt) { + if(q->getInt() == 1) { + text_align = "center"; + } else if(q->getInt() == 2) { + text_align = "right"; + } + } if (lo->getType() == objString) { @@ -97,9 +157,10 @@ void HTMLRenderer::process_form(ofstream & out) << "\" class=\"" << classes << "\" style=\"position: absolute; font-family:arial; left: " << x1 << "px; bottom: " << y1 << "px;" + << " text-align: " << text_align << ";" << " width: " << width << "px; height: " << std::to_string(height) << "px; line-height: " << std::to_string(height) << "px; font-size: " - << font_size << "px;\" >
" << endl; + << font_size << font_unit << ";\" >" << endl; } else if (lo->getType() == objNull) { //Object *parentText = o->getDict()->lookup((char *)"Parent", o);