@@ -22,28 +22,15 @@ Example how to extract images from a PDF document.
2222
2323// Example of how to extract tables from a PDF document and save them in CSV format.
2424// GetText writes the text of a single text element to the output stream.
25- void GetText (PdeElement* element, std::ofstream& ofs, bool eof) {
26- PdfElementType elem_type = element->GetType ();
27- if (elem_type == kPdeText ) {
28- PdeText* text_elem = static_cast <PdeText*>(element);
29- std::wstring text;
30- text.resize (text_elem->GetText (nullptr , 0 ));
31- text_elem->GetText ((wchar_t *)text.c_str (), text.size ());
32- std::string str = ToUtf8 (text);
33- ofs << str;
34- if (eof)
35- ofs << std::endl;
36- }
37- else {
38- int count = element->GetNumChildren ();
39- if (count == 0 )
40- return ;
41- for (int i = 0 ; i < count; i++) {
42- PdeElement* child = element->GetChild (i);
43- if (child)
44- GetText (child, ofs, eof);
45- }
46- }
25+ void GetText (PdeText* element, std::ofstream& ofs, bool eof) {
26+ PdeText* text_elem = static_cast <PdeText*>(element);
27+ std::wstring text;
28+ text.resize (text_elem->GetText (nullptr , 0 ));
29+ text_elem->GetText ((wchar_t *)text.c_str (), text.size ());
30+ std::string str = ToUtf8 (text);
31+ ofs << str;
32+ if (eof)
33+ ofs << std::endl;
4734}
4835
4936// SaveTable processes each element recursively.
@@ -77,7 +64,7 @@ void SaveTable(PdeElement* element, std::wstring save_path, int& table_index) {
7764 for (int i = 0 ; i < count; i++) {
7865 PdeElement* child = cell->GetChild (i);
7966 if (child && (child->GetType () == kPdeText )) {
80- GetText (child, ofs, false );
67+ GetText ((PdeText*) child, ofs, false );
8168 }
8269 if (i < count - 1 ) {
8370 ofs << " " ;
0 commit comments