diff --git a/include/uPDFParser.h b/include/uPDFParser.h
index 633c395..cd10ac2 100644
--- a/include/uPDFParser.h
+++ b/include/uPDFParser.h
@@ -104,6 +104,7 @@ namespace uPDFParser
         bool parseXref();
         bool parseTrailer();
 
+        char prevChar();
         std::string nextToken(bool exceptionOnEOF=true, bool readComment=false);
 
         DataType* parseType(std::string& token, Object* object, std::map& dict);
@@ -121,6 +122,7 @@ namespace uPDFParser
         void writeBuffer(int fd, const char* buffer, int size);
         void writeUpdate(const std::string& filename);
 
+        char c;
         int version_major, version_minor;
         std::vector _objects;
         Object trailer, *xrefObject;
diff --git a/src/uPDFParser.cpp b/src/uPDFParser.cpp
index 524ab71..eafddf4 100644
--- a/src/uPDFParser.cpp
+++ b/src/uPDFParser.cpp
@@ -157,19 +157,22 @@ namespace uPDFParser
         }
     }
 
+    char Parser::prevChar() { return c; }
+
     /**
      * @brief Find next token to analyze
      */
     std::string Parser::nextToken(bool exceptionOnEOF, bool readComment)
     {
-        char c = 0, prev_c;
+        char prev_c;
         std::string res("");
         int i;
         static const char delims[] = " \t<>[]()/";
         static const char whitespace_prev_delims[] = "+-"; // Need whitespace before
         static const char start_delims[] = "<>[]()";
         bool found = false;
-
+
+        c = 0;
         while (!found)
         {
             prev_c = c;
@@ -574,9 +577,19 @@ namespace uPDFParser
     {
         off_t startOffset, endOffset, endStream;
         std::string token;
+        char c = 0;
 
         // std::cout << "parseStream" << std::endl;
 
+        // Remove \n after \r if there is one
+        if (prevChar() == '\r' && read(fd, &c, 1) == 1)
+        {
+            if (c != '\n')
+            {
+                lseek(fd, -1, SEEK_CUR);
+            }
+        }
+
         startOffset = lseek(fd, 0, SEEK_CUR);
 
         if (!object->hasKey("Length"))
@@ -622,9 +635,16 @@ namespace uPDFParser
             ret = read(fd, &c, 1);
             if (ret <= 0)
                 break;
-            if (c != '\n' && c != '\r')
-                break;
-            lseek(fd, -1, SEEK_CUR);
+            if (c == '\r')
+            {
+                lseek(fd, -1, SEEK_CUR);
+                continue;
+            }
+            else if (c == '\n')
+            {
+                lseek(fd, -1, SEEK_CUR);
+            }
+            break;
         }
         // Adjust final position
         lseek(fd, endStream, SEEK_SET);
diff --git a/src/uPDFTypes.cpp b/src/uPDFTypes.cpp
index 8f9b5e3..a025f58 100644
--- a/src/uPDFTypes.cpp
+++ b/src/uPDFTypes.cpp
@@ -105,19 +105,10 @@ namespace uPDFParser
 
     std::string Stream::str()
     {
-        std::string res = "stream";
+        std::string res = "stream\n";
         const char* streamData = (const char*)data(); // Force reading if not in memory
-        if (_dataLength &&
-            streamData[0] != '\n' &&
-            streamData[0] != '\r')
-            res += "\n";
         res += std::string(streamData, _dataLength);
-        // Be sure there is a final line return
-        if (_dataLength &&
-            streamData[_dataLength-1] != '\n' &&
-            streamData[_dataLength-1] != '\r')
-            res += "\n";
-        res += "endstream\n";
+        res += "\nendstream\n";
 
         return res;
     }