From b5406461998d1eff98c33c92cd0a10273b288a16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9gory=20Soutad=C3=A9?= Date: Tue, 22 Feb 2022 21:14:58 +0100 Subject: [PATCH] Rework stream parsing (manage \n and \r before endstream token) --- src/uPDFParser.cpp | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/uPDFParser.cpp b/src/uPDFParser.cpp index a053127..357c79f 100644 --- a/src/uPDFParser.cpp +++ b/src/uPDFParser.cpp @@ -543,7 +543,7 @@ namespace uPDFParser Stream* Parser::parseStream(Object* object) { - off_t startOffset, endOffset; + off_t startOffset, endOffset, endStream; std::string token; // std::cout << "parseStream" << std::endl; @@ -574,15 +574,28 @@ namespace uPDFParser while (1) { char buffer[4*1024]; - char* subs; + char* subs, c; int ret; - ret = readline(fd, buffer, sizeof(buffer)); + ret = read(fd, buffer, sizeof(buffer)); subs = (char*)memmem((void*)buffer, ret, (void*)"endstream", 9); if (subs) { unsigned long pos = (unsigned long)subs - (unsigned long)buffer; - lseek(fd, -(ret-pos-9), SEEK_CUR); - endOffset = lseek(fd, 0, SEEK_CUR)-10; + // Here we're just before "endstream" + endOffset = lseek(fd, -(ret-pos), SEEK_CUR); + // Final position must be after endstream\n + endStream = endOffset + 10; + // Remove trailing \r and \n before endstream + for (;endOffset > startOffset; endOffset--) + { + lseek(fd, -1, SEEK_CUR); + read(fd, &c, 1); + if (c != '\n' && c != '\r') + break; + lseek(fd, -1, SEEK_CUR); + } + // Adjust final position + lseek(fd, endStream, SEEK_SET); break; } }