Rework stream parsing (manage \n and \r before endstream token)

This commit is contained in:
Grégory Soutadé 2022-02-22 21:14:58 +01:00
parent 47143308ca
commit b540646199

View File

@ -543,7 +543,7 @@ namespace uPDFParser
Stream* Parser::parseStream(Object* object) Stream* Parser::parseStream(Object* object)
{ {
off_t startOffset, endOffset; off_t startOffset, endOffset, endStream;
std::string token; std::string token;
// std::cout << "parseStream" << std::endl; // std::cout << "parseStream" << std::endl;
@ -574,15 +574,28 @@ namespace uPDFParser
while (1) while (1)
{ {
char buffer[4*1024]; char buffer[4*1024];
char* subs; char* subs, c;
int ret; int ret;
ret = readline(fd, buffer, sizeof(buffer)); ret = read(fd, buffer, sizeof(buffer));
subs = (char*)memmem((void*)buffer, ret, (void*)"endstream", 9); subs = (char*)memmem((void*)buffer, ret, (void*)"endstream", 9);
if (subs) if (subs)
{ {
unsigned long pos = (unsigned long)subs - (unsigned long)buffer; unsigned long pos = (unsigned long)subs - (unsigned long)buffer;
lseek(fd, -(ret-pos-9), SEEK_CUR); // Here we're just before "endstream"
endOffset = lseek(fd, 0, SEEK_CUR)-10; endOffset = lseek(fd, -(ret-pos), SEEK_CUR);
// Final position must be after endstream\n
endStream = endOffset + 10;
// Remove trailing \r and \n before endstream
for (;endOffset > startOffset; endOffset--)
{
lseek(fd, -1, SEEK_CUR);
read(fd, &c, 1);
if (c != '\n' && c != '\r')
break;
lseek(fd, -1, SEEK_CUR);
}
// Adjust final position
lseek(fd, endStream, SEEK_SET);
break; break;
} }
} }