Handle cases where no xref table is present (use the startxref offset instead) and where the trailer is non-conformant (use values from the XRef object if present)

This commit is contained in:
Grégory Soutadé 2022-03-14 19:57:25 +01:00
parent ea37dcbded
commit 27b9d8ec3e
2 changed files with 69 additions and 32 deletions

View File

@ -44,7 +44,8 @@ namespace uPDFParser
{ {
public: public:
Parser(int version_major=1, int version_minor=6): Parser(int version_major=1, int version_minor=6):
version_major(version_major), version_minor(version_minor), fd(0) version_major(version_major), version_minor(version_minor),
xrefObject(0), xrefOffset((off_t)-1), fd(0), curOffset(0)
{} {}
~Parser() ~Parser()
@ -116,11 +117,12 @@ namespace uPDFParser
Stream* parseStream(Object* object); Stream* parseStream(Object* object);
Name* parseName(std::string& token); Name* parseName(std::string& token);
void repairTrailer();
void writeUpdate(const std::string& filename); void writeUpdate(const std::string& filename);
int version_major, version_minor; int version_major, version_minor;
std::vector<Object*> _objects; std::vector<Object*> _objects;
Object trailer; Object trailer, *xrefObject;
off_t xrefOffset; off_t xrefOffset;
int fd; int fd;
off_t curOffset; off_t curOffset;

View File

@ -68,6 +68,31 @@ namespace uPDFParser
return res.str(); return res.str();
} }
/**
 * @brief Build a numeric object from its textual representation
 *
 * A token containing a '.' is converted with std::stof and returned as a
 * Real; any other token is converted with std::stoi and returned as an
 * Integer. When the token starts with '.', a leading "0" is prepended to
 * it in place (the caller's string is modified).
 *
 * @param token Text of the number, without its sign
 * @param sign  '\0' when no sign was given; '-' negates the parsed value.
 *              Whether a sign was given is forwarded as the second
 *              constructor argument of the created object.
 *
 * @return Object allocated with new (Real or Integer)
 */
static DataType* tokenToNumber(std::string& token, char sign='\0')
{
    const bool signGiven = (sign != '\0');
    const bool negate = (sign == '-');
    const std::string::size_type dotPos = token.find('.');

    if (dotPos != std::string::npos)
    {
        // ".5" style notation: normalize to "0.5" before conversion
        if (dotPos == 0)
            token = std::string("0") + token;

        float realValue = std::stof(token);
        if (negate)
            realValue = -realValue;

        return new Real(realValue, signGiven);
    }

    int intValue = std::stoi(token);
    if (negate)
        intValue = -intValue;

    return new Integer(intValue, signGiven);
}
/** /**
* @brief Read data until '\n' or '\r' is found or buffer is full * @brief Read data until '\n' or '\r' is found or buffer is full
*/ */
@ -291,11 +316,11 @@ namespace uPDFParser
void Parser::parseStartXref() void Parser::parseStartXref()
{ {
std::string token; std::string offset, token;
// std::cout << "Parse startxref" << std::endl; // std::cout << "Parse startxref" << std::endl;
token = nextToken(); // XREF offset offset = nextToken(); // XREF offset
token = nextToken(false, true); // %%EOF token = nextToken(false, true); // %%EOF
if (strncmp(token.c_str(), "%%EOF", 5)) if (strncmp(token.c_str(), "%%EOF", 5))
EXCEPTION(INVALID_TRAILER, "Invalid trailer at offset " << curOffset); EXCEPTION(INVALID_TRAILER, "Invalid trailer at offset " << curOffset);
@ -305,6 +330,17 @@ namespace uPDFParser
*/ */
if (token.size() > 5) if (token.size() > 5)
lseek(fd, curOffset+5, SEEK_SET); lseek(fd, curOffset+5, SEEK_SET);
/* Case where no xref table present */
if (xrefOffset == (off_t)-1)
{
DataType* integer = tokenToNumber(offset);
if (integer->type() != DataType::TYPE::INTEGER)
EXCEPTION(INVALID_TRAILER, "Invalid startxref offset");
xrefOffset = ((Integer*)integer)->value();
}
} }
bool Parser::parseTrailer() bool Parser::parseTrailer()
@ -370,31 +406,6 @@ namespace uPDFParser
res = parseTrailer(); res = parseTrailer();
return res; return res;
} }
/**
 * @brief Build a numeric object from its textual representation
 *
 * A token containing a '.' is converted with std::stof and returned as a
 * Real; any other token is converted with std::stoi and returned as an
 * Integer. When the token starts with '.', a leading "0" is prepended to
 * it in place (the caller's string is modified).
 *
 * @param token Text of the number, without its sign
 * @param sign  '\0' when no sign was given; '-' negates the parsed value.
 *              Whether a sign was given is forwarded as the second
 *              constructor argument of the created object.
 *
 * @return Object allocated with new (Real or Integer)
 */
static DataType* tokenToNumber(std::string& token, char sign='\0')
{
    const bool signGiven = (sign != '\0');
    const bool negate = (sign == '-');
    const std::string::size_type dotPos = token.find('.');

    if (dotPos != std::string::npos)
    {
        // ".5" style notation: normalize to "0.5" before conversion
        if (dotPos == 0)
            token = std::string("0") + token;

        float realValue = std::stof(token);
        if (negate)
            realValue = -realValue;

        return new Real(realValue, signGiven);
    }

    int intValue = std::stoi(token);
    if (negate)
        intValue = -intValue;

    return new Integer(intValue, signGiven);
}
DataType* Parser::parseSignedNumber(std::string& token) DataType* Parser::parseSignedNumber(std::string& token)
{ {
@ -707,6 +718,10 @@ namespace uPDFParser
datas.push_back(res); datas.push_back(res);
} }
} }
// Keep a reference to last xrefObject
if (object->hasKey("Type") && (*object)["Type"]->str() == "/XRef")
xrefObject = object;
} }
void Parser::parse(const std::string& filename) void Parser::parse(const std::string& filename)
@ -789,6 +804,22 @@ namespace uPDFParser
return 0; return 0;
} }
/**
 * @brief Complete the trailer with entries taken from the last XRef object
 *
 * For each of the keys Root, Info, Encrypt and ID: when the trailer does
 * not have the key but the recorded XRef object does, a clone of the XRef
 * object's value is added to the trailer dictionary. Keys already present
 * in the trailer are left untouched.
 *
 * Does nothing when no XRef object has been recorded (xrefObject is null).
 */
void Parser::repairTrailer()
{
    // Try to fill mandatory values not present in original trailer
    // with xrefObject if there is one
    if (!xrefObject)
        return;

    static const char* const keys[] = {"Root", "Info", "Encrypt", "ID"};

    // Range-for avoids comparing a signed index against an unsigned sizeof()
    for (const char* key : keys)
    {
        if (trailer.hasKey(key) || !xrefObject->hasKey(key))
            continue;

        trailer.dictionary().addData(key, (*xrefObject)[key]->clone());
    }
}
void Parser::writeUpdate(const std::string& filename) void Parser::writeUpdate(const std::string& filename)
{ {
@ -850,8 +881,11 @@ namespace uPDFParser
::write(newFd, xrefStr.c_str(), xrefStr.size()); ::write(newFd, xrefStr.c_str(), xrefStr.size());
trailer.deleteKey("Prev"); trailer.deleteKey("Prev");
trailer.dictionary().addData("Prev", new Integer((int)xrefOffset)); if (xrefOffset != (off_t)-1)
trailer.dictionary().addData("Prev", new Integer((int)xrefOffset));
repairTrailer();
std::string trailerStr = trailer.dictionary().str(); std::string trailerStr = trailer.dictionary().str();
::write(newFd, "trailer\n", 8); ::write(newFd, "trailer\n", 8);
::write(newFd, trailerStr.c_str(), trailerStr.size()); ::write(newFd, trailerStr.c_str(), trailerStr.size());
@ -921,7 +955,6 @@ namespace uPDFParser
} }
} }
off_t newXrefOffset = lseek(newFd, 0, SEEK_CUR); off_t newXrefOffset = lseek(newFd, 0, SEEK_CUR);
std::string xrefStr = xref.str(); std::string xrefStr = xref.str();
@ -934,7 +967,9 @@ namespace uPDFParser
trailer.deleteKey("XRefStm"); trailer.deleteKey("XRefStm");
if (xrefStmOffset != 0) if (xrefStmOffset != 0)
trailer.dictionary().addData("XRefStm", new Integer(xrefStmOffset)); trailer.dictionary().addData("XRefStm", new Integer(xrefStmOffset));
repairTrailer();
std::string trailerStr = trailer.dictionary().str(); std::string trailerStr = trailer.dictionary().str();
::write(newFd, "trailer\n", 8); ::write(newFd, "trailer\n", 8);
::write(newFd, trailerStr.c_str(), trailerStr.size()); ::write(newFd, trailerStr.c_str(), trailerStr.size());