Add support for full document write operation

This commit is contained in:
Grégory Soutadé 2021-12-18 17:29:41 +01:00
parent cadd567d52
commit 47143308ca
5 changed files with 136 additions and 11 deletions

View File

@ -2,7 +2,7 @@ Introduction
------------ ------------
A very simple PDF parser that will load PDF objects without interpretation (zlib, streams, string encoding...). A very simple PDF parser that will load PDF objects without interpretation (zlib, streams, string encoding...).
It currently only allows updating a PDF file with new objects. It's also possible to write a new PDF or update an existing one.
Compilation Compilation

View File

@ -35,14 +35,16 @@
namespace uPDFParser namespace uPDFParser
{ {
class XRefValue;
/** /**
* @brief PDF Parser * @brief PDF Parser
*/ */
class Parser class Parser
{ {
public: public:
Parser(): Parser(int version_major=1, int version_minor=6):
fd(0) version_major(version_major), version_minor(version_minor), fd(0)
{} {}
~Parser() ~Parser()
@ -78,8 +80,25 @@ namespace uPDFParser
*/ */
void addObject(Object* object) { _objects.push_back(object); } void addObject(Object* object) { _objects.push_back(object); }
/**
* @brief Return trailer object
*/
Object& getTrailer() {return trailer; }
/**
* @brief Return xref table. This table is read and updated only once, after parsing.
* Further additions or deletions of objects will make it inconsistent.
*/
const std::vector<XRefValue>& xrefTable() {return _xrefTable;}
/**
* @brief Return a specific object
*/
Object* getObject(int objectId, int generationNumber=0);
private: private:
void parseObject(std::string& token); void parseObject(std::string& token);
void parseHeader();
void parseStartXref(); void parseStartXref();
bool parseXref(); bool parseXref();
bool parseTrailer(); bool parseTrailer();
@ -99,11 +118,37 @@ namespace uPDFParser
void writeUpdate(const std::string& filename); void writeUpdate(const std::string& filename);
int version_major, version_minor;
std::vector<Object*> _objects; std::vector<Object*> _objects;
Object trailer; Object trailer;
off_t xrefOffset; off_t xrefOffset;
int fd; int fd;
off_t curOffset; off_t curOffset;
std::vector<XRefValue> _xrefTable;
};
class Object; // Only pointers to Object are stored below, so a declaration is enough.

/**
 * @brief One entry of the xref table
 *
 * Simple value holder: every field is set by the constructor and exposed
 * through a getter. Only the attached object pointer can be changed
 * afterwards (see setObject()).
 *
 * All getters are const so that they can be called on the entries of the
 * const vector returned by Parser::xrefTable().
 *
 * The class has no destructor: the Object pointer is stored as-is and is
 * never deleted here.
 */
class XRefValue
{
public:
    /**
     * @param objectId          Object identifier of this entry
     * @param offset            Offset recorded for this entry
     *                          (presumably a byte offset in the file -- TODO confirm)
     * @param generationNumber  Generation number of this entry
     * @param used              Used flag of this entry
     * @param object            Optional object attached to this entry (may be 0)
     */
    XRefValue(int objectId, int offset, int generationNumber, bool used, Object* object=0):
        _objectId(objectId), _offset(offset), _generationNumber(generationNumber), _used(used),
        _object(object)
    {}

    /** @brief Object identifier given at construction */
    int objectId() const {return _objectId;}

    /** @brief Offset given at construction */
    int offset() const {return _offset;}

    /** @brief Generation number given at construction */
    int generationNumber() const {return _generationNumber;}

    /** @brief Used flag given at construction */
    bool used() const {return _used;}

    /** @brief Attach (or replace) the object associated with this entry */
    void setObject(Object* object) { _object = object; }

    /** @brief Object associated with this entry, or 0 if none was set */
    Object* object() const { return _object; }

private:
    int _objectId;
    int _offset;
    int _generationNumber;
    bool _used;
    Object* _object;
};
} }

View File

@ -259,6 +259,29 @@ namespace uPDFParser
std::map<std::string, DataType*>& value() {return _value;} std::map<std::string, DataType*>& value() {return _value;}
virtual std::string str(); virtual std::string str();
/**
 * @brief Tell whether the dictionary holds no entry at all
 *
 * @return true when the underlying map contains zero elements
 */
bool empty()
{
    return _value.size() == 0;
}
/**
 * @brief Tell whether an entry named key exists in the dictionary
 *
 * @param key  Name of the entry to look for
 *
 * @return true if the underlying map has an element for key, false otherwise
 */
bool hasKey(const std::string& key)
{
    return _value.find(key) != _value.end();
}
/**
 * @brief Remove an entry from the dictionary and free its value
 *
 * Does nothing when key is not present. Otherwise the stored pointer is
 * deleted before the entry is erased from the map.
 *
 * @param key  Name of the entry to remove
 */
void deleteKey(const std::string& key)
{
    if (hasKey(key))
    {
        // Free the value first: erase() only drops the pointer, not the pointee
        delete _value[key];
        _value.erase(key);
    }
}
/**
 * @brief Set the entry named key to data, replacing any previous value
 *
 * If key already exists, its entry is removed first: the old value is
 * deleted when freeData is true, or only detached from the dictionary
 * (left for the caller to manage) when freeData is false.
 * data is then stored under key through addData(), whether or not the key
 * existed before. Previously data was silently dropped when the key was
 * absent, so nothing was stored and the pointer was never recorded.
 *
 * @param key       Name of the entry to set
 * @param data      New value for the entry
 * @param freeData  Delete the old value if the key already exists
 */
void replace(const std::string& key, DataType* data, bool freeData=true)
{
    if (hasKey(key))
    {
        if (freeData)
            deleteKey(key);
        else
            _value.erase(key); // Keep the old value alive, just forget it
    }

    // Always store the new value, even when there was nothing to replace
    addData(key, data);
}
private: private:
std::map<std::string, DataType*> _value; std::map<std::string, DataType*> _value;
}; };

View File

@ -799,10 +799,8 @@ namespace uPDFParser
std::string xrefStr = xref.str(); std::string xrefStr = xref.str();
::write(newFd, xrefStr.c_str(), xrefStr.size()); ::write(newFd, xrefStr.c_str(), xrefStr.size());
if (trailer.hasKey("Prev")) trailer.deleteKey("Prev");
delete trailer["Prev"]; trailer.dictionary().addData("Prev", new Integer((int)xrefOffset));
trailer["Prev"] = new Integer((int)xrefOffset);
std::string trailerStr = trailer.dictionary().str(); std::string trailerStr = trailer.dictionary().str();
::write(newFd, "trailer\n", 8); ::write(newFd, "trailer\n", 8);
@ -821,8 +819,65 @@ namespace uPDFParser
{ {
if (update) if (update)
return writeUpdate(filename); return writeUpdate(filename);
else
EXCEPTION(NOT_IMPLEMENTED, "Full write not implemented");
}
int newFd = open(filename.c_str(), O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
if (newFd <= 0)
EXCEPTION(UNABLE_TO_OPEN_FILE, "Unable to open " << filename << " (%m)");
char header[18];
int ret = snprintf(header, sizeof(header), "%%PDF-%d.%d\r%%%c%c%c%c\r\n",
version_major, version_minor,
0xe2, 0xe3, 0xcf, 0xd3);
::write(newFd, header, ret);
int nbObjects = 1;
std::stringstream xref;
xref << std::setfill('0');
xref << "xref\n";
xref << "0 1 f\r\n";
xref << "0000000000 65535 f\r\n";
std::vector<Object*>::iterator it;
for(it=_objects.begin(); it!=_objects.end(); it++)
{
std::string objStr = (*it)->str();
curOffset = lseek(newFd, 0, SEEK_CUR);
::write(newFd, objStr.c_str(), objStr.size());
xref << std::setw(0) << (*it)->objectId() << " 1\n";
xref << std::setw(10) << curOffset << " " << std::setw(5) << (*it)->generationNumber();
if ((*it)->used())
xref << " n";
else
xref << " f" ;
xref << "\r\n" ; // Here \r seems important
nbObjects++;
}
off_t newXrefOffset = lseek(newFd, 0, SEEK_CUR);
std::string xrefStr = xref.str();
::write(newFd, xrefStr.c_str(), xrefStr.size());
trailer.deleteKey("Prev");
trailer.deleteKey("Size");
trailer.dictionary().addData("Size", new Integer((int)nbObjects));
trailer.deleteKey("XRefStm");
std::string trailerStr = trailer.dictionary().str();
::write(newFd, "trailer\n", 8);
::write(newFd, trailerStr.c_str(), trailerStr.size());
std::stringstream startxref;
startxref << "startxref\n" << newXrefOffset << "\n%%EOF";
std::string startxrefStr = startxref.str();
::write(newFd, startxrefStr.c_str(), startxrefStr.size());
close(newFd);
}
} }

View File

@ -16,6 +16,8 @@ int main(int argc, char** argv)
try try
{ {
parser.parse(argv[1]); parser.parse(argv[1]);
std::cout << "Write a.pdf" << std::endl;
parser.write("a.pdf");
} }
catch(uPDFParser::Exception e) catch(uPDFParser::Exception e)
{ {