Add support for full document write operation
This commit is contained in:
parent
cadd567d52
commit
47143308ca
|
@ -2,7 +2,7 @@ Introduction
|
||||||
------------
|
------------
|
||||||
|
|
||||||
A very simple PDF parser that will load PDF objects without interpretation (zlib, streams, string encoding...).
|
A very simple PDF parser that will load PDF objects without interpretation (zlib, streams, string encoding...).
|
||||||
It currently only allows updating a PDF file with new objects.
|
It's also possible to write a new PDF or update one.
|
||||||
|
|
||||||
|
|
||||||
Compilation
|
Compilation
|
||||||
|
|
|
@ -35,14 +35,16 @@
|
||||||
|
|
||||||
namespace uPDFParser
|
namespace uPDFParser
|
||||||
{
|
{
|
||||||
|
class XRefValue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief PDF Parser
|
* @brief PDF Parser
|
||||||
*/
|
*/
|
||||||
class Parser
|
class Parser
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
Parser():
|
Parser(int version_major=1, int version_minor=6):
|
||||||
fd(0)
|
version_major(version_major), version_minor(version_minor), fd(0)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
~Parser()
|
~Parser()
|
||||||
|
@ -78,8 +80,25 @@ namespace uPDFParser
|
||||||
*/
|
*/
|
||||||
void addObject(Object* object) { _objects.push_back(object); }
|
void addObject(Object* object) { _objects.push_back(object); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return trailer object
|
||||||
|
*/
|
||||||
|
Object& getTrailer() {return trailer; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return the xref table. This table is read and updated only once, after parsing.
|
||||||
|
Further additions/deletions will make it inconsistent.
|
||||||
|
*/
|
||||||
|
const std::vector<XRefValue>& xrefTable() {return _xrefTable;}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return a specific object
|
||||||
|
*/
|
||||||
|
Object* getObject(int objectId, int generationNumber=0);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void parseObject(std::string& token);
|
void parseObject(std::string& token);
|
||||||
|
void parseHeader();
|
||||||
void parseStartXref();
|
void parseStartXref();
|
||||||
bool parseXref();
|
bool parseXref();
|
||||||
bool parseTrailer();
|
bool parseTrailer();
|
||||||
|
@ -99,11 +118,37 @@ namespace uPDFParser
|
||||||
|
|
||||||
void writeUpdate(const std::string& filename);
|
void writeUpdate(const std::string& filename);
|
||||||
|
|
||||||
|
int version_major, version_minor;
|
||||||
std::vector<Object*> _objects;
|
std::vector<Object*> _objects;
|
||||||
Object trailer;
|
Object trailer;
|
||||||
off_t xrefOffset;
|
off_t xrefOffset;
|
||||||
int fd;
|
int fd;
|
||||||
off_t curOffset;
|
off_t curOffset;
|
||||||
|
std::vector<XRefValue> _xrefTable;
|
||||||
|
};
|
||||||
|
|
||||||
|
class XRefValue
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
XRefValue(int objectId, int offset, int generationNumber, bool used, Object* object=0):
|
||||||
|
_objectId(objectId), _offset(offset), _generationNumber(generationNumber), _used(used),
|
||||||
|
_object(object)
|
||||||
|
{}
|
||||||
|
|
||||||
|
int objectId() {return _objectId;}
|
||||||
|
int offset() {return _offset;}
|
||||||
|
int generationNumber() {return _generationNumber;}
|
||||||
|
bool used() {return _used;}
|
||||||
|
|
||||||
|
void setObject(Object* object) { _object = object; }
|
||||||
|
Object* object() { return _object; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
int _objectId;
|
||||||
|
int _offset;
|
||||||
|
int _generationNumber;
|
||||||
|
bool _used;
|
||||||
|
Object* _object;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -259,6 +259,29 @@ namespace uPDFParser
|
||||||
std::map<std::string, DataType*>& value() {return _value;}
|
std::map<std::string, DataType*>& value() {return _value;}
|
||||||
virtual std::string str();
|
virtual std::string str();
|
||||||
|
|
||||||
|
bool empty() { return _value.empty(); }
|
||||||
|
|
||||||
|
bool hasKey(const std::string& key) { return _value.count(key)?true:false;}
|
||||||
|
|
||||||
|
void deleteKey(const std::string& key) {
|
||||||
|
if (!hasKey(key)) return;
|
||||||
|
delete _value[key];
|
||||||
|
_value.erase(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
void replace(const std::string& key, DataType* data, bool freeData=true)
|
||||||
|
{
|
||||||
|
if (hasKey(key))
|
||||||
|
{
|
||||||
|
if (freeData)
|
||||||
|
deleteKey(key);
|
||||||
|
else
|
||||||
|
_value.erase(key);
|
||||||
|
|
||||||
|
addData(key, data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::map<std::string, DataType*> _value;
|
std::map<std::string, DataType*> _value;
|
||||||
};
|
};
|
||||||
|
|
|
@ -799,10 +799,8 @@ namespace uPDFParser
|
||||||
std::string xrefStr = xref.str();
|
std::string xrefStr = xref.str();
|
||||||
::write(newFd, xrefStr.c_str(), xrefStr.size());
|
::write(newFd, xrefStr.c_str(), xrefStr.size());
|
||||||
|
|
||||||
if (trailer.hasKey("Prev"))
|
trailer.deleteKey("Prev");
|
||||||
delete trailer["Prev"];
|
trailer.dictionary().addData("Prev", new Integer((int)xrefOffset));
|
||||||
|
|
||||||
trailer["Prev"] = new Integer((int)xrefOffset);
|
|
||||||
|
|
||||||
std::string trailerStr = trailer.dictionary().str();
|
std::string trailerStr = trailer.dictionary().str();
|
||||||
::write(newFd, "trailer\n", 8);
|
::write(newFd, "trailer\n", 8);
|
||||||
|
@ -821,8 +819,65 @@ namespace uPDFParser
|
||||||
{
|
{
|
||||||
if (update)
|
if (update)
|
||||||
return writeUpdate(filename);
|
return writeUpdate(filename);
|
||||||
else
|
|
||||||
EXCEPTION(NOT_IMPLEMENTED, "Full write not implemented");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
int newFd = open(filename.c_str(), O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
|
||||||
|
|
||||||
|
if (newFd <= 0)
|
||||||
|
EXCEPTION(UNABLE_TO_OPEN_FILE, "Unable to open " << filename << " (%m)");
|
||||||
|
|
||||||
|
char header[18];
|
||||||
|
int ret = snprintf(header, sizeof(header), "%%PDF-%d.%d\r%%%c%c%c%c\r\n",
|
||||||
|
version_major, version_minor,
|
||||||
|
0xe2, 0xe3, 0xcf, 0xd3);
|
||||||
|
|
||||||
|
::write(newFd, header, ret);
|
||||||
|
|
||||||
|
int nbObjects = 1;
|
||||||
|
std::stringstream xref;
|
||||||
|
|
||||||
|
xref << std::setfill('0');
|
||||||
|
xref << "xref\n";
|
||||||
|
xref << "0 1 f\r\n";
|
||||||
|
xref << "0000000000 65535 f\r\n";
|
||||||
|
|
||||||
|
std::vector<Object*>::iterator it;
|
||||||
|
for(it=_objects.begin(); it!=_objects.end(); it++)
|
||||||
|
{
|
||||||
|
std::string objStr = (*it)->str();
|
||||||
|
curOffset = lseek(newFd, 0, SEEK_CUR);
|
||||||
|
::write(newFd, objStr.c_str(), objStr.size());
|
||||||
|
xref << std::setw(0) << (*it)->objectId() << " 1\n";
|
||||||
|
xref << std::setw(10) << curOffset << " " << std::setw(5) << (*it)->generationNumber();
|
||||||
|
if ((*it)->used())
|
||||||
|
xref << " n";
|
||||||
|
else
|
||||||
|
xref << " f" ;
|
||||||
|
xref << "\r\n" ; // Here \r seems important
|
||||||
|
nbObjects++;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
off_t newXrefOffset = lseek(newFd, 0, SEEK_CUR);
|
||||||
|
|
||||||
|
std::string xrefStr = xref.str();
|
||||||
|
::write(newFd, xrefStr.c_str(), xrefStr.size());
|
||||||
|
|
||||||
|
trailer.deleteKey("Prev");
|
||||||
|
trailer.deleteKey("Size");
|
||||||
|
trailer.dictionary().addData("Size", new Integer((int)nbObjects));
|
||||||
|
|
||||||
|
trailer.deleteKey("XRefStm");
|
||||||
|
|
||||||
|
std::string trailerStr = trailer.dictionary().str();
|
||||||
|
::write(newFd, "trailer\n", 8);
|
||||||
|
::write(newFd, trailerStr.c_str(), trailerStr.size());
|
||||||
|
|
||||||
|
std::stringstream startxref;
|
||||||
|
startxref << "startxref\n" << newXrefOffset << "\n%%EOF";
|
||||||
|
|
||||||
|
std::string startxrefStr = startxref.str();
|
||||||
|
::write(newFd, startxrefStr.c_str(), startxrefStr.size());
|
||||||
|
|
||||||
|
close(newFd);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,8 @@ int main(int argc, char** argv)
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
parser.parse(argv[1]);
|
parser.parse(argv[1]);
|
||||||
|
std::cout << "Write a.pdf" << std::endl;
|
||||||
|
parser.write("a.pdf");
|
||||||
}
|
}
|
||||||
catch(uPDFParser::Exception e)
|
catch(uPDFParser::Exception e)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue
Block a user