diff --git a/README.md b/README.md
index 447ad84..36c01fe 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@ Introduction
 ------------
 
 A very simple PDF parser that will load PDF objects without interpretation (zlib, streams, string encoding...).
-It currently only allows to update PDF file with new objects.
+It's also possible to write a new PDF or update one.
 
 
 Compilation
diff --git a/include/uPDFParser.h b/include/uPDFParser.h
index 6b88b2f..75716be 100644
--- a/include/uPDFParser.h
+++ b/include/uPDFParser.h
@@ -35,14 +35,16 @@
 
 namespace uPDFParser
 {
+    class XRefValue;
+
     /**
      * @brief PDF Parser
      */
     class Parser
     {
     public:
-        Parser():
-            fd(0)
+        Parser(int version_major=1, int version_minor=6):
+            version_major(version_major), version_minor(version_minor), fd(0)
         {}
 
         ~Parser()
@@ -77,9 +79,26 @@ namespace uPDFParser
          * @brief Add an object
          */
         void addObject(Object* object) { _objects.push_back(object); }
+
+        /**
+         * @brief Return trailer object
+         */
+        Object& getTrailer() {return trailer; }
+
+        /**
+         * @brief Return xref table. This table is read and updated only once after parse
+         * Further add/delete will make it incoherent
+         */
+        const std::vector<XRefValue>& xrefTable() {return _xrefTable;}
+
+        /**
+         * @brief Return a specific object
+         */
+        Object* getObject(int objectId, int generationNumber=0);
 
     private:
         void parseObject(std::string& token);
+        void parseHeader();
         void parseStartXref();
         bool parseXref();
         bool parseTrailer();
@@ -98,12 +117,43 @@ namespace uPDFParser
         Name* parseName(std::string& token);
 
         void writeUpdate(const std::string& filename);
-        
+
+        int version_major, version_minor;
         std::vector<Object*> _objects;
         Object trailer;
         off_t xrefOffset;
         int fd;
         off_t curOffset;
+        std::vector<XRefValue> _xrefTable;
+    };
+
+    /**
+     * @brief One cross-reference table entry. The object pointer is optional
+     * and may be set after construction
+     */
+    class XRefValue
+    {
+    public:
+        XRefValue(int objectId, int offset, int generationNumber, bool used, Object* object=0):
+            _objectId(objectId), _offset(offset), _generationNumber(generationNumber), _used(used),
+            _object(object)
+        {}
+
+        // Accessors are const so they can be called through the const table returned by Parser::xrefTable()
+        int objectId() const {return _objectId;}
+        int offset() const {return _offset;}
+        int generationNumber() const {return _generationNumber;}
+        bool used() const {return _used;}
+
+        void setObject(Object* object) { _object = object; }
+        Object* object() const { return _object; }
+
+    private:
+        int _objectId;
+        int _offset;
+        int _generationNumber;
+        bool _used;
+        Object* _object;
     };
 }
 
diff --git a/include/uPDFTypes.h b/include/uPDFTypes.h
index 8e94ee3..7e4b545 100644
--- a/include/uPDFTypes.h
+++ b/include/uPDFTypes.h
@@ -259,6 +259,45 @@ namespace uPDFParser
         std::map<std::string, DataType*>& value() {return _value;}
         virtual std::string str();
 
+        /**
+         * @brief Return true if dictionary has no entry
+         */
+        bool empty() { return _value.empty(); }
+
+        /**
+         * @brief Return true if key is present
+         */
+        bool hasKey(const std::string& key) { return _value.count(key)?true:false;}
+
+        /**
+         * @brief Delete value stored at key and remove key. No-op if key is absent
+         */
+        void deleteKey(const std::string& key) {
+            if (!hasKey(key)) return;
+            delete _value[key];
+            _value.erase(key);
+        }
+
+        /**
+         * @brief Set data at key, replacing any previous value
+         *
+         * @param freeData  If true, delete the previous value
+         * (if any), otherwise only drop it from the dictionary
+         */
+        void replace(const std::string& key, DataType* data, bool freeData=true)
+        {
+            if (hasKey(key))
+            {
+                if (freeData)
+                    deleteKey(key);
+                else
+                    _value.erase(key);
+            }
+
+            // Store data even when key was absent, so the caller's value is never dropped
+            addData(key, data);
+        }
+
     private:
         std::map<std::string, DataType*> _value;
     };
diff --git a/src/uPDFParser.cpp b/src/uPDFParser.cpp
index 8606ca1..a053127 100644
--- a/src/uPDFParser.cpp
+++ b/src/uPDFParser.cpp
@@ -799,10 +799,8 @@ namespace uPDFParser
         std::string xrefStr = xref.str();
         ::write(newFd, xrefStr.c_str(), xrefStr.size());
 
-        if (trailer.hasKey("Prev"))
-            delete trailer["Prev"];
-
-        trailer["Prev"] = new Integer((int)xrefOffset);
+        trailer.deleteKey("Prev");
+        trailer.dictionary().addData("Prev", new Integer((int)xrefOffset));
 
         std::string trailerStr = trailer.dictionary().str();
         ::write(newFd, "trailer\n", 8);
@@ -821,8 +819,69 @@ namespace uPDFParser
     {
         if (update)
             return writeUpdate(filename);
-        else
-            EXCEPTION(NOT_IMPLEMENTED, "Full write not implemented");
-    }
 
+        int newFd = open(filename.c_str(), O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
+
+        // open() returns -1 on failure, 0 is a valid descriptor
+        if (newFd < 0)
+            EXCEPTION(UNABLE_TO_OPEN_FILE, "Unable to open " << filename << " (%m)");
+
+        // Large enough for any int version numbers (snprintf returns the untruncated length)
+        char header[48];
+        int ret = snprintf(header, sizeof(header), "%%PDF-%d.%d\r%%%c%c%c%c\r\n",
+                           version_major, version_minor,
+                           0xe2, 0xe3, 0xcf, 0xd3);
+
+        ::write(newFd, header, ret);
+
+        // Start at 1 to count the object 0 entry emitted below
+        int nbObjects = 1;
+        std::stringstream xref;
+
+        xref << std::setfill('0');
+        xref << "xref\n";
+        // Subsection header is "<first object> <count>", like the per-object ones below
+        xref << "0 1\n";
+        xref << "0000000000 65535 f\r\n";
+
+        std::vector<Object*>::iterator it;
+        for(it=_objects.begin(); it!=_objects.end(); it++)
+        {
+            std::string objStr = (*it)->str();
+            curOffset = lseek(newFd, 0, SEEK_CUR);
+            ::write(newFd, objStr.c_str(), objStr.size());
+            xref << std::setw(0) << (*it)->objectId() << " 1\n";
+            xref << std::setw(10) << curOffset << " " << std::setw(5) << (*it)->generationNumber();
+            if ((*it)->used())
+                xref << " n";
+            else
+                xref << " f" ;
+            xref << "\r\n" ; // Here \r seems important
+            nbObjects++;
+        }
+
+
+        off_t newXrefOffset = lseek(newFd, 0, SEEK_CUR);
+
+        std::string xrefStr = xref.str();
+        ::write(newFd, xrefStr.c_str(), xrefStr.size());
+
+        // Prev/XRefStm are removed and Size recomputed for the new file
+        trailer.deleteKey("Prev");
+        trailer.deleteKey("Size");
+        trailer.dictionary().addData("Size", new Integer((int)nbObjects));
+        trailer.deleteKey("XRefStm");
+
+        std::string trailerStr = trailer.dictionary().str();
+        ::write(newFd, "trailer\n", 8);
+        ::write(newFd, trailerStr.c_str(), trailerStr.size());
+
+        std::stringstream startxref;
+        startxref << "startxref\n" << newXrefOffset << "\n%%EOF";
+
+        std::string startxrefStr = startxref.str();
+        ::write(newFd, startxrefStr.c_str(), startxrefStr.size());
+
+        close(newFd);
+    }
 }
 
diff --git a/tests/test.cpp b/tests/test.cpp
index 0b008d4..239c23b 100644
--- a/tests/test.cpp
+++ b/tests/test.cpp
@@ -16,6 +16,8 @@ int main(int argc, char** argv)
     try
     {
         parser.parse(argv[1]);
+        std::cout << "Write a.pdf" << std::endl;
+        parser.write("a.pdf");
     }
     catch(uPDFParser::Exception e)
     {