Initial commit
This commit is contained in:
		
							
								
								
									
										5
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | ||||
| *~ | ||||
| libupdfparser.a | ||||
| libupdfparser.so | ||||
|  | ||||
|  | ||||
							
								
								
									
										165
									
								
								LICENSE
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										165
									
								
								LICENSE
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,165 @@ | ||||
|                    GNU LESSER GENERAL PUBLIC LICENSE | ||||
|                        Version 3, 29 June 2007 | ||||
|  | ||||
|  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> | ||||
|  Everyone is permitted to copy and distribute verbatim copies | ||||
|  of this license document, but changing it is not allowed. | ||||
|  | ||||
|  | ||||
|   This version of the GNU Lesser General Public License incorporates | ||||
| the terms and conditions of version 3 of the GNU General Public | ||||
| License, supplemented by the additional permissions listed below. | ||||
|  | ||||
|   0. Additional Definitions. | ||||
|  | ||||
|   As used herein, "this License" refers to version 3 of the GNU Lesser | ||||
| General Public License, and the "GNU GPL" refers to version 3 of the GNU | ||||
| General Public License. | ||||
|  | ||||
|   "The Library" refers to a covered work governed by this License, | ||||
| other than an Application or a Combined Work as defined below. | ||||
|  | ||||
|   An "Application" is any work that makes use of an interface provided | ||||
| by the Library, but which is not otherwise based on the Library. | ||||
| Defining a subclass of a class defined by the Library is deemed a mode | ||||
| of using an interface provided by the Library. | ||||
|  | ||||
|   A "Combined Work" is a work produced by combining or linking an | ||||
| Application with the Library.  The particular version of the Library | ||||
| with which the Combined Work was made is also called the "Linked | ||||
| Version". | ||||
|  | ||||
|   The "Minimal Corresponding Source" for a Combined Work means the | ||||
| Corresponding Source for the Combined Work, excluding any source code | ||||
| for portions of the Combined Work that, considered in isolation, are | ||||
| based on the Application, and not on the Linked Version. | ||||
|  | ||||
|   The "Corresponding Application Code" for a Combined Work means the | ||||
| object code and/or source code for the Application, including any data | ||||
| and utility programs needed for reproducing the Combined Work from the | ||||
| Application, but excluding the System Libraries of the Combined Work. | ||||
|  | ||||
|   1. Exception to Section 3 of the GNU GPL. | ||||
|  | ||||
|   You may convey a covered work under sections 3 and 4 of this License | ||||
| without being bound by section 3 of the GNU GPL. | ||||
|  | ||||
|   2. Conveying Modified Versions. | ||||
|  | ||||
|   If you modify a copy of the Library, and, in your modifications, a | ||||
| facility refers to a function or data to be supplied by an Application | ||||
| that uses the facility (other than as an argument passed when the | ||||
| facility is invoked), then you may convey a copy of the modified | ||||
| version: | ||||
|  | ||||
|    a) under this License, provided that you make a good faith effort to | ||||
|    ensure that, in the event an Application does not supply the | ||||
|    function or data, the facility still operates, and performs | ||||
|    whatever part of its purpose remains meaningful, or | ||||
|  | ||||
|    b) under the GNU GPL, with none of the additional permissions of | ||||
|    this License applicable to that copy. | ||||
|  | ||||
|   3. Object Code Incorporating Material from Library Header Files. | ||||
|  | ||||
|   The object code form of an Application may incorporate material from | ||||
| a header file that is part of the Library.  You may convey such object | ||||
| code under terms of your choice, provided that, if the incorporated | ||||
| material is not limited to numerical parameters, data structure | ||||
| layouts and accessors, or small macros, inline functions and templates | ||||
| (ten or fewer lines in length), you do both of the following: | ||||
|  | ||||
|    a) Give prominent notice with each copy of the object code that the | ||||
|    Library is used in it and that the Library and its use are | ||||
|    covered by this License. | ||||
|  | ||||
|    b) Accompany the object code with a copy of the GNU GPL and this license | ||||
|    document. | ||||
|  | ||||
|   4. Combined Works. | ||||
|  | ||||
|   You may convey a Combined Work under terms of your choice that, | ||||
| taken together, effectively do not restrict modification of the | ||||
| portions of the Library contained in the Combined Work and reverse | ||||
| engineering for debugging such modifications, if you also do each of | ||||
| the following: | ||||
|  | ||||
|    a) Give prominent notice with each copy of the Combined Work that | ||||
|    the Library is used in it and that the Library and its use are | ||||
|    covered by this License. | ||||
|  | ||||
|    b) Accompany the Combined Work with a copy of the GNU GPL and this license | ||||
|    document. | ||||
|  | ||||
|    c) For a Combined Work that displays copyright notices during | ||||
|    execution, include the copyright notice for the Library among | ||||
|    these notices, as well as a reference directing the user to the | ||||
|    copies of the GNU GPL and this license document. | ||||
|  | ||||
|    d) Do one of the following: | ||||
|  | ||||
|        0) Convey the Minimal Corresponding Source under the terms of this | ||||
|        License, and the Corresponding Application Code in a form | ||||
|        suitable for, and under terms that permit, the user to | ||||
|        recombine or relink the Application with a modified version of | ||||
|        the Linked Version to produce a modified Combined Work, in the | ||||
|        manner specified by section 6 of the GNU GPL for conveying | ||||
|        Corresponding Source. | ||||
|  | ||||
|        1) Use a suitable shared library mechanism for linking with the | ||||
|        Library.  A suitable mechanism is one that (a) uses at run time | ||||
|        a copy of the Library already present on the user's computer | ||||
|        system, and (b) will operate properly with a modified version | ||||
|        of the Library that is interface-compatible with the Linked | ||||
|        Version. | ||||
|  | ||||
|    e) Provide Installation Information, but only if you would otherwise | ||||
|    be required to provide such information under section 6 of the | ||||
|    GNU GPL, and only to the extent that such information is | ||||
|    necessary to install and execute a modified version of the | ||||
|    Combined Work produced by recombining or relinking the | ||||
|    Application with a modified version of the Linked Version. (If | ||||
|    you use option 4d0, the Installation Information must accompany | ||||
|    the Minimal Corresponding Source and Corresponding Application | ||||
|    Code. If you use option 4d1, you must provide the Installation | ||||
|    Information in the manner specified by section 6 of the GNU GPL | ||||
|    for conveying Corresponding Source.) | ||||
|  | ||||
|   5. Combined Libraries. | ||||
|  | ||||
|   You may place library facilities that are a work based on the | ||||
| Library side by side in a single library together with other library | ||||
| facilities that are not Applications and are not covered by this | ||||
| License, and convey such a combined library under terms of your | ||||
| choice, if you do both of the following: | ||||
|  | ||||
|    a) Accompany the combined library with a copy of the same work based | ||||
|    on the Library, uncombined with any other library facilities, | ||||
|    conveyed under the terms of this License. | ||||
|  | ||||
|    b) Give prominent notice with the combined library that part of it | ||||
|    is a work based on the Library, and explaining where to find the | ||||
|    accompanying uncombined form of the same work. | ||||
|  | ||||
|   6. Revised Versions of the GNU Lesser General Public License. | ||||
|  | ||||
|   The Free Software Foundation may publish revised and/or new versions | ||||
| of the GNU Lesser General Public License from time to time. Such new | ||||
| versions will be similar in spirit to the present version, but may | ||||
| differ in detail to address new problems or concerns. | ||||
|  | ||||
|   Each version is given a distinguishing version number. If the | ||||
| Library as you received it specifies that a certain numbered version | ||||
| of the GNU Lesser General Public License "or any later version" | ||||
| applies to it, you have the option of following the terms and | ||||
| conditions either of that published version or of any later version | ||||
| published by the Free Software Foundation. If the Library as you | ||||
| received it does not specify a version number of the GNU Lesser | ||||
| General Public License, you may choose any version of the GNU Lesser | ||||
| General Public License ever published by the Free Software Foundation. | ||||
|  | ||||
|   If the Library as you received it specifies that a proxy can decide | ||||
| whether future versions of the GNU Lesser General Public License shall | ||||
| apply, that proxy's public statement of acceptance of any version is | ||||
| permanent authorization for you to choose that version for the | ||||
| Library. | ||||
							
								
								
									
										53
									
								
								Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								Makefile
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
|  | ||||
# Toolchain selection.
# GNU make predefines AR and CXX, so a plain `?=` would never apply the
# $(CROSS) prefix. Only the built-in defaults are replaced here; values coming
# from the environment or the command line are left untouched.
ifneq ($(filter default undefined,$(origin AR)),)
  AR := $(CROSS)ar
endif
ifneq ($(filter default undefined,$(origin CXX)),)
  CXX := $(CROSS)g++
endif

# Project layout
SRCDIR      := src
INCDIR      := include
BUILDDIR    := obj
TARGETDIR   := bin
SRCEXT      := cpp
OBJEXT      := o

CXXFLAGS=-Wall -fPIC -I./$(INCDIR)
LDFLAGS=

# Library flavours to build (1 = build, 0 = skip)
BUILD_STATIC ?= 0
BUILD_SHARED ?= 1

# The variables must be expanded with $(...): comparing the bare word
# BUILD_STATIC against 0 is always "not equal" and enabled both targets.
TARGETS =
ifneq ($(BUILD_STATIC),0)
  TARGETS += libupdfparser.a
endif
ifneq ($(BUILD_SHARED),0)
  TARGETS += libupdfparser.so
endif

# DEBUG=<anything> selects an unoptimized build with debug symbols
ifneq ($(DEBUG),)
CXXFLAGS += -ggdb -O0
else
CXXFLAGS += -O2
endif

SOURCES = src/uPDFParser.cpp src/uPDFTypes.cpp
OBJECTS     := $(patsubst $(SRCDIR)/%,$(BUILDDIR)/%,$(SOURCES:.$(SRCEXT)=.$(OBJEXT)))

all: $(TARGETS)

.PHONY: all clean

# Object directory; -p keeps the rule harmless when it already exists
$(BUILDDIR):
	mkdir -p $@

# The directory is an order-only prerequisite: it must exist, but its
# timestamp must not trigger recompilation.
$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(SRCEXT) | $(BUILDDIR)
	$(CXX) $(CXXFLAGS) -c $< -o $@

# Archive/link exactly the objects listed as prerequisites rather than
# whatever happens to match obj/*.o.
libupdfparser.a: $(OBJECTS)
	$(AR) crs $@ $^

libupdfparser.so: $(OBJECTS)
	$(CXX) $^ $(LDFLAGS) -o $@ -shared

# Test program, linked against the static library (listed once on the
# command line; $< is test.c only).
test: test.c libupdfparser.a
	$(CXX) -ggdb -O0 $< -o $@ -I$(INCDIR) libupdfparser.a

clean:
	rm -rf libupdfparser.so libupdfparser.a $(BUILDDIR)
							
								
								
									
										33
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| Introduction | ||||
| ------------ | ||||
|  | ||||
| A very simple PDF parser that will load PDF objects without interpretation (zlib, streams, string encoding...). | ||||
| It currently only supports updating a PDF file with new objects. | ||||
|  | ||||
|  | ||||
| Compilation | ||||
| ----------- | ||||
|  | ||||
| Use the _make_ command: | ||||
|  | ||||
|     make [CROSS=XXX] [DEBUG=1] [BUILD_STATIC=(0|1)] [BUILD_SHARED=(0|1)] | ||||
|  | ||||
| CROSS can define a cross compiler prefix (e.g. arm-linux-gnueabihf-) | ||||
|  | ||||
| DEBUG can be set to compile in DEBUG mode | ||||
|  | ||||
| BUILD_STATIC builds libupdfparser.a if 1, nothing if 0 (default value); can be combined with BUILD_SHARED | ||||
|  | ||||
| BUILD_SHARED builds libupdfparser.so if 1 (default value), nothing if 0; can be combined with BUILD_STATIC | ||||
|  | ||||
|  | ||||
| Copyright | ||||
| --------- | ||||
|  | ||||
| Grégory Soutadé | ||||
|  | ||||
|  | ||||
| License | ||||
| ------- | ||||
|  | ||||
| LGPL v3 or later | ||||
							
								
								
									
										150
									
								
								include/uPDFObject.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										150
									
								
								include/uPDFObject.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,150 @@ | ||||
| /* | ||||
|   Copyright 2021 Grégory Soutadé | ||||
|  | ||||
|   This file is part of uPDFParser. | ||||
|  | ||||
|   uPDFParser is free software: you can redistribute it and/or modify | ||||
|   it under the terms of the GNU Lesser General Public License as published by | ||||
|   the Free Software Foundation, either version 3 of the License, or | ||||
|   (at your option) any later version. | ||||
|  | ||||
|   uPDFParser is distributed in the hope that it will be useful, | ||||
|   but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|   GNU Lesser General Public License for more details. | ||||
|  | ||||
|   You should have received a copy of the GNU Lesser General Public License | ||||
|   along with uPDFParser. If not, see <http://www.gnu.org/licenses/>. | ||||
| */ | ||||
|  | ||||
| #ifndef _UPDFOBJECT_HPP_ | ||||
| #define _UPDFOBJECT_HPP_ | ||||
|  | ||||
| #include "uPDFTypes.h" | ||||
|  | ||||
| namespace uPDFParser | ||||
| { | ||||
|     /** | ||||
|      * @brief PDF Object | ||||
|      */ | ||||
|     class Object | ||||
|     { | ||||
|     public: | ||||
| 	Object(): | ||||
| 	    _objectId(0), _generationNumber(0), | ||||
| 	    offset(0), _isNew(false), indirectOffset(0) | ||||
| 	{} | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Object constructor | ||||
| 	 * | ||||
| 	 * @param objectId          Object ID | ||||
| 	 * @param generationNumber  Object generation number | ||||
| 	 * @param offset            Offset of object in current PDF file | ||||
| 	 * @param isNew             false if object has been read from file, | ||||
| 	 *                          true if it has been created or updated | ||||
| 	 * @param indirectOffset    Object is indirect | ||||
| 	 */ | ||||
| 	Object(int objectId, int generationNumber, uint64_t offset, bool isNew=false, | ||||
| 	       off_t indirectOffset=0): | ||||
| 	    _objectId(objectId), _generationNumber(generationNumber), | ||||
| 	    offset(offset), _isNew(isNew), indirectOffset(indirectOffset) | ||||
| 	{} | ||||
|  | ||||
| 	~Object() | ||||
| 	{ | ||||
| 	    std::vector<DataType*>::iterator it; | ||||
| 	    for(it=_data.begin(); it!=_data.end(); it++) | ||||
| 		delete *it; | ||||
| 	} | ||||
|  | ||||
| 	Object(const Object& other) | ||||
| 	{ | ||||
| 	    _objectId = other._objectId; | ||||
| 	    _generationNumber = other._generationNumber; | ||||
| 	    offset = other.offset; | ||||
| 	    indirectOffset = other.indirectOffset; | ||||
| 	    _isNew = true; | ||||
|  | ||||
| 	    std::vector<DataType*>::const_iterator it; | ||||
| 	    for(it=other._data.begin(); it!=other._data.end(); it++) | ||||
| 		_data.push_back((*it)->clone()); | ||||
|  | ||||
| 	    const std::map<std::string, DataType*> _dict = ((Dictionary)other._dictionary).value(); | ||||
| 	    std::map<std::string, DataType*>& _myDict = _dictionary.value(); | ||||
| 	    std::map<std::string, DataType*>::const_iterator it2; | ||||
| 	    for(it2=_dict.begin(); it2!=_dict.end(); it2++) | ||||
| 		_myDict[it2->first] = it2->second->clone(); | ||||
| 	} | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Clone current object (call copy constructor) | ||||
| 	 */ | ||||
| 	Object* clone() { return new Object(*this); } | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Return internal dictionary | ||||
| 	 */ | ||||
| 	Dictionary& dictionary() {return _dictionary;} | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Return vector of data contained into object | ||||
| 	 */ | ||||
| 	std::vector<DataType*>& data() {return _data;} | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Object string representation | ||||
| 	 */ | ||||
| 	std::string str(); | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Set object as indirect if offset != 0 or not indirect if offset == 0 | ||||
| 	 */ | ||||
| 	void setIndirectOffset(off_t offset) {indirectOffset = offset;} | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief is object indirect (indirectOffset != 0) | ||||
| 	 */ | ||||
| 	bool isIndirect() {return indirectOffset != 0;} | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Get dictionary value | ||||
| 	 */ | ||||
| 	DataType*& operator[](const std::string& key) { return _dictionary.value()[key]; } | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Check for key in object's dictionary | ||||
| 	 */ | ||||
| 	bool hasKey(const std::string& key) { return _dictionary.value().count(key)?true:false; } | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief is object new (or not updated) ? | ||||
| 	 */ | ||||
| 	bool isNew() { return _isNew; } | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Mark object as updated | ||||
| 	 */ | ||||
| 	void update(void) { _isNew = true; } | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Return object's id | ||||
| 	 */ | ||||
| 	int objectId() { return _objectId; } | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Return object's generation number | ||||
| 	 */ | ||||
| 	int generationNumber() { return _generationNumber; } | ||||
| 	 | ||||
|     private: | ||||
| 	int _objectId; | ||||
| 	int _generationNumber; | ||||
| 	off_t offset; | ||||
| 	bool _isNew; | ||||
| 	off_t indirectOffset; | ||||
| 	Dictionary _dictionary; | ||||
| 	std::vector<DataType*> _data; | ||||
|     }; | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										109
									
								
								include/uPDFParser.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										109
									
								
								include/uPDFParser.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,109 @@ | ||||
| /* | ||||
|   Copyright 2021 Grégory Soutadé | ||||
|  | ||||
|   This file is part of uPDFParser. | ||||
|  | ||||
|   uPDFParser is free software: you can redistribute it and/or modify | ||||
|   it under the terms of the GNU Lesser General Public License as published by | ||||
|   the Free Software Foundation, either version 3 of the License, or | ||||
|   (at your option) any later version. | ||||
|  | ||||
|   uPDFParser is distributed in the hope that it will be useful, | ||||
|   but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|   GNU Lesser General Public License for more details. | ||||
|  | ||||
|   You should have received a copy of the GNU Lesser General Public License | ||||
|   along with uPDFParser. If not, see <http://www.gnu.org/licenses/>. | ||||
| */ | ||||
|  | ||||
| #ifndef _UPDFPARSER_HPP_ | ||||
| #define _UPDFPARSER_HPP_ | ||||
|  | ||||
| #include <exception> | ||||
| #include <map> | ||||
| #include <vector> | ||||
| #include <string> | ||||
| #include <sstream> | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <string.h> | ||||
| #include <unistd.h> | ||||
|  | ||||
| #include "uPDFTypes.h" | ||||
| #include "uPDFObject.h" | ||||
|  | ||||
| namespace uPDFParser | ||||
| { | ||||
|     /** | ||||
|      * @brief PDF Parser | ||||
|      */ | ||||
|     class Parser | ||||
|     { | ||||
|     public: | ||||
| 	Parser(): | ||||
| 	    fd(0) | ||||
| 	{} | ||||
|  | ||||
| 	~Parser() | ||||
| 	{ | ||||
| 	    if (fd) close(fd); | ||||
| 	     | ||||
| 	    std::vector<Object*>::iterator it; | ||||
| 	    for(it=_objects.begin(); it!=_objects.end(); it++) | ||||
| 		delete *it; | ||||
| 	} | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Parse a file | ||||
| 	 */ | ||||
| 	void parse(const std::string& filename); | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Write a PDF file with internal objects | ||||
| 	 * | ||||
| 	 * @param filename File path | ||||
| 	 * @param update   Only append new objects if true | ||||
| 	 *                 Write a new PDF file if false (not supported for now) | ||||
| 	 */ | ||||
| 	void write(const std::string& filename, bool update=false); | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Get internals (or parsed) objects | ||||
| 	 */ | ||||
| 	std::vector<Object*>& objects() { return _objects; } | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Add an object | ||||
| 	 */ | ||||
| 	void addObject(Object* object) { _objects.push_back(object); } | ||||
| 	 | ||||
|     private: | ||||
| 	void parseObject(std::string& token); | ||||
| 	void parseXref(); | ||||
| 	void parseTrailer(); | ||||
|  | ||||
| 	std::string nextToken(bool exceptionOnEOF=true); | ||||
| 	 | ||||
| 	DataType* parseType(std::string& token, Object* object, std::map<std::string, DataType*>& dict); | ||||
| 	void parseDictionary(Object* object, std::map<std::string, DataType*>& dict); | ||||
| 	DataType* parseSignedNumber(std::string& token); | ||||
| 	DataType* parseNumber(std::string& token); | ||||
| 	DataType* parseNumberOrReference(std::string& token); | ||||
| 	Array* parseArray(Object* object); | ||||
| 	String* parseString(); | ||||
| 	HexaString* parseHexaString(); | ||||
| 	Stream* parseStream(); | ||||
| 	Name* parseName(std::string& token); | ||||
|  | ||||
| 	void writeUpdate(const std::string& filename); | ||||
| 	 | ||||
| 	std::vector<Object*> _objects; | ||||
| 	Object trailer; | ||||
| 	off_t xrefOffset; | ||||
| 	int fd; | ||||
| 	off_t curOffset; | ||||
|     }; | ||||
| } | ||||
|  | ||||
| #endif | ||||
							
								
								
									
										72
									
								
								include/uPDFParser_common.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								include/uPDFParser_common.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,72 @@ | ||||
| #ifndef _UPDFPARSER_COMMON_HPP_ | ||||
| #define _UPDFPARSER_COMMON_HPP_ | ||||
|  | ||||
#include <exception>
#include <iomanip>
#include <sstream>
#include <string>
#include <stdlib.h>
#include <string.h>
|  | ||||
| namespace uPDFParser | ||||
| { | ||||
/**
 * @brief Numeric error codes, numbered consecutively from 1
 */
enum PARSING_ERROR {
    UNABLE_TO_OPEN_FILE = 1,
    TRUNCATED_FILE      = 2,
    INVALID_HEADER      = 3,
    INVALID_LINE        = 4,
    INVALID_FOOTER      = 5,
    INVALID_DICTIONARY  = 6,
    INVALID_NAME        = 7,
    INVALID_BOOLEAN     = 8,
    INVALID_NUMBER      = 9,
    INVALID_STREAM      = 10,
    INVALID_TOKEN       = 11,
    INVALID_OBJECT      = 12,
    INVALID_TRAILER     = 13,
    INVALID_HEXASTRING  = 14,
    NOT_IMPLEMENTED     = 15
};
|  | ||||
/**
 * @brief Exception class
 *
 * Carries an error code and a pre-formatted, multi-line description
 * (code in hexadecimal, message, then file:line) returned by what().
 *
 * The description is held in a std::string, so the compiler-generated
 * copy constructor, copy assignment and destructor are all correct.
 */
class Exception : public std::exception
{
public:

    /**
     * @param code     Error code, printed in hexadecimal
     * @param message  Human readable description (copied)
     * @param file     Source file raising the error. Only the pointer
     *                 is kept, so it must outlive the exception (a
     *                 __FILE__ literal does)
     * @param line     Source line raising the error
     */
    Exception(int code, const char* message, const char* file, int line):
        code(code), line(line), file(file)
    {
        std::stringstream msg;
        msg << "Exception code : 0x" << std::setbase(16) << code << '\n';
        msg << "Message        : " << message << '\n';
        msg << "File           : " << file << ":" << std::setbase(10) << line << '\n';
        fullmessage = msg.str();
    }

    /**
     * @brief Full description; valid as long as this exception lives
     */
    const char * what () const noexcept { return fullmessage.c_str(); }

    /**
     * @brief Error code given at construction
     */
    int getErrorCode() const {return code;}

private:
    int code, line;
    const char* file;
    std::string fullmessage;
};
|      | ||||
/**
 * @brief Throw a uPDFParser::Exception tagged with the current file and line
 *
 * @param code     Error code
 * @param message  Anything that can be streamed with <<, e.g. "text " << value
 *
 * The body is wrapped in do { } while (0) so that `EXCEPTION(...);` is a
 * single statement, which keeps it usable in an unbraced if/else.
 */
#define EXCEPTION(code, message)					\
    do {								\
	std::stringstream __msg;					\
	__msg << message;						\
	throw uPDFParser::Exception(code, __msg.str().c_str(), __FILE__, __LINE__); \
    } while (0)
| } | ||||
| #endif | ||||
							
								
								
									
										253
									
								
								include/uPDFTypes.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										253
									
								
								include/uPDFTypes.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,253 @@ | ||||
| /* | ||||
|   Copyright 2021 Grégory Soutadé | ||||
|  | ||||
|   This file is part of uPDFParser. | ||||
|  | ||||
|   uPDFParser is free software: you can redistribute it and/or modify | ||||
|   it under the terms of the GNU Lesser General Public License as published by | ||||
|   the Free Software Foundation, either version 3 of the License, or | ||||
|   (at your option) any later version. | ||||
|  | ||||
|   uPDFParser is distributed in the hope that it will be useful, | ||||
|   but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|   GNU Lesser General Public License for more details. | ||||
|  | ||||
|   You should have received a copy of the GNU Lesser General Public License | ||||
|   along with uPDFParser. If not, see <http://www.gnu.org/licenses/>. | ||||
| */ | ||||
|  | ||||
| #ifndef _UPDFTYPES_HPP_ | ||||
| #define _UPDFTYPES_HPP_ | ||||
|  | ||||
| #include <map> | ||||
| #include <vector> | ||||
| #include <string> | ||||
| #include <iostream> | ||||
| #include <sstream> | ||||
|  | ||||
| namespace uPDFParser | ||||
| { | ||||
/**
 * @brief Abstract base of every PDF value type
 * From https://resources.infosecinstitute.com/topic/pdf-file-format-basic-structure/
 *
 * Each concrete type records its TYPE tag and must provide a string
 * serialization and a clone operation.
 */
class DataType
{
public:
    enum TYPE {
        BOOLEAN,
        INTEGER,
        REAL,
        NAME,
        STRING,
        HEXASTRING,
        REFERENCE,
        ARRAY,
        DICTIONARY,
        STREAM
    };

    DataType(TYPE kind)
        : _type(kind)
    {
    }

    virtual ~DataType()
    {
    }

    /**
     * @brief Tag given at construction
     */
    TYPE type()
    {
        return _type;
    }

    /**
     * @brief Serialize the value as a string
     */
    virtual std::string str() = 0;

    /**
     * @brief Return a copy of the value; implementations in this file
     *        allocate it with new
     */
    virtual DataType* clone() = 0;

protected:
    TYPE _type;
};
|      | ||||
|     class Boolean : public DataType | ||||
|     { | ||||
|     public: | ||||
| 	Boolean(bool value): | ||||
| 	    DataType(DataType::TYPE::BOOLEAN), _value(value) | ||||
| 	{} | ||||
|  | ||||
| 	virtual DataType* clone() {return new Boolean(_value);} | ||||
| 	bool value() {return _value;} | ||||
| 	virtual std::string str() { return (_value)?" true":" false";} | ||||
| 	 | ||||
|     private: | ||||
| 	bool _value; | ||||
|     }; | ||||
|  | ||||
|     class Integer : public DataType | ||||
|     { | ||||
|     public: | ||||
| 	Integer(int value, bool _signed=false): | ||||
| 	    DataType(DataType::TYPE::INTEGER), _value(value), _signed(_signed) | ||||
| 	{} | ||||
|  | ||||
| 	virtual DataType* clone() {return new Integer(_value, _signed);} | ||||
| 	int value() {return _value;} | ||||
| 	virtual std::string str(); | ||||
|  | ||||
|     private: | ||||
| 	int _value; | ||||
| 	bool _signed; | ||||
|     }; | ||||
|      | ||||
|     class Real : public DataType | ||||
|     { | ||||
|     public: | ||||
| 	Real(float value, bool _signed=false): | ||||
| 	    DataType(DataType::TYPE::REAL), _value(value), _signed(_signed) | ||||
| 	{} | ||||
|  | ||||
| 	virtual DataType* clone() {return new Real(_value, _signed);} | ||||
| 	float value() {return _value;} | ||||
| 	virtual std::string str(); | ||||
| 	 | ||||
|     private: | ||||
| 	float _value; | ||||
| 	bool _signed; | ||||
|     }; | ||||
|  | ||||
|     class Name : public DataType | ||||
|     { | ||||
|     public: | ||||
| 	Name(const std::string&); | ||||
|  | ||||
| 	virtual DataType* clone() {return new Name(_value);} | ||||
| 	std::string value() { | ||||
| 	    const char* name = _value.c_str(); | ||||
| 	    return std::string(&name[1]); | ||||
| 	} | ||||
| 	virtual std::string str() { return _value;} | ||||
| 	 | ||||
|     private: | ||||
| 	std::string _value; | ||||
|     }; | ||||
|  | ||||
|     class String : public DataType | ||||
|     { | ||||
|     public: | ||||
| 	String(const std::string&); | ||||
|  | ||||
| 	virtual DataType* clone() {return new String(_value);} | ||||
| 	std::string value() {return _value;} | ||||
|  | ||||
| 	// Escape '(' and ')' characters | ||||
| 	virtual std::string str() { | ||||
| 	    char prev = '\0'; | ||||
| 	    std::string res("("); | ||||
|  | ||||
| 	    for(unsigned int i=0; i<_value.size(); i++) | ||||
| 	    { | ||||
| 		if ((_value[i] == '(' || _value[i] == ')') && | ||||
| 		    prev != '\\') | ||||
| 		    res += '\\'; | ||||
| 		res += _value[i]; | ||||
| 		prev = _value[i]; | ||||
| 	    } | ||||
|  | ||||
| 	    res += ")"; | ||||
| 	    return res; | ||||
| 	} | ||||
|  | ||||
|     private: | ||||
| 	std::string _value; | ||||
|     }; | ||||
|  | ||||
|     class HexaString : public DataType | ||||
|     { | ||||
|     public: | ||||
| 	HexaString(const std::string&); | ||||
|  | ||||
| 	virtual DataType* clone() {return new HexaString(_value);} | ||||
| 	std::string value() {return _value;} | ||||
| 	virtual std::string str() { return std::string("<") + _value + std::string(">");} | ||||
|  | ||||
|     private: | ||||
| 	std::string _value; | ||||
|     }; | ||||
|  | ||||
|     /** | ||||
|      * @brief Reference to an object, serialized as " <id> <generation> R" | ||||
|      */ | ||||
|     class Reference : public DataType | ||||
|     { | ||||
|     public: | ||||
| 	Reference(int objectId, int generationNumber): | ||||
| 	    DataType(DataType::TYPE::REFERENCE), | ||||
| 	    objectId(objectId), | ||||
| 	    generationNumber(generationNumber) | ||||
| 	{} | ||||
|  | ||||
| 	virtual DataType* clone() { | ||||
| 	    return new Reference(objectId, generationNumber); | ||||
| 	} | ||||
|  | ||||
| 	// Only the object id is exposed, not the generation number | ||||
| 	int value() { | ||||
| 	    return objectId; | ||||
| 	} | ||||
|  | ||||
| 	virtual std::string str() { | ||||
| 	    std::stringstream out; | ||||
| 	    out << " " << objectId; | ||||
| 	    out << " " << generationNumber; | ||||
| 	    out << " R"; | ||||
| 	    return out.str(); | ||||
| 	} | ||||
|  | ||||
|     private: | ||||
| 	int objectId, generationNumber; | ||||
|     }; | ||||
|  | ||||
|     /** | ||||
|      * @brief Ordered list of DataType pointers | ||||
|      */ | ||||
|     class Array : public DataType | ||||
|     { | ||||
|     public: | ||||
| 	Array(): | ||||
| 	    DataType(DataType::TYPE::ARRAY) | ||||
| 	{} | ||||
|  | ||||
| 	// Append data at the end of the array (pointer is stored as is) | ||||
| 	void addData(DataType* data) { | ||||
| 	    _value.push_back(data); | ||||
| 	} | ||||
|  | ||||
| 	// Deep copy : every element is cloned into the new array | ||||
| 	virtual DataType* clone() { | ||||
| 	    Array* copy = new Array(); | ||||
| 	    for(std::vector<DataType*>::size_type i=0; i<_value.size(); i++) | ||||
| 		copy->addData(_value[i]->clone()); | ||||
| 	    return copy; | ||||
| 	} | ||||
|  | ||||
| 	std::vector<DataType*>& value() { | ||||
| 	    return _value; | ||||
| 	} | ||||
|  | ||||
| 	virtual std::string str(); | ||||
|  | ||||
|     private: | ||||
| 	std::vector<DataType*> _value; | ||||
|     }; | ||||
|  | ||||
|     /** | ||||
|      * @brief Map of keys to DataType pointers | ||||
|      * | ||||
|      * A key may be associated to a null pointer (key without value). | ||||
|      */ | ||||
|     class Dictionary : public DataType | ||||
|     { | ||||
|     public: | ||||
| 	Dictionary(): | ||||
| 	    DataType(DataType::TYPE::DICTIONARY) | ||||
| 	{} | ||||
|  | ||||
| 	void addData(const std::string&, DataType*); | ||||
|  | ||||
| 	/** | ||||
| 	 * @brief Deep copy of the dictionary | ||||
| 	 * | ||||
| 	 * Null values are copied as null instead of being dereferenced. | ||||
| 	 */ | ||||
| 	virtual DataType* clone() { | ||||
| 	    Dictionary* res = new Dictionary(); | ||||
| 	    std::map<std::string, DataType*>::iterator it; | ||||
| 	    for(it=_value.begin(); it!=_value.end(); it++) | ||||
| 	    { | ||||
| 		DataType* copy = 0; | ||||
| 		if (it->second) | ||||
| 		    copy = it->second->clone(); | ||||
| 		res->addData(it->first, copy); | ||||
| 	    } | ||||
| 	    return res; | ||||
| 	} | ||||
| 	std::map<std::string, DataType*>& value() {return _value;} | ||||
| 	virtual std::string str(); | ||||
|  | ||||
|     private: | ||||
| 	std::map<std::string, DataType*> _value; | ||||
|     }; | ||||
|  | ||||
|     /** | ||||
|      * @brief Stream placeholder : only start and end offsets are recorded | ||||
|      */ | ||||
|     class Stream : public DataType | ||||
|     { | ||||
|     public: | ||||
| 	Stream(int startOffset, int endOffset): | ||||
| 	    DataType(DataType::TYPE::STREAM), | ||||
| 	    startOffset(startOffset), | ||||
| 	    endOffset(endOffset) | ||||
| 	{} | ||||
|  | ||||
| 	virtual DataType* clone() { | ||||
| 	    return new Stream(startOffset, endOffset); | ||||
| 	} | ||||
|  | ||||
| 	// Stream content is not serialized, only empty stream markers | ||||
| 	virtual std::string str() { | ||||
| 	    return std::string("stream\nendstream\n"); | ||||
| 	} | ||||
|  | ||||
|     private: | ||||
| 	int startOffset, endOffset; | ||||
|     }; | ||||
| } | ||||
|  | ||||
| #endif | ||||
							
								
								
									
										616
									
								
								src/uPDFParser.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										616
									
								
								src/uPDFParser.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,616 @@ | ||||
| /* | ||||
|   Copyright 2021 Grégory Soutadé | ||||
|  | ||||
|   This file is part of uPDFParser. | ||||
|  | ||||
|   uPDFParser is free software: you can redistribute it and/or modify | ||||
|   it under the terms of the GNU Lesser General Public License as published by | ||||
|   the Free Software Foundation, either version 3 of the License, or | ||||
|   (at your option) any later version. | ||||
|  | ||||
|   uPDFParser is distributed in the hope that it will be useful, | ||||
|   but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|   GNU Lesser General Public License for more details. | ||||
|  | ||||
|   You should have received a copy of the GNU Lesser General Public License | ||||
|   along with uPDFParser. If not, see <http://www.gnu.org/licenses/>. | ||||
| */ | ||||
|  | ||||
| #include <sys/types.h> | ||||
| #include <sys/stat.h> | ||||
| #include <fcntl.h> | ||||
| #include <unistd.h> | ||||
| #include <string.h> | ||||
|  | ||||
| #include "uPDFParser.h" | ||||
| #include "uPDFParser_common.h" | ||||
|  | ||||
| namespace uPDFParser | ||||
| { | ||||
|     /** | ||||
|      * @brief Serialize object : "<id> <generation> obj" line, dictionary, | ||||
|      * every data in insertion order, then "endobj" | ||||
|      */ | ||||
|     std::string Object::str() | ||||
|     { | ||||
| 	std::stringstream out; | ||||
|  | ||||
| 	out << _objectId << " " << _generationNumber << " obj\n"; | ||||
| 	out << _dictionary.str(); | ||||
|  | ||||
| 	for(std::vector<DataType*>::size_type i=0; i<_data.size(); i++) | ||||
| 	    out << _data[i]->str(); | ||||
|  | ||||
| 	out << "endobj\n"; | ||||
|  | ||||
| 	return out.str(); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Read data until '\n' or '\r' is found or buffer is full | ||||
|      * | ||||
|      * @param fd              File descriptor to read from | ||||
|      * @param buffer          Destination buffer | ||||
|      * @param size            Maximum number of characters to store | ||||
|      * @param exceptionOnEOF  Call EXCEPTION(TRUNCATED_FILE) when read fails | ||||
|      * | ||||
|      * @return Number of characters stored (end of line excluded), or -1 when | ||||
|      * read failed and exceptionOnEOF is false | ||||
|      * | ||||
|      * buffer is NUL terminated only if less than size characters have been | ||||
|      * stored : a full buffer must not be used as a C string. | ||||
|      */ | ||||
|     static inline int readline(int fd, char* buffer, int size, bool exceptionOnEOF=true) | ||||
|     { | ||||
| 	int res = 0; | ||||
| 	char c; | ||||
|  | ||||
| 	// Nothing can be stored : do not touch buffer, do not loop on a | ||||
| 	// negative counter | ||||
| 	if (size <= 0) | ||||
| 	    return 0; | ||||
|  | ||||
| 	buffer[0] = 0; | ||||
|  | ||||
| 	for (;size;size--,res++) | ||||
| 	{ | ||||
| 	    if (read(fd, &c, 1) != 1) | ||||
| 	    { | ||||
| 		// Keep partial data usable as a C string (res < size here) | ||||
| 		buffer[res] = 0; | ||||
| 		if (exceptionOnEOF) | ||||
| 		    EXCEPTION(TRUNCATED_FILE, "Unexpected end of file"); | ||||
| 		return -1; | ||||
| 	    } | ||||
|  | ||||
| 	    if (c == '\n' || c == '\r') | ||||
| 		break; | ||||
|  | ||||
| 	    buffer[res] = c; | ||||
| 	} | ||||
|  | ||||
| 	// size is 0 when buffer is full : no room left for the terminator | ||||
| 	if (size) | ||||
| 	    buffer[res] = 0; | ||||
|  | ||||
| 	return res; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Read data until EOF, '\n' or '\r' is found | ||||
|      * | ||||
|      * Only the first end of line character is consumed : for a "\r\n" | ||||
|      * sequence, '\n' is left in the file. | ||||
|      */ | ||||
|     static inline void finishLine(int fd) | ||||
|     { | ||||
| 	char c; | ||||
|  | ||||
| 	while (read(fd, &c, 1) == 1) | ||||
| 	{ | ||||
| 	    // '\r' alone is an end of line too : without this test the rest | ||||
| 	    // of a file using only '\r' would be skipped | ||||
| 	    if (c == '\n' || c == '\r') | ||||
| 		break; | ||||
| 	} | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Find next token to analyze | ||||
|      * | ||||
|      * Leading white characters and comment lines are skipped. curOffset is | ||||
|      * set to the file offset of the first character of the token. | ||||
|      * '<', '>', '[', ']', '(' and ')' are returned as single character | ||||
|      * tokens, except "<<" and ">>" which are returned as a whole. | ||||
|      * | ||||
|      * @param exceptionOnEOF  Call EXCEPTION(TRUNCATED_FILE) when read fails | ||||
|      * | ||||
|      * @return Token, empty if read failed before any token character | ||||
|      */ | ||||
|     std::string Parser::nextToken(bool exceptionOnEOF) | ||||
|     { | ||||
| 	char c; | ||||
| 	std::string res(""); | ||||
| 	// sizeof() is used below : the terminating NUL of these arrays is | ||||
| 	// part of the searched characters | ||||
| 	static const char delims[] = " \t<>[]()+-/"; | ||||
| 	static const char start_delims[] = "<>[]()"; | ||||
| 	bool found = false; | ||||
|  | ||||
| 	while (!found) | ||||
| 	{ | ||||
| 	    if (read(fd, &c, 1) != 1) | ||||
| 	    { | ||||
| 		if (exceptionOnEOF) | ||||
| 		    EXCEPTION(TRUNCATED_FILE, "Unexpected end of file"); | ||||
| 		break; | ||||
| 	    } | ||||
|  | ||||
| 	    // Comment, skip line | ||||
| 	    if (c == '%') | ||||
| 	    { | ||||
| 		finishLine(fd); | ||||
| 		// A comment ends the current token, but a comment found | ||||
| 		// before any token character must not produce an empty | ||||
| 		// token : parse() stops on an empty token | ||||
| 		if (res.size()) | ||||
| 		    break; | ||||
| 		continue; | ||||
| 	    } | ||||
|  | ||||
| 	    // White character while empty result, continue | ||||
| 	    if ((c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\0') && !res.size()) | ||||
| 		continue; | ||||
|  | ||||
| 	    // Quit on line return without lseek(fd, -1, SEEK_CUR) | ||||
| 	    // (result can not be empty here, see previous test) | ||||
| 	    if (c == '\n' || c == '\r') | ||||
| 		break; | ||||
|  | ||||
| 	    if (res.size()) | ||||
| 	    { | ||||
| 		// Push character until delimiter is found | ||||
| 		if (memchr(delims, c, sizeof(delims))) | ||||
| 		{ | ||||
| 		    // Delimiter belongs to the next token | ||||
| 		    lseek(fd, -1, SEEK_CUR); | ||||
| 		    found = true; | ||||
| 		} | ||||
| 		else | ||||
| 		    res += c; | ||||
| 	    } | ||||
| 	    else | ||||
| 	    { | ||||
| 		curOffset = lseek(fd, 0, SEEK_CUR)-1; | ||||
|  | ||||
| 		// First character, is it a delimiter ? | ||||
| 		if (memchr(start_delims, c, sizeof(start_delims))) | ||||
| 		    found = true; | ||||
|  | ||||
| 		res += c; | ||||
| 	    } | ||||
| 	} | ||||
|  | ||||
| 	// Double '>' and '<' to compute dictionary | ||||
| 	if (res == ">" || res == "<") | ||||
| 	{ | ||||
| 	    if (read(fd, &c, 1) == 1) | ||||
| 	    { | ||||
| 		if (c == res[0]) | ||||
| 		    res += c; | ||||
| 		else | ||||
| 		    lseek(fd, -1, SEEK_CUR); | ||||
| 	    } | ||||
| 	} | ||||
|  | ||||
| 	return res; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Parse trailer : dictionary, "startxref", one token and "%%EOF" | ||||
|      * | ||||
|      * Dictionary entries are stored into the trailer object. The token that | ||||
|      * follows "startxref" is read but not used. | ||||
|      */ | ||||
|     void Parser::parseTrailer() | ||||
|     { | ||||
| 	std::string token; | ||||
| 	char buffer[10]; | ||||
| 	int len; | ||||
|  | ||||
| 	// std::cout << "Parse trailer" << std::endl; | ||||
|  | ||||
| 	token = nextToken(); | ||||
|  | ||||
| 	if (token != "<<") | ||||
| 	    EXCEPTION(INVALID_TRAILER, "Invalid trailer at offset " << curOffset); | ||||
|  | ||||
| 	parseDictionary(&trailer, trailer.dictionary().value()); | ||||
|  | ||||
| 	token = nextToken(); | ||||
| 	if (token != "startxref") | ||||
| 	    EXCEPTION(INVALID_TRAILER, "Invalid trailer at offset " << curOffset); | ||||
|  | ||||
| 	token = nextToken(); | ||||
|  | ||||
| 	// nextToken() consumes only one end of line character : with "\r\n" | ||||
| 	// the remaining '\n' is seen as an empty line, skip it. | ||||
| 	// readline() returns -1 on end of file, which ends the loop. | ||||
| 	do | ||||
| 	{ | ||||
| 	    len = readline(fd, buffer, sizeof(buffer), false); | ||||
| 	} while (len == 0); | ||||
|  | ||||
| 	if (strncmp(buffer, "%%EOF", 5)) | ||||
| 	    EXCEPTION(INVALID_TRAILER, "Invalid trailer at offset " << curOffset); | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * @brief Record xref offset, skip every token until "trailer", then | ||||
|      * parse the trailer | ||||
|      */ | ||||
|     void Parser::parseXref() | ||||
|     { | ||||
| 	std::string current; | ||||
|  | ||||
| 	// std::cout << "Parse xref" << std::endl; | ||||
| 	xrefOffset = curOffset; | ||||
|  | ||||
| 	// xref entries themselves are not analyzed | ||||
| 	do | ||||
| 	{ | ||||
| 	    current = nextToken(); | ||||
| 	} while (current != "trailer"); | ||||
|  | ||||
| 	parseTrailer(); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Build a Real (token contains '.') or an Integer from token | ||||
|      * | ||||
|      * @param token  Number without sign, "0" is prepended if it starts by '.' | ||||
|      * @param sign   '\0' for an unsigned number, value is negated for '-' | ||||
|      * | ||||
|      * @return Newly allocated Real or Integer | ||||
|      */ | ||||
|     static DataType* tokenToNumber(std::string& token, char sign='\0') | ||||
|     { | ||||
| 	const bool hasSign = (sign != '\0'); | ||||
| 	const bool negate = (sign == '-'); | ||||
| 	const std::string::size_type dot = token.find('.'); | ||||
|  | ||||
| 	if (dot != std::string::npos) | ||||
| 	{ | ||||
| 	    if (dot == 0) | ||||
| 		token.insert(0, "0"); | ||||
|  | ||||
| 	    float real = std::stof(token); | ||||
| 	    if (negate) | ||||
| 		real = -real; | ||||
| 	    return new Real(real, hasSign); | ||||
| 	} | ||||
|  | ||||
| 	int integer = std::stoi(token); | ||||
| 	if (negate) | ||||
| 	    integer = -integer; | ||||
|  | ||||
| 	return new Integer(integer, hasSign); | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * @brief Parse a number whose first character is its sign | ||||
|      * | ||||
|      * token is modified : the sign character is removed from it. | ||||
|      */ | ||||
|     DataType* Parser::parseSignedNumber(std::string& token) | ||||
|     { | ||||
| 	const char sign = token[0]; | ||||
| 	const std::string unsignedPart(token.c_str() + 1); | ||||
|  | ||||
| 	token = unsignedPart; | ||||
| 	return tokenToNumber(token, sign); | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * @brief Parse a number without sign | ||||
|      */ | ||||
|     DataType* Parser::parseNumber(std::string& token) | ||||
|     { | ||||
| 	DataType* number = tokenToNumber(token); | ||||
| 	return number; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Parse a number, or a reference "<id> <generation> R" | ||||
|      * | ||||
|      * The two tokens following the number are read ahead. If they are not | ||||
|      * an integer followed by "R", file position is restored and the number | ||||
|      * alone is returned. | ||||
|      * | ||||
|      * @return Newly allocated Real, Integer or Reference | ||||
|      */ | ||||
|     DataType* Parser::parseNumberOrReference(std::string& token) | ||||
|     { | ||||
| 	DataType* res = tokenToNumber(token); | ||||
|  | ||||
| 	// A real number can not be an object id | ||||
| 	if (res->type() == DataType::TYPE::REAL) | ||||
| 	    return res; | ||||
|  | ||||
| 	off_t offset = lseek(fd, 0, SEEK_CUR); | ||||
| 	std::string token2 = nextToken(); | ||||
| 	std::string token3 = nextToken(); | ||||
|  | ||||
| 	DataType* generationNumber = 0; | ||||
| 	try | ||||
| 	{ | ||||
| 	    generationNumber = tokenToNumber(token2); | ||||
| 	} | ||||
| 	catch (std::invalid_argument& e) | ||||
| 	{ | ||||
| 	    // Second token is not a number | ||||
| 	    lseek(fd, offset, SEEK_SET); | ||||
| 	    return res; | ||||
| 	} | ||||
|  | ||||
| 	if ((generationNumber->type() != DataType::TYPE::INTEGER) || | ||||
| 	    token3.size() != 1 || token3[0] != 'R') | ||||
| 	{ | ||||
| 	    delete generationNumber; | ||||
| 	    lseek(fd, offset, SEEK_SET); | ||||
| 	    return res; | ||||
| 	} | ||||
|  | ||||
| 	DataType* res2 = new Reference(((Integer*)res)->value(), | ||||
| 				       ((Integer*)generationNumber)->value()); | ||||
| 	// Reference holds copies of both values : temporary numbers are | ||||
| 	// not needed anymore | ||||
| 	delete generationNumber; | ||||
| 	delete res; | ||||
| 	return res2; | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * @brief Build the data introduced by token | ||||
|      * | ||||
|      * The parser to call is chosen from the whole token ("<<", "[", "(", | ||||
|      * "<", "stream", "true", "false") or from its first character (digit, | ||||
|      * '.', '+', '-', '/'). | ||||
|      * | ||||
|      * @param token   First token of the data | ||||
|      * @param object  Object given to dictionary and array parsers | ||||
|      * @param dict    Not used here | ||||
|      */ | ||||
|     DataType* Parser::parseType(std::string& token, Object* object, std::map<std::string, DataType*>& dict) | ||||
|     { | ||||
| 	if (token == "<<") | ||||
| 	{ | ||||
| 	    Dictionary* dictionary = new Dictionary(); | ||||
| 	    parseDictionary(object, dictionary->value()); | ||||
| 	    return dictionary; | ||||
| 	} | ||||
| 	if (token == "[") | ||||
| 	    return parseArray(object); | ||||
| 	if (token == "(") | ||||
| 	    return parseString(); | ||||
| 	if (token == "<") | ||||
| 	    return parseHexaString(); | ||||
| 	if (token == "stream") | ||||
| 	    return parseStream(); | ||||
|  | ||||
| 	const char first = token[0]; | ||||
|  | ||||
| 	// Only a number starting by 1-9 can be the id of a reference | ||||
| 	if (first >= '1' && first <= '9') | ||||
| 	    return parseNumberOrReference(token); | ||||
| 	if (first == '/') | ||||
| 	    return parseName(token); | ||||
| 	if (first == '+' || first == '-') | ||||
| 	    return parseSignedNumber(token); | ||||
| 	if (first == '0' || first == '.') | ||||
| 	    return parseNumber(token); | ||||
|  | ||||
| 	if (token == "true") | ||||
| 	    return new Boolean(true); | ||||
| 	if (token == "false") | ||||
| 	    return new Boolean(false); | ||||
|  | ||||
| 	EXCEPTION(INVALID_TOKEN, "Invalid token " << token << " at offset " << curOffset); | ||||
|  | ||||
| 	return 0; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Parse data until "]" is found | ||||
|      * | ||||
|      * @return Newly allocated Array filled with parsed data | ||||
|      */ | ||||
|     Array* Parser::parseArray(Object* object) | ||||
|     { | ||||
| 	Array* array = new Array(); | ||||
|  | ||||
| 	for (std::string item = nextToken(); item != "]"; item = nextToken()) | ||||
| 	{ | ||||
| 	    DataType* data = parseType(item, object, object->dictionary().value()); | ||||
| 	    //std::cout << "Add " << data->str() << std::endl; | ||||
| 	    array->addData(data); | ||||
| 	} | ||||
|  | ||||
| 	return array; | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * @brief Parse a literal string, opening '(' being already consumed | ||||
|      * | ||||
|      * Characters are stored as read (backslash sequences are kept) until | ||||
|      * the closing ')' : a ')' neither escaped nor paired with an unescaped | ||||
|      * '(' found inside the string. Parsing also stops if read fails. | ||||
|      * | ||||
|      * @return Newly allocated String | ||||
|      */ | ||||
|     String* Parser::parseString() | ||||
|     { | ||||
| 	std::string res(""); | ||||
| 	char c; | ||||
| 	bool escaped = false; | ||||
| 	// Number of unescaped '(' waiting for their ')' | ||||
| 	int depth = 0; | ||||
|  | ||||
| 	while (read(fd, &c, 1) == 1) | ||||
| 	{ | ||||
| 	    if (!escaped) | ||||
| 	    { | ||||
| 		if (c == '(') | ||||
| 		    depth++; | ||||
| 		else if (c == ')') | ||||
| 		{ | ||||
| 		    if (!depth) | ||||
| 			break; | ||||
| 		    depth--; | ||||
| 		} | ||||
| 	    } | ||||
|  | ||||
| 	    // A backslash opens an escape unless it is itself escaped, | ||||
| 	    // so that "\\)" ends the string | ||||
| 	    escaped = (c == '\\' && !escaped); | ||||
|  | ||||
| 	    res += c; | ||||
| 	} | ||||
|  | ||||
| 	return new String(res); | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * @brief Parse an hexadecimal string, opening '<' being already consumed | ||||
|      * | ||||
|      * Characters are stored as read until '>' is found or read fails. | ||||
|      * EXCEPTION(INVALID_HEXASTRING) is called for an odd number of | ||||
|      * characters. | ||||
|      * | ||||
|      * @return Newly allocated HexaString | ||||
|      */ | ||||
|     HexaString* Parser::parseHexaString() | ||||
|     { | ||||
| 	std::string digits(""); | ||||
| 	char c; | ||||
|  | ||||
| 	while (read(fd, &c, 1) == 1 && c != '>') | ||||
| 	    digits += c; | ||||
|  | ||||
| 	const bool oddLength = (digits.size() % 2) != 0; | ||||
| 	if (oddLength) | ||||
| 	    EXCEPTION(INVALID_HEXASTRING, "Invalid hexa String at offset " << curOffset); | ||||
|  | ||||
| 	return new HexaString(digits); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Skip stream content until a line starting by "endstream" | ||||
|      * | ||||
|      * @return Newly allocated Stream built from curOffset and from the | ||||
|      * offset of the "endstream" line | ||||
|      */ | ||||
|     Stream* Parser::parseStream() | ||||
|     { | ||||
| 	char line[1024]; | ||||
| 	off_t endOffset; | ||||
|  | ||||
| 	do | ||||
| 	{ | ||||
| 	    // Offset is taken before reading : it is the start of the line | ||||
| 	    endOffset = lseek(fd, 0, SEEK_CUR); | ||||
| 	    readline(fd, line, sizeof(line)); | ||||
| 	} while (strncmp(line, "endstream", 9) != 0); | ||||
|  | ||||
| 	return new Stream(curOffset, endOffset); | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * @brief Build a Name from a token starting by '/' | ||||
|      * | ||||
|      * EXCEPTION(INVALID_NAME) is called for any other token. | ||||
|      */ | ||||
|     Name* Parser::parseName(std::string& name) | ||||
|     { | ||||
| 	const bool startsWithSlash = (name.size() && name[0] == '/'); | ||||
|  | ||||
| 	if (!startsWithSlash) | ||||
| 	    EXCEPTION(INVALID_NAME, "Invalid Name at offset " << curOffset); | ||||
|  | ||||
| 	//std::cout << "Name " << name << std::endl; | ||||
| 	return new Name(name); | ||||
|     } | ||||
|     | ||||
|     /** | ||||
|      * @brief Parse "key value" pairs until ">>" is found | ||||
|      * | ||||
|      * A key directly followed by ">>" is stored with a null value. If a | ||||
|      * key is already in dict, its previous value is deleted and replaced. | ||||
|      * | ||||
|      * @param object  Object given to value parsers | ||||
|      * @param dict    Map to fill | ||||
|      */ | ||||
|     void Parser::parseDictionary(Object* object, std::map<std::string, DataType*>& dict) | ||||
|     { | ||||
| 	std::string token; | ||||
|  | ||||
| 	while (1) | ||||
| 	{ | ||||
| 	    token = nextToken(); | ||||
| 	    if (token == ">>") | ||||
| 		break; | ||||
|  | ||||
| 	    // Name object is only used to validate the token : keep its | ||||
| 	    // value and release it | ||||
| 	    Name* name = parseName(token); | ||||
| 	    const std::string key = name->value(); | ||||
| 	    delete name; | ||||
|  | ||||
| 	    token = nextToken(); | ||||
| 	    const bool lastEntry = (token == ">>"); | ||||
|  | ||||
| 	    DataType* value = 0; | ||||
| 	    if (!lastEntry) | ||||
| 		value = parseType(token, object, dict); | ||||
|  | ||||
| 	    // Do not lose the data of a key defined twice | ||||
| 	    std::map<std::string, DataType*>::iterator previous = dict.find(key); | ||||
| 	    if (previous != dict.end()) | ||||
| 	    { | ||||
| 		delete previous->second; | ||||
| 		previous->second = value; | ||||
| 	    } | ||||
| 	    else | ||||
| 		dict[key] = value; | ||||
|  | ||||
| 	    if (lastEntry) | ||||
| 		break; | ||||
| 	} | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * @brief Parse "<id> <generation> obj ... endobj" | ||||
|      * | ||||
|      * The new object is appended to _objects. A "<<" fills its dictionary, | ||||
|      * a token starting by 1-9 must be an integer and is set as its indirect | ||||
|      * offset. | ||||
|      * | ||||
|      * @param token  Object id, already read by caller | ||||
|      */ | ||||
|     void Parser::parseObject(std::string& token) | ||||
|     { | ||||
| 	off_t offset; | ||||
| 	int objectId, generationNumber; | ||||
| 	Object* object; | ||||
|  | ||||
| 	// Offset of the object id token | ||||
| 	offset = curOffset; | ||||
| 	try | ||||
| 	{ | ||||
| 	    objectId = std::stoi(token); | ||||
| 	    token = nextToken(); | ||||
| 	    generationNumber = std::stoi(token); | ||||
| 	} | ||||
| 	catch(std::invalid_argument& e) | ||||
| 	{ | ||||
| 	    EXCEPTION(INVALID_OBJECT, "Invalid object at offset " << curOffset); | ||||
| 	} | ||||
|  | ||||
| 	token = nextToken(); | ||||
|  | ||||
| 	if (token != "obj") | ||||
| 	    EXCEPTION(INVALID_OBJECT, "Invalid object at offset " << curOffset); | ||||
|  | ||||
| 	std::cout << "New obj " << objectId << " " << generationNumber << std::endl; | ||||
|  | ||||
| 	object = new Object(objectId, generationNumber, offset); | ||||
| 	_objects.push_back(object); | ||||
|  | ||||
| 	while (1) | ||||
| 	{ | ||||
| 	    token = nextToken(); | ||||
|  | ||||
| 	    if (token == "endobj") | ||||
| 		break; | ||||
|  | ||||
| 	    if (token == "<<") | ||||
| 		parseDictionary(object, object->dictionary().value()); | ||||
| 	    else if (token[0] >= '1' && token[0] <= '9') | ||||
| 	    { | ||||
| 		DataType* _offset = tokenToNumber(token); | ||||
| 		const bool isInteger = (_offset->type() == DataType::TYPE::INTEGER); | ||||
| 		int indirectOffset = 0; | ||||
|  | ||||
| 		if (isInteger) | ||||
| 		    indirectOffset = ((Integer*)_offset)->value(); | ||||
|  | ||||
| 		// Only the value is needed : release the temporary number | ||||
| 		// before any exception | ||||
| 		delete _offset; | ||||
|  | ||||
| 		if (!isInteger) | ||||
| 		    EXCEPTION(INVALID_OBJECT, "Invalid object at offset " << curOffset); | ||||
| 		object->setIndirectOffset(indirectOffset); | ||||
| 	    } | ||||
| 	    else | ||||
| 		// NOTE(review): returned data is not kept anywhere, | ||||
| 		// confirm whether it should be added to the object | ||||
| 		parseType(token, object, object->dictionary().value()); | ||||
| 	} | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Parse a PDF file | ||||
|      * | ||||
|      * File must start by "%PDF". Then every token must be "xref" or the | ||||
|      * id of an object, until no more token is found. | ||||
|      * | ||||
|      * fd is 0 when no file is opened : it is set back to 0 once the file is | ||||
|      * closed, so that a later call does not close the same descriptor again. | ||||
|      * | ||||
|      * @param filename  Path of the file to parse | ||||
|      */ | ||||
|     void Parser::parse(const std::string& filename) | ||||
|     { | ||||
| 	char buf[16]; | ||||
| 	std::string token; | ||||
|  | ||||
| 	// File left opened by a previous call (ended by an exception) | ||||
| 	if (fd) | ||||
| 	{ | ||||
| 	    close(fd); | ||||
| 	    fd = 0; | ||||
| 	} | ||||
|  | ||||
| 	fd = open(filename.c_str(), O_RDONLY); | ||||
|  | ||||
| 	if (fd <= 0) | ||||
| 	{ | ||||
| 	    fd = 0; | ||||
| 	    EXCEPTION(UNABLE_TO_OPEN_FILE, "Unable to open " << filename << " (%m)"); | ||||
| 	} | ||||
|  | ||||
| 	// Check %PDF at startup | ||||
| 	readline(fd, buf, 4); | ||||
| 	if (strncmp(buf, "%PDF", 4)) | ||||
| 	    EXCEPTION(INVALID_HEADER, "Invalid PDF header"); | ||||
| 	finishLine(fd); | ||||
|  | ||||
| 	curOffset = lseek(fd, 0, SEEK_CUR); | ||||
|  | ||||
| 	// // Check %%EOF at then end | ||||
| 	// lseek(fd, -5, SEEK_END); | ||||
| 	// readline(fd, buf, 5); | ||||
| 	// if (strncmp(buf, "%%EOF", 5)) | ||||
| 	//     EXCEPTION(INVALID_FOOTER, "Invalid PDF footer"); | ||||
|  | ||||
| 	lseek(fd, curOffset, SEEK_SET); | ||||
|  | ||||
| 	while (1) | ||||
| 	{ | ||||
| 	    token = nextToken(false); | ||||
|  | ||||
| 	    // Empty token : nothing more to read | ||||
| 	    if (!token.size()) | ||||
| 		break; | ||||
|  | ||||
| 	    if (token == "xref") | ||||
| 		parseXref(); | ||||
| 	    else if (token[0] >= '1' && token[0] <= '9') | ||||
| 		parseObject(token); | ||||
| 	    else | ||||
| 		EXCEPTION(INVALID_LINE, "Invalid Line at offset " << curOffset); | ||||
| 	} | ||||
|  | ||||
| 	close(fd); | ||||
| 	fd = 0; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Append new objects, then xref, trailer and startxref to a file | ||||
|      * | ||||
|      * Only objects for which isNew() is true are written. Trailer "Prev" | ||||
|      * entry is replaced by the offset recorded by parseXref(). If there is | ||||
|      * no new object, only the leading "\r" is written. | ||||
|      * | ||||
|      * @param filename  Path of the file to append to (created if needed) | ||||
|      */ | ||||
|     void Parser::writeUpdate(const std::string& filename) | ||||
|     { | ||||
| 	int newFd = open(filename.c_str(), O_WRONLY|O_APPEND|O_CREAT, S_IRUSR|S_IWUSR); | ||||
|  | ||||
| 	// open() returns -1 on error, 0 is a valid descriptor | ||||
| 	if (newFd < 0) | ||||
| 	    EXCEPTION(UNABLE_TO_OPEN_FILE, "Unable to open " << filename << " (%m)"); | ||||
|  | ||||
| 	// NOTE(review): with O_APPEND, file position is presumably moved to | ||||
| 	// end of file by this first write only, offsets read by lseek() | ||||
| 	// below would depend on it -- confirm before moving this line | ||||
| 	::write(newFd, "\r", 1); | ||||
|  | ||||
| 	std::stringstream xref; | ||||
| 	int nbNewObjects = 0; | ||||
|  | ||||
| 	xref << std::setfill('0'); | ||||
| 	xref << "xref\n"; | ||||
|  | ||||
| 	std::vector<Object*>::iterator it; | ||||
| 	for(it=_objects.begin(); it!=_objects.end(); it++) | ||||
| 	{ | ||||
| 	    if (!(*it)->isNew()) | ||||
| 		continue; | ||||
| 	    nbNewObjects ++; | ||||
| 	    std::string objStr = (*it)->str(); | ||||
| 	    // Offset of the object in the output file, for its xref entry | ||||
| 	    curOffset = lseek(newFd, 0, SEEK_CUR); | ||||
| 	    ::write(newFd, objStr.c_str(), objStr.size()); | ||||
| 	    // One xref subsection of one entry per object | ||||
| 	    xref << std::setw(0) << (*it)->objectId() << " 1\n"; | ||||
| 	    xref << std::setw(10) << curOffset << " " << std::setw(5) << (*it)->generationNumber() << " n\r\n"; // Here \r seems important  | ||||
| 	} | ||||
|  | ||||
| 	if (!nbNewObjects) | ||||
| 	{ | ||||
| 	    close(newFd); | ||||
| 	    return; | ||||
| 	} | ||||
|  | ||||
| 	off_t newXrefOffset = lseek(newFd, 0, SEEK_CUR); | ||||
|  | ||||
| 	std::string xrefStr = xref.str(); | ||||
| 	::write(newFd, xrefStr.c_str(), xrefStr.size()); | ||||
|  | ||||
| 	if (trailer.hasKey("Prev")) | ||||
| 	    delete trailer["Prev"]; | ||||
|  | ||||
| 	trailer["Prev"] = new Integer((int)xrefOffset); | ||||
|  | ||||
| 	std::string trailerStr = trailer.dictionary().str(); | ||||
| 	::write(newFd, "trailer\n", 8); | ||||
| 	::write(newFd, trailerStr.c_str(), trailerStr.size()); | ||||
|  | ||||
| 	std::stringstream startxref; | ||||
| 	startxref << "startxref\n" << newXrefOffset << "\n%%EOF"; | ||||
|  | ||||
| 	std::string startxrefStr = startxref.str(); | ||||
| 	::write(newFd, startxrefStr.c_str(), startxrefStr.size()); | ||||
|  | ||||
| 	close(newFd); | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * @brief Write the document to filename | ||||
|      * | ||||
|      * Only update mode is available : EXCEPTION(NOT_IMPLEMENTED) is called | ||||
|      * when update is false. | ||||
|      */ | ||||
|     void Parser::write(const std::string& filename, bool update) | ||||
|     { | ||||
| 	if (!update) | ||||
| 	{ | ||||
| 	    EXCEPTION(NOT_IMPLEMENTED, "Full write not implemented"); | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| 	    writeUpdate(filename); | ||||
| 	} | ||||
|     } | ||||
|  | ||||
| } | ||||
							
								
								
									
										105
									
								
								src/uPDFTypes.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										105
									
								
								src/uPDFTypes.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,105 @@ | ||||
| /* | ||||
|   Copyright 2021 Grégory Soutadé | ||||
|  | ||||
|   This file is part of uPDFParser. | ||||
|  | ||||
|   uPDFParser is free software: you can redistribute it and/or modify | ||||
|   it under the terms of the GNU Lesser General Public License as published by | ||||
|   the Free Software Foundation, either version 3 of the License, or | ||||
|   (at your option) any later version. | ||||
|  | ||||
|   uPDFParser is distributed in the hope that it will be useful, | ||||
|   but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|   GNU Lesser General Public License for more details. | ||||
|  | ||||
|   You should have received a copy of the GNU Lesser General Public License | ||||
|   along with uPDFParser. If not, see <http://www.gnu.org/licenses/>. | ||||
| */ | ||||
|  | ||||
| #include "uPDFTypes.h" | ||||
| #include "uPDFParser_common.h" | ||||
|  | ||||
| namespace uPDFParser | ||||
| { | ||||
|     // Name holding a copy of the given text | ||||
|     Name::Name(const std::string& name): | ||||
| 	DataType(DataType::TYPE::NAME), | ||||
| 	_value(name) | ||||
|     {} | ||||
|  | ||||
|     // String holding a copy of the given text | ||||
|     String::String(const std::string& value): | ||||
| 	DataType(DataType::TYPE::STRING), | ||||
| 	_value(value) | ||||
|     {} | ||||
|  | ||||
|     // HexaString holding a copy of the given text | ||||
|     HexaString::HexaString(const std::string& value): | ||||
| 	DataType(DataType::TYPE::HEXASTRING), | ||||
| 	_value(value) | ||||
|     {} | ||||
|  | ||||
|     /** | ||||
|      * @brief Serialize the integer, preceded by a space | ||||
|      * | ||||
|      * A signed integer is written with an explicit '+' when it is not | ||||
|      * negative. | ||||
|      */ | ||||
|     std::string Integer::str() | ||||
|     { | ||||
| 	std::string digits = std::to_string(_value); | ||||
|  | ||||
| 	// to_string() already writes '-' for a negative value : only '+' | ||||
| 	// has to be added | ||||
| 	if (_signed && _value >= 0) | ||||
| 	    digits = "+" + digits; | ||||
|  | ||||
| 	return " " + digits; | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * @brief Serialize the real number, preceded by a space | ||||
|      * | ||||
|      * A signed number is written with an explicit '+' when to_string() | ||||
|      * did not write a '-'. | ||||
|      */ | ||||
|     std::string Real::str() | ||||
|     { | ||||
| 	std::string digits = std::to_string(_value); | ||||
|  | ||||
| 	// Test is done on the text and not on the value : it also covers | ||||
| 	// negative zero, written "-0.000000" | ||||
| 	if (_signed && digits[0] != '-') | ||||
| 	    digits = "+" + digits; | ||||
|  | ||||
| 	return " " + digits; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * @brief Serialize every element between '[' and ']' | ||||
|      * | ||||
|      * A space is added before an element as soon as something has been | ||||
|      * written after '['. | ||||
|      */ | ||||
|     std::string Array::str() | ||||
|     { | ||||
| 	std::string out("["); | ||||
|  | ||||
| 	for(std::vector<DataType*>::size_type i=0; i<_value.size(); i++) | ||||
| 	{ | ||||
| 	    if (out.size() > 1) | ||||
| 		out += " "; | ||||
| 	    out += _value[i]->str(); | ||||
| 	} | ||||
|  | ||||
| 	out += "]"; | ||||
| 	return out; | ||||
|     } | ||||
|  | ||||
|     // Associate value to key, replacing the pointer of an existing key | ||||
|     void Dictionary::addData(const std::string& key, DataType* value) | ||||
|     { | ||||
| 	std::pair<std::map<std::string, DataType*>::iterator, bool> inserted = | ||||
| 	    _value.insert(std::make_pair(key, value)); | ||||
|  | ||||
| 	// Key was already there : insert() left its pointer untouched | ||||
| 	if (!inserted.second) | ||||
| 	    inserted.first->second = value; | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * @brief Serialize entries between "<<" and ">>\n" | ||||
|      * | ||||
|      * Every key is preceded by '/'. Nothing is written after the key of | ||||
|      * a null value. | ||||
|      */ | ||||
|     std::string Dictionary::str() | ||||
|     { | ||||
| 	std::string out("<<"); | ||||
| 	std::map<std::string, DataType*>::iterator entry = _value.begin(); | ||||
|  | ||||
| 	while (entry != _value.end()) | ||||
| 	{ | ||||
| 	    out += "/"; | ||||
| 	    out += entry->first; | ||||
| 	    if (entry->second) | ||||
| 		out += entry->second->str(); | ||||
| 	    ++entry; | ||||
| 	} | ||||
|  | ||||
| 	out += ">>\n"; | ||||
| 	return out; | ||||
|     } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user