Initial commit

This commit is contained in:
2021-08-21 18:22:58 +02:00
commit 39e2f6ecc9
10 changed files with 1561 additions and 0 deletions

150
include/uPDFObject.h Normal file
View File

@@ -0,0 +1,150 @@
/*
Copyright 2021 Grégory Soutadé
This file is part of uPDFParser.
uPDFParser is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
uPDFParser is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with uPDFParser. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _UPDFOBJECT_HPP_
#define _UPDFOBJECT_HPP_
#include "uPDFTypes.h"
namespace uPDFParser
{
/**
* @brief PDF Object
*/
class Object
{
public:
Object():
_objectId(0), _generationNumber(0),
offset(0), _isNew(false), indirectOffset(0)
{}
/**
* @brief Object constructor
*
* @param objectId Object ID
* @param generationNumber Object generation number
* @param offset Offset of object in current PDF file
* @param isNew false if object has been read from file,
* true if it has been created or updated
* @param indirectOffset Object is indirect
*/
Object(int objectId, int generationNumber, uint64_t offset, bool isNew=false,
off_t indirectOffset=0):
_objectId(objectId), _generationNumber(generationNumber),
offset(offset), _isNew(isNew), indirectOffset(indirectOffset)
{}
~Object()
{
std::vector<DataType*>::iterator it;
for(it=_data.begin(); it!=_data.end(); it++)
delete *it;
}
Object(const Object& other)
{
_objectId = other._objectId;
_generationNumber = other._generationNumber;
offset = other.offset;
indirectOffset = other.indirectOffset;
_isNew = true;
std::vector<DataType*>::const_iterator it;
for(it=other._data.begin(); it!=other._data.end(); it++)
_data.push_back((*it)->clone());
const std::map<std::string, DataType*> _dict = ((Dictionary)other._dictionary).value();
std::map<std::string, DataType*>& _myDict = _dictionary.value();
std::map<std::string, DataType*>::const_iterator it2;
for(it2=_dict.begin(); it2!=_dict.end(); it2++)
_myDict[it2->first] = it2->second->clone();
}
/**
* @brief Clone current object (call copy constructor)
*/
Object* clone() { return new Object(*this); }
/**
* @brief Return internal dictionary
*/
Dictionary& dictionary() {return _dictionary;}
/**
* @brief Return vector of data contained into object
*/
std::vector<DataType*>& data() {return _data;}
/**
* @brief Object string representation
*/
std::string str();
/**
* @brief Set object as indirect if offset != 0 or not indirect if offset == 0
*/
void setIndirectOffset(off_t offset) {indirectOffset = offset;}
/**
* @brief is object indirect (indirectOffset != 0)
*/
bool isIndirect() {return indirectOffset != 0;}
/**
* @brief Get dictionary value
*/
DataType*& operator[](const std::string& key) { return _dictionary.value()[key]; }
/**
* @brief Check for key in object's dictionary
*/
bool hasKey(const std::string& key) { return _dictionary.value().count(key)?true:false; }
/**
* @brief is object new (or not updated) ?
*/
bool isNew() { return _isNew; }
/**
* @brief Mark object as updated
*/
void update(void) { _isNew = true; }
/**
* @brief Return object's id
*/
int objectId() { return _objectId; }
/**
* @brief Return object's generation number
*/
int generationNumber() { return _generationNumber; }
private:
int _objectId;
int _generationNumber;
off_t offset;
bool _isNew;
off_t indirectOffset;
Dictionary _dictionary;
std::vector<DataType*> _data;
};
}
#endif

109
include/uPDFParser.h Normal file
View File

@@ -0,0 +1,109 @@
/*
Copyright 2021 Grégory Soutadé
This file is part of uPDFParser.
uPDFParser is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
uPDFParser is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with uPDFParser. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _UPDFPARSER_HPP_
#define _UPDFPARSER_HPP_
#include <exception>
#include <map>
#include <vector>
#include <string>
#include <sstream>
#include <iostream>
#include <iomanip>
#include <string.h>
#include <unistd.h>
#include "uPDFTypes.h"
#include "uPDFObject.h"
namespace uPDFParser
{
/**
 * @brief PDF Parser
 *
 * Owns the Object instances stored in its object list (they are deleted
 * in the destructor) as well as the file descriptor it keeps in fd.
 *
 * NOTE(review): the implicitly generated copy operations would duplicate
 * the raw Object pointers and the descriptor, so a copied Parser would
 * release them twice. Do not copy Parser instances.
 */
class Parser
{
public:
    Parser():
        xrefOffset(0), fd(0), curOffset(0)
    {}

    ~Parser()
    {
        // 0 is used as the "no file" marker
        // NOTE(review): 0 is also a valid descriptor; confirm how parse()
        // assigns fd before switching to -1 as sentinel.
        if (fd) close(fd);

        std::vector<Object*>::iterator it;
        for(it=_objects.begin(); it!=_objects.end(); ++it)
            delete *it;
    }

    /**
     * @brief Parse a file
     */
    void parse(const std::string& filename);

    /**
     * @brief Write a PDF file with internal objects
     *
     * @param filename File path
     * @param update   Only append new objects if true
     *                 Write a new PDF file if false (not supported for now)
     */
    void write(const std::string& filename, bool update=false);

    /**
     * @brief Get internals (or parsed) objects
     */
    std::vector<Object*>& objects() { return _objects; }

    /**
     * @brief Add an object
     *
     * The pointer is stored as is and deleted when the parser is destroyed.
     */
    void addObject(Object* object) { _objects.push_back(object); }

private:
    void parseObject(std::string& token);
    void parseXref();
    void parseTrailer();
    std::string nextToken(bool exceptionOnEOF=true);
    DataType* parseType(std::string& token, Object* object, std::map<std::string, DataType*>& dict);
    void parseDictionary(Object* object, std::map<std::string, DataType*>& dict);
    DataType* parseSignedNumber(std::string& token);
    DataType* parseNumber(std::string& token);
    DataType* parseNumberOrReference(std::string& token);
    Array* parseArray(Object* object);
    String* parseString();
    HexaString* parseHexaString();
    Stream* parseStream();
    Name* parseName(std::string& token);
    void writeUpdate(const std::string& filename);

    std::vector<Object*> _objects;
    Object trailer;
    off_t xrefOffset;
    int fd;
    off_t curOffset;
};
}
#endif

View File

@@ -0,0 +1,72 @@
#ifndef _UPDFPARSER_COMMON_HPP_
#define _UPDFPARSER_COMMON_HPP_
#include <string.h>
#include <exception>
#include <iomanip>
#include <sstream>
#include <string>
namespace uPDFParser
{
/* Error codes carried by Exception; numbering starts at 1 and is contiguous */
enum PARSING_ERROR {
    UNABLE_TO_OPEN_FILE = 1,
    TRUNCATED_FILE      = 2,
    INVALID_HEADER      = 3,
    INVALID_LINE        = 4,
    INVALID_FOOTER      = 5,
    INVALID_DICTIONARY  = 6,
    INVALID_NAME        = 7,
    INVALID_BOOLEAN     = 8,
    INVALID_NUMBER      = 9,
    INVALID_STREAM      = 10,
    INVALID_TOKEN       = 11,
    INVALID_OBJECT      = 12,
    INVALID_TRAILER     = 13,
    INVALID_HEXASTRING  = 14,
    NOT_IMPLEMENTED     = 15
};
/**
 * @brief Exception class
 *
 * Builds, once at construction time, a three-line description made of the
 * error code (hexadecimal), the message and the "file:line" location, and
 * returns it from what(). The text is held in a std::string so that the
 * compiler-generated copy constructor, copy assignment and destructor are
 * all correct.
 */
class Exception : public std::exception
{
public:
    /**
     * @param code    Error code, returned by getErrorCode()
     * @param message Human readable message (copied into the description)
     * @param file    Source file name. Only the pointer is kept, so it must
     *                outlive the exception (the EXCEPTION macro passes __FILE__)
     * @param line    Source line number
     */
    Exception(int code, const char* message, const char* file, int line):
        code(code), line(line), file(file)
    {
        std::stringstream msg;
        msg << "Exception code : 0x" << std::setbase(16) << code << std::endl;
        // Streaming a null char pointer is not allowed: fall back to ""
        msg << "Message : " << (message ? message : "") << std::endl;
        msg << "File : " << (file ? file : "") << ":" << std::setbase(10) << line << std::endl;
        fullmessage = msg.str();
    }

    /**
     * @brief Full description built by the constructor
     */
    const char* what() const noexcept { return fullmessage.c_str(); }

    /**
     * @brief Error code given at construction
     */
    int getErrorCode() const { return code; }

private:
    int code, line;
    const char* file;
    std::string fullmessage;
};
/* Format `message` (any expression chain usable after `<<`) into a string
   and throw it as a uPDFParser::Exception tagged with the file and line of
   the place where the macro is used. */
#define EXCEPTION(code, message) \
{ \
    std::stringstream __msg; \
    __msg << message; \
    throw uPDFParser::Exception(code, __msg.str().c_str(), __FILE__, __LINE__); \
}
}
#endif

253
include/uPDFTypes.h Normal file
View File

@@ -0,0 +1,253 @@
/*
Copyright 2021 Grégory Soutadé
This file is part of uPDFParser.
uPDFParser is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
uPDFParser is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with uPDFParser. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _UPDFTYPES_HPP_
#define _UPDFTYPES_HPP_
#include <map>
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
namespace uPDFParser
{
/**
 * @brief Abstract base class of every PDF value type
 * From https://resources.infosecinstitute.com/topic/pdf-file-format-basic-structure/
 */
class DataType
{
public:
    /** Tag identifying the concrete kind of a value */
    enum TYPE {
        BOOLEAN,
        INTEGER,
        REAL,
        NAME,
        STRING,
        HEXASTRING,
        REFERENCE,
        ARRAY,
        DICTIONARY,
        STREAM
    };

    /**
     * @param kind Tag reported later by type()
     */
    DataType(TYPE kind):
        _type(kind)
    {}

    virtual ~DataType() {}

    /**
     * @brief Tag given at construction
     */
    TYPE type()
    {
        return _type;
    }

    /**
     * @brief Serialized (textual) form of the value
     */
    virtual std::string str() = 0;

    /**
     * @brief Allocate a copy of this value; implemented by each subclass
     */
    virtual DataType* clone() = 0;

protected:
    TYPE _type;
};
/**
 * @brief Boolean value
 */
class Boolean : public DataType
{
public:
    Boolean(bool value):
        DataType(DataType::TYPE::BOOLEAN), _value(value)
    {}

    /** @brief Allocate a new Boolean holding the same value */
    virtual DataType* clone()
    {
        Boolean* copy = new Boolean(_value);
        return copy;
    }

    /** @brief Stored boolean */
    bool value()
    {
        return _value;
    }

    /** @brief " true" or " false" (note the leading space) */
    virtual std::string str()
    {
        if (_value)
            return " true";
        return " false";
    }

private:
    bool _value;
};
/**
 * @brief Integer value
 *
 * The _signed flag is only stored and forwarded to clones here; its effect
 * on serialization is decided by str(), which is defined out of line
 * (presumably it records that an explicit sign was present - to confirm).
 */
class Integer : public DataType
{
public:
    Integer(int value, bool _signed=false):
        DataType(DataType::TYPE::INTEGER), _value(value), _signed(_signed)
    {}

    /** @brief Allocate a new Integer with the same value and sign flag */
    virtual DataType* clone()
    {
        Integer* copy = new Integer(_value, _signed);
        return copy;
    }

    /** @brief Stored integer */
    int value()
    {
        return _value;
    }

    virtual std::string str();

private:
    int _value;
    bool _signed;
};
/**
 * @brief Real (floating point) value
 *
 * The _signed flag is only stored and forwarded to clones here; its effect
 * on serialization is decided by str(), which is defined out of line
 * (presumably it records that an explicit sign was present - to confirm).
 */
class Real : public DataType
{
public:
    Real(float value, bool _signed=false):
        DataType(DataType::TYPE::REAL), _value(value), _signed(_signed)
    {}

    /** @brief Allocate a new Real with the same value and sign flag */
    virtual DataType* clone()
    {
        Real* copy = new Real(_value, _signed);
        return copy;
    }

    /** @brief Stored number */
    float value()
    {
        return _value;
    }

    virtual std::string str();

private:
    float _value;
    bool _signed;
};
/**
 * @brief Name value
 *
 * str() returns the stored text unchanged while value() returns it without
 * its first character (presumably the leading '/' of a PDF name; the
 * constructor, defined out of line, decides what is stored - to confirm).
 */
class Name : public DataType
{
public:
    Name(const std::string&);

    /** @brief Allocate a new Name built from the same stored text */
    virtual DataType* clone() {return new Name(_value);}

    /**
     * @brief Stored text without its first character
     *
     * @return Empty string when nothing is stored, instead of reading past
     *         the end of the buffer
     */
    std::string value() {
        if (_value.empty())
            return std::string();
        return _value.substr(1);
    }

    /** @brief Stored text, as is */
    virtual std::string str() { return _value;}

private:
    std::string _value;
};
/**
 * @brief Literal string value, serialized between parentheses
 */
class String : public DataType
{
public:
    String(const std::string&);

    /** @brief Allocate a new String built from the same stored text */
    virtual DataType* clone() {return new String(_value);}

    /** @brief Stored text, as is */
    std::string value() {return _value;}

    /**
     * @brief Stored text wrapped in '(' ... ')'
     *
     * '(' and ')' get a backslash prepended unless they are already
     * escaped. A backslash that is itself escaped ("\\") does not escape
     * the character that follows it, so the parenthesis in "\\(" is
     * escaped too.
     */
    virtual std::string str() {
        std::string res("(");
        // True when the previous character is a backslash that is not
        // itself the second half of an escaped backslash
        bool escaped = false;

        for(std::string::size_type i=0; i<_value.size(); ++i)
        {
            const char c = _value[i];

            if ((c == '(' || c == ')') && !escaped)
                res += '\\';
            res += c;

            escaped = (c == '\\') && !escaped;
        }

        res += ")";
        return res;
    }

private:
    std::string _value;
};
/**
 * @brief Hexadecimal string value, serialized between '<' and '>'
 */
class HexaString : public DataType
{
public:
    HexaString(const std::string&);

    /** @brief Allocate a new HexaString built from the same stored text */
    virtual DataType* clone()
    {
        HexaString* copy = new HexaString(_value);
        return copy;
    }

    /** @brief Stored text, without the surrounding brackets */
    std::string value()
    {
        return _value;
    }

    /** @brief Stored text wrapped in '<' ... '>' */
    virtual std::string str()
    {
        std::string res("<");
        res += _value;
        res += ">";
        return res;
    }

private:
    std::string _value;
};
/**
 * @brief Reference to another object (object id + generation number)
 */
class Reference : public DataType
{
public:
    Reference(int objectId, int generationNumber):
        DataType(DataType::TYPE::REFERENCE), objectId(objectId), generationNumber(generationNumber)
    {}

    /** @brief Allocate a new Reference to the same id / generation */
    virtual DataType* clone()
    {
        Reference* copy = new Reference(objectId, generationNumber);
        return copy;
    }

    /** @brief Referenced object id (the generation number is not exposed) */
    int value()
    {
        return objectId;
    }

    /** @brief " <id> <generation> R" (note the leading space) */
    virtual std::string str()
    {
        std::stringstream out;
        out << " " << objectId;
        out << " " << generationNumber;
        out << " R";
        return out.str();
    }

private:
    int objectId, generationNumber;
};
/**
 * @brief Array of values
 *
 * NOTE(review): elements are stored as raw pointers and this class has no
 * destructor, so it never frees them; clone() nevertheless deep-copies
 * every element.
 */
class Array : public DataType
{
public:
    Array():
        DataType(DataType::TYPE::ARRAY)
    {}

    /** @brief Append a value at the end of the array */
    void addData(DataType* data)
    {
        _value.push_back(data);
    }

    /** @brief Allocate a new Array holding a clone of every element */
    virtual DataType* clone()
    {
        Array* copy = new Array();
        for (std::vector<DataType*>::size_type i = 0; i < _value.size(); ++i)
        {
            DataType* element = _value[i];
            copy->addData(element->clone());
        }
        return copy;
    }

    /** @brief Direct (mutable) access to the element vector */
    std::vector<DataType*>& value()
    {
        return _value;
    }

    virtual std::string str();

private:
    std::vector<DataType*> _value;
};
/**
 * @brief Dictionary of values, indexed by string keys
 *
 * NOTE(review): values are stored as raw pointers and this class has no
 * destructor or user-defined copy operations: destroying it frees nothing
 * and copying it by value shares the pointers. Only clone() duplicates
 * the values.
 */
class Dictionary : public DataType
{
public:
    Dictionary():
        DataType(DataType::TYPE::DICTIONARY)
    {}

    void addData(const std::string&, DataType*);

    /** @brief Allocate a new Dictionary holding a clone of every value */
    virtual DataType* clone()
    {
        Dictionary* copy = new Dictionary();
        std::map<std::string, DataType*>::iterator entry = _value.begin();
        while (entry != _value.end())
        {
            copy->addData(entry->first, entry->second->clone());
            ++entry;
        }
        return copy;
    }

    /** @brief Direct (mutable) access to the underlying map */
    std::map<std::string, DataType*>& value()
    {
        return _value;
    }

    virtual std::string str();

private:
    std::map<std::string, DataType*> _value;
};
/**
 * @brief Stream value, described by a start and an end offset
 *
 * The offsets are only stored and forwarded to clones; str() emits the
 * "stream" / "endstream" keywords with nothing in between.
 */
class Stream : public DataType
{
public:
    Stream(int startOffset, int endOffset):
        DataType(DataType::TYPE::STREAM), startOffset(startOffset),
        endOffset(endOffset)
    {}

    /** @brief Allocate a new Stream with the same offsets */
    virtual DataType* clone()
    {
        Stream* copy = new Stream(startOffset, endOffset);
        return copy;
    }

    /** @brief Fixed text "stream\nendstream\n" */
    virtual std::string str()
    {
        return "stream\nendstream\n";
    }

private:
    int startOffset, endOffset;
};
}
#endif