Initial commit
This commit is contained in:
150
include/uPDFObject.h
Normal file
150
include/uPDFObject.h
Normal file
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
Copyright 2021 Grégory Soutadé
|
||||
|
||||
This file is part of uPDFParser.
|
||||
|
||||
uPDFParser is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
uPDFParser is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with uPDFParser. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _UPDFOBJECT_HPP_
|
||||
#define _UPDFOBJECT_HPP_
|
||||
|
||||
#include "uPDFTypes.h"
|
||||
|
||||
namespace uPDFParser
|
||||
{
|
||||
/**
|
||||
* @brief PDF Object
|
||||
*/
|
||||
class Object
|
||||
{
|
||||
public:
|
||||
Object():
|
||||
_objectId(0), _generationNumber(0),
|
||||
offset(0), _isNew(false), indirectOffset(0)
|
||||
{}
|
||||
|
||||
/**
|
||||
* @brief Object constructor
|
||||
*
|
||||
* @param objectId Object ID
|
||||
* @param generationNumber Object generation number
|
||||
* @param offset Offset of object in current PDF file
|
||||
* @param isNew false if object has been read from file,
|
||||
* true if it has been created or updated
|
||||
* @param indirectOffset Object is indirect
|
||||
*/
|
||||
Object(int objectId, int generationNumber, uint64_t offset, bool isNew=false,
|
||||
off_t indirectOffset=0):
|
||||
_objectId(objectId), _generationNumber(generationNumber),
|
||||
offset(offset), _isNew(isNew), indirectOffset(indirectOffset)
|
||||
{}
|
||||
|
||||
~Object()
|
||||
{
|
||||
std::vector<DataType*>::iterator it;
|
||||
for(it=_data.begin(); it!=_data.end(); it++)
|
||||
delete *it;
|
||||
}
|
||||
|
||||
Object(const Object& other)
|
||||
{
|
||||
_objectId = other._objectId;
|
||||
_generationNumber = other._generationNumber;
|
||||
offset = other.offset;
|
||||
indirectOffset = other.indirectOffset;
|
||||
_isNew = true;
|
||||
|
||||
std::vector<DataType*>::const_iterator it;
|
||||
for(it=other._data.begin(); it!=other._data.end(); it++)
|
||||
_data.push_back((*it)->clone());
|
||||
|
||||
const std::map<std::string, DataType*> _dict = ((Dictionary)other._dictionary).value();
|
||||
std::map<std::string, DataType*>& _myDict = _dictionary.value();
|
||||
std::map<std::string, DataType*>::const_iterator it2;
|
||||
for(it2=_dict.begin(); it2!=_dict.end(); it2++)
|
||||
_myDict[it2->first] = it2->second->clone();
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Clone current object (call copy constructor)
|
||||
*/
|
||||
Object* clone() { return new Object(*this); }
|
||||
|
||||
/**
|
||||
* @brief Return internal dictionary
|
||||
*/
|
||||
Dictionary& dictionary() {return _dictionary;}
|
||||
|
||||
/**
|
||||
* @brief Return vector of data contained into object
|
||||
*/
|
||||
std::vector<DataType*>& data() {return _data;}
|
||||
|
||||
/**
|
||||
* @brief Object string representation
|
||||
*/
|
||||
std::string str();
|
||||
|
||||
/**
|
||||
* @brief Set object as indirect if offset != 0 or not indirect if offset == 0
|
||||
*/
|
||||
void setIndirectOffset(off_t offset) {indirectOffset = offset;}
|
||||
|
||||
/**
|
||||
* @brief is object indirect (indirectOffset != 0)
|
||||
*/
|
||||
bool isIndirect() {return indirectOffset != 0;}
|
||||
|
||||
/**
|
||||
* @brief Get dictionary value
|
||||
*/
|
||||
DataType*& operator[](const std::string& key) { return _dictionary.value()[key]; }
|
||||
|
||||
/**
|
||||
* @brief Check for key in object's dictionary
|
||||
*/
|
||||
bool hasKey(const std::string& key) { return _dictionary.value().count(key)?true:false; }
|
||||
|
||||
/**
|
||||
* @brief is object new (or not updated) ?
|
||||
*/
|
||||
bool isNew() { return _isNew; }
|
||||
|
||||
/**
|
||||
* @brief Mark object as updated
|
||||
*/
|
||||
void update(void) { _isNew = true; }
|
||||
|
||||
/**
|
||||
* @brief Return object's id
|
||||
*/
|
||||
int objectId() { return _objectId; }
|
||||
|
||||
/**
|
||||
* @brief Return object's generation number
|
||||
*/
|
||||
int generationNumber() { return _generationNumber; }
|
||||
|
||||
private:
|
||||
int _objectId;
|
||||
int _generationNumber;
|
||||
off_t offset;
|
||||
bool _isNew;
|
||||
off_t indirectOffset;
|
||||
Dictionary _dictionary;
|
||||
std::vector<DataType*> _data;
|
||||
};
|
||||
}
|
||||
#endif
|
109
include/uPDFParser.h
Normal file
109
include/uPDFParser.h
Normal file
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
Copyright 2021 Grégory Soutadé
|
||||
|
||||
This file is part of uPDFParser.
|
||||
|
||||
uPDFParser is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
uPDFParser is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with uPDFParser. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _UPDFPARSER_HPP_
|
||||
#define _UPDFPARSER_HPP_
|
||||
|
||||
#include <exception>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "uPDFTypes.h"
|
||||
#include "uPDFObject.h"
|
||||
|
||||
namespace uPDFParser
|
||||
{
|
||||
/**
|
||||
* @brief PDF Parser
|
||||
*/
|
||||
class Parser
|
||||
{
|
||||
public:
|
||||
Parser():
|
||||
fd(0)
|
||||
{}
|
||||
|
||||
~Parser()
|
||||
{
|
||||
if (fd) close(fd);
|
||||
|
||||
std::vector<Object*>::iterator it;
|
||||
for(it=_objects.begin(); it!=_objects.end(); it++)
|
||||
delete *it;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Parse a file
|
||||
*/
|
||||
void parse(const std::string& filename);
|
||||
|
||||
/**
|
||||
* @brief Write a PDF file with internal objects
|
||||
*
|
||||
* @param filename File path
|
||||
* @param update Only append new objects if true
|
||||
* Write a new PDF file if false (not supported for now)
|
||||
*/
|
||||
void write(const std::string& filename, bool update=false);
|
||||
|
||||
/**
|
||||
* @brief Get internals (or parsed) objects
|
||||
*/
|
||||
std::vector<Object*>& objects() { return _objects; }
|
||||
|
||||
/**
|
||||
* @brief Add an object
|
||||
*/
|
||||
void addObject(Object* object) { _objects.push_back(object); }
|
||||
|
||||
private:
|
||||
void parseObject(std::string& token);
|
||||
void parseXref();
|
||||
void parseTrailer();
|
||||
|
||||
std::string nextToken(bool exceptionOnEOF=true);
|
||||
|
||||
DataType* parseType(std::string& token, Object* object, std::map<std::string, DataType*>& dict);
|
||||
void parseDictionary(Object* object, std::map<std::string, DataType*>& dict);
|
||||
DataType* parseSignedNumber(std::string& token);
|
||||
DataType* parseNumber(std::string& token);
|
||||
DataType* parseNumberOrReference(std::string& token);
|
||||
Array* parseArray(Object* object);
|
||||
String* parseString();
|
||||
HexaString* parseHexaString();
|
||||
Stream* parseStream();
|
||||
Name* parseName(std::string& token);
|
||||
|
||||
void writeUpdate(const std::string& filename);
|
||||
|
||||
std::vector<Object*> _objects;
|
||||
Object trailer;
|
||||
off_t xrefOffset;
|
||||
int fd;
|
||||
off_t curOffset;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
72
include/uPDFParser_common.h
Normal file
72
include/uPDFParser_common.h
Normal file
@@ -0,0 +1,72 @@
|
||||
#ifndef _UPDFPARSER_COMMON_HPP_
|
||||
#define _UPDFPARSER_COMMON_HPP_
|
||||
|
||||
#include <string.h>

#include <exception>
#include <iomanip>
#include <sstream>
#include <string>
|
||||
|
||||
namespace uPDFParser
|
||||
{
|
||||
/**
 * @brief Error codes carried by uPDFParser exceptions
 *
 * Codes start at 1 and are consecutive; values are spelled out so that a
 * code read from an exception message can be looked up directly.
 */
enum PARSING_ERROR {
    UNABLE_TO_OPEN_FILE = 1,
    TRUNCATED_FILE      = 2,
    INVALID_HEADER      = 3,
    INVALID_LINE        = 4,
    INVALID_FOOTER      = 5,
    INVALID_DICTIONARY  = 6,
    INVALID_NAME        = 7,
    INVALID_BOOLEAN     = 8,
    INVALID_NUMBER      = 9,
    INVALID_STREAM      = 10,
    INVALID_TOKEN       = 11,
    INVALID_OBJECT      = 12,
    INVALID_TRAILER     = 13,
    INVALID_HEXASTRING  = 14,
    NOT_IMPLEMENTED     = 15
};
|
||||
|
||||
/**
 * @brief Exception class
 *
 * Carries an error code plus a preformatted, multi-line description
 * (code in hexadecimal, message, source file and line) returned by what().
 *
 * The description is held in a std::string, so copy construction, copy
 * assignment and destruction are the compiler generated ones and each
 * instance owns its own text.
 */
class Exception : public std::exception
{
public:
    /**
     * @brief Build an exception and format its description
     *
     * @param code    Error code, printed in hexadecimal
     * @param message Human readable message (null is treated as empty)
     * @param file    Source file raising the error (null is treated as empty).
     *                The pointer itself is kept, not the text.
     * @param line    Source line raising the error
     */
    Exception(int code, const char* message, const char* file, int line):
        code(code), line(line), file(file)
    {
        std::stringstream msg;
        msg << "Exception code : 0x" << std::setbase(16) << code << std::endl;
        // Streaming a null char pointer is undefined: substitute ""
        msg << "Message : " << (message ? message : "") << std::endl;
        msg << "File : " << (file ? file : "") << ":" << std::setbase(10) << line << std::endl;
        fullmessage = msg.str();
    }

    /**
     * @brief Full description built by the constructor
     *
     * The returned pointer stays valid until this exception is destroyed
     * or assigned to.
     */
    const char * what () const noexcept { return fullmessage.c_str(); }

    /**
     * @brief Error code given to the constructor
     */
    int getErrorCode() const {return code;}

private:
    int code, line;
    const char* file;
    std::string fullmessage;
};
|
||||
|
||||
/**
 * @brief Throw a uPDFParser::Exception tagged with the current file and line
 *
 * @param code    Error code forwarded to the exception
 * @param message Anything that can follow "stream <<"; it is formatted
 *                through a std::stringstream, so several values may be
 *                chained with <<
 *
 * The macro expands to a braced block, not to a single statement: using it
 * with a trailing ';' as the body of an "if" that has an "else" branch does
 * not parse. Wrap the call in braces in that situation.
 */
#define EXCEPTION(code, message) \
{std::stringstream __msg;__msg << message; throw uPDFParser::Exception(code, __msg.str().c_str(), __FILE__, __LINE__);}
|
||||
}
|
||||
#endif
|
253
include/uPDFTypes.h
Normal file
253
include/uPDFTypes.h
Normal file
@@ -0,0 +1,253 @@
|
||||
/*
|
||||
Copyright 2021 Grégory Soutadé
|
||||
|
||||
This file is part of uPDFParser.
|
||||
|
||||
uPDFParser is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
uPDFParser is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with uPDFParser. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _UPDFTYPES_HPP_
|
||||
#define _UPDFTYPES_HPP_
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
namespace uPDFParser
|
||||
{
|
||||
/**
 * @brief Base class for PDF object type
 * From https://resources.infosecinstitute.com/topic/pdf-file-format-basic-structure/
 *
 * Every concrete type records its TYPE tag and provides a string
 * representation and a clone operation.
 */
class DataType
{
public:
    enum TYPE {
        BOOLEAN,
        INTEGER,
        REAL,
        NAME,
        STRING,
        HEXASTRING,
        REFERENCE,
        ARRAY,
        DICTIONARY,
        STREAM
    };

    /**
     * @brief Record the tag of the concrete type
     */
    DataType(TYPE _type):
        _type(_type)
    {
    }

    virtual ~DataType()
    {
    }

    /**
     * @brief Get current data type
     */
    TYPE type()
    {
        return this->_type;
    }

    /**
     * @brief String representation for serialization
     */
    virtual std::string str() = 0;

    /**
     * @brief Clone current object
     */
    virtual DataType* clone() = 0;

protected:
    TYPE _type;
};
|
||||
|
||||
class Boolean : public DataType
|
||||
{
|
||||
public:
|
||||
Boolean(bool value):
|
||||
DataType(DataType::TYPE::BOOLEAN), _value(value)
|
||||
{}
|
||||
|
||||
virtual DataType* clone() {return new Boolean(_value);}
|
||||
bool value() {return _value;}
|
||||
virtual std::string str() { return (_value)?" true":" false";}
|
||||
|
||||
private:
|
||||
bool _value;
|
||||
};
|
||||
|
||||
class Integer : public DataType
|
||||
{
|
||||
public:
|
||||
Integer(int value, bool _signed=false):
|
||||
DataType(DataType::TYPE::INTEGER), _value(value), _signed(_signed)
|
||||
{}
|
||||
|
||||
virtual DataType* clone() {return new Integer(_value, _signed);}
|
||||
int value() {return _value;}
|
||||
virtual std::string str();
|
||||
|
||||
private:
|
||||
int _value;
|
||||
bool _signed;
|
||||
};
|
||||
|
||||
class Real : public DataType
|
||||
{
|
||||
public:
|
||||
Real(float value, bool _signed=false):
|
||||
DataType(DataType::TYPE::REAL), _value(value), _signed(_signed)
|
||||
{}
|
||||
|
||||
virtual DataType* clone() {return new Real(_value, _signed);}
|
||||
float value() {return _value;}
|
||||
virtual std::string str();
|
||||
|
||||
private:
|
||||
float _value;
|
||||
bool _signed;
|
||||
};
|
||||
|
||||
class Name : public DataType
|
||||
{
|
||||
public:
|
||||
Name(const std::string&);
|
||||
|
||||
virtual DataType* clone() {return new Name(_value);}
|
||||
std::string value() {
|
||||
const char* name = _value.c_str();
|
||||
return std::string(&name[1]);
|
||||
}
|
||||
virtual std::string str() { return _value;}
|
||||
|
||||
private:
|
||||
std::string _value;
|
||||
};
|
||||
|
||||
class String : public DataType
|
||||
{
|
||||
public:
|
||||
String(const std::string&);
|
||||
|
||||
virtual DataType* clone() {return new String(_value);}
|
||||
std::string value() {return _value;}
|
||||
|
||||
// Escape '(' and ')' characters
|
||||
virtual std::string str() {
|
||||
char prev = '\0';
|
||||
std::string res("(");
|
||||
|
||||
for(unsigned int i=0; i<_value.size(); i++)
|
||||
{
|
||||
if ((_value[i] == '(' || _value[i] == ')') &&
|
||||
prev != '\\')
|
||||
res += '\\';
|
||||
res += _value[i];
|
||||
prev = _value[i];
|
||||
}
|
||||
|
||||
res += ")";
|
||||
return res;
|
||||
}
|
||||
|
||||
private:
|
||||
std::string _value;
|
||||
};
|
||||
|
||||
class HexaString : public DataType
|
||||
{
|
||||
public:
|
||||
HexaString(const std::string&);
|
||||
|
||||
virtual DataType* clone() {return new HexaString(_value);}
|
||||
std::string value() {return _value;}
|
||||
virtual std::string str() { return std::string("<") + _value + std::string(">");}
|
||||
|
||||
private:
|
||||
std::string _value;
|
||||
};
|
||||
|
||||
class Reference : public DataType
|
||||
{
|
||||
public:
|
||||
Reference(int objectId, int generationNumber):
|
||||
DataType(DataType::TYPE::REFERENCE), objectId(objectId), generationNumber(generationNumber)
|
||||
{}
|
||||
|
||||
virtual DataType* clone() {return new Reference(objectId, generationNumber);}
|
||||
int value() {return objectId;}
|
||||
virtual std::string str() {
|
||||
std::stringstream res;
|
||||
res << " " << objectId << " " << generationNumber << " R";
|
||||
return res.str();
|
||||
}
|
||||
|
||||
private:
|
||||
int objectId, generationNumber;
|
||||
};
|
||||
|
||||
class Array : public DataType
|
||||
{
|
||||
public:
|
||||
Array():
|
||||
DataType(DataType::TYPE::ARRAY)
|
||||
{}
|
||||
|
||||
void addData(DataType* data) {_value.push_back(data);}
|
||||
|
||||
virtual DataType* clone() {
|
||||
Array* res = new Array();
|
||||
std::vector<DataType*>::iterator it;
|
||||
for(it=_value.begin(); it!=_value.end(); it++)
|
||||
res->addData((*it)->clone());
|
||||
return res;
|
||||
}
|
||||
std::vector<DataType*>& value() {return _value;}
|
||||
virtual std::string str();
|
||||
|
||||
private:
|
||||
std::vector<DataType*> _value;
|
||||
};
|
||||
|
||||
class Dictionary : public DataType
|
||||
{
|
||||
public:
|
||||
Dictionary():
|
||||
DataType(DataType::TYPE::DICTIONARY)
|
||||
{}
|
||||
|
||||
void addData(const std::string&, DataType*);
|
||||
|
||||
virtual DataType* clone() {
|
||||
Dictionary* res = new Dictionary();
|
||||
std::map<std::string, DataType*>::iterator it;
|
||||
for(it=_value.begin(); it!=_value.end(); it++)
|
||||
{
|
||||
res->addData(it->first, it->second->clone());
|
||||
}
|
||||
return res;
|
||||
}
|
||||
std::map<std::string, DataType*>& value() {return _value;}
|
||||
virtual std::string str();
|
||||
|
||||
private:
|
||||
std::map<std::string, DataType*> _value;
|
||||
};
|
||||
|
||||
class Stream : public DataType
|
||||
{
|
||||
public:
|
||||
Stream(int startOffset, int endOffset):
|
||||
DataType(DataType::TYPE::STREAM), startOffset(startOffset),
|
||||
endOffset(endOffset)
|
||||
{}
|
||||
virtual DataType* clone() {return new Stream(startOffset, endOffset);}
|
||||
virtual std::string str() { return "stream\nendstream\n";}
|
||||
|
||||
private:
|
||||
int startOffset, endOffset;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user