2021-08-21 18:22:58 +02:00
|
|
|
/*
  Copyright 2021 Grégory Soutadé

  This file is part of uPDFParser.

  uPDFParser is free software: you can redistribute it and/or modify
  it under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  uPDFParser is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with uPDFParser. If not, see <http://www.gnu.org/licenses/>.
*/
|
|
|
|
|
|
|
|
#ifndef _UPDFPARSER_HPP_
|
|
|
|
#define _UPDFPARSER_HPP_
|
|
|
|
|
|
|
|
#include <exception>
|
|
|
|
#include <map>
|
|
|
|
#include <vector>
|
|
|
|
#include <string>
|
|
|
|
#include <sstream>
|
|
|
|
#include <iostream>
|
|
|
|
#include <iomanip>
|
|
|
|
#include <string.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
#include "uPDFTypes.h"
|
|
|
|
#include "uPDFObject.h"
|
|
|
|
|
|
|
|
namespace uPDFParser
|
|
|
|
{
|
2021-12-18 17:29:41 +01:00
|
|
|
/* Forward declaration: Parser stores and returns std::vector<XRefValue>,
   while the full definition of XRefValue only appears later in this header. */
class XRefValue;
|
|
|
|
|
2021-08-21 18:22:58 +02:00
|
|
|
/**
|
|
|
|
* @brief PDF Parser
|
|
|
|
*/
|
|
|
|
class Parser
|
|
|
|
{
|
|
|
|
public:
|
2021-12-18 17:29:41 +01:00
|
|
|
Parser(int version_major=1, int version_minor=6):
|
2022-03-14 19:57:25 +01:00
|
|
|
version_major(version_major), version_minor(version_minor),
|
|
|
|
xrefObject(0), xrefOffset((off_t)-1), fd(0), curOffset(0)
|
2021-08-21 18:22:58 +02:00
|
|
|
{}
|
|
|
|
|
|
|
|
~Parser()
|
|
|
|
{
|
|
|
|
if (fd) close(fd);
|
|
|
|
|
|
|
|
std::vector<Object*>::iterator it;
|
|
|
|
for(it=_objects.begin(); it!=_objects.end(); it++)
|
|
|
|
delete *it;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Parse a file
|
|
|
|
*/
|
|
|
|
void parse(const std::string& filename);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Write a PDF file with internal objects
|
|
|
|
*
|
|
|
|
* @param filename File path
|
|
|
|
* @param update Only append new objects if true
|
|
|
|
* Write a new PDF file if false (not supported for now)
|
|
|
|
*/
|
|
|
|
void write(const std::string& filename, bool update=false);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Get internals (or parsed) objects
|
|
|
|
*/
|
|
|
|
std::vector<Object*>& objects() { return _objects; }
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Add an object
|
|
|
|
*/
|
|
|
|
void addObject(Object* object) { _objects.push_back(object); }
|
2021-12-18 17:29:41 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Return trailer object
|
|
|
|
*/
|
|
|
|
Object& getTrailer() {return trailer; }
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Return xref table. This table is read and updated only once after parse
|
|
|
|
* Further add/delete will make it incoherent
|
|
|
|
*/
|
|
|
|
const std::vector<XRefValue>& xrefTable() {return _xrefTable;}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Return a specific object
|
|
|
|
*/
|
|
|
|
Object* getObject(int objectId, int generationNumber=0);
|
2021-08-21 18:22:58 +02:00
|
|
|
|
|
|
|
private:
|
|
|
|
void parseObject(std::string& token);
|
2021-12-18 17:29:41 +01:00
|
|
|
void parseHeader();
|
2021-09-09 20:49:00 +02:00
|
|
|
void parseStartXref();
|
2021-09-28 14:35:45 +02:00
|
|
|
bool parseXref();
|
|
|
|
bool parseTrailer();
|
2021-08-21 18:22:58 +02:00
|
|
|
|
2021-09-28 14:35:45 +02:00
|
|
|
std::string nextToken(bool exceptionOnEOF=true, bool readComment=false);
|
2021-08-21 18:22:58 +02:00
|
|
|
|
|
|
|
DataType* parseType(std::string& token, Object* object, std::map<std::string, DataType*>& dict);
|
|
|
|
void parseDictionary(Object* object, std::map<std::string, DataType*>& dict);
|
|
|
|
DataType* parseSignedNumber(std::string& token);
|
|
|
|
DataType* parseNumber(std::string& token);
|
|
|
|
DataType* parseNumberOrReference(std::string& token);
|
|
|
|
Array* parseArray(Object* object);
|
|
|
|
String* parseString();
|
|
|
|
HexaString* parseHexaString();
|
2021-09-09 20:46:46 +02:00
|
|
|
Stream* parseStream(Object* object);
|
2021-08-21 18:22:58 +02:00
|
|
|
Name* parseName(std::string& token);
|
|
|
|
|
2022-03-14 19:57:25 +01:00
|
|
|
void repairTrailer();
|
2021-08-21 18:22:58 +02:00
|
|
|
void writeUpdate(const std::string& filename);
|
2021-12-18 17:29:41 +01:00
|
|
|
|
|
|
|
int version_major, version_minor;
|
2021-08-21 18:22:58 +02:00
|
|
|
std::vector<Object*> _objects;
|
2022-03-14 19:57:25 +01:00
|
|
|
Object trailer, *xrefObject;
|
2021-08-21 18:22:58 +02:00
|
|
|
off_t xrefOffset;
|
|
|
|
int fd;
|
|
|
|
off_t curOffset;
|
2021-12-18 17:29:41 +01:00
|
|
|
std::vector<XRefValue> _xrefTable;
|
|
|
|
};
|
|
|
|
|
|
|
|
class XRefValue
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
XRefValue(int objectId, int offset, int generationNumber, bool used, Object* object=0):
|
|
|
|
_objectId(objectId), _offset(offset), _generationNumber(generationNumber), _used(used),
|
|
|
|
_object(object)
|
|
|
|
{}
|
|
|
|
|
|
|
|
int objectId() {return _objectId;}
|
|
|
|
int offset() {return _offset;}
|
|
|
|
int generationNumber() {return _generationNumber;}
|
|
|
|
bool used() {return _used;}
|
|
|
|
|
|
|
|
void setObject(Object* object) { _object = object; }
|
|
|
|
Object* object() { return _object; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
int _objectId;
|
|
|
|
int _offset;
|
|
|
|
int _generationNumber;
|
|
|
|
bool _used;
|
|
|
|
Object* _object;
|
2021-08-21 18:22:58 +02:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|