23#include "HOCRTextBox.h"
24#include "resolution.h"
75 HOCRDocument(
const QImage &image, QStringList languages=QStringList()) {
read(image,languages);};
88 bool hasError()
const {
return !_error.isEmpty();};
96 QString
error()
const {
return _error; };
111 QSet<QString>
warnings()
const {
return _warnings;};
119 QSet<QString>
system()
const {
return _OCRSystem;};
136 QList<HOCRTextBox>
pages()
const {
return _pages;};
142 bool isEmpty()
const {
return _pages.isEmpty();};
163 if (_pages.size() > 0)
164 return _pages.takeFirst();
191 void read(
const QString& fileName);
210 void read(
const QImage &image,
const QStringList& languages=QStringList());
248 QString
toPDF(
const QString& fileName,
resolution _resolution,
const QString& title=QString(),
const QPageSize& overridePageSize=QPageSize(), QFont *overrideFont=0)
const;
264 QList<QImage>
toImages(QFont *overrideFont=0, QImage::Format format=QImage::Format_Grayscale8)
const;
308 QPageSize findPageSize(
int pageNumber,
resolution _resolution,
const QPageSize &overridePageSize)
const;
315 QSet<QString> _OCRSystem;
319 QSet<QString> _OCRCapabilities;
322 QList<HOCRTextBox> _pages;
325 QSet<QString> _warnings;
HOCRDocument(QString fileName)
Constructs an HOCR document from a file.
HOCRDocument(QIODevice *device)
Constructs an HOCR document from a QIODevice.
QString toText() const
Export this document as text.
static bool areLanguagesSupportedByTesseract(const QStringList &lingos)
Check if languages are supported by tesseract.
void read(const QImage &image, const QStringList &languages=QStringList())
Generates an HOCR document by running the tesseract OCR engine.
void read(QIODevice *device)
Reads an HOCR document from a QIODevice.
QList< HOCRTextBox > pages() const
Pages in the document.
bool isEmpty() const
Returns true if the document contains no pages.
QString error() const
Error message.
QSet< QString > capabilities() const
OCR capabilities.
HOCRDocument()
Constructs an empty HOCR document.
HOCRTextBox takeFirstPage()
Removes the first page of the document and returns it.
void read(const QString &fileName)
Reads an HOCR document from a file.
QFont suggestFont() const
Suggest font.
QSet< QString > warnings() const
Warning messages.
QList< QImage > toImages(QFont *overrideFont=0, QImage::Format format=QImage::Format_Grayscale8) const
Export to images.
bool hasText() const
Check if the document contains text.
HOCRDocument(const QImage &image, QStringList languages=QStringList())
Constructs an HOCR document by running the tesseract OCR engine.
void append(const HOCRDocument &other)
Appends other HOCRDocument.
static QStringList tesseractLanguages()
List of languages supported by tesseract.
bool hasWarnings() const
Warning status.
bool hasError() const
Error status.
QString toPDF(const QString &fileName, resolution _resolution, const QString &title=QString(), const QPageSize &overridePageSize=QPageSize(), QFont *overrideFont=0) const
Export to PDF.
void clear()
Resets the document.
QSet< QString > system() const
System(s) that generated this file.
Text box, as defined in an HOCR file.
The resolution class stores a resolution and converts between units.