You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
291 lines
11 KiB
C
291 lines
11 KiB
C
|
5 years ago
|
/*!
|
||
|
|
* \file TextDetector.h
|
||
|
|
* \date 2020/03/12
|
||
|
|
*
|
||
|
|
* \author Lin, Chi
|
||
|
|
* Contact: lin.chi@hzleaper.com
|
||
|
|
*
|
||
|
|
*
|
||
|
|
* \note
|
||
|
|
*/
|
||
|
|
|
||
|
|
#ifndef __TextDetector_h_
|
||
|
|
#define __TextDetector_h_
|
||
|
|
|
||
|
|
#include "StdUtils.h"
|
||
|
|
#include "CVUtils.h"
|
||
|
|
#include "CyclopsEnums.h"
|
||
|
|
#include "CyclopsModules.h"
|
||
|
|
#include "DetectRoi.h"
|
||
|
|
#include "TextInstance.h"
|
||
|
|
#include "SampleDBManager.h"
|
||
|
|
|
||
|
|
// Forward declaration: this header only takes OCRParamPack by reference
// (see TextDetector::genOCRParamPack), so its full definition is not needed here.
struct OCRParamPack;
|
||
|
|
// Forward declaration: this header only holds OCRFontDB through a
// std::shared_ptr (TextDetector::mFontDbPtr), so its full definition is not needed here.
class OCRFontDB;
|
||
|
|
|
||
|
|
class TextDetector : public ICyclopsModuleInstance
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
enum ErrorCode {
|
||
|
|
NoError = 1,
|
||
|
|
// negative values for error
|
||
|
|
ErrNoFontDB = -9999,
|
||
|
|
ErrNoCharDB,
|
||
|
|
ErrModifyOOBFontDB,
|
||
|
|
ErrFailAddToFontDB,
|
||
|
|
ErrFailRemoveFromFontDB,
|
||
|
|
ErrFailTrainFontDB,
|
||
|
|
ErrFailTrainTooFewChar,
|
||
|
|
ErrFailTrainTooFewSample,
|
||
|
|
ErrFailTrainFailExtFeature,
|
||
|
|
};
|
||
|
|
|
||
|
|
/*! \fn setPolarity
|
||
|
|
* Define polarity of text, Polarity::Black2White means black text on white background, Polarity::White2Black means white text on black background,
|
||
|
|
* default to Polarity::Either which means we'll decide polarity on the fly by fraction of foreground, see also getPolarity()
|
||
|
|
* \fn getPolarity
|
||
|
|
* Get value of polarity of text, see also setPolarity()
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER2(Polarity, Polarity, Polarity::Black2White, Polarity::Either)
|
||
|
|
/*! \fn setCharWidth
|
||
|
|
* Define the character width range [val1, val2], default to [10, 100]
|
||
|
|
* \fn getCharWidthStart
|
||
|
|
* Get lower bound of character width
|
||
|
|
* \fn getCharWidthEnd
|
||
|
|
* Get upper bound of character width
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER_PAIR(int, CharWidth)
|
||
|
|
/*! \fn setCharHeight
|
||
|
|
* Define the character height range [val1, val2], default to [10, 100]
|
||
|
|
* \fn getCharHeightStart
|
||
|
|
* Get lower bound of character height
|
||
|
|
* \fn getCharHeightEnd
|
||
|
|
* Get upper bound of character height
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER_PAIR(int, CharHeight)
|
||
|
|
/*! \fn setCharAspectRatio
|
||
|
|
* Define the character aspect ratio(width/height) range [val1, val2] in percentage, default to [-1, -1],
|
||
|
|
* which means aspect ratio is not considered during segmentation.
|
||
|
|
* A valid setting is between 1 ~ 10000%(width = 1/100 height to width = 100 height).
|
||
|
|
* \fn getCharAspectRatioStart
|
||
|
|
* Get lower bound of character aspect ratio
|
||
|
|
* \fn getCharAspectRatioEnd
|
||
|
|
* Get upper bound of character aspect ratio
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER_PAIR(int, CharAspectRatio)
|
||
|
|
/*! \fn setCharStroke
|
||
|
|
* Define the stroke width range of character [val1, val2], default to [-1, -1],
|
||
|
|
* which means it will be automatically inspected during detection
|
||
|
|
* \fn getCharStrokeStart
|
||
|
|
* Get lower bound of character stroke width
|
||
|
|
* \fn getCharStrokeEnd
|
||
|
|
* Get upper bound of character stroke width
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER_PAIR(int, CharStroke)
|
||
|
|
/*! \fn setCharSpace
|
||
|
|
* Define the range of internal space between two nearby characters [val1, val2], default to [-1, -1],
|
||
|
|
* which means it will be automatically inspected during detection
|
||
|
|
* \fn getCharSpaceStart
|
||
|
|
* Get lower bound of internal space between two nearby characters
|
||
|
|
* \fn getCharSpaceEnd
|
||
|
|
* Get upper bound of internal space between two nearby characters
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER_PAIR(int, CharSpace)
|
||
|
|
/*! \fn setLineAngle
|
||
|
|
* Define angle search range from the main line direction, default to [-5, 5], see also getLineAngle()
|
||
|
|
* \fn getLineAngle
|
||
|
|
* Get angle search range from the main line direction, see also setLineAngle()
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER_PAIR(float, LineAngle)
|
||
|
|
/*! \fn setLineSkew
|
||
|
|
* Define maximum skew search angle, it's used to handle italic fonts, default to [0, 0], see also getLineSkew()
|
||
|
|
* \fn getLineSkew
|
||
|
|
* Get value of maximum skew search angle, see also setLineSkew()
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER_PAIR(float, LineSkew)
|
||
|
|
/*! \fn setNoiseThreshold
|
||
|
|
* Define minimum area of valid character or fragment in pixel, it should be at least 1 pixel, default to 0 for auto detection,
|
||
|
|
* see also getNoiseThreshold()
|
||
|
|
* \fn getNoiseThreshold
|
||
|
|
* Get value of minimun area of valid character or fragment in pixel, see also setNoiseThreshold()
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER(int, NoiseThreshold)
|
||
|
|
/*! \fn setFragmentThreshold
|
||
|
|
* Define maximum area of object that could be treated as fragment, then it will be merged to neighbors,
|
||
|
|
* default to 0 which means we'll decide it on the fly by distribution of character candidates' area, see also getFragmentThreshold()
|
||
|
|
* \fn getFragmentThreshold
|
||
|
|
* Get value of maximum area of object that could be treated as fragment, see also setFragmentThreshold()
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER(int, FragmentThreshold)
|
||
|
|
/*! \fn setBigCharThreshold
|
||
|
|
* Define minimum area of object that could be treated as big char, then it may be split into several chars,
|
||
|
|
* default to INT_MAX which means we will not judge big-char via candidate's area, see also getBigCharThreshold()
|
||
|
|
* \fn getBigCharThreshold
|
||
|
|
* Get value of minimum area of object that could be treated as big char, see also setBigCharThreshold()
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER(int, BigCharThreshold)
|
||
|
|
/*! \fn setDotPrint
|
||
|
|
* Define whether this is dot-printed text, default to DotPrint::Auto, see also getDotPrint()
|
||
|
|
* \fn getDotPrint
|
||
|
|
* Get value of whether this is dot-printed text, see also setDotPrint()
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER(DotPrint, DotPrint)
|
||
|
|
/*! \fn setDotArea
|
||
|
|
* Define average area of a dot, works for dot-printed case, default to 0 for auto-detection, see also getDotArea()
|
||
|
|
* \fn getDotArea
|
||
|
|
* Get value of average area of a dot, see also setDotArea()
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER(float, DotArea)
|
||
|
|
/*! \fn setDotGap
|
||
|
|
* Define maximum distance between two nearby dots, default to 0 for auto-detection, see also getDotGap()
|
||
|
|
* \fn getDotGap
|
||
|
|
* Get value of maximum distance between two nearby dots, see also setDotGap()
|
||
|
|
*/
|
||
|
|
DECLARE_PARAMETER(float, DotGap)
|
||
|
|
|
||
|
|
public:
|
||
|
|
TextDetector() : mPolarity(Polarity::Either),
|
||
|
|
mCharWidthStart(10), mCharWidthEnd(100), mCharHeightStart(10), mCharHeightEnd(100),
|
||
|
|
mCharAspectRatioStart(-1), mCharAspectRatioEnd(-1), mCharStrokeStart(-1), mCharStrokeEnd(-1),
|
||
|
|
mCharSpaceStart(-1), mCharSpaceEnd(-1),
|
||
|
|
mLineAngleStart(-5), mLineAngleEnd(5), mLineSkewStart(0), mLineSkewEnd(0),
|
||
|
|
mNoiseThreshold(0), mFragmentThreshold(0), mBigCharThreshold(INT_MAX),
|
||
|
|
mDotPrint(DotPrint::Auto), mDotArea(0), mDotGap(0)
|
||
|
|
{
|
||
|
|
}
|
||
|
|
virtual ~TextDetector() {}
|
||
|
|
|
||
|
|
//! Smart pointer to hold an instance of TextDetector
|
||
|
|
typedef std::shared_ptr<TextDetector> Ptr;
|
||
|
|
DECL_GET_INSTANCE(TextDetector::Ptr)
|
||
|
|
|
||
|
|
/*! \fn serializeToMemory
|
||
|
|
* Serialize information of the detector into a in-memory string, see also deserializeFromMemory()
|
||
|
|
* @param str used to take the output serialization result
|
||
|
|
* @return true for succeed, false for fail
|
||
|
|
* \fn serializeToFile
|
||
|
|
* Serialize information of the detector into a text file, see also deserializeFromFile()
|
||
|
|
* @param filename file name (full path) where we will write the data
|
||
|
|
* @return true for succeed, false for fail
|
||
|
|
* \fn deserializeFromMemory
|
||
|
|
* Deserialize the detector from in-memory string, see also serializeToMemory()
|
||
|
|
* @param str in-memory string
|
||
|
|
* @return true for succeed, false for fail
|
||
|
|
* \fn deserializeFromFile
|
||
|
|
* Deserialize the detector from a text file, see also serializeToFile()
|
||
|
|
* @param filename file name (full path) where we will read the data
|
||
|
|
* @return true for succeed, false for fail
|
||
|
|
*/
|
||
|
|
DECL_SERIALIZE_FUNCS
|
||
|
|
|
||
|
|
/*! Switch to a new font database with provided name
|
||
|
|
* we'll create a new font database if the one with specified name is not found
|
||
|
|
* Out-of-box font databases:
|
||
|
|
* "Document": printed Arial, Courier, or Times New Roman fonts
|
||
|
|
* "Industrial": Arial, OCR-B and other sanserif fonts used for industrial marking
|
||
|
|
* "OCR-A": OCR-A font
|
||
|
|
* "OCR-B": OCR-B font
|
||
|
|
* "SEMI": SEMI font
|
||
|
|
* "MICR": MICR font
|
||
|
|
* @param dbName name of font database
|
||
|
|
* @param workDir working directory where the font database should be located, pass empty indicating "in-memory"\n
|
||
|
|
* does nothing of oob font databases.
|
||
|
|
* @param true if succeed
|
||
|
|
*/
|
||
|
|
virtual bool setFontDB(const string& dbName, const string& workDir = string());
|
||
|
|
|
||
|
|
/*! Get font database name for recognition (and segmentation) */
|
||
|
|
virtual string getFrontDB();
|
||
|
|
|
||
|
|
/*! Get all available font database names
|
||
|
|
* @param oob whether include out-of-box font databases
|
||
|
|
* @param custom whether include use-created font databases
|
||
|
|
*/
|
||
|
|
virtual vector<string> getAllFontDBNames(bool oob = true, bool custom = true);
|
||
|
|
|
||
|
|
/*! Get all characters in font database
|
||
|
|
* @return all character values
|
||
|
|
*/
|
||
|
|
virtual vector<string> getAllCharsInFontDB();
|
||
|
|
|
||
|
|
/*! Create a new font database */
|
||
|
|
virtual bool createFontDB(const string& dbName, const string& workDir);
|
||
|
|
|
||
|
|
/*! Remove an existing font database */
|
||
|
|
virtual bool removeFontDB(const string& dbName);
|
||
|
|
|
||
|
|
/*! Modify configuration of current font database */
|
||
|
|
virtual int setFontDBConfig(const OCRFontDBConfig& config);
|
||
|
|
|
||
|
|
/*! Get configuration of current font database
|
||
|
|
* @return return false if there's no font database
|
||
|
|
*/
|
||
|
|
virtual int getFontDBConfig(OCRFontDBConfig& config);
|
||
|
|
|
||
|
|
/*! Get sample database of specified character, for browse/add/edit/remove
|
||
|
|
* @param charVal character
|
||
|
|
* @param dbmName output name of database, use it to get instance of SampleDBManager then
|
||
|
|
*/
|
||
|
|
virtual int getCharDB(const std::string& charVal, string& dbmName);
|
||
|
|
|
||
|
|
/*! Add known characters to font database for training
|
||
|
|
* @param lines known characters organized in lines, usually is the labeled result of segmentation
|
||
|
|
* @return errCode error code
|
||
|
|
*/
|
||
|
|
virtual int addToCharDB(const TextInstance& textInst);
|
||
|
|
|
||
|
|
/*! Remove character and its sample database from font database */
|
||
|
|
virtual int removeCharDB(const std::string& charVal);
|
||
|
|
|
||
|
|
/*! Add character */
|
||
|
|
virtual int addCharDB(const std::string& charVal, string& dbmName);
|
||
|
|
|
||
|
|
/*! Train recognition (and segmentation) model
|
||
|
|
* @return errCode error code
|
||
|
|
*/
|
||
|
|
virtual int train();
|
||
|
|
|
||
|
|
/*! Whether it's trained */
|
||
|
|
virtual bool isTrained();
|
||
|
|
|
||
|
|
/*! Detect text with provided image, organized by line and characters.
|
||
|
|
* @param img input image for detection
|
||
|
|
* @param textInst recognition result
|
||
|
|
* @param mask input mask for exclude some pixel from detection
|
||
|
|
* @return total count of lines, 0 if we found nothing
|
||
|
|
*/
|
||
|
|
virtual int detect(const Mat& img, TextInstance& textInst,
|
||
|
|
Mat* mask = nullptr);
|
||
|
|
|
||
|
|
/** @overload */
|
||
|
|
virtual int detect(const Mat& img, DetectRoi& droi, TextInstance& textInst);
|
||
|
|
|
||
|
|
/** @overload
|
||
|
|
* @return the detected result, empty if we found nothing
|
||
|
|
*/
|
||
|
|
virtual std::string detect(const Mat& img, DetectRoi& droi);
|
||
|
|
|
||
|
|
/*! Segment text (but do not recognize) with provided image, organized by line and characters.
|
||
|
|
* @param img input image for detection
|
||
|
|
* @param textInst segmentation result
|
||
|
|
* @param mask input mask for exclude some pixel from detection
|
||
|
|
* @return total count of lines, 0 if we found nothing
|
||
|
|
*/
|
||
|
|
virtual int segment(const Mat& img, TextInstance& textInst,
|
||
|
|
Mat* mask = nullptr);
|
||
|
|
|
||
|
|
/** @overload */
|
||
|
|
virtual int segment(const Mat& img, DetectRoi& droi, TextInstance& textInst);
|
||
|
|
|
||
|
|
private:
|
||
|
|
virtual bool serialize(FileStorage& fs);
|
||
|
|
virtual bool deserialize(const FileNode& fs);
|
||
|
|
|
||
|
|
void genOCRParamPack(OCRParamPack& pack);
|
||
|
|
|
||
|
|
std::shared_ptr<OCRFontDB> mFontDbPtr;
|
||
|
|
};
|
||
|
|
|
||
|
|
#endif // __TextDetector_h_
|
||
|
|
|