// You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
// wheeldetect/3part/Cyclops/include/TextDetector.h
//
// 291 lines
// 11 KiB
// C

/*!
* \file TextDetector.h
* \date 2020/03/12
*
* \author Lin, Chi
* Contact: lin.chi@hzleaper.com
*
*
* \note
*/
#ifndef __TextDetector_h_
#define __TextDetector_h_
#include "StdUtils.h"
#include "CVUtils.h"
#include "CyclopsEnums.h"
#include "CyclopsModules.h"
#include "DetectRoi.h"
#include "TextInstance.h"
#include "SampleDBManager.h"
struct OCRParamPack;
class OCRFontDB;
/*! Text detector module: exposes text segmentation (segment()), recognition (detect()),
* and font database management / training (setFontDB(), addToCharDB(), train(), ...).
* Tunable parameters are declared through the DECLARE_PARAMETER* macros below and are
* given their defaults by the constructor. The detector holds a shared pointer to an OCRFontDB.
*/
class TextDetector : public ICyclopsModuleInstance
{
public:
/*! Status codes declared by this class.
* NoError is the positive value 1; every error code is negative: the enumerators following
* ErrNoFontDB auto-increment from -9999 (ErrNoCharDB == -9998, ..., ErrFailTrainFailExtFeature == -9991).
* NOTE(review): presumably these are the values returned by the int-returning font database
* and training methods below - confirm against the implementation.
*/
enum ErrorCode {
NoError = 1,
// negative values for error
ErrNoFontDB = -9999,
ErrNoCharDB,
ErrModifyOOBFontDB,
ErrFailAddToFontDB,
ErrFailRemoveFromFontDB,
ErrFailTrainFontDB,
ErrFailTrainTooFewChar,
ErrFailTrainTooFewSample,
ErrFailTrainFailExtFeature,
};
/*! \fn setPolarity
* Define polarity of text: Polarity::Black2White means black text on white background,
* Polarity::White2Black means white text on black background.
* Defaults to Polarity::Either, which means the polarity is decided on the fly by the fraction of foreground,
* see also getPolarity()
* \fn getPolarity
* Get the polarity of text, see also setPolarity()
*/
DECLARE_PARAMETER2(Polarity, Polarity, Polarity::Black2White, Polarity::Either)
/*! \fn setCharWidth
* Define the character width range [val1, val2], default to [10, 100]
* \fn getCharWidthStart
* Get lower bound of character width
* \fn getCharWidthEnd
* Get upper bound of character width
*/
DECLARE_PARAMETER_PAIR(int, CharWidth)
/*! \fn setCharHeight
* Define the character height range [val1, val2], default to [10, 100]
* \fn getCharHeightStart
* Get lower bound of character height
* \fn getCharHeightEnd
* Get upper bound of character height
*/
DECLARE_PARAMETER_PAIR(int, CharHeight)
/*! \fn setCharAspectRatio
* Define the character aspect ratio (width/height) range [val1, val2] in percentage, default to [-1, -1],
* which means aspect ratio is not considered during segmentation.
* A valid setting is between 1% and 10000% (from width = 1/100 height to width = 100 height).
* \fn getCharAspectRatioStart
* Get lower bound of character aspect ratio
* \fn getCharAspectRatioEnd
* Get upper bound of character aspect ratio
*/
DECLARE_PARAMETER_PAIR(int, CharAspectRatio)
/*! \fn setCharStroke
* Define the stroke width range of character [val1, val2], default to [-1, -1],
* which means it will be automatically inspected during detection
* \fn getCharStrokeStart
* Get lower bound of character stroke width
* \fn getCharStrokeEnd
* Get upper bound of character stroke width
*/
DECLARE_PARAMETER_PAIR(int, CharStroke)
/*! \fn setCharSpace
* Define the range of internal space between two nearby characters [val1, val2], default to [-1, -1],
* which means it will be automatically inspected during detection
* \fn getCharSpaceStart
* Get lower bound of internal space between two nearby characters
* \fn getCharSpaceEnd
* Get upper bound of internal space between two nearby characters
*/
DECLARE_PARAMETER_PAIR(int, CharSpace)
/*! \fn setLineAngle
* Define angle search range from the main line direction, default to [-5, 5], see also getLineAngle()
* \fn getLineAngle
* Get angle search range from the main line direction, see also setLineAngle()
* NOTE(review): this is a DECLARE_PARAMETER_PAIR like CharWidth, and the constructor initializes
* mLineAngleStart / mLineAngleEnd, so the getters are presumably getLineAngleStart() /
* getLineAngleEnd() rather than getLineAngle() - confirm against the macro definition.
*/
DECLARE_PARAMETER_PAIR(float, LineAngle)
/*! \fn setLineSkew
* Define maximum skew search angle, it's used to handle italic fonts, default to [0, 0], see also getLineSkew()
* \fn getLineSkew
* Get value of maximum skew search angle, see also setLineSkew()
* NOTE(review): this is a DECLARE_PARAMETER_PAIR like CharWidth, and the constructor initializes
* mLineSkewStart / mLineSkewEnd, so the getters are presumably getLineSkewStart() /
* getLineSkewEnd() rather than getLineSkew() - confirm against the macro definition.
*/
DECLARE_PARAMETER_PAIR(float, LineSkew)
/*! \fn setNoiseThreshold
* Define minimum area of valid character or fragment in pixel, it should be at least 1 pixel, default to 0 for auto detection,
* see also getNoiseThreshold()
* \fn getNoiseThreshold
* Get value of minimum area of valid character or fragment in pixel, see also setNoiseThreshold()
*/
DECLARE_PARAMETER(int, NoiseThreshold)
/*! \fn setFragmentThreshold
* Define maximum area of object that could be treated as fragment, then it will be merged to neighbors,
* default to 0 which means we'll decide it on the fly by distribution of character candidates' area, see also getFragmentThreshold()
* \fn getFragmentThreshold
* Get value of maximum area of object that could be treated as fragment, see also setFragmentThreshold()
*/
DECLARE_PARAMETER(int, FragmentThreshold)
/*! \fn setBigCharThreshold
* Define minimum area of object that could be treated as big char, then it may be split into several chars,
* default to INT_MAX which means we will not judge big-char via candidate's area, see also getBigCharThreshold()
* \fn getBigCharThreshold
* Get value of minimum area of object that could be treated as big char, see also setBigCharThreshold()
*/
DECLARE_PARAMETER(int, BigCharThreshold)
/*! \fn setDotPrint
* Define whether this is dot-printed text, default to DotPrint::Auto, see also getDotPrint()
* \fn getDotPrint
* Get value of whether this is dot-printed text, see also setDotPrint()
*/
DECLARE_PARAMETER(DotPrint, DotPrint)
/*! \fn setDotArea
* Define average area of a dot, works for dot-printed case, default to 0 for auto-detection, see also getDotArea()
* \fn getDotArea
* Get value of average area of a dot, see also setDotArea()
*/
DECLARE_PARAMETER(float, DotArea)
/*! \fn setDotGap
* Define maximum distance between two nearby dots, default to 0 for auto-detection, see also getDotGap()
* \fn getDotGap
* Get value of maximum distance between two nearby dots, see also setDotGap()
*/
DECLARE_PARAMETER(float, DotGap)
public:
/*! Construct a detector with every parameter set to its documented default:
* polarity Either, char width/height [10, 100], aspect ratio / stroke / space [-1, -1],
* line angle [-5, 5], line skew [0, 0], noise and fragment thresholds 0,
* big-char threshold INT_MAX, dot print Auto, dot area and dot gap 0.
*/
TextDetector() : mPolarity(Polarity::Either),
mCharWidthStart(10), mCharWidthEnd(100), mCharHeightStart(10), mCharHeightEnd(100),
mCharAspectRatioStart(-1), mCharAspectRatioEnd(-1), mCharStrokeStart(-1), mCharStrokeEnd(-1),
mCharSpaceStart(-1), mCharSpaceEnd(-1),
mLineAngleStart(-5), mLineAngleEnd(5), mLineSkewStart(0), mLineSkewEnd(0),
mNoiseThreshold(0), mFragmentThreshold(0), mBigCharThreshold(INT_MAX),
mDotPrint(DotPrint::Auto), mDotArea(0), mDotGap(0)
{
}
//! Virtual destructor (empty body)
virtual ~TextDetector() {}
//! Smart pointer to hold an instance of TextDetector
typedef std::shared_ptr<TextDetector> Ptr;
DECL_GET_INSTANCE(TextDetector::Ptr)
/*! \fn serializeToMemory
* Serialize information of the detector into an in-memory string, see also deserializeFromMemory()
* @param str used to take the output serialization result
* @return true on success, false on failure
* \fn serializeToFile
* Serialize information of the detector into a text file, see also deserializeFromFile()
* @param filename file name (full path) where we will write the data
* @return true on success, false on failure
* \fn deserializeFromMemory
* Deserialize the detector from in-memory string, see also serializeToMemory()
* @param str in-memory string
* @return true on success, false on failure
* \fn deserializeFromFile
* Deserialize the detector from a text file, see also serializeToFile()
* @param filename file name (full path) where we will read the data
* @return true on success, false on failure
*/
DECL_SERIALIZE_FUNCS
/*! Switch to a new font database with provided name.
* A new font database is created if the one with the specified name is not found.
* Out-of-box (oob) font databases:
* "Document": printed Arial, Courier, or Times New Roman fonts
* "Industrial": Arial, OCR-B and other sans-serif fonts used for industrial marking
* "OCR-A": OCR-A font
* "OCR-B": OCR-B font
* "SEMI": SEMI font
* "MICR": MICR font
* @param dbName name of font database
* @param workDir working directory where the font database should be located, pass empty (the default)
* to indicate "in-memory"; it has no effect for out-of-box font databases.
* @return true if succeed
*/
virtual bool setFontDB(const string& dbName, const string& workDir = string());
/*! Get font database name for recognition (and segmentation).
* NOTE: the method name is spelled getFrontDB (sic); the spelling is part of the public interface.
*/
virtual string getFrontDB();
/*! Get all available font database names
* @param oob whether to include out-of-box font databases
* @param custom whether to include user-created font databases
* @return names of the matching font databases
*/
virtual vector<string> getAllFontDBNames(bool oob = true, bool custom = true);
/*! Get all characters in font database
* @return all character values
*/
virtual vector<string> getAllCharsInFontDB();
/*! Create a new font database
* @param dbName name of the font database to create
* @param workDir working directory for the font database
* @return bool status; presumably true on success - confirm against the implementation
*/
virtual bool createFontDB(const string& dbName, const string& workDir);
/*! Remove an existing font database
* @param dbName name of the font database to remove
* @return bool status; presumably true on success - confirm against the implementation
*/
virtual bool removeFontDB(const string& dbName);
/*! Modify configuration of current font database
* @param config new configuration
* @return int status; presumably an ErrorCode value - confirm against the implementation
*/
virtual int setFontDBConfig(const OCRFontDBConfig& config);
/*! Get configuration of current font database
* @param config output configuration
* @return int status. NOTE(review): the original comment said "return false if there's no font database",
* but the return type is int - presumably ErrNoFontDB is returned in that case; confirm against the implementation.
*/
virtual int getFontDBConfig(OCRFontDBConfig& config);
/*! Get sample database of specified character, for browse/add/edit/remove
* @param charVal character
* @param dbmName output name of database, use it to get instance of SampleDBManager then
* @return int status; presumably an ErrorCode value - confirm against the implementation
*/
virtual int getCharDB(const std::string& charVal, string& dbmName);
/*! Add known characters to font database for training
* @param textInst known characters organized in lines, usually the labeled result of segmentation
* @return errCode error code
*/
virtual int addToCharDB(const TextInstance& textInst);
/*! Remove character and its sample database from font database
* @param charVal character to remove
* @return int status; presumably an ErrorCode value - confirm against the implementation
*/
virtual int removeCharDB(const std::string& charVal);
/*! Add character
* @param charVal character to add
* @param dbmName string reference, presumably receiving the name of the character's sample database
* as in getCharDB() - confirm against the implementation
* @return int status; presumably an ErrorCode value - confirm against the implementation
*/
virtual int addCharDB(const std::string& charVal, string& dbmName);
/*! Train recognition (and segmentation) model
* @return errCode error code
*/
virtual int train();
/*! Whether it's trained */
virtual bool isTrained();
/*! Detect text with provided image, organized by line and characters.
* @param img input image for detection
* @param textInst recognition result
* @param mask optional input mask used to exclude some pixels from detection, defaults to nullptr
* @return total count of lines, 0 if we found nothing
*/
virtual int detect(const Mat& img, TextInstance& textInst,
Mat* mask = nullptr);
/** @overload
* Takes a DetectRoi instead of a mask.
*/
virtual int detect(const Mat& img, DetectRoi& droi, TextInstance& textInst);
/** @overload
* @return the detected result, empty if we found nothing
*/
virtual std::string detect(const Mat& img, DetectRoi& droi);
/*! Segment text (but do not recognize) with provided image, organized by line and characters.
* @param img input image for detection
* @param textInst segmentation result
* @param mask optional input mask used to exclude some pixels from detection, defaults to nullptr
* @return total count of lines, 0 if we found nothing
*/
virtual int segment(const Mat& img, TextInstance& textInst,
Mat* mask = nullptr);
/** @overload
* Takes a DetectRoi instead of a mask.
*/
virtual int segment(const Mat& img, DetectRoi& droi, TextInstance& textInst);
private:
//! Serialization hook working on a FileStorage; presumably used by the DECL_SERIALIZE_FUNCS entry points - confirm
virtual bool serialize(FileStorage& fs);
//! Deserialization hook working on a FileNode; presumably used by the DECL_SERIALIZE_FUNCS entry points - confirm
virtual bool deserialize(const FileNode& fs);
//! Fill an OCRParamPack; presumably from the current parameter values - implementation not visible here
void genOCRParamPack(OCRParamPack& pack);
//! Shared pointer to the font database (OCRFontDB) held by this detector
std::shared_ptr<OCRFontDB> mFontDbPtr;
};
#endif // TextDetector_h_