/*! * \file TextDetector.h * \date 2020/03/12 * * \author Lin, Chi * Contact: lin.chi@hzleaper.com * * * \note */ #ifndef __TextDetector_h_ #define __TextDetector_h_ #include "StdUtils.h" #include "CVUtils.h" #include "CyclopsEnums.h" #include "CyclopsModules.h" #include "DetectRoi.h" #include "TextInstance.h" #include "SampleDBManager.h" struct OCRParamPack; class OCRFontDB; class TextDetector : public ICyclopsModuleInstance { public: enum ErrorCode { NoError = 1, // negative values for error ErrNoFontDB = -9999, ErrNoCharDB, ErrModifyOOBFontDB, ErrFailAddToFontDB, ErrFailRemoveFromFontDB, ErrFailTrainFontDB, ErrFailTrainTooFewChar, ErrFailTrainTooFewSample, ErrFailTrainFailExtFeature, }; /*! \fn setPolarity * Define polarity of text, Polarity::Black2White means black text on white background, Polarity::White2Black means white text on black background, * default to Polarity::Either which means we'll decide polarity on the fly by fraction of foreground, see also getPolarity() * \fn getPolarity * Get value of polarity of text, see also setPolarity() */ DECLARE_PARAMETER2(Polarity, Polarity, Polarity::Black2White, Polarity::Either) /*! \fn setCharWidth * Define the character width range [val1, val2], default to [10, 100] * \fn getCharWidthStart * Get lower bound of character width * \fn getCharWidthEnd * Get upper bound of character width */ DECLARE_PARAMETER_PAIR(int, CharWidth) /*! \fn setCharHeight * Define the character height range [val1, val2], default to [10, 100] * \fn getCharHeightStart * Get lower bound of character height * \fn getCharHeightEnd * Get upper bound of character height */ DECLARE_PARAMETER_PAIR(int, CharHeight) /*! \fn setCharAspectRatio * Define the character aspect ratio(width/height) range [val1, val2] in percentage, default to [-1, -1], * which means aspect ratio is not considered during segmentation. * A valid setting is between 1 ~ 10000%(width = 1/100 height to width = 100 height). * \fn getCharAspectRatioStart * Get lower bound of character aspect ratio * \fn getCharAspectRatioEnd * Get upper bound of character aspect ratio */ DECLARE_PARAMETER_PAIR(int, CharAspectRatio) /*! \fn setCharStroke * Define the stroke width range of character [val1, val2], default to [-1, -1], * which means it will be automatically inspected during detection * \fn getCharStrokeStart * Get lower bound of character stroke width * \fn getCharStrokeEnd * Get upper bound of character stroke width */ DECLARE_PARAMETER_PAIR(int, CharStroke) /*! \fn setCharSpace * Define the range of internal space between two nearby characters [val1, val2], default to [-1, -1], * which means it will be automatically inspected during detection * \fn getCharSpaceStart * Get lower bound of internal space between two nearby characters * \fn getCharSpaceEnd * Get upper bound of internal space between two nearby characters */ DECLARE_PARAMETER_PAIR(int, CharSpace) /*! \fn setLineAngle * Define angle search range from the main line direction, default to [-5, 5], see also getLineAngle() * \fn getLineAngle * Get angle search range from the main line direction, see also setLineAngle() */ DECLARE_PARAMETER_PAIR(float, LineAngle) /*! \fn setLineSkew * Define maximum skew search angle, it's used to handle italic fonts, default to [0, 0], see also getLineSkew() * \fn getLineSkew * Get value of maximum skew search angle, see also setLineSkew() */ DECLARE_PARAMETER_PAIR(float, LineSkew) /*! \fn setNoiseThreshold * Define minimum area of valid character or fragment in pixel, it should be at least 1 pixel, default to 0 for auto detection, * see also getNoiseThreshold() * \fn getNoiseThreshold * Get value of minimun area of valid character or fragment in pixel, see also setNoiseThreshold() */ DECLARE_PARAMETER(int, NoiseThreshold) /*! \fn setFragmentThreshold * Define maximum area of object that could be treated as fragment, then it will be merged to neighbors, * default to 0 which means we'll decide it on the fly by distribution of character candidates' area, see also getFragmentThreshold() * \fn getFragmentThreshold * Get value of maximum area of object that could be treated as fragment, see also setFragmentThreshold() */ DECLARE_PARAMETER(int, FragmentThreshold) /*! \fn setBigCharThreshold * Define minimum area of object that could be treated as big char, then it may be split into several chars, * default to INT_MAX which means we will not judge big-char via candidate's area, see also getBigCharThreshold() * \fn getBigCharThreshold * Get value of minimum area of object that could be treated as big char, see also setBigCharThreshold() */ DECLARE_PARAMETER(int, BigCharThreshold) /*! \fn setDotPrint * Define whether this is dot-printed text, default to DotPrint::Auto, see also getDotPrint() * \fn getDotPrint * Get value of whether this is dot-printed text, see also setDotPrint() */ DECLARE_PARAMETER(DotPrint, DotPrint) /*! \fn setDotArea * Define average area of a dot, works for dot-printed case, default to 0 for auto-detection, see also getDotArea() * \fn getDotArea * Get value of average area of a dot, see also setDotArea() */ DECLARE_PARAMETER(float, DotArea) /*! \fn setDotGap * Define maximum distance between two nearby dots, default to 0 for auto-detection, see also getDotGap() * \fn getDotGap * Get value of maximum distance between two nearby dots, see also setDotGap() */ DECLARE_PARAMETER(float, DotGap) public: TextDetector() : mPolarity(Polarity::Either), mCharWidthStart(10), mCharWidthEnd(100), mCharHeightStart(10), mCharHeightEnd(100), mCharAspectRatioStart(-1), mCharAspectRatioEnd(-1), mCharStrokeStart(-1), mCharStrokeEnd(-1), mCharSpaceStart(-1), mCharSpaceEnd(-1), mLineAngleStart(-5), mLineAngleEnd(5), mLineSkewStart(0), mLineSkewEnd(0), mNoiseThreshold(0), mFragmentThreshold(0), mBigCharThreshold(INT_MAX), mDotPrint(DotPrint::Auto), mDotArea(0), mDotGap(0) { } virtual ~TextDetector() {} //! Smart pointer to hold an instance of TextDetector typedef std::shared_ptr Ptr; DECL_GET_INSTANCE(TextDetector::Ptr) /*! \fn serializeToMemory * Serialize information of the detector into a in-memory string, see also deserializeFromMemory() * @param str used to take the output serialization result * @return true for succeed, false for fail * \fn serializeToFile * Serialize information of the detector into a text file, see also deserializeFromFile() * @param filename file name (full path) where we will write the data * @return true for succeed, false for fail * \fn deserializeFromMemory * Deserialize the detector from in-memory string, see also serializeToMemory() * @param str in-memory string * @return true for succeed, false for fail * \fn deserializeFromFile * Deserialize the detector from a text file, see also serializeToFile() * @param filename file name (full path) where we will read the data * @return true for succeed, false for fail */ DECL_SERIALIZE_FUNCS /*! Switch to a new font database with provided name * we'll create a new font database if the one with specified name is not found * Out-of-box font databases: * "Document": printed Arial, Courier, or Times New Roman fonts * "Industrial": Arial, OCR-B and other sanserif fonts used for industrial marking * "OCR-A": OCR-A font * "OCR-B": OCR-B font * "SEMI": SEMI font * "MICR": MICR font * @param dbName name of font database * @param workDir working directory where the font database should be located, pass empty indicating "in-memory"\n * does nothing of oob font databases. * @param true if succeed */ virtual bool setFontDB(const string& dbName, const string& workDir = string()); /*! Get font database name for recognition (and segmentation) */ virtual string getFrontDB(); /*! Get all available font database names * @param oob whether include out-of-box font databases * @param custom whether include use-created font databases */ virtual vector getAllFontDBNames(bool oob = true, bool custom = true); /*! Get all characters in font database * @return all character values */ virtual vector getAllCharsInFontDB(); /*! Create a new font database */ virtual bool createFontDB(const string& dbName, const string& workDir); /*! Remove an existing font database */ virtual bool removeFontDB(const string& dbName); /*! Modify configuration of current font database */ virtual int setFontDBConfig(const OCRFontDBConfig& config); /*! Get configuration of current font database * @return return false if there's no font database */ virtual int getFontDBConfig(OCRFontDBConfig& config); /*! Get sample database of specified character, for browse/add/edit/remove * @param charVal character * @param dbmName output name of database, use it to get instance of SampleDBManager then */ virtual int getCharDB(const std::string& charVal, string& dbmName); /*! Add known characters to font database for training * @param lines known characters organized in lines, usually is the labeled result of segmentation * @return errCode error code */ virtual int addToCharDB(const TextInstance& textInst); /*! Remove character and its sample database from font database */ virtual int removeCharDB(const std::string& charVal); /*! Add character */ virtual int addCharDB(const std::string& charVal, string& dbmName); /*! Train recognition (and segmentation) model * @return errCode error code */ virtual int train(); /*! Whether it's trained */ virtual bool isTrained(); /*! Detect text with provided image, organized by line and characters. * @param img input image for detection * @param textInst recognition result * @param mask input mask for exclude some pixel from detection * @return total count of lines, 0 if we found nothing */ virtual int detect(const Mat& img, TextInstance& textInst, Mat* mask = nullptr); /** @overload */ virtual int detect(const Mat& img, DetectRoi& droi, TextInstance& textInst); /** @overload * @return the detected result, empty if we found nothing */ virtual std::string detect(const Mat& img, DetectRoi& droi); /*! Segment text (but do not recognize) with provided image, organized by line and characters. * @param img input image for detection * @param textInst segmentation result * @param mask input mask for exclude some pixel from detection * @return total count of lines, 0 if we found nothing */ virtual int segment(const Mat& img, TextInstance& textInst, Mat* mask = nullptr); /** @overload */ virtual int segment(const Mat& img, DetectRoi& droi, TextInstance& textInst); private: virtual bool serialize(FileStorage& fs); virtual bool deserialize(const FileNode& fs); void genOCRParamPack(OCRParamPack& pack); std::shared_ptr mFontDbPtr; }; #endif // TextDetector_h_