You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
329 lines
13 KiB
C
329 lines
13 KiB
C
|
4 years ago
|
/*!
|
||
|
|
* \file LLClassifier.h
|
||
|
|
* \date 2019/10/18
|
||
|
|
*
|
||
|
|
* \author Lin, Chi
|
||
|
|
* Contact: lin.chi@hzleaper.com
|
||
|
|
*
|
||
|
|
*
|
||
|
|
* \note
|
||
|
|
*/
|
||
|
|
|
||
|
|
#ifndef __LLClassifier_h_
|
||
|
|
#define __LLClassifier_h_
|
||
|
|
|
||
|
|
#include "CyclopsCommon.h"
|
||
|
|
#include "StdUtils.h"
|
||
|
|
#include "DetectRoi.h"
|
||
|
|
#include "CyclopsModules.h"
|
||
|
|
#include "SampleDBManager.h"
|
||
|
|
#include "FeatureEvaluator.h"
|
||
|
|
#include "CyclopsMLModel.h"
|
||
|
|
#include "TestResult.h"
|
||
|
|
#include "CyclopsGrid.h"
|
||
|
|
#include "TrainResult.h"
|
||
|
|
|
||
|
|
/*! \brief Lightweight Learning-based Classifier
|
||
|
|
* Uniform API for machine learning (classification); supports auto-tune and various testing techniques
|
||
|
|
*
|
||
|
|
* Example:
|
||
|
|
* \code{.cpp}
|
||
|
|
* LLClassifier::Ptr llcPtr = LLClassifier::getInstance("new_llc");
|
||
|
|
* llcPtr->updateClass(0, "class_label_1", "folder_of_sample_1"); // add class 1 and its sample database
|
||
|
|
* llcPtr->updateClass(1, "class_label_2", "folder_of_sample_2"); // add class 2 and its sample database
|
||
|
|
* FeatureEvaluator::Ptr fePtr = FeatureEvaluator::getInstance("new_llc"); // create a new feature evaluator
|
||
|
|
* llcPtr->setFeatureEvaluator(fePtr);
|
||
|
|
* CyclopsFeature::Ptr hogPtr = fePtr->add(FeatureType::HOG); // add HOG feature
|
||
|
|
* CyclopsMLModel::Ptr svmPtr = llcPtr->add(MLModel::SVM); // add SVM model
|
||
|
|
* int ret = llcPtr->train(); // train (int status, see ErrorCode; error codes are negative)
* if (ret == LLClassifier::NoError) {
|
||
|
|
* std::string classLabel;
|
||
|
|
* float confidence;
|
||
|
|
* llcPtr->predict(newImg, classLabel, confidence); // prediction
|
||
|
|
* }
|
||
|
|
* \endcode
|
||
|
|
*
|
||
|
|
* Auto-tune example:
|
||
|
|
* \code{.cpp}
|
||
|
|
* std::vector<CyclopsGrids> mgrids; // create tuning grid
|
||
|
|
* CyclopsGrids svmGrids(svmPtr->getName());
|
||
|
|
* svmGrids.addGrid<double>(SVCModel::Coef, 0.1, 0.4, 2, true); // 3 steps
|
||
|
|
* mgrids.push_back(std::move(svmGrids));
|
||
|
|
* TrainResults::Ptr trPtr = llcPtr->autoTune(LLClassifier::ModelOnly, mgrids); // or use startAutoTune()
|
||
|
|
* int svmId = trPtr->getIdByName(svmPtr->getName()); // get index of svm model in train result
|
||
|
|
* std::list<int> bestList = trPtr->getBest(svmId);
|
||
|
|
* TrainResult res = trPtr->get(bestList.front()); // get best tuning result
|
||
|
|
* double acc = res.testAccuracy;
|
||
|
|
* \endcode
|
||
|
|
*
|
||
|
|
* Test example:
|
||
|
|
* \code{.cpp}
|
||
|
|
* TestResults::Ptr trPtr = llcPtr->test(SamplingMode::CrossValidation, 3, true, 5); // or use startTest()
|
||
|
|
* int svmId = trPtr->getIdByName(svmPtr->getName()); // get index of svm model in test result
|
||
|
|
* double acc = trPtr->getAccuracy(svmId);
|
||
|
|
* \endcode
|
||
|
|
* see LLCTest for unit test
|
||
|
|
*/
|
||
|
|
class LLClassifier : public ICyclopsModuleInstance
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
enum ErrorCode {
|
||
|
|
NoError = 1,
|
||
|
|
// negative values for error
|
||
|
|
ErrLoadSample = -9999,
|
||
|
|
ErrTooFewClass, // at least two class
|
||
|
|
ErrNoModel,
|
||
|
|
ErrNoFeature,
|
||
|
|
ErrDirtyModel,
|
||
|
|
ErrNotTrained,
|
||
|
|
ErrTooFewSample,
|
||
|
|
ErrStopByUser,
|
||
|
|
ErrBusy,
|
||
|
|
ErrFailExtFeature,
|
||
|
|
ErrUnexpect,
|
||
|
|
};
|
||
|
|
|
||
|
|
static const int cMinTrainCount;
|
||
|
|
static const int cMinTestCount;
|
||
|
|
static const int cMinSampleSize;
|
||
|
|
static const int cMaxSampleSize;
|
||
|
|
|
||
|
|
LLClassifier() : mPrimaryModel(0)
|
||
|
|
{}
|
||
|
|
virtual ~LLClassifier() {}
|
||
|
|
|
||
|
|
typedef std::shared_ptr<LLClassifier> Ptr;
|
||
|
|
DECL_GET_INSTANCE(LLClassifier::Ptr)
|
||
|
|
|
||
|
|
/*! \fn serializeToMemory
|
||
|
|
* Serialize information of the classifier into a in-memory string, see also deserializeFromMemory()
|
||
|
|
* @param str used to take the output serialization result
|
||
|
|
* @return true for succeed, false for fail
|
||
|
|
* \fn serializeToFile
|
||
|
|
* Serialize information of the classifier into a text file, see also deserializeFromFile()
|
||
|
|
* @param filename file name (full path) where we will write the data
|
||
|
|
* @return true for succeed, false for fail
|
||
|
|
* \fn deserializeFromMemory
|
||
|
|
* Deserialize the classifier from in-memory string, see also serializeToMemory()
|
||
|
|
* @param str in-memory string
|
||
|
|
* @return true for succeed, false for fail
|
||
|
|
* \fn deserializeFromFile
|
||
|
|
* Deserialize the classifier from a text file, see also serializeToFile()
|
||
|
|
* @param filename file name (full path) where we will read the data
|
||
|
|
* @return true for succeed, false for fail
|
||
|
|
*/
|
||
|
|
DECL_SERIALIZE_FUNCS
|
||
|
|
|
||
|
|
/*! Total count of classes */
|
||
|
|
int countClass() const { return mClassLabels.size(); }
|
||
|
|
/*! Add a new class of given label and its sample database location */
|
||
|
|
virtual bool addClass(const std::string& label, const std::string& rootPath = std::string());
|
||
|
|
/*! Update an existing class with new label and new sample database location */
|
||
|
|
virtual bool updateClass(int index, const std::string& label, const std::string& rootPath = std::string());
|
||
|
|
/*! Remove an existing class
|
||
|
|
* @param removePermanently true if you want to remove the sample database permanently from disk
|
||
|
|
*/
|
||
|
|
virtual bool removeClass(const std::string& label, bool removePermanently);
|
||
|
|
/*! Remove all classes */
|
||
|
|
virtual bool removeAllClass(bool removePermanently);
|
||
|
|
/*! Get index of class by its label */
|
||
|
|
virtual int indexOfClass(const std::string& label);
|
||
|
|
/*! Get sample database manager by class label */
|
||
|
|
virtual SampleDBManager::Ptr getSampleDB(const std::string& label);
|
||
|
|
/*! Get class label by index */
|
||
|
|
std::string getClass(int index) const {
|
||
|
|
if (index < 0 || index >= mClassLabels.size()) return "";
|
||
|
|
return mClassLabels[index];
|
||
|
|
}
|
||
|
|
|
||
|
|
/*! Get uniformed sample size */
|
||
|
|
virtual Size getSampleSize();
|
||
|
|
|
||
|
|
/*! Set feature evaluator */
|
||
|
|
void setFeatureEvaluator(FeatureEvaluator::Ptr pFE) { mFE = pFE; }
|
||
|
|
/*! Get feature evaluator for feature modification */
|
||
|
|
FeatureEvaluator::Ptr getFeatureEvaluator() const { return mFE; }
|
||
|
|
|
||
|
|
/*! Add a new model of given type */
|
||
|
|
virtual CyclopsMLModel::Ptr add(MLModel ftype);
|
||
|
|
/*! Total count of models added */
|
||
|
|
int size() const { return mMLModels.size(); }
|
||
|
|
/*! Get model by index */
|
||
|
|
CyclopsMLModel::Ptr get(int index) {
|
||
|
|
if (index < 0 || index >= size()) return nullptr;
|
||
|
|
return mMLModels[index];
|
||
|
|
}
|
||
|
|
/*! Remove model by index */
|
||
|
|
virtual bool remove(int index);
|
||
|
|
/*! Remove all models */
|
||
|
|
virtual bool removeAll();
|
||
|
|
/*! Get model index by name */
|
||
|
|
virtual int indexByName(const std::string& name);
|
||
|
|
|
||
|
|
/*! Modify primary model which will be used for prediction */
|
||
|
|
void setPrimaryModel(int idx) {
|
||
|
|
if (idx >= 0 && idx < size())
|
||
|
|
mPrimaryModel = idx;
|
||
|
|
}
|
||
|
|
/*! Get index of primary model */
|
||
|
|
int getPrimaryModel() const {
|
||
|
|
int s = size();
|
||
|
|
if (mPrimaryModel >= 0 && mPrimaryModel < s) return mPrimaryModel;
|
||
|
|
else if (s > 0) return 0;
|
||
|
|
else return -1;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*! Train all models */
|
||
|
|
virtual int train();
|
||
|
|
|
||
|
|
/*! Whether the primary model trained */
|
||
|
|
virtual bool isTrained();
|
||
|
|
|
||
|
|
/*! Prediction of given image */
|
||
|
|
virtual int predict(const Mat& img,
|
||
|
|
std::string& classLabel,
|
||
|
|
float& confidence,
|
||
|
|
SampleInstance::UserDataMap* pUserData = nullptr);
|
||
|
|
|
||
|
|
/** @overload */
|
||
|
|
virtual int predict(const Mat& img, DetectRoi& droi,
|
||
|
|
std::string& classLabel,
|
||
|
|
float& confidence,
|
||
|
|
SampleInstance::UserDataMap* pUserData = nullptr);
|
||
|
|
|
||
|
|
/*! Test and accuracy estimation (synchronous)
|
||
|
|
* @param mode Sampling mode, how test cases are built
|
||
|
|
* @param repeat for random test, how many times split-train-test is performed
|
||
|
|
* @param stratified whether same proportion is guaranteed for each category
|
||
|
|
* @param folds for cross-validation sampling, dataset is split into $folds group,
|
||
|
|
* each time use one group as test dataset and remaining as train dataset
|
||
|
|
* @param trainSize for random sampling, training dataset contains $trainSize of all samples
|
||
|
|
* @return test result
|
||
|
|
*/
|
||
|
|
virtual TestResults::Ptr test(SamplingMode mode, int repeat = 10, bool stratified = true,
|
||
|
|
int folds = 5, float trainSize = 0.66);
|
||
|
|
|
||
|
|
/*! Test and accuracy estimation (asynchronous) */
|
||
|
|
virtual TestResults::Ptr startTest(SamplingMode mode, int repeat = 10, bool stratified = true,
|
||
|
|
int folds = 5, float trainSize = 0.66);
|
||
|
|
|
||
|
|
/*! Auto-Tune feature and models parameters (synchronous)
|
||
|
|
* @param mode auto-tune mode, currently only FeatureOnly and ModelOnly is supported
|
||
|
|
* @param grids parameter searching grid for features or models
|
||
|
|
* @param randomSampling true for use randomly selected samples as training dataset and remaining as test,
|
||
|
|
otherwise, use default training dataset
|
||
|
|
* @param stratified whether same proportion is guaranteed for each category
|
||
|
|
* @param trainSize for random sampling, training dataset contains $trainSize of all samples
|
||
|
|
* @return train result
|
||
|
|
*/
|
||
|
|
enum AutoTuneMode {
|
||
|
|
FeatureOnly,
|
||
|
|
ModelOnly,
|
||
|
|
FeatureAndModel
|
||
|
|
};
|
||
|
|
virtual TrainResults::Ptr autoTune(AutoTuneMode mode, const std::vector<CyclopsGrids>& grids,
|
||
|
|
bool randomSampling = false, bool stratified = true, float trainSize = 0.66);
|
||
|
|
|
||
|
|
/*! Auto-Tune feature and models parameters (asynchronous) */
|
||
|
|
virtual TrainResults::Ptr startAutoTune(AutoTuneMode mode, const std::vector<CyclopsGrids>& grids,
|
||
|
|
bool randomSampling = false, bool stratified = true, float trainSize = 0.66);
|
||
|
|
|
||
|
|
/*! Auto-select features of better ability to distinguish samples
|
||
|
|
* @param mode auto-select mode, see AutoSelectMode.
|
||
|
|
* @param topN choose top-n features as selection, the maximum number of features in selection
|
||
|
|
0 or negative value means we'll decide the number on-the-fly
|
||
|
|
* @param minAcc minimum classification accuracy or evaluation value that we should stop the process of auto-selection,
|
||
|
|
0 or negative value means we'll ignore it
|
||
|
|
* @param randomSampling true for use randomly selected samples as training dataset and remaining as test,
|
||
|
|
otherwise, use default training dataset
|
||
|
|
* @param stratified whether same proportion is guaranteed for each category
|
||
|
|
* @param trainSize for random sampling, training dataset contains $trainSize of all samples
|
||
|
|
* @return train result
|
||
|
|
*/
|
||
|
|
enum AutoSelectMode {
|
||
|
|
/*! Based on evaluation ranking */
|
||
|
|
ByEvaluation,
|
||
|
|
/*! Based on classification accuracy, add features one by one into selection, SFS.
|
||
|
|
If there's no model defined or too few samples, we'll fall back to evaluation-based */
|
||
|
|
ByAdding,
|
||
|
|
/*! Based on classification accuracy, add features one by one into selection,
|
||
|
|
try remove one if makes better result in each iteration, SFFS.
|
||
|
|
If there's no model defined or too few samples, we'll fall back to evaluation-based */
|
||
|
|
ByAddingFloating,
|
||
|
|
/*! Based on classification accuracy, start from full selection, then remove feature one by one, SBS.
|
||
|
|
If there's no model defined or too few samples, we'll fall back to evaluation-based */
|
||
|
|
ByRemoving,
|
||
|
|
/*! Based on classification accuracy, start from full selection, then remove feature one by one,
|
||
|
|
try add one back if makes better result in each iteration, SFBS.
|
||
|
|
If there's no model defined or too few samples, we'll fall back to evaluation-based */
|
||
|
|
ByRemovingFloating,
|
||
|
|
};
|
||
|
|
virtual TrainResults::Ptr autoSelect(AutoSelectMode mode, int topVal = 0, float minAcc = 0,
|
||
|
|
bool randomSampling = false, bool stratified = true, float trainSize = 0.66);
|
||
|
|
|
||
|
|
/*! Auto-select features of better ability to distinguish samples (asynchronous) */
|
||
|
|
virtual TrainResults::Ptr startAutoSelect(AutoSelectMode mode, int topN = 0, float minAcc = 0,
|
||
|
|
bool randomSampling = false, bool stratified = true, float trainSize = 0.66);
|
||
|
|
|
||
|
|
/*! Whether it's is busy with training, testing or auto-tune task */
|
||
|
|
bool isBusy() const {
|
||
|
|
return mLock.isLocked();
|
||
|
|
}
|
||
|
|
|
||
|
|
private:
|
||
|
|
virtual bool serialize(FileStorage& fs);
|
||
|
|
virtual bool deserialize(const FileNode& fs);
|
||
|
|
|
||
|
|
int prepare(bool forceRefresh = false, bool modelRequired = true);
|
||
|
|
bool isAllTrained();
|
||
|
|
bool genTrainMats(std::vector<SampleInstIterator>& trainSamples, Mat& sampleMat, Mat& labelMat);
|
||
|
|
int train(std::vector<SampleInstIterator>& trainSamples);
|
||
|
|
int predict(SampleInstPtr siPtr, int modelIdx, float* pConfidence = nullptr);
|
||
|
|
SampleDBManager::Ptr getSampleDBSafe(const std::string& label);
|
||
|
|
void refreshSampleSize();
|
||
|
|
void clearAll();
|
||
|
|
|
||
|
|
void testCrossValidate(AsyncResult* r, bool stratified, int folds);
|
||
|
|
void testRandom(AsyncResult* r, bool stratified, float trainSize, int repeat);
|
||
|
|
void testLeaveOneOut(AsyncResult* r);
|
||
|
|
void testOnTrain(AsyncResult* r);
|
||
|
|
void testOnTest(AsyncResult* r);
|
||
|
|
int doTest(AsyncResult* r, std::vector<SampleInstIterator>& testSamples);
|
||
|
|
|
||
|
|
void autoTuneFeature(AsyncResult* r,
|
||
|
|
std::vector<CyclopsGrids> grids, // yes, copy grids
|
||
|
|
bool randomSampling, bool stratified, float trainSize);
|
||
|
|
void autoTuneModel(AsyncResult* r,
|
||
|
|
std::vector<CyclopsGrids> grids, // yes, copy grids
|
||
|
|
bool randomSampling, bool stratified, float trainSize);
|
||
|
|
|
||
|
|
void autoSelectClassify(AsyncResult* r, int topVal, float minAcc,
|
||
|
|
bool randomSampling, bool stratified, float trainSize, bool adding, bool floating);
|
||
|
|
void autoSelectEvaluate(AsyncResult* r, int topVal, float minAcc,
|
||
|
|
bool randomSampling, bool stratified, float trainSize);
|
||
|
|
int _autoSelectEvaluate(AsyncResult* r, int topVal, float minAcc,
|
||
|
|
bool randomSampling, bool stratified, float trainSize); // without lock and prepare
|
||
|
|
|
||
|
|
bool totalRandomSampleSize(bool stratified, float trainSize, int* pTrainCount, int* pTestCount);
|
||
|
|
void genRandomSamples(bool stratified, float trainSize, int trainCount, int testCount,
|
||
|
|
std::vector<SampleInstIterator>* trainSamples = nullptr,
|
||
|
|
std::vector<SampleInstIterator>* testSamples = nullptr);
|
||
|
|
void genDefaultTrainSamples(std::vector<SampleInstIterator>& trainSamples);
|
||
|
|
void genDefaultTestSamples(std::vector<SampleInstIterator>& testSamples);
|
||
|
|
void preLoadSamples(std::vector<SampleInstIterator>& samples);
|
||
|
|
bool genSampleSetForAutoTask(bool randomSampling, bool stratified, float trainSize,
|
||
|
|
std::vector<SampleInstIterator>* trainSamples = nullptr,
|
||
|
|
std::vector<SampleInstIterator>* testSamples = nullptr);
|
||
|
|
|
||
|
|
private:
|
||
|
|
std::vector<std::string> mClassLabels;
|
||
|
|
FeatureEvaluator::Ptr mFE;
|
||
|
|
std::vector<CyclopsMLModel::Ptr> mMLModels;
|
||
|
|
Size mSampleSize;
|
||
|
|
int mPrimaryModel;
|
||
|
|
CyclopsLock mLock;
|
||
|
|
};
|
||
|
|
|
||
|
|
#endif // __LLClassifier_h_
|
||
|
|
|