// wheeldetect/3part/Cyclops/include/LLClassifier.h

/*!
* \file LLClassifier.h
* \date 2019/10/18
*
* \author Lin, Chi
* Contact: lin.chi@hzleaper.com
*
*
* \note
*/
#ifndef __LLClassifier_h_
#define __LLClassifier_h_
#include "CyclopsCommon.h"
#include "StdUtils.h"
#include "DetectRoi.h"
#include "CyclopsModules.h"
#include "SampleDBManager.h"
#include "FeatureEvaluator.h"
#include "CyclopsMLModel.h"
#include "TestResult.h"
#include "CyclopsGrid.h"
#include "TrainResult.h"
/*! \brief Lightweight learning-based classifier
* Uniform API for machine learning (classification); supports auto-tuning and various testing techniques
*
* Example:
* \code{.cpp}
* LLClassifier::Ptr llcPtr = LLClassifier::getInstance("new_llc");
* llcPtr->updateClass(0, "class_label_1", "folder_of_sample_1"); // add class 1 and its sample database
* llcPtr->updateClass(1, "class_label_2", "folder_of_sample_2"); // add class 2 and its sample database
* FeatureEvaluator::Ptr fePtr = FeatureEvaluator::getInstance("new_llc"); // create a new feature evaluator
* llcPtr->setFeatureEvaluator(fePtr);
* CyclopsFeature::Ptr hogPtr = fePtr->add(FeatureType::HOG); // add HOG feature
* CyclopsMLModel::Ptr svmPtr = llcPtr->add(MLModel::SVM); // add SVM model
* bool ret = llcPtr->train(); // train
* if (ret) {
* std::string classLabel;
* float confidence;
* llcPtr->predict(newImg, classLabel, confidence); // prediction
* }
* \endcode
*
* Auto-tune example:
* \code{.cpp}
* std::vector<CyclopsGrids> mgrids; // create tuning grid
* CyclopsGrids svmGrids(svmPtr->getName());
* svmGrids.addGrid<double>(SVCModel::Coef, 0.1, 0.4, 2, true); // 3 steps
* mgrids.push_back(std::move(svmGrids));
* TrainResults::Ptr trPtr = llcPtr->autoTune(LLClassifier::ModelOnly, mgrids); // or use startAutoTune()
* int svmId = trPtr->getIdByName(svmPtr->getName()); // get index of svm model in test result
* std::list<int> bestList = trPtr->getBest(svmId);
* TrainResult res = trPtr->get(bestList.front()); // get best tuning result
* double acc = res.testAccuracy;
* \endcode
*
* Test example:
* \code{.cpp}
* TestResults::Ptr trPtr = llcPtr->test(SamplingMode::CrossValidation, 3, true, 5); // or use startTest()
* int svmId = trPtr->getIdByName(svmPtr->getName()); // get index of svm model in test result
* double acc = trPtr->getAccuracy(svmId);
* \endcode
* see LLCTest for unit test
*/
class LLClassifier : public ICyclopsModuleInstance
{
public:
    /*! Status codes of the classifier.
    * NoError is positive; every error enumerator is negative, counting upward
    * from ErrLoadSample (-9999).
    */
    enum ErrorCode {
        NoError = 1,
        // negative values for error
        ErrLoadSample = -9999,
        ErrTooFewClass, // at least two classes are required
        ErrNoModel,
        ErrNoFeature,
        ErrDirtyModel,
        ErrNotTrained,
        ErrTooFewSample,
        ErrStopByUser,
        ErrBusy,
        ErrFailExtFeature,
        ErrUnexpect,
    };

    // Limits declared here and defined out of line.
    // NOTE(review): judging by the names they bound the train/test sample counts
    // and the sample size -- confirm against the definitions.
    static const int cMinTrainCount;
    static const int cMinTestCount;
    static const int cMinSampleSize;
    static const int cMaxSampleSize;

    /*! Construct an empty classifier; the primary model index starts at 0 */
    LLClassifier() : mPrimaryModel(0)
    {}
    virtual ~LLClassifier() {}

    /*! Shared-ownership handle to a classifier */
    typedef std::shared_ptr<LLClassifier> Ptr;
    // NOTE(review): presumably declares the static getInstance() factory used as
    // LLClassifier::getInstance("name") -- confirm against the macro definition.
    DECL_GET_INSTANCE(LLClassifier::Ptr)

    /*! \fn serializeToMemory
    * Serialize information of the classifier into an in-memory string, see also deserializeFromMemory()
    * @param str used to take the output serialization result
    * @return true for succeed, false for fail
    * \fn serializeToFile
    * Serialize information of the classifier into a text file, see also deserializeFromFile()
    * @param filename file name (full path) where we will write the data
    * @return true for succeed, false for fail
    * \fn deserializeFromMemory
    * Deserialize the classifier from in-memory string, see also serializeToMemory()
    * @param str in-memory string
    * @return true for succeed, false for fail
    * \fn deserializeFromFile
    * Deserialize the classifier from a text file, see also serializeToFile()
    * @param filename file name (full path) where we will read the data
    * @return true for succeed, false for fail
    */
    DECL_SERIALIZE_FUNCS

    /*! Total count of classes */
    int countClass() const { return static_cast<int>(mClassLabels.size()); }
    /*! Add a new class of given label and its sample database location */
    virtual bool addClass(const std::string& label, const std::string& rootPath = std::string());
    /*! Update an existing class with new label and new sample database location */
    virtual bool updateClass(int index, const std::string& label, const std::string& rootPath = std::string());
    /*! Remove an existing class
    * @param label label of the class to remove
    * @param removePermanently true if you want to remove the sample database permanently from disk
    */
    virtual bool removeClass(const std::string& label, bool removePermanently);
    /*! Remove all classes
    * @param removePermanently true if you want to remove the sample databases permanently from disk
    */
    virtual bool removeAllClass(bool removePermanently);
    /*! Get index of class by its label */
    virtual int indexOfClass(const std::string& label);
    /*! Get sample database manager by class label */
    virtual SampleDBManager::Ptr getSampleDB(const std::string& label);
    /*! Get class label by index
    * @return the label, or an empty string when index is out of range
    */
    std::string getClass(int index) const {
        // Compare against the int-typed count to avoid a signed/unsigned comparison.
        if (index < 0 || index >= countClass()) return "";
        return mClassLabels[index];
    }
    /*! Get uniformed sample size */
    virtual Size getSampleSize();

    /*! Set feature evaluator */
    void setFeatureEvaluator(FeatureEvaluator::Ptr pFE) { mFE = pFE; }
    /*! Get feature evaluator for feature modification */
    FeatureEvaluator::Ptr getFeatureEvaluator() const { return mFE; }

    /*! Add a new model of given type */
    virtual CyclopsMLModel::Ptr add(MLModel ftype);
    /*! Total count of models added */
    int size() const { return static_cast<int>(mMLModels.size()); }
    /*! Get model by index
    * @return the model, or nullptr when index is out of range
    */
    CyclopsMLModel::Ptr get(int index) {
        if (index < 0 || index >= size()) return nullptr;
        return mMLModels[index];
    }
    /*! Remove model by index */
    virtual bool remove(int index);
    /*! Remove all models */
    virtual bool removeAll();
    /*! Get model index by name */
    virtual int indexByName(const std::string& name);
    /*! Modify primary model which will be used for prediction.
    * An out-of-range index is ignored and the current primary model is kept.
    */
    void setPrimaryModel(int idx) {
        if (idx >= 0 && idx < size())
            mPrimaryModel = idx;
    }
    /*! Get index of primary model
    * @return the stored index when it is still valid; otherwise 0 if any model exists,
    *         or -1 when there is no model at all
    */
    int getPrimaryModel() const {
        int s = size();
        if (mPrimaryModel >= 0 && mPrimaryModel < s) return mPrimaryModel;
        else if (s > 0) return 0;
        else return -1;
    }

    /*! Train all models
    * @return integer status.
    *         NOTE(review): presumably an ErrorCode value (NoError == 1, errors negative);
    *         if so, do not test the result as a bool -- confirm against the implementation.
    */
    virtual int train();
    /*! Whether the primary model trained */
    virtual bool isTrained();
    /*! Prediction of given image
    * @param img image to classify
    * @param classLabel output, label of the predicted class
    * @param confidence output, confidence of the prediction
    * @param pUserData optional user data map, may be nullptr
    * @return integer status (presumably an ErrorCode value -- confirm against the implementation)
    */
    virtual int predict(const Mat& img,
        std::string& classLabel,
        float& confidence,
        SampleInstance::UserDataMap* pUserData = nullptr);
    /** @overload */
    virtual int predict(const Mat& img, DetectRoi& droi,
        std::string& classLabel,
        float& confidence,
        SampleInstance::UserDataMap* pUserData = nullptr);

    /*! Test and accuracy estimation (synchronous)
    * @param mode Sampling mode, how test cases are built
    * @param repeat for random test, how many times split-train-test is performed
    * @param stratified whether same proportion is guaranteed for each category
    * @param folds for cross-validation sampling, dataset is split into $folds group,
    * each time use one group as test dataset and remaining as train dataset
    * @param trainSize for random sampling, training dataset contains $trainSize of all samples
    * @return test result
    */
    virtual TestResults::Ptr test(SamplingMode mode, int repeat = 10, bool stratified = true,
        int folds = 5, float trainSize = 0.66);
    /*! Test and accuracy estimation (asynchronous), same parameters as test() */
    virtual TestResults::Ptr startTest(SamplingMode mode, int repeat = 10, bool stratified = true,
        int folds = 5, float trainSize = 0.66);

    /*! What autoTune() / startAutoTune() should tune */
    enum AutoTuneMode {
        FeatureOnly,
        ModelOnly,
        FeatureAndModel
    };
    /*! Auto-Tune feature and models parameters (synchronous)
    * @param mode auto-tune mode, currently only FeatureOnly and ModelOnly is supported
    * @param grids parameter searching grid for features or models
    * @param randomSampling true for use randomly selected samples as training dataset and remaining as test,
    otherwise, use default training dataset
    * @param stratified whether same proportion is guaranteed for each category
    * @param trainSize for random sampling, training dataset contains $trainSize of all samples
    * @return train result
    */
    virtual TrainResults::Ptr autoTune(AutoTuneMode mode, const std::vector<CyclopsGrids>& grids,
        bool randomSampling = false, bool stratified = true, float trainSize = 0.66);
    /*! Auto-Tune feature and models parameters (asynchronous), same parameters as autoTune() */
    virtual TrainResults::Ptr startAutoTune(AutoTuneMode mode, const std::vector<CyclopsGrids>& grids,
        bool randomSampling = false, bool stratified = true, float trainSize = 0.66);

    /*! Strategy used by autoSelect() / startAutoSelect() */
    enum AutoSelectMode {
        /*! Based on evaluation ranking */
        ByEvaluation,
        /*! Based on classification accuracy, add features one by one into selection, SFS.
        If there's no model defined or too few samples, we'll fall back to evaluation-based */
        ByAdding,
        /*! Based on classification accuracy, add features one by one into selection,
        try remove one if makes better result in each iteration, SFFS.
        If there's no model defined or too few samples, we'll fall back to evaluation-based */
        ByAddingFloating,
        /*! Based on classification accuracy, start from full selection, then remove feature one by one, SBS.
        If there's no model defined or too few samples, we'll fall back to evaluation-based */
        ByRemoving,
        /*! Based on classification accuracy, start from full selection, then remove feature one by one,
        try add one back if makes better result in each iteration, SFBS.
        If there's no model defined or too few samples, we'll fall back to evaluation-based */
        ByRemovingFloating,
    };
    /*! Auto-select features of better ability to distinguish samples (synchronous)
    * @param mode auto-select mode, see AutoSelectMode.
    * @param topVal choose top-n features as selection, the maximum number of features in selection
    0 or negative value means we'll decide the number on-the-fly
    * @param minAcc minimum classification accuracy or evaluation value that we should stop the process of auto-selection,
    0 or negative value means we'll ignore it
    * @param randomSampling true for use randomly selected samples as training dataset and remaining as test,
    otherwise, use default training dataset
    * @param stratified whether same proportion is guaranteed for each category
    * @param trainSize for random sampling, training dataset contains $trainSize of all samples
    * @return train result
    */
    virtual TrainResults::Ptr autoSelect(AutoSelectMode mode, int topVal = 0, float minAcc = 0,
        bool randomSampling = false, bool stratified = true, float trainSize = 0.66);
    /*! Auto-select features of better ability to distinguish samples (asynchronous).
    * Same parameters as autoSelect(); topN here corresponds to topVal there.
    */
    virtual TrainResults::Ptr startAutoSelect(AutoSelectMode mode, int topN = 0, float minAcc = 0,
        bool randomSampling = false, bool stratified = true, float trainSize = 0.66);

    /*! Whether it is busy with a training, testing or auto-tune task (reports the state of the internal lock) */
    bool isBusy() const {
        return mLock.isLocked();
    }

private:
    // Internal helpers, implemented out of line.
    // NOTE(review): the groupings below are inferred from names and signatures only --
    // confirm against the implementation file.

    // Serialization hooks.
    virtual bool serialize(FileStorage& fs);
    virtual bool deserialize(const FileNode& fs);

    // Preparation, training and prediction helpers.
    int prepare(bool forceRefresh = false, bool modelRequired = true);
    bool isAllTrained();
    bool genTrainMats(std::vector<SampleInstIterator>& trainSamples, Mat& sampleMat, Mat& labelMat);
    int train(std::vector<SampleInstIterator>& trainSamples);
    int predict(SampleInstPtr siPtr, int modelIdx, float* pConfidence = nullptr);
    SampleDBManager::Ptr getSampleDBSafe(const std::string& label);
    void refreshSampleSize();
    void clearAll();

    // Test workers, each reporting through the given AsyncResult.
    void testCrossValidate(AsyncResult* r, bool stratified, int folds);
    void testRandom(AsyncResult* r, bool stratified, float trainSize, int repeat);
    void testLeaveOneOut(AsyncResult* r);
    void testOnTrain(AsyncResult* r);
    void testOnTest(AsyncResult* r);
    int doTest(AsyncResult* r, std::vector<SampleInstIterator>& testSamples);

    // Auto-tune workers; grids are deliberately taken by value.
    void autoTuneFeature(AsyncResult* r,
        std::vector<CyclopsGrids> grids, // yes, copy grids
        bool randomSampling, bool stratified, float trainSize);
    void autoTuneModel(AsyncResult* r,
        std::vector<CyclopsGrids> grids, // yes, copy grids
        bool randomSampling, bool stratified, float trainSize);

    // Auto-select workers.
    void autoSelectClassify(AsyncResult* r, int topVal, float minAcc,
        bool randomSampling, bool stratified, float trainSize, bool adding, bool floating);
    void autoSelectEvaluate(AsyncResult* r, int topVal, float minAcc,
        bool randomSampling, bool stratified, float trainSize);
    int _autoSelectEvaluate(AsyncResult* r, int topVal, float minAcc,
        bool randomSampling, bool stratified, float trainSize); // without lock and prepare

    // Sample-set construction helpers.
    bool totalRandomSampleSize(bool stratified, float trainSize, int* pTrainCount, int* pTestCount);
    void genRandomSamples(bool stratified, float trainSize, int trainCount, int testCount,
        std::vector<SampleInstIterator>* trainSamples = nullptr,
        std::vector<SampleInstIterator>* testSamples = nullptr);
    void genDefaultTrainSamples(std::vector<SampleInstIterator>& trainSamples);
    void genDefaultTestSamples(std::vector<SampleInstIterator>& testSamples);
    void preLoadSamples(std::vector<SampleInstIterator>& samples);
    bool genSampleSetForAutoTask(bool randomSampling, bool stratified, float trainSize,
        std::vector<SampleInstIterator>* trainSamples = nullptr,
        std::vector<SampleInstIterator>* testSamples = nullptr);

private:
    std::vector<std::string> mClassLabels;      // class labels; the class index is the position in this vector
    FeatureEvaluator::Ptr mFE;                  // feature evaluator set through setFeatureEvaluator()
    std::vector<CyclopsMLModel::Ptr> mMLModels; // models; the model index is the position in this vector
    Size mSampleSize;                           // NOTE(review): presumably the value behind getSampleSize() -- confirm
    int mPrimaryModel;                          // index into mMLModels; validated on read by getPrimaryModel()
    CyclopsLock mLock;                          // its locked state is what isBusy() reports
};
#endif // __LLClassifier_h_