/*!
 * \file SampleDBManager.h
 * \date 2019/10/17
 *
 * \author Lin, Chi
 * Contact: lin.chi@hzleaper.com
 *
 *
 * \note
 */
#ifndef __SampleDBManager_h_
#define __SampleDBManager_h_

#include "CyclopsCommon.h"
#include "StdUtils.h"
#include "DetectRoi.h"
/* NOTE(review): the header name of the following #include is missing in this copy of the file;
 * restore it from the original source before building. */
#include
#include "CyclopsModules.h"
#include "SampleInstance.h"

/*! \brief Manager for sample database
 *  Supports add/delete/query operations; the database can be in-memory or persistent.
 *  Example:
 *  \code{.cpp}
 *  SampleDBManager sdbMgr; // or create via global factory SampleDBManager::getInstance()
 *  // pass an empty folder path to get an in-memory database
 *  bool ret = sdbMgr.init("my_new_db", "z:/db_folder_name");
 *  if (ret) {
 *      sdbMgr.addToDB(sampleImg, SampleDBManager::TrainSet);
 *      // ... and more
 *      sdbMgr.addToDB(testImg, SampleDBManager::TestSet);
 *      // ... and more
 *  }
 *  \endcode
 *  To get sample instances for browsing, modification or other usage, use getIterator().
 */
class SampleDBManager : public ICyclopsModuleInstance
{
	/*! \fn getClassLabel
	 *  Get the class label of this sample database; the label is supplied through init()
	 *  and can be changed with setClassLabel()
	 */
	DECLARE_PARAMETER_GET(std::string, ClassLabel)

	/*! \fn setCacheImage
	 *  Define whether the image is cached in the sample instance, defaults to false, see also getCacheImage()
	 *  \fn getCacheImage
	 *  Get whether the image is cached in the sample instance, see also setCacheImage()
	 */
	DECLARE_PARAMETER(bool, CacheImage)

	/*! \fn setUseAutoSize
	 *  Define whether the auto-generated uniform size or the customized size is used as sample size,
	 *  defaults to true, see also getUseAutoSize()
	 *  \fn getUseAutoSize
	 *  Get whether the auto-generated uniform size or the customized size is used as sample size,
	 *  see also setUseAutoSize()
	 */
	DECLARE_PARAMETER(bool, UseAutoSize)

	/*! \fn setCustomSize
	 *  Define the customized sample size, used when UseAutoSize = false, defaults to (64, 64);
	 *  the effective size is read back through getSampleSize()
	 */
	DECLARE_PARAMETER_SET(Size, CustomSize)

public:
	/*! Selects which dataset an operation works on */
	enum SampleDBType {
		All = 0,
		TrainSet,
		TestSet
	};

public:
	/*! Construct an uninitialized manager: image caching off, auto size on,
	 *  custom size (64, 64) and an empty auto-size accumulator. Call init() before use.
	 */
	SampleDBManager() : mInited(false), mInMemory(false),
		mCacheImage(false), mUseAutoSize(true), mCustomSize(64, 64),
		mAutoSize(0, 0), mAutoSizeCount(0)
	{}
	virtual ~SampleDBManager() {}

	/* NOTE(review): the template argument of std::shared_ptr is missing in this copy of the file
	 * (presumably SampleDBManager) -- confirm against the original source. */
	typedef std::shared_ptr Ptr;
	DECL_GET_INSTANCE(SampleDBManager::Ptr)

	/*! Initialize the whole sample database
	 * @param classLabel string label of class
	 * @param rootPath working directory, where the dbinfo file is located; empty for in-memory mode
	 * @return true if everything is fine
	 */
	virtual bool init(const std::string& classLabel, const std::string& rootPath);

	/*! Return whether this database has been initialized (value of the internal mInited flag) */
	bool hasInit() const { return mInited; }

	/*! Change the class label */
	virtual void setClassLabel(const std::string& classLabel);

	/*! Refresh the whole sample database: add new sample instances from disk and remove legacy ones
	 * @param force defaults to false. NOTE(review): exact effect is defined in the implementation
	 *              file and is not visible from this header -- confirm before relying on it
	 * @return true if everything is fine
	 */
	virtual bool refresh(bool force = false);

	/*! \fn serializeToMemory
	 *  Serialize information of this sample database into an in-memory string, see also deserializeFromMemory()
	 *  @param str used to take the output serialization result
	 *  @return true for success, false for failure
	 *  \fn serializeToFile
	 *  Serialize information of this sample database into a text file, see also deserializeFromFile()
	 *  @param filename file name (full path) where the data is written
	 *  @return true for success, false for failure
	 *  \fn deserializeFromMemory
	 *  Deserialize the sample database from an in-memory string, see also serializeToMemory()
	 *  @param str in-memory string
	 *  @return true for success, false for failure
	 *  \fn deserializeFromFile
	 *  Deserialize the sample database from a text file, see also serializeToFile()
	 *  @param filename file name (full path) from which the data is read
	 *  @return true for success, false for failure
	 */
	DECL_SERIALIZE_FUNCS

	/*! Get the number of sample instances in the train set */
	std::size_t trainSetCount() const { return mTrainSet.size(); }
	/*! Get the number of sample instances in the test set */
	std::size_t testSetCount() const { return mTestSet.size(); }

	/*! Add an image to the train or test set.
	 *  The image is saved to disk under the current working directory
	 * @param img input image
	 * @param droi roi
	 * @param dbType train or test dataset
	 * @param pUserData optional user data map
	 * @param pNewName optional output, receives the name of the newly added sample instance
	 * @return true if everything works fine
	 */
	virtual bool addToDB(const Mat& img, DetectRoi& droi, SampleDBType dbType,
		SampleInstance::UserDataMap* pUserData = nullptr, std::string* pNewName = nullptr);
	/** @overload
	 *  Variant without any roi argument
	 */
	virtual bool addToDB(const Mat& img, SampleDBType dbType,
		SampleInstance::UserDataMap* pUserData = nullptr, std::string* pNewName = nullptr);
	/** @overload
	 *  Variant taking the roi as a vector.
	 *  NOTE(review): the element type of the vector is missing in this copy of the file -- confirm
	 *  against the original source.
	 */
	virtual bool addToDB(const Mat& img, const vector& roi, SampleDBType dbType,
		SampleInstance::UserDataMap* pUserData = nullptr, std::string* pNewName = nullptr);
	/** @overload
	 *  Variant taking a mask image instead of a roi
	 */
	virtual bool addToDB(const Mat& img, const Mat& mask, SampleDBType dbType,
		SampleInstance::UserDataMap* pUserData = nullptr, std::string* pNewName = nullptr);

	/*! Add a new data folder to the train or test set, and perform an incremental refresh immediately
	 * @param path path to the new data folder
	 * @param dbType train or test dataset
	 * @return true if everything works fine
	 */
	virtual bool addFolderToDB(const std::string& path, SampleDBType dbType);

	/*! Remove a sample instance from the train or test set, permanently removing its image and roi file from disk
	 * @param instName name of the sample instance to remove
	 * @param dbType train or test dataset
	 * @return true if everything works fine
	 */
	virtual bool removeFromDB(const std::string& instName, SampleDBType dbType);

	/*! Remove an existing data folder from the train or test set
	 * @param path path to the data folder to remove
	 * @param dbType train or test dataset
	 * @return true if everything works fine
	 */
	virtual bool removeFolderFromDB(const std::string& path, SampleDBType dbType);

	/*! Query a sample instance in this database
	 * @param instName name of the sample instance to look up
	 * @param dbType train or test dataset
	 * @return pointer to sample instance
	 */
	virtual SampleInstPtr queryDB(const std::string& instName, SampleDBType dbType);

	/*! Get an iterator to visit the corresponding dataset (or a subset of it), optionally in a specific order
	 *  Example:
	 *  \code{.cpp}
	 *  SampleInstIterator it = sdbMgrPtr->getIterator(SampleDBManager::TrainSet, SortBy::Date);
	 *  while (it.hasNext()) {
	 *      SampleInstPtr& siPtr = it.next();
	 *      // do stuff to sample instance
	 *  }
	 *  \endcode
	 * @param dbType train, test or from all datasets
	 * @param sortBy requested ordering, defaults to SortBy::None
	 * @param topN defaults to -1. NOTE(review): presumably limits the iterator to the first N
	 *             instances with -1 meaning no limit -- confirm against the implementation
	 * @return iterator
	 */
	virtual SampleInstIterator getIterator(SampleDBType dbType, SortBy sortBy = SortBy::None, int topN = -1);

	/*! Remove the entire database, including train and test set */
	virtual bool removeEntireDB();

	/*! Get the working directory */
	virtual std::string getWorkDir() const;

	/*! Get the default dataset folder of the given dataset */
	virtual const std::string& getDefaultDataFolder(SampleDBType dbType) const;

	/*! Get all dataset folders of the given dataset.
	 *  Returns the train-set folder list for TrainSet and the test-set folder list for any other
	 *  value, including All.
	 *  NOTE(review): the element type of std::list is missing in this copy of the file -- confirm
	 *  against the original source.
	 */
	const std::list& getDataFolder(SampleDBType dbType) const {
		return dbType == SampleDBType::TrainSet ? mTrainSetPaths : mTestSetPaths;
	}

	/*! Get the uniform sample size, either auto-generated or customized.
	 *  With UseAutoSize enabled this is the accumulated auto size divided by the accumulated count
	 *  (per dimension), or (64, 64) while the count is still zero; otherwise it is the custom size.
	 */
	Size getSampleSize() const {
		if (mUseAutoSize) {
			return mAutoSizeCount > 0 ?
				Size(mAutoSize.width / mAutoSizeCount, mAutoSize.height / mAutoSizeCount) :
				Size(64, 64);
		}
		else {
			return mCustomSize;
		}
	}

	/*! Clean up all cached data in sample instances, as it is no longer valid */
	virtual void cleanupSampleCache();

	/*! Parameter pack describing how to generate more samples.
	 *  Each transformation has a range and a step; the defaults below leave samples unchanged
	 *  (zero shift/rotation/gray offset, resize factor 1).
	 *  NOTE(review): units of the ranges and steps are not visible from this header.
	 */
	struct MoreSamplePack {
		Rangef shiftRng = Rangef(0, 0);   /* shift range */
		Rangef rotateRng = Rangef(0, 0);  /* rotation range */
		Rangef resizeRng = Rangef(1, 1);  /* resize range */
		Rangef grayRng = Rangef(0, 0);    /* gray offset range */
		float shiftStep = 0.5;            /* step used for the shift range */
		float rotateStep = 0.5;           /* step used for the rotation range */
		float resizeStep = 0.1;           /* step used for the resize range */
		float grayStep = 10;              /* step used for the gray offset range */
	};

	/*! Generate more samples by shifting, rotation, resizing and gray offset
	 * @param dbType train or test dataset
	 * @param samplePack how to generate more samples
	 * @param moreCount 0 to take all, otherwise we'll randomly pick some
	 * @return true if everything works fine
	 */
	virtual bool generateMoreSamples(SampleDBType dbType, const MoreSamplePack& samplePack, int moreCount = 0);

private:
	/*! Write this database to the given storage; presumably the backend of the public serialize functions */
	bool serialize(FileStorage& fs);
	/*! Read this database from the given node; presumably the backend of the public deserialize functions */
	bool deserialize(const FileNode& fs);
	/*! Make sure the default data folder of the given dataset is available (details in the implementation file) */
	bool ensureDefaultDataFolder(SampleDBType dbType);

	/*! Pick the instance map of a dataset: train set for TrainSet, test set for any other value (including All) */
	SampleInstMap& getDataSet(SampleDBType dbType) {
		return dbType == SampleDBType::TrainSet ? mTrainSet : mTestSet;
	}

	/*! Refresh one dataset from a directory (details in the implementation file)
	 * @param dataset dataset map to refresh
	 * @param newDataset second dataset map. NOTE(review): presumably receives the refreshed content -- confirm
	 * @param dirPath directory to scan
	 * @param calcAutoSize defaults to false. NOTE(review): presumably updates the auto-size accumulator -- confirm
	 */
	void refreshDataset(SampleInstMap& dataset, SampleInstMap& newDataset, const std::string& dirPath, bool calcAutoSize = false);

private:
	bool mInited;                 /* reported by hasInit() */
	bool mInMemory;               /* presumably set for in-memory mode (empty rootPath in init()) -- confirm */

	std::string mDBInfoPath;      /* presumably the path of the dbinfo file mentioned in init() -- confirm */
	/* NOTE(review): the element type of the two std::list members below is missing in this copy of the file. */
	std::list mTrainSetPaths;     /* data folders of the train set, see getDataFolder() */
	std::list mTestSetPaths;      /* data folders of the test set, see getDataFolder() */
	std::string mTrainGenSetPath; /* presumably the folder for generated train samples -- confirm */
	std::string mTestGenSetPath;  /* presumably the folder for generated test samples -- confirm */

	SampleInstMap mTrainSet;      /* sample instances of the train set */
	SampleInstMap mTestSet;       /* sample instances of the test set */

	Size2d mAutoSize;             /* accumulated size; getSampleSize() divides it by mAutoSizeCount */
	int mAutoSizeCount;           /* divisor for mAutoSize; 0 makes getSampleSize() fall back to (64, 64) */
};

#endif // SampleDBManager_h_