samples/cpp/train_HOG.cpp


#include " opencv2/ml.hpp "
#include <iostream>
#include <time.h>
using namespace cv ;
using namespace cv::ml ;
using namespace std ;
vector< float > get_svm_detector( const Ptr< SVM > & svm );
void convert_to_ml( const std::vector< Mat > & train_samples, Mat & trainData );
void load_images( const 字符串 & dirname, vector< Mat > & img_lst, bool showImages );
void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size );
void computeHOGs( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst, bool use_flip );
void test_trained_detector( 字符串 obj_det_filename, 字符串 test_dir, 字符串 videofilename );
vector< float > get_svm_detector( const Ptr< SVM > & svm )
{
// get the support vectors
Mat sv = svm->getSupportVectors();
const int sv_total = sv. rows ;
// get the decision function
Mat alpha, svidx;
double rho = svm->getDecisionFunction( 0, alpha, svidx );
CV_Assert ( alpha. total () == 1 && svidx. total () == 1 && sv_total == 1 );
CV_Assert ( (alpha. type () == CV_64F && alpha. at < double >(0) == 1.) ||
(alpha. type () == CV_32F && alpha. at < float >(0) == 1.f) );
vector< float > hog_detector( sv. cols + 1 );
memcpy( &hog_detector[0], sv. ptr (), sv. cols * sizeof ( hog_detector[0] ) );
hog_detector[sv. cols ] = (float)-rho;
return hog_detector;
}
/*
* Convert training/testing set to be used by OpenCV Machine Learning algorithms.
* TrainData is a matrix of size (#samples x max(#cols,#rows) per samples), in 32FC1.
* Transposition of samples are made if needed.
*/
void convert_to_ml( const vector< Mat > & train_samples, Mat & trainData )
{
//--Convert data
const int rows = (int)train_samples.size();
const int cols = (int) std::max ( train_samples[0].cols, train_samples[0].rows );
Mat tmp( 1, cols, CV_32FC1 ); //< used for transposition if needed
trainData = Mat ( rows, cols, CV_32FC1 );
for ( size_t i = 0 ; i < train_samples.size(); ++i )
{
CV_Assert ( train_samples[i].cols == 1 || train_samples[i].rows == 1 );
if ( train_samples[i].cols == 1 )
{
transpose ( train_samples[i], tmp );
tmp.copyTo( trainData. row ( ( int )i ) );
}
else if ( train_samples[i].rows == 1 )
{
train_samples[i].copyTo( trainData. row ( ( int )i ) );
}
}
}
void load_images( const 字符串 & dirname, vector< Mat > & img_lst, bool showImages = false )
{
vector< String > files;
glob ( dirname, files );
for ( size_t i = 0; i < files.size(); ++i )
{
Mat img = imread ( files[i] ); // load the image
if ( img. empty () ) // invalid image, skip it.
{
cout << files[i] << " is invalid!" << endl;
continue ;
}
if ( showImages )
{
imshow ( "image" , img );
waitKey ( 1 );
}
img_lst.push_back( img );
}
}
void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size )
{
Rect box;
box. width = size. width ;
box. height = size. height ;
const int size_x = box. width ;
const int size_y = box. height ;
srand( ( unsigned int )time( NULL ) );
for ( size_t i = 0; i < full_neg_lst.size(); i++ )
if ( full_neg_lst[i].cols > box. width && full_neg_lst[i].rows > box. height )
{
box. x = rand() % ( full_neg_lst[i].cols - size_x );
box. y = rand() % ( full_neg_lst[i].rows - size_y );
Mat roi = full_neg_lst[i]( box );
neg_lst.push_back( roi. clone () );
}
}
void computeHOGs( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst, bool use_flip )
{
hog. winSize = wsize;
Mat gray;
vector< float > descriptors;
for ( size_t i = 0 ; i < img_lst.size(); i++ )
{
if ( img_lst[i].cols >= wsize. width && img_lst[i].rows >= wsize. height )
{
Rect r = Rect (( img_lst[i].cols - wsize. width ) / 2,
( img_lst[i].rows - wsize. height ) / 2,
wsize. width ,
wsize. height );
cvtColor ( img_lst[i](r), gray, COLOR_BGR2GRAY );
hog. compute ( gray, descriptors, Size ( 8, 8 ), Size ( 0, 0 ) );
gradient_lst.push_back( Mat ( descriptors ).clone() );
if ( use_flip )
{
flip ( gray, gray, 1 );
hog. compute ( gray, descriptors, Size ( 8, 8 ), Size ( 0, 0 ) );
gradient_lst.push_back( Mat ( descriptors ).clone() );
}
}
}
}
void test_trained_detector( 字符串 obj_det_filename, 字符串 test_dir, 字符串 videofilename )
{
cout << "Testing trained detector..." << endl;
hog. load ( obj_det_filename );
vector< String > files;
glob ( test_dir, files );
int delay = 0;
if ( videofilename != "" )
{
if ( videofilename.size() == 1 && isdigit( videofilename[0] ) )
cap. open ( videofilename[0] - '0' );
else
cap. open ( videofilename );
}
obj_det_filename = "testing " + obj_det_filename;
namedWindow ( obj_det_filename, WINDOW_NORMAL );
for ( size_t i=0;; i++ )
{
Mat img;
if ( cap. isOpened () )
{
cap >> img;
delay = 1;
}
else if ( i < files.size() )
{
img = imread ( files[i] );
}
if ( img. empty () )
{
return ;
}
vector< Rect > detections;
vector< double > foundWeights;
hog. detectMultiScale ( img, detections, foundWeights );
for ( size_t j = 0; j < detections.size(); j++ )
{
Scalar color = Scalar ( 0, foundWeights[j] * foundWeights[j] * 200, 0 );
rectangle ( img, detections[j], color, img. cols / 400 + 1 );
}
imshow ( obj_det_filename, img );
if ( waitKey ( delay ) == 27 )
{
return ;
}
}
}
int main( int argc, char ** argv )
{
const char * keys =
{
"{help h| | show help message}"
"{pd | | path of directory contains positive images}"
"{nd | | path of directory contains negative images}"
"{td | | path of directory contains test images}"
"{tv | | test video file name}"
"{dw | | width of the detector}"
"{dh | | height of the detector}"
"{f |false| indicates if the program will generate and use mirrored samples or not}"
"{d |false| train twice}"
"{t |false| test a trained detector}"
"{v |false| visualize training steps}"
"{fn |my_detector.yml| file name of trained SVM}"
};
CommandLineParser parser( argc, argv, keys );
if ( parser.has( "help" ) )
{
parser.printMessage();
exit( 0 );
}
字符串 pos_dir = parser.get< 字符串 >( "pd" );
字符串 neg_dir = parser.get< 字符串 >( "nd" );
字符串 test_dir = parser.get< 字符串 >( "td" );
字符串 obj_det_filename = parser.get< 字符串 >( "fn" );
字符串 videofilename = parser.get< 字符串 >( "tv" );
int detector_width = parser.get< int >( "dw" );
int detector_height = parser.get< int >( "dh" );
bool test_detector = parser.get< bool >( "t" );
bool train_twice = parser.get< bool >( "d" );
bool visualization = parser.get< bool >( "v" );
bool flip_samples = parser.get< bool >( "f" );
if ( test_detector )
{
test_trained_detector( obj_det_filename, test_dir, videofilename );
exit( 0 );
}
if ( pos_dir.empty() || neg_dir.empty() )
{
parser.printMessage();
cout << "Wrong number of parameters.\n\n"
<< "Example command line:\n" << argv[0] << " -dw=64 -dh=128 -pd=/INRIAPerson/96X160H96/Train/pos -nd=/INRIAPerson/neg -td=/INRIAPerson/Test/pos -fn=HOGpedestrian64x128.xml -d\n"
<< "\nExample command line for testing trained detector:\n" << argv[0] << " -t -fn=HOGpedestrian64x128.xml -td=/INRIAPerson/Test/pos" ;
exit( 1 );
}
vector< Mat > pos_lst, full_neg_lst, neg_lst, gradient_lst;
vector< int > labels;
clog << "Positive images are being loaded..." ;
load_images( pos_dir, pos_lst, visualization );
if ( pos_lst.size() > 0 )
{
clog << "...[done]" << endl;
}
else
{
clog << "no image in " << pos_dir <<endl;
return 1;
}
Size pos_image_size = pos_lst[0].size();
if ( detector_width && detector_height )
{
pos_image_size = Size ( detector_width, detector_height );
}
else
{
for ( size_t i = 0; i < pos_lst.size(); ++i )
{
if ( pos_lst[i].size() != pos_image_size )
{
cout << "All positive images should be same size!" << endl;
exit( 1 );
}
}
pos_image_size = pos_image_size / 8 * 8;
}
clog << "Negative images are being loaded..." ;
load_images( neg_dir, full_neg_lst, false );
sample_neg( full_neg_lst, neg_lst, pos_image_size );
clog << "...[done]" << endl;
clog << "Histogram of Gradients are being calculated for positive images..." ;
computeHOGs( pos_image_size, pos_lst, gradient_lst, flip_samples );
size_t positive_count = gradient_lst.size();
labels.assign( positive_count, +1 );
clog << "...[done] ( positive count : " << positive_count << " )" << endl;
clog << "Histogram of Gradients are being calculated for negative images..." ;
computeHOGs( pos_image_size, neg_lst, gradient_lst, flip_samples );
size_t negative_count = gradient_lst.size() - positive_count;
labels.insert( labels.end(), negative_count, -1 );
CV_Assert ( positive_count < labels.size() );
clog << "...[done] ( negative count : " << negative_count << " )" << endl;
Mat train_data;
convert_to_ml( gradient_lst, train_data );
clog << "Training SVM..." ;
/* Default values to train SVM */
svm->setCoef0( 0.0 );
svm->setDegree( 3 );
svm->setTermCriteria( TermCriteria ( TermCriteria::MAX_ITER + TermCriteria::EPS , 1000, 1e-3 ) );
svm->setGamma( 0 );
svm->setKernel( SVM::LINEAR );
svm->setNu( 0.5 );
svm->setP( 0.1 ); // for EPSILON_SVR, epsilon in loss function?
svm->setC( 0.01 ); // From paper, soft classifier
svm->setType( SVM::EPS_SVR ); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
svm->train( train_data, ROW_SAMPLE , labels );
clog << "...[done]" << endl;
if ( train_twice )
{
clog << "Testing trained detector on negative images. This may take a few minutes..." ;
my_hog. winSize = pos_image_size;
// Set the trained svm to my_hog
my_hog. setSVMDetector ( get_svm_detector( svm ) );
vector< Rect > detections;
vector< double > foundWeights;
for ( size_t i = 0; i < full_neg_lst.size(); i++ )
{
if ( full_neg_lst[i].cols >= pos_image_size. width && full_neg_lst[i].rows >= pos_image_size. height )
my_hog. detectMultiScale ( full_neg_lst[i], detections, foundWeights );
else
detections.clear();
for ( size_t j = 0; j < detections.size(); j++ )
{
Mat detection = full_neg_lst[i]( detections[j] ).clone();
resize ( detection, detection, pos_image_size, 0, 0, INTER_LINEAR_EXACT );
neg_lst.push_back( detection );
}
if ( visualization )
{
for ( size_t j = 0; j < detections.size(); j++ )
{
rectangle ( full_neg_lst[i], detections[j], Scalar ( 0, 255, 0 ), 2 );
}
imshow ( "testing trained detector on negative images" , full_neg_lst[i] );
waitKey ( 5 );
}
}
clog << "...[done]" << endl;
gradient_lst.clear();
clog << "Histogram of Gradients are being calculated for positive images..." ;
computeHOGs( pos_image_size, pos_lst, gradient_lst, flip_samples );
positive_count = gradient_lst.size();
clog << "...[done] ( positive count : " << positive_count << " )" << endl;
clog << "Histogram of Gradients are being calculated for negative images..." ;
computeHOGs( pos_image_size, neg_lst, gradient_lst, flip_samples );
negative_count = gradient_lst.size() - positive_count;
clog << "...[done] ( negative count : " << negative_count << " )" << endl;
labels.clear();
labels.assign(positive_count, +1);
labels.insert(labels.end(), negative_count, -1);
clog << "Training SVM again..." ;
convert_to_ml( gradient_lst, train_data );
svm->train( train_data, ROW_SAMPLE , labels );
clog << "...[done]" << endl;
}
hog. winSize = pos_image_size;
hog. setSVMDetector ( get_svm_detector( svm ) );
hog. save ( obj_det_filename );
test_trained_detector( obj_det_filename, test_dir, videofilename );
return 0;
}