42 #ifndef __INCLUDE_MACHINE_LEARNING__
43 #define __INCLUDE_MACHINE_LEARNING__
46 #ifndef __INCLUDE_MIST_H__
50 #ifndef __INCLUDE_MIST_RANDOM__
// Compile-time switches consulted by the `#if defined( X ) && X ...` tests in this header.
// NOTE(review): names containing a double underscore are reserved identifiers in C++;
// they are left untouched here because renaming would affect every `#if` that tests them.
// When 1, learn() derives alpha and beta from two separate log-ratios (h1u1/h1u0 and
// h0u0/h0u1); the other visible code path uses beta = -alpha.
69 #define __ASYMMETRIC_WEIGHTING__ 0
// When 1, the code word of iteration t marks only entry `t % nhypothesis` as true.
70 #define __ONE_PER_CLASS_CODE_WORD__ 0
// When 1 (and the one-per-class switch is not), the code word is built by sorting random
// keys and marking the first half of the sorted entries as true.
71 #define __RANDOM_CODE_WORD__ 0
// Verbosity of the std::cout diagnostics: the visible tests are `>= 1` (progress and
// classification error) and `>= 3` (per-sample dumps).
72 #define __DEBUG_OUTPUT_LEVEL__ 0
// Bound compared against the counter `m` inside learn(); presumably the number of
// code-word refinement passes per boosting round (the loop header is not visible here).
73 #define __NUMBER_OF_INNER_LOOPS__ 10
77 namespace machine_learning
// Default constructor: value 0.0, category true, weight 0.0.
93 feature_one( ) : value( 0.0 ), category( true ), weight( 0.0 )
// Orders two records by `value` only. The enclosing signature is not visible in this
// listing; presumably this is operator<, which std::sort uses on the per-dimension list.
128 return( value < f.
value );
// A feature vector: a std::vector of feature_value_type that additionally carries a
// category label, a weight and a validity flag.
133 struct feature :
public std::vector< feature_value_type >
// Shorthand for the std::vector base class.
135 typedef std::vector< feature_value_type > base;
// Default: empty vector, category "unknown", weight 0.0, valid.
145 feature( ) : base( ), category(
"unknown" ), weight( 0.0 ), valid( true )
// `dimension` value-initialised elements, category "unknown".
153 feature(
size_type dimension ) : base( dimension ), category(
"unknown" ), weight( 0.0 ), valid( true )
// Empty vector labelled with the given category.
161 feature(
const std::string &cate ) : base( ), category( cate ), weight( 0.0 ), valid( true )
// `dimension` elements explicitly set to 0.0, labelled with the given category.
170 feature(
size_type dimension,
const std::string &cate ) : base( dimension, 0.0 ), category( cate ), weight( 0.0 ), valid( true )
// Copy constructor: copies the elements together with category, weight and valid.
175 feature(
const feature &f ): base( f ), category( f.category ), weight( f.weight ), valid( f.valid )
// Assignment fragment: the element copy is delegated to the base-class operator.
// The copies of category / weight / valid are not visible in this listing.
184 base::operator =( f );
// Key/value record whose ordering is defined on `key` alone.
195 template <
class KEY,
class VALUE >
// Builds a record from a key and a value.
205 pair(
const KEY &k,
const VALUE &val ) : key( k ), value( val ){ }
// Ascending order by key; `value` does not take part in the comparison.
208 bool operator <(
const pair &f )
const
210 return( key < f.key );
// Static two-argument comparison helper. Its body is not visible here; by its name it
// presumably orders by descending key — confirm.
214 static bool greater(
const pair &f1,
const pair &f2 )
// Helper taking a size_t `x` and looping `x` times. The loop body and the return
// statement are not visible here; by its name it presumably returns 2 raised to x — confirm.
232 inline size_t __power_of_two__(
size_t x )
235 for(
size_t i = 0 ; i < x ; i++ )
// Assignment fragment of the threshold weak classifier: copies threshold_ and index_.
// NOTE(review): sign_ is also part of the classifier state (serialize writes it); its
// copy is not visible in this listing — confirm it is assigned as well.
273 threshold_ = other.threshold_;
274 index_ = other.index_;
// Trains the threshold weak classifier on weighted samples.
//   features   : container of feature_type, indexed with [ ] and read through .weight
//   categories : container of bool, categories[ i ] is the binary label of features[ i ]
// For every feature dimension it searches the best threshold/polarity and keeps the
// dimension with the lowest weighted error (ties broken by a class-separation score).
// Several statements (early returns, local declarations, member updates) are not
// visible in this listing.
286 template <
template <
typename,
typename >
class FEATURE_LIST,
template <
typename,
typename >
class CATEGORY_LIST,
class Allocator1,
class Allocator2 >
287 bool learn(
const FEATURE_LIST< feature_type, Allocator1 > & features,
const CATEGORY_LIST< bool, Allocator2 > &categories )
// Nothing to learn from an empty training set.
289 if( features.empty( ) )
// Sentinel "worse than anything" starting error.
294 double _minimum_classification_error_ = 1.0e100;
// Total weight of the positive and of the negative samples.
297 double overall_sum_of_positive_weights = 0.0;
298 double overall_sum_of_negative_weights = 0.0;
299 for(
size_type i = 0 ; i < features.size( ) ; i++ )
302 if( categories[ i ] )
304 overall_sum_of_positive_weights += f.
weight;
308 overall_sum_of_negative_weights += f.
weight;
// One class carries no weight at all: the per-class divisions below would be by zero.
312 if( overall_sum_of_positive_weights == 0.0 || overall_sum_of_negative_weights == 0.0 )
// Best tie-break score seen so far.
317 double max_sigma = -1.0;
// The dimensionality is taken from the first sample; OpenMP wants a signed loop counter.
318 int nfeatures =
static_cast< int >( features[ 0 ].size( ) );
// Each feature dimension is examined independently.
// NOTE(review): the best-candidate bookkeeping near the end of the loop body writes
// variables declared outside the parallel loop; the synchronisation around it is not
// visible in this listing — confirm it is protected (e.g. by a critical section).
321 #pragma omp parallel for firstprivate( nfeatures ) schedule( guided )
322 for(
int index = 0 ; index < nfeatures ; index++ )
// Per-dimension list of (value, category, weight) records, sorted by value.
324 std::vector< feature_one > flist;
325 flist.reserve( features.size( ) );
326 for(
size_type i = 0 ; i < features.size( ) ; i++ )
336 std::sort( flist.begin( ), flist.end( ) );
// Best split of this dimension: error, polarity and threshold.
339 double min_error = 1.0e100, sgn = 1.0, th = 0.0;
// Running class weights of the sorted samples visited so far.
340 double sum_of_positive_weights = 0.0;
341 double sum_of_negative_weights = 0.0;
343 for(
size_type i = 0 ; i < flist.size( ) ; i++ )
348 sum_of_positive_weights += f.
weight;
352 sum_of_negative_weights += f.
weight;
// e1: positives seen so far plus negatives still ahead; e2: the mirrored quantity.
// They are the weighted errors of the two polarities of a split placed after sample i.
355 double e1 = sum_of_positive_weights + overall_sum_of_negative_weights - sum_of_negative_weights;
356 double e2 = sum_of_negative_weights + overall_sum_of_positive_weights - sum_of_positive_weights;
357 double error = e1 < e2 ? e1 : e2;
// `<=` lets a later split with the same error replace an earlier one.
359 if( error <= min_error )
363 sgn = e1 < e2 ? -1.0 : 1.0;
// With neighbours on both sides, place the threshold between flist[ i ] and flist[ i + 1 ]
// as a blend weighted by the adjacent gaps v1 and v2. `0 < i` guarantees size >= 2, so
// the unsigned `size( ) - 2` cannot wrap here.
365 if( 0 < i && i < flist.size( ) - 2 )
367 double v1 = std::abs( flist[ i - 1 ].value - flist[ i + 0 ].value );
368 double v2 = std::abs( flist[ i + 1 ].value - flist[ i + 2 ].value );
// Skip the blend when both gaps are zero (it would divide by zero).
369 if( std::abs( v1 + v2 ) > 0 )
371 double t1 = flist[ i + 0 ].value;
372 double t2 = flist[ i + 1 ].value;
373 th = ( t1 * v1 + t2 * v2 ) / ( v1 + v2 );
// Otherwise, if a next sample exists, average the current th with the next value.
376 else if( i < flist.size( ) - 1 )
378 th = ( th + flist[ i + 1 ].value ) * 0.5;
// Re-evaluate the chosen split on all training samples (the accumulation of the
// error `e` itself is not visible here).
384 for(
size_t i = 0 ; i < features.size( ) ; i++ )
389 if( evaluate( f, index, sgn, th ) != categories[ i ] )
// Weighted per-class means of this dimension.
// NOTE(review): this loop is bounded by flist.size( ) while it indexes categories[ i ]
// and (apparently) features[ i ]; the sibling loops use features.size( ). The two only
// agree if every sample was pushed into flist — confirm.
403 for(
size_type i = 0 ; i < flist.size( ) ; i++ )
406 if( categories[ i ] )
408 M1 += f.
weight * f[ index ];
412 M2 += f.
weight * f[ index ];
416 M1 /= overall_sum_of_positive_weights;
417 M2 /= overall_sum_of_negative_weights;
// Weighted per-class variances around those means.
419 for(
size_type i = 0 ; i < features.size( ) ; i++ )
422 if( categories[ i ] )
424 S1 += f.
weight * ( f[ index ] - M1 ) * ( f[ index ] - M1 );
428 S2 += f.
weight * ( f[ index ] - M2 ) * ( f[ index ] - M2 );
432 S1 /= overall_sum_of_positive_weights;
433 S2 /= overall_sum_of_negative_weights;
// Separation score: between-class term V1 over (1 + within-class term V2);
// the `1.0 +` keeps the denominator non-zero.
434 double V1 = overall_sum_of_positive_weights * overall_sum_of_negative_weights * ( M1 - M2 ) * ( M1 - M2 );
435 double V2 = ( overall_sum_of_positive_weights + overall_sum_of_negative_weights ) * ( overall_sum_of_positive_weights * S1 + overall_sum_of_negative_weights * S2 );
436 sigma = V1 / ( 1.0 + V2 );
// A strictly lower error always wins ...
441 if( _minimum_classification_error_ > e )
443 _minimum_classification_error_ = e;
// ... and an equal error wins only with a better separation score.
449 else if( _minimum_classification_error_ == e && sigma > max_sigma )
451 _minimum_classification_error_ = e;
// Function-call form of the classifier: forwards to the one-argument evaluate( f ).
468 template <
class FEATURE >
469 bool operator ()(
const FEATURE &f )
const
471 return( evaluate( f ) );
// Classifies `f` with the learned state (index_, sign_, threshold_).
478 template <
class FEATURE >
479 bool evaluate(
const FEATURE &f )
const
481 return( evaluate( f, index_, sign_, threshold_ ) );
// Threshold decision with explicit parameters.
//   f    : any type indexable with [ ]
//   indx : which element of f to test
//   sgn  : polarity multiplier
//   th   : threshold
// Returns sgn * f[ indx ] <= sgn * th, i.e. f[ indx ] <= th for a positive sgn and
// f[ indx ] >= th for a negative sgn.
485 template <
class FEATURE >
486 bool evaluate(
const FEATURE &f,
size_type indx,
double sgn,
double th )
const
488 return( sgn * f[ indx ] <= sgn * th );
// Renders the classifier state as the text "index,sign,threshold".
// NOTE(review): `%f` prints six fractional digits, so sign_/threshold_ do not round-trip
// exactly through deserialize. `%ld` expects a long; index_ is passed elsewhere as a
// size_type, so if that is size_t the specifier mismatches where long is 32-bit — confirm.
// The declaration (and size) of `buff` is not visible in this listing.
492 const std::string serialize( )
const
495 sprintf( buff,
"%ld,%f,%f", index_, sign_, threshold_ );
// Parses "index,sign,threshold" text back into index_, sign_ and threshold_.
// NOTE(review): the sscanf return value is not checked here, so malformed input leaves
// some members unchanged; `%ld` requires &index_ to point to a long — confirm the type.
500 void deserialize(
const std::string &data )
502 sscanf( data.c_str( ),
"%ld,%lf,%lf", &index_, &sign_, &threshold_ );
// Construction/copy fragments of the weak classifier that stores, for two classes,
// an average (ave_) and a spread (sig_) of one feature dimension (index_).
// Initial state: both averages 0.0, both spreads 1.0.
524 ave_[ 0 ] = ave_[ 1 ] = 0.0;
525 sig_[ 0 ] = sig_[ 1 ] = 1.0;
// Copy from `w`: the two-element double arrays are copied bytewise.
531 memcpy( ave_, w.ave_,
sizeof(
double ) * 2 );
532 memcpy( sig_, w.sig_,
sizeof(
double ) * 2 );
// Assignment from `other`: same array copies plus the selected dimension.
540 memcpy( ave_, other.ave_,
sizeof(
double ) * 2 );
541 memcpy( sig_, other.sig_,
sizeof(
double ) * 2 );
542 index_ = other.index_;
// Trains the ave_/sig_ weak classifier on weighted samples.
//   features   : container of feature_type, indexed with [ ] and read through .weight
//   categories : container of bool, categories[ i ] is the binary label of features[ i ]
// For every feature dimension it computes the weighted mean and variance of each class,
// measures the resulting error and keeps the best dimension's statistics in ave_/sig_.
// Several statements are not visible in this listing.
554 template <
template <
typename,
typename >
class FEATURE_LIST,
template <
typename,
typename >
class CATEGORY_LIST,
class Allocator1,
class Allocator2 >
555 bool learn(
const FEATURE_LIST< feature_type, Allocator1 > & features,
const CATEGORY_LIST< bool, Allocator2 > &categories )
557 if( features.empty( ) )
// Sentinel "worse than anything" starting error.
562 double _minimum_classification_error_ = 1.0e100;
// Total weight of the positive and of the negative samples.
// NOTE(review): unlike the threshold learner, no check for a zero class weight is
// visible here; the divisions below would then be by zero — confirm.
565 double overall_sum_of_positive_weights = 0.0;
566 double overall_sum_of_negative_weights = 0.0;
567 for(
size_type i = 0 ; i < features.size( ) ; i++ )
570 if( categories[ i ] )
572 overall_sum_of_positive_weights += f.
weight;
576 overall_sum_of_negative_weights += f.
weight;
580 double max_sigma = -1.0;
581 int nfeatures =
static_cast< int >( features[ 0 ].size( ) );
// Each feature dimension is examined independently.
// NOTE(review): the best-candidate update at the end of the body writes shared state;
// its synchronisation is not visible in this listing — confirm.
584 #pragma omp parallel for firstprivate( nfeatures ) schedule( guided )
585 for(
int index = 0 ; index < nfeatures ; index++ )
// Index 0 holds the statistics of the positive class, index 1 those of the negative class.
587 double ave[ 2 ] = { 0.0, 0.0 };
588 double sig[ 2 ] = { 0.0, 0.0 };
// Weighted per-class means.
590 for(
size_type i = 0 ; i < features.size( ) ; i++ )
593 if( categories[ i ] )
595 ave[ 0 ] += f.
weight * f[ index ];
599 ave[ 1 ] += f.
weight * f[ index ];
603 ave[ 0 ] /= overall_sum_of_positive_weights;
604 ave[ 1 ] /= overall_sum_of_negative_weights;
// Weighted per-class variances around those means.
606 for(
size_type i = 0 ; i < features.size( ) ; i++ )
609 if( categories[ i ] )
611 sig[ 0 ] += f.
weight * ( f[ index ] - ave[ 0 ] ) * ( f[ index ] - ave[ 0 ] );
615 sig[ 1 ] += f.
weight * ( f[ index ] - ave[ 1 ] ) * ( f[ index ] - ave[ 1 ] );
619 sig[ 0 ] /= overall_sum_of_positive_weights;
620 sig[ 1 ] /= overall_sum_of_negative_weights;
// Error of this candidate over all training samples (accumulation not visible here).
623 for(
size_t i = 0 ; i < features.size( ) ; i++ )
628 if( evaluate( f, index, ave, sig ) != categories[ i ] )
// Between-class (V1) and within-class (V2) terms of the separation score.
637 double V1 = overall_sum_of_positive_weights * overall_sum_of_negative_weights * ( ave[ 0 ] - ave[ 1 ] ) * ( ave[ 0 ] - ave[ 1 ] );
638 double V2 = ( overall_sum_of_positive_weights + overall_sum_of_negative_weights ) * ( overall_sum_of_positive_weights * sig[ 0 ] + overall_sum_of_negative_weights * sig[ 1 ] );
// NOTE(review): a candidate is accepted only if its error is not worse AND its score
// beats max_sigma, so a strictly lower error with a lower score is rejected. The
// threshold learner instead accepts any strictly lower error — confirm this is intended.
644 if( _minimum_classification_error_ >= e && sigma > max_sigma )
646 _minimum_classification_error_ = e;
// Publish the winning statistics into the classifier.
648 memcpy( ave_, ave,
sizeof(
double ) * 2 );
649 memcpy( sig_, sig,
sizeof(
double ) * 2 );
// Function-call form of the classifier: forwards to the one-argument evaluate( f ).
662 template <
class FEATURE >
663 bool operator ()(
const FEATURE &f )
const
665 return( evaluate( f ) );
// Classifies `f` with the learned state (index_, ave_, sig_).
672 template <
class FEATURE >
673 bool evaluate(
const FEATURE &f )
const
675 return( evaluate( f, index_, ave_, sig_ ) );
// Decision with explicit parameters: compares the squared distance of f[ indx ] to each
// class average, scaled by that class's spread.
//   ave, sig : two-element arrays (index 0 / index 1 = the two classes)
// Returns true when v0^2 * sig[ 1 ] <= v1^2 * sig[ 0 ]. This is the cross-multiplied
// form of v0^2 / sig[ 0 ] <= v1^2 / sig[ 1 ], so no division by a zero spread occurs.
679 template <
class FEATURE >
680 bool evaluate(
const FEATURE &f,
size_type indx,
const double ave[ 2 ],
const double sig[ 2 ] )
const
682 double v0 = f[ indx ] - ave[ 0 ];
683 double v1 = f[ indx ] - ave[ 1 ];
684 return( v0 * v0 * sig[ 1 ] <= v1 * v1 * sig[ 0 ] );
// Renders the classifier state as the text "index,ave0,ave1,sig0,sig1".
// NOTE(review): `%f` prints six fractional digits, so the doubles do not round-trip
// exactly; `%ld` assumes index_ is a long — confirm against its declared type.
// The declaration (and size) of `buff` is not visible in this listing.
688 const std::string serialize( )
const
691 sprintf( buff,
"%ld,%f,%f,%f,%f", index_, ave_[ 0 ], ave_[ 1 ], sig_[ 0 ], sig_[ 1 ] );
// Parses "index,ave0,ave1,sig0,sig1" text back into index_, ave_ and sig_.
// NOTE(review): the sscanf return value is not checked here; `%ld` requires &index_ to
// point to a long — confirm the type.
696 void deserialize(
const std::string &data )
698 sscanf( data.c_str( ),
"%ld,%lf,%lf,%lf,%lf", &index_, &ave_[ 0 ], &ave_[ 1 ], &sig_[ 0 ], &sig_[ 1 ] );
// State and construction/copy fragments of the histogram-based weak classifier.
// Number of histogram bins, declared through the project macro _MIST_CONST.
711 _MIST_CONST(
size_type, __number_of_bins__, 100 );
// Two histograms over the selected feature dimension; learn() fills hist1 / hist2
// depending on the sample's boolean category.
714 double hist1_[ __number_of_bins__ ];
715 double hist2_[ __number_of_bins__ ];
// Initial state: both histograms zeroed.
724 memset( hist1_, 0,
sizeof(
double ) * __number_of_bins__ );
725 memset( hist2_, 0,
sizeof(
double ) * __number_of_bins__ );
// Copy from `w`: bytewise copy of both histograms.
731 memcpy( hist1_, w.hist1_,
sizeof(
double ) * __number_of_bins__ );
732 memcpy( hist2_, w.hist2_,
sizeof(
double ) * __number_of_bins__ );
// Assignment from `other`: both histograms plus the selected dimension.
// NOTE(review): evaluate() also reads min_ and max_; their copies are not visible in
// this listing — confirm they are assigned as well.
740 memcpy( hist1_, other.hist1_,
sizeof(
double ) * __number_of_bins__ );
741 memcpy( hist2_, other.hist2_,
sizeof(
double ) * __number_of_bins__ );
742 index_ = other.index_;
// Trains the histogram-based weak classifier.
//   features   : container of feature_type, indexed with [ ]
//   categories : container of bool, categories[ i ] is the binary label of features[ i ]
// For every feature dimension it finds the value range, bins the samples of each class
// into a histogram, measures the resulting error and keeps the best dimension.
// Several statements are not visible in this listing.
756 template <
template <
typename,
typename >
class FEATURE_LIST,
template <
typename,
typename >
class CATEGORY_LIST,
class Allocator1,
class Allocator2 >
757 bool learn(
const FEATURE_LIST< feature_type, Allocator1 > & features,
const CATEGORY_LIST< bool, Allocator2 > &categories )
759 if( features.empty( ) )
// Sentinel "worse than anything" starting error.
764 double _minimum_classification_error_ = 1.0e100;
765 int nfeatures =
static_cast< int >( features[ 0 ].size( ) );
// Each feature dimension is examined independently.
// NOTE(review): the best-candidate update at the end of the body writes shared state;
// its synchronisation is not visible in this listing — confirm.
768 #pragma omp parallel for firstprivate( nfeatures ) schedule( guided )
769 for(
int index = 0 ; index < nfeatures ; index++ )
// Value range of this dimension, seeded from the first sample.
771 double min = features[ 0 ][ index ];
774 for(
size_type i = 0 ; i < features.size( ) ; i++ )
777 if( min > f[ index ] )
781 else if( max < f[ index ] )
// Local histograms for this dimension, cleared before use.
787 double hist1[ __number_of_bins__ ];
788 double hist2[ __number_of_bins__ ];
789 for(
size_type i = 0 ; i < __number_of_bins__ ; i++ )
791 hist1[ i ] = hist2[ i ] = 0.0;
794 for(
size_type i = 0 ; i < features.size( ) ; i++ )
// Map the value to a bin, rounding to nearest; the `+ 1` in the denominator keeps it
// non-zero when max == min.
797 int bin = ( int )( ( f[ index ] - min ) * __number_of_bins__ / ( max - min + 1 ) + 0.5 );
// Clamp to the last bin.
802 else if( bin >= __number_of_bins__ )
804 bin = __number_of_bins__ - 1;
807 if( categories[ i ] )
// Error of this candidate over all training samples (accumulation not visible here).
818 for(
size_t i = 0 ; i < features.size( ) ; i++ )
823 if( evaluate( f, index, hist1, hist2, min, max ) != categories[ i ] )
// `>=` lets an equally good candidate replace the current best, so with the parallel
// loop above the winner among ties may depend on execution order.
832 if( _minimum_classification_error_ >= e )
834 _minimum_classification_error_ = e;
// Publish the winning histograms into the classifier.
838 memcpy( hist1_, hist1,
sizeof(
double ) * __number_of_bins__ );
839 memcpy( hist2_, hist2,
sizeof(
double ) * __number_of_bins__ );
// Function-call form of the classifier: forwards to the one-argument evaluate( f ).
852 template <
class FEATURE >
853 bool operator ()(
const FEATURE &f )
const
855 return( evaluate( f ) );
// Classifies `f` with the learned state (index_, hist1_, hist2_, min_, max_).
862 template <
class FEATURE >
863 bool evaluate(
const FEATURE &f )
const
865 return( evaluate( f, index_, hist1_, hist2_, min_, max_ ) );
// Decision with explicit parameters: bins f[ indx ] exactly as learn() does and compares
// the two histograms at that bin.
//   hist1, hist2 : histograms with __number_of_bins__ entries each
//   min, max     : value range used for the binning
// Returns true when hist1[ bin ] <= hist2[ bin ].
869 template <
class FEATURE >
870 bool evaluate(
const FEATURE &f,
size_type indx,
const double hist1[ ],
const double hist2[ ],
double min,
double max )
const
// Round-to-nearest bin index; the `+ 1` keeps the denominator non-zero when max == min.
872 int bin = ( int )( ( f[ indx ] - min ) * __number_of_bins__ / ( max - min + 1 ) + 0.5 );
// Values above the learned range fall into the last bin (the handling of the low side
// is not visible in this listing).
877 else if( bin >= __number_of_bins__ )
879 bin = __number_of_bins__ - 1;
881 return( hist1[ bin ] <= hist2[ bin ] );
// Serialization hooks of the histogram-based classifier. Their bodies are not visible
// in this listing; deserialize leaves its parameter unnamed, which suggests the input
// is ignored — confirm before relying on save()/load() with this weak classifier.
885 const std::string serialize( )
const
894 void deserialize(
const std::string & )
// Multi-class classifier assembled from binary weak classifiers; the weak classifier
// type is a template parameter defaulting to the threshold classifier.
901 template <
typename __WEAK_CLASSIFIER__ = threshold_
classifier >
// Category names; a category's position here is its column in every code word.
912 std::vector< std::string > categories_;
// One weak classifier per boosting stage.
913 std::vector< weak_classifier_type > weak_classifiers_;
// code_word_[ t ][ l ]: binary label assigned to category l at stage t.
914 std::vector< std::vector< bool > > code_word_;
// Per-stage vote weight used when the weak classifier answers true (alpha_) or false (beta_).
915 std::vector< double > alpha_;
916 std::vector< double > beta_;
// Copy constructor: member-wise copy of all five containers.
925 classifier(
const classifier &cls ) : categories_( cls.categories_ ), weak_classifiers_( cls.weak_classifiers_ ), code_word_( cls.code_word_ ), alpha_( cls.alpha_ ), beta_( cls.beta_ )
// Assignment fragment.
// NOTE(review): no `beta_ = other.beta_` is visible in this listing — confirm it exists,
// otherwise an assigned classifier would vote with stale beta_ values.
934 categories_ = other.categories_;
935 weak_classifiers_ = other.weak_classifiers_;
936 code_word_ = other.code_word_;
937 alpha_ = other.alpha_;
// Accessors, each provided as a const and a mutable overload returning a reference to
// the corresponding member container.
// Weak classifiers, one per stage.
946 const std::vector< weak_classifier_type > &weak_classifiers( )
const
948 return( weak_classifiers_ );
952 std::vector< weak_classifier_type > &weak_classifiers( )
954 return( weak_classifiers_ );
// Category names.
958 const std::vector< std::string > &categories( )
const
960 return( categories_ );
964 std::vector< std::string > &categories( )
966 return( categories_ );
// Code words, one per stage.
970 const std::vector< std::vector< bool > > &code_word( )
const
972 return( code_word_ );
976 std::vector< std::vector< bool > > &code_word( )
978 return( code_word_ );
// Vote weights (the return statements of these four are not visible in this listing).
982 const std::vector< double > &alpha( )
const
988 std::vector< double > &alpha( )
994 const std::vector< double > &beta( )
const
1000 std::vector< double > &beta( )
// Trains the multi-class classifier by boosting.
//   features             : container of feature_type; each element's .category names its class
//   number_of_iterations : number of boosting stages (0 is bumped to 1)
// Each stage picks a binary code word over the categories, relabels the samples with it,
// trains one weak classifier, computes its vote weights and re-weights the distribution D
// over (sample, category) pairs. Many statements are not visible in this listing,
// including the declarations of D, weak, m, val and the h?u? sums.
1013 template <
template <
typename,
typename >
class FEATURE_LIST,
class Allocator >
1014 bool learn( FEATURE_LIST< feature_type, Allocator > & features,
size_type number_of_iterations )
1016 if( features.empty( ) )
// The message below says "No feature data has been specified."
1018 std::cerr <<
"特徴データが指定されていません." << std::endl;
// Zero iterations requested: run at least one.
1021 else if( number_of_iterations == 0 )
1024 number_of_iterations++;
// Discard any previously learned model.
1028 weak_classifiers_.clear( );
1029 categories_.clear( );
1030 code_word_.clear( );
// Collect the distinct category names; std::map keeps them sorted by name.
1036 std::map< std::string, size_type > category_map;
1037 for(
size_type i = 0 ; i < features.size( ) ; i++ )
1039 category_map[ features[ i ].category ] = 0;
1043 categories_.reserve( category_map.size( ) );
// Number the categories in map order and record name -> index.
1046 std::map< std::string, size_type >::iterator ite = category_map.begin( );
1047 for( ; ite != category_map.end( ) ; ++ite )
1049 ite->second = categories_.size( );
1050 categories_.push_back( ite->first );
// Initial distribution: uniform over all (sample, wrong category) pairs.
// NOTE(review): with a single category the divisor is 0 — confirm that case is rejected.
1055 for(
size_type i = 0 ; i < features.size( ) ; i++ )
1059 for(
size_type l = 0 ; l < categories_.size( ) ; l++ )
1061 if( l != category_index )
1063 D( i, l ) = 1.0 /
static_cast< double >( features.size( ) * ( categories_.size( ) - 1 ) );
// Per-sample category index, per-sample binary label for the current stage, and a
// per-category accumulator used while refining the code word.
1072 std::vector< size_type > fcatemap( features.size( ) );
1073 std::vector< bool > fcategories( features.size( ) );
1074 std::vector< double > rpweight( categories_.size( ) );
1075 for(
size_type i = 0 ; i < fcatemap.size( ) ; i++ )
1077 fcatemap[ i ] =
static_cast< typename std::vector< size_type >::value_type
>( category_map[ features[ i ].category ] );
1080 #if defined( __ONE_PER_CLASS_CODE_WORD__ ) && __ONE_PER_CLASS_CODE_WORD__ == 1
1082 size_type nhypothesis = categories_.size( );
1083 #elif defined( __RANDOM_CODE_WORD__ ) && __RANDOM_CODE_WORD__ == 1
1086 size_type nhypothesis = categories_.size( );
// One boosting stage per iteration.
1093 for(
size_type t = 0 ; t < number_of_iterations ; t++ )
// Append this stage's code word (all false) and work on it in place.
1096 code_word_.push_back( std::vector< bool >( categories_.size( ) ) );
1097 std::vector< bool > &myu = code_word_.back( );
// One-per-class: only category `t % nhypothesis` is labelled true.
1099 #if defined( __ONE_PER_CLASS_CODE_WORD__ ) && __ONE_PER_CLASS_CODE_WORD__ == 1
1100 myu[ t % nhypothesis ] =
true;
// Random: shuffle the categories by sorting random keys; the first half becomes true.
1101 #elif defined( __RANDOM_CODE_WORD__ ) && __RANDOM_CODE_WORD__ == 1
1103 std::vector< pair< double, size_type > > list( nhypothesis );
1104 for(
size_type i = 0 ; i < list.size( ) ; i++ )
1106 list[ i ].key = rnd.
real1( );
1107 list[ i ].value = i;
1109 std::sort( list.begin( ), list.end( ) );
1110 for(
size_type i = 0 ; i < list.size( ) ; i++ )
1112 myu[ list[ i ].value ] = i < list.size( ) / 2;
// Default scheme. Two categories: alternate which one is labelled true.
1116 if( categories_.size( ) == 2 )
1118 myu[ 0 ] = ( t % 2 ) == 0;
1119 myu[ 1 ] = !myu[ 0 ];
// More categories: entries 1..K-1 are taken from the bits of a counter value
// (the surrounding statements are not visible here).
1126 for(
size_type r = categories_.size( ) - 1 ; r > 0 ; r-- )
1128 bool b = ( val & 1 ) != 0;
1136 val = nhypothesis++;
1137 for(
size_type r = categories_.size( ) - 1 ; r > 0 ; r-- )
1139 myu[ r ] = ( val & 1 ) != 0;
// Debug level 3: dump the binary label of every sample.
1146 #if defined( __DEBUG_OUTPUT_LEVEL__ ) && __DEBUG_OUTPUT_LEVEL__ >= 3
1148 for(
size_type i = 0 ; i < fcategories.size( ) ; i++ )
1150 std::cout << myu[ fcatemap[ i ] ];
1152 std::cout << std::endl;
// Relabel every sample with the code-word bit of its category.
1162 for(
size_type i = 0 ; i < fcategories.size( ) ; i++ )
1164 fcategories[ i ] = myu[ fcatemap[ i ] ];
// Two passes over D restricted to categories whose code-word bit differs from the
// sample's own; presumably they derive the per-sample weights handed to the weak
// learner (the accumulating statements are not visible here).
1169 for(
size_type i = 0 ; i < features.size( ) ; i++ )
1174 bool myuY = fcategories[ i ];
1175 for(
size_type l = 0 ; l < categories_.size( ) ; l++ )
1177 if( myuY != myu[ l ] )
1185 for(
size_type i = 0 ; i < D.rows( ) ; i++ )
1190 bool myuY = fcategories[ i ];
1192 for(
size_type l = 0 ; l < categories_.size( ) ; l++ )
1194 if( myuY != myu[ l ] )
// Train the binary weak classifier on the relabelled samples.
// NOTE(review): the boolean result of weak.learn is not used on this line.
1205 weak.learn( features, fcategories );
// On all but the last inner pass, try to improve the code word from the weak
// classifier's answers.
1208 if( m < __NUMBER_OF_INNER_LOOPS__ - 1 )
1210 for(
size_type i = 0 ; i < rpweight.size( ) ; i++ )
1212 rpweight[ i ] = 0.0;
1215 for(
size_type i = 0 ; i < features.size( ) ; i++ )
// Weak answer mapped to +1 / -1.
1220 double val = weak( f ) ? 1.0 : -1.0;
1223 for(
size_type l = 0 ; l < D.cols( ) ; l++ )
1225 tmp += D( i, l ) * val;
// The sample's own category gains the row total; every category loses its own share.
1228 rpweight[ fcatemap[ i ] ] += tmp;
1230 for(
size_type l = 0 ; l < D.cols( ) ; l++ )
1232 rpweight[ l ] -= D( i, l ) * val;
// Candidate code word: the sign of each category's accumulated weight.
1237 std::vector< bool > tmyu( myu );
1238 bool isChanged =
false;
1239 for(
size_type i = 0 ; i < rpweight.size( ) ; i++ )
1241 bool nmyu = rpweight[ i ] >= 0.0;
1242 isChanged = isChanged || nmyu != tmyu[ i ];
// Look for an entry that differs from entry 0; idx < size means the candidate does
// not label every category identically.
1255 for( ; idx < tmyu.size( ) ; idx++ )
1257 if( tmyu[ 0 ] != tmyu[ idx ] )
1263 if( idx < tmyu.size( ) )
// Debug level 3: dump the weak classifier's answer for every sample.
1276 #if defined( __DEBUG_OUTPUT_LEVEL__ ) && __DEBUG_OUTPUT_LEVEL__ >= 3
1278 for(
size_type i = 0 ; i < features.size( ) ; i++ )
1283 std::cout << weak.evaluate( f );
1286 std::cout << std::endl;
// Asymmetric weighting: tally the four (answer hX, label uY) outcomes and derive
// alpha and beta from separate log-ratios.
1289 #if defined( __ASYMMETRIC_WEIGHTING__ ) && __ASYMMETRIC_WEIGHTING__ == 1
1291 const double eps = 1.0e-16;
1297 for(
size_type i = 0 ; i < features.size( ) ; i++ )
1302 bool uY = fcategories[ i ];
1303 bool hX = weak( f );
1308 else if( hX && !uY )
1312 else if( !hX && !uY )
1323 double alpha = 0.5 * std::log( h1u1 / h1u0 );
1324 double beta = -0.5 * std::log( h0u0 / h0u1 );
// Symmetric weighting: both tallies start at eps so the log-ratio stays finite.
1327 const double eps = 1.0e-16;
1328 double positives = eps;
1329 double negatives = eps;
1331 for(
size_type i = 0 ; i < features.size( ) ; i++ )
1336 bool uY = fcategories[ i ];
1337 bool hX = weak( f );
1349 double alpha = 0.5 * std::log( positives / negatives );
1350 double beta = -alpha;
// Record this stage's vote weights.
1353 alpha_.push_back( alpha );
1354 beta_.push_back( beta );
// Re-weight D for the next stage.
1357 for(
size_type i = 0 ; i < features.size( ) ; i++ )
1362 bool myuY = fcategories[ i ];
1363 for(
size_type l = 0 ; l < categories_.size( ) ; l++ )
// The bools promote to int, so v starts as -0.5, 0 or +0.5 (later adjustments to v
// are not visible here).
1365 double v = ( myu[ l ] - myuY ) * 0.5;
1375 D( i, l ) *= std::exp( v );
// Pass over every entry of D; presumably the normalisation (body not visible here).
1381 for(
size_type i = 0 ; i < D.size( ) ; i++ )
// Every fifth stage, measure the error on the training set.
1386 if( ( ( t + 1 ) % 5 ) == 0 )
1388 double __classification_error__ = error_rate( features );
// Debug level 1: the messages below say "Classifier training loop <t+1> / <N> has
// finished." and "Classification error: <value>".
1390 #if defined( __DEBUG_OUTPUT_LEVEL__ ) && __DEBUG_OUTPUT_LEVEL__ >= 1
1392 std::cout <<
"識別器の学習ループ " << t + 1 <<
" / " << number_of_iterations <<
" が終了しました。";
1393 std::cout <<
"分類誤差: " << __classification_error__ << std::endl;
// Perfect training accuracy; presumably ends the training early (action not visible here).
1396 if( __classification_error__ == 0.0 )
// Function-call form of the classifier: forwards to evaluate( f ) and returns the
// predicted category name.
1414 template <
class FEATURE >
1415 const std::string operator ()(
const FEATURE &f )
const
1417 return( evaluate( f ) );
// Predicts the category name of `f`.
// Every stage's weak classifier votes with alpha_[ t ] (answer true) or beta_[ t ]
// (answer false); the vote is added to each category whose code-word bit is set, and
// the category with the largest total is returned.
1426 template <
class FEATURE >
1427 const std::string evaluate(
const FEATURE &f )
const
// Accumulated vote per category.
1430 std::vector< double > values( categories_.size( ), 0.0 );
1431 for(
size_type t = 0 ; t < weak_classifiers_.size( ) ; t++ )
1433 const std::vector< bool > &code = code_word_[ t ];
1434 double weight = weak_classifiers_[ t ]( f ) ? alpha_[ t ] : beta_[ t ];
1436 for(
size_type l = 0 ; l < categories_.size( ) ; l++ )
// code[ l ] promotes to 0 or 1, so only categories with a set bit receive the vote.
1438 values[ l ] += code[ l ] * weight;
// Arg-max scan; the strict `<` keeps the earlier category on ties.
1443 for(
size_type l = 1 ; l < categories_.size( ) ; l++ )
1445 if( values[ category ] < values[ l ] )
// A second, per-category scoring loop. The statements around it are not visible in
// this listing, so it may be an alternative path compiled conditionally — confirm.
1453 for(
size_type l = 0 ; l < categories_.size( ) ; l++ )
1456 for(
size_type t = 0 ; t < weak_classifiers_.size( ) ; t++ )
1458 double val = code_word_[ t ][ l ];
1459 if( weak_classifiers_[ t ]( f ) )
// NOTE(review): categories_[ category ] is out of range when the model holds no
// categories (e.g. before learn/load) — confirm callers never evaluate an empty model.
1479 return( categories_[ category ] );
// Ranking variant of evaluate: accumulates the same per-category votes and then fills
// `ranks` with one entry per category. The function signature and the fill/sort
// statements are not visible in this listing.
1487 template <
class FEATURE >
// Accumulated vote per category (same scheme as evaluate).
1490 std::vector< double > values( categories_.size( ), 0.0 );
1491 for(
size_type t = 0 ; t < weak_classifiers_.size( ) ; t++ )
1493 const std::vector< bool > &code = code_word_[ t ];
1494 double weight = weak_classifiers_[ t ]( f ) ? alpha_[ t ] : beta_[ t ];
1496 for(
size_type l = 0 ; l < categories_.size( ) ; l++ )
1498 values[ l ] += code[ l ] * weight;
// One output entry per category.
1503 ranks.reserve( values.size( ) );
1505 for(
size_type l = 0 ; l < values.size( ) ; l++ )
// Fraction of `features` that the current model misclassifies.
//   features : container of feature_type
// Returns error / nfeatures as a double. The handling of an empty container, the
// declaration of `error` and the comparison against the true category are not visible
// in this listing.
1519 template <
template <
typename,
typename >
class FEATURE_LIST,
class Allocator >
1520 double error_rate(
const FEATURE_LIST< feature_type, Allocator > & features )
const
1522 if( features.empty( ) )
// OpenMP wants a signed loop counter.
1528 int nfeatures =
static_cast< int >( features.size( ) );
// Samples are evaluated in parallel; the error count is combined with an OpenMP
// sum reduction, so no explicit locking is needed for it.
1530 #pragma omp parallel for firstprivate( nfeatures ) reduction( +: error ) schedule( guided )
1531 for(
int i = 0 ; i < nfeatures ; i++ )
1536 std::string ret = evaluate( f );
1537 #if defined( __DEBUG_OUTPUT_LEVEL__ ) && __DEBUG_OUTPUT_LEVEL__ >= 3
1547 #if defined( __DEBUG_OUTPUT_LEVEL__ ) && __DEBUG_OUTPUT_LEVEL__ >= 3
1548 std::cout << std::endl;
1551 return( static_cast< double >( error ) / static_cast< double >( nfeatures ) );
// Extracts one comma-separated field from the character range [s, e).
//   s, e : current position and end of the buffer
//   val  : receives the field text
// Returns the position to continue from, never beyond `e`.
// The loops that advance `s` and the updates of `sp` / `ep` are not visible in this
// listing; the comments below describe only the visible tests.
1555 static const char *get_value(
const char *s,
const char *e, std::string &val )
// Leading part: tests for "\r" (optionally followed by "\n"), "\n", and blank or tab.
1560 if( s[ 0 ] ==
'\r' )
// `s + 1 != e` guards the look-ahead against reading past the end.
1562 if( s + 1 != e && s[ 1 ] ==
'\n' )
1571 else if( s[ 0 ] ==
'\n' )
1575 else if( s[ 0 ] ==
' ' || s[ 0 ] ==
'\t' )
// End of the field text, initialised to its start.
1586 const char *ep = sp;
// Field part: tests for the same line endings and for the ',' separator.
1591 if( s[ 0 ] ==
'\r' )
1593 if( s + 1 != e && s[ 1 ] ==
'\n' )
1603 else if( s[ 0 ] ==
'\n' )
1608 else if( s[ 0 ] ==
',' )
// The field is the text between sp and ep.
1620 val = std::string( sp, ep );
// Clamp so the caller never receives a position past the end.
1623 return( s > e ? e : s );
// Writes the model to a text file: the category and stage counts, one line per
// category name, one comma-separated code word per stage, then for each stage the
// weak classifier's serialized form followed by "alpha,beta".
//   filename : path of the file to create
// The null check on `fp`, the fclose call and the return statements are not visible
// in this listing.
1635 bool save(
const std::string &filename )
const
1637 FILE *fp = fopen( filename.c_str( ),
"wt" );
// NOTE(review): size( ) returns an unsigned size type while `%ld` expects a long;
// they differ where long is 32-bit — consider `%zu` or an explicit cast.
1645 fprintf( fp,
"Category = %ld\n", categories_.size( ) );
1646 fprintf( fp,
"Stage = %ld\n", weak_classifiers_.size( ) );
// Category names, numbered from 1.
1649 for(
size_type i = 0 ; i < categories_.size( ) ; i++ )
1651 fprintf( fp,
"Class[%ld] : %s\n", i + 1, categories_[ i ].c_str( ) );
// Code words as 0/1 values separated by commas.
1655 for(
size_type i = 0 ; i < code_word_.size( ) ; i++ )
1657 const std::vector< bool > &code = code_word_[ i ];
// NOTE(review): code[ 0 ] is read unconditionally; a code word with no entries would
// be indexed out of range — confirm that cannot happen.
1658 fprintf( fp,
"%d", code[ 0 ] ? 1: 0 );
1659 for(
size_type l = 1 ; l < code.size( ) ; l++ )
1661 fprintf( fp,
",%d", code[ l ] ? 1: 0 );
1663 fprintf( fp,
"\n" );
// Per stage: the weak classifier's state, then its vote weights.
// `%f` prints six fractional digits, so the weights do not round-trip exactly.
1667 for(
size_type i = 0 ; i < weak_classifiers_.size( ) ; i++ )
1670 fprintf( fp,
"%s\n", weak.serialize( ).c_str( ) );
1671 fprintf( fp,
"%f,%f\n", alpha_[ i ], beta_[ i ] );
// Reads a model from the text format written by save().
//   filename : path of the file to read
// The null check on `fp`, the fclose call, the return statements and the store into
// beta_ are not visible in this listing.
1687 bool load(
const std::string &filename )
1689 FILE *fp = fopen( filename.c_str( ),
"rt" );
1696 int numClasses = 0, numStages = 0, dmy;
1697 char line[ 4096 ], buff[ 4096 ];
// Header: the category and stage counts.
// NOTE(review): the sscanf results are not checked here; a malformed header leaves the
// counts at 0, and a negative count would turn into a huge size in resize( ) below.
1700 if( fgets( line, 4096, fp ) != NULL )
1702 sscanf( line,
"Category = %d", &numClasses );
1704 if( fgets( line, 4096, fp ) != NULL )
1706 sscanf( line,
"Stage = %d", &numStages );
// Size every container from the header.
1710 weak_classifiers_.resize( numStages );
1711 alpha_.resize( numStages );
1712 beta_.resize( numStages );
1713 code_word_.resize( numStages );
1714 categories_.resize( numClasses );
// Category names.
1717 for(
size_type i = 0 ; i < categories_.size( ) ; i++ )
1719 if( fgets( line, 4096, fp ) != NULL )
1721 memset( buff,
'\0', 4096 );
// NOTE(review): `%s` stops at the first whitespace, so a category name containing
// blanks is truncated even though save() writes it in full. buff cannot overflow
// because `line` holds at most 4095 characters.
1722 sscanf( line,
"Class[%d] : %s", &dmy, buff );
1723 categories_[ i ] = buff;
// Code words: one line per stage, one 0/1 field per category.
1728 for(
size_type i = 0 ; i < code_word_.size( ) ; i++ )
1730 std::vector< bool > &code = code_word_[ i ];
1731 code.resize( numClasses );
1733 if( fgets( line, 4096, fp ) == NULL )
// NOTE(review): `ep` marks the end of the buffer, not the end of the text just read;
// whether get_value can scan into stale bytes after the terminator depends on its
// loop conditions, which are not visible here — confirm.
1738 const char *p = line, *ep = line + 4096;
1740 for(
size_type l = 0 ; l < code.size( ) ; l++ )
1743 p = get_value( p, ep, val );
// Only the exact text "1" counts as true.
1744 code[ l ] = val ==
"1" ?
true :
false;
// Per stage: the weak classifier's state line, then the "alpha,beta" line.
1749 for(
size_type i = 0 ; i < weak_classifiers_.size( ) ; i++ )
1751 if( fgets( line, 4096, fp ) != NULL )
1753 weak_classifiers_[ i ].deserialize( line );
1756 if( fgets( line, 4096, fp ) != NULL )
1759 sscanf( line,
"%lf,%lf", &alpha, &beta );
1760 alpha_[ i ] = alpha;
1783 #endif // __INCLUDE_MACHINE_LEARNING__