mixture.h
1 //
2 // Copyright (c) 2003-2011, MIST Project, Nagoya University
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without modification,
6 // are permitted provided that the following conditions are met:
7 //
8 // 1. Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer.
10 //
11 // 2. Redistributions in binary form must reproduce the above copyright notice,
12 // this list of conditions and the following disclaimer in the documentation
13 // and/or other materials provided with the distribution.
14 //
15 // 3. Neither the name of the Nagoya University nor the names of its contributors
16 // may be used to endorse or promote products derived from this software
17 // without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
20 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 // FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
22 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25 // IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
26 // THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 //
28 
36 #ifndef __INCLUDE_MIXTURE__
37 #define __INCLUDE_MIXTURE__
38 
39 
40 #ifndef __INCLUDE_MIST_H__
41 #include "mist.h"
42 #endif
43 
44 #ifndef __INCLUDE_MIST_VECTOR__
45 #include "vector.h"
46 #endif
47 
#include <cmath>
#include <vector>
#include <cstdio>	// for printf( ) used by the EMALGORITHM_DEBUG output
50 
51 
// Beginning of the mist namespace
_MIST_BEGIN

55 
56 #define EMALGORITHM_DEBUG 0
57 
58 
66 
67 
69 namespace mixture
70 {
72  struct distribution
73  {
		double weight;	// mixing weight of the component
		double av;		// mean
		double sd;		// standard deviation
77 
78  distribution( ) : weight( 1.0 ), av( 0.0 ), sd( 1.0 ){ }
79  };
80 
81  inline std::ostream &operator <<( std::ostream &out, const distribution &a )
82  {
83  out << "( " << a.weight << ", " << a.av << ", " << a.sd << " )";
84  return( out );
85  }
86 
	struct distribution2
	{
		double weight;			// mixing weight of the component
		vector2< double > av;	// mean vector
		double v[ 4 ];			// covariance matrix, stored as ( v[ 0 ], v[ 1 ] ; v[ 2 ], v[ 3 ] )

96  distribution2( ) : weight( 1.0 )
97  {
98  v[ 0 ] = v[ 3 ] = 1.0;
99  v[ 1 ] = v[ 2 ] = 0.0;
100  }
101  };
102 
103  inline std::ostream &operator <<( std::ostream &out, const distribution2 &a )
104  {
105  out << "( " << a.weight << ", " << a.av << ", < " << a.v[ 0 ] << ", " << a.v[ 1 ] << ", " << a.v[ 2 ] << ", " << a.v[ 3 ] << " > )";
106  return( out );
107  }
108 
	// Returns the value at ( x, y ) of the 2D normal distribution specified by dp
110  inline double gauss( const mist::mixture::distribution2 &dp, double x, double y )
111  {
112  double t = dp.v[ 0 ] * dp.v[ 3 ] - dp.v[ 1 ] * dp.v[ 2 ];
113  double a = dp.v[ 3 ] / t;
114  double b = -dp.v[ 1 ] / t;
115  double c = -dp.v[ 2 ] / t;
116  double d = dp.v[ 0 ] / t;
117  x -= dp.av.x;
118  y -= dp.av.y;
119  const double pi = 3.1415926535897932384626433832795;
120  const double _2pi = 2.0 * pi;
121  double vvv = ( a * x + b * y ) * x + ( c * x + d * y ) * y;
122  return ( 1.0 / ( _2pi * sqrt( t ) ) * std::exp( - vvv / 2.0 ) );
123  }
124 
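	// Returns the value at x of the 1D normal distribution specified by dp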
125  inline double gauss( const mist::mixture::distribution &dp, double x )
126  {
127  const double pi = 3.1415926535897932384626433832795;
128  const double _2pi = std::sqrt( 2.0 * pi );
129  double myu = x - dp.av;
130  return ( 1.0 / ( _2pi * dp.sd ) * std::exp( - myu * myu / ( 2.0 * dp.sd * dp.sd ) ) );
131  }
132 }
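// Usage sketch (illustrative, not part of the original MIST documentation): the density of a
// 1D mixture at x is the weighted sum of the per-component densities returned by gauss( ).
//
//	double mixture_density( const std::vector< mist::mixture::distribution > &dists, double x )
//	{
//		double p = 0.0;
//		for( size_t m = 0 ; m < dists.size( ) ; m++ )
//		{
//			p += dists[ m ].weight * mist::mixture::gauss( dists[ m ], x );
//		}
//		return( p );
//	}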
133 
134 
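//! Estimate the parameters of a 1D Gaussian mixture from raw samples using the EM algorithm.
//!
//! @param[in]     rSamples      … input samples (any container providing empty( ), size( ) and operator[])
//! @param[in,out] opdp          … initial parameters of each component, overwritten with the estimated values
//! @param[in]     nSamples      … number of samples
//! @param[in]     nComponents   … number of mixture components
//! @param[in]     nMaxIteration … maximum number of EM iterations
//! @param[in]     tolerance     … relative tolerance on the log-likelihood used to detect convergence
//! @param[out]    nIteration    … receives the number of iterations actually performed
//!
//! @retval true  … the estimation finished normally
//! @retval false … invalid input, or a numerical failure occurred during the iterations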
150 template < class Array >
151 bool estimate_mixture( const Array &rSamples, mixture::distribution *opdp, size_t nSamples, size_t nComponents, size_t nMaxIteration, double tolerance, size_t &nIteration )
152 {
153  if( rSamples.empty( ) || nComponents == 0 )
154  {
155  return( false );
156  }
157 
158  typedef size_t size_type;
159 
160  size_type k, m;
161  //const double pi = atan( 1.0f ) * 4.0f;
162  const double pi = 3.1415926535897932384626433832795;
163  const double _2pi = std::sqrt( 2.0 * pi );
164  const double _log_2pi = std::log( _2pi );
165  double fLastLikelihood = -1.0e30;
166 
167  array2< double > Weight( nSamples, nComponents );
168  std::vector< mixture::distribution > pdp( nComponents );
169 
170 
171 #if defined( EMALGORITHM_DEBUG ) && EMALGORITHM_DEBUG == 1
172  for( m = 0 ; m < nComponents ; m++ )
173  {
174  std::cerr << pdp[ m ] << std::endl;
175  }
176 #endif
177 
	// Copy the input parameters into the working buffer
179  for( m = 0 ; m < nComponents ; m++ )
180  {
181  pdp[ m ] = opdp[ m ];
182  }
183 
	// Normalize the initial component weights so that they sum to 1
185  {
186  double tmp = 0.0;
187  for( m = 0 ; m < nComponents ; m++ )
188  {
189  tmp += pdp[ m ].weight;
190  }
191 
192  if( tmp <= 0.0 )
193  {
194  return( false );
195  }
196 
197  for( m = 0 ; m < nComponents ; m++ )
198  {
199  pdp[ m ].weight /= tmp;
200  }
201  }
202 
	// Start of the EM algorithm
204  for( nIteration = 0 ; nIteration < nMaxIteration ; nIteration++ )
205  {
206  // E-step
207  for( k = 0 ; k < nSamples ; k++ )
208  {
209  double tmp = 0.0;
210 
211  for( m = 0 ; m < nComponents ; m++ )
212  {
213  double myu = rSamples[ k ] - pdp[ m ].av;
214  double v = pdp[ m ].weight * std::exp( - myu * myu / ( 2.0 * pdp[ m ].sd * pdp[ m ].sd ) ) / pdp[ m ].sd;
215  Weight( k, m ) = v;
216  tmp += v;
217  }
218 
219  if( tmp == 0.0 )
220  {
				// Error: every component has zero density at this sample
222  return( false );
223  }
224  else
225  {
226  for( size_type m = 0 ; m < nComponents ; m++ )
227  {
228  Weight( k, m ) /= tmp;
229  }
230  }
231  }
232 
233  // M-step
234  for( m = 0 ; m < nComponents ; m++ )
235  {
236  double weight_sum = 0;
237  double average = 0;
238  double variance = 0;
239 
240  for( k = 0 ; k < nSamples ; k++ )
241  {
242  weight_sum += Weight( k, m );
243  average += static_cast< double >( rSamples[ k ] ) * Weight( k, m );
244  }
245 
246  if( weight_sum > 0.0 )
247  {
248  pdp[ m ].weight = weight_sum / static_cast< double >( nSamples );
249  pdp[ m ].av = average / weight_sum;
250 
251  for( k = 0 ; k < nSamples ; k++ )
252  {
253  double myu = rSamples[ k ] - pdp[ m ].av;
254  variance += Weight( k, m ) * myu * myu;
255  }
256 
257  variance /= weight_sum;
258  }
259  else
260  {
				// Error: the responsibilities for this component sum to zero
262  return( false );
263  }
264 
265 
266  pdp[ m ].sd = std::sqrt( variance );
267  }
268 
269 
270  double weight_sum = 0;
271  for( m = 0 ; m < nComponents ; m++ )
272  {
273  weight_sum += pdp[ m ].weight;
274  }
275 
276  if( std::abs( weight_sum - 1.0 ) > 0.1 )
277  {
			// Error: the component weights do not sum to 1
279  return( false );
280  }
281 
282  double fLikelihood = 0.0;
283 
284  for( k = 0 ; k < nSamples ; k++ )
285  {
286  double tmp = 0.0;
287 
288  for( m = 0 ; m < nComponents ; m++ )
289  {
290  double myu = rSamples[ k ] - pdp[ m ].av;
291  tmp += pdp[ m ].weight * std::exp( - myu * myu / ( 2.0 * pdp[ m ].sd * pdp[ m ].sd ) ) / pdp[ m ].sd;
292  //tmp += Weight( k, m ) * pdp[ m ].weight * std::exp( - myu * myu / ( 2.0 * pdp[ m ].sd * pdp[ m ].sd ) ) / pdp[ m ].sd;
293  }
294 
295  if( tmp == 0.0 )
296  {
297  return( false );
298  }
299 
300  fLikelihood += std::log( tmp ) - _log_2pi;
301  }
302 
303 #if defined( EMALGORITHM_DEBUG ) && EMALGORITHM_DEBUG == 1
304  for( m = 0 ; m < nComponents ; m++ )
305  {
306  std::cerr << pdp[ m ] << std::endl;
307  }
308 #elif defined( EMALGORITHM_DEBUG ) && EMALGORITHM_DEBUG == 2
309  printf( "%f = ( %f, %f, %f )\n", fLikelihood, pdp[ 0 ].weight, pdp[ 0 ].av, pdp[ 0 ].sd );
310 #endif
311 
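		// Stop when the likelihood no longer improves, or when its relative change falls below tolerance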
312  if( fLastLikelihood >= fLikelihood || 2.0 * std::abs( fLastLikelihood - fLikelihood ) < tolerance * ( std::abs( fLastLikelihood ) + std::abs( fLikelihood ) ) )
313  {
314  break;
315  }
316 
		// Copy the working data to the output
318  for( m = 0 ; m < nComponents ; m++ )
319  {
320  opdp[ m ] = pdp[ m ];
321  }
322 
323  fLastLikelihood = fLikelihood;
324  }
325 
326  return( true );
327 }
328 
329 
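//! Estimate the parameters of a 2D Gaussian mixture from raw 2D samples using the EM algorithm.
//!
//! @param[in]     rSamples      … input samples; each element must expose x and y members (e.g. vector2< double >)
//! @param[in,out] opdp          … initial parameters of each 2D component, overwritten with the estimated values
//! @param[in]     nSamples      … number of samples
//! @param[in]     nComponents   … number of mixture components
//! @param[in]     nMaxIteration … maximum number of EM iterations
//! @param[in]     tolerance     … relative tolerance on the log-likelihood used to detect convergence
//! @param[out]    nIteration    … receives the number of iterations actually performed
//!
//! @retval true  … the estimation finished normally
//! @retval false … invalid input, or a numerical failure occurred during the iterations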
346 template < class Array >
347 bool estimate_mixture( const Array &rSamples, mixture::distribution2 *opdp, size_t nSamples, size_t nComponents, size_t nMaxIteration, double tolerance, size_t &nIteration )
348 {
349  if( rSamples.empty( ) || nComponents == 0 )
350  {
351  return( false );
352  }
353 
354  typedef size_t size_type;
355 
356  size_type k, m;
357  //const double pi = atan( 1.0f ) * 4.0f;
358  const double pi = 3.1415926535897932384626433832795;
359  const double _2pi = 2.0 * pi;
360  const double _log_2pi = std::log( _2pi );
361  double fLastLikelihood = -1.0e30;
362 
363  array2< double > Weight( nSamples, nComponents );
364  std::vector< mixture::distribution2 > pdp( nComponents );
365 
366 
367 #if defined( EMALGORITHM_DEBUG ) && EMALGORITHM_DEBUG == 1
368  for( m = 0 ; m < nComponents ; m++ )
369  {
370  std::cerr << pdp[ m ] << std::endl;
371  }
372 #endif
373 
374 
	// Copy the input parameters into the working buffer
376  for( m = 0 ; m < nComponents ; m++ )
377  {
378  pdp[ m ] = opdp[ m ];
379  }
380 
381 
	// Normalize the initial component weights so that they sum to 1
383  {
384  double tmp = 0.0;
385  for( m = 0 ; m < nComponents ; m++ )
386  {
387  tmp += pdp[ m ].weight;
388  }
389 
390  if( tmp <= 0.0 )
391  {
392  return( false );
393  }
394 
395  for( m = 0 ; m < nComponents ; m++ )
396  {
397  pdp[ m ].weight /= tmp;
398  }
399  }
400 
	// Start of the EM algorithm
402  for( nIteration = 0 ; nIteration < nMaxIteration ; nIteration++ )
403  {
404  // E-step
405 
406  for( k = 0 ; k < nSamples ; k++ )
407  {
408  double tmp = 0.0;
409 
410  for( m = 0 ; m < nComponents ; m++ )
411  {
412  double t = pdp[ m ].v[ 0 ] * pdp[ m ].v[ 3 ] - pdp[ m ].v[ 1 ] * pdp[ m ].v[ 2 ];
413  double a = pdp[ m ].v[ 3 ];
414  double b = -pdp[ m ].v[ 1 ];
415  double c = -pdp[ m ].v[ 2 ];
416  double d = pdp[ m ].v[ 0 ];
417  double x = rSamples[ k ].x - pdp[ m ].av.x;
418  double y = rSamples[ k ].y - pdp[ m ].av.y;
419  double vvv = ( ( a * x + b * y ) * x + ( c * x + d * y ) * y ) / t;
420  double v = pdp[ m ].weight * ( 1.0 / sqrt( t ) ) * std::exp( - vvv / 2.0 );
421  Weight( k, m ) = v;
422  tmp += v;
423  }
424  if( tmp == 0.0 )
425  {
				// Error: every component has zero density at this sample
427  return( false );
428  }
429  else
430  {
431  for( size_type m = 0 ; m < nComponents ; m++ )
432  {
433  Weight( k, m ) /= tmp;
434  }
435  }
436  }
437 
438  // M-step
439  for( m = 0 ; m < nComponents ; m++ )
440  {
441  double weight_sum = 0;
442  vector2< double > average( 0, 0 );
443  double v1 = 0;
444  double v2 = 0;
445  double v3 = 0;
446 
447  for( k = 0 ; k < nSamples ; k++ )
448  {
449  weight_sum += Weight( k, m );
450  average += rSamples[ k ] * Weight( k, m );
451  }
452 
453  if( weight_sum > 0.0 )
454  {
455  pdp[ m ].weight = weight_sum / static_cast< double >( nSamples );
456  pdp[ m ].av = average / weight_sum;
457 
458  for( k = 0 ; k < nSamples ; k++ )
459  {
460  double w = Weight( k, m );
461  double xx = rSamples[ k ].x - pdp[ m ].av.x;
462  double yy = rSamples[ k ].y - pdp[ m ].av.y;
463  v1 += w * xx * xx;
464  v2 += w * yy * yy;
465  v3 += w * xx * yy;
466  }
467 
468  v1 /= weight_sum;
469  v2 /= weight_sum;
470  v3 /= weight_sum;
471 
472  if( v1 * v2 < v3 * v3 )
473  {
474  v3 = std::sqrt( v1 * v2 ) - 1.0e-10;
475  }
476  }
477  else
478  {
				// Error: the responsibilities for this component sum to zero
480  return( false );
481  }
482 
483 
484  pdp[ m ].v[ 0 ] = v1;
485  pdp[ m ].v[ 3 ] = v2;
486  pdp[ m ].v[ 1 ] = pdp[ m ].v[ 2 ] = v3;
487  }
488 
489 
490  double weight_sum = 0;
491  for( m = 0 ; m < nComponents ; m++ )
492  {
493  weight_sum += pdp[ m ].weight;
494  }
495 
496  if( std::abs( weight_sum - 1.0 ) > 0.1 )
497  {
			// Error: the component weights do not sum to 1
499  return( false );
500  }
501 
502  double fLikelihood = 0.0;
503 
504  for( k = 0 ; k < nSamples ; k++ )
505  {
506  double tmp = 0.0;
507 
508  for( m = 0 ; m < nComponents ; m++ )
509  {
510  double t = pdp[ m ].v[ 0 ] * pdp[ m ].v[ 3 ] - pdp[ m ].v[ 1 ] * pdp[ m ].v[ 2 ];
511  double a = pdp[ m ].v[ 3 ];
512  double b = -pdp[ m ].v[ 1 ];
513  double c = -pdp[ m ].v[ 2 ];
514  double d = pdp[ m ].v[ 0 ];
515  double x = rSamples[ k ].x - pdp[ m ].av.x;
516  double y = rSamples[ k ].y - pdp[ m ].av.y;
517  double vvv = ( ( a * x + b * y ) * x + ( c * x + d * y ) * y ) / t;
518  tmp += Weight( k, m ) * pdp[ m ].weight / sqrt( t ) * std::exp( - vvv / 2.0 );
519  }
520 
521  if( tmp == 0.0 )
522  {
523  return( false );
524  }
525 
526  fLikelihood += std::log( tmp ) - _log_2pi;
527  }
528 
529 #if defined( EMALGORITHM_DEBUG ) && EMALGORITHM_DEBUG == 1
530  for( m = 0 ; m < nComponents ; m++ )
531  {
532  std::cerr << pdp[ m ] << std::endl;
533  }
534 #elif defined( EMALGORITHM_DEBUG ) && EMALGORITHM_DEBUG == 2
		printf( "%f = ( %f, %f, %f )\n", fLikelihood, pdp[ 0 ].weight, pdp[ 0 ].av.x, pdp[ 0 ].av.y );
536 #endif
537 
538  if( fLastLikelihood >= fLikelihood || 2.0 * std::abs( fLastLikelihood - fLikelihood ) < tolerance * ( std::abs( fLastLikelihood ) + std::abs( fLikelihood ) ) )
539  {
540  break;
541  }
542 
		// Copy the working data to the output
544  for( m = 0 ; m < nComponents ; m++ )
545  {
546  opdp[ m ] = pdp[ m ];
547  }
548 
549  fLastLikelihood = fLikelihood;
550  }
551 
552  return( true );
553 }
554 
555 
570 template < class Array1, class Array2 >
571 bool estimate_mixture( const Array1 &rSamples, Array2 &pdp, typename Array1::size_type nMaxIteration, double tolerance, typename Array1::size_type &nIteration )
572 {
573  return( estimate_mixture( rSamples, &pdp[ 0 ], rSamples.size( ), pdp.size( ), nMaxIteration, tolerance, nIteration ) );
574 }
575 
576 
590 template < class Array1, class Array2 >
591 bool estimate_mixture( const Array1 &rSamples, Array2 &pdp, typename Array1::size_type nMaxIteration, double tolerance )
592 {
593  typename Array1::size_type nIteration = 0;
594  return( estimate_mixture( rSamples, &pdp[ 0 ], rSamples.size( ), pdp.size( ), nMaxIteration, tolerance, nIteration ) );
595 }
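// Usage sketch (illustrative, not part of the original MIST documentation): fit a
// two-component 1D Gaussian mixture to samples held in a std::vector< double >,
// starting from user-supplied initial means and standard deviations.
//
//	std::vector< double > samples;                        // filled with the data to fit
//	std::vector< mist::mixture::distribution > dists( 2 );
//	dists[ 0 ].av = 0.0;  dists[ 0 ].sd = 1.0;            // initial guess for component 0
//	dists[ 1 ].av = 10.0; dists[ 1 ].sd = 1.0;            // initial guess for component 1
//	if( mist::estimate_mixture( samples, dists, 1000, 1.0e-6 ) )
//	{
//		std::cout << dists[ 0 ] << std::endl << dists[ 1 ] << std::endl;
//	}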
596 
597 
611 template < class Array >
612 bool estimate_mixture( const Array &rSamples, mixture::distribution *pdp, typename Array::size_type nComponents, typename Array::size_type nMaxIteration, double tolerance )
613 {
614  size_t nIteration = 0;
615  return( estimate_mixture( rSamples, pdp, rSamples.size( ), nComponents, nMaxIteration, tolerance, nIteration ) );
616 }
617 
618 
633 template < class Array >
634 bool estimate_mixture( const Array &rSamples, mixture::distribution2 *pdp, typename Array::size_type nComponents, typename Array::size_type nMaxIteration, double tolerance )
635 {
636  size_t nIteration = 0;
637  return( estimate_mixture( rSamples, pdp, rSamples.size( ), nComponents, nMaxIteration, tolerance, nIteration ) );
638 }
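// Usage sketch (illustrative, not part of the original MIST documentation): fit a
// two-component 2D Gaussian mixture to samples held in a std::vector< mist::vector2< double > >.
//
//	std::vector< mist::vector2< double > > samples;       // filled with the 2D data to fit
//	std::vector< mist::mixture::distribution2 > dists( 2 );
//	dists[ 0 ].av.x = 0.0;  dists[ 0 ].av.y = 0.0;        // initial mean of component 0
//	dists[ 1 ].av.x = 10.0; dists[ 1 ].av.y = 10.0;       // initial mean of component 1
//	mist::estimate_mixture( samples, dists, 1000, 1.0e-6 );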
639 
640 
641 
643 namespace histogram
644 {
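	//! Estimate the parameters of a 1D Gaussian mixture from a histogram (frequency data) using the EM algorithm.
	//!
	//! The k-th element of rSamples is interpreted as the number of samples falling in the bin
	//! [ minimum + k * bin, minimum + ( k + 1 ) * bin ).
	//!
	//! @param[in]     rSamples      … histogram of the data (bin counts)
	//! @param[in,out] opdp          … initial parameters of each component, overwritten with the estimated values
	//! @param[in]     nSamples      … number of histogram bins
	//! @param[in]     nComponents   … number of mixture components
	//! @param[in]     minimum       … value corresponding to the left edge of the first bin
	//! @param[in]     bin           … width of one histogram bin
	//! @param[in]     nMaxIteration … maximum number of EM iterations
	//! @param[in]     tolerance     … relative tolerance on the log-likelihood used to detect convergence
	//! @param[out]    nIteration    … receives the number of iterations actually performed
	//!
	//! @retval true  … the estimation finished normally
	//! @retval false … invalid input, or a numerical failure occurred during the iterations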
662  template < class Array >
663  bool estimate_mixture( const Array &rSamples, mixture::distribution *opdp, size_t nSamples, size_t nComponents, double minimum, double bin, size_t nMaxIteration, double tolerance, size_t &nIteration )
664  {
665  if( rSamples.empty( ) || nComponents == 0 || bin == 0.0 )
666  {
667  return( false );
668  }
669 
670  typedef size_t size_type;
671 
672  size_type k, m;
673 
674  const double pi = 3.1415926535897932384626433832795;
675  const double _2pi = std::sqrt( 2.0 * pi );
676  const double _log_2pi = std::log( _2pi );
677  double fLastLikelihood = -1.0e30;
678  double tmp, number_of_samples;
679 
680  array2< double > Weight( nSamples, nComponents );
681  std::vector< mixture::distribution > pdp( nComponents );
682 
683 
		// Copy the input parameters into the working buffer
685  for( m = 0 ; m < nComponents ; m++ )
686  {
687  pdp[ m ] = opdp[ m ];
688  }
689 
		// Check that the initial component weights have a positive sum
691  tmp = 0.0;
692  for( m = 0 ; m < nComponents ; m++ )
693  {
694  tmp += pdp[ m ].weight;
695  }
696 
697  if( tmp <= 0.0 )
698  {
699  return( false );
700  }
701 
		// Normalize the weights to sum to 1 and shift the means so that the histogram starts at 0
703  for( m = 0 ; m < nComponents ; m++ )
704  {
705  pdp[ m ].weight /= tmp;
706  pdp[ m ].av -= minimum;
707  }
708 
709  for( k = 0, number_of_samples = 0.0 ; k < nSamples ; k++ )
710  {
711  number_of_samples += rSamples[ k ];
712  }
713 
714 
715 #if defined( EMALGORITHM_DEBUG ) && EMALGORITHM_DEBUG == 1
716  for( m = 0 ; m < nComponents ; m++ )
717  {
718  std::cerr << pdp[ m ] << std::endl;
719  }
720 #endif
721 
722 
		// Start of the EM algorithm
724  for( nIteration = 0 ; nIteration < nMaxIteration ; nIteration++ )
725  {
726  // E-step
727  for( k = 0 ; k < nSamples ; k++ )
728  {
729  if( rSamples[ k ] == 0 )
730  {
731  continue;
732  }
733 
734  tmp = 0.0;
735 
736  for( m = 0 ; m < nComponents ; m++ )
737  {
738  double myu = ( k + 0.5 ) * bin - pdp[ m ].av;
739  double v = pdp[ m ].weight * std::exp( - myu * myu / ( 2.0 * pdp[ m ].sd * pdp[ m ].sd ) ) / pdp[ m ].sd;
740  Weight( k, m ) = v;
741  tmp += v;
742  }
743 
744  if( tmp == 0.0 )
745  {
					// Error: every component has zero density in this bin
747  return( false );
748  }
749  else
750  {
751  for( size_type m = 0 ; m < nComponents ; m++ )
752  {
753  Weight( k, m ) /= tmp;
754  }
755  }
756  }
757 
758  // M-step
759  for( m = 0 ; m < nComponents ; m++ )
760  {
761  double weight_sum = 0;
762  double average = 0;
763  double variance = 0;
764 
765  for( k = 0 ; k < nSamples ; k++ )
766  {
767  if( rSamples[ k ] == 0 )
768  {
769  continue;
770  }
771  double w = Weight( k, m ) * rSamples[ k ];
772  weight_sum += w;
773  average += ( k + 0.5 ) * bin * w;
774  }
775 
776  if( weight_sum > 0.0 )
777  {
778  pdp[ m ].weight = weight_sum / number_of_samples;
779  pdp[ m ].av = average / weight_sum;
780 
781  for( k = 0 ; k < nSamples ; k++ )
782  {
783  double myu = ( k + 0.5 ) * bin - pdp[ m ].av;
784  variance += Weight( k, m ) * rSamples[ k ] * myu * myu;
785  }
786 
787  variance /= weight_sum;
788  }
789  else
790  {
					// Error: the responsibilities for this component sum to zero
792  return( false );
793  }
794 
795  pdp[ m ].sd = std::sqrt( variance );
796  }
797 
798 
799  double weight_sum = 0;
800  for( m = 0 ; m < nComponents ; m++ )
801  {
802  weight_sum += pdp[ m ].weight;
803  }
804 
805  if( std::abs( weight_sum - 1.0 ) > 0.1 )
806  {
				// Error: the component weights do not sum to 1
808  return( false );
809  }
810 
811  double fLikelihood = 0.0;
812 
813  for( k = 0 ; k < nSamples ; k++ )
814  {
815  if( rSamples[ k ] == 0 )
816  {
817  continue;
818  }
819 
820  tmp = 0.0;
821 
822  for( m = 0 ; m < nComponents ; m++ )
823  {
824  double myu = ( k + 0.5 ) * bin - pdp[ m ].av;
825  tmp += Weight( k, m ) * pdp[ m ].weight * std::exp( - myu * myu / ( 2.0 * pdp[ m ].sd * pdp[ m ].sd ) ) / pdp[ m ].sd;
826  }
827 
828  if( tmp == 0.0 )
829  {
830  return( false );
831  }
832 
833  fLikelihood += rSamples[ k ] * ( std::log( tmp ) - _log_2pi );
834  }
835 
836 #if defined( EMALGORITHM_DEBUG ) && EMALGORITHM_DEBUG == 1
837  for( m = 0 ; m < nComponents ; m++ )
838  {
839  std::cerr << pdp[ m ] << std::endl;
840  }
841 #elif defined( EMALGORITHM_DEBUG ) && EMALGORITHM_DEBUG == 2
842  printf( "%f = ( %f, %f, %f )\n", fLikelihood, pdp[ 0 ].weight, pdp[ 0 ].av, pdp[ 0 ].sd );
843 #endif
844 
845  if( fLastLikelihood >= fLikelihood || 2.0 * std::abs( fLastLikelihood - fLikelihood ) < tolerance * ( std::abs( fLastLikelihood ) + std::abs( fLikelihood ) ) )
846  {
847  break;
848  }
849 
			// Copy the working data to the output
851  for( m = 0 ; m < nComponents ; m++ )
852  {
853  opdp[ m ] = pdp[ m ];
854  }
855 
856  fLastLikelihood = fLikelihood;
857  }
858 
859 
		// Shift the means back to the original value range (add back the minimum)
861  for( m = 0 ; m < nComponents ; m++ )
862  {
863  opdp[ m ].av += minimum;
864  }
865 
866  return( true );
867  }
868 
869 
870 
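	//! Estimate the parameters of a 2D Gaussian mixture from a 2D histogram (joint frequency data) using the EM algorithm.
	//!
	//! The element rSamples( i, j ) is interpreted as the number of samples falling in the bin centered at
	//! ( minimum1 + ( i + 0.5 ) * bin, minimum2 + ( j + 0.5 ) * bin ); the same bin width is used for both axes.
	//!
	//! @param[in]     rSamples      … 2D histogram of the data (bin counts)
	//! @param[in,out] opdp          … initial parameters of each 2D component, overwritten with the estimated values
	//! @param[in]     nComponents   … number of mixture components
	//! @param[in]     minimum1      … value corresponding to the left edge of the first bin along the first axis
	//! @param[in]     minimum2      … value corresponding to the left edge of the first bin along the second axis
	//! @param[in]     bin           … width of one histogram bin (shared by both axes)
	//! @param[in]     nMaxIteration … maximum number of EM iterations
	//! @param[in]     tolerance     … relative tolerance on the log-likelihood used to detect convergence
	//! @param[out]    nIteration    … receives the number of iterations actually performed
	//!
	//! @retval true  … the estimation finished normally
	//! @retval false … invalid input, or a numerical failure occurred during the iterations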
888  template < class T, class Allocator >
889  bool estimate_mixture( const array2< T, Allocator > &rSamples, mixture::distribution2 *opdp, size_t nComponents, double minimum1, double minimum2, double bin, size_t nMaxIteration, double tolerance, size_t &nIteration )
890  {
891  if( rSamples.empty( ) || nComponents == 0 )
892  {
893  return( false );
894  }
895 
896  typedef size_t size_type;
897 
898  size_type i, j, k, m;
899 
900  const double pi = 3.1415926535897932384626433832795;
901  const double _2pi = 2.0 * pi;
902  const double _log_2pi = std::log( _2pi );
903  double fLastLikelihood = -1.0e30;
904  double number_of_samples;
905 
906  array3< double > Weight( rSamples.width( ), rSamples.height( ), nComponents );
907  std::vector< mixture::distribution2 > pdp( nComponents );
908 
909 
		// Copy the input parameters into the working buffer
911  for( m = 0 ; m < nComponents ; m++ )
912  {
913  pdp[ m ] = opdp[ m ];
914  }
915 
916 #if defined( EMALGORITHM_DEBUG ) && EMALGORITHM_DEBUG == 1
917  for( m = 0 ; m < nComponents ; m++ )
918  {
919  mixture::distribution2 tmp = pdp[ m ];
920  tmp.av.x += minimum1;
921  tmp.av.y += minimum2;
922  std::cerr << tmp << std::endl;
923  }
924 #endif
925 
		// Normalize the initial component weights so that they sum to 1
927  {
928  double tmp = 0.0;
929  for( m = 0 ; m < nComponents ; m++ )
930  {
931  tmp += pdp[ m ].weight;
932  }
933 
934  if( tmp <= 0.0 )
935  {
936  return( false );
937  }
938 
939  for( m = 0 ; m < nComponents ; m++ )
940  {
941  pdp[ m ].weight /= tmp;
942  pdp[ m ].av.x -= minimum1;
943  pdp[ m ].av.y -= minimum2;
944  }
945 
946  for( k = 0, number_of_samples = 0.0 ; k < rSamples.size( ) ; k++ )
947  {
948  number_of_samples += rSamples[ k ];
949  }
950  }
951 
952 
		// Start of the EM algorithm
954  for( nIteration = 0 ; nIteration < nMaxIteration ; nIteration++ )
955  {
956  // E-step
957 
958  for( j = 0 ; j < rSamples.height( ) ; j++ )
959  {
960  for( i = 0 ; i < rSamples.width( ) ; i++ )
961  {
962  if( rSamples( i, j ) == 0 )
963  {
964  continue;
965  }
966 
967  double tmp = 0.0;
968 
969  for( m = 0 ; m < nComponents ; m++ )
970  {
971  double t = pdp[ m ].v[ 0 ] * pdp[ m ].v[ 3 ] - pdp[ m ].v[ 1 ] * pdp[ m ].v[ 2 ];
972  double a = pdp[ m ].v[ 3 ];
973  double b = -pdp[ m ].v[ 1 ];
974  double c = -pdp[ m ].v[ 2 ];
975  double d = pdp[ m ].v[ 0 ];
976  double x = pdp[ m ].av.x - ( i + 0.5 ) * bin;
977  double y = pdp[ m ].av.y - ( j + 0.5 ) * bin;
978  double vvv = ( ( a * x + b * y ) * x + ( c * x + d * y ) * y ) / t;
979  double v = pdp[ m ].weight * std::exp( - vvv / 2.0 ) / sqrt( t );
980  Weight( i, j, m ) = v;
981  tmp += v;
982  }
983  if( tmp == 0.0 )
984  {
					// Error: every component has zero density in this bin
986  return( false );
987  }
988  else
989  {
990  for( size_type m = 0 ; m < nComponents ; m++ )
991  {
992  Weight( i, j, m ) /= tmp;
993  }
994  }
995  }
996  }
997 
998  // M-step
999  for( m = 0 ; m < nComponents ; m++ )
1000  {
1001  double weight_sum = 0;
1002  double avex = 0;
1003  double avey = 0;
1004  double v1 = 0;
1005  double v2 = 0;
1006  double v3 = 0;
1007 
1008  for( j = 0 ; j < rSamples.height( ) ; j++ )
1009  {
1010  for( i = 0 ; i < rSamples.width( ) ; i++ )
1011  {
1012  double w = Weight( i, j, m ) * rSamples( i, j );
1013  weight_sum += w;
1014  avex += static_cast< double >( ( i + 0.5 ) * bin ) * w;
1015  avey += static_cast< double >( ( j + 0.5 ) * bin ) * w;
1016  }
1017  }
1018 
1019  if( weight_sum > 0.0 )
1020  {
1021  pdp[ m ].weight = weight_sum / number_of_samples;
1022  pdp[ m ].av.x = avex / weight_sum;
1023  pdp[ m ].av.y = avey / weight_sum;
1024 
1025  for( j = 0 ; j < rSamples.height( ) ; j++ )
1026  {
1027  for( i = 0 ; i < rSamples.width( ) ; i++ )
1028  {
1029  double w = Weight( i, j, m );
1030  double xx = ( i + 0.5 ) * bin - pdp[ m ].av.x;
1031  double yy = ( j + 0.5 ) * bin - pdp[ m ].av.y;
1032  double num = rSamples( i, j );
1033  v1 += w * xx * xx * num;
1034  v2 += w * yy * yy * num;
1035  v3 += w * xx * yy * num;
1036  }
1037  }
1038 
1039  v1 /= weight_sum;
1040  v2 /= weight_sum;
1041  v3 /= weight_sum;
1042 
1043  if( v1 * v2 < v3 * v3 )
1044  {
1045  v3 = std::sqrt( v1 * v2 ) - 1.0e-10;
1046  }
1047  }
1048  else
1049  {
					// Error: the responsibilities for this component sum to zero
1051  return( false );
1052  }
1053 
1054 
1055  pdp[ m ].v[ 0 ] = v1;
1056  pdp[ m ].v[ 3 ] = v2;
1057  pdp[ m ].v[ 1 ] = pdp[ m ].v[ 2 ] = v3;
1058  }
1059 
1060 
1061  double weight_sum = 0;
1062  for( m = 0 ; m < nComponents ; m++ )
1063  {
1064  weight_sum += pdp[ m ].weight;
1065  }
1066 
1067  if( std::abs( weight_sum - 1.0 ) > 0.1 )
1068  {
				// Error: the component weights do not sum to 1
1070  return( false );
1071  }
1072 
1073  double fLikelihood = 0.0;
1074 
1075  for( j = 0 ; j < rSamples.height( ) ; j++ )
1076  {
1077  for( i = 0 ; i < rSamples.width( ) ; i++ )
1078  {
1079  if( rSamples( i, j ) == 0 )
1080  {
1081  continue;
1082  }
1083 
1084  double tmp = 0.0;
1085 
1086  for( m = 0 ; m < nComponents ; m++ )
1087  {
1088  double t = pdp[ m ].v[ 0 ] * pdp[ m ].v[ 3 ] - pdp[ m ].v[ 1 ] * pdp[ m ].v[ 2 ];
1089  double a = pdp[ m ].v[ 3 ];
1090  double b = -pdp[ m ].v[ 1 ];
1091  double c = -pdp[ m ].v[ 2 ];
1092  double d = pdp[ m ].v[ 0 ];
1093  double x = pdp[ m ].av.x - ( i + 0.5 ) * bin;
1094  double y = pdp[ m ].av.y - ( j + 0.5 ) * bin;
1095  double vvv = ( ( a * x + b * y ) * x + ( c * x + d * y ) * y ) / t;
1096  tmp += Weight( i, j, m ) * pdp[ m ].weight * std::exp( - vvv / 2.0 ) / std::sqrt( t );
1097  }
1098 
1099  if( tmp == 0.0 )
1100  {
1101  return( false );
1102  }
1103 
1104  fLikelihood += rSamples( i, j ) * ( std::log( tmp ) - _log_2pi );
1105  }
1106  }
1107 
1108 #if defined( EMALGORITHM_DEBUG ) && EMALGORITHM_DEBUG == 1
1109  for( m = 0 ; m < nComponents ; m++ )
1110  {
1111  mixture::distribution2 tmp = pdp[ m ];
1112  tmp.av.x += minimum1;
1113  tmp.av.y += minimum2;
1114  std::cerr << tmp << std::endl;
1115  }
1116 #elif defined( EMALGORITHM_DEBUG ) && EMALGORITHM_DEBUG == 2
			printf( "%f = ( %f, %f, %f )\n", fLikelihood, pdp[ 0 ].weight, pdp[ 0 ].av.x, pdp[ 0 ].av.y );
1118 #endif
1119 
1120  if( fLastLikelihood >= fLikelihood || 2.0 * std::abs( fLastLikelihood - fLikelihood ) < tolerance * ( std::abs( fLastLikelihood ) + std::abs( fLikelihood ) ) )
1121  {
1122  break;
1123  }
1124 
			// Copy the working data to the output
1126  for( m = 0 ; m < nComponents ; m++ )
1127  {
1128  opdp[ m ] = pdp[ m ];
1129  }
1130 
1131  fLastLikelihood = fLikelihood;
1132  }
1133 
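		// Shift the means back to the original value range (add back minimum1 and minimum2)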
1134  for( m = 0 ; m < nComponents ; m++ )
1135  {
1136  opdp[ m ].av.x += minimum1;
1137  opdp[ m ].av.y += minimum2;
1138  }
1139 
1140  return( true );
1141  }
1142 
1143 
1159  template < class Array1, class Array2 >
1160  bool estimate_mixture( const Array1 &rSamples, Array2 &pdp, double minimum, double bin, typename Array1::size_type nMaxIteration, double tolerance, typename Array1::size_type &nIteration )
1161  {
1162  return( histogram::estimate_mixture( rSamples, &pdp[ 0 ], rSamples.size( ), pdp.size( ), minimum, bin, nMaxIteration, tolerance, nIteration ) );
1163  }
1164 
1165 
1180  template < class Array1, class Array2 >
1181  bool estimate_mixture( const Array1 &rSamples, Array2 &pdp, double minimum, double bin, typename Array1::size_type nMaxIteration, double tolerance )
1182  {
1183  typename Array1::size_type nIteration = 0;
1184  return( histogram::estimate_mixture( rSamples, &pdp[ 0 ], rSamples.size( ), pdp.size( ), minimum, bin, nMaxIteration, tolerance, nIteration ) );
1185  }
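	// Usage sketch (illustrative, not part of the original MIST documentation): fit a
	// two-component Gaussian mixture to an 8-bit intensity histogram, where hist[ k ]
	// counts the samples with value k (minimum = 0, bin width = 1).
	//
	//	std::vector< int > hist( 256 );                       // filled with the bin counts
	//	std::vector< mist::mixture::distribution > dists( 2 );
	//	dists[ 0 ].av =  64.0; dists[ 0 ].sd = 10.0;          // initial guess for component 0
	//	dists[ 1 ].av = 192.0; dists[ 1 ].sd = 10.0;          // initial guess for component 1
	//	mist::histogram::estimate_mixture( hist, dists, 0.0, 1.0, 1000, 1.0e-6 );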
1186 
1187 
1203  template < class Array >
1204  bool estimate_mixture( const Array &rSamples, mixture::distribution *pdp, typename Array::size_type nComponents, double minimum, double bin, typename Array::size_type nMaxIteration, double tolerance )
1205  {
1206  size_t nIteration = 0;
1207  return( histogram::estimate_mixture( rSamples, pdp, rSamples.size( ), nComponents, minimum, bin, nMaxIteration, tolerance, nIteration ) );
1208  }
1209 
1210 
1211 
1212 
1213 
1214 
1233  template < class T, class Allocator, class Array1 >
1234  bool estimate_mixture( const array2< T, Allocator > &rSamples, Array1 &pdp, double minimum1, double minimum2, double bin, typename Array1::size_type nMaxIteration, double tolerance, typename Array1::size_type &nIteration )
1235  {
1236  return( histogram::estimate_mixture( rSamples, &pdp[ 0 ], pdp.size( ), minimum1, minimum2, bin, nMaxIteration, tolerance, nIteration ) );
1237  }
1238 
1239 
1257  template < class T, class Allocator, class Array1 >
1258  bool estimate_mixture( const array2< T, Allocator > &rSamples, Array1 &pdp, double minimum1, double minimum2, double bin, typename Array1::size_type nMaxIteration, double tolerance )
1259  {
		size_t nIteration = 0;
1261  return( histogram::estimate_mixture( rSamples, &pdp[ 0 ], pdp.size( ), minimum1, minimum2, bin, nMaxIteration, tolerance, nIteration ) );
1262  }
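	// Usage sketch (illustrative, not part of the original MIST documentation): fit two
	// bivariate Gaussians to a 2D joint histogram stored in a mist::array2< int >,
	// with both axes starting at 0 and using a bin width of 1.
	//
	//	mist::array2< int > hist( 256, 256 );                 // filled with the bin counts
	//	std::vector< mist::mixture::distribution2 > dists( 2 );
	//	dists[ 0 ].av.x =  50.0; dists[ 0 ].av.y =  50.0;     // initial mean of component 0
	//	dists[ 1 ].av.x = 200.0; dists[ 1 ].av.y = 200.0;     // initial mean of component 1
	//	mist::histogram::estimate_mixture( hist, dists, 0.0, 0.0, 1.0, 1000, 1.0e-6 );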
1263 
1264 
1283  template < class T, class Allocator >
1284  bool estimate_mixture( const array2< T, Allocator > &rSamples, mixture::distribution2 *pdp, typename array2< T, Allocator >::size_type nComponents, double minimum1, double minimum2, double bin, typename array2< T, Allocator >::size_type nMaxIteration, double tolerance )
1285  {
1286  size_t nIteration = 0;
1287  return( histogram::estimate_mixture( rSamples, pdp, nComponents, minimum1, minimum2, bin, nMaxIteration, tolerance, nIteration ) );
1288  }
1289 }
1290 
// End of the group for mixture-distribution estimation using the EM algorithm
1293 
1294 
1295 
// End of the mist namespace
1297 _MIST_END
1298 
1299 
1300 #endif // __INCLUDE_MIXTURE__
1301 
