csv.h
See the documentation.
1 //
2 // Copyright (c) 2003-2011, MIST Project, Nagoya University
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without modification,
6 // are permitted provided that the following conditions are met:
7 //
8 // 1. Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer.
10 //
11 // 2. Redistributions in binary form must reproduce the above copyright notice,
12 // this list of conditions and the following disclaimer in the documentation
13 // and/or other materials provided with the distribution.
14 //
15 // 3. Neither the name of the Nagoya University nor the names of its contributors
16 // may be used to endorse or promote products derived from this software
17 // without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
20 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 // FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
22 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25 // IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
26 // THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 //
28 
33 #ifndef __INCLUDE_MIST_CSV__
34 #define __INCLUDE_MIST_CSV__
35 
36 
37 #ifndef __INCLUDE_MIST_CONF_H__
38 #include "../config/mist_conf.h"
39 #endif
40 
41 #ifndef __INCLUDE_MIST_MATRIX__
42 #include "../matrix.h"
43 #endif
44 
45 
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
49 
50 #if defined( _COMPRESSED_CSV_SUPPORT_ ) && _COMPRESSED_CSV_SUPPORT_ != 0
51  #include <zlib.h>
52 #endif
53 
54 
55 // Beginning of the mist namespace
57 
58 
59 namespace __csv_controller__
60 {
/// @brief Converts one CSV field to and from a value of type T.
///
/// This generic version handles the value as an int: fields are parsed with
/// atoi() and values are written with the "%d" format.
template < class T >
struct csv_data_converter
{
    typedef T value_type;

    /// @brief Parses the characters in [s, e) as a decimal integer.
    /// @param[in] s  first character of the field
    /// @param[in] e  one past the last character of the field
    /// @return the result of atoi() on the field text, converted to value_type
    static value_type convert_to( const unsigned char *s, const unsigned char *e )
    {
        return( atoi( std::string( s, e ).c_str( ) ) );
    }

    /// @brief Formats a value as decimal text.
    /// @param[in] val  value to format
    /// @return the "%d" representation of val converted to int
    static std::string convert_from( const value_type &val )
    {
        // A local buffer (instead of a function-local static one) keeps this
        // function reentrant; 32 bytes are enough for any int printed by "%d".
        char buff[ 32 ];

        // The explicit cast makes the argument match "%d" for every T that is
        // convertible to int, rather than passing T itself through varargs.
        std::snprintf( buff, sizeof( buff ), "%d", static_cast< int >( val ) );
        return( buff );
    }
};
78 
79  template < >
80  struct csv_data_converter< float >
81  {
82  typedef float value_type;
83 
84  static value_type convert_to( const unsigned char *s, const unsigned char *e )
85  {
86  return( ( float )atof( std::string( s, e ).c_str( ) ) );
87  }
88 
89  static std::string convert_from( const value_type &val )
90  {
91  static char buff[ 256 ];
92  sprintf( buff, "%f", val );
93  return( buff );
94  }
95  };
96 
97  template < >
98  struct csv_data_converter< double >
99  {
100  typedef double value_type;
101 
102  static value_type convert_to( const unsigned char *s, const unsigned char *e )
103  {
104  return( atof( std::string( s, e ).c_str( ) ) );
105  }
106 
107  static std::string convert_from( const value_type &val )
108  {
109  static char buff[ 256 ];
110  sprintf( buff, "%f", val );
111  return( buff );
112  }
113  };
114 
115  template < >
116  struct csv_data_converter< std::string >
117  {
118  typedef std::string value_type;
119 
120  static value_type convert_to( const unsigned char *s, const unsigned char *e )
121  {
122  return( std::string( s, e ).c_str( ) );
123  }
124 
125  static std::string convert_from( const value_type &val )
126  {
127  return( val );
128  }
129  };
130 
131  template < class Array >
132  struct csv_controller
133  {
134  typedef typename Array::value_type element_type;
135  typedef typename element_type::value_type value_type;
136  typedef typename Array::size_type size_type;
137  typedef csv_data_converter< value_type > converter;
138 
139  static const unsigned char *get_line( const unsigned char *s, const unsigned char *e, bool &is_empty_line )
140  {
141  const unsigned char *sp = s;
142  const unsigned char *ep = s;
143  while( s < e )
144  {
145  if( s[ 0 ] == '\r' )
146  {
147  if( s + 1 != e && s[ 1 ] == '\n' )
148  {
149  s = s + 2;
150  }
151  else
152  {
153  s++;
154  }
155  break;
156  }
157  else if( s[ 0 ] == '\n' )
158  {
159  s = s + 1;
160  break;
161  }
162 
163  ep++;
164  s++;
165  }
166 
167  // 空行はスキップする
168  if( s < e && sp == ep )
169  {
170  return( get_line( s, e, is_empty_line ) );
171  }
172  else
173  {
174  is_empty_line = sp == ep;
175  return( s > e ? e : s );
176  }
177  }
178 
179  static const unsigned char *get_value( const unsigned char *s, const unsigned char *e, value_type &val, const std::string &separator )
180  {
181  // 先頭の空白(改行やタブを含む)を飛ばす
182  while( s < e )
183  {
184  if( s[ 0 ] == '\r' )
185  {
186  if( s + 1 != e && s[ 1 ] == '\n' )
187  {
188  s = s + 2;
189  }
190  else
191  {
192  s++;
193  }
194  }
195  else if( s[ 0 ] == '\n' )
196  {
197  s++;
198  }
199  else if( s[ 0 ] == ' ' || s[ 0 ] == '\t' )
200  {
201  s++;
202  }
203  else
204  {
205  break;
206  }
207  }
208 
209  const unsigned char *sp = s;
210  const unsigned char *ep = sp;
211 
212  // 次にコンマが来る前まで進める
213  while( s < e )
214  {
215  if( s[ 0 ] == '\r' )
216  {
217  if( s + 1 != e && s[ 1 ] == '\n' )
218  {
219  s = s + 2;
220  }
221  else
222  {
223  s++;
224  }
225  break;
226  }
227  else if( s[ 0 ] == '\n' )
228  {
229  s++;
230  break;
231  }
232  else
233  {
234  bool isFound = false;
235  for( size_type i = 0 ; i < separator.size( ) ; i++ )
236  {
237  if( s[ 0 ] == separator[ i ] )
238  {
239  isFound = true;
240  break;
241  }
242  }
243 
244  if( isFound )
245  {
246  s++;
247  break;
248  }
249  }
250 
251  ep++;
252  s++;
253  }
254 
255  if( sp < ep )
256  {
257  val = converter::convert_to( sp, ep );
258  }
259 
260  return( s > e ? e : s );
261  }
262 
263  static bool convert_from_csv_data( Array &csv, const unsigned char *buff, size_type len, const std::string &separator )
264  {
265  const unsigned char *p = buff;
266  const unsigned char *e = buff + len;
267 
268  while( p < e )
269  {
270  bool is_empty_line = false;
271  const unsigned char *np = get_line( p, e, is_empty_line );
272 
273  if( is_empty_line )
274  {
275  p = np;
276  continue;
277  }
278 
279  element_type element;
280  while( p < np )
281  {
282  value_type val;
283  p = get_value( p, np, val, separator );
284  element.push_back( val );
285  }
286 
287  p = np;
288 
289  if( csv.size( ) == 0 )
290  {
291  csv.push_back( element );
292  }
293  else if( csv[ csv.size( ) - 1 ].size( ) == element.size( ) )
294  {
295  csv.push_back( element );
296  }
297  else
298  {
299  return( false );
300  }
301  }
302 
303  return( true );
304  }
305 
306  static bool read( Array &csv, const std::string &filename, const std::string &separator )
307  {
308 #if defined( _COMPRESSED_CSV_SUPPORT_ ) && _COMPRESSED_CSV_SUPPORT_ != 0
309  gzFile fp;
310  if( ( fp = gzopen( filename.c_str( ), "rb" ) ) == NULL )
311  {
312  return( false );
313  }
314 #else
315  FILE *fp;
316  if( ( fp = fopen( filename.c_str( ), "rb" ) ) == NULL )
317  {
318  return( false );
319  }
320 #endif
321 
322  size_type numBytes = 4096;
323  unsigned char *buff = new unsigned char[ numBytes ];
324  unsigned char *sp = buff;
325  ptrdiff_t read_size = 0;
326 
327  bool ret = true;
328 #if defined( _COMPRESSED_CSV_SUPPORT_ ) && _COMPRESSED_CSV_SUPPORT_ != 0
329  while( gzeof( fp ) == 0 )
330 #else
331  while( feof( fp ) == 0 )
332 #endif
333  {
334  ptrdiff_t restBytes = ( buff + numBytes ) - sp;
335 #if defined( _COMPRESSED_CSV_SUPPORT_ ) && _COMPRESSED_CSV_SUPPORT_ != 0
336  read_size = gzread( fp, ( void * )sp, static_cast< unsigned int >( sizeof( unsigned char ) * restBytes ) );
337 #else
338  read_size = fread( ( void * )sp, sizeof( unsigned char ), static_cast< unsigned int >( restBytes ), fp );
339 #endif
340 
341  unsigned char *eep = sp + read_size;
342  if( eep < buff + numBytes )
343  {
344  ret = convert_from_csv_data( csv, buff, sp + read_size - buff, separator );
345  break;
346  }
347 
348  unsigned char *e = sp;
349  unsigned char *ep = eep + 1;
350  for( ; e < eep ; e++ )
351  {
352  if( e[ 0 ] == '\r' )
353  {
354  if( e < e && e[ 1 ] == '\n' )
355  {
356  e += 2;
357  ep = e;
358  }
359  else
360  {
361  e++;
362  ep = e;
363  }
364  }
365  else if( e[ 0 ] == '\n' )
366  {
367  e++;
368  ep = e;
369  }
370  else
371  {
372  e++;
373  }
374  }
375 
376  if( ep > eep )
377  {
378  // 一行分のデータを読み込めなかったのでテンポラリ領域を拡張する
379  unsigned char *tmp = new unsigned char[ numBytes * 2 ];
380  memcpy( tmp, buff, sizeof( unsigned char ) * numBytes );
381  delete [] buff;
382  buff = tmp;
383  sp = buff + numBytes;
384  numBytes *= 2;
385  }
386  else
387  {
388  if( !convert_from_csv_data( csv, buff, ep - buff, separator ) )
389  {
390  ret = false;
391  break;
392  }
393 
394  unsigned char *s1 = buff;
395  unsigned char *s2 = ep;
396  while( s2 < eep )
397  {
398  *s1++ = *s2++;
399  }
400 
401  sp = buff + ( eep - ep );
402  }
403  }
404 
405 #if defined( _COMPRESSED_CSV_SUPPORT_ ) && _COMPRESSED_CSV_SUPPORT_ != 0
406  gzclose( fp );
407 #else
408  fclose( fp );
409 #endif
410 
411  delete [] buff;
412  return( ret );
413  }
414  };
415 }
416 
417 
428 
429 
442 template < class Array >
443 bool read_csv( Array &csv, const std::string &filename, const std::string &separator =", " )
444 {
445  // データをクリアする
446  csv.clear( );
447  return( __csv_controller__::csv_controller< Array >::read( csv, filename, separator ) );
448 }
449 
450 
463 template < class Array >
464 bool read_csv( Array &csv, const std::wstring &filename, const std::wstring &separator =", " )
465 {
466  return( read_csv( csv, wstr2str( filename ), separator ) );
467 }
468 
469 
482 template < class T, class Allocator >
483 bool read_csv( matrix< T, Allocator > &csv, const std::string &filename, const std::string &separator =", " )
484 {
485  typedef typename matrix< T, Allocator >::size_type size_type;
486  typedef std::vector< std::vector< T > > csv_data_type;
487 
488  csv_data_type csv_data;
489  if( read_csv( csv_data, filename, separator ) && csv_data.size( ) > 0 && csv_data[ 0 ].size( ) > 0 )
490  {
491  csv.resize( csv_data.size( ), csv_data[ 0 ].size( ) );
492 
493  for( size_type r = 0 ; r < csv_data.size( ) ; r++ )
494  {
495  typename csv_data_type::value_type &data = csv_data[ r ];
496 
497  for( size_type c = 0 ; c < data.size( ) ; c++ )
498  {
499  csv( r, c ) = data[ c ];
500  }
501  }
502 
503  return( true );
504  }
505 
506  return( false );
507 }
508 
521 template < class T, class Allocator >
522 bool read_csv( matrix< T, Allocator > &csv, const std::wstring &filename, const std::string &separator =", " )
523 {
524  return( read_csv( csv, wstr2str( filename ), separator ) );
525 }
526 
528 // End of the CSV data input/output group
529 
530 
531 // End of the mist namespace
532 _MIST_END
533 
534 
535 #endif // __INCLUDE_MIST_CSV__
536 

Generated on Wed Nov 12 2014 19:44:13 for MIST by doxygen 1.8.1.2