CARMA C++
Column.h
Go to the documentation of this file.
1 #ifndef CARMA_DBMS_COLUMN_H
2 #define CARMA_DBMS_COLUMN_H
3 
14 #include <algorithm>
15 #include <string>
16 #include <sstream>
17 #include <map>
18 #include <math.h>
19 #include <vector>
22 
23 namespace carma {
24 namespace dbms {
25 
26 
30 template < typename T >
31 class Column : public ::std::vector< T > {
32 
33 public:
34 
39  Column(const std::string& name) {
40  name_ = name;
41  }
42 
46  virtual ~Column() {
47  }
48 
53  std::string getName() const {return name_;}
54 
59  bool hasNulls() const {return ! nullIndices_.empty();}
60 
66  std::vector< unsigned > getNullIndices() const { return nullIndices_; }
67 
71  void push_back_null() {
72  this->push_back(nullValue_);
73  nullIndices_.push_back(this->size()-1);
74  }
75 
81  void setNullValue(const T& nullValue) {
82  nullValue_ = nullValue;
83  for(::size_t i = 0; i < nullIndices_.size(); i++) {
84  (*this)[nullIndices_[i]] = nullValue_;
85  }
86  }
87 
88 
93  inline T getNullValue() const {return nullValue_;}
94 
99  std::string toString() const {
100  std::ostringstream ss;
101  unsigned int nullPos = 0;
102  //for (int i=0; i<data_.size(); i++) {
103  for (::size_t i=0; i<this->size(); i++) {
104  if(nullPos < nullIndices_.size() && i == nullIndices_[nullPos]) {
105  ss << "NULL";
106  nullPos++;
107  } else {
108  //ss << data_[i];
109  ss << (*this)[i];
110  }
111  ss << " ";
112  }
113  return ss.str();
114  }
115 
123  // FIXME brain dead search algorithm which needs to be improved
124  // would probably be OK in most cases though since in CARMA we expect
125  // not to have many nulls in our columns. Ultimately, we probably want a binary
126  // search algorithm
127  bool isElementNull(unsigned index) const {
128  if(index >= this->size()) {
129  std::ostringstream emsg;
130  emsg << "Requested index " << index << " is >= number of elements "
131  << "which is " << this->size();
133  emsg.str());
134  }
135  std::vector< unsigned >::const_iterator iter;
136  for(iter=nullIndices_.begin(); iter!=nullIndices_.end(); iter++) {
137  if(*iter == index) {
138  return true;
139  } else if (*iter > index) {
140  // take advantage of the fact that nullIndices_ is sorted in
141  // ascending order
142  return false;
143  }
144  }
145  return false;
146  }
147 
153  T sum() const {
154  T sum=0;
155  for(::size_t i=0; i<this->size(); i++) {
156  if(!isElementNull(i)) {
157  //sum += data_[i];
158  sum += (*this)[i];
159  }
160  }
161  return sum;
162  }
163 
168  T mean() const {
169  //return sum/(data_.size()-nullIndices_.size());
170  return sum()/(this->size()-nullIndices_.size());
171  }
172 
177  T max() const {
178  T max;
179  // get the first non-null value as the initial test value
180  for(int i=0; i<this->size(); i++) {
181  if(!isElementNull(i)) {
182  max = (*this)[i];
183  break;
184  }
185  }
186  for(int i=0; i<this->size(); i++) {
187  if(!isElementNull(i)) {
188  max = std::max((*this)[i],max);
189  }
190  }
191  return max;
192  }
193 
194 
199  T min() const {
200  T min;
201  for(int i=0; i<this->size(); i++) {
202  if(!isElementNull(i)) {
203  min = (*this)[i];
204  break;
205  }
206  }
207  for(int i=0; i<this->size(); i++) {
208  if(!isElementNull(i)) {
209  min = std::min((*this)[i],min);
210  }
211  }
212  return min;
213  }
214 
223  int indexOf(const T& value) const {
224  typename Column<T>::const_iterator iter;
225  unsigned count = 0;
226  std::vector< unsigned >::const_iterator niter = nullIndices_.begin();
227  bool elementIsNull = false;
228  bool hasNulls = !nullIndices_.empty();
229  for(iter = this->begin(); iter != this->end(); iter++) {
230  // takes advantage of the fact that nullIndices_ is sorted
231  if(hasNulls) {
232  while(niter != nullIndices_.end() && *niter < count) {
233  niter++;
234  }
235  }
236  elementIsNull = (hasNulls) ? (*niter == count) : false;
237  if(!elementIsNull && (*iter == value)) {
238  return count;
239  }
240  count++;
241  }
242  return -1;
243  }
244 
252  Column
253  getDistinctValues( const ::std::string & name = "Distinct Values",
254  bool includeNull = true ) const;
255 
256 protected:
257  std::string name_;
258  std::vector< unsigned > nullIndices_;
259  T nullValue_;
260 
261  // prohibit use of the default constructor
262  Column() {}
263 
264  class ValueMajorIndexMinorOrderingPred {
265  private:
266  const Column & c_;
267 
268  public:
269  explicit ValueMajorIndexMinorOrderingPred( const Column & c );
270 
271  bool operator()( const size_t rhsIndex,
272  const size_t lhsIndex ) const;
273  };
274 };
275 
276 
296  template <typename T, typename U>
297  std::map<T,U> columnsToMap(const carma::dbms::Column<T>& keyColumn,
298  const carma::dbms::Column<U>& valueColumn) {
299  std::ostringstream emsg;
300  if(keyColumn.size() != valueColumn.size()) {
301  emsg << "The key column has " << keyColumn.size() << " elements "
302  << "while the value column has " << valueColumn.size()
303  << " elements. This method can only be run on columns of the "
304  << "same size.";
306  emsg.str());
307  }
308  if(keyColumn.hasNulls()) {
309  emsg << "The key column has NULL values. Both columns must not "
310  << " have NULLS";
312  emsg.str());
313  }
314  if(valueColumn.hasNulls()) {
315  emsg << "The value column has NULL values. Both columns must not "
316  << " have NULLS";
318  emsg.str());
319  }
320  if(keyColumn.size() != keyColumn.getDistinctValues().size()) {
321  emsg << "Some of the key column's values are not distinct. In order "
322  << "for the mapping to work, all of this column's elements "
323  << "must be unique";
325  emsg.str());
326  }
327  typename Column<T>::const_iterator iter = keyColumn.begin();
328  typename Column<U>::const_iterator niter = valueColumn.begin();
329  std::map<T,U> mapping;
330  for ( ; iter != keyColumn.end(); iter++) {
331  mapping[*iter] = *niter;
332  niter++;
333  }
334  return mapping;
335  };
336 
351 }}
352  template <typename T> std::ostream& operator<<
353  (std::ostream &os, const carma::dbms::Column<T> &column) {
354  os << column.toString();
355  return os;
356  }
357 
358 
359 template < typename T >
360 inline
363  const Column & c ) :
364 c_( c )
365 {
366 }
367 
368 
369 template < typename T >
370 inline bool
373  const size_t rhsIndex,
374  const size_t lhsIndex ) const
375 {
376  if ( c_[ rhsIndex ] == c_[ lhsIndex ] )
377  return (rhsIndex < lhsIndex);
378  else
379  return (c_[ rhsIndex ] < c_[ lhsIndex ]);
380 }
381 
382 
383 template < typename T >
386  const ::std::string & name,
387  const bool includeNull ) const
388 {
389  const size_t mySize = this->size();
390 
391  // Get a guaranteed sorted vector of null value indices with no duplicates
392  // Note that it could still have indices greater than or equal to mySize!
393  ::std::vector< unsigned > localNullIndices = nullIndices_;
394  if ( localNullIndices.empty() == false ) {
395  stable_sort( localNullIndices.begin(), localNullIndices.end() );
396 
397  ::std::vector< unsigned >::iterator eraseBegin =
398  unique( localNullIndices.begin(), localNullIndices.end() );
399 
400  if ( eraseBegin != localNullIndices.end() )
401  localNullIndices.erase( eraseBegin, localNullIndices.end() );
402  }
403 
404  // Get the vector of non-null indices
405  ::std::vector< unsigned > nonNullIndices;
406  {
407  if ( localNullIndices.size() < mySize )
408  nonNullIndices.reserve( mySize - localNullIndices.size() );
409 
410  ::std::vector< unsigned >::const_iterator k =
411  localNullIndices.begin();
412 
413  const ::std::vector< unsigned >::const_iterator kEnd =
414  localNullIndices.end();
415 
416  for ( unsigned i = 0; i < mySize; ++i ) {
417  if ( (k != kEnd) && (i == *k) )
418  ++k; // It's the next null valued index
419  else
420  nonNullIndices.push_back( i );
421  }
422  }
423 
424  if ( nonNullIndices.empty() == false ) {
425  // Sort the non-null indices by value and, more importantly,
426  // by index for equal values
427  {
428  const ValueMajorIndexMinorOrderingPred orderingPred( *this );
429 
430  stable_sort( nonNullIndices.begin(),
431  nonNullIndices.end(),
432  orderingPred );
433  }
434 
435  // Throw away all but the first index for each distinct non-null value
436  {
437  ::std::vector< unsigned > firstIndexForNonNullValue;
438  firstIndexForNonNullValue.reserve( nonNullIndices.size() );
439 
440  ::std::vector< unsigned >::const_iterator j =
441  nonNullIndices.begin();
442 
443  const ::std::vector< unsigned >::const_iterator jEnd =
444  nonNullIndices.end();
445 
446  // First element is always a first index for a non-null value
447  firstIndexForNonNullValue.push_back( *j );
448  ::std::vector< unsigned >::const_iterator k = j;
449  ++j;
450 
451  for ( ; j != jEnd; ++j ) {
452  if ( ((*this)[*j]) == ((*this)[*k]) )
453  continue;
454 
455  // New distinct value
456  firstIndexForNonNullValue.push_back( *j );
457  k = j;
458  }
459 
460  nonNullIndices.swap( firstIndexForNonNullValue );
461  }
462 
463  // Sort the first indices that are left
464  stable_sort( nonNullIndices.begin(), nonNullIndices.end() );
465  }
466 
467  // Produce the final result
468  Column distinct( name );
469 
470  const size_t distinctCount =
471  ((includeNull && (localNullIndices.empty() == false)) ? 1 : 0) +
472  nonNullIndices.size();
473 
474  if ( distinctCount != 0 ) {
475  distinct.reserve( distinctCount );
476 
477  const ::std::vector< unsigned >::const_iterator kBegin =
478  localNullIndices.begin();
479 
480  const ::std::vector< unsigned >::const_iterator kEnd =
481  localNullIndices.end();
482 
483  ::std::vector< unsigned >::const_iterator k = kBegin;
484 
485  ::std::vector< unsigned >::const_iterator j =
486  nonNullIndices.begin();
487 
488  const ::std::vector< unsigned >::const_iterator jEnd =
489  nonNullIndices.end();
490 
491  for ( unsigned i = 0; i < mySize; ++i ) {
492  if ( (k != kEnd) && (i == *k) ) {
493  if ( includeNull && (k == kBegin) )
494  distinct.push_back_null();
495  ++k;
496  } else if ( (j != jEnd) && (i == *j) ) {
497  ++j;
498  distinct.push_back( (*this)[i] );
499  }
500  }
501  }
502 
503  return distinct;
504 }
505 
506 
507 #endif // CARMA_DBMS_COLUMN_H
template to mimic a db column.
Definition: Column.h:31
an exception indicating that an object is in an illegal state for the operation that was called on or...
void setNullValue(const T &nullValue)
Set the value of NULL values.
Definition: Column.h:81
void push_back_null()
add a NULL element to the end of the column
Definition: Column.h:71
Exception class for errors.
virtual ~Column()
destructor, derived classes may want to override
Definition: Column.h:46
bool hasNulls() const
return true if the column contains NULL values
Definition: Column.h:59
Column(const std::string &name)
constuctor
Definition: Column.h:39
int indexOf(const T &value) const
find the first occurence of a specified value
Definition: Column.h:223
IllegalArgumentException class.
T sum() const
sum all non-null column values FIXME probably need a way to guard against doing this for strings ...
Definition: Column.h:153
T max() const
get the maximum non-null column values
Definition: Column.h:177
std::vector< unsigned > getNullIndices() const
get the indices for NULL values.
Definition: Column.h:66
std::map< T, U > columnsToMap(const carma::dbms::Column< T > &keyColumn, const carma::dbms::Column< U > &valueColumn)
Definition: Column.h:297
T getNullValue() const
get the NULL data value
Definition: Column.h:93
#define CARMA_EXCEPTION(x, y)
Trick to get the file name and line number passed to the exception handler.
T mean() const
get the average of the non-null column values
Definition: Column.h:168
std::string toString() const
return a string representation of the column
Definition: Column.h:99
Column getDistinctValues(const ::std::string &name="Distinct Values", bool includeNull=true) const
get distinct values of a column
Definition: Column.h:385
T min() const
get the minumum non-null column value
Definition: Column.h:199
bool isElementNull(unsigned index) const
is the specified element null?
Definition: Column.h:127
std::string getName() const
get the column name
Definition: Column.h:53