Edinburgh Speech Tools  2.4-release
wagon.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : May 1996 */
35 /*-----------------------------------------------------------------------*/
36 /* A Classification and Regression Tree (CART) Program */
37 /* A basic implementation of many of the techniques in */
38 /* Briemen et al. 1984 */
39 /* */
40 /* Added decision list support, Feb 1997 */
41 /* Added stepwise use of features, Oct 1997 */
42 /* */
43 /*=======================================================================*/
44 
45 #include <cstdlib>
46 #include <iostream>
47 #include <fstream>
48 #include <cstring>
49 #include "EST_Token.h"
50 #include "EST_FMatrix.h"
51 #include "EST_multistats.h"
52 #include "EST_Wagon.h"
53 #include "EST_math.h"
54 
Discretes wgn_discretes;            // registry of discrete (categorical) feature value sets

WDataSet wgn_dataset;               // training data
WDataSet wgn_test_dataset;          // optional separate test data (empty => test on training set)
EST_FMatrix wgn_DistMatrix;         // distance matrix for cluster trees
EST_Track wgn_VertexTrack;          // vertex values for vector/trajectory trees
EST_Track wgn_VertexFeats;          // per-channel flags: channel j used iff a(0,j) > 0
EST_Track wgn_UnitTrack;            // (start,length) pairs into wgn_VertexTrack for trajectories

int wgn_min_cluster_size = 50;      // smallest leaf size allowed when splitting
int wgn_max_questions = 2000000; /* not ideal, but adequate */
int wgn_held_out = 0;               // percentage of data held out for pruning
float wgn_dropout_feats = 0.0;      // probability of randomly skipping a feature per split
float wgn_dropout_samples = 0.0;    // probability of randomly skipping samples
int wgn_cos = 1;
int wgn_prune = TRUE;               // prune tree after building
int wgn_quiet = FALSE;
int wgn_verbose = FALSE;
int wgn_count_field = -1;           // field holding per-sample weights (-1: unweighted)
EST_String wgn_count_field_name = "";
int wgn_predictee = 0;              // index of the field being predicted
EST_String wgn_predictee_name = "";
float wgn_float_range_split = 10;   // number of partitions tried for float features
float wgn_balance = 0;
EST_String wgn_opt_param = "";      // selects which score to optimize/report
EST_String wgn_vertex_output = "mean";
EST_String wgn_vertex_otype = "mean";

// Forward declarations for the file-local (static) helpers below.
static float do_summary(WNode &tree,WDataSet &ds,ostream *output);
static float test_tree_float(WNode &tree,WDataSet &ds,ostream *output);
static float test_tree_class(WNode &tree,WDataSet &ds,ostream *output);
static float test_tree_cluster(WNode &tree,WDataSet &dataset, ostream *output);
static float test_tree_vector(WNode &tree,WDataSet &dataset,ostream *output);
static float test_tree_trajectory(WNode &tree,WDataSet &dataset,ostream *output);
static float test_tree_ols(WNode &tree,WDataSet &dataset,ostream *output);
static int wagon_split(int margin,WNode &node);
static WQuestion find_best_question(WVectorVector &dset);
static void construct_binary_ques(int feat,WQuestion &test_ques);
static float construct_float_ques(int feat,WQuestion &ques,WVectorVector &ds);
static float construct_class_ques(int feat,WQuestion &ques,WVectorVector &ds);
static void wgn_set_up_data(WVectorVector &data,const WVectorList &ds,int held_out,int in);
static WNode *wagon_stepwise_find_next_best(float &bscore,int &best_feat);

// Template declarations/instantiations for lists and vectors of WVector*.
Declare_TList_T(WVector *, WVectorP)

Declare_TVector_Base_T(WVector *,NULL,NULL,WVectorP)

#if defined(INSTANTIATE_TEMPLATES)
// Instantiate class
#include "../base_class/EST_TList.cc"
#include "../base_class/EST_TVector.cc"

Instantiate_TList_T(WVector *, WVectorP)

Instantiate_TVector(WVector *)

#endif
112 
113 void wgn_load_datadescription(EST_String fname,LISP ignores)
114 {
115  // Load field description for a file
116  wgn_dataset.load_description(fname,ignores);
117  wgn_test_dataset.load_description(fname,ignores);
118 }
119 
120 void wgn_load_dataset(WDataSet &dataset,EST_String fname)
121 {
122  // Read the data set from a filename. One vector per line
123  // Assume all numbers are numbers and non-nums are categorical
124  EST_TokenStream ts;
125  WVector *v;
126  int nvec=0,i;
127 
128  if (ts.open(fname) == -1)
129  wagon_error(EST_String("unable to open data file \"")+
130  fname+"\"");
131  ts.set_PunctuationSymbols("");
133  ts.set_SingleCharSymbols("");
134 
135  for ( ;!ts.eof(); )
136  {
137  v = new WVector(dataset.width());
138  i = 0;
139  do
140  {
141  int type = dataset.ftype(i);
142  if ((type == wndt_float) ||
143  (type == wndt_ols) ||
144  (wgn_count_field == i))
145  {
146  // need to ensure this is not NaN or Infinity
147  float f = atof(ts.get().string());
148  if (isfinite(f))
149  v->set_flt_val(i,f);
150  else
151  {
152  cout << fname << ": bad float " << f
153  << " in field " <<
154  dataset.feat_name(i) << " vector " <<
155  dataset.samples() << endl;
156  v->set_flt_val(i,0.0);
157  }
158  }
159  else if (type == wndt_binary)
160  v->set_int_val(i,atoi(ts.get().string()));
161  else if (type == wndt_cluster) /* index into distmatrix */
162  v->set_int_val(i,atoi(ts.get().string()));
163  else if (type == wndt_vector) /* index into VertexTrack */
164  v->set_int_val(i,atoi(ts.get().string()));
165  else if (type == wndt_trajectory) /* index to index and length */
166  { /* a number pointing to a vector in UnitTrack that */
167  /* has an idex into VertexTrack and a number of Vertices */
168  /* Thus if its 15, UnitTrack.a(15,0) is the start frame in */
169  /* VertexTrack and UnitTrack.a(15,1) is the number of */
170  /* frames in the unit */
171  v->set_int_val(i,atoi(ts.get().string()));
172  }
173  else if (type == wndt_ignore)
174  {
175  ts.get(); // skip it
176  v->set_int_val(i,0);
177  }
178  else // should check the different classes
179  {
180  EST_String s = ts.get().string();
181  int n = wgn_discretes.discrete(type).name(s);
182  if (n == -1)
183  {
184  cout << fname << ": bad value " << s << " in field " <<
185  dataset.feat_name(i) << " vector " <<
186  dataset.samples() << endl;
187  n = 0;
188  }
189  v->set_int_val(i,n);
190  }
191  i++;
192  }
193  while (!ts.eoln() && i<dataset.width());
194  nvec ++;
195  if (i != dataset.width())
196  {
197  wagon_error(fname+": data vector "+itoString(nvec)+" contains "
198  +itoString(i)+" parameters instead of "+
199  itoString(dataset.width()));
200  }
201  if (!ts.eoln())
202  {
203  cerr << fname << ": data vector " << nvec <<
204  " contains too many parameters instead of "
205  << dataset.width() << endl;
206  wagon_error(EST_String("extra parameter(s) from ")+
207  ts.peek().string());
208  }
209  dataset.append(v);
210  }
211 
212  cout << "Dataset of " << dataset.samples() << " vectors of " <<
213  dataset.width() << " parameters from: " << fname << endl;
214  ts.close();
215 }
216 
217 float summary_results(WNode &tree,ostream *output)
218 {
219  if (wgn_test_dataset.samples() != 0)
220  return do_summary(tree,wgn_test_dataset,output);
221  else
222  return do_summary(tree,wgn_dataset,output);
223 }
224 
225 static float do_summary(WNode &tree,WDataSet &ds,ostream *output)
226 {
227  if (wgn_dataset.ftype(wgn_predictee) == wndt_cluster)
228  return test_tree_cluster(tree,ds,output);
229  else if (wgn_dataset.ftype(wgn_predictee) == wndt_vector)
230  return test_tree_vector(tree,ds,output);
231  else if (wgn_dataset.ftype(wgn_predictee) == wndt_trajectory)
232  return test_tree_trajectory(tree,ds,output);
233  else if (wgn_dataset.ftype(wgn_predictee) == wndt_ols)
234  return test_tree_ols(tree,ds,output);
235  else if (wgn_dataset.ftype(wgn_predictee) >= wndt_class)
236  return test_tree_class(tree,ds,output);
237  else
238  return test_tree_float(tree,ds,output);
239 }
240 
241 WNode *wgn_build_tree(float &score)
242 {
243  // Build init node and split it while reducing the impurity
244  WNode *top = new WNode();
245  int margin = 0;
246 
247  wgn_set_up_data(top->get_data(),wgn_dataset,wgn_held_out,TRUE);
248 
249  margin = 0;
250  wagon_split(margin,*top); // recursively split data;
251 
252  if (wgn_held_out > 0)
253  {
254  wgn_set_up_data(top->get_data(),wgn_dataset,wgn_held_out,FALSE);
255  top->held_out_prune();
256  }
257 
258  if (wgn_prune)
259  top->prune();
260 
261  score = summary_results(*top,0);
262 
263  return top;
264 }
265 
266 static void wgn_set_up_data(WVectorVector &data,const WVectorList &ds,int held_out,int in)
267 {
268  // Set data ommitting held_out percent if in is true
269  // or only including 100-held_out percent if in is false
270  int i,j;
271  EST_Litem *d;
272 
273  // Make it definitely big enough
274  data.resize(ds.length());
275 
276  for (j=i=0,d=ds.head(); d != 0; d=d->next(),j++)
277  {
278  if ((in) && ((j%100) >= held_out))
279  data[i++] = ds(d);
280 // else if ((!in) && ((j%100 < held_out)))
281 // data[i++] = ds(d);
282  else if (!in)
283  data[i++] = ds(d);
284 // if ((in) && (j < held_out))
285 // data[i++] = ds(d);
286 // else if ((!in) && (j >=held_out))
287 // data[i++] = ds(d);
288  }
289  // make it the actual size, but don't reset values
290  data.resize(i,1);
291 }
292 
static float test_tree_class(WNode &tree,WDataSet &dataset,ostream *output)
{
    // Test a classification tree against data: builds a confusion
    // matrix and entropy/perplexity figures, then returns a
    // "bigger is better" score chosen by wgn_opt_param:
    //   "entropy"  -> negated perplexity
    //   "B_NB_F1"  -> F1 measure on the class named "B"
    //   otherwise  -> weighted accuracy (correct/total)
    EST_StrStr_KVL pairs;      // (real,predicted) pairs for the confusion matrix
    EST_StrList lex;           // all class names, in discrete order
    EST_Litem *p;
    EST_String predict,real;
    WNode *pnode;
    double H=0,prob;           // H accumulates count-weighted log probabilities
    int i,type;
    float correct=0,total=0, count=0;

    float bcorrect=0, bpredicted=0, bactual=0;   // tallies for B_NB_F1
    float precision=0, recall=0;

    for (p=dataset.head(); p != 0; p=p->next())
    {
        pnode = tree.predict_node((*dataset(p)));
        predict = (EST_String)pnode->get_impurity().value();
        // each sample counts 1 unless a count field supplies a weight
        if (wgn_count_field == -1)
            count = 1.0;
        else
            count = dataset(p)->get_flt_val(wgn_count_field);
        prob = pnode->get_impurity().pd().probability(predict);
        H += (log(prob))*count;
        type = dataset.ftype(wgn_predictee);
        real = wgn_discretes[type].name(dataset(p)->get_int_val(wgn_predictee));

        if (wgn_opt_param == "B_NB_F1")
        {
            //cout << real << " " << predict << endl;
            if (real == "B")
                bactual +=count;
            if (predict == "B")
            {
                bpredicted += count;
                if (real == predict)
                    bcorrect += count;
            }
            // cout <<bactual << " " << bpredicted << " " << bcorrect << endl;
        }
        if (real == predict)
            correct += count;
        total += count;
        pairs.add_item(real,predict,1);
    }
    // collect the full class vocabulary for the confusion matrix axes
    for (i=0; i<wgn_discretes[dataset.ftype(wgn_predictee)].length(); i++)
        lex.append(wgn_discretes[dataset.ftype(wgn_predictee)].name(i));

    const EST_FMatrix &m = confusion(pairs,lex);

    if (output != NULL)
    {
        print_confusion(m,pairs,lex);  // should be to output not stdout
        *output << ";; entropy " << (-1*(H/total)) << " perplexity " <<
            pow(2.0,(-1*(H/total))) << endl;
    }


    // Minus it so bigger is better
    if (wgn_opt_param == "entropy")
        return -pow(2.0,(-1*(H/total)));
    else if(wgn_opt_param == "B_NB_F1")
    {
        // F1 on class "B"; degenerate cases (nothing predicted or no
        // actual B's) default precision/recall to 1
        if(bpredicted == 0)
            precision = 1;
        else
            precision = bcorrect/bpredicted;
        if(bactual == 0)
            recall = 1;
        else
            recall = bcorrect/bactual;
        float fmeasure = 0;
        if((precision+recall) !=0)
            fmeasure = 2* (precision*recall)/(precision+recall);
        cout<< "F1 :" << fmeasure << " Prec:" << precision << " Rec:" << recall << " B-Pred:" << bpredicted << " B-Actual:" << bactual << " B-Correct:" << bcorrect << endl;
        return fmeasure;
    }
    else
        return (float)correct/(float)total;
}
374 
static float test_tree_vector(WNode &tree,WDataSet &dataset,ostream *output)
{
    // Test tree against data to get summary of results VECTOR
    // distance is calculated in zscores (as the values in vector may
    // have quite different ranges) -- but see the awb_debug note below:
    // the normalization is currently overridden.
    WNode *leaf;
    EST_Litem *p;
    float predict, actual;
    EST_SuffStats x,y,xx,yy,xy,se,e;   // running sums for RMSE/correlation
    EST_SuffStats b;                   // per-channel stats over leaf members
    int i,j,pos;
    double cor,error;
    double count;
    EST_Litem *pp;

    for (p=dataset.head(); p != 0; p=p->next())
    {
        leaf = tree.predict_node((*dataset(p)));
        pos = dataset(p)->get_int_val(wgn_predictee);  // row in wgn_VertexTrack
        for (j=0; j<wgn_VertexFeats.num_channels(); j++)
            if (wgn_VertexFeats.a(0,j) > 0.0)   // only channels switched on
            {
                // prediction = mean of this channel over the training
                // vectors that fell into the same leaf
                b.reset();
                for (pp=leaf->get_impurity().members.head(); pp != 0; pp=pp->next())
                {
                    i = leaf->get_impurity().members.item(pp);
                    b += wgn_VertexTrack.a(i,j);
                }
                predict = b.mean();
                actual = wgn_VertexTrack.a(pos,j);
                if (wgn_count_field == -1)
                    count = 1.0;
                else
                    count = dataset(p)->get_flt_val(wgn_count_field);
                x.cumulate(predict,count);
                y.cumulate(actual,count);
                /* Normalized the error by the standard deviation */
                if (b.stddev() == 0)
                    error = predict-actual;
                else
                    error = (predict-actual)/b.stddev();
                // NOTE(review): the next line overrides the z-score
                // normalization above, leaving it dead code
                error = predict-actual; /* awb_debug */
                se.cumulate((error*error),count);
                e.cumulate(fabs(error),count);
                xx.cumulate(predict*predict,count);
                yy.cumulate(actual*actual,count);
                xy.cumulate(predict*actual,count);
            }
    }

    // Pearson's product moment correlation coefficient
//    cor = (xy.mean() - (x.mean()*y.mean()))/
//        (sqrt(xx.mean()-(x.mean()*x.mean())) *
//         sqrt(yy.mean()-(y.mean()*y.mean())));
    // Because when the variation is X is very small we can
    // go negative, thus cause the sqrt's to give FPE
    double v1 = xx.mean()-(x.mean()*x.mean());
    double v2 = yy.mean()-(y.mean()*y.mean());

    double v3 = v1*v2;

    if (v3 <= 0)
        // happens when there's very little variation in x
        cor = 0;
    else
        cor = (xy.mean() - (x.mean()*y.mean()))/ sqrt(v3);

    if (output != NULL)
    {
        if (output != &cout)  // save in output file
            *output
                << ";; RMSE " << ftoString(sqrt(se.mean()),4,1)
                << " Correlation is " << ftoString(cor,4,1)
                << " Mean (abs) Error " << ftoString(e.mean(),4,1)
                << " (" << ftoString(e.stddev(),4,1) << ")" << endl;

        cout << "RMSE " << ftoString(sqrt(se.mean()),4,1)
             << " Correlation is " << ftoString(cor,4,1)
             << " Mean (abs) Error " << ftoString(e.mean(),4,1)
             << " (" << ftoString(e.stddev(),4,1) << ")" << endl;
    }

    if (wgn_opt_param == "rmse")
        return -sqrt(se.mean());  // * -1 so bigger is better
    else
        return cor;  // should really be % variance, I think
}
462 
static float test_tree_trajectory(WNode &tree,WDataSet &dataset,ostream *output)
{
    // Test tree against data to get summary of results TRAJECTORY
    // distance is calculated in zscores (as the values in vector may
    // have quite different ranges)
    // NOT WRITTEN YET -- this body is currently a verbatim copy of
    // test_tree_vector (it does not consult wgn_UnitTrack at all).
    WNode *leaf;
    EST_Litem *p;
    float predict, actual;
    EST_SuffStats x,y,xx,yy,xy,se,e;   // running sums for RMSE/correlation
    EST_SuffStats b;                   // per-channel stats over leaf members
    int i,j,pos;
    double cor,error;
    double count;
    EST_Litem *pp;

    for (p=dataset.head(); p != 0; p=p->next())
    {
        leaf = tree.predict_node((*dataset(p)));
        pos = dataset(p)->get_int_val(wgn_predictee);  // row in wgn_VertexTrack
        for (j=0; j<wgn_VertexFeats.num_channels(); j++)
            if (wgn_VertexFeats.a(0,j) > 0.0)   // only channels switched on
            {
                // prediction = mean of this channel over the training
                // vectors that fell into the same leaf
                b.reset();
                for (pp=leaf->get_impurity().members.head(); pp != 0; pp=pp->next())
                {
                    i = leaf->get_impurity().members.item(pp);
                    b += wgn_VertexTrack.a(i,j);
                }
                predict = b.mean();
                actual = wgn_VertexTrack.a(pos,j);
                if (wgn_count_field == -1)
                    count = 1.0;
                else
                    count = dataset(p)->get_flt_val(wgn_count_field);
                x.cumulate(predict,count);
                y.cumulate(actual,count);
                /* Normalized the error by the standard deviation */
                if (b.stddev() == 0)
                    error = predict-actual;
                else
                    error = (predict-actual)/b.stddev();
                // NOTE(review): the next line overrides the z-score
                // normalization above, leaving it dead code
                error = predict-actual; /* awb_debug */
                se.cumulate((error*error),count);
                e.cumulate(fabs(error),count);
                xx.cumulate(predict*predict,count);
                yy.cumulate(actual*actual,count);
                xy.cumulate(predict*actual,count);
            }
    }

    // Pearson's product moment correlation coefficient
//    cor = (xy.mean() - (x.mean()*y.mean()))/
//        (sqrt(xx.mean()-(x.mean()*x.mean())) *
//         sqrt(yy.mean()-(y.mean()*y.mean())));
    // Because when the variation is X is very small we can
    // go negative, thus cause the sqrt's to give FPE
    double v1 = xx.mean()-(x.mean()*x.mean());
    double v2 = yy.mean()-(y.mean()*y.mean());

    double v3 = v1*v2;

    if (v3 <= 0)
        // happens when there's very little variation in x
        cor = 0;
    else
        cor = (xy.mean() - (x.mean()*y.mean()))/ sqrt(v3);

    if (output != NULL)
    {
        if (output != &cout)  // save in output file
            *output
                << ";; RMSE " << ftoString(sqrt(se.mean()),4,1)
                << " Correlation is " << ftoString(cor,4,1)
                << " Mean (abs) Error " << ftoString(e.mean(),4,1)
                << " (" << ftoString(e.stddev(),4,1) << ")" << endl;

        cout << "RMSE " << ftoString(sqrt(se.mean()),4,1)
             << " Correlation is " << ftoString(cor,4,1)
             << " Mean (abs) Error " << ftoString(e.mean(),4,1)
             << " (" << ftoString(e.stddev(),4,1) << ")" << endl;
    }

    if (wgn_opt_param == "rmse")
        return -sqrt(se.mean());  // * -1 so bigger is better
    else
        return cor;  // should really be % variance, I think
}
551 
552 static float test_tree_cluster(WNode &tree,WDataSet &dataset,ostream *output)
553 {
554  // Test tree against data to get summary of results for cluster trees
555  WNode *leaf;
556  int real;
557  int right_cluster=0;
558  EST_SuffStats ranking, meandist;
559  EST_Litem *p;
560 
561  for (p=dataset.head(); p != 0; p=p->next())
562  {
563  leaf = tree.predict_node((*dataset(p)));
564  real = dataset(p)->get_int_val(wgn_predictee);
565  meandist += leaf->get_impurity().cluster_distance(real);
566  right_cluster += leaf->get_impurity().in_cluster(real);
567  ranking += leaf->get_impurity().cluster_ranking(real);
568  }
569 
570  if (output != NULL)
571  {
572  // Want number in right class, mean distance in sds, mean ranking
573  if (output != &cout) // save in output file
574  *output << ";; Right cluster " << right_cluster << " (" <<
575  (int)(100.0*(float)right_cluster/(float)dataset.length()) <<
576  "%) mean ranking " << ranking.mean() << " mean distance "
577  << meandist.mean() << endl;
578  cout << "Right cluster " << right_cluster << " (" <<
579  (int)(100.0*(float)right_cluster/(float)dataset.length()) <<
580  "%) mean ranking " << ranking.mean() << " mean distance "
581  << meandist.mean() << endl;
582  }
583 
584  return 10000-meandist.mean(); // this doesn't work but I tested it
585 }
586 
static float test_tree_float(WNode &tree,WDataSet &dataset,ostream *output)
{
    // Test tree against data to get summary of results FLOAT:
    // accumulates (optionally count-weighted) RMSE, mean absolute
    // error and Pearson correlation between predicted and real values.
    // Returns -RMSE when wgn_opt_param is "rmse", else the correlation.
    EST_Litem *p;
    float predict,real;
    EST_SuffStats x,y,xx,yy,xy,se,e;   // running sums for RMSE/correlation
    double cor,error;
    double count;

    for (p=dataset.head(); p != 0; p=p->next())
    {
        predict = tree.predict((*dataset(p)));
        real = dataset(p)->get_flt_val(wgn_predictee);
        // each sample counts 1 unless a count field supplies a weight
        if (wgn_count_field == -1)
            count = 1.0;
        else
            count = dataset(p)->get_flt_val(wgn_count_field);
        x.cumulate(predict,count);
        y.cumulate(real,count);
        error = predict-real;
        se.cumulate((error*error),count);
        e.cumulate(fabs(error),count);
        xx.cumulate(predict*predict,count);
        yy.cumulate(real*real,count);
        xy.cumulate(predict*real,count);
    }

    // Pearson's product moment correlation coefficient
//    cor = (xy.mean() - (x.mean()*y.mean()))/
//        (sqrt(xx.mean()-(x.mean()*x.mean())) *
//         sqrt(yy.mean()-(y.mean()*y.mean())));
    // Because when the variation is X is very small we can
    // go negative, thus cause the sqrt's to give FPE
    double v1 = xx.mean()-(x.mean()*x.mean());
    double v2 = yy.mean()-(y.mean()*y.mean());

    double v3 = v1*v2;

    if (v3 <= 0)
        // happens when there's very little variation in x
        cor = 0;
    else
        cor = (xy.mean() - (x.mean()*y.mean()))/ sqrt(v3);

    if (output != NULL)
    {
        if (output != &cout)  // save in output file
            *output
                << ";; RMSE " << ftoString(sqrt(se.mean()),4,1)
                << " Correlation is " << ftoString(cor,4,1)
                << " Mean (abs) Error " << ftoString(e.mean(),4,1)
                << " (" << ftoString(e.stddev(),4,1) << ")" << endl;

        cout << "RMSE " << ftoString(sqrt(se.mean()),4,1)
             << " Correlation is " << ftoString(cor,4,1)
             << " Mean (abs) Error " << ftoString(e.mean(),4,1)
             << " (" << ftoString(e.stddev(),4,1) << ")" << endl;
    }

    if (wgn_opt_param == "rmse")
        return -sqrt(se.mean());  // * -1 so bigger is better
    else
        return cor;  // should really be % variance, I think
}
651 
static float test_tree_ols(WNode &tree,WDataSet &dataset,ostream *output)
{
    // Test tree against data to get summary of results OLS.
    // WARNING: the per-leaf OLS prediction is not implemented --
    // predict is hard-wired to 0.0 below, so the reported statistics
    // reflect a constant-zero predictor.
    EST_Litem *p;
    WNode *leaf;
    float predict,real;
    EST_SuffStats x,y,xx,yy,xy,se,e;   // running sums for RMSE/correlation
    double cor,error;
    double count;

    for (p=dataset.head(); p != 0; p=p->next())
    {
        leaf = tree.predict_node((*dataset(p)));
        // do ols to get predict;
        predict = 0.0;  // This is incomplete ! you need to use leaf
        real = dataset(p)->get_flt_val(wgn_predictee);
        if (wgn_count_field == -1)
            count = 1.0;
        else
            count = dataset(p)->get_flt_val(wgn_count_field);
        x.cumulate(predict,count);
        y.cumulate(real,count);
        error = predict-real;
        se.cumulate((error*error),count);
        e.cumulate(fabs(error),count);
        xx.cumulate(predict*predict,count);
        yy.cumulate(real*real,count);
        xy.cumulate(predict*real,count);
    }

    // Pearson's product moment correlation coefficient
//    cor = (xy.mean() - (x.mean()*y.mean()))/
//        (sqrt(xx.mean()-(x.mean()*x.mean())) *
//         sqrt(yy.mean()-(y.mean()*y.mean())));
    // Because when the variation is X is very small we can
    // go negative, thus cause the sqrt's to give FPE
    double v1 = xx.mean()-(x.mean()*x.mean());
    double v2 = yy.mean()-(y.mean()*y.mean());

    double v3 = v1*v2;

    if (v3 <= 0)
        // happens when there's very little variation in x
        cor = 0;
    else
        cor = (xy.mean() - (x.mean()*y.mean()))/ sqrt(v3);

    if (output != NULL)
    {
        if (output != &cout)  // save in output file
            *output
                << ";; RMSE " << ftoString(sqrt(se.mean()),4,1)
                << " Correlation is " << ftoString(cor,4,1)
                << " Mean (abs) Error " << ftoString(e.mean(),4,1)
                << " (" << ftoString(e.stddev(),4,1) << ")" << endl;

        cout << "RMSE " << ftoString(sqrt(se.mean()),4,1)
             << " Correlation is " << ftoString(cor,4,1)
             << " Mean (abs) Error " << ftoString(e.mean(),4,1)
             << " (" << ftoString(e.stddev(),4,1) << ")" << endl;
    }

    if (wgn_opt_param == "rmse")
        return -sqrt(se.mean());  // * -1 so bigger is better
    else
        return cor;  // should really be % variance, I think
}
719 
static int wagon_split(int margin, WNode &node)
{
    // Recursively split the given node (if possible).  Returns TRUE if
    // the node was split, FALSE if it became a leaf.  margin is only
    // used to indent verbose output.  Decrements the global
    // wgn_max_questions budget on each successful split.
    WQuestion q;
    WNode *l,*r;

    node.set_impurity(WImpurity(node.get_data()));
    if (wgn_max_questions < 1)   // question budget exhausted: force a leaf
        return FALSE;

    q = find_best_question(node.get_data());

/*    printf("q.score() %f impurity %f\n",
          q.get_score(),
          node.get_impurity().measure()); */

    double impurity_measure = node.get_impurity().measure();
    double question_score = q.get_score();

    // only split when a valid question actually reduces the impurity
    if ((question_score < WGN_HUGE_VAL) &&
        (question_score < impurity_measure))
    {
        // Ok its worth a split
        l = new WNode();
        r = new WNode();
        wgn_find_split(q,node.get_data(),l->get_data(),r->get_data());
        node.set_subnodes(l,r);
        node.set_question(q);
        if (wgn_verbose)
        {
            int i;
            for (i=0; i < margin; i++)
                cout << " ";
            cout << q << endl;
        }
        wgn_max_questions--;
        margin++;
        wagon_split(margin,*l);
        margin++;
        wagon_split(margin,*r);
        margin--;
        return TRUE;
    }
    else
    {
        if (wgn_verbose)
        {
            int i;
            for (i=0; i < margin; i++)
                cout << " ";
            cout << "stopped samples: " << node.samples() << " impurity: "
                 << node.get_impurity() << endl;
        }
        margin--;
        return FALSE;
    }
}
777 
778 void wgn_find_split(WQuestion &q,WVectorVector &ds,
780 {
781  int i, iy, in;
782 
783  if (wgn_dropout_samples > 0.0)
784  {
785  // You need to count the number of yes/no again in all ds
786  for (iy=in=i=0; i < ds.n(); i++)
787  if (q.ask(*ds(i)) == TRUE)
788  iy++;
789  else
790  in++;
791  }
792  else
793  {
794  // Current counts are corrent (as all data was used)
795  iy = q.get_yes();
796  in = q.get_no();
797  }
798 
799  y.resize(iy);
800  n.resize(in);
801 
802  for (iy=in=i=0; i < ds.n(); i++)
803  if (q.ask(*ds(i)) == TRUE)
804  y[iy++] = ds(i);
805  else
806  n[in++] = ds(i);
807 
808 }
809 
static float wgn_random_number(float x)
{
    // Uniform random float in [0,x].
    // NOTE(review): assumes random()'s range matches RAND_MAX (true on
    // glibc where both are 2^31-1) -- confirm on other platforms.
    const float unit = (float)random() / (float)RAND_MAX;
    return unit * x;
}
815 
816 #ifdef OMP_WAGON
817 static WQuestion find_best_question(WVectorVector &dset)
818 {
819  // Ask all possible questions and find the best one
820  int i;
821  float bscore,tscore;
822  WQuestion test_ques, best_ques;
823  WQuestion** questions=new WQuestion*[wgn_dataset.width()];
824  float* scores = new float[wgn_dataset.width()];
825  bscore = tscore = WGN_HUGE_VAL;
826  best_ques.set_score(bscore);
827 #pragma omp parallel
828 #pragma omp for
829  for (i=0;i < wgn_dataset.width(); i++)
830  {
831  questions[i] = new WQuestion;
832  questions[i]->set_score(bscore);}
833 #pragma omp parallel
834 #pragma omp for
835  for (i=0;i < wgn_dataset.width(); i++)
836  {
837  if ((wgn_dataset.ignore(i) == TRUE) ||
838  (i == wgn_predictee))
839  scores[i] = WGN_HUGE_VAL; // ignore this feature this time
840  else if (wgn_random_number(1.0) < wgn_dropout_feats)
841  scores[i] = WGN_HUGE_VAL; // randomly dropout feature
842  else if (wgn_dataset.ftype(i) == wndt_binary)
843  {
844  construct_binary_ques(i,*questions[i]);
845  scores[i] = wgn_score_question(*questions[i],dset);
846  }
847  else if (wgn_dataset.ftype(i) == wndt_float)
848  {
849  scores[i] = construct_float_ques(i,*questions[i],dset);
850  }
851  else if (wgn_dataset.ftype(i) == wndt_ignore)
852  scores[i] = WGN_HUGE_VAL; // always ignore this feature
853 #if 0
854  // This doesn't work reasonably
855  else if (wgn_csubset && (wgn_dataset.ftype(i) >= wndt_class))
856  {
857  wagon_error("subset selection temporarily deleted");
858  tscore = construct_class_ques_subset(i,test_ques,dset);
859  }
860 #endif
861  else if (wgn_dataset.ftype(i) >= wndt_class)
862  scores[i] = construct_class_ques(i,*questions[i],dset);
863  }
864  for (i=0;i < wgn_dataset.width(); i++)
865  {
866  if (scores[i] < bscore)
867  {
868  memcpy(&best_ques,questions[i],sizeof(*questions[i]));
869  best_ques.set_score(scores[i]);
870  bscore = scores[i];
871  }
872  delete questions[i];
873  }
874  delete [] questions;
875  delete [] scores;
876  return best_ques;
877 }
878 #else
879 // No OMP parallelism
880 static WQuestion find_best_question(WVectorVector &dset)
881 {
882  // Ask all possible questions and find the best one
883  int i;
884  float bscore,tscore;
885  WQuestion test_ques, best_ques;
886 
887  bscore = tscore = WGN_HUGE_VAL;
888  best_ques.set_score(bscore);
889  // test each feature with each possible question
890  for (i=0;i < wgn_dataset.width(); i++)
891  {
892  if ((wgn_dataset.ignore(i) == TRUE) ||
893  (i == wgn_predictee))
894  tscore = WGN_HUGE_VAL; // ignore this feature this time
895  else if (wgn_random_number(1.0) < wgn_dropout_feats)
896  tscore = WGN_HUGE_VAL; // randomly dropout feature
897  else if (wgn_dataset.ftype(i) == wndt_binary)
898  {
899  construct_binary_ques(i,test_ques);
900  tscore = wgn_score_question(test_ques,dset);
901  }
902  else if (wgn_dataset.ftype(i) == wndt_float)
903  {
904  tscore = construct_float_ques(i,test_ques,dset);
905  }
906  else if (wgn_dataset.ftype(i) == wndt_ignore)
907  tscore = WGN_HUGE_VAL; // always ignore this feature
908 #if 0
909  // This doesn't work reasonably
910  else if (wgn_csubset && (wgn_dataset.ftype(i) >= wndt_class))
911  {
912  wagon_error("subset selection temporarily deleted");
913  tscore = construct_class_ques_subset(i,test_ques,dset);
914  }
915 #endif
916  else if (wgn_dataset.ftype(i) >= wndt_class)
917  tscore = construct_class_ques(i,test_ques,dset);
918  if (tscore < bscore)
919  {
920  best_ques = test_ques;
921  best_ques.set_score(tscore);
922  bscore = tscore;
923  }
924  }
925 
926  return best_ques;
927 }
928 #endif
929 
930 static float construct_class_ques(int feat,WQuestion &ques,WVectorVector &ds)
931 {
932  // Find out which member of a class gives the best split
933  float tscore,bscore = WGN_HUGE_VAL;
934  int cl;
935  WQuestion test_q;
936 
937  test_q.set_fp(feat);
938  test_q.set_oper(wnop_is);
939  ques = test_q;
940 
941  for (cl=0; cl < wgn_discretes[wgn_dataset.ftype(feat)].length(); cl++)
942  {
943  test_q.set_operand1(EST_Val(cl));
944  tscore = wgn_score_question(test_q,ds);
945  if (tscore < bscore)
946  {
947  ques = test_q;
948  bscore = tscore;
949  }
950  }
951 
952  return bscore;
953 }
954 
955 #if 0
static float construct_class_ques_subset(int feat,WQuestion &ques,
                                         WVectorVector &ds)
{
    // Find out which subset of a class gives the best split.
    // We first measure the subset of the data for each member of
    // of the class. Then order those splits. Then go through finding
    // where the best split of that ordered list is. This is described
    // on page 247 of Breiman et al.
    // NOTE(review): this function is compiled out (#if 0) in the
    // current build and uses the older LISP-operand WQuestion API.
    float tscore,bscore = WGN_HUGE_VAL;
    LISP l;
    int cl;

    ques.set_fp(feat);
    ques.set_oper(wnop_is);
    // scores[cl] holds the split score of the question "feat is cl"
    float *scores = new float[wgn_discretes[wgn_dataset.ftype(feat)].length()];

    // Only do it for exists values
    for (cl=0; cl < wgn_discretes[wgn_dataset.ftype(feat)].length(); cl++)
    {
        ques.set_operand(flocons(cl));
        scores[cl] = wgn_score_question(ques,ds);
    }

    // Members ordered by score; members that scored WGN_HUGE_VAL
    // (no valid split) are omitted entirely
    LISP order = sort_class_scores(feat,scores);
    if (order == NIL)
        return WGN_HUGE_VAL;
    if (siod_llength(order) == 1)
    {   // Only one so we know the best "split"
        ques.set_oper(wnop_is);
        ques.set_operand(car(order));
        return scores[get_c_int(car(order))];
    }

    // Otherwise try each suffix of the ordered member list as an
    // "in" (membership) question, keeping the best scoring one
    ques.set_oper(wnop_in);
    LISP best_l = NIL;
    for (l=cdr(order); CDR(l) != NIL; l = cdr(l))
    {
        ques.set_operand(l);
        tscore = wgn_score_question(ques,ds);
        if (tscore < bscore)
        {
            best_l = l;
            bscore = tscore;
        }

    }

    if (best_l != NIL)
    {
        if (siod_llength(best_l) == 1)
        {   // singleton subset degenerates to a plain "is" question
            ques.set_oper(wnop_is);
            ques.set_operand(car(best_l));
        }
        else if (equal(cdr(order),best_l) != NIL)
        {   // subset is everything but the best member: ask "is" of it instead
            ques.set_oper(wnop_is);
            ques.set_operand(car(order));
        }
        else
        {
            cout << "Found a good subset" << endl;
            ques.set_operand(best_l);
        }
    }
    // NOTE(review): `scores` is never delete[]d here -- would leak if
    // this code were ever re-enabled; also the early returns above
    // skip any cleanup.
    return bscore;
}
1023 
static LISP sort_class_scores(int feat,float *scores)
{
    // returns sorted list of (non WGN_HUGE_VAL) items
    // Builds a LISP list of class-member indices ordered ascending by
    // their score (best split first) via insertion into a linked list.
    // NOTE(review): compiled out (#if 0) along with its only caller.
    int i;
    LISP items = NIL;
    LISP l;

    for (i=0; i < wgn_discretes[wgn_dataset.ftype(feat)].length(); i++)
    {
        if (scores[i] != WGN_HUGE_VAL)  // skip members with no valid split
        {
            if (items == NIL)
                items = cons(flocons(i),NIL);
            else
            {
                for (l=items; l != NIL; l=cdr(l))
                {
                    if (scores[i] < scores[get_c_int(car(l))])
                    {
                        // insert i before l: push l's car into a new
                        // cell after it, then overwrite the car with i
                        CDR(l) = cons(car(l),cdr(l));
                        CAR(l) = flocons(i);
                        break;
                    }
                }
                if (l == NIL)  // scored worse than everything: append
                    items = l_append(items,cons(flocons(i),NIL));
            }
        }
    }
    return items;
}
1055 #endif
1056 
1057 static float construct_float_ques(int feat,WQuestion &ques,WVectorVector &ds)
1058 {
1059  // Find out a split of the range that gives the best score
1060  // Naively does this by partitioning the range into float_range_split slots
1061  float tscore,bscore = WGN_HUGE_VAL;
1062  int d, i;
1063  float p;
1064  WQuestion test_q;
1065  float max,min,val,incr;
1066 
1067  test_q.set_fp(feat);
1068  test_q.set_oper(wnop_lessthan);
1069  ques = test_q;
1070 
1071  min = max = ds(0)->get_flt_val(feat); /* set up some value */
1072  for (d=0; d < ds.n(); d++)
1073  {
1074  val = ds(d)->get_flt_val(feat);
1075  if (val < min)
1076  min = val;
1077  else if (val > max)
1078  max = val;
1079  }
1080  if (max == min) // we're pure
1081  return WGN_HUGE_VAL;
1082  incr = (max-min)/wgn_float_range_split;
1083  // so do float_range-1 splits
1084  /* We calculate this based on the number splits, not the increments, */
1085  /* becuase incr can be so small it doesn't increment p */
1086  for (i=0,p=min+incr; i < wgn_float_range_split; i++,p += incr )
1087  {
1088  test_q.set_operand1(EST_Val(p));
1089  tscore = wgn_score_question(test_q,ds);
1090  if (tscore < bscore)
1091  {
1092  ques = test_q;
1093  bscore = tscore;
1094  }
1095  }
1096 
1097  return bscore;
1098 }
1099 
static void construct_binary_ques(int feat,WQuestion &test_ques)
{
    // construct a question. Not sure about this in general
    // of course continuous/categorical features will require different
    // rule and non-binary ones will require some test point
    // For a binary feature there is exactly one candidate question:
    // the wnop_binary test, which needs no operand value (empty string).
    test_ques.set_fp(feat);
    test_ques.set_oper(wnop_binary);
    test_ques.set_operand1(EST_Val(""));
}
1110 
1111 static float score_question_set(WQuestion &q, WVectorVector &ds, int ignorenth)
1112 {
1113  // score this question as a possible split by finding
1114  // the sum of the impurities when ds is split with this question
1115  WImpurity y,n;
1116  int d, num_yes, num_no;
1117  float count;
1118  WVector *wv;
1119 
1120  num_yes = num_no = 0;
1121  y.data = &ds;
1122  n.data = &ds;
1123  for (d=0; d < ds.n(); d++)
1124  {
1125  if (wgn_random_number(1.0) < wgn_dropout_samples)
1126  {
1127  continue; // dropout this sample
1128  }
1129  else if ((ignorenth < 2) ||
1130  (d%ignorenth != ignorenth-1))
1131  {
1132  wv = ds(d);
1133  if (wgn_count_field == -1)
1134  count = 1.0;
1135  else
1136  count = (*wv)[wgn_count_field];
1137 
1138  if (q.ask(*wv) == TRUE)
1139  {
1140  num_yes++;
1141  if (wgn_dataset.ftype(wgn_predictee) == wndt_ols)
1142  y.cumulate(d,count); // note the sample number not value
1143  else
1144  y.cumulate((*wv)[wgn_predictee],count);
1145  }
1146  else
1147  {
1148  num_no++;
1149  if (wgn_dataset.ftype(wgn_predictee) == wndt_ols)
1150  n.cumulate(d,count); // note the sample number not value
1151  else
1152  n.cumulate((*wv)[wgn_predictee],count);
1153  }
1154  }
1155  }
1156 
1157  q.set_yes(num_yes);
1158  q.set_no(num_no);
1159 
1160  int min_cluster;
1161 
1162  if ((wgn_balance == 0.0) ||
1163  (ds.n()/wgn_balance < wgn_min_cluster_size))
1164  min_cluster = wgn_min_cluster_size;
1165  else
1166  min_cluster = (int)(ds.n()/wgn_balance);
1167 
1168  if ((y.samples() < min_cluster) ||
1169  (n.samples() < min_cluster))
1170  return WGN_HUGE_VAL;
1171 
1172  float ym,nm,bm;
1173  // printf("awb_debug score_question_set X %f Y %f\n",
1174  // y.samples(), n.samples());
1175  ym = y.measure();
1176  nm = n.measure();
1177  bm = ym + nm;
1178 
1179  /* cout << q << endl;
1180  printf("test question y %f n %f b %f\n",
1181  ym, nm, bm); */
1182 
1183  return bm/2.0;
1184 }
1185 
1186 float wgn_score_question(WQuestion &q, WVectorVector &ds)
1187 {
1188  // This level of indirection was introduced for later expansion
1189 
1190  return score_question_set(q,ds,1);
1191 }
1192 
1193 WNode *wagon_stepwise(float limit)
1194 {
1195  // Find the best single features and incrementally add features
1196  // that best improve result until it doesn't improve.
1197  // This is basically to automate what Kurt was doing in building
1198  // trees, he then automated it in PERL and as it seemed to work
1199  // I put it into wagon itself.
1200  // This can be pretty computationally intensive.
1201  WNode *best = 0,*new_best = 0;
1202  float bscore,best_score = -WGN_HUGE_VAL;
1203  int best_feat,i;
1204  int nf = 1;
1205 
1206  // Set all features to ignore
1207  for (i=0; i < wgn_dataset.width(); i++)
1208  wgn_dataset.set_ignore(i,TRUE);
1209 
1210  for (i=0; i < wgn_dataset.width(); i++)
1211  {
1212  if ((wgn_dataset.ftype(i) == wndt_ignore) || (i == wgn_predictee))
1213  {
1214  // This skips the round not because this has anything to
1215  // do with this feature being (user specified) ignored
1216  // but because it indicates there is one less cycle that is
1217  // necessary
1218  continue;
1219  }
1220  new_best = wagon_stepwise_find_next_best(bscore,best_feat);
1221 
1222  if ((bscore - fabs(bscore * (limit/100))) <= best_score)
1223  {
1224  // gone as far as we can
1225  delete new_best;
1226  break;
1227  }
1228  else
1229  {
1230  best_score = bscore;
1231  delete best;
1232  best = new_best;
1233  wgn_dataset.set_ignore(best_feat,FALSE);
1234  if (!wgn_quiet)
1235  {
1236  fprintf(stdout,"FEATURE %d %s: %2.4f\n",
1237  nf,
1238  (const char *)wgn_dataset.feat_name(best_feat),
1239  best_score);
1240  fflush(stdout);
1241  nf++;
1242  }
1243  }
1244  }
1245 
1246  return best;
1247 }
1248 
1249 static WNode *wagon_stepwise_find_next_best(float &bscore,int &best_feat)
1250 {
1251  // Find which of the currently ignored features will best improve
1252  // the result
1253  WNode *best = 0;
1254  float best_score = -WGN_HUGE_VAL;
1255  int best_new_feat = -1;
1256  int i;
1257 
1258  for (i=0; i < wgn_dataset.width(); i++)
1259  {
1260  if (wgn_dataset.ftype(i) == wndt_ignore)
1261  continue; // user wants me to ignore this completely
1262  else if (i == wgn_predictee) // can't use the answer
1263  continue;
1264  else if (wgn_dataset.ignore(i) == TRUE)
1265  {
1266  WNode *current;
1267  float score;
1268 
1269  // Allow this feature to participate
1270  wgn_dataset.set_ignore(i,FALSE);
1271 
1272  current = wgn_build_tree(score);
1273 
1274  if (score > best_score)
1275  {
1276  best_score = score;
1277  delete best;
1278  best = current;
1279  best_new_feat = i;
1280 // fprintf(stdout,"BETTER FEATURE %d %s: %2.4f\n",
1281 // i,
1282 // (const char *)wgn_dataset.feat_name(i),
1283 // best_score);
1284 // fflush(stdout);
1285  }
1286  else
1287  delete current;
1288 
1289  // switch it off again
1290  wgn_dataset.set_ignore(i,TRUE);
1291  }
1292  }
1293 
1294  bscore = best_score;
1295  best_feat = best_new_feat;
1296  return best;
1297 }
EST_TokenStream::eof
int eof()
end of file
Definition: EST_Token.h:356
EST_Val
Definition: EST_Val.h:70
WQuestion
Definition: EST_Wagon.h:106
EST_TList::item
T & item(const EST_Litem *p)
Definition: EST_TList.h:133
EST_TList
Definition: EST_TList.h:59
EST_Discrete::name
const EST_String & name(const int n) const
The name given the index.
Definition: EST_simplestats.h:94
EST_SuffStats::mean
double mean(void) const
mean of currently cummulated values
Definition: EST_simplestats.h:163
EST_TokenStream
Definition: EST_Token.h:235
EST_TList::append
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:191
EST_SuffStats
Definition: EST_simplestats.h:136
EST_Track::a
float & a(int i, int c=0)
Definition: EST_Track.cc:1022
EST_SuffStats::reset
void reset(void)
reset internal values
Definition: EST_simplestats.h:153
EST_Track::num_channels
int num_channels() const
return number of channels in track
Definition: EST_Track.h:656
EST_TokenStream::set_SingleCharSymbols
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols
Definition: EST_Token.h:338
EST_TokenStream::close
void close(void)
Close stream.
Definition: EST_Token.cc:406
WDataSet
Definition: EST_Wagon.h:86
EST_TokenStream::set_PrePunctuationSymbols
void set_PrePunctuationSymbols(const EST_String &ps)
set which characters are to be treated as (post) punctuation
Definition: EST_Token.h:344
EST_TokenStream::eoln
int eoln()
end of line
Definition: EST_Token.cc:818
EST_TokenStream::open
int open(const EST_String &filename)
open a \Ref{EST_TokenStream} for a file.
Definition: EST_Token.cc:200
EST_UItem
Definition: EST_UList.h:51
EST_TVector::resize
void resize(int n, int set=1)
Definition: EST_TVector.cc:196
EST_TokenStream::get
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:486
EST_String
Definition: EST_String.h:70
EST_SuffStats::stddev
double stddev(void) const
standard deviation of currently cummulated values
Definition: EST_simplestats.h:168
WImpurity
Definition: EST_Wagon.h:146
EST_TokenStream::set_PunctuationSymbols
void set_PunctuationSymbols(const EST_String &ps)
set which characters are to be treated as (post) punctuation
Definition: EST_Token.h:341
EST_TKVL< EST_String, EST_String >
EST_Track
Definition: EST_Track.h:89
Discretes
Definition: EST_simplestats.h:114
EST_TKVL::add_item
int add_item(const K &rkey, const V &rval, int no_search=0)
add key-val pair to list
Definition: EST_TKVL.cc:248
EST_TVector::n
INLINE int n() const
number of items in vector.
Definition: EST_TVector.h:254
WNode
Definition: EST_Wagon.h:222
WVector
Definition: EST_Wagon.h:65
EST_TVector< WVector * >
EST_TokenStream::peek
EST_Token & peek(void)
peek at next token
Definition: EST_Token.cc:830
EST_FMatrix
Definition: EST_FMatrix.h:56