# HG changeset patch
# User Jordi Gutiérrez Hermoso <jordigh@octave.org>
# Date 1323550562 18000
# Node ID ace890ed0ed922e36afea7414b65195a2fb37d53
# Parent  7f92093ea77d410c586cca2ce2c5aace03b6279a
Use lookup to look for all words at once

diff --git a/processEmail.m b/processEmail.m
--- a/processEmail.m
+++ b/processEmail.m
@@ -54,6 +54,8 @@
   ## Process file
   l = 0;
 
+  str_words = {};
+
   while ~isempty(email_contents)
 
     ## Tokenize and also get rid of any punctuation
@@ -74,12 +76,9 @@
     if length(str) < 1
       continue;
     endif
-    
-    ## Convert the vocabulary list
-    idx = lookup (vocabList, str, "m");
-    if (idx)
-      word_indices(end+1) = idx;
-    endif
+
+    ## Store the words
+    str_words{end+1} = str;
 
     ## Print to screen, ensuring that the output lines are not too long
     if (l + length(str) + 1) > 78
@@ -91,6 +90,9 @@
     
   endwhile
 
+  word_indices = lookup (vocabList, str_words, "m");
+  word_indices (word_indices == 0) = [];
+  
   ## Print footer
   fprintf("\n\n=========================\n");