# HG changeset patch # User Jordi Gutiérrez Hermoso # Date 1323550562 18000 # Node ID ace890ed0ed922e36afea7414b65195a2fb37d53 # Parent 7f92093ea77d410c586cca2ce2c5aace03b6279a Use lookup to look for all words at once diff --git a/processEmail.m b/processEmail.m --- a/processEmail.m +++ b/processEmail.m @@ -54,6 +54,8 @@ ## Process file l = 0; + str_words = {}; + while ~isempty(email_contents) ## Tokenize and also get rid of any punctuation @@ -74,12 +76,9 @@ if length(str) < 1 continue; endif - - ## Convert the vocabulary list - idx = lookup (vocabList, str, "m"); - if (idx) - word_indices(end+1) = idx; - endif + + ## Store the words + str_words{end+1} = str; ## Print to screen, ensuring that the output lines are not too long if (l + length(str) + 1) > 78 @@ -91,6 +90,9 @@ endwhile + word_indices = lookup (vocabList, str_words, "m"); + word_indices (word_indices == 0) = []; + ## Print footer fprintf("\n\n=========================\n");