Mercurial > hg > machine-learning-hw6
diff processEmail.m @ 3:ace890ed0ed9 default tip
Use lookup to look for all words at once
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Sat, 10 Dec 2011 15:56:02 -0500 (2011-12-10) |
parents | 7f92093ea77d |
children |
line wrap: on
line diff
--- a/processEmail.m
+++ b/processEmail.m
@@ -54,6 +54,8 @@
 ## Process file
 l = 0;
+ str_words = {};
+
 while ~isempty(email_contents)
 ## Tokenize and also get rid of any punctuation
@@ -74,12 +76,9 @@
 if length(str) < 1
 continue;
 endif
-
- ## Convert the vocabulary list
- idx = lookup (vocabList, str, "m");
- if (idx)
- word_indices(end+1) = idx;
- endif
+
+ ## Store the words
+ str_words{end+1} = str;
 ## Print to screen, ensuring that the output lines are not too long
 if (l + length(str) + 1) > 78
@@ -91,6 +90,9 @@
 endwhile
+ word_indices = lookup (vocabList, str_words, "m");
+ word_indices (word_indices == 0) = [];
+
 ## Print footer
 fprintf("\n\n=========================\n");