diff getVocabList.m @ 0:f602dc601e9e

Initial commit
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Mon, 21 Nov 2011 15:08:02 -0500 (2011-11-21)
parents
children
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/getVocabList.m
@@ -0,0 +1,25 @@
+function vocabList = getVocabList()
+%GETVOCABLIST Read the fixed vocabulary list stored in vocab.txt.
+%   vocabList = GETVOCABLIST() returns an n-by-1 cell array of strings
+%   (n = 1899) in which entry i is the i-th word read from vocab.txt.
+%   Each record in vocab.txt is read as an integer index followed by a word.
+%   Raises error 'getVocabList:fileOpen' if vocab.txt cannot be opened.
+
+%% Read the fixed vocabulary list
+[fid, msg] = fopen('vocab.txt', 'r');
+if fid < 0
+    error('getVocabList:fileOpen', 'Cannot open vocab.txt: %s', msg);
+end
+
+n = 1899;  % Total number of words in the dictionary
+
+% Words are kept in a cell array: position i holds the word with index i
+vocabList = cell(n, 1);
+for i = 1:n
+    % Word index (read and discarded; it is expected to equal i)
+    fscanf(fid, '%d', 1);
+    % Actual word
+    vocabList{i} = fscanf(fid, '%s', 1);
+end
+fclose(fid);
+end