Modificación de la clase glove_word_embeddings

parent 80e851ea
,fmplaza,SINAI-155-1,27.06.2018 17:01,file:///home/fmplaza/.config/libreoffice/4;
\ No newline at end of file
......@@ -149,7 +149,7 @@ class GloveWordEmbednigs(AbstractWordEmbedding):
self.__word_embeddings.clear()
def __load_full(self, index):
def __load_full(self, index, embeddings_number):
"""Load the full file of word embeddings
Args:
......@@ -158,12 +158,21 @@ class GloveWordEmbednigs(AbstractWordEmbedding):
own_partition = str.partition
own_strip = str.strip
with open(self.__path_file, "r", encoding="utf-8") as glove_file:
for line in glove_file:
glove_fields = own_partition(line, self.__GLOVE_SEPARATOR)
glove_vector = fromstring(own_strip(glove_fields[2]), dtype=float, sep=" ")
self.__word_indexes[own_strip(glove_fields[0])] = index
self.__word_embeddings.append(glove_vector)
index += 1
if(embeddings_number == None):
for line in glove_file:
glove_fields = own_partition(line, self.__GLOVE_SEPARATOR)
glove_vector = fromstring(own_strip(glove_fields[2]), dtype=float, sep=" ")
self.__word_indexes[own_strip(glove_fields[0])] = index
self.__word_embeddings.append(glove_vector)
index += 1
elif(embeddings_number!=None):
for i in range(embeddings_number):
line = glove_file.readline()
glove_fields = own_partition(line, self.__GLOVE_SEPARATOR)
glove_vector = fromstring(own_strip(glove_fields[2]), dtype=float, sep=" ")
self.__word_indexes[own_strip(glove_fields[0])] = index
self.__word_embeddings.append(glove_vector)
index += 1
def __load_only_vocabulary(self, index, vocabulary):
"""Load only the words of the vocabulary
......@@ -187,7 +196,7 @@ class GloveWordEmbednigs(AbstractWordEmbedding):
index += 1
def load(self, begin_ofset=None, vocabulary=None):
def load(self, embeddings_number = None, begin_ofset=None, vocabulary=None):
"""Load the Glove vectors file into memory
Args:
......@@ -209,7 +218,7 @@ class GloveWordEmbednigs(AbstractWordEmbedding):
self.__word_embeddings.append([]) #Index 0 is reserved
if(vocabulary is None):
self.__load_full(begin_ofset)
self.__load_full(begin_ofset, embeddings_number)
else:
self.__load_only_vocabulary(begin_ofset, vocabulary)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment