Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
Flor Miriam Plaza del Arco
/
WASSA 2018
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Merge Requests
0
Pipelines
Wiki
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
effc7fb5
authored
Jun 28, 2018
by
Flor Miriam Plaza del Arco
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
Modificación clase glove_word_embeddins
parent
80e851ea
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
11 deletions
corpus/.~lock.train-v2.csv#
model/glove_word_embedings.py
corpus/.~lock.train-v2.csv#
deleted
100644 → 0
View file @
80e851ea
,fmplaza,SINAI-155-1,27.06.2018 17:01,file:///home/fmplaza/.config/libreoffice/4;
\ No newline at end of file
model/glove_word_embedings.py
View file @
effc7fb5
...
...
@@ -149,7 +149,7 @@ class GloveWordEmbednigs(AbstractWordEmbedding):
self
.
__word_embeddings
.
clear
()
def
__load_full
(
self
,
index
):
def
__load_full
(
self
,
index
,
embeddings_number
):
"""Load the full file of word embeddings
Args:
...
...
@@ -158,12 +158,21 @@ class GloveWordEmbednigs(AbstractWordEmbedding):
own_partition
=
str
.
partition
own_strip
=
str
.
strip
with
open
(
self
.
__path_file
,
"r"
,
encoding
=
"utf-8"
)
as
glove_file
:
for
line
in
glove_file
:
glove_fields
=
own_partition
(
line
,
self
.
__GLOVE_SEPARATOR
)
glove_vector
=
fromstring
(
own_strip
(
glove_fields
[
2
]),
dtype
=
float
,
sep
=
" "
)
self
.
__word_indexes
[
own_strip
(
glove_fields
[
0
])]
=
index
self
.
__word_embeddings
.
append
(
glove_vector
)
index
+=
1
if
(
embeddings_number
==
None
):
for
line
in
glove_file
:
glove_fields
=
own_partition
(
line
,
self
.
__GLOVE_SEPARATOR
)
glove_vector
=
fromstring
(
own_strip
(
glove_fields
[
2
]),
dtype
=
float
,
sep
=
" "
)
self
.
__word_indexes
[
own_strip
(
glove_fields
[
0
])]
=
index
self
.
__word_embeddings
.
append
(
glove_vector
)
index
+=
1
elif
(
embeddings_number
!=
None
):
for
i
in
range
(
embeddings_number
):
line
=
glove_file
.
readline
()
glove_fields
=
own_partition
(
line
,
self
.
__GLOVE_SEPARATOR
)
glove_vector
=
fromstring
(
own_strip
(
glove_fields
[
2
]),
dtype
=
float
,
sep
=
" "
)
self
.
__word_indexes
[
own_strip
(
glove_fields
[
0
])]
=
index
self
.
__word_embeddings
.
append
(
glove_vector
)
index
+=
1
def
__load_only_vocabulary
(
self
,
index
,
vocabulary
):
"""Load only the words of the vocabulary
...
...
@@ -187,7 +196,7 @@ class GloveWordEmbednigs(AbstractWordEmbedding):
index
+=
1
def
load
(
self
,
begin_ofset
=
None
,
vocabulary
=
None
):
def
load
(
self
,
embeddings_number
=
None
,
begin_ofset
=
None
,
vocabulary
=
None
):
"""Load the Glove vectors file into memory
Args:
...
...
@@ -209,7 +218,7 @@ class GloveWordEmbednigs(AbstractWordEmbedding):
self
.
__word_embeddings
.
append
([])
#Index 0 is reserved
if
(
vocabulary
is
None
):
self
.
__load_full
(
begin_ofset
)
self
.
__load_full
(
begin_ofset
,
embeddings_number
)
else
:
self
.
__load_only_vocabulary
(
begin_ofset
,
vocabulary
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment