Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
Jaime Collado
/
textflow
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
1
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
02e97587
authored
Apr 07, 2022
by
Jaime Collado
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
Refactoring Sequence class
parent
f3b23a01
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
72 additions
and
35 deletions
textflow/Sequence.py
textflow/Sequence.py
View file @
02e97587
...
@@ -3,9 +3,10 @@ from typing import Optional
...
@@ -3,9 +3,10 @@ from typing import Optional
class
SequenceIterator
:
class
SequenceIterator
:
def
__init__
(
self
,
sequences
):
def
__init__
(
self
,
children
,
sequences
):
self
.
idx
=
0
self
.
idx
=
0
self
.
data
=
sequences
self
.
children
=
children
self
.
sequences
=
sequences
def
__iter__
(
self
):
def
__iter__
(
self
):
return
self
return
self
...
@@ -13,12 +14,18 @@ class SequenceIterator:
...
@@ -13,12 +14,18 @@ class SequenceIterator:
def
__next__
(
self
):
def
__next__
(
self
):
self
.
idx
+=
1
self
.
idx
+=
1
try
:
try
:
return
self
.
data
[
self
.
idx
-
1
]
#return self.data[self.idx-1]
return
{
"child"
:
self
.
children
[
self
.
idx
-
1
],
"sequences"
:
self
.
sequences
[
self
.
idx
-
1
]
}
except
IndexError
:
except
IndexError
:
self
.
idx
=
0
self
.
idx
=
0
raise
StopIteration
raise
StopIteration
_VALID_FORMATS
=
[
"string"
,
"text"
,
"token"
,
None
]
class
Sequence
:
class
Sequence
:
"""Summary of class here.
"""Summary of class here.
...
@@ -30,75 +37,105 @@ class Sequence:
...
@@ -30,75 +37,105 @@ class Sequence:
text: ...
text: ...
sequences: ...
sequences: ...
"""
"""
def
__init__
(
self
,
format
:
str
,
item
:
object
,
id
:
Optional
[
str
]
=
None
):
def
__init__
(
self
,
format
:
Optional
[
str
]
=
None
,
src
:
Optional
[
object
]
=
None
,
id
:
Optional
[
str
]
=
None
):
"""Creates a sequence from an input object.
"""Creates a sequence from an input object.
Args:
Args:
format: A string containing the input data's type.
format: A string containing the input data's type.
item
: An object representing the input data. It can be a string for a
src
: An object representing the input data. It can be a string for a
string format or a file path for a text format.
string format or a file path for a text format.
id: A string to overwrite the default's sequence id.
id: A string to overwrite the default's sequence id.
"""
"""
VALID_FORMATS
=
(
"string"
,
"text"
)
if
format
not
in
VALID_FORMATS
:
if
format
not
in
_VALID_FORMATS
:
raise
ValueError
(
f
"{format} is not a valid format. Valid formats: {_VALID_FORMATS}"
)
if
format
==
"token"
:
raise
ValueError
(
raise
ValueError
(
f
"
{format} is not a valid format. Valid formats: {VALID_FORMATS}
"
f
"
Tokens can not be split
"
)
)
# Empty sequence
if
format
is
None
:
self
.
id
=
id
self
.
text
=
None
self
.
children
=
[]
self
.
sequences
=
[]
# Splits string text by whitespace
# Splits string text by whitespace
if
format
==
"string"
:
if
format
==
"string"
:
if
not
isinstance
(
item
,
str
):
if
not
isinstance
(
src
,
str
):
raise
ValueError
(
f
"{
item
} is not an instance of string"
)
raise
ValueError
(
f
"{
src
} is not an instance of string"
)
self
.
id
=
id
if
id
else
"string"
self
.
id
=
id
if
id
else
"string"
self
.
text
=
item
self
.
text
=
src
self
.
sequences
=
item
.
split
(
" "
)
self
.
children
=
[(
"token"
,
token_src
)
for
token_src
in
src
.
split
(
" "
)]
self
.
sequences
=
[
Sequence
()
for
_
in
self
.
children
]
# Splits file text by \n
# Splits file text by \n
if
format
==
"text"
:
if
format
==
"text"
:
self
.
id
=
id
if
id
else
os
.
path
.
basename
(
item
)
.
split
(
"."
)[
0
]
self
.
id
=
id
if
id
else
os
.
path
.
basename
(
src
)
.
split
(
"."
)[
0
]
with
open
(
item
,
"r"
)
as
f
:
with
open
(
src
,
"r"
)
as
f
:
self
.
text
=
f
.
read
()
self
.
text
=
f
.
read
()
self
.
sequences
=
self
.
text
.
split
(
"
\n
"
)
self
.
children
=
[(
"string"
,
line_src
)
for
line_src
in
self
.
text
.
split
(
"
\n
"
)]
self
.
sequences
=
[
Sequence
()
for
_
in
self
.
children
]
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
text
return
self
.
text
def
__repr__
(
self
):
def
__repr__
(
self
):
values
=
", "
.
join
([
sequence
.
__repr__
()
for
sequence
in
self
.
sequences
])
children
=
", "
.
join
([
child
.
__repr__
()
for
child
in
self
.
children
])
sequences
=
", "
.
join
([
sequence
.
__repr__
()
for
sequence
in
self
.
sequences
])
return
(
return
(
"Sequence(
\n
"
"Sequence(
\n
"
f
" id: {self.id}
\n
"
f
" id: {self.id}
\n
"
f
" sequences: {values}
\n
"
f
" text: {self.text}
\n
"
f
" children: {children}
\n
"
f
" sequences: {sequences}
\n
"
")"
")"
)
)
def
__len__
(
self
):
def
__len__
(
self
):
return
len
(
self
.
sequences
)
return
len
(
self
.
children
)
def
__iter__
(
self
):
def
__iter__
(
self
):
return
SequenceIterator
(
self
.
sequences
)
return
SequenceIterator
(
self
.
children
,
self
.
sequences
)
def
__getitem__
(
self
,
i
):
def
__getitem__
(
self
,
idx
):
if
isinstance
(
i
,
str
):
if
isinstance
(
idx
,
str
):
# Get src by string (e.g. seq["doc1"])
for
sequence
in
self
.
sequences
:
if
self
.
sequences
[
0
]
is
None
:
if
isinstance
(
sequence
,
Sequence
):
raise
ValueError
(
f
"Sequence id '{idx}' not found in {self.sequences}"
)
if
sequence
.
id
==
i
:
return
sequence
for
cont
,
sequence
in
enumerate
(
self
.
sequences
):
raise
ValueError
(
f
"Sequence index '{i}' not found"
)
if
sequence
.
id
==
idx
:
return
{
elif
isinstance
(
i
,
int
):
"child"
:
self
.
children
[
cont
],
if
i
<
0
:
"sequences"
:
self
.
sequences
[
cont
]
i
=
len
(
self
.
sequences
)
+
i
}
raise
ValueError
(
f
"Sequence id '{idx}' not found in {self}"
)
if
i
>=
len
(
self
.
sequences
):
raise
IndexError
(
f
"Sequence index '{i}' out of range"
)
elif
isinstance
(
idx
,
int
):
# Get src by int (e.g. seq[0])
else
:
if
abs
(
idx
)
>=
len
(
self
.
children
):
return
self
.
sequences
[
i
]
raise
IndexError
(
f
"Sequence index '{idx}' out of range"
)
if
idx
<
0
:
idx
=
len
(
self
.
children
)
+
idx
return
{
"child"
:
self
.
children
[
idx
],
"sequences"
:
self
.
sequences
[
idx
]
}
else
:
# TODO: Should it support slices (e.g. [2:4])?
else
:
# TODO: Should it support slices (e.g. [2:4])?
invalid_type
=
type
(
i
)
raise
TypeError
(
raise
TypeError
(
f
"Sequence indices must be integers or strings, not {
invalid_type
.__name__}"
f
"Sequence indices must be integers or strings, not {
type(idx)
.__name__}"
)
)
def
set_sequence
(
self
,
new_sequence
):
print
(
"Setting value..."
)
self
.
id
=
new_sequence
.
id
self
.
text
=
new_sequence
.
text
self
.
children
=
new_sequence
.
children
self
.
sequences
=
new_sequence
.
sequences
def
get_depth
(
self
):
def
get_depth
(
self
):
pass
# TODO
pass
# TODO
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment