Commit 80ea6919 by Jaime Collado

Developing Sequence class

parent a776068a
[[package]]
name = "atomicwrites"
version = "1.4.0"
description = "Atomic file writes."
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "attrs"
version = "21.4.0"
description = "Classes Without Boilerplate"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[package.extras]
dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"]
docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"]
tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"]
[[package]]
name = "colorama"
version = "0.4.4"
description = "Cross-platform colored terminal text."
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "more-itertools"
version = "8.12.0"
description = "More routines for operating on iterables, beyond itertools"
category = "dev"
optional = false
python-versions = ">=3.5"
[[package]]
name = "packaging"
version = "21.3"
description = "Core utilities for Python packages"
category = "dev"
optional = false
python-versions = ">=3.6"
[package.dependencies]
pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
[[package]]
name = "pluggy"
version = "0.13.1"
description = "plugin and hook calling mechanisms for python"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[package.extras]
dev = ["pre-commit", "tox"]
[[package]]
name = "py"
version = "1.11.0"
description = "library with cross-python path, ini-parsing, io, code, log facilities"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "pyparsing"
version = "3.0.7"
description = "Python parsing module"
category = "dev"
optional = false
python-versions = ">=3.6"
[package.extras]
diagrams = ["jinja2", "railroad-diagrams"]
[[package]]
name = "pytest"
version = "5.4.3"
description = "pytest: simple powerful testing with Python"
category = "dev"
optional = false
python-versions = ">=3.5"
[package.dependencies]
atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
attrs = ">=17.4.0"
colorama = {version = "*", markers = "sys_platform == \"win32\""}
more-itertools = ">=4.0.0"
packaging = "*"
pluggy = ">=0.12,<1.0"
py = ">=1.5.0"
wcwidth = "*"
[package.extras]
checkqa-mypy = ["mypy (==v0.761)"]
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]
[[package]]
name = "wcwidth"
version = "0.2.5"
description = "Measures the displayed width of unicode strings in a terminal"
category = "dev"
optional = false
python-versions = "*"
[metadata]
lock-version = "1.1"
python-versions = "^3.8"
content-hash = "c27944f25b55067b06883f1cea204be7d97841a4b8228fab69b91895347494ad"
[metadata.files]
atomicwrites = [
{file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"},
{file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"},
]
attrs = [
{file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"},
{file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"},
]
colorama = [
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
{file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
]
more-itertools = [
{file = "more-itertools-8.12.0.tar.gz", hash = "sha256:7dc6ad46f05f545f900dd59e8dfb4e84a4827b97b3cfecb175ea0c7d247f6064"},
{file = "more_itertools-8.12.0-py3-none-any.whl", hash = "sha256:43e6dd9942dffd72661a2c4ef383ad7da1e6a3e968a927ad7a6083ab410a688b"},
]
packaging = [
{file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
]
pluggy = [
{file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"},
{file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"},
]
py = [
{file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
{file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
]
pyparsing = [
{file = "pyparsing-3.0.7-py3-none-any.whl", hash = "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484"},
{file = "pyparsing-3.0.7.tar.gz", hash = "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea"},
]
pytest = [
{file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"},
{file = "pytest-5.4.3.tar.gz", hash = "sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8"},
]
wcwidth = [
{file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"},
{file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"},
]
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque ullamcorper id ante eu maximus. Nullam efficitur vehicula ante, in luctus ante sollicitudin iaculis. Donec efficitur porta ante, ut venenatis enim faucibus quis. Sed vitae egestas neque, a tempor tortor. Suspendisse nec maximus mi. Nam iaculis convallis ultricies. Nunc vel tempor magna, in ultrices enim. Duis id tristique nisl. In hac habitasse platea dictumst. Suspendisse at arcu placerat lectus interdum sollicitudin.
\ No newline at end of file
Nam lectus turpis, convallis molestie vestibulum vel, dapibus sit amet neque. Vestibulum nunc velit, gravida at hendrerit quis, molestie sed nisi. Morbi posuere mi a mauris posuere egestas. Integer nulla massa, porttitor et vehicula id, efficitur vitae erat. Donec vel neque eget nibh cursus condimentum. Sed tempor nisl consequat scelerisque tristique. Praesent felis mi, laoreet id egestas ac, ullamcorper ac ligula. Nulla posuere tristique felis, vitae ornare sapien dapibus ac. Morbi tempus elit nec orci suscipit, sed interdum velit pulvinar. Nunc viverra urna ut sollicitudin commodo.
\ No newline at end of file
Nunc cursus non orci nec elementum. Mauris ullamcorper mollis elementum. Morbi bibendum, odio sed congue vehicula, quam turpis accumsan enim, sit amet semper sem mauris eget dolor. Nulla diam diam, suscipit sed pulvinar nec, tristique at diam. Nam vel neque eu purus lacinia efficitur nec at lorem. Pellentesque dictum sem eros, quis ultrices magna congue congue. Aenean id dignissim nulla, quis molestie tortor. Praesent at bibendum eros, nec ornare nisi. Nullam feugiat felis nec aliquet auctor. Praesent blandit enim quis quam porttitor, quis rhoncus nibh mollis. Pellentesque eu purus ullamcorper arcu tempus ultrices. Pellentesque nec auctor turpis. Proin ligula magna, tempus sit amet venenatis ut, fermentum nec nunc. Donec at blandit lectus. Praesent auctor neque a egestas finibus.
\ No newline at end of file
...@@ -16,9 +16,14 @@ class SequenceIterator: ...@@ -16,9 +16,14 @@ class SequenceIterator:
class Sequence: class Sequence:
def __init__(self, id, sequences): def __init__(self, object):
self.id = id # TODO: Extraer id y sequences a partir del object de cualquier forma que se nos ocurra
self.sequences = sequences # ver: https://huggingface.co/docs/datasets/v2.0.0/en/package_reference/loading_methods#datasets.load_dataset
if isinstance(object, str):
self.id = object
else:
self.id = "collection"
self.sequences = ["subcollection_1", "subcollection_2", "subcollection_2"]
def __str__(self): def __str__(self):
return f"id: {self.id}, sequences: {self.sequences}" return f"id: {self.id}, sequences: {self.sequences}"
...@@ -35,20 +40,22 @@ class Sequence: ...@@ -35,20 +40,22 @@ class Sequence:
def __getitem__(self, i): def __getitem__(self, i):
if isinstance(i, str): if isinstance(i, str):
return "Str indexing is not supported yet" # TODO for sequence in self.sequences:
if isinstance(sequence, Sequence):
if sequence.id == i: return sequence
raise ValueError(f"Sequence index '{i}' not found")
elif isinstance(i, int): elif isinstance(i, int):
if i < 0: if i < 0:
i = len(self.sequences) + i i = len(self.sequences) + i
if i >= len(self.sequences): if i >= len(self.sequences):
raise IndexError("Sequence index out of range") raise IndexError(f"Sequence index '{i}' out of range")
else: else:
return self.sequences[i] return self.sequences[i]
else: else: # TODO: Support slices (e.g. [2:4])?
invalid_type = type(i) invalid_type = type(i)
raise TypeError( raise TypeError(
"LockableList indices must be integers or slices, not {}" f"Sequence indices must be integers or strings, not {invalid_type.__name__}"
.format(invalid_type.__name__)
) )
def get_depth(self): def get_depth(self):
...@@ -59,10 +66,14 @@ class Sequence: ...@@ -59,10 +66,14 @@ class Sequence:
# TODO: Move these tests to the ./tests folder # TODO: Move these tests to the ./tests folder
if __name__ == "__main__": if __name__ == "__main__":
seq = Sequence(1, ["doc1", 3, Sequence(4, [2])]) sequence_from_folder = Sequence("../tests/data/doc_1") # File loading
print(seq) sequence_from_file = Sequence("lorem ipsum") # String loading
print(seq[2])
print(seq["a"]) sequence_from_folder.sequences.append(sequence_from_file)
for s in seq:
print(sequence_from_folder)
print(sequence_from_folder[2])
print(sequence_from_folder["lorem ipsum"])
for s in sequence_from_folder:
print(s) print(s)
print(seq[20]) print(sequence_from_folder[20]) # This should fail
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment