Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
Jaime Collado
/
api_pirads
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
644258c2
authored
Mar 21, 2023
by
Jaime Collado
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
Preprocessing bugfix
parent
3b604dad
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
13 deletions
app.py
utils.py
app.py
View file @
644258c2
...
@@ -36,7 +36,7 @@ with open("./classifiers/pirads_model.joblib", "rb") as pickled_file:
...
@@ -36,7 +36,7 @@ with open("./classifiers/pirads_model.joblib", "rb") as pickled_file:
def
_predict
(
text
,
model
,
vectorizer
):
def
_predict
(
text
,
model
,
vectorizer
):
# Preprocess input text
# Preprocess input text
clean_text
=
utils
.
preprocessing_text
(
text
)
clean_text
=
utils
.
preprocessing_text
(
text
)
clean_text
=
utils
.
clear_
b
irads
(
clean_text
)
clean_text
=
utils
.
clear_
p
irads
(
clean_text
)
# Vectorize text
# Vectorize text
X_test
=
vectorizer
.
transform
([
clean_text
])
X_test
=
vectorizer
.
transform
([
clean_text
])
...
...
utils.py
View file @
644258c2
...
@@ -6,7 +6,7 @@ def preprocessing_text(s):
...
@@ -6,7 +6,7 @@ def preprocessing_text(s):
s
=
re
.
sub
(
'
\t
+'
,
' '
,
s
)
s
=
re
.
sub
(
'
\t
+'
,
' '
,
s
)
# Unicode normalization
# Unicode normalization
s
=
re
.
sub
(
r'
BR'
,
r'b
irads'
,
s
)
s
=
re
.
sub
(
r'
PR'
,
r'p
irads'
,
s
)
# string to lower
# string to lower
s
=
s
.
strip
()
.
lower
()
s
=
s
.
strip
()
.
lower
()
...
@@ -14,9 +14,9 @@ def preprocessing_text(s):
...
@@ -14,9 +14,9 @@ def preprocessing_text(s):
s
=
''
.
join
(
c
for
c
in
unicodedata
.
normalize
(
'NFD'
,
s
)
if
unicodedata
.
category
(
c
)
!=
'Mn'
)
s
=
''
.
join
(
c
for
c
in
unicodedata
.
normalize
(
'NFD'
,
s
)
if
unicodedata
.
category
(
c
)
!=
'Mn'
)
# replace synonyms of birads
# replace synonyms of birads
synon
=
[
'
bi rads'
,
'bi-rads'
,
'b-rads'
,
'birads-'
,
'birads -'
,
'bi_rads'
,
'b
irads/'
]
synon
=
[
'
pi rads'
,
'pi-rads'
,
'p-rads'
,
'pirads-'
,
'pirads -'
,
'pi_rads'
,
'p
irads/'
]
for
sy
in
synon
:
for
sy
in
synon
:
s
=
re
.
sub
(
sy
,
r'
b
irads '
,
s
)
s
=
re
.
sub
(
sy
,
r'
p
irads '
,
s
)
s
=
re
.
sub
(
' +'
,
' '
,
s
)
s
=
re
.
sub
(
' +'
,
' '
,
s
)
...
@@ -32,14 +32,14 @@ def preprocessing_text(s):
...
@@ -32,14 +32,14 @@ def preprocessing_text(s):
}
}
for
key
,
value
in
dic_roman
.
items
():
for
key
,
value
in
dic_roman
.
items
():
start
=
'
b
irads '
+
key
start
=
'
p
irads '
+
key
end
=
'
b
irads '
+
value
end
=
'
p
irads '
+
value
s
=
re
.
sub
(
start
,
end
,
s
)
s
=
re
.
sub
(
start
,
end
,
s
)
s
=
re
.
sub
(
' +'
,
' '
,
s
)
s
=
re
.
sub
(
' +'
,
' '
,
s
)
s
=
re
.
sub
(
r'
birads (\d)([a-z])'
,
r'b
irads \1 \2'
,
s
)
s
=
re
.
sub
(
r'
pirads (\d)([a-z])'
,
r'p
irads \1 \2'
,
s
)
s
=
re
.
sub
(
r'
birads (\d) - (\d)'
,
r'birads \1 b
irads \2'
,
s
)
s
=
re
.
sub
(
r'
pirads (\d) - (\d)'
,
r'pirads \1 p
irads \2'
,
s
)
s
=
re
.
sub
(
' +'
,
' '
,
s
)
s
=
re
.
sub
(
' +'
,
' '
,
s
)
...
@@ -53,8 +53,8 @@ def preprocessing_text(s):
...
@@ -53,8 +53,8 @@ def preprocessing_text(s):
s
=
re
.
sub
(
' +'
,
' '
,
s
)
s
=
re
.
sub
(
' +'
,
' '
,
s
)
# s = re.sub(r' b'+str(i)+' ', r'
b
irads '+str(i)+' ', s)
# s = re.sub(r' b'+str(i)+' ', r'
p
irads '+str(i)+' ', s)
# s = re.sub(r' b '+str(i)+' ', r'
b
irads '+str(i)+' ', s)
# s = re.sub(r' b '+str(i)+' ', r'
p
irads '+str(i)+' ', s)
# replace separate numbers e.g.: 4 x 5 . 9 by 4x5.9
# replace separate numbers e.g.: 4 x 5 . 9 by 4x5.9
for
n
in
[
','
,
'x'
,
'.'
]:
for
n
in
[
','
,
'x'
,
'.'
]:
...
@@ -65,9 +65,9 @@ def preprocessing_text(s):
...
@@ -65,9 +65,9 @@ def preprocessing_text(s):
def
clear_
b
irads
(
text
):
def
clear_
p
irads
(
text
):
text
=
re
.
sub
(
r'
b
irads.{1,3}\d{1}[a|b|c]?'
,
''
,
text
)
text
=
re
.
sub
(
r'
p
irads.{1,3}\d{1}[a|b|c]?'
,
''
,
text
)
text
=
re
.
sub
(
r'
b
irads categoria \d{1}'
,
''
,
text
)
text
=
re
.
sub
(
r'
p
irads categoria \d{1}'
,
''
,
text
)
text
=
text
.
replace
(
"( )"
,
""
)
text
=
text
.
replace
(
"( )"
,
""
)
text
=
re
.
sub
(
'
\t
+'
,
' '
,
text
)
text
=
re
.
sub
(
'
\t
+'
,
' '
,
text
)
text
=
re
.
sub
(
' +'
,
' '
,
text
)
text
=
re
.
sub
(
' +'
,
' '
,
text
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment