Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
B
BabelZoo
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
PLN
BabelZoo
Commits
8210fef0
Unverified
Commit
8210fef0
authored
Nov 27, 2019
by
PLN (Algolia)
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat(tokens): Lowercase made optional
parent
e8ebdd6c
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
15 additions
and
9 deletions
+15
-9
dawa.py
KoozDawa/dawa.py
+4
-2
boulbi.py
LeBoulbiNet/boulbi.py
+5
-3
cleaner.py
glossolalia/cleaner.py
+1
-0
lstm.py
glossolalia/lstm.py
+1
-0
tokens.py
glossolalia/tokens.py
+4
-4
No files found.
KoozDawa/dawa.py
View file @
8210fef0
...
...
@@ -8,13 +8,13 @@ from glossolalia.lstm import LisSansTaMaman
def
train
():
# should_train = True
# model_file = "../models/dawa_lstm_%i.hd5" % nb_epoch
nb_words
=
20
nb_epoch
=
100
nb_layers
=
100
dropout
=
.
3
# TODO finetune layers/dropout
validation_split
=
0.2
lstm
=
LisSansTaMaman
(
nb_layers
,
dropout
,
validation_split
,
debug
=
True
)
filename_model
=
"../models/dawa/dawa_lstm
%
i-d
%.1
f-{epoch:02d}_
%
i-{accuracy:.4f}.hdf5"
%
(
nb_layers
,
dropout
,
nb_epoch
)
filename_output
=
"./output/dawa_
%
i-d
%.1
f_
%
s.txt"
%
(
nb_layers
,
dropout
,
datetime
.
now
()
.
strftime
(
"
%
y
%
m
%
d_
%
H
%
M"
))
...
...
@@ -31,7 +31,9 @@ def train():
callbacks
=
callbacks_list
,
validation_split
=
validation_split
)
print
(
lstm
.
predict_seeds
(
nb_words
))
for
output
in
lstm
.
predict_seeds
(
nb_words
):
print
(
output
)
f
.
writelines
(
output
)
for
i
,
seed
in
enumerate
(
load_seeds
(
corpus
,
5
)):
output
=
lstm
.
predict
(
seed
,
nb_words
)
...
...
LeBoulbiNet/boulbi.py
View file @
8210fef0
...
...
@@ -11,10 +11,10 @@ def train():
nb_words
=
20
nb_epoch
=
50
nb_layers
=
64
dropout
=
.
2
# TODO finetune layers/dropout
dropout
=
.
2
# TODO finetune layers/dropout
validation_split
=
0.2
lstm
=
LisSansTaMaman
(
nb_layers
,
dropout
,
validation_split
,
debug
=
True
)
#
filename_model
=
"../models/boulbi/boulbi_lstm
%
i-d
%.1
f-{epoch:02d}_
%
i-{accuracy:.4f}.hdf5"
%
(
nb_layers
,
dropout
,
nb_epoch
)
filename_output
=
"./output/boulbi_
%
i-d
%.1
f_
%
s.txt"
%
(
...
...
@@ -32,7 +32,9 @@ def train():
callbacks
=
callbacks_list
,
validation_split
=
validation_split
)
print
(
lstm
.
predict_seeds
(
nb_words
))
for
output
in
lstm
.
predict_seeds
(
nb_words
):
print
(
output
)
f
.
writelines
(
output
)
for
i
,
seed
in
enumerate
(
load_seeds
(
corpus
,
5
)):
output
=
lstm
.
predict
(
seed
,
nb_words
)
...
...
glossolalia/cleaner.py
View file @
8210fef0
...
...
@@ -2,6 +2,7 @@ from glossolalia import loader
def
clean
(
text
):
# TODO: Remove lines with ???
# Replace literal newlines
# Remove empty lines
# Replace ’ by '
...
...
glossolalia/lstm.py
View file @
8210fef0
...
...
@@ -44,6 +44,7 @@ class LisSansTaMaman(object):
model
.
summary
()
self
.
model
=
model
print
(
"Max sequence length:"
,
self
.
max_sequence_len
)
# TODO: Batch fit? splitting nb_epoch into N step
def
fit
(
self
,
epochs
:
int
,
initial_epoch
:
int
=
0
,
...
...
glossolalia/tokens.py
View file @
8210fef0
...
...
@@ -4,10 +4,10 @@ from glossolalia.loader import load_texts
class
PoemTokenizer
(
Tokenizer
):
def
__init__
(
self
,
**
kwargs
)
->
None
:
super
()
.
__init__
(
lower
=
True
,
# TODO: Better generalization without?
filters
=
'$
%
&
()
*+/<=>@[
\\
]^_`{|}~
\t\n
'
,
oov_token
=
"😢"
,
**
kwargs
)
def
__init__
(
self
,
lower
:
bool
=
True
,
**
kwargs
)
->
None
:
super
()
.
__init__
(
lower
=
lower
,
# TODO: Better generalization without?
filters
=
'$
%
&*+/<=>@[
\\
]^_`{|}~
\t\n
'
,
oov_token
=
"😢"
,
**
kwargs
)
#TODO: keep newlines
def
get_sequence_of_tokens
(
self
,
corpus
):
self
.
fit_on_texts
(
corpus
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment