PLN / BabelZoo · Commits · 14c73a05

Commit 14c73a05 (unverified), authored Nov 23, 2019 by PLN (Algolia)
Parent: 29a23ae7

refactor: Extract glossolalia from KoozDawa

Showing 6 changed files with 72 additions and 59 deletions (+72 / -59)
KoozDawa/dawa.py        +66  -0
KoozDawa/lyrics.py       +1  -0
KoozDawa/tweeper.py      +1  -1
glossolalia/loader.py    +0  -0
glossolalia/lstm.py      +3  -57
glossolalia/tokens.py    +1  -1
KoozDawa/dawa.py (new file, mode 100644)
from keras.callbacks import ModelCheckpoint, EarlyStopping

from glossolalia.loader import load_kawa, clean_text, load_seeds
from glossolalia.lstm import generate_padded_sequences, create_model, generate_text
from glossolalia.tokens import PoemTokenizer


def main():
    # should_train = True
    # model_file = "../models/dawa_lstm_%i.hd5" % nb_epoch
    nb_words = 20
    nb_epoch = 100
    nb_layers = 128
    dropout = .2
    tokenizer = PoemTokenizer()

    # if should_train:
    lines = load_kawa()

    corpus = [clean_text(x) for x in lines]
    print("Corpus:", corpus[:10])

    inp_sequences, total_words = tokenizer.get_sequence_of_tokens(corpus)
    predictors, label, max_sequence_len = generate_padded_sequences(inp_sequences, total_words)
    model = create_model(max_sequence_len, total_words, layers=nb_layers, dropout=dropout)
    model.summary()

    file_path = "../models/dawa/dawa_lstm%i-d%.1f-{epoch:02d}_%i-{accuracy:.4f}.hdf5" % (nb_layers, dropout, nb_epoch)
    checkpoint = ModelCheckpoint(file_path, monitor='accuracy', period=10, save_best_only=True)
    # print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
    early_stopping = EarlyStopping(monitor='accuracy', patience=5)
    callbacks_list = [checkpoint, early_stopping]

    for i in range(0, nb_epoch, 10):
        model.fit(predictors, label,
                  initial_epoch=i, epochs=min(i + 10, nb_epoch),
                  verbose=2, callbacks=callbacks_list)
        print(generate_text(model, tokenizer, "", nb_words, max_sequence_len))

    # model.save(model_file)
    # else:  # FIXME: Load and predict, maybe reuse checkpoints?
    #     model = load_model(model_file)

    for i, seed in enumerate(load_seeds(lines, 5)):
        output = generate_text(model, tokenizer, seed, nb_words, max_sequence_len)
        print("%i %s -> %s" % (i, seed, output))

    with open("./output/dawa.txt", "a+") as f:
        while True:
            input_text = input("> ")
            text = generate_text(model, tokenizer, input_text, nb_words, max_sequence_len)
            print(text)
            f.writelines("%s\n" % text)


def debug_unrandomize():
    from numpy.random import seed
    from tensorflow_core.python.framework.random_seed import set_random_seed

    # set seeds for reproducibility
    set_random_seed(2)
    seed(1)


if __name__ == '__main__':
    debug_unrandomize()
    main()
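For reference, generate_padded_sequences is imported from glossolalia.lstm but its body does not appear in this diff. Below is a minimal sketch of what it presumably does, inferred from how it is called above (n-gram token sequences in; predictors, one-hot label, and max_sequence_len out); the actual implementation may differ.

import numpy as np
from keras.utils import to_categorical
from keras_preprocessing.sequence import pad_sequences

def generate_padded_sequences(input_sequences, total_words):
    # Left-pad every n-gram sequence to a common length.
    max_sequence_len = max(len(seq) for seq in input_sequences)
    sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))
    # The last token of each sequence is the label; the prefix is the predictor.
    predictors, label = sequences[:, :-1], sequences[:, -1]
    # One-hot encode labels for a softmax output layer (assumption).
    label = to_categorical(label, num_classes=total_words)
    return predictors, label, max_sequence_len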
KoozDawa/dawa/lyrics.py → KoozDawa/lyrics.py
@@ -4,6 +4,7 @@ import lyricsgenius

 def fetch():
     genius = lyricsgenius.Genius("zUSpjfQ9ELXDqOjx9hGfAlJGYQFrNvHh3rlDV298_QSr5ScKf3qlHZtOO2KsXspQ")
     response = genius.search_artist("Dooz-kawa")
+    print(response)
     for hit in response["hits"]:
         print(hit)
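Side note: in the lyricsgenius client, Genius.search_artist returns an Artist object rather than a raw JSON response, so indexing response["hits"] here would likely fail at runtime; the added print(response) looks like a step toward debugging that. A hedged sketch of the apparent intent (fetching and printing an artist's lyrics), assuming the standard lyricsgenius interface and a hypothetical max_songs cap:

import lyricsgenius

def fetch(token, max_songs=5):
    # search_artist returns an Artist whose .songs list holds Song objects.
    genius = lyricsgenius.Genius(token)
    artist = genius.search_artist("Dooz-kawa", max_songs=max_songs)
    for song in artist.songs:
        print(song.title)
        print(song.lyrics)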
KoozDawa/tweeper.py
@@ -25,7 +25,7 @@ class Tweeper(object):

 def main():
-    Tweeper().tweet("un pont de paris sen souvient sur de toi")
+    Tweeper().tweet("les anges se sont fichés")

 # Authenticate to Twitter
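The Tweeper class itself is outside this diff; only the seed tweet changes. As a rough sketch of what the "# Authenticate to Twitter" comment implies a tweet method wraps — the keys, the structure, and the use of tweepy are all assumptions here:

import tweepy

class Tweeper(object):
    def __init__(self):
        # Authenticate to Twitter; real credentials would come from config or env vars.
        auth = tweepy.OAuthHandler("CONSUMER_KEY", "CONSUMER_SECRET")
        auth.set_access_token("ACCESS_TOKEN", "ACCESS_TOKEN_SECRET")
        self.api = tweepy.API(auth)

    def tweet(self, message):
        self.api.update_status(message)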
KoozDawa/dawa/loader.py → glossolalia/loader.py (file moved, no content changes)
KoozDawa/dawa/lstm.py → glossolalia/lstm.py
@@ -2,15 +2,11 @@ import warnings

 import numpy as np
 from keras import Sequential
-from keras.callbacks import ModelCheckpoint, EarlyStopping
-from keras.layers import Embedding, LSTM, Dropout, Dense
+from keras.layers import Embedding, LSTM, Dropout, Dense, Bidirectional
 from keras.utils import to_categorical
 from keras_preprocessing.sequence import pad_sequences
 from keras_preprocessing.text import Tokenizer

-from KoozDawa.dawa.loader import load_kawa, clean_text, load_seeds
-from KoozDawa.dawa.tokens import PoemTokenizer
-
 warnings.filterwarnings("ignore")
 warnings.simplefilter(action='ignore', category=FutureWarning)
@@ -59,55 +55,4 @@ def generate_text(model: Sequential, tokenizer: Tokenizer, seed_text="", nb_word

         token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
         predicted = model.predict_classes(token_list, verbose=2)[0]
         output += " " + word_indices[predicted]
     return output.capitalize()
\ No newline at end of file
-
-
-def main():
-    # should_train = True
-    # model_file = "../models/dawa_lstm_%i.hd5" % nb_epoch
-    nb_words = 20
-    nb_epoch = 100
-    nb_layers = 128
-    dropout = .2
-    tokenizer = PoemTokenizer()
-
-    # if should_train:
-    lines = load_kawa()
-
-    corpus = [clean_text(x) for x in lines]
-    print("Corpus:", corpus[:10])
-
-    inp_sequences, total_words = tokenizer.get_sequence_of_tokens(corpus)
-    predictors, label, max_sequence_len = generate_padded_sequences(inp_sequences, total_words)
-    model = create_model(max_sequence_len, total_words, layers=nb_layers, dropout=dropout)
-    model.summary()
-
-    file_path = "../models/dawa/dawa_lstm%i-d%.1f-{epoch:02d}_%i-{accuracy:.4f}.hdf5" % (nb_layers, dropout, nb_epoch)
-    checkpoint = ModelCheckpoint(file_path, monitor='accuracy', period=10, save_best_only=True)
-    # print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
-    early_stopping = EarlyStopping(monitor='accuracy', patience=5)
-    callbacks_list = [checkpoint, early_stopping]
-
-    for i in range(0, nb_epoch, 10):
-        model.fit(predictors, label,
-                  initial_epoch=i, epochs=min(i + 10, nb_epoch),
-                  verbose=2, callbacks=callbacks_list)
-        print(generate_text(model, tokenizer, "", nb_words, max_sequence_len))
-
-    # model.save(model_file)
-    # else:  # FIXME: Load and predict, maybe reuse checkpoints?
-    #     model = load_model(model_file)
-
-    for i, seed in enumerate(load_seeds(lines, 5)):
-        output = generate_text(model, tokenizer, seed, nb_words, max_sequence_len)
-        print("%i %s -> %s" % (i, seed, output))
-
-    with open("./output/dawa.txt", "a+") as f:
-        while True:
-            input_text = input("> ")
-            text = generate_text(model, tokenizer, input_text, nb_words, max_sequence_len)
-            print(text)
-            f.writelines("%s\n" % text)
-
-
-if __name__ == '__main__':
-    main()
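create_model is referenced by the moved main() but not shown in this commit. Given the imports above (Sequential, Embedding, LSTM, Dropout, Dense, Bidirectional) and the call create_model(max_sequence_len, total_words, layers=nb_layers, dropout=dropout), a plausible sketch follows; the embedding size and the Bidirectional wrapper are assumptions:

from keras import Sequential
from keras.layers import Embedding, LSTM, Dropout, Dense, Bidirectional

def create_model(max_sequence_len, total_words, layers=128, dropout=.2):
    # Predictors are one token shorter than the padded sequences (the last token is the label).
    input_len = max_sequence_len - 1
    model = Sequential()
    model.add(Embedding(total_words, 10, input_length=input_len))
    model.add(Bidirectional(LSTM(layers)))
    model.add(Dropout(dropout))
    model.add(Dense(total_words, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model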
KoozDawa/dawa/tokens.py → glossolalia/tokens.py
 from keras_preprocessing.text import Tokenizer

-from KoozDawa.dawa.loader import load_kawa
+from glossolalia.loader import load_kawa


 class PoemTokenizer(Tokenizer):
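Only the import line of tokens.py changes; the body of PoemTokenizer is collapsed in this view. For context, dawa.py calls tokenizer.get_sequence_of_tokens(corpus), which in this style of LSTM text generator usually expands each line into all of its n-gram prefixes. A sketch under that assumption:

from keras_preprocessing.text import Tokenizer

class PoemTokenizer(Tokenizer):
    def get_sequence_of_tokens(self, corpus):
        # Learn the vocabulary, then expand each line into its n-gram prefixes.
        self.fit_on_texts(corpus)
        total_words = len(self.word_index) + 1
        input_sequences = []
        for line in corpus:
            token_list = self.texts_to_sequences([line])[0]
            for i in range(1, len(token_list)):
                input_sequences.append(token_list[:i + 1])
        return input_sequences, total_words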