feat(cleaner): MVP on Lucio

parent 8210fef0
import os
from datetime import datetime
from keras.callbacks import ModelCheckpoint, EarlyStopping
from glossolalia.loader import load_seeds, load_text
from glossolalia.lstm import LisSansTaMaman
def train():
    """Train the LSTM on the lyrics corpus, then sample from it.

    Steps, in order:

    1. Build a ``LisSansTaMaman`` model from the first 1000 items of the
       corpus loaded from ``./genius.txt``.
    2. Fit it in rounds of ``epochs_per_round`` epochs up to ``nb_epoch``,
       printing and logging ``predict_seeds`` output after every round.
    3. Print and log one prediction for each of 5 seeds drawn from the corpus.
    4. Enter an interactive prompt that predicts from user input until
       end-of-file (Ctrl-D) or Ctrl-C.

    Everything that is logged goes to a timestamped file under ``./output/``.
    """
    nb_words = 20
    nb_epoch = 50
    nb_layers = 100
    dropout = .2  # TODO finetune layers/dropout
    validation_split = 0.2
    epochs_per_round = 10  # epochs trained between two sampling passes

    lstm = LisSansTaMaman(nb_layers, dropout, validation_split, debug=True)

    # "{epoch}" and "{accuracy}" are left unformatted here on purpose: they are
    # placeholders handed as-is to ModelCheckpoint.
    filename_model = "../models/buko/buko_lstm%i-d%.1f-{epoch:02d}_%i-{accuracy:.4f}.hdf5" % (
        nb_layers, dropout, nb_epoch)
    output_dir = "./output/"  # was named `dir`, which shadowed the builtin
    filename_output = output_dir + "buko_%i-d%.1f_%s.txt" % (
        nb_layers, dropout, datetime.now().strftime("%y%m%d_%H%M"))
    callbacks_list = [
        ModelCheckpoint(filename_model, monitor='val_accuracy', period=10, save_best_only=True),
        EarlyStopping(monitor='val_accuracy', patience=5)]

    corpus = load_text("./genius.txt")
    print("Corpus:", corpus[:10])
    # NOTE(review): only the first 1000 corpus items are used to build the
    # model -- presumably a debugging shortcut; confirm before a real run.
    lstm.create_model(corpus[:1000])

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_dir, exist_ok=True)

    with open(filename_output, "a+") as f:
        for first_epoch in range(0, nb_epoch, epochs_per_round):
            lstm.fit(epochs=min(first_epoch + epochs_per_round, nb_epoch),
                     initial_epoch=first_epoch,
                     callbacks=callbacks_list,
                     validation_split=validation_split)
            # NOTE(review): outputs are written without a separator; confirm
            # whether predictions are already newline-terminated.
            for output in lstm.predict_seeds(nb_words):
                print(output)
                f.writelines(output)

        for i, seed in enumerate(load_seeds(corpus, 5)):
            output = lstm.predict(seed, nb_words)
            print("%i %s -> %s" % (i, seed, output))
            f.writelines(output)

        # Interactive mode: leave the loop (and close the log file) cleanly on
        # Ctrl-D / Ctrl-C instead of dying with a traceback.
        while True:
            try:
                input_text = input("> ")
            except (EOFError, KeyboardInterrupt):
                print()
                break
            text = lstm.predict(input_text, nb_words)
            print(text)
            f.write("%s\n" % text)
# Script entry point: start training only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    train()
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -10,7 +10,6 @@ def tweet(): ...@@ -10,7 +10,6 @@ def tweet():
# des neiges d'insuline # des neiges d'insuline
# Un jour de l'an commencé sur les autres # Un jour de l'an commencé sur les autres
# une hypothèse qu'engendre la haine n'est qu'une prison vide # une hypothèse qu'engendre la haine n'est qu'une prison vide
# sniff de Caravage rapide
# Relater l'passionnel dans les casseroles d'eau de marécages # Relater l'passionnel dans les casseroles d'eau de marécages
# La nuit c'est le soleil # La nuit c'est le soleil
......
from glossolalia import loader import json
def clean(text): def clean(text):
...@@ -10,5 +10,22 @@ def clean(text): ...@@ -10,5 +10,22 @@ def clean(text):
def build_corpus(songs):
    """Return the cleaned lyrics of *songs* as one newline-joined string.

    Each item of *songs* is a mapping with a ``"lyrics"`` entry that is either
    a string or ``None``; ``None`` entries are skipped. Within the lyrics,
    empty lines and lines containing ``"RapGenius France"`` are dropped, and
    the typographic apostrophe (U+2019) is replaced by a plain ``'``.

    The result has no trailing newline; it is ``""`` when nothing is kept.
    """
    kept_lines = []
    for song in songs:
        lyrics = song["lyrics"]
        if lyrics is None:
            continue
        # Split on "\n" only, so other line-break characters stay untouched.
        for line in lyrics.split("\n"):
            if line and "RapGenius France" not in line:
                kept_lines.append(line.replace("’", "'"))
    return "\n".join(kept_lines)


if __name__ == '__main__':
    # Explicit UTF-8: the apostrophe normalisation above must not depend on
    # the platform's default locale encoding.
    with open("../data/Lyrics_LucioBukowski.json", encoding="utf-8") as f:
        songs = json.load(f)["songs"]

    corpus: str = build_corpus(songs)
    print(corpus)

    with open("../data/genius.txt", "w", encoding="utf-8") as o:
        o.write(corpus)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment