feat(LeVerbe): WIP

parent d208707c
This source diff could not be displayed because it is too large. You can view the blob instead.
import re
from ftfy import fix_encoding
def try_fix_encoding():
    """Print every line of ``./bible_fr.txt`` after running it through ``fix_encoding``.

    The file is decoded as UTF-8 and streamed one line at a time instead of
    being loaded in full with ``readlines()``, so memory use stays flat no
    matter how large the text is.

    Each line keeps its trailing newline and ``print`` appends another, so
    the console output is double-spaced.
    """
    with open("./bible_fr.txt", encoding='utf-8') as f:
        # Iterating the file object yields lines lazily.
        for line in f:
            print(fix_encoding(line))
def extract_lines():
    """Copy the lines of ``./james.txt`` that mention a keyword into ``./data.txt``.

    A line is kept when it contains one of the keywords as a whole word
    (delimited by ``\\b`` word boundaries, case-sensitive). Blank and
    whitespace-only lines are skipped. Kept lines are written unchanged,
    including their line ending. ``./data.txt`` is overwritten on every run.

    Progress is reported on stdout: every non-blank line is echoed, and
    matching lines are echoed a second time with a ``Match:`` prefix.
    """
    keywords = [
        "word",
        "voice",
        "breath",
    ]
    # Build a single alternation and compile it once, rather than formatting
    # and searching one pattern per keyword for every input line.
    pattern = re.compile(
        r"\b(?:%s)\b" % "|".join(re.escape(word) for word in keywords))

    # Explicit UTF-8 on both sides keeps the result independent of the
    # platform's default encoding.
    with open("./james.txt", "r", encoding="utf-8") as fin, \
            open("./data.txt", "w", encoding="utf-8") as fout:
        for line in fin:
            # strip() also catches whitespace-only lines and does not mistake
            # a one-character final line (no trailing newline) for a blank.
            if not line.strip():
                print("Skipping empty line")
                continue
            print("Line:", line)
            if pattern.search(line):
                print("Match: %s" % line)
                fout.write(line)
if __name__ == "__main__":
    # Script entry point: only the encoding check runs by default.
    # Call extract_lines() here instead to rebuild ./data.txt.
    try_fix_encoding()
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
from datetime import datetime
from glossolalia.loader import load_seeds, load_text
from glossolalia.lstm import LisSansTaMaman
"""
he a mighty hand
"""
def train():
    """Train the LSTM on ``./data.txt`` and log generated text to a timestamped file.

    Steps:
      1. Build a ``LisSansTaMaman`` model from the first 100 items of the
         corpus returned by ``load_text``.
      2. Fit in rounds of at most 10 epochs up to ``nb_epoch``; after each
         round, generate text from the fixed ``seeds`` list.
      3. Generate text from the seeds returned by ``load_seeds(corpus, 5)``.
      4. Enter an interactive prompt that generates text from user input
         until the user sends EOF (Ctrl-D) or interrupts (Ctrl-C).

    Everything generated is printed and appended to
    ``./output/verbe_<layers>-d<dropout>_<yymmdd_HHMM>.txt``.
    """
    nb_words = 60
    nb_epoch = 60
    nb_layers = 128
    dropout = .3
    validation_split = .3
    epochs_per_round = 10

    lstm = LisSansTaMaman(nb_layers, dropout, validation_split, debug=True)
    filename_output = "./output/verbe_%i-d%.1f_%s.txt" % (
        nb_layers, dropout, datetime.now().strftime("%y%m%d_%H%M"))
    seeds = ["Thus", "And", "He", "Our", "I", "Thou", "But"]

    corpus = load_text("./data.txt")
    print("Corpus:", corpus[:10])
    lstm.create_model(corpus[:100])  # FIXME: Scale up to whole corpus

    # NOTE(review): the loop nesting below was reconstructed from a view of
    # the file that had lost its indentation. Confirm that the load_seeds
    # loop is meant to run once after training rather than after every round.
    with open(filename_output, "a+") as f:
        for first_epoch in range(0, nb_epoch, epochs_per_round):
            lstm.fit(epochs=min(first_epoch + epochs_per_round, nb_epoch),
                     initial_epoch=first_epoch,
                     validation_split=validation_split)

            for output in lstm.predict_seeds(nb_words, seeds):
                print(output)
                f.writelines(output)

        # A distinct index name avoids shadowing the epoch counter above.
        for seed_index, seed in enumerate(load_seeds(corpus, 5)):
            output = lstm.predict(seed, nb_words)
            print("%i %s -> %s" % (seed_index, seed, output))
            f.writelines(output)

        while True:
            try:
                input_text = input("> ")
            except (EOFError, KeyboardInterrupt):
                # Ctrl-D or Ctrl-C at the prompt ends the session cleanly;
                # leaving the ``with`` block closes the output file.
                break
            text = lstm.predict(input_text, nb_words)
            print(text)
            f.write("%s\n" % text)
if __name__ == "__main__":
    # Script entry point: run the full training and generation session.
    train()
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment