feat(seeds): 1-2 words max

parent f8c0fb0f
import os
import string
from pprint import pprint
from random import choice, randint
......@@ -37,21 +36,21 @@ def get_lines(filename):
return all_lines
def load_seeds(corpus=None, nb_seeds=10, min_len=1, max_len=2):
    """Pick random short seeds, each made of the first words of a corpus line.

    Each seed is built by choosing a random usable line from ``corpus``,
    splitting it on single spaces, and keeping a random number of leading
    words between ``min_len`` and ``max_len`` (inclusive).

    Args:
        corpus: Iterable of text lines. When ``None``, the lines are obtained
            by calling ``load_text()``.
        nb_seeds: Number of seeds to generate.
        min_len: Minimum number of words per seed.
        max_len: Maximum number of words per seed.

    Returns:
        A list of ``nb_seeds`` strings. The text is not stripped, so a seed
        that reaches the end of its line keeps that line's trailing newline.

    Raises:
        ValueError: If ``min_len`` is greater than ``max_len``.
        IndexError: If seeds are requested but no line of ``corpus`` is
            longer than two characters.
    """
    if min_len > max_len:
        raise ValueError(
            "min_len (%d) must not exceed max_len (%d)" % (min_len, max_len)
        )
    if corpus is None:
        corpus = load_text()

    # Keep only lines longer than two characters. This also drops empty and
    # newline-only lines, so no separate blank-line test is needed. The list
    # is built once because it does not change between draws.
    candidates = [line for line in corpus if len(line) > 2]

    seeds = []
    for _ in range(nb_seeds):
        words = choice(candidates).split(" ")
        # Clamp both bounds to the words actually available so that a line
        # shorter than min_len yields the whole line instead of making
        # randint() fail on an empty range.
        upper = min(max_len, len(words))
        lower = min(min_len, upper)
        nb_words = randint(lower, upper)
        seeds.append(" ".join(words[:nb_words]))
    return seeds
def main():
lines = load_texts("../")
lines = load_text("../KoozDawa/data/genius.txt")
print("Some seeds:")
pprint(load_seeds(lines))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment