feat(seeds): 1-2 words max

parent f8c0fb0f
import os import os
import string
from pprint import pprint from pprint import pprint
from random import choice, randint from random import choice, randint
...@@ -37,21 +36,21 @@ def get_lines(filename): ...@@ -37,21 +36,21 @@ def get_lines(filename):
return all_lines return all_lines
def load_seeds(corpus=None, nb_seeds=10): def load_seeds(corpus=None, nb_seeds=10, min_len=1, max_len=2):
if corpus is None: if corpus is None:
corpus = load_texts() corpus = load_text()
seeds = [] seeds = []
for i in range(nb_seeds): for i in range(nb_seeds):
plain_lines = filter(lambda k: k != "\n", corpus) plain_lines = filter(lambda k: k not in "\n" and len(k) > 2, corpus)
chosen = choice(list(plain_lines)) chosen = choice(list(plain_lines))
split = chosen.split(" ") split = chosen.split(" ")
nb_words = randint(1, len(split)) nb_words = randint(min_len, min(max_len, len(split)))
seeds.append(" ".join(split[:nb_words])) seeds.append(" ".join(split[:nb_words]))
return seeds return seeds
def main(): def main():
lines = load_texts("../") lines = load_text("../KoozDawa/data/genius.txt")
print("Some seeds:") print("Some seeds:")
pprint(load_seeds(lines)) pprint(load_seeds(lines))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment