feat(Kawa): tokenize "

parent 170dc893
import lyricsgenius
def fetch():
    """Search Genius for the artist "Dooz-kawa" and print every hit.

    The API token is read from the ``GENIUS_ACCESS_TOKEN`` environment
    variable so that no credential is stored in the source tree.

    Raises:
        RuntimeError: if ``GENIUS_ACCESS_TOKEN`` is unset or empty.
    """
    # Local import so this block does not depend on the file's import header.
    import os

    token = os.environ.get("GENIUS_ACCESS_TOKEN")
    if not token:
        raise RuntimeError(
            "GENIUS_ACCESS_TOKEN is not set; export your Genius API token first"
        )

    genius = lyricsgenius.Genius(token)
    # NOTE(review): the loop assumes search_artist() returns a mapping with a
    # "hits" list -- confirm against the installed lyricsgenius version.
    response = genius.search_artist("Dooz-kawa")
    for hit in response["hits"]:
        print(hit)
def main():
    """Script entry point: run :func:`fetch` once."""
    fetch()


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
...@@ -5,7 +5,9 @@ from KoozDawa.dawa.loader import load_kawa ...@@ -5,7 +5,9 @@ from KoozDawa.dawa.loader import load_kawa
class PoemTokenizer(Tokenizer): class PoemTokenizer(Tokenizer):
def __init__(self, **kwargs) -> None: def __init__(self, **kwargs) -> None:
super().__init__(lower=False, filters='"#$%&()*+,-/<=>@[\\]^_`{|}~\t\n', oov_token="😢", **kwargs) super().__init__(lower=True, # TODO: Better generalization without?
filters='#$%&()*+/<=>@[\\]^_`{|}~\t\n', oov_token="😢",
**kwargs)
def get_sequence_of_tokens(self, corpus): def get_sequence_of_tokens(self, corpus):
self.fit_on_texts(corpus) self.fit_on_texts(corpus)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment