-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlearn_jp.py
90 lines (67 loc) · 2.23 KB
/
learn_jp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
from contextlib import closing
from pathlib import Path
import boto3
import deepl
import jaconv
import streamlit as st
from sudachipy import Dictionary
# English display labels for the top-level part-of-speech tag of a token
# (the first element of ``token.part_of_speech()``).
# Tags that are missing here are displayed untranslated by the lookup below
# (``PART_OF_SPEECH.get(tag, tag)``), so the table should cover every tag the
# tokenizer can emit.
PART_OF_SPEECH = {
    "名詞": "noun",
    "形容詞": "adjective",
    "動詞": "verb",
    "助詞": "particle",
    "形容動詞": "adjective verb",
    # NOTE(review): Sudachi's UniDic-style tagset appears to report
    # na-adjectives as 形状詞 rather than 形容動詞; both are kept.
    "形状詞": "adjectival noun",
    "副詞": "adverb",
    "助動詞": "auxiliary",
    "連体詞": "prenominal adjective",
    "代名詞": "pronoun",
    "接続詞": "conjunction",
    "補助記号": "symbol",
    # General symbols share the "symbol" label so they are treated like
    # punctuation (i.e. not sent for translation).
    "記号": "symbol",
    "接頭辞": "prefix",
    "接尾辞": "suffix",
    "感動詞": "interjection",
    "空白": "whitespace",
}
# Shared service clients, created once when the module is loaded.

# Amazon Polly client used for speech synthesis.
polly = boto3.client(service_name="polly")

# Sudachi tokenizer created from a ``Dictionary`` built with no arguments.
tokenizer = Dictionary().create()

# DeepL client; the auth key is read from the DEEPL_API_KEY environment
# variable (``None`` is passed through if the variable is unset).
translator = deepl.Translator(os.environ.get("DEEPL_API_KEY"))
def do_polly(text: str) -> None:
    """Synthesize *text* with Amazon Polly and save the audio as ``japanese.mp3``.

    The text is wrapped in an SSML ``<prosody rate="slow">`` element and sent
    to Polly using the ``Mizuki`` voice. The returned MP3 stream is written to
    ``japanese.mp3`` (relative to the current working directory), replacing
    any previous file of that name.

    Args:
        text: Plain text to speak. It is XML-escaped before being embedded in
            the SSML document, so it must not contain SSML markup itself.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    # The text comes straight from user input and is embedded in an XML
    # document: unescaped "&", "<" or ">" would make the SSML malformed and
    # the synthesis request fail. Quotes are escaped too for good measure.
    safe_text = escape(text, {'"': "&quot;", "'": "&apos;"})
    ssml_text = f'<speak><prosody rate="slow">{safe_text}</prosody></speak>'

    result = polly.synthesize_speech(
        Text=ssml_text,
        OutputFormat="mp3",
        TextType="ssml",
        VoiceId="Mizuki",
    )

    # ``closing`` guarantees the response stream is closed after reading.
    with closing(result["AudioStream"]) as stream:
        Path("japanese.mp3").write_bytes(stream.read())
def translate(text: str) -> str:
    """Return *text* translated by DeepL with target language ``EN-US``."""
    translated = translator.translate_text(text, target_lang="EN-US")
    return str(translated.text)
st.title('Learn Japanese🇯🇵 with Python🐍')
st.write("""## Sample japanese text
* すもももももももものうち
* 一月一日は元日、昨日は大晦日
""")

# Top-level part-of-speech tags whose tokens are not sent for translation:
# punctuation / symbols, whitespace and particles.
_UNTRANSLATED_TAGS = frozenset({"補助記号", "記号", "空白", "助詞"})

# Input text
# Strip surrounding whitespace so a blank-only input is treated as empty
# instead of being sent to the translation and speech services.
text = st.text_input("**Japanese:**").strip()
if text:
    # Whole-sentence translation.
    en_text = translate(text)
    st.write(f"**English text**: {en_text}")

    # Per-token breakdown, keyed by token position.
    words = {}
    # Surface form -> English; a word repeated in the sentence is only
    # translated once.
    translations = {}
    for i, token in enumerate(tokenizer.tokenize(text)):
        word = token.surface()
        pos_tag = token.part_of_speech()[0]
        # Fall back to the raw tag when no English label is known.
        part_of_speech = PART_OF_SPEECH.get(pos_tag, pos_tag)

        if pos_tag in _UNTRANSLATED_TAGS:
            word_en = ""
        else:
            if word not in translations:
                translations[word] = translate(word)
            word_en = translations[word]

        # The reading is converted to hiragana and to the Latin alphabet
        # with jaconv's katakana converters.
        reading = token.reading_form()
        reading_hiragana = jaconv.kata2hira(reading)
        reading_roman = jaconv.kata2alphabet(reading)

        words[i] = {
            "word": word,
            "reading(Katakana)": reading,
            "reading(Hiragana)": reading_hiragana,
            "reading(Roman)": reading_roman,
            "english word": word_en,
            "part of speech": part_of_speech,
        }

    st.table(words)

    # do_polly writes the synthesized speech to japanese.mp3, which is then
    # handed to the audio player.
    do_polly(text)
    st.audio("japanese.mp3")