Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial idea of a general translation service #903

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
63 changes: 63 additions & 0 deletions extensions/translator/deepl.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
(in-package :lem-translator)

(defclass deepl (service) ())

(defun auth-key ()
(or (lem:config :deepl-auth-key)
(setf (lem:config :deepl-auth-key)
(lem:prompt-for-string "Auth key: "))))

(defun ja-char-p (char)
(cond ((char<= (code-char 12354) ;#\HIRAGANA_LETTER_A
char
(code-char 12435) ;#\HIRAGANA_LETTER_N
))
((char<= (code-char 12450) ;#\KATAKANA_LETTER_A
char
(code-char 12531) ;#\KATAKANA_LETTER_N
))
((or (<= #x4E00
(char-code char)
#x9FFF)
(find char "仝々〆〇ヶ")))))

(defun ja-text-p (text)
(loop :for c :across text
:when (ja-char-p c)
:return t))

(defmethod translate-string ((service deepl) &key from to string)
(let* ((source-lang (or from "EN"))
(target-lang (or to "JA"))
(response
(dex:post "https://api-free.deepl.com/v2/translate"
;;TODO: Use the object key instead of calling the auth-key function
:headers `(("Authorization" . ,(format nil "DeepL-Auth-Key ~A" (auth-key))))
:content `(("text" . ,string)
("target_lang" . ,target-lang)
("source_lang" . ,source-lang)))))
(with-output-to-string (out)
(loop :for tr :in (gethash "translations" (yason:parse response))
:do (fresh-line out)
(write-string (gethash "text" tr) out)))))

(defmethod translate-region ((service deepl) &key from to
region-start region-end
replace)
(declare (ignore from to))
(let* ((source-text (points-to-string region-start region-end))
(is-japanese (ja-text-p source-text))
(translated-text (translate-string
service
:string (ppcre:regex-replace-all "\\s+" source-text " ")
:from (if is-japanese "EN" "JA")
:to (if is-japanese "JA" "EN")))
(buffer (translate-output
(make-buffer "*deepl*")
source-text
translated-text)))
(cond (replace
(delete-between-points region-start region-end)
(insert-string region-start translated-text))
(t
(pop-to-buffer buffer)))))
6 changes: 6 additions & 0 deletions extensions/translator/lem-translator.asd
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
(defsystem "lem-translator"
:depends-on ("lem")
:serial t
:components ((:file "urlencode")
(:file "translator")
(:file "lingva")))
34 changes: 34 additions & 0 deletions extensions/translator/lingva.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
(in-package :lem-translator)

(defclass lingva (service)
((language-list
:initarg :language-list
:accessor lingva-language-list
:type list)))

(defun %get-lingva-token (root-url)
"Update the translation token."
(let* ((body (dexador:get root-url))
(position (cl-ppcre:scan "buildManifest" body))
(token (subseq body (- position 23) (- position 2))))
token))

;;(setf lin (make-instance 'lingva :api-url
;; "https://translate.plausibility.cloud/_next/data/~a/~a/~a/~a.json")

(defmethod translate-string ((service lingva) &key from to string)
(let* ((root-url (str:split "/" (service-api-url service)))
(token (%get-lingva-token
(concatenate 'String
(first root-url)
"//"
(third root-url)))))
(gethash "translation"
(gethash
"pageProps"
(yason:parse
(dex:get
(format nil (service-api-url service)
token
from to
(do-urlencode:urlencode string))))))))
41 changes: 41 additions & 0 deletions extensions/translator/translator.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
(defpackage :lem-translator
(:use :cl :lem))

(in-package :lem-translator)

(defclass service ()
((auth-key
:initarg :auth-key
:reader service-key
:type (or string null))
(api-url
:initarg :api-url
:accessor service-api-url
:type string)))

(defvar *translation-service* nil)

(defgeneric translate-string (service &key from to string))

(defgeneric translate-region (service &key from to
region-start region-end
replace))

(defgeneric translate-output (output source-text final-text))

(defmethod translate-output ((buffer buffer) source-text final-text)
(with-open-stream (stream (make-buffer-output-stream (buffer-end-point buffer)))
(fresh-line stream)
(format stream "---------- source ----------~%")
(format stream "~A~%" source-text)
(format stream "---------- target ----------~%")
(format stream "~A~2%" final-text)))

(define-command translator-region (start end &optional is-replace) ("r" "P")
(if *translation-service*
(translate-region *translation-service*
:region-start start
:region-end end
:replace is-replace)
(message
"No translation service selected, please fill the variable *lem-translator:translation-service*")))
59 changes: 59 additions & 0 deletions extensions/translator/urlencode.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
(defpackage :do-urlencode
(:nicknames :urlencode)
(:use :cl)
(:import-from :babel :octets-to-string :string-to-octets)
(:import-from :alexandria :if-let :when-let)
(:export :urlencode-malformed-string :urlencode-malformed-string-string
:urlencode :urldecode))


(cl:in-package :urlencode)

(declaim (ftype (function ((unsigned-byte 8)) character) octet-to-ascii))
(defun octet-to-ascii (octet)
(aref (octets-to-string (make-array '(1)
:element-type '(unsigned-byte 8)
:initial-element octet)
:encoding :ASCII) 0))

(declaim (type (array (unsigned-byte 8) (4)) +extra-unreserved-octets+))
(defvar +extra-unreserved-octets+
(make-array '(4) :element-type '(unsigned-byte 8)
:initial-contents #(#x2D #x2E #x5F #x7E)))

(declaim (ftype (function ((unsigned-byte 8)) boolean) unreserved-octet-p))
(defun unreserved-octet-p (o)
(or (<= #x30 o #x39) ; #\0 to #\9
(<= #x41 o #x5A) ; #\A to #\Z
(<= #x61 o #x7A) ; #\a to #\z
(if (find o +extra-unreserved-octets+ :test #'=) t nil)))

(define-condition urlencode-malformed-string (error)
((string :initarg :string :reader urlencode-malformed-string-string))
(:report (lambda (c stream)
(format stream "The string ~s is not a valid urlencoded string."
(urlencode-malformed-string-string c)))))

(declaim (ftype (function (simple-string
&key (:queryp boolean))
simple-string)
urlencode))
(defun urlencode (string &key (queryp nil))
(loop
with octets of-type (simple-array (unsigned-byte 8) (*)) = (string-to-octets string :encoding :UTF-8)
with result = (make-string (* 3 (length octets)))
for o across octets
with i of-type fixnum = 0
do (flet ((push-char (c)
(setf (aref result i) c)
(incf i)))
(cond ((unreserved-octet-p o)
(push-char (octet-to-ascii o)))
((and queryp (= o #x20))
(push-char #\+))
(t (let ((h (digit-char (ash (dpb 0 (byte 4 0) o) -4) 16))
(l (digit-char (dpb 0 (byte 4 4) o) 16)))
(push-char #\%)
(push-char h)
(push-char l)))))
finally (return (subseq result 0 i))))
3 changes: 2 additions & 1 deletion lem.asd
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,8 @@
#+sbcl
"lem-elixir-mode"
"lem-documentation-mode"
"lem-elisp-mode"))
"lem-elisp-mode"
"lem-translator"))

(defsystem "lem/legit"
:serial t
Expand Down