-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtranslate.go
122 lines (107 loc) · 3.17 KB
/
translate.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
*
* Copyright 2023 casiusbot authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package main
import (
"log"
"net/http"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/dvaumoron/casiusbot/common"
)
// Translater is the abstraction used by the translation filter to transform
// the text extracted for a link before it is sent as an attached file.
type Translater interface {
// Translate receives the extracted text and returns the string that is
// attached to the outgoing message (presumably the translated text; the
// exact semantics depend on the implementation, which is not shown here).
Translate(msg string) string
}
// bgAddTranslationFilter launches addTranslationFiltering in its own goroutine
// and hands back the send side of the unbuffered channel that feeds it.
//
// The text extracter is derived from selector through initExtracter. Each
// linkInfo pushed on the returned channel results in one message on
// messageSender; closing the returned channel ends the goroutine.
func bgAddTranslationFilter(messageSender chan<- common.MultipartMessage, selector string, translater Translater) chan<- linkInfo {
	input := make(chan linkInfo)
	extracter := initExtracter(selector)
	go addTranslationFiltering(messageSender, extracter, translater, input)
	return input
}
// addTranslationFiltering consumes filteringChan until it is closed. For every
// received linkInfo it extracts a text with extracter, passes it through
// translater and sends a message on messageSender carrying the link as message
// body and the translater output as the content of a "translated.txt" file.
//
// The send on messageSender blocks until the message is accepted.
func addTranslationFiltering(messageSender chan<- common.MultipartMessage, extracter func(linkInfo) string, translater Translater, filteringChan <-chan linkInfo) {
	for {
		info, open := <-filteringChan
		if !open {
			// Channel closed by the producer: nothing more to process.
			return
		}

		translated := translater.Translate(extracter(info))
		messageSender <- common.MultipartMessage{
			Message:    info.link,
			FileName:   "translated.txt",
			FileData:   translated,
			AllowMerge: true,
		}
	}
}
// initExtracter chooses how the text to translate is obtained for a link.
//
// When selector starts with "css:" (compared case-insensitively), the rest of
// the string is handed to createExtracter. Any other value, including one
// shorter than the prefix, selects extractDescription.
func initExtracter(selector string) func(linkInfo) string {
	const cssPrefix = "css:"

	// Guard on the length first so the slice expression cannot go out of range.
	if len(selector) < len(cssPrefix) || strings.ToLower(selector[:len(cssPrefix)]) != cssPrefix {
		return extractDescription
	}
	return createExtracter(selector[len(cssPrefix):])
}
// extractDescription is the default extracter: it returns the description
// field carried by info, without any network access.
func extractDescription(info linkInfo) string {
	description := info.description
	return description
}
// createExtracter builds an extracter that downloads the page at info.link and
// returns the text of the elements matched by selector.
//
// When selector contains "noscript", the matched content is converted with
// noscriptToString, otherwise with htmlToString.
//
// The returned function never fails: on a request error, a non-2xx HTTP status
// or a parsing error it logs the problem and returns an empty string.
func createExtracter(selector string) func(linkInfo) string {
	toString := htmlToString
	if strings.Contains(selector, "noscript") {
		toString = noscriptToString
	}

	return func(info linkInfo) string {
		resp, err := http.Get(info.link)
		if err != nil {
			log.Println("Failed to retrieved content from link :", err)
			return ""
		}
		defer resp.Body.Close()

		// http.Get only reports transport-level errors; an error page (404,
		// 500, ...) must not be parsed and translated as if it were the
		// expected content.
		if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
			log.Println("Failed to retrieve content from link : unexpected status", resp.Status)
			return ""
		}

		doc, err := goquery.NewDocumentFromReader(resp.Body)
		if err != nil {
			log.Println("Failed to parse content from link :", err)
			return ""
		}
		return toString(doc.Find(selector))
	}
}
func htmlToString(html *goquery.Selection) string {
notBrLast := false
var buffer strings.Builder
walkselection(html, &buffer, ¬BrLast)
return buffer.String()
}
func noscriptToString(noscript *goquery.Selection) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(noscript.Text()))
if err != nil {
log.Println("Failed to parse content from selection :", err)
return ""
}
notBrLast := false
var buffer strings.Builder
walkselection(doc.Find("body"), &buffer, ¬BrLast)
return buffer.String()
}
// walkselection appends a plain-text rendering of every node of parent to
// buffer, recursing into the contents of any node that is neither a text node
// nor a <br> element.
//
// notBrLast is shared across the whole traversal: it is set once text has been
// written and cleared when a newline is emitted for a <br>, so a <br> only
// yields a newline when text was written since the previous one.
func walkselection(parent *goquery.Selection, buffer *strings.Builder, notBrLast *bool) {
	parent.Each(func(_ int, node *goquery.Selection) {
		name := goquery.NodeName(node)
		switch name {
		case "#text":
			buffer.WriteString(node.Text())
			*notBrLast = true
		case "br":
			if !*notBrLast {
				// No text since the last line break: skip it.
				return
			}
			buffer.WriteByte('\n')
			*notBrLast = false
		default:
			walkselection(node.Contents(), buffer, notBrLast)
		}
	})
}