-
Notifications
You must be signed in to change notification settings - Fork 0
/
clean.go
57 lines (50 loc) · 1.4 KB
/
clean.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
package main
import (
"fmt"
"net/http"
"github.com/pkg/errors"
"github.com/thecsw/katya/analysis"
"github.com/thecsw/katya/storage"
)
// cleanTexts is an HTTP handler that runs analysis.CleanTexts over every text
// belonging to the source named by the "source" query parameter, writes the
// cleaned texts back through the storage package, and then flags the source
// as cleaned.
//
// Responses (all sent through httpJSON):
//   - 400 with "bad query" when the "source" parameter is missing or empty.
//   - 500 with a wrapped error when looking up the source, loading its texts,
//     cleaning them, or saving the updated source fails.
//   - 200 with "Already cleaned" when the source's Cleaned flag is already set.
//   - 200 with "OK" once the source has been saved with Cleaned set.
//
// Updating individual texts is best-effort: a failed storage.UpdateText call
// is logged to stdout and the loop moves on, so the handler can still answer
// "OK" even though some texts were not rewritten.
func cleanTexts(w http.ResponseWriter, r *http.Request) {
	source := r.URL.Query().Get("source")
	if source == "" {
		httpJSON(w, nil, http.StatusBadRequest, errors.New("bad query"))
		return
	}

	sourceObj, err := storage.GetSource(source, true)
	if err != nil {
		httpJSON(w, nil, http.StatusInternalServerError, errors.Wrap(err, "oops"))
		return
	}
	// Nothing to do if an earlier request already cleaned this source.
	if sourceObj.Cleaned {
		httpJSON(w, "Already cleaned", http.StatusOK, nil)
		return
	}

	texts, err := storage.GetSourcesTexts(sourceObj.ID)
	if err != nil {
		httpJSON(w, nil, http.StatusInternalServerError, errors.Wrap(err, "oops"))
		return
	}

	// deleted is later subtracted from the word-count deltas below; presumably
	// it is the number of words the cleaning removed -- confirm against
	// analysis.CleanTexts.
	newTexts, deleted, err := analysis.CleanTexts(texts)
	if err != nil {
		httpJSON(w, nil, http.StatusInternalServerError, errors.Wrap(err, "oops"))
		return
	}

	for _, text := range newTexts {
		fmt.Printf("[UPDATE] %s", text.URL)
		if err := storage.UpdateText(&text); err != nil {
			// Best-effort: report the failure and keep going, but do not also
			// claim the update finished.
			fmt.Println("[ERROR] FAILED", err)
			continue
		}
		fmt.Println(" [DONE]")
	}

	sourceObj.Cleaned = true
	// The deltas are adjusted before the source is saved: the text rewrites
	// above are not undone if UpdateSource fails, so the adjustment is applied
	// regardless of that outcome.
	sourcesNumWordsDelta.DecrementInt(sourceObj.Link, deleted)
	globalNumWordsDelta.DecrementInt(globalDeltaCacheKey, deleted)

	if err = storage.UpdateSource(sourceObj); err != nil {
		httpJSON(w, nil, http.StatusInternalServerError, errors.Wrap(err, "oops"))
		return
	}
	httpJSON(w, "OK", http.StatusOK, nil)
}