From 93948d7460f3437b003cdbb14872cdf76dcadda6 Mon Sep 17 00:00:00 2001 From: Daniel Yrovas Date: Sun, 13 Aug 2023 12:09:35 +1000 Subject: [PATCH] squash keep rss & web content --- .gitignore | 2 +- internal/api/entry.go | 6 ++- internal/database/migrations.go | 7 +++ internal/locale/translations/en_US.json | 2 + internal/model/entry.go | 1 + internal/reader/processor/processor.go | 11 ++--- internal/reader/rewrite/rewriter.go | 38 ++++++++--------- internal/storage/entry.go | 30 +++++++------ internal/storage/entry_query_builder.go | 3 +- internal/template/templates/views/entry.html | 44 ++++++++++++------- internal/ui/entry_scraper.go | 45 ++++++++++++++++++++ internal/ui/static/js/app.js | 21 +++++++-- internal/ui/ui.go | 1 + 13 files changed, 151 insertions(+), 60 deletions(-) diff --git a/.gitignore b/.gitignore index f53dc2533d7..268def5864a 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,4 @@ miniflux *.rpm *.deb .idea -.vscode \ No newline at end of file +.vscode diff --git a/internal/api/entry.go b/internal/api/entry.go index e06482a4c1c..cdb8b18814d 100644 --- a/internal/api/entry.go +++ b/internal/api/entry.go @@ -279,7 +279,12 @@ func (h *handler) fetchContent(w http.ResponseWriter, r *http.Request) { return } - json.OK(w, r, map[string]string{"content": entry.Content}) + if err := h.store.UpdateEntryContent(entry); err != nil { + json.ServerError(w, r, err) + return + } + + json.OK(w, r, map[string]string{"content": entry.Content, "web_content": entry.WebContent}) } func configureFilters(builder *storage.EntryQueryBuilder, r *http.Request) { diff --git a/internal/database/migrations.go b/internal/database/migrations.go index 10975de083a..1ab7b7dd9c5 100644 --- a/internal/database/migrations.go +++ b/internal/database/migrations.go @@ -733,4 +733,11 @@ var migrations = []func(tx *sql.Tx) error{ _, err = tx.Exec(sql) return err }, + func(tx *sql.Tx) (err error) { + sql := ` + ALTER TABLE entries ADD COLUMN web_content text default ''; + ` + _, err = 
tx.Exec(sql) + return err + }, } diff --git a/internal/locale/translations/en_US.json b/internal/locale/translations/en_US.json index 7b3e2ccf76f..8d3662380fb 100644 --- a/internal/locale/translations/en_US.json +++ b/internal/locale/translations/en_US.json @@ -67,6 +67,8 @@ "entry.save.title": "Save this entry", "entry.save.completed": "Done!", "entry.save.toast.completed": "Entry saved", + "entry.scraper.label.rss": "Show RSS Content", + "entry.scraper.title.rss": "Fetch RSS content", "entry.scraper.label": "Download", "entry.scraper.title": "Fetch original content", "entry.scraper.completed": "Done!", diff --git a/internal/model/entry.go b/internal/model/entry.go index d09bd3ee3a3..57858d87bca 100644 --- a/internal/model/entry.go +++ b/internal/model/entry.go @@ -30,6 +30,7 @@ type Entry struct { CreatedAt time.Time `json:"created_at"` ChangedAt time.Time `json:"changed_at"` Content string `json:"content"` + WebContent string `json:"web_content,omitempty"` Author string `json:"author"` ShareCode string `json:"share_code"` Starred bool `json:"starred"` diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go index 2deac4cf674..8fba3e4fa60 100644 --- a/internal/reader/processor/processor.go +++ b/internal/reader/processor/processor.go @@ -81,14 +81,15 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us logger.Error(`[Processor] Unable to crawl this entry: %q => %v`, entry.URL, scraperErr) } else if content != "" { // We replace the entry content only if the scraper doesn't return any error. - entry.Content = content + // TODO: document change + entry.WebContent = content } } rewrite.Rewriter(url, entry, feed.RewriteRules) // The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered. 
- entry.Content = sanitizer.Sanitize(url, entry.Content) + entry.WebContent = sanitizer.Sanitize(url, entry.WebContent) if entryIsNew { intg, err := store.Integration(feed.UserID) @@ -169,18 +170,18 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) } if content != "" { - entry.Content = content + entry.WebContent = content entry.ReadingTime = calculateReadingTime(content, user) } rewrite.Rewriter(url, entry, entry.Feed.RewriteRules) - entry.Content = sanitizer.Sanitize(url, entry.Content) + entry.WebContent = sanitizer.Sanitize(url, entry.WebContent) return nil } func getUrlFromEntry(feed *model.Feed, entry *model.Entry) string { - var url = entry.URL + url := entry.URL if feed.UrlRewriteRules != "" { parts := customReplaceRuleRegex.FindStringSubmatch(feed.UrlRewriteRules) diff --git a/internal/reader/rewrite/rewriter.go b/internal/reader/rewrite/rewriter.go index c761ef96d9d..e17593c0f4e 100644 --- a/internal/reader/rewrite/rewriter.go +++ b/internal/reader/rewrite/rewriter.go @@ -61,55 +61,55 @@ func parseRules(rulesText string) (rules []rule) { func applyRule(entryURL string, entry *model.Entry, rule rule) { switch rule.name { case "add_image_title": - entry.Content = addImageTitle(entryURL, entry.Content) + entry.WebContent = addImageTitle(entryURL, entry.WebContent) case "add_mailto_subject": - entry.Content = addMailtoSubject(entryURL, entry.Content) + entry.WebContent = addMailtoSubject(entryURL, entry.WebContent) case "add_dynamic_image": - entry.Content = addDynamicImage(entryURL, entry.Content) + entry.WebContent = addDynamicImage(entryURL, entry.WebContent) case "add_youtube_video": - entry.Content = addYoutubeVideo(entryURL, entry.Content) + entry.WebContent = addYoutubeVideo(entryURL, entry.WebContent) case "add_invidious_video": - entry.Content = addInvidiousVideo(entryURL, entry.Content) + entry.WebContent = addInvidiousVideo(entryURL, entry.WebContent) case "add_youtube_video_using_invidious_player": - entry.Content 
= addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.Content) + entry.WebContent = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.WebContent) case "add_youtube_video_from_id": - entry.Content = addYoutubeVideoFromId(entry.Content) + entry.WebContent = addYoutubeVideoFromId(entry.WebContent) case "add_pdf_download_link": - entry.Content = addPDFLink(entryURL, entry.Content) + entry.WebContent = addPDFLink(entryURL, entry.WebContent) case "nl2br": - entry.Content = replaceLineFeeds(entry.Content) + entry.WebContent = replaceLineFeeds(entry.WebContent) case "convert_text_link", "convert_text_links": - entry.Content = replaceTextLinks(entry.Content) + entry.WebContent = replaceTextLinks(entry.WebContent) case "fix_medium_images": - entry.Content = fixMediumImages(entryURL, entry.Content) + entry.WebContent = fixMediumImages(entryURL, entry.WebContent) case "use_noscript_figure_images": - entry.Content = useNoScriptImages(entryURL, entry.Content) + entry.WebContent = useNoScriptImages(entryURL, entry.WebContent) case "replace": // Format: replace("search-term"|"replace-term") if len(rule.args) >= 2 { - entry.Content = replaceCustom(entry.Content, rule.args[0], rule.args[1]) + entry.WebContent = replaceCustom(entry.WebContent, rule.args[0], rule.args[1]) } else { logger.Debug("[Rewrite] Cannot find search and replace terms for replace rule %s", rule) } case "remove": // Format: remove("#selector > .element, .another") if len(rule.args) >= 1 { - entry.Content = removeCustom(entry.Content, rule.args[0]) + entry.WebContent = removeCustom(entry.WebContent, rule.args[0]) } else { logger.Debug("[Rewrite] Cannot find selector for remove rule %s", rule) } case "add_castopod_episode": - entry.Content = addCastopodEpisode(entryURL, entry.Content) + entry.WebContent = addCastopodEpisode(entryURL, entry.WebContent) case "base64_decode": if len(rule.args) >= 1 { - entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content) + entry.WebContent = 
applyFuncOnTextContent(entry.WebContent, rule.args[0], decodeBase64Content) } else { - entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content) + entry.WebContent = applyFuncOnTextContent(entry.WebContent, "body", decodeBase64Content) } case "parse_markdown": - entry.Content = parseMarkdown(entry.Content) + entry.WebContent = parseMarkdown(entry.WebContent) case "remove_tables": - entry.Content = removeTables(entry.Content) + entry.WebContent = removeTables(entry.WebContent) case "remove_clickbait": entry.Title = removeClickbait(entry.Title) } diff --git a/internal/storage/entry.go b/internal/storage/entry.go index fd8a011032a..d290ad8fdab 100644 --- a/internal/storage/entry.go +++ b/internal/storage/entry.go @@ -75,11 +75,11 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error { UPDATE entries SET - content=$1, reading_time=$2 + content=$1, web_content=$2, reading_time=$3 WHERE - id=$3 AND user_id=$4 + id=$4 AND user_id=$5 ` - _, err = tx.Exec(query, entry.Content, entry.ReadingTime, entry.ID, entry.UserID) + _, err = tx.Exec(query, entry.Content, entry.WebContent, entry.ReadingTime, entry.ID, entry.UserID) if err != nil { tx.Rollback() return fmt.Errorf(`store: unable to update content of entry #%d: %v`, entry.ID, err) @@ -89,7 +89,7 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error { UPDATE entries SET - document_vectors = setweight(to_tsvector(left(coalesce(title, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce(content, ''), 500000)), 'B') + document_vectors = setweight(to_tsvector(left(coalesce(title, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce(content, ''), 500000)), 'B') || setweight(to_tsvector(left(coalesce(web_content, ''), 500000)), 'C') WHERE id=$1 AND user_id=$2 ` @@ -98,7 +98,6 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error { tx.Rollback() return fmt.Errorf(`store: unable to update content of entry #%d: %v`, entry.ID, err) } - return tx.Commit() } @@ 
-113,6 +112,7 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error { comments_url, published_at, content, + web_content, author, user_id, feed_id, @@ -133,9 +133,10 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error { $8, $9, $10, + $11, now(), - setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($6, ''), 500000)), 'B'), - $11 + setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($6, ''), 500000)), 'B') || setweight(to_tsvector(left(coalesce($7, ''), 500000)), 'C'), + $12 ) RETURNING id, status @@ -148,13 +149,13 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error { entry.CommentsURL, entry.Date, entry.Content, + entry.WebContent, entry.Author, entry.UserID, entry.FeedID, entry.ReadingTime, pq.Array(removeDuplicates(entry.Tags)), ).Scan(&entry.ID, &entry.Status) - if err != nil { return fmt.Errorf(`store: unable to create entry %q (feed #%d): %v`, entry.URL, entry.FeedID, err) } @@ -183,12 +184,13 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error { url=$2, comments_url=$3, content=$4, - author=$5, - reading_time=$6, - document_vectors = setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($4, ''), 500000)), 'B'), - tags=$10 + web_content=$5, + author=$6, + reading_time=$7, + document_vectors = setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($4, ''), 500000)), 'B') || setweight(to_tsvector(left(coalesce($5, ''), 500000)), 'C'), + tags=$11 WHERE - user_id=$7 AND feed_id=$8 AND hash=$9 + user_id=$8 AND feed_id=$9 AND hash=$10 RETURNING id ` @@ -198,6 +200,7 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error { entry.URL, entry.CommentsURL, entry.Content, + entry.WebContent, entry.Author, entry.ReadingTime, entry.UserID, @@ -205,7 +208,6 @@ func (s *Storage) updateEntry(tx *sql.Tx, 
entry *model.Entry) error { entry.Hash, pq.Array(removeDuplicates(entry.Tags)), ).Scan(&entry.ID) - if err != nil { return fmt.Errorf(`store: unable to update entry %q: %v`, entry.URL, err) } diff --git a/internal/storage/entry_query_builder.go b/internal/storage/entry_query_builder.go index 70e728935a7..89fa5ffc591 100644 --- a/internal/storage/entry_query_builder.go +++ b/internal/storage/entry_query_builder.go @@ -250,6 +250,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) { e.author, e.share_code, e.content, + e.web_content, e.status, e.starred, e.reading_time, @@ -314,6 +315,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) { &entry.Author, &entry.ShareCode, &entry.Content, + &entry.WebContent, &entry.Status, &entry.Starred, &entry.ReadingTime, @@ -335,7 +337,6 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) { &iconID, &tz, ) - if err != nil { return nil, fmt.Errorf("unable to fetch entry row: %v", err) } diff --git a/internal/template/templates/views/entry.html b/internal/template/templates/views/entry.html index 8b55e17ddd6..db0f8560431 100644 --- a/internal/template/templates/views/entry.html +++ b/internal/template/templates/views/entry.html @@ -78,11 +78,17 @@

  • {{ icon "scraper" }}{{ t "entry.scraper.label" }} + data-fetch-original-content-url="{{ route "fetchOriginal" "entryID" .entry.ID }}" + >{{ icon "scraper" }}{{ if .entry.WebContent }}{{ t "entry.scraper.label.rss" }}{{ else }}{{ t "entry.scraper.label" }}{{ end }}
  • {{ if .entry.CommentsURL }}
  • @@ -186,9 +192,17 @@

    {{ end }} {{end}} {{ if .user }} - {{ noescape (proxyFilter .entry.Content) }} + {{ if .entry.WebContent }} + {{ noescape (proxyFilter .entry.WebContent) }} + {{ else }} + {{ noescape (proxyFilter .entry.Content) }} + {{ end }} {{ else }} - {{ noescape .entry.Content }} + {{ if .entry.WebContent }} + {{ noescape .entry.WebContent }} + {{ else }} + {{ noescape .entry.Content }} + {{ end }} {{ end }} {{ if .entry.Enclosures }} @@ -203,11 +217,11 @@

    data-last-position="{{ .MediaProgression }}" data-save-url="{{ route "saveEnclosureProgression" "enclosureID" .ID }}" > - {{ if (and $.user (mustBeProxyfied "audio")) }} - - {{ else }} - - {{ end }} + {{ if (and $.user (mustBeProxyfied "audio")) }} + + {{ else }} + + {{ end }} {{ else if hasPrefix .MimeType "video/" }} @@ -216,11 +230,11 @@

    data-last-position="{{ .MediaProgression }}" data-save-url="{{ route "saveEnclosureProgression" "enclosureID" .ID }}" > - {{ if (and $.user (mustBeProxyfied "video")) }} - - {{ else }} - - {{ end }} + {{ if (and $.user (mustBeProxyfied "video")) }} + + {{ else }} + + {{ end }} {{ else if hasPrefix .MimeType "image/" }} diff --git a/internal/ui/entry_scraper.go b/internal/ui/entry_scraper.go index b26bf04c4ce..70100c65e57 100644 --- a/internal/ui/entry_scraper.go +++ b/internal/ui/entry_scraper.go @@ -66,5 +66,52 @@ func (h *handler) fetchContent(w http.ResponseWriter, r *http.Request) { readingTime := locale.NewPrinter(user.Language).Plural("entry.estimated_reading_time", entry.ReadingTime, entry.ReadingTime) + json.OK(w, r, map[string]string{"content": proxy.ProxyRewriter(h.router, entry.WebContent), "reading_time": readingTime}) +} + +func (h *handler) fetchOriginal(w http.ResponseWriter, r *http.Request) { + loggedUserID := request.UserID(r) + entryID := request.RouteInt64Param(r, "entryID") + + entryBuilder := h.store.NewEntryQueryBuilder(loggedUserID) + entryBuilder.WithEntryID(entryID) + entryBuilder.WithoutStatus(model.EntryStatusRemoved) + + entry, err := entryBuilder.GetEntry() + if err != nil { + json.ServerError(w, r, err) + return + } + + if entry == nil { + json.NotFound(w, r) + return + } + + user, err := h.store.UserByID(entry.UserID) + if err != nil { + json.ServerError(w, r, err) + return + } + if user == nil { + json.NotFound(w, r) + return + } + + feedBuilder := storage.NewFeedQueryBuilder(h.store, loggedUserID) + feedBuilder.WithFeedID(entry.FeedID) + feed, err := feedBuilder.GetFeed() + if err != nil { + json.ServerError(w, r, err) + return + } + + if feed == nil { + json.NotFound(w, r) + return + } + + readingTime := locale.NewPrinter(user.Language).Plural("entry.estimated_reading_time", entry.ReadingTime, entry.ReadingTime) + json.OK(w, r, map[string]string{"content": proxy.ProxyRewriter(h.router, entry.Content), "reading_time": readingTime}) } diff --git 
a/internal/ui/static/js/app.js b/internal/ui/static/js/app.js index a89da8bd5b8..205a38b85be 100644 --- a/internal/ui/static/js/app.js +++ b/internal/ui/static/js/app.js @@ -326,12 +326,25 @@ function handleFetchOriginalContent() { return; } - let previousInnerHTML = element.innerHTML; - element.innerHTML = '' + element.dataset.labelLoading + ''; + let inner = element.querySelector("span"); + inner.textContent = element.dataset.labelLoading; let request = new RequestBuilder(element.dataset.fetchContentUrl); + let contentType = "web"; + let txt = element.dataset.labelRss; + let title = element.dataset.titleRss; + + if (element.dataset.currentContent === "web") { + request = new RequestBuilder(element.dataset.fetchOriginalContentUrl); + contentType = "rss"; + txt = element.dataset.label; + title = element.dataset.title; + } + request.withCallback((response) => { - element.innerHTML = previousInnerHTML; + inner.textContent = txt; + element.dataset.currentContent = contentType; + element.title = title; response.json().then((data) => { if (data.hasOwnProperty("content") && data.hasOwnProperty("reading_time")) { @@ -689,4 +702,4 @@ function checkShareAPI(title, url) { console.error(err); window.location.reload(); } -} \ No newline at end of file +} diff --git a/internal/ui/ui.go b/internal/ui/ui.go index 3acc32ad5a4..1a73f5e81e8 100644 --- a/internal/ui/ui.go +++ b/internal/ui/ui.go @@ -97,6 +97,7 @@ func Serve(router *mux.Router, store *storage.Storage, pool *worker.Pool) { uiRouter.HandleFunc("/entry/save/{entryID}", handler.saveEntry).Name("saveEntry").Methods(http.MethodPost) uiRouter.HandleFunc("/entry/enclosure/{enclosureID}/save-progression", handler.saveEnclosureProgression).Name("saveEnclosureProgression").Methods(http.MethodPost) uiRouter.HandleFunc("/entry/download/{entryID}", handler.fetchContent).Name("fetchContent").Methods(http.MethodPost) + uiRouter.HandleFunc("/entry/original/{entryID}", 
handler.fetchOriginal).Name("fetchOriginal").Methods(http.MethodPost) uiRouter.HandleFunc("/proxy/{encodedDigest}/{encodedURL}", handler.mediaProxy).Name("proxy").Methods(http.MethodGet) uiRouter.HandleFunc("/entry/bookmark/{entryID}", handler.toggleBookmark).Name("toggleBookmark").Methods(http.MethodPost)