From 9c97327926937c5aec8d1f00d03aa20e88812ec0 Mon Sep 17 00:00:00 2001 From: GeoffreyChen777 Date: Fri, 28 Jun 2024 15:25:14 +0100 Subject: [PATCH] feat: fuzzy scraping (#572) --- app/locales/locales/en.GB.json | 4 +- app/locales/locales/zh.CN.json | 4 +- app/locales/locales/zh.TW.json | 4 +- app/main/services/contextmenu-service.ts | 51 ++++---- app/renderer/global.d.ts | 2 + app/renderer/services/paper-service.ts | 22 +--- app/renderer/services/scrape-service.ts | 109 +++++++++++++++++ app/renderer/services/uistate-service.ts | 8 ++ .../candidate-view/candidate-view.vue | 114 ++++++++++++++++++ .../list-view/components/paper-list-item.vue | 24 +++- .../components/list-view/list-view.vue | 7 ++ .../table-view/components/table-item.vue | 24 +++- .../table-view/table-preview-view.vue | 10 ++ .../components/table-view/table-view.vue | 15 ++- .../main-view/data-view/paper-data-view.vue | 12 ++ app/renderer/ui/main-view/main-view.vue | 44 ++++++- 16 files changed, 401 insertions(+), 53 deletions(-) create mode 100644 app/renderer/ui/main-view/candidate-view/candidate-view.vue diff --git a/app/locales/locales/en.GB.json b/app/locales/locales/en.GB.json index 3fc41841..48626974 100644 --- a/app/locales/locales/en.GB.json +++ b/app/locales/locales/en.GB.json @@ -43,7 +43,8 @@ "historyreleasenote": "History Release Note", "feed": "Feed", "feedTime": "Feed Time", - "abstract": "Abstract" + "abstract": "Abstract", + "foundcandidates": "Found Candidates" }, "smartfilter": { "startops": "Start Ops", @@ -73,6 +74,7 @@ "menu": { "rescrape": "Scrape", "rescrapefrom": "Scrape from", + "fuzzyscrape": "Fuzzily Scrape", "removefrom": "Remove from", "delete": "Delete", "edit": "Edit", diff --git a/app/locales/locales/zh.CN.json b/app/locales/locales/zh.CN.json index 91babdbf..3541d6b8 100644 --- a/app/locales/locales/zh.CN.json +++ b/app/locales/locales/zh.CN.json @@ -43,7 +43,8 @@ "historyreleasenote": "历史版本", "feed": "订阅", "feedTime": "更新时间", - "abstract": "摘要" + "abstract": "摘要", + 
"foundcandidates": "找到候选匹配" }, "smartfilter": { "startops": "起始运算符", @@ -73,6 +74,7 @@ "menu": { "rescrape": "搜寻元数据", "rescrapefrom": "从 ... 搜寻元数据", + "fuzzyscrape": "模糊搜寻元数据", "removefrom": "从 ... 移除", "delete": "删除", "edit": "编辑", diff --git a/app/locales/locales/zh.TW.json b/app/locales/locales/zh.TW.json index b441ffd7..adc91da6 100644 --- a/app/locales/locales/zh.TW.json +++ b/app/locales/locales/zh.TW.json @@ -43,7 +43,8 @@ "historyreleasenote": "歷史版本說明", "feed": "訂閱", "feedTime": "更新時間", - "abstract": "摘要" + "abstract": "摘要", + "foundcandidates": "找到候選匹配" }, "smartfilter": { "startops": "起始運算子", @@ -73,6 +74,7 @@ "menu": { "rescrape": "搜尋元數據", "rescrapefrom": "從 ... 搜尋元數據", + "fuzzyscrape": "模糊搜尋元數據", "removefrom": "從 ... 移除", "delete": "刪除", "edit": "編輯", diff --git a/app/main/services/contextmenu-service.ts b/app/main/services/contextmenu-service.ts index cd3fb6ad..afd4e2df 100644 --- a/app/main/services/contextmenu-service.ts +++ b/app/main/services/contextmenu-service.ts @@ -67,6 +67,7 @@ export interface IContextMenuServiceState { dataContextMenuShowInFinderClicked: number; dataContextMenuEditClicked: number; dataContextMenuScrapeClicked: number; + dataContextMenuFuzzyScrapeClicked: number; dataContextMenuDeleteClicked: number; dataContextMenuFlagClicked: number; dataContextMenuExportBibTexClicked: number; @@ -114,6 +115,7 @@ export class ContextMenuService extends Eventable { dataContextMenuShowInFinderClicked: 0, dataContextMenuEditClicked: 0, dataContextMenuScrapeClicked: 0, + dataContextMenuFuzzyScrapeClicked: 0, dataContextMenuDeleteClicked: 0, dataContextMenuFlagClicked: 0, dataContextMenuExportBibTexClicked: 0, @@ -234,14 +236,6 @@ export class ContextMenuService extends Eventable { }, }, { type: "separator" }, - { - label: this._locales.t("menu.edit"), - enabled: allowEdit, - accelerator: preferenceService.get("shortcutEdit") as string, - click: () => { - this.fire("dataContextMenuEditClicked"); - }, - }, { label: 
this._locales.t("menu.rescrape"), accelerator: preferenceService.get("shortcutScrape") as string, @@ -249,25 +243,25 @@ export class ContextMenuService extends Eventable { this.fire("dataContextMenuScrapeClicked"); }, }, - { label: this._locales.t("menu.rescrapefrom"), submenu: scraperMenuTemplate, }, { - label: this._locales.t("menu.removefrom"), - submenu: [ - { - label: this._locales.t("mainview.folders"), - submenu: removeFolderMenuTemplate, - }, - { - label: this._locales.t("mainview.tags"), - submenu: removeTagMenuTemplate, - }, - ], + label: this._locales.t("menu.fuzzyscrape"), + click: () => { + this.fire("dataContextMenuFuzzyScrapeClicked"); + }, + }, + { type: "separator" }, + { + label: this._locales.t("menu.edit"), + enabled: allowEdit, + accelerator: preferenceService.get("shortcutEdit") as string, + click: () => { + this.fire("dataContextMenuEditClicked"); + }, }, - { label: this._locales.t("menu.delete"), accelerator: preferenceService.get("shortcutDelete") as string, @@ -282,6 +276,19 @@ export class ContextMenuService extends Eventable { this.fire("dataContextMenuFlagClicked"); }, }, + { + label: this._locales.t("menu.removefrom"), + submenu: [ + { + label: this._locales.t("mainview.folders"), + submenu: removeFolderMenuTemplate, + }, + { + label: this._locales.t("mainview.tags"), + submenu: removeTagMenuTemplate, + }, + ], + }, { type: "separator" }, { label: this._locales.t("menu.export"), @@ -555,7 +562,7 @@ export class ContextMenuService extends Eventable { click: () => { this.fire({ supContextMenuRenameClicked: fileURL }); }, - } + }, ]; const menu = Menu.buildFromTemplate(template); menu.popup(); diff --git a/app/renderer/global.d.ts b/app/renderer/global.d.ts index 44952e84..cbfe406d 100644 --- a/app/renderer/global.d.ts +++ b/app/renderer/global.d.ts @@ -17,6 +17,7 @@ import { ShortcutService } from "@/renderer/services/shortcut-service"; import { SmartFilterService } from "@/renderer/services/smartfilter-service"; import { UISlotService } 
from "@/renderer/services/uislot-service"; import { UIStateService } from "@/renderer/services/uistate-service"; +import { ScrapeService } from "@/renderer/services/scrape-service"; declare global { var preferenceService: PreferenceService; @@ -39,4 +40,5 @@ declare global { var uiStateService: UIStateService; var uiSlotService: UISlotService; var querySentenceService: QuerySentenceService; + var scrapeService: ScrapeService } diff --git a/app/renderer/services/paper-service.ts b/app/renderer/services/paper-service.ts index abe0fa3d..b796be57 100644 --- a/app/renderer/services/paper-service.ts +++ b/app/renderer/services/paper-service.ts @@ -103,27 +103,17 @@ export class PaperFilterOptions implements IPaperFilterOptions { } static checkIsDateFilter(dateFilter: string) { - return dateFilter.match( - /(<|<=|>|>=)\s*\[\d+ DAYS\]/g - ); + return dateFilter.match(/(<|<=|>|>=)\s*\[\d+ DAYS\]/g); } static parseDateFilter(dateFilter: string) { - const compareDateMatch = dateFilter.match( - /(<|<=|>|>=)\s*\[\d+ DAYS\]/g - ); + const compareDateMatch = dateFilter.match(/(<|<=|>|>=)\s*\[\d+ DAYS\]/g); if (compareDateMatch) { for (const match of compareDateMatch) { if (dateFilter.includes("<")) { - dateFilter = dateFilter.replaceAll( - match, - match.replaceAll("<", ">") - ); + dateFilter = dateFilter.replaceAll(match, match.replaceAll("<", ">")); } else if (dateFilter.includes(">")) { - dateFilter = dateFilter.replaceAll( - match, - match.replaceAll(">", "<") - ); + dateFilter = dateFilter.replaceAll(match, match.replaceAll(">", "<")); } } } @@ -595,7 +585,7 @@ export class PaperService extends Eventable { paperEntity.supURLs = paperEntity.supURLs.map((supURL) => { if (supURL === url) { const realSupURL = supURL.split(":::").pop(); - return `${name}:::${realSupURL}` + return `${name}:::${realSupURL}`; } else { return supURL; } @@ -746,7 +736,7 @@ export class PaperService extends Eventable { if (this._preferenceService.get("allowRoutineMatch") as boolean) { if ( 
Math.round(Date.now() / 1000) - - (this._preferenceService.get("lastRematchTime") as number) < + (this._preferenceService.get("lastRematchTime") as number) < 7 * 86400 - 10 ) { return; diff --git a/app/renderer/services/scrape-service.ts b/app/renderer/services/scrape-service.ts index 1dce4663..63eacc99 100644 --- a/app/renderer/services/scrape-service.ts +++ b/app/renderer/services/scrape-service.ts @@ -6,6 +6,7 @@ import { ILogService, LogService } from "@/common/services/log-service"; import { PaperEntity } from "@/models/paper-entity"; import { HookService, IHookService } from "@/renderer/services/hook-service"; import { ProcessingKey, processing } from "@/renderer/services/uistate-service"; +import { IPaperEntityCollection } from "@/repositories/db-repository/paper-entity-repository"; export const IScrapeService = createDecorator("scrapeService"); @@ -243,4 +244,112 @@ export class ScrapeService extends Eventable<{}> { return scrapedPaperEntityDrafts; } + + /** + * Fuzzily scrape metadata candidates for the given paper entities, 10 entities per chunk. + * @param paperEntities - paper entities to look up candidates for. + * @returns Record keyed by each paper entity's `_id` holding the candidates found for it; a chunk that throws is logged and left out. */ + @processing(ProcessingKey.General) + @errorcatching( + "Failed to fuzzily scrape data source.", + true, + "ScrapeService", + [] + ) + async fuzzyScrape( + paperEntities: IPaperEntityCollection + ): Promise> { + // 0. Wait for scraper extension to be ready. 
+ await this._scrapeExtensionReady(); + + // Process in chunks of 10; progress is only reported for 20 or more entities. + const jobID = Math.random().toString(36).substring(7); + const results: Record = {}; + for (let i = 0; i < paperEntities.length; i += 10) { + if (paperEntities.length >= 20) { + this._logService.progress( + `Processing ${i} / ${paperEntities.length}...`, + (i / paperEntities.length) * 100, + true, + "ScrapeService", + jobID + ); + } + try { + let paperEntityChunk = paperEntities.slice(i, i + 10); + + const paperEntityDraftCandidates = await this._fuzzyScrape( + paperEntityChunk + ); + + paperEntityChunk.forEach((p, index) => { + results[`${p._id}`] = paperEntityDraftCandidates[index]; + }); + } catch (e) { + this._logService.error( + "Failed to fuzzily scrape data source.", + `${(e as Error).message} ${(e as Error).stack}`, + true, + "ScrapeService" + ); + } + } + + if (paperEntities.length >= 20) { + this._logService.progress(`Done!`, 100, true, "ScrapeService", jobID); + } + + return results; + } + + /** + * Run the "beforeFuzzyScrape", "fuzzyScrapeMetadata" and "afterScrapeEntry" hook points over one chunk of paper entities. + * @param paperEntities - chunk of paper entities to look up candidates for. + * @returns Candidate lists, expected to be index-aligned with the input (fuzzyScrape pairs them by position) - TODO confirm against the hook contract. 
+ */ + @processing(ProcessingKey.General) + @errorcatching("Failed to scrape entry.", true, "ScrapeService", []) + async _fuzzyScrape( + paperEntities: IPaperEntityCollection + ): Promise { + if (this._hookService.hasHook("beforeFuzzyScrape")) { + [paperEntities] = await this._hookService.modifyHookPoint( + "beforeFuzzyScrape", + 5000, + paperEntities + ); + } + + let paperEntityDraftCandidates: PaperEntity[][] = []; + if (this._hookService.hasHook("fuzzyScrapeMetadata")) { + paperEntityDraftCandidates = await this._hookService.transformhookPoint< + any[], + Object[] + >( + "fuzzyScrapeMetadata", + 600000, // 10 min + paperEntities + ); + paperEntityDraftCandidates = paperEntityDraftCandidates.map((candidates) => { // keep the rebuilt lists: forEach discarded the mapped result + return candidates.map((candidate) => { + return new PaperEntity(candidate); + }); + }); + } + + if (this._hookService.hasHook("afterScrapeEntry")) { // NOTE(review): reuses the regular scrape hook name - confirm this is intended for the fuzzy path + [paperEntityDraftCandidates] = await this._hookService.modifyHookPoint( + "afterScrapeEntry", + 5000, + paperEntityDraftCandidates + ); + + paperEntityDraftCandidates = paperEntityDraftCandidates.map((candidates) => { // hook output is rebuilt as PaperEntity instances again + return candidates.map((candidate) => { + return new PaperEntity(candidate); + }); + }); + } + + return paperEntityDraftCandidates; + } } diff --git a/app/renderer/services/uistate-service.ts b/app/renderer/services/uistate-service.ts index 6b1f1b14..4121131f 100644 --- a/app/renderer/services/uistate-service.ts +++ b/app/renderer/services/uistate-service.ts @@ -33,6 +33,9 @@ export interface IUIStateServiceState { selectedQuerySentenceIds: string[]; selectedFeed: string; + showingCandidatesId: string; + metadataCandidates: Record; + editingPaperSmartFilter: PaperSmartFilter; querySentencesSidebar: Array; @@ -85,6 +88,9 @@ export class UIStateService extends Eventable { selectedFeed: "feed-all", dragingIds: [], + showingCandidatesId: "", + metadataCandidates: {}, + querySentencesSidebar: [], querySentenceCommandbar: "", @@ -182,6 +188,8 @@ export class UIStateService extends Eventable { feedEditViewShown: false, paperSmartFilterEditViewShown: false, deleteConfirmShown: false, + overlayNoticationShown: 
false, + candidatesViewShown: false, renderRequired: -1, feedEntityAddingStatus: 0, selectedIndex: [], diff --git a/app/renderer/ui/main-view/candidate-view/candidate-view.vue b/app/renderer/ui/main-view/candidate-view/candidate-view.vue new file mode 100644 index 00000000..f338fe53 --- /dev/null +++ b/app/renderer/ui/main-view/candidate-view/candidate-view.vue @@ -0,0 +1,114 @@ + + + diff --git a/app/renderer/ui/main-view/data-view/components/list-view/components/paper-list-item.vue b/app/renderer/ui/main-view/data-view/components/list-view/components/paper-list-item.vue index f7a7803f..920e7162 100644 --- a/app/renderer/ui/main-view/data-view/components/list-view/components/paper-list-item.vue +++ b/app/renderer/ui/main-view/data-view/components/list-view/components/paper-list-item.vue @@ -8,7 +8,7 @@ import { BIconFileEarmark, BIconGithub, } from "bootstrap-icons-vue"; -import { PropType } from "vue"; +import { PropType, ref } from "vue"; import WordHighlighter from "vue-word-highlighter"; import { getCategorizerString, getPubTypeString } from "@/base/string"; @@ -62,11 +62,18 @@ const props = defineProps({ type: String, default: "", }, + showCandidateBtn: { + type: Boolean, + default: false, + }, }); const renderTitle = (title: string) => { return renderService.renderMath(title); }; + +const emits = defineEmits(["event:click-candidate-btn"]); +