From 88af4fd2ab652f34b129e13b2748327892679af5 Mon Sep 17 00:00:00 2001 From: PromKnight Date: Sat, 16 Nov 2024 00:34:05 +0000 Subject: [PATCH 1/8] refactor: rename scraper --- .gitignore | 2 +- .run/Zilean.DmmScraper.run.xml | 25 ------------------- Zilean.sln | 2 +- .../Benchmarks/PythonParsing.cs | 2 +- .../Zilean.Benchmarks.csproj | 2 +- .../ServiceCollectionExtensions.cs | 6 ++++- .../Features/Bootstrapping/ServiceLifetime.cs | 5 +++- .../Features/Dmm/DmmFileDownloader.cs | 2 +- .../Features/Dmm/DmmPageProcessor.cs | 4 ++- .../Features/Dmm/DmmScraping.cs | 4 ++- .../Features/Dmm/DmmSyncState.cs | 2 +- .../Features/Imdb/ImdbFileDownloader.cs | 2 +- .../Features/Imdb/ImdbFileExtensions.cs | 2 +- .../Features/Imdb/ImdbFileProcessor.cs | 2 +- .../Features/Imdb/ImdbMetadataLoader.cs | 2 +- .../Features/LzString/Decompressor.cs | 2 +- .../Features/LzString/StringBuilderCache.cs | 2 +- .../PythonSupport/ParseTorrentNameService.cs | 4 +-- .../ParseTorrentTitleResponse.cs | 2 +- .../PythonSupport/PyObjectExtensions.cs | 2 +- .../GlobalUsings.cs | 10 ++++---- .../Program.cs | 0 .../Zilean.Scraper.csproj} | 0 tests/Zilean.Tests/GlobalUsings.cs | 4 +-- tests/Zilean.Tests/Zilean.Tests.csproj | 2 +- 25 files changed, 39 insertions(+), 53 deletions(-) delete mode 100644 .run/Zilean.DmmScraper.run.xml rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/Bootstrapping/ServiceCollectionExtensions.cs (87%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/Bootstrapping/ServiceLifetime.cs (93%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/Dmm/DmmFileDownloader.cs (98%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/Dmm/DmmPageProcessor.cs (97%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/Dmm/DmmScraping.cs (98%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/Dmm/DmmSyncState.cs (97%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/Imdb/ImdbFileDownloader.cs (98%) rename src/{Zilean.DmmScraper => 
Zilean.Scraper}/Features/Imdb/ImdbFileExtensions.cs (91%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/Imdb/ImdbFileProcessor.cs (97%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/Imdb/ImdbMetadataLoader.cs (97%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/LzString/Decompressor.cs (99%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/LzString/StringBuilderCache.cs (94%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/PythonSupport/ParseTorrentNameService.cs (98%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/PythonSupport/ParseTorrentTitleResponse.cs (60%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Features/PythonSupport/PyObjectExtensions.cs (77%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/GlobalUsings.cs (81%) rename src/{Zilean.DmmScraper => Zilean.Scraper}/Program.cs (100%) rename src/{Zilean.DmmScraper/Zilean.DmmScraper.csproj => Zilean.Scraper/Zilean.Scraper.csproj} (100%) diff --git a/.gitignore b/.gitignore index 37c8dad..ef05ff7 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ riderModule.iml .DS_Store .idea/ -src/Zilean.DmmScraper/python/ +src/Zilean.Scraper/python/ diff --git a/.run/Zilean.DmmScraper.run.xml b/.run/Zilean.DmmScraper.run.xml deleted file mode 100644 index cabe62a..0000000 --- a/.run/Zilean.DmmScraper.run.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - \ No newline at end of file diff --git a/Zilean.sln b/Zilean.sln index 607bb47..846c93c 100644 --- a/Zilean.sln +++ b/Zilean.sln @@ -31,7 +31,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "github", "github", "{CF4C96 .github\workflows\cicd.yaml = .github\workflows\cicd.yaml EndProjectSection EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zilean.DmmScraper", "src\Zilean.DmmScraper\Zilean.DmmScraper.csproj", "{192B767E-7CFA-4F72-A9CA-35BE2CEE46FD}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zilean.Scraper", "src\Zilean.Scraper\Zilean.Scraper.csproj", 
"{192B767E-7CFA-4F72-A9CA-35BE2CEE46FD}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zilean.Shared", "src\Zilean.Shared\Zilean.Shared.csproj", "{85F3D012-6CA4-43D3-A08F-0A93FA739F46}" EndProject diff --git a/src/Zilean.Benchmarks/Benchmarks/PythonParsing.cs b/src/Zilean.Benchmarks/Benchmarks/PythonParsing.cs index 7c4273c..8fd3c97 100644 --- a/src/Zilean.Benchmarks/Benchmarks/PythonParsing.cs +++ b/src/Zilean.Benchmarks/Benchmarks/PythonParsing.cs @@ -1,4 +1,4 @@ -using Zilean.DmmScraper.Features.PythonSupport; +using Zilean.Scraper.Features.PythonSupport; namespace Zilean.Benchmarks.Benchmarks; diff --git a/src/Zilean.Benchmarks/Zilean.Benchmarks.csproj b/src/Zilean.Benchmarks/Zilean.Benchmarks.csproj index a3a5752..3c0863b 100644 --- a/src/Zilean.Benchmarks/Zilean.Benchmarks.csproj +++ b/src/Zilean.Benchmarks/Zilean.Benchmarks.csproj @@ -13,7 +13,7 @@ - + diff --git a/src/Zilean.DmmScraper/Features/Bootstrapping/ServiceCollectionExtensions.cs b/src/Zilean.Scraper/Features/Bootstrapping/ServiceCollectionExtensions.cs similarity index 87% rename from src/Zilean.DmmScraper/Features/Bootstrapping/ServiceCollectionExtensions.cs rename to src/Zilean.Scraper/Features/Bootstrapping/ServiceCollectionExtensions.cs index 44b1f41..626a242 100644 --- a/src/Zilean.DmmScraper/Features/Bootstrapping/ServiceCollectionExtensions.cs +++ b/src/Zilean.Scraper/Features/Bootstrapping/ServiceCollectionExtensions.cs @@ -1,4 +1,8 @@ -namespace Zilean.DmmScraper.Features.Bootstrapping; +using Zilean.Scraper.Features.Dmm; +using Zilean.Scraper.Features.Imdb; +using Zilean.Scraper.Features.PythonSupport; + +namespace Zilean.Scraper.Features.Bootstrapping; public static class ServiceCollectionExtensions { diff --git a/src/Zilean.DmmScraper/Features/Bootstrapping/ServiceLifetime.cs b/src/Zilean.Scraper/Features/Bootstrapping/ServiceLifetime.cs similarity index 93% rename from src/Zilean.DmmScraper/Features/Bootstrapping/ServiceLifetime.cs rename to 
src/Zilean.Scraper/Features/Bootstrapping/ServiceLifetime.cs index 8c15c74..c30bb7f 100644 --- a/src/Zilean.DmmScraper/Features/Bootstrapping/ServiceLifetime.cs +++ b/src/Zilean.Scraper/Features/Bootstrapping/ServiceLifetime.cs @@ -1,4 +1,7 @@ -namespace Zilean.DmmScraper.Features.Bootstrapping; +using Zilean.Scraper.Features.Dmm; +using Zilean.Scraper.Features.Imdb; + +namespace Zilean.Scraper.Features.Bootstrapping; public class ServiceLifetime(ImdbMetadataLoader metadataLoader, DmmScraping dmmScraper, IServiceProvider serviceProvider, ILogger logger, ZileanConfiguration configuration) : IHostedLifecycleService { diff --git a/src/Zilean.DmmScraper/Features/Dmm/DmmFileDownloader.cs b/src/Zilean.Scraper/Features/Dmm/DmmFileDownloader.cs similarity index 98% rename from src/Zilean.DmmScraper/Features/Dmm/DmmFileDownloader.cs rename to src/Zilean.Scraper/Features/Dmm/DmmFileDownloader.cs index a31f060..5065e6a 100644 --- a/src/Zilean.DmmScraper/Features/Dmm/DmmFileDownloader.cs +++ b/src/Zilean.Scraper/Features/Dmm/DmmFileDownloader.cs @@ -1,4 +1,4 @@ -namespace Zilean.DmmScraper.Features.Dmm; +namespace Zilean.Scraper.Features.Dmm; public class DmmFileDownloader(ILogger logger, ZileanConfiguration configuration) { diff --git a/src/Zilean.DmmScraper/Features/Dmm/DmmPageProcessor.cs b/src/Zilean.Scraper/Features/Dmm/DmmPageProcessor.cs similarity index 97% rename from src/Zilean.DmmScraper/Features/Dmm/DmmPageProcessor.cs rename to src/Zilean.Scraper/Features/Dmm/DmmPageProcessor.cs index c309f6f..55c60c3 100644 --- a/src/Zilean.DmmScraper/Features/Dmm/DmmPageProcessor.cs +++ b/src/Zilean.Scraper/Features/Dmm/DmmPageProcessor.cs @@ -1,4 +1,6 @@ -namespace Zilean.DmmScraper.Features.Dmm; +using Zilean.Scraper.Features.LzString; + +namespace Zilean.Scraper.Features.Dmm; public partial class DmmPageProcessor(DmmSyncState state) : IDisposable diff --git a/src/Zilean.DmmScraper/Features/Dmm/DmmScraping.cs b/src/Zilean.Scraper/Features/Dmm/DmmScraping.cs similarity index 
98% rename from src/Zilean.DmmScraper/Features/Dmm/DmmScraping.cs rename to src/Zilean.Scraper/Features/Dmm/DmmScraping.cs index 7723626..a85c77c 100644 --- a/src/Zilean.DmmScraper/Features/Dmm/DmmScraping.cs +++ b/src/Zilean.Scraper/Features/Dmm/DmmScraping.cs @@ -1,4 +1,6 @@ -namespace Zilean.DmmScraper.Features.Dmm; +using Zilean.Scraper.Features.PythonSupport; + +namespace Zilean.Scraper.Features.Dmm; public class DmmScraping( DmmSyncState dmmState, diff --git a/src/Zilean.DmmScraper/Features/Dmm/DmmSyncState.cs b/src/Zilean.Scraper/Features/Dmm/DmmSyncState.cs similarity index 97% rename from src/Zilean.DmmScraper/Features/Dmm/DmmSyncState.cs rename to src/Zilean.Scraper/Features/Dmm/DmmSyncState.cs index 8e83fcc..c7937dc 100644 --- a/src/Zilean.DmmScraper/Features/Dmm/DmmSyncState.cs +++ b/src/Zilean.Scraper/Features/Dmm/DmmSyncState.cs @@ -1,4 +1,4 @@ -namespace Zilean.DmmScraper.Features.Dmm; +namespace Zilean.Scraper.Features.Dmm; public class DmmSyncState(ILogger logger, DmmService dmmService) { diff --git a/src/Zilean.DmmScraper/Features/Imdb/ImdbFileDownloader.cs b/src/Zilean.Scraper/Features/Imdb/ImdbFileDownloader.cs similarity index 98% rename from src/Zilean.DmmScraper/Features/Imdb/ImdbFileDownloader.cs rename to src/Zilean.Scraper/Features/Imdb/ImdbFileDownloader.cs index 6d5572b..8a66609 100644 --- a/src/Zilean.DmmScraper/Features/Imdb/ImdbFileDownloader.cs +++ b/src/Zilean.Scraper/Features/Imdb/ImdbFileDownloader.cs @@ -1,4 +1,4 @@ -namespace Zilean.DmmScraper.Features.Imdb; +namespace Zilean.Scraper.Features.Imdb; public class ImdbFileDownloader(ILogger logger) { diff --git a/src/Zilean.DmmScraper/Features/Imdb/ImdbFileExtensions.cs b/src/Zilean.Scraper/Features/Imdb/ImdbFileExtensions.cs similarity index 91% rename from src/Zilean.DmmScraper/Features/Imdb/ImdbFileExtensions.cs rename to src/Zilean.Scraper/Features/Imdb/ImdbFileExtensions.cs index f78a565..d10c7b9 100644 --- a/src/Zilean.DmmScraper/Features/Imdb/ImdbFileExtensions.cs +++ 
b/src/Zilean.Scraper/Features/Imdb/ImdbFileExtensions.cs @@ -1,4 +1,4 @@ -namespace Zilean.DmmScraper.Features.Imdb; +namespace Zilean.Scraper.Features.Imdb; public static class ImdbFileExtensions { diff --git a/src/Zilean.DmmScraper/Features/Imdb/ImdbFileProcessor.cs b/src/Zilean.Scraper/Features/Imdb/ImdbFileProcessor.cs similarity index 97% rename from src/Zilean.DmmScraper/Features/Imdb/ImdbFileProcessor.cs rename to src/Zilean.Scraper/Features/Imdb/ImdbFileProcessor.cs index 3952dea..26f1c0b 100644 --- a/src/Zilean.DmmScraper/Features/Imdb/ImdbFileProcessor.cs +++ b/src/Zilean.Scraper/Features/Imdb/ImdbFileProcessor.cs @@ -1,4 +1,4 @@ -namespace Zilean.DmmScraper.Features.Imdb; +namespace Zilean.Scraper.Features.Imdb; public class ImdbFileProcessor(ILogger logger, IImdbFileService imdbFileService) { diff --git a/src/Zilean.DmmScraper/Features/Imdb/ImdbMetadataLoader.cs b/src/Zilean.Scraper/Features/Imdb/ImdbMetadataLoader.cs similarity index 97% rename from src/Zilean.DmmScraper/Features/Imdb/ImdbMetadataLoader.cs rename to src/Zilean.Scraper/Features/Imdb/ImdbMetadataLoader.cs index 52c956c..e3dcb52 100644 --- a/src/Zilean.DmmScraper/Features/Imdb/ImdbMetadataLoader.cs +++ b/src/Zilean.Scraper/Features/Imdb/ImdbMetadataLoader.cs @@ -1,4 +1,4 @@ -namespace Zilean.DmmScraper.Features.Imdb; +namespace Zilean.Scraper.Features.Imdb; public class ImdbMetadataLoader(ImdbFileDownloader downloader, ImdbFileProcessor processor, ILogger logger, ImdbFileService imdbFileService) { diff --git a/src/Zilean.DmmScraper/Features/LzString/Decompressor.cs b/src/Zilean.Scraper/Features/LzString/Decompressor.cs similarity index 99% rename from src/Zilean.DmmScraper/Features/LzString/Decompressor.cs rename to src/Zilean.Scraper/Features/LzString/Decompressor.cs index 93c5b48..29d3067 100644 --- a/src/Zilean.DmmScraper/Features/LzString/Decompressor.cs +++ b/src/Zilean.Scraper/Features/LzString/Decompressor.cs @@ -1,4 +1,4 @@ -namespace Zilean.DmmScraper.Features.LzString; 
+namespace Zilean.Scraper.Features.LzString; public class Decompressor { diff --git a/src/Zilean.DmmScraper/Features/LzString/StringBuilderCache.cs b/src/Zilean.Scraper/Features/LzString/StringBuilderCache.cs similarity index 94% rename from src/Zilean.DmmScraper/Features/LzString/StringBuilderCache.cs rename to src/Zilean.Scraper/Features/LzString/StringBuilderCache.cs index 1b63e46..7e40a0d 100644 --- a/src/Zilean.DmmScraper/Features/LzString/StringBuilderCache.cs +++ b/src/Zilean.Scraper/Features/LzString/StringBuilderCache.cs @@ -1,4 +1,4 @@ -namespace Zilean.DmmScraper.Features.LzString; +namespace Zilean.Scraper.Features.LzString; public static class StringBuilderCache { diff --git a/src/Zilean.DmmScraper/Features/PythonSupport/ParseTorrentNameService.cs b/src/Zilean.Scraper/Features/PythonSupport/ParseTorrentNameService.cs similarity index 98% rename from src/Zilean.DmmScraper/Features/PythonSupport/ParseTorrentNameService.cs rename to src/Zilean.Scraper/Features/PythonSupport/ParseTorrentNameService.cs index df83718..900aa9f 100644 --- a/src/Zilean.DmmScraper/Features/PythonSupport/ParseTorrentNameService.cs +++ b/src/Zilean.Scraper/Features/PythonSupport/ParseTorrentNameService.cs @@ -1,4 +1,4 @@ -namespace Zilean.DmmScraper.Features.PythonSupport; +namespace Zilean.Scraper.Features.PythonSupport; public class ParseTorrentNameService { @@ -24,7 +24,7 @@ from loguru import logger custom_format = ( "[{time:HH:mm:ss}] | " f"{light_blue}{{level}}{reset} | " - f"{light_green}\"Zilean.DmmScraper.Features.Python.ParseTorrentNameService\"{reset} | " + f"{light_green}\"Zilean.Scraper.Features.Python.ParseTorrentNameService\"{reset} | " "{message}" ) diff --git a/src/Zilean.DmmScraper/Features/PythonSupport/ParseTorrentTitleResponse.cs b/src/Zilean.Scraper/Features/PythonSupport/ParseTorrentTitleResponse.cs similarity index 60% rename from src/Zilean.DmmScraper/Features/PythonSupport/ParseTorrentTitleResponse.cs rename to 
src/Zilean.Scraper/Features/PythonSupport/ParseTorrentTitleResponse.cs index 3369afe..91e5dcd 100644 --- a/src/Zilean.DmmScraper/Features/PythonSupport/ParseTorrentTitleResponse.cs +++ b/src/Zilean.Scraper/Features/PythonSupport/ParseTorrentTitleResponse.cs @@ -1,3 +1,3 @@ -namespace Zilean.DmmScraper.Features.PythonSupport; +namespace Zilean.Scraper.Features.PythonSupport; public record ParseTorrentTitleResponse(bool Success, TorrentInfo? Response); diff --git a/src/Zilean.DmmScraper/Features/PythonSupport/PyObjectExtensions.cs b/src/Zilean.Scraper/Features/PythonSupport/PyObjectExtensions.cs similarity index 77% rename from src/Zilean.DmmScraper/Features/PythonSupport/PyObjectExtensions.cs rename to src/Zilean.Scraper/Features/PythonSupport/PyObjectExtensions.cs index ec408cd..32a6f19 100644 --- a/src/Zilean.DmmScraper/Features/PythonSupport/PyObjectExtensions.cs +++ b/src/Zilean.Scraper/Features/PythonSupport/PyObjectExtensions.cs @@ -1,4 +1,4 @@ -namespace Zilean.DmmScraper.Features.PythonSupport; +namespace Zilean.Scraper.Features.PythonSupport; public static class PyObjectExtensions { diff --git a/src/Zilean.DmmScraper/GlobalUsings.cs b/src/Zilean.Scraper/GlobalUsings.cs similarity index 81% rename from src/Zilean.DmmScraper/GlobalUsings.cs rename to src/Zilean.Scraper/GlobalUsings.cs index 7c66065..3136c16 100644 --- a/src/Zilean.DmmScraper/GlobalUsings.cs +++ b/src/Zilean.Scraper/GlobalUsings.cs @@ -21,11 +21,11 @@ global using Zilean.Database; global using Zilean.Database.Bootstrapping; global using Zilean.Database.Services; -global using Zilean.DmmScraper.Features.Bootstrapping; -global using Zilean.DmmScraper.Features.Dmm; -global using Zilean.DmmScraper.Features.Imdb; -global using Zilean.DmmScraper.Features.LzString; -global using Zilean.DmmScraper.Features.PythonSupport; +global using Zilean.Scraper.Features.Bootstrapping; +global using Zilean.Scraper.Features.Dmm; +global using Zilean.Scraper.Features.Imdb; +global using 
Zilean.Scraper.Features.LzString; +global using Zilean.Scraper.Features.PythonSupport; global using Zilean.Shared.Extensions; global using Zilean.Shared.Features.Configuration; global using Zilean.Shared.Features.Dmm; diff --git a/src/Zilean.DmmScraper/Program.cs b/src/Zilean.Scraper/Program.cs similarity index 100% rename from src/Zilean.DmmScraper/Program.cs rename to src/Zilean.Scraper/Program.cs diff --git a/src/Zilean.DmmScraper/Zilean.DmmScraper.csproj b/src/Zilean.Scraper/Zilean.Scraper.csproj similarity index 100% rename from src/Zilean.DmmScraper/Zilean.DmmScraper.csproj rename to src/Zilean.Scraper/Zilean.Scraper.csproj diff --git a/tests/Zilean.Tests/GlobalUsings.cs b/tests/Zilean.Tests/GlobalUsings.cs index 0f0348c..ac078aa 100644 --- a/tests/Zilean.Tests/GlobalUsings.cs +++ b/tests/Zilean.Tests/GlobalUsings.cs @@ -4,8 +4,8 @@ global using NSubstitute; global using Testcontainers.PostgreSql; global using Xunit.Abstractions; -global using Zilean.DmmScraper.Features.Imdb; -global using Zilean.DmmScraper.Features.PythonSupport; +global using Zilean.Scraper.Features.Imdb; +global using Zilean.Scraper.Features.PythonSupport; global using Zilean.Shared.Features.Configuration; global using Zilean.Shared.Features.Dmm; global using Zilean.Shared.Features.Imdb; diff --git a/tests/Zilean.Tests/Zilean.Tests.csproj b/tests/Zilean.Tests/Zilean.Tests.csproj index 57d52cc..24abf99 100644 --- a/tests/Zilean.Tests/Zilean.Tests.csproj +++ b/tests/Zilean.Tests/Zilean.Tests.csproj @@ -26,7 +26,7 @@ - + From 16738410743b6e689aaf03c136d60c578cbabfc3 Mon Sep 17 00:00:00 2001 From: PromKnight Date: Sat, 16 Nov 2024 02:10:01 +0000 Subject: [PATCH 2/8] refactor: started before scraper expansion --- Directory.Packages.props | 1 + Dockerfile | 2 +- .../Bootstrapping/BootstrapIndexesService.cs | 14 ++++++++--- .../ServiceCollectionExtensions.cs | 10 ++++---- .../Bootstrapping/WebApplicationExtensions.cs | 2 ++ .../Features/Dmm/DmmSyncJob.cs | 25 ------------------- 
.../Features/Dmm/DmmSyncOnDemandState.cs | 6 ----- .../SearchEndpoints.cs} | 16 ++++++------ .../SearchFilteredRequest.cs} | 4 +-- .../Features/Sync/SyncJob.cs | 22 ++++++++++++++++ .../Features/Sync/SyncOnDemandState.cs | 6 +++++ src/Zilean.ApiService/GlobalUsings.cs | 2 +- .../ZileanDbContextModelSnapshot.cs | 6 ++--- .../Services/ITorrentInfoService.cs | 1 + src/Zilean.Scraper/Zilean.Scraper.csproj | 2 +- 15 files changed, 64 insertions(+), 55 deletions(-) delete mode 100644 src/Zilean.ApiService/Features/Dmm/DmmSyncJob.cs delete mode 100644 src/Zilean.ApiService/Features/Dmm/DmmSyncOnDemandState.cs rename src/Zilean.ApiService/Features/{Dmm/DmmEndpoints.cs => Search/SearchEndpoints.cs} (89%) rename src/Zilean.ApiService/Features/{Dmm/DmmFilteredRequest.cs => Search/SearchFilteredRequest.cs} (77%) create mode 100644 src/Zilean.ApiService/Features/Sync/SyncJob.cs create mode 100644 src/Zilean.ApiService/Features/Sync/SyncOnDemandState.cs diff --git a/Directory.Packages.props b/Directory.Packages.props index 710f79e..0256b01 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -28,6 +28,7 @@ + diff --git a/Dockerfile b/Dockerfile index 6a94194..20e875a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ COPY . . 
RUN dotnet restore -a $TARGETARCH WORKDIR /build/src/Zilean.ApiService RUN dotnet publish -c Release --no-restore -a $TARGETARCH -o /app/out -WORKDIR /build/src/Zilean.DmmScraper +WORKDIR /build/src/Zilean.Scraper RUN dotnet publish -c Release --no-restore -a $TARGETARCH -o /app/out # Run Stage diff --git a/src/Zilean.ApiService/Features/Bootstrapping/BootstrapIndexesService.cs b/src/Zilean.ApiService/Features/Bootstrapping/BootstrapIndexesService.cs index b4ef67e..30baaf5 100644 --- a/src/Zilean.ApiService/Features/Bootstrapping/BootstrapIndexesService.cs +++ b/src/Zilean.ApiService/Features/Bootstrapping/BootstrapIndexesService.cs @@ -1,4 +1,4 @@ -using Zilean.Database; +using Zilean.ApiService.Features.Sync; namespace Zilean.ApiService.Features.Bootstrapping; @@ -28,10 +28,16 @@ public async Task StartingAsync(CancellationToken cancellationToken) public async Task StartedAsync(CancellationToken cancellationToken) { - if (configuration.Dmm.EnableScraping && DmmSyncJob.ShouldRunOnStartup()) + if (configuration.Dmm.EnableScraping) { - var dmmJob = new DmmSyncJob(executionService, loggerFactory.CreateLogger()); - await dmmJob.Invoke(); + await using var asyncScope = serviceProvider.CreateAsyncScope(); + var infoService = asyncScope.ServiceProvider.GetRequiredService(); + var dmmJob = new SyncJob(executionService, loggerFactory.CreateLogger(), infoService); + var shouldRun = await dmmJob.ShouldRunOnStartup(); + if (shouldRun) + { + await dmmJob.Invoke(); + } } } } diff --git a/src/Zilean.ApiService/Features/Bootstrapping/ServiceCollectionExtensions.cs b/src/Zilean.ApiService/Features/Bootstrapping/ServiceCollectionExtensions.cs index dbbdb87..a3e4a51 100644 --- a/src/Zilean.ApiService/Features/Bootstrapping/ServiceCollectionExtensions.cs +++ b/src/Zilean.ApiService/Features/Bootstrapping/ServiceCollectionExtensions.cs @@ -1,4 +1,4 @@ -using Zilean.Database; +using Zilean.ApiService.Features.Sync; namespace Zilean.ApiService.Features.Bootstrapping; @@ -18,8 +18,8 @@ 
public static IServiceCollection ConditionallyRegisterDmmJob(this IServiceCollec { if (configuration.Dmm.EnableScraping) { - services.AddTransient(); - services.AddSingleton(); + services.AddTransient(); + services.AddSingleton(); } return services; @@ -31,9 +31,9 @@ public static IServiceProvider SetupScheduling(this IServiceProvider provider, Z { if (configuration.Dmm.EnableScraping) { - scheduler.Schedule() + scheduler.Schedule() .Cron(configuration.Dmm.ScrapeSchedule) - .PreventOverlapping(nameof(DmmSyncJob)); + .PreventOverlapping(nameof(SyncJob)); } }) .LogScheduledTaskProgress(); diff --git a/src/Zilean.ApiService/Features/Bootstrapping/WebApplicationExtensions.cs b/src/Zilean.ApiService/Features/Bootstrapping/WebApplicationExtensions.cs index 34d4214..6a2b22a 100644 --- a/src/Zilean.ApiService/Features/Bootstrapping/WebApplicationExtensions.cs +++ b/src/Zilean.ApiService/Features/Bootstrapping/WebApplicationExtensions.cs @@ -1,3 +1,5 @@ +using Zilean.ApiService.Features.Search; + namespace Zilean.ApiService.Features.Bootstrapping; public static class WebApplicationExtensions diff --git a/src/Zilean.ApiService/Features/Dmm/DmmSyncJob.cs b/src/Zilean.ApiService/Features/Dmm/DmmSyncJob.cs deleted file mode 100644 index 97b4752..0000000 --- a/src/Zilean.ApiService/Features/Dmm/DmmSyncJob.cs +++ /dev/null @@ -1,25 +0,0 @@ -namespace Zilean.ApiService.Features.Dmm; - -public class DmmSyncJob(IShellExecutionService shellExecutionService, ILogger logger) : IInvocable, ICancellableInvocable -{ - public static string ParsedPagesFile => Path.Combine(AppContext.BaseDirectory, "data", "parsedPages.json"); - - public CancellationToken CancellationToken { get; set; } - public CancellationToken Token { get; set; } - - public async Task Invoke() - { - logger.LogInformation("DmmSyncJob started"); - - await shellExecutionService.ExecuteCommand(new ShellCommandOptions - { - Command = Path.Combine(AppContext.BaseDirectory, "dmmscraper"), - ShowOutput = true, - CancellationToken 
= CancellationToken - }); - - logger.LogInformation("DmmSyncJob completed"); - } - - public static bool ShouldRunOnStartup() => !File.Exists(ParsedPagesFile); -} diff --git a/src/Zilean.ApiService/Features/Dmm/DmmSyncOnDemandState.cs b/src/Zilean.ApiService/Features/Dmm/DmmSyncOnDemandState.cs deleted file mode 100644 index 61606f6..0000000 --- a/src/Zilean.ApiService/Features/Dmm/DmmSyncOnDemandState.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Zilean.ApiService.Features.Dmm; - -public class DmmSyncOnDemandState -{ - public bool IsRunning { get; set; } -} diff --git a/src/Zilean.ApiService/Features/Dmm/DmmEndpoints.cs b/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs similarity index 89% rename from src/Zilean.ApiService/Features/Dmm/DmmEndpoints.cs rename to src/Zilean.ApiService/Features/Search/SearchEndpoints.cs index 7c92684..3fb4d17 100644 --- a/src/Zilean.ApiService/Features/Dmm/DmmEndpoints.cs +++ b/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs @@ -1,6 +1,8 @@ -namespace Zilean.ApiService.Features.Dmm; +using Zilean.ApiService.Features.Sync; -public static class DmmEndpoints +namespace Zilean.ApiService.Features.Search; + +public static class SearchEndpoints { private const string GroupName = "dmm"; private const string Search = "/search"; @@ -34,7 +36,7 @@ private static RouteGroupBuilder Dmm(this RouteGroupBuilder group) return group; } - private static async Task PerformOnDemandScrape(HttpContext context, ILogger logger, IShellExecutionService executionService, ILogger syncLogger, IMutex mutex, DmmSyncOnDemandState state) + private static async Task PerformOnDemandScrape(HttpContext context, ILogger logger, IShellExecutionService executionService, ILogger syncLogger, IMutex mutex, SyncOnDemandState state) { if (state.IsRunning) { @@ -44,7 +46,7 @@ private static async Task PerformOnDemandScrape(HttpContext context, ILogger> PerformSearch(HttpContext context, } } - private static async Task> PerformFilteredSearch(HttpContext context, 
ITorrentInfoService torrentInfoService, ZileanConfiguration configuration, ILogger logger, [AsParameters] DmmFilteredRequest request) + private static async Task> PerformFilteredSearch(HttpContext context, ITorrentInfoService torrentInfoService, ZileanConfiguration configuration, ILogger logger, [AsParameters] SearchFilteredRequest request) { try diff --git a/src/Zilean.ApiService/Features/Dmm/DmmFilteredRequest.cs b/src/Zilean.ApiService/Features/Search/SearchFilteredRequest.cs similarity index 77% rename from src/Zilean.ApiService/Features/Dmm/DmmFilteredRequest.cs rename to src/Zilean.ApiService/Features/Search/SearchFilteredRequest.cs index 63edb7e..e74ebc2 100644 --- a/src/Zilean.ApiService/Features/Dmm/DmmFilteredRequest.cs +++ b/src/Zilean.ApiService/Features/Search/SearchFilteredRequest.cs @@ -1,6 +1,6 @@ -namespace Zilean.ApiService.Features.Dmm; +namespace Zilean.ApiService.Features.Search; -public class DmmFilteredRequest +public class SearchFilteredRequest { public string? Query { get; init; } public int? 
Season { get; init; } diff --git a/src/Zilean.ApiService/Features/Sync/SyncJob.cs b/src/Zilean.ApiService/Features/Sync/SyncJob.cs new file mode 100644 index 0000000..60402da --- /dev/null +++ b/src/Zilean.ApiService/Features/Sync/SyncJob.cs @@ -0,0 +1,22 @@ +namespace Zilean.ApiService.Features.Sync; + +public class SyncJob(IShellExecutionService shellExecutionService, ILogger logger, ITorrentInfoService infoService) : IInvocable, ICancellableInvocable +{ + public CancellationToken CancellationToken { get; set; } + + public async Task Invoke() + { + logger.LogInformation("SyncJob started"); + + await shellExecutionService.ExecuteCommand(new ShellCommandOptions + { + Command = Path.Combine(AppContext.BaseDirectory, "scraper"), + ShowOutput = true, + CancellationToken = CancellationToken + }); + + logger.LogInformation("SyncJob completed"); + } + + public Task ShouldRunOnStartup() => infoService.HasParsedPages(); +} diff --git a/src/Zilean.ApiService/Features/Sync/SyncOnDemandState.cs b/src/Zilean.ApiService/Features/Sync/SyncOnDemandState.cs new file mode 100644 index 0000000..3d62875 --- /dev/null +++ b/src/Zilean.ApiService/Features/Sync/SyncOnDemandState.cs @@ -0,0 +1,6 @@ +namespace Zilean.ApiService.Features.Sync; + +public class SyncOnDemandState +{ + public bool IsRunning { get; set; } +} diff --git a/src/Zilean.ApiService/GlobalUsings.cs b/src/Zilean.ApiService/GlobalUsings.cs index d18e561..bc3c6c7 100644 --- a/src/Zilean.ApiService/GlobalUsings.cs +++ b/src/Zilean.ApiService/GlobalUsings.cs @@ -16,10 +16,10 @@ global using Scalar.AspNetCore; global using SimCube.Aspire.Features.Otlp; global using Zilean.ApiService.Features.Bootstrapping; -global using Zilean.ApiService.Features.Dmm; global using Zilean.ApiService.Features.HealthChecks; global using Zilean.ApiService.Features.Imdb; global using Zilean.ApiService.Features.Torznab; +global using Zilean.Database; global using Zilean.Database.Services; global using Zilean.Shared.Features.Configuration; global 
using Zilean.Shared.Features.Dmm; diff --git a/src/Zilean.Database/Migrations/ZileanDbContextModelSnapshot.cs b/src/Zilean.Database/Migrations/ZileanDbContextModelSnapshot.cs index a65aa26..3fb782c 100644 --- a/src/Zilean.Database/Migrations/ZileanDbContextModelSnapshot.cs +++ b/src/Zilean.Database/Migrations/ZileanDbContextModelSnapshot.cs @@ -23,7 +23,7 @@ protected override void BuildModel(ModelBuilder modelBuilder) NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder); - modelBuilder.Entity("Zilean.Shared.Features.Dmm.ParsedPages", b => + modelBuilder.Entity("Zilean.Shared.Features.Search.ParsedPages", b => { b.Property("Page") .HasColumnType("text"); @@ -36,7 +36,7 @@ protected override void BuildModel(ModelBuilder modelBuilder) b.ToTable("ParsedPages", (string)null); }); - modelBuilder.Entity("Zilean.Shared.Features.Dmm.TorrentInfo", b => + modelBuilder.Entity("Zilean.Shared.Features.Search.TorrentInfo", b => { b.Property("InfoHash") .HasColumnType("text") @@ -295,7 +295,7 @@ protected override void BuildModel(ModelBuilder modelBuilder) b.ToTable("ImportMetadata"); }); - modelBuilder.Entity("Zilean.Shared.Features.Dmm.TorrentInfo", b => + modelBuilder.Entity("Zilean.Shared.Features.Search.TorrentInfo", b => { b.HasOne("Zilean.Shared.Features.Imdb.ImdbFile", "Imdb") .WithMany() diff --git a/src/Zilean.Database/Services/ITorrentInfoService.cs b/src/Zilean.Database/Services/ITorrentInfoService.cs index 59af965..c5765f7 100644 --- a/src/Zilean.Database/Services/ITorrentInfoService.cs +++ b/src/Zilean.Database/Services/ITorrentInfoService.cs @@ -5,4 +5,5 @@ public interface ITorrentInfoService Task StoreTorrentInfo(List torrents, int batchSize = 10000); Task SearchForTorrentInfoByOnlyTitle(string query); Task SearchForTorrentInfoFiltered(TorrentInfoFilter filter, int? 
limit = null); + Task HasParsedPages(); } diff --git a/src/Zilean.Scraper/Zilean.Scraper.csproj b/src/Zilean.Scraper/Zilean.Scraper.csproj index 6960dec..feef314 100644 --- a/src/Zilean.Scraper/Zilean.Scraper.csproj +++ b/src/Zilean.Scraper/Zilean.Scraper.csproj @@ -4,7 +4,7 @@ Exe enable enable - dmmscraper + scraper false true From 79e12e155eec9fb2f541dbe841689eb02e477b0c Mon Sep 17 00:00:00 2001 From: PromKnight Date: Sat, 16 Nov 2024 04:17:47 +0000 Subject: [PATCH 3/8] feat: Streaming endpoint --- Directory.Packages.props | 2 +- Zilean.sln.DotSettings.user | 1 + .../Bootstrapping/BootstrapIndexesService.cs | 6 +- .../ServiceCollectionExtensions.cs | 2 - .../Bootstrapping/WebApplicationExtensions.cs | 3 +- .../Features/Search/SearchEndpoints.cs | 6 +- .../Features/Sync/SyncJob.cs | 5 +- .../Features/Torrents/TorrentsEndpoints.cs | 77 +++++++++++++++ src/Zilean.ApiService/GlobalUsings.cs | 6 ++ .../Properties/launchSettings.json | 3 +- .../Services/ITorrentInfoService.cs | 1 - .../Features/Dmm/ZileanScraper.cs | 96 +++++++++++++++++++ src/Zilean.Scraper/GlobalUsings.cs | 2 + .../Configuration/TorrentsConfiguration.cs | 6 ++ .../Configuration/ZileanConfiguration.cs | 1 + .../Features/Scraping/StreamedEntry.cs | 15 +++ 16 files changed, 215 insertions(+), 17 deletions(-) create mode 100644 src/Zilean.ApiService/Features/Torrents/TorrentsEndpoints.cs create mode 100644 src/Zilean.Scraper/Features/Dmm/ZileanScraper.cs create mode 100644 src/Zilean.Shared/Features/Configuration/TorrentsConfiguration.cs create mode 100644 src/Zilean.Shared/Features/Scraping/StreamedEntry.cs diff --git a/Directory.Packages.props b/Directory.Packages.props index 0256b01..eb71a6a 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -29,7 +29,7 @@ - + all diff --git a/Zilean.sln.DotSettings.user b/Zilean.sln.DotSettings.user index 55723bc..9b2cb1e 100644 --- a/Zilean.sln.DotSettings.user +++ b/Zilean.sln.DotSettings.user @@ -3,6 +3,7 @@ ForceIncluded ForceIncluded 
ForceIncluded + ForceIncluded ForceIncluded ForceIncluded diff --git a/src/Zilean.ApiService/Features/Bootstrapping/BootstrapIndexesService.cs b/src/Zilean.ApiService/Features/Bootstrapping/BootstrapIndexesService.cs index 30baaf5..0c0956e 100644 --- a/src/Zilean.ApiService/Features/Bootstrapping/BootstrapIndexesService.cs +++ b/src/Zilean.ApiService/Features/Bootstrapping/BootstrapIndexesService.cs @@ -1,5 +1,3 @@ -using Zilean.ApiService.Features.Sync; - namespace Zilean.ApiService.Features.Bootstrapping; public class BootstrapIndexesService( @@ -31,8 +29,8 @@ public async Task StartedAsync(CancellationToken cancellationToken) if (configuration.Dmm.EnableScraping) { await using var asyncScope = serviceProvider.CreateAsyncScope(); - var infoService = asyncScope.ServiceProvider.GetRequiredService(); - var dmmJob = new SyncJob(executionService, loggerFactory.CreateLogger(), infoService); + var dbContext = asyncScope.ServiceProvider.GetRequiredService(); + var dmmJob = new SyncJob(executionService, loggerFactory.CreateLogger(), dbContext); var shouldRun = await dmmJob.ShouldRunOnStartup(); if (shouldRun) { diff --git a/src/Zilean.ApiService/Features/Bootstrapping/ServiceCollectionExtensions.cs b/src/Zilean.ApiService/Features/Bootstrapping/ServiceCollectionExtensions.cs index a3e4a51..0a8eee4 100644 --- a/src/Zilean.ApiService/Features/Bootstrapping/ServiceCollectionExtensions.cs +++ b/src/Zilean.ApiService/Features/Bootstrapping/ServiceCollectionExtensions.cs @@ -1,5 +1,3 @@ -using Zilean.ApiService.Features.Sync; - namespace Zilean.ApiService.Features.Bootstrapping; [ExcludeFromCodeCoverage] diff --git a/src/Zilean.ApiService/Features/Bootstrapping/WebApplicationExtensions.cs b/src/Zilean.ApiService/Features/Bootstrapping/WebApplicationExtensions.cs index 6a2b22a..3c33ad7 100644 --- a/src/Zilean.ApiService/Features/Bootstrapping/WebApplicationExtensions.cs +++ b/src/Zilean.ApiService/Features/Bootstrapping/WebApplicationExtensions.cs @@ -1,5 +1,3 @@ -using 
Zilean.ApiService.Features.Search; - namespace Zilean.ApiService.Features.Bootstrapping; public static class WebApplicationExtensions @@ -17,5 +15,6 @@ public static WebApplication MapZileanEndpoints(this WebApplication app, ZileanC .MapDmmEndpoints(configuration) .MapImdbEndpoints(configuration) .MapTorznabEndpoints(configuration) + .MapTorrentsEndpoints(configuration) .MapHealthCheckEndpoints(); } diff --git a/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs b/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs index 3fb4d17..2086de1 100644 --- a/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs +++ b/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs @@ -1,5 +1,3 @@ -using Zilean.ApiService.Features.Sync; - namespace Zilean.ApiService.Features.Search; public static class SearchEndpoints @@ -36,7 +34,7 @@ private static RouteGroupBuilder Dmm(this RouteGroupBuilder group) return group; } - private static async Task PerformOnDemandScrape(HttpContext context, ILogger logger, IShellExecutionService executionService, ILogger syncLogger, IMutex mutex, SyncOnDemandState state) + private static async Task PerformOnDemandScrape(HttpContext context, ILogger logger, IShellExecutionService executionService, ILogger syncLogger, IMutex mutex, SyncOnDemandState state, ZileanDbContext dbContext) { if (state.IsRunning) { @@ -54,7 +52,7 @@ private static async Task PerformOnDemandScrape(HttpContext context, ILogger logger, ITorrentInfoService infoService) : IInvocable, ICancellableInvocable +public class SyncJob(IShellExecutionService shellExecutionService, ILogger logger, ZileanDbContext dbContext) : IInvocable, ICancellableInvocable { public CancellationToken CancellationToken { get; set; } @@ -18,5 +18,6 @@ await shellExecutionService.ExecuteCommand(new ShellCommandOptions logger.LogInformation("SyncJob completed"); } - public Task ShouldRunOnStartup() => infoService.HasParsedPages(); + // ReSharper disable once MethodSupportsCancellation + public Task 
ShouldRunOnStartup() => dbContext.ParsedPages.AnyAsync(); } diff --git a/src/Zilean.ApiService/Features/Torrents/TorrentsEndpoints.cs b/src/Zilean.ApiService/Features/Torrents/TorrentsEndpoints.cs new file mode 100644 index 0000000..5b95818 --- /dev/null +++ b/src/Zilean.ApiService/Features/Torrents/TorrentsEndpoints.cs @@ -0,0 +1,77 @@ +namespace Zilean.ApiService.Features.Torrents; + +public static class TorrentsEndpoints +{ + private const string GroupName = "torrents"; + private const string Scrape = "/scrape"; + + public static WebApplication MapTorrentsEndpoints(this WebApplication app, ZileanConfiguration configuration) + { + if (configuration.Torrents.EnableEndpoint) + { + app.MapGroup(GroupName) + .WithTags(GroupName) + .Torrents() + .DisableAntiforgery() + .AllowAnonymous(); + } + + return app; + } + + private static RouteGroupBuilder Torrents(this RouteGroupBuilder group) + { + group.MapGet(Scrape, StreamTorrents); + + return group; + } + + private static async Task StreamTorrents(HttpContext context, ZileanDbContext dbContext, ILogger logger) + { + var sw = Stopwatch.StartNew(); + logger.LogInformation("Starting to stream torrents to client: {Client}", context.Connection.RemoteIpAddress); + + try + { + var response = context.Response; + response.ContentType = "application/json"; + await using var writer = new Utf8JsonWriter(response.Body); + + await response.Body.WriteAsync("["u8.ToArray()); + + var firstItem = true; + + await foreach (var item in dbContext.Torrents + .Select(record => new StreamedEntry + { + Name = record.RawTitle, + InfoHash = record.InfoHash, + Size = long.Parse(record.Size), + }) + .AsAsyncEnumerable() + .WithCancellation(context.RequestAborted)) + { + if (!firstItem) + { + await response.Body.WriteAsync(","u8.ToArray()); + } + + firstItem = false; + + await JsonSerializer.SerializeAsync(response.Body, item); + await writer.FlushAsync(); + } + + await response.Body.WriteAsync("]"u8.ToArray()); + + logger.LogInformation("Finished 
streaming torrents to client: {Client} in {Elapsed}s", + context.Connection.RemoteIpAddress, sw.Elapsed.TotalSeconds); + } + catch (Exception ex) + { + logger.LogError(ex, "Error while streaming torrents to client: {Client}", context.Connection.RemoteIpAddress); + } + } + + private abstract class StreamLogger; +} diff --git a/src/Zilean.ApiService/GlobalUsings.cs b/src/Zilean.ApiService/GlobalUsings.cs index bc3c6c7..17d1e22 100644 --- a/src/Zilean.ApiService/GlobalUsings.cs +++ b/src/Zilean.ApiService/GlobalUsings.cs @@ -1,8 +1,10 @@ // Global using directives +global using System.Diagnostics; global using System.Diagnostics.CodeAnalysis; global using System.Globalization; global using System.Reflection; +global using System.Text.Json; global using System.Xml.Serialization; global using Coravel; global using Coravel.Invocable; @@ -18,11 +20,15 @@ global using Zilean.ApiService.Features.Bootstrapping; global using Zilean.ApiService.Features.HealthChecks; global using Zilean.ApiService.Features.Imdb; +global using Zilean.ApiService.Features.Search; +global using Zilean.ApiService.Features.Sync; +global using Zilean.ApiService.Features.Torrents; global using Zilean.ApiService.Features.Torznab; global using Zilean.Database; global using Zilean.Database.Services; global using Zilean.Shared.Features.Configuration; global using Zilean.Shared.Features.Dmm; +global using Zilean.Shared.Features.Scraping; global using Zilean.Shared.Features.Shell; global using Zilean.Shared.Features.Torznab; global using Zilean.Shared.Features.Torznab.Categories; diff --git a/src/Zilean.ApiService/Properties/launchSettings.json b/src/Zilean.ApiService/Properties/launchSettings.json index 72baac7..169b92c 100644 --- a/src/Zilean.ApiService/Properties/launchSettings.json +++ b/src/Zilean.ApiService/Properties/launchSettings.json @@ -5,7 +5,8 @@ "commandName": "Project", "environmentVariables": { "ASPNETCORE_ENVIRONMENT": "Development", - "ASPNETCORE_URLS": "http://localhost:8181" + 
"ASPNETCORE_URLS": "http://+:8181", + "Zilean__Torrents__EnableEndpoint": "true" } } } diff --git a/src/Zilean.Database/Services/ITorrentInfoService.cs b/src/Zilean.Database/Services/ITorrentInfoService.cs index c5765f7..59af965 100644 --- a/src/Zilean.Database/Services/ITorrentInfoService.cs +++ b/src/Zilean.Database/Services/ITorrentInfoService.cs @@ -5,5 +5,4 @@ public interface ITorrentInfoService Task StoreTorrentInfo(List torrents, int batchSize = 10000); Task SearchForTorrentInfoByOnlyTitle(string query); Task SearchForTorrentInfoFiltered(TorrentInfoFilter filter, int? limit = null); - Task HasParsedPages(); } diff --git a/src/Zilean.Scraper/Features/Dmm/ZileanScraper.cs b/src/Zilean.Scraper/Features/Dmm/ZileanScraper.cs new file mode 100644 index 0000000..cef137f --- /dev/null +++ b/src/Zilean.Scraper/Features/Dmm/ZileanScraper.cs @@ -0,0 +1,96 @@ +namespace Zilean.Scraper.Features.Dmm; + +public class ZileanScraper +{ + private readonly HttpClient _httpClient; + private readonly ILogger _logger; + + public ZileanScraper(IHttpClientFactory clientFactory, string endpoint, ILoggerFactory loggerFactory) + { + _httpClient = clientFactory.CreateClient(); + _httpClient.BaseAddress = new Uri(endpoint); + _logger = loggerFactory.CreateLogger(); + } + + public async Task ProcessTorrentsAsync(string url, int batchSize = 1000, int maxChannelCapacity = 5000) + { + var channel = Channel.CreateBounded>(new BoundedChannelOptions(maxChannelCapacity) + { + SingleReader = true, + SingleWriter = false, + FullMode = BoundedChannelFullMode.Wait + }); + + var producerTask = ProduceAsync(url, channel.Writer); + var consumerTask = ConsumeAsync(channel.Reader, batchSize); + await Task.WhenAll(producerTask, consumerTask); + } + + private async Task ProduceAsync(string url, ChannelWriter> writer) + { + try + { + var response = await _httpClient.GetAsync(url, HttpCompletionOption.ResponseHeadersRead); + response.EnsureSuccessStatusCode(); + + var stream = await 
response.Content.ReadAsStreamAsync(); + var options = new JsonSerializerOptions + { + PropertyNameCaseInsensitive = true + }; + + await foreach (var item in JsonSerializer.DeserializeAsyncEnumerable(stream, options)) + { + if (item is not null) + { + await writer.WriteAsync(Task.FromResult(item)); + } + } + } + catch (Exception ex) + { + _logger.LogError(ex, "Error processing item: {Message}", ex.Message); + } + finally + { + writer.Complete(); + } + } + + private async Task ConsumeAsync(ChannelReader> reader, int batchSize) + { + var batch = new List>(batchSize); + + await foreach (var task in reader.ReadAllAsync()) + { + batch.Add(task); + + if (batch.Count >= batchSize) + { + await ProcessBatch(batch); + batch.Clear(); + } + } + + if (batch.Count > 0) + { + await ProcessBatch(batch); + } + } + + private async Task ProcessBatch(IEnumerable> batch) + { + await foreach (var result in Task.WhenEach(batch)) + { + try + { + var current = await result; + _logger.LogInformation("Processing item: {Item}", current.Name); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error processing item: {Message}", ex.Message); + } + } + } +} diff --git a/src/Zilean.Scraper/GlobalUsings.cs b/src/Zilean.Scraper/GlobalUsings.cs index 3136c16..d0a6c9e 100644 --- a/src/Zilean.Scraper/GlobalUsings.cs +++ b/src/Zilean.Scraper/GlobalUsings.cs @@ -8,6 +8,7 @@ global using System.Text; global using System.Text.Json; global using System.Text.RegularExpressions; +global using System.Threading.Channels; global using CsvHelper; global using CsvHelper.Configuration; global using Microsoft.EntityFrameworkCore; @@ -30,4 +31,5 @@ global using Zilean.Shared.Features.Configuration; global using Zilean.Shared.Features.Dmm; global using Zilean.Shared.Features.Imdb; +global using Zilean.Shared.Features.Scraping; global using Zilean.Shared.Features.Statistics; diff --git a/src/Zilean.Shared/Features/Configuration/TorrentsConfiguration.cs 
b/src/Zilean.Shared/Features/Configuration/TorrentsConfiguration.cs new file mode 100644 index 0000000..da7b5dd --- /dev/null +++ b/src/Zilean.Shared/Features/Configuration/TorrentsConfiguration.cs @@ -0,0 +1,6 @@ +namespace Zilean.Shared.Features.Configuration; + +public class TorrentsConfiguration +{ + public bool EnableEndpoint { get; set; } = false; +} diff --git a/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs b/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs index 7ff6c77..a507f1d 100644 --- a/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs +++ b/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs @@ -12,6 +12,7 @@ public class ZileanConfiguration public TorznabConfiguration Torznab { get; set; } = new(); public DatabaseConfiguration Database { get; set; } = new(); public ProwlarrConfiguration Prowlarr { get; set; } = new(); + public TorrentsConfiguration Torrents { get; set; } = new(); public ImdbConfiguration Imdb { get; set; } = new(); public static void EnsureExists() diff --git a/src/Zilean.Shared/Features/Scraping/StreamedEntry.cs b/src/Zilean.Shared/Features/Scraping/StreamedEntry.cs new file mode 100644 index 0000000..7a870bb --- /dev/null +++ b/src/Zilean.Shared/Features/Scraping/StreamedEntry.cs @@ -0,0 +1,15 @@ +namespace Zilean.Shared.Features.Scraping; + +public class StreamedEntry +{ + [JsonPropertyName("name")] + public required string Name { get; set; } + + [JsonPropertyName("size")] + public required long Size { get; set; } + + [JsonPropertyName("hash")] + public required string InfoHash { get; set; } + + public TorrentInfo? 
ParseResponse { get; set; } +} From b39b6a36b57df5be350cdf3905492f64a93a3626 Mon Sep 17 00:00:00 2001 From: PromKnight Date: Sat, 16 Nov 2024 14:50:00 +0000 Subject: [PATCH 4/8] feat: service discovery and manual config --- .../Features/Torrents/TorrentsEndpoints.cs | 2 +- .../ServiceCollectionExtensions.cs | 14 ++- .../Features/Bootstrapping/ServiceLifetime.cs | 3 - .../Features/Dmm/ZileanScraper.cs | 96 --------------- .../{Dmm => Ingestion}/DmmFileDownloader.cs | 2 +- .../{Dmm => Ingestion}/DmmPageProcessor.cs | 4 +- .../{Dmm => Ingestion}/DmmScraping.cs | 13 +- .../{Dmm => Ingestion}/DmmSyncState.cs | 2 +- .../Ingestion/GenericIngestionProcessor.cs | 115 ++++++++++++++++++ .../Ingestion/GenericIngestionScraping.cs | 67 ++++++++++ .../Ingestion/KubernetesServiceDiscovery.cs | 57 +++++++++ .../Ingestion/TorrentInfoExtensions.cs | 9 ++ src/Zilean.Scraper/GlobalUsings.cs | 4 +- src/Zilean.Scraper/Program.cs | 2 +- src/Zilean.Scraper/Zilean.Scraper.csproj | 1 + .../Configuration/IngestionConfiguration.cs | 12 ++ .../Configuration/KubernetesConfiguration.cs | 10 ++ .../Configuration/ZileanConfiguration.cs | 1 + src/Zilean.Shared/Features/Dmm/DmmRecords.cs | 5 + 19 files changed, 297 insertions(+), 122 deletions(-) delete mode 100644 src/Zilean.Scraper/Features/Dmm/ZileanScraper.cs rename src/Zilean.Scraper/Features/{Dmm => Ingestion}/DmmFileDownloader.cs (98%) rename src/Zilean.Scraper/Features/{Dmm => Ingestion}/DmmPageProcessor.cs (97%) rename src/Zilean.Scraper/Features/{Dmm => Ingestion}/DmmScraping.cs (93%) rename src/Zilean.Scraper/Features/{Dmm => Ingestion}/DmmSyncState.cs (97%) create mode 100644 src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs create mode 100644 src/Zilean.Scraper/Features/Ingestion/GenericIngestionScraping.cs create mode 100644 src/Zilean.Scraper/Features/Ingestion/KubernetesServiceDiscovery.cs create mode 100644 src/Zilean.Scraper/Features/Ingestion/TorrentInfoExtensions.cs create mode 100644 
src/Zilean.Shared/Features/Configuration/IngestionConfiguration.cs create mode 100644 src/Zilean.Shared/Features/Configuration/KubernetesConfiguration.cs diff --git a/src/Zilean.ApiService/Features/Torrents/TorrentsEndpoints.cs b/src/Zilean.ApiService/Features/Torrents/TorrentsEndpoints.cs index 5b95818..8ac9af0 100644 --- a/src/Zilean.ApiService/Features/Torrents/TorrentsEndpoints.cs +++ b/src/Zilean.ApiService/Features/Torrents/TorrentsEndpoints.cs @@ -3,7 +3,7 @@ namespace Zilean.ApiService.Features.Torrents; public static class TorrentsEndpoints { private const string GroupName = "torrents"; - private const string Scrape = "/scrape"; + private const string Scrape = "/all"; public static WebApplication MapTorrentsEndpoints(this WebApplication app, ZileanConfiguration configuration) { diff --git a/src/Zilean.Scraper/Features/Bootstrapping/ServiceCollectionExtensions.cs b/src/Zilean.Scraper/Features/Bootstrapping/ServiceCollectionExtensions.cs index 626a242..f5c3338 100644 --- a/src/Zilean.Scraper/Features/Bootstrapping/ServiceCollectionExtensions.cs +++ b/src/Zilean.Scraper/Features/Bootstrapping/ServiceCollectionExtensions.cs @@ -1,18 +1,15 @@ -using Zilean.Scraper.Features.Dmm; -using Zilean.Scraper.Features.Imdb; -using Zilean.Scraper.Features.PythonSupport; - namespace Zilean.Scraper.Features.Bootstrapping; public static class ServiceCollectionExtensions { - public static void AddDmmScraper(this IServiceCollection services, IConfiguration configuration) + public static void AddScrapers(this IServiceCollection services, IConfiguration configuration) { var zileanConfiguration = configuration.GetZileanConfiguration(); services.AddSingleton(zileanConfiguration); services.AddImdbServices(); services.AddDmmServices(); + services.AddGenericServices(); services.AddZileanDataServices(zileanConfiguration); services.AddSingleton(); services.AddHostedService(); @@ -27,6 +24,13 @@ private static void AddDmmServices(this IServiceCollection services) 
services.AddTransient(); } + private static void AddGenericServices(this IServiceCollection services) + { + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + } + private static void AddImdbServices(this IServiceCollection services) { services.AddSingleton(); diff --git a/src/Zilean.Scraper/Features/Bootstrapping/ServiceLifetime.cs b/src/Zilean.Scraper/Features/Bootstrapping/ServiceLifetime.cs index c30bb7f..f24c2f2 100644 --- a/src/Zilean.Scraper/Features/Bootstrapping/ServiceLifetime.cs +++ b/src/Zilean.Scraper/Features/Bootstrapping/ServiceLifetime.cs @@ -1,6 +1,3 @@ -using Zilean.Scraper.Features.Dmm; -using Zilean.Scraper.Features.Imdb; - namespace Zilean.Scraper.Features.Bootstrapping; public class ServiceLifetime(ImdbMetadataLoader metadataLoader, DmmScraping dmmScraper, IServiceProvider serviceProvider, ILogger logger, ZileanConfiguration configuration) : IHostedLifecycleService diff --git a/src/Zilean.Scraper/Features/Dmm/ZileanScraper.cs b/src/Zilean.Scraper/Features/Dmm/ZileanScraper.cs deleted file mode 100644 index cef137f..0000000 --- a/src/Zilean.Scraper/Features/Dmm/ZileanScraper.cs +++ /dev/null @@ -1,96 +0,0 @@ -namespace Zilean.Scraper.Features.Dmm; - -public class ZileanScraper -{ - private readonly HttpClient _httpClient; - private readonly ILogger _logger; - - public ZileanScraper(IHttpClientFactory clientFactory, string endpoint, ILoggerFactory loggerFactory) - { - _httpClient = clientFactory.CreateClient(); - _httpClient.BaseAddress = new Uri(endpoint); - _logger = loggerFactory.CreateLogger(); - } - - public async Task ProcessTorrentsAsync(string url, int batchSize = 1000, int maxChannelCapacity = 5000) - { - var channel = Channel.CreateBounded>(new BoundedChannelOptions(maxChannelCapacity) - { - SingleReader = true, - SingleWriter = false, - FullMode = BoundedChannelFullMode.Wait - }); - - var producerTask = ProduceAsync(url, channel.Writer); - var consumerTask = ConsumeAsync(channel.Reader, batchSize); - 
await Task.WhenAll(producerTask, consumerTask); - } - - private async Task ProduceAsync(string url, ChannelWriter> writer) - { - try - { - var response = await _httpClient.GetAsync(url, HttpCompletionOption.ResponseHeadersRead); - response.EnsureSuccessStatusCode(); - - var stream = await response.Content.ReadAsStreamAsync(); - var options = new JsonSerializerOptions - { - PropertyNameCaseInsensitive = true - }; - - await foreach (var item in JsonSerializer.DeserializeAsyncEnumerable(stream, options)) - { - if (item is not null) - { - await writer.WriteAsync(Task.FromResult(item)); - } - } - } - catch (Exception ex) - { - _logger.LogError(ex, "Error processing item: {Message}", ex.Message); - } - finally - { - writer.Complete(); - } - } - - private async Task ConsumeAsync(ChannelReader> reader, int batchSize) - { - var batch = new List>(batchSize); - - await foreach (var task in reader.ReadAllAsync()) - { - batch.Add(task); - - if (batch.Count >= batchSize) - { - await ProcessBatch(batch); - batch.Clear(); - } - } - - if (batch.Count > 0) - { - await ProcessBatch(batch); - } - } - - private async Task ProcessBatch(IEnumerable> batch) - { - await foreach (var result in Task.WhenEach(batch)) - { - try - { - var current = await result; - _logger.LogInformation("Processing item: {Item}", current.Name); - } - catch (Exception ex) - { - _logger.LogError(ex, "Error processing item: {Message}", ex.Message); - } - } - } -} diff --git a/src/Zilean.Scraper/Features/Dmm/DmmFileDownloader.cs b/src/Zilean.Scraper/Features/Ingestion/DmmFileDownloader.cs similarity index 98% rename from src/Zilean.Scraper/Features/Dmm/DmmFileDownloader.cs rename to src/Zilean.Scraper/Features/Ingestion/DmmFileDownloader.cs index 5065e6a..9fb9b6b 100644 --- a/src/Zilean.Scraper/Features/Dmm/DmmFileDownloader.cs +++ b/src/Zilean.Scraper/Features/Ingestion/DmmFileDownloader.cs @@ -1,4 +1,4 @@ -namespace Zilean.Scraper.Features.Dmm; +namespace Zilean.Scraper.Features.Ingestion; public class 
DmmFileDownloader(ILogger logger, ZileanConfiguration configuration) { diff --git a/src/Zilean.Scraper/Features/Dmm/DmmPageProcessor.cs b/src/Zilean.Scraper/Features/Ingestion/DmmPageProcessor.cs similarity index 97% rename from src/Zilean.Scraper/Features/Dmm/DmmPageProcessor.cs rename to src/Zilean.Scraper/Features/Ingestion/DmmPageProcessor.cs index 55c60c3..355cb30 100644 --- a/src/Zilean.Scraper/Features/Dmm/DmmPageProcessor.cs +++ b/src/Zilean.Scraper/Features/Ingestion/DmmPageProcessor.cs @@ -1,6 +1,4 @@ -using Zilean.Scraper.Features.LzString; - -namespace Zilean.Scraper.Features.Dmm; +namespace Zilean.Scraper.Features.Ingestion; public partial class DmmPageProcessor(DmmSyncState state) : IDisposable diff --git a/src/Zilean.Scraper/Features/Dmm/DmmScraping.cs b/src/Zilean.Scraper/Features/Ingestion/DmmScraping.cs similarity index 93% rename from src/Zilean.Scraper/Features/Dmm/DmmScraping.cs rename to src/Zilean.Scraper/Features/Ingestion/DmmScraping.cs index a85c77c..fcb87d4 100644 --- a/src/Zilean.Scraper/Features/Dmm/DmmScraping.cs +++ b/src/Zilean.Scraper/Features/Ingestion/DmmScraping.cs @@ -1,6 +1,4 @@ -using Zilean.Scraper.Features.PythonSupport; - -namespace Zilean.Scraper.Features.Dmm; +namespace Zilean.Scraper.Features.Ingestion; public class DmmScraping( DmmSyncState dmmState, @@ -111,7 +109,7 @@ await AnsiConsole.Progress() var parsedTorrents = await parseTorrentNameService.ParseAndPopulateAsync(distinctTorrents); - var finalizedTorrents = parsedTorrents.Where(WipeSomeTissue).ToList(); + var finalizedTorrents = parsedTorrents.Where(torrentInfo => torrentInfo.WipeSomeTissue()).ToList(); await torrentInfoService.StoreTorrentInfo(finalizedTorrents); } @@ -156,7 +154,7 @@ await Parallel.ForEachAsync(files, parallelOptions, async (file, ct) => var parsedTorrents = await parseTorrentNameService.ParseAndPopulateAsync(distinctTorrents); - var finalizedTorrents = parsedTorrents.Where(WipeSomeTissue).ToList(); + var finalizedTorrents = 
parsedTorrents.Where(torrentInfo => torrentInfo.WipeSomeTissue()).ToList(); logger.LogInformation("Parsed {Count} torrents", finalizedTorrents.Count); @@ -191,9 +189,4 @@ private static async IAsyncEnumerable ProcessFileAsync(string yield return torrent; } } - - private static bool WipeSomeTissue(TorrentInfo torrent) => - !((torrent.RawTitle.Contains(" xxx ", StringComparison.OrdinalIgnoreCase) || - torrent.RawTitle.Contains(" xx ", StringComparison.OrdinalIgnoreCase)) && - !torrent.ParsedTitle.Contains("XXX", StringComparison.OrdinalIgnoreCase)); } diff --git a/src/Zilean.Scraper/Features/Dmm/DmmSyncState.cs b/src/Zilean.Scraper/Features/Ingestion/DmmSyncState.cs similarity index 97% rename from src/Zilean.Scraper/Features/Dmm/DmmSyncState.cs rename to src/Zilean.Scraper/Features/Ingestion/DmmSyncState.cs index c7937dc..afd25c3 100644 --- a/src/Zilean.Scraper/Features/Dmm/DmmSyncState.cs +++ b/src/Zilean.Scraper/Features/Ingestion/DmmSyncState.cs @@ -1,4 +1,4 @@ -namespace Zilean.Scraper.Features.Dmm; +namespace Zilean.Scraper.Features.Ingestion; public class DmmSyncState(ILogger logger, DmmService dmmService) { diff --git a/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs new file mode 100644 index 0000000..f8b9f96 --- /dev/null +++ b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs @@ -0,0 +1,115 @@ +namespace Zilean.Scraper.Features.Ingestion; + +public class GenericIngestionProcessor( + IHttpClientFactory clientFactory, + ILogger logger, + ParseTorrentNameService parseTorrentNameService, + TorrentInfoService torrentInfoService, + ZileanConfiguration configuration) +{ + public async Task ProcessTorrentsAsync(string url, CancellationToken cancellationToken = default) + { + logger.LogInformation("Processing URL: {Url}", url); + + var channel = Channel.CreateBounded>(new BoundedChannelOptions(configuration.Ingestion.MaxChannelSize) + { + SingleReader = true, + 
SingleWriter = false, + FullMode = BoundedChannelFullMode.Wait + }); + + var producerTask = ProduceAsync(url, channel.Writer, cancellationToken); + var consumerTask = ConsumeAsync(channel.Reader, configuration.Ingestion.BatchSize, cancellationToken); + await Task.WhenAll(producerTask, consumerTask); + } + + private async Task ProduceAsync(string url, ChannelWriter> writer, CancellationToken cancellationToken = default) + { + try + { + var httpClient = clientFactory.CreateClient(); + var response = await httpClient.GetAsync(url, HttpCompletionOption.ResponseHeadersRead, cancellationToken); + response.EnsureSuccessStatusCode(); + + var stream = await response.Content.ReadAsStreamAsync(cancellationToken); + var options = new JsonSerializerOptions + { + PropertyNameCaseInsensitive = true + }; + + await foreach (var item in JsonSerializer.DeserializeAsyncEnumerable(stream, options, cancellationToken)) + { + if (item is not null) + { + await writer.WriteAsync(Task.FromResult(item), cancellationToken); + } + } + } + catch (Exception) + { + logger.LogWarning("Error processing item"); + throw; + } + finally + { + writer.Complete(); + } + } + + private async Task ConsumeAsync(ChannelReader> reader, int batchSize, CancellationToken cancellationToken = default) + { + var batch = new List>(batchSize); + + await foreach (var task in reader.ReadAllAsync(cancellationToken)) + { + batch.Add(task); + + if (batch.Count < batchSize) + { + continue; + } + + await ProcessBatch(batch, cancellationToken); + batch.Clear(); + } + + if (batch.Count > 0) + { + await ProcessBatch(batch, cancellationToken); + } + } + + private async Task ProcessBatch(List> batch, CancellationToken cancellationToken = default) + { + try + { + var torrents = new List(); + + await foreach (var result in Task.WhenEach(batch).WithCancellation(cancellationToken)) + { + var current = await result; + torrents.Add(ExtractedDmmEntry.FromStreamedEntry(current)); + } + + if (torrents.Count == 0 || 
cancellationToken.IsCancellationRequested) + { + return; + } + + logger.LogInformation("Processing batch of {Count} torrents", torrents.Count); + + if (torrents.Count != 0) + { + var parsedTorrents = await parseTorrentNameService.ParseAndPopulateAsync(torrents); + var finalizedTorrents = parsedTorrents.Where(torrentInfo => torrentInfo.WipeSomeTissue()).ToList(); + logger.LogInformation("Parsed {Count} torrents", finalizedTorrents.Count); + await torrentInfoService.StoreTorrentInfo(finalizedTorrents); + } + } + catch (Exception) + { + logger.LogWarning("Error processing batch of torrents. Batch size: {BatchSize}", batch.Count); + throw; + } + } +} diff --git a/src/Zilean.Scraper/Features/Ingestion/GenericIngestionScraping.cs b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionScraping.cs new file mode 100644 index 0000000..99b4a52 --- /dev/null +++ b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionScraping.cs @@ -0,0 +1,67 @@ +namespace Zilean.Scraper.Features.Ingestion; + +public class GenericIngestionScraping( + ZileanConfiguration configuration, + GenericIngestionProcessor ingestionProcessor, + ILogger logger, + KubernetesServiceDiscovery kubernetesServiceDiscovery) +{ + public async Task Execute(CancellationToken cancellationToken) + { + logger.LogInformation("Starting ingestion scraping"); + + List urlsToProcess = []; + + if (configuration.Ingestion.Kubernetes.EnableServiceDiscovery) + { + logger.LogInformation("Discovering URLs from Kubernetes services"); + var urls = await kubernetesServiceDiscovery.DiscoverUrlsAsync(cancellationToken); + logger.LogInformation("Discovered {Count} URLs from Kubernetes services", urls.Count); + urlsToProcess.AddRange(urls); + } + + if (configuration.Ingestion.EnableZurgIngestion) + { + logger.LogInformation("Adding Zurg instances to the list of URLs to process"); + urlsToProcess.AddRange(configuration.Ingestion.ZurgInstances); + } + + if (configuration.Ingestion.EnableZileanIngestion) + { + 
logger.LogInformation("Adding Zilean instances to the list of URLs to process"); + urlsToProcess.AddRange(configuration.Ingestion.ZileanInstances); + } + + if (urlsToProcess.Count == 0) + { + logger.LogInformation("No URLs to process, exiting"); + return 0; + } + + var completedCount = 0; + + foreach (var url in urlsToProcess) + { + cancellationToken.ThrowIfCancellationRequested(); + + try + { + await ingestionProcessor.ProcessTorrentsAsync(url, cancellationToken); + completedCount++; + } + catch (OperationCanceledException) + { + logger.LogInformation("Ingestion scraping cancelled"); + break; + } + catch (Exception ex) + { + logger.LogError(ex, "Error processing URL: {Url}", url); + } + } + + logger.LogInformation("Ingestion scraping completed for {Count} URLs", completedCount); + + return 0; + } +} diff --git a/src/Zilean.Scraper/Features/Ingestion/KubernetesServiceDiscovery.cs b/src/Zilean.Scraper/Features/Ingestion/KubernetesServiceDiscovery.cs new file mode 100644 index 0000000..d83021d --- /dev/null +++ b/src/Zilean.Scraper/Features/Ingestion/KubernetesServiceDiscovery.cs @@ -0,0 +1,57 @@ +namespace Zilean.Scraper.Features.Ingestion; + +public class KubernetesServiceDiscovery( + ILogger logger, + ZileanConfiguration configuration) +{ + public async Task> DiscoverUrlsAsync(CancellationToken cancellationToken = default) + { + var urls = new List(); + + try + { + var clientConfig = + KubernetesClientConfiguration.BuildConfigFromConfigFile(configuration.Ingestion.Kubernetes.KubeConfigFile); + var kubernetesClient = new Kubernetes(clientConfig); + + var services = await kubernetesClient.CoreV1.ListServiceForAllNamespacesAsync( + labelSelector: configuration.Ingestion.Kubernetes.LabelSelector, + cancellationToken: cancellationToken); + + foreach (var service in services.Items) + { + try + { + var url = BuildUrlFromService(service); + if (!string.IsNullOrEmpty(url)) + { + urls.Add(url); + logger.LogInformation("Discovered service URL: {Url}", url); + } + } + catch 
(Exception ex) + { + logger.LogError(ex, "Failed to build URL for service {ServiceName} in namespace {Namespace}", + service.Metadata.Name, service.Metadata.NamespaceProperty); + } + } + } + catch (Exception ex) + { + logger.LogError(ex, "Failed to list services with label selector {LabelSelector}", configuration.Ingestion.Kubernetes.LabelSelector); + } + + return urls; + } + + private string BuildUrlFromService(V1Service service) + { + if (service.Metadata?.NamespaceProperty == null) + { + throw new InvalidOperationException("Service metadata or namespace is missing."); + } + + var namespaceName = service.Metadata.NamespaceProperty; + return string.Format(configuration.Ingestion.Kubernetes.ZurgUrlTemplate, namespaceName); + } +} diff --git a/src/Zilean.Scraper/Features/Ingestion/TorrentInfoExtensions.cs b/src/Zilean.Scraper/Features/Ingestion/TorrentInfoExtensions.cs new file mode 100644 index 0000000..370448e --- /dev/null +++ b/src/Zilean.Scraper/Features/Ingestion/TorrentInfoExtensions.cs @@ -0,0 +1,9 @@ +namespace Zilean.Scraper.Features.Ingestion; + +public static class TorrentInfoExtensions +{ + public static bool WipeSomeTissue(this TorrentInfo torrent) => + !((torrent.RawTitle.Contains(" xxx ", StringComparison.OrdinalIgnoreCase) || + torrent.RawTitle.Contains(" xx ", StringComparison.OrdinalIgnoreCase)) && + !torrent.ParsedTitle.Contains("XXX", StringComparison.OrdinalIgnoreCase)); +} diff --git a/src/Zilean.Scraper/GlobalUsings.cs b/src/Zilean.Scraper/GlobalUsings.cs index d0a6c9e..8450040 100644 --- a/src/Zilean.Scraper/GlobalUsings.cs +++ b/src/Zilean.Scraper/GlobalUsings.cs @@ -11,6 +11,8 @@ global using System.Threading.Channels; global using CsvHelper; global using CsvHelper.Configuration; +global using k8s; +global using k8s.Models; global using Microsoft.EntityFrameworkCore; global using Microsoft.Extensions.Configuration; global using Microsoft.Extensions.DependencyInjection; @@ -23,7 +25,7 @@ global using Zilean.Database.Bootstrapping; global 
using Zilean.Database.Services; global using Zilean.Scraper.Features.Bootstrapping; -global using Zilean.Scraper.Features.Dmm; +global using Zilean.Scraper.Features.Ingestion; global using Zilean.Scraper.Features.Imdb; global using Zilean.Scraper.Features.LzString; global using Zilean.Scraper.Features.PythonSupport; diff --git a/src/Zilean.Scraper/Program.cs b/src/Zilean.Scraper/Program.cs index 75af6f8..b01823f 100644 --- a/src/Zilean.Scraper/Program.cs +++ b/src/Zilean.Scraper/Program.cs @@ -4,7 +4,7 @@ builder.AddOtlpServiceDefaults(); -builder.Services.AddDmmScraper(builder.Configuration); +builder.Services.AddScrapers(builder.Configuration); var scraper = builder.Build(); diff --git a/src/Zilean.Scraper/Zilean.Scraper.csproj b/src/Zilean.Scraper/Zilean.Scraper.csproj index feef314..0654320 100644 --- a/src/Zilean.Scraper/Zilean.Scraper.csproj +++ b/src/Zilean.Scraper/Zilean.Scraper.csproj @@ -21,6 +21,7 @@ + diff --git a/src/Zilean.Shared/Features/Configuration/IngestionConfiguration.cs b/src/Zilean.Shared/Features/Configuration/IngestionConfiguration.cs new file mode 100644 index 0000000..eaeb452 --- /dev/null +++ b/src/Zilean.Shared/Features/Configuration/IngestionConfiguration.cs @@ -0,0 +1,12 @@ +namespace Zilean.Shared.Features.Configuration; + +public class IngestionConfiguration +{ + public List ZurgInstances { get; set; } = []; + public List ZileanInstances { get; set; } = []; + public bool EnableZurgIngestion { get; set; } = false; + public bool EnableZileanIngestion { get; set; } = false; + public KubernetesConfiguration Kubernetes { get; set; } = new(); + public int BatchSize { get; set; } = 1000; + public int MaxChannelSize { get; set; } = 5000; +} diff --git a/src/Zilean.Shared/Features/Configuration/KubernetesConfiguration.cs b/src/Zilean.Shared/Features/Configuration/KubernetesConfiguration.cs new file mode 100644 index 0000000..27810af --- /dev/null +++ b/src/Zilean.Shared/Features/Configuration/KubernetesConfiguration.cs @@ -0,0 +1,10 @@ 
+namespace Zilean.Shared.Features.Configuration; + +public class KubernetesConfiguration +{ + public bool EnableServiceDiscovery { get; set; } = false; + public string ZurgUrlTemplate { get; set; } = "http://zurg.{0}:9999/debug/torrents"; + public string LabelSelector { get; set; } = "app.elfhosted.com/name=zurg"; + public string KubeConfigFile { get; set; } = "/$HOME/.kube/config"; + public bool IsConfigFile { get; set; } = false; +} diff --git a/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs b/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs index a507f1d..c0ba023 100644 --- a/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs +++ b/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs @@ -14,6 +14,7 @@ public class ZileanConfiguration public ProwlarrConfiguration Prowlarr { get; set; } = new(); public TorrentsConfiguration Torrents { get; set; } = new(); public ImdbConfiguration Imdb { get; set; } = new(); + public IngestionConfiguration Ingestion { get; set; } = new(); public static void EnsureExists() { diff --git a/src/Zilean.Shared/Features/Dmm/DmmRecords.cs b/src/Zilean.Shared/Features/Dmm/DmmRecords.cs index 4e03e1f..80bbd63 100644 --- a/src/Zilean.Shared/Features/Dmm/DmmRecords.cs +++ b/src/Zilean.Shared/Features/Dmm/DmmRecords.cs @@ -1,3 +1,5 @@ +using Zilean.Shared.Features.Scraping; + namespace Zilean.Shared.Features.Dmm; public class ExtractedDmmEntry(string? infoHash, string? filename, long filesize, TorrentInfo? parseResponse) @@ -6,6 +8,9 @@ public class ExtractedDmmEntry(string? infoHash, string? filename, long filesize public string? InfoHash { get; set; } = infoHash; public long Filesize { get; set; } = filesize; public TorrentInfo? 
ParseResponse { get; set; } = parseResponse; + + public static ExtractedDmmEntry FromStreamedEntry(StreamedEntry streamedEntry) => + new(streamedEntry.InfoHash, streamedEntry.Name, streamedEntry.Size, null); } public class ExtractedDmmEntryResponse(TorrentInfo torrentInfo) From 6402511c44d9fe00f53785e597956c182a77221b Mon Sep 17 00:00:00 2001 From: PromKnight Date: Sat, 16 Nov 2024 15:52:10 +0000 Subject: [PATCH 5/8] feat: Implement cli to allow choice of sync process to run --- Directory.Packages.props | 1 + Zilean.sln.DotSettings.user | 3 ++ .../Features/Bootstrapping/EnsureMigrated.cs | 24 ++++++++++ .../Bootstrapping/HostingExtensions.cs | 47 +++++++++++++++++++ .../ServiceCollectionExtensions.cs | 3 +- .../Features/Bootstrapping/ServiceLifetime.cs | 45 ------------------ .../Features/Bootstrapping/TypeRegistrar.cs | 12 +++++ .../Features/Bootstrapping/TypeResolver.cs | 7 +++ .../Features/Commands/DefaultCommand.cs | 14 ++++++ .../Features/Commands/DmmSyncCommand.cs | 7 +++ .../Features/Commands/GenericSyncCommand.cs | 7 +++ .../Ingestion/GenericIngestionProcessor.cs | 2 +- src/Zilean.Scraper/GlobalUsings.cs | 4 ++ src/Zilean.Scraper/Program.cs | 33 ++++++++++--- .../Properties/launchSettings.json | 21 +++++++++ src/Zilean.Scraper/Zilean.Scraper.csproj | 1 + 16 files changed, 178 insertions(+), 53 deletions(-) create mode 100644 src/Zilean.Scraper/Features/Bootstrapping/EnsureMigrated.cs create mode 100644 src/Zilean.Scraper/Features/Bootstrapping/HostingExtensions.cs delete mode 100644 src/Zilean.Scraper/Features/Bootstrapping/ServiceLifetime.cs create mode 100644 src/Zilean.Scraper/Features/Bootstrapping/TypeRegistrar.cs create mode 100644 src/Zilean.Scraper/Features/Bootstrapping/TypeResolver.cs create mode 100644 src/Zilean.Scraper/Features/Commands/DefaultCommand.cs create mode 100644 src/Zilean.Scraper/Features/Commands/DmmSyncCommand.cs create mode 100644 src/Zilean.Scraper/Features/Commands/GenericSyncCommand.cs create mode 100644 
src/Zilean.Scraper/Properties/launchSettings.json diff --git a/Directory.Packages.props b/Directory.Packages.props index eb71a6a..67a9429 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -25,6 +25,7 @@ + diff --git a/Zilean.sln.DotSettings.user b/Zilean.sln.DotSettings.user index 9b2cb1e..14ae35e 100644 --- a/Zilean.sln.DotSettings.user +++ b/Zilean.sln.DotSettings.user @@ -1,11 +1,14 @@  ForceIncluded + ForceIncluded ForceIncluded ForceIncluded + ForceIncluded ForceIncluded ForceIncluded ForceIncluded ForceIncluded + ForceIncluded diff --git a/src/Zilean.Scraper/Features/Bootstrapping/EnsureMigrated.cs b/src/Zilean.Scraper/Features/Bootstrapping/EnsureMigrated.cs new file mode 100644 index 0000000..fefe700 --- /dev/null +++ b/src/Zilean.Scraper/Features/Bootstrapping/EnsureMigrated.cs @@ -0,0 +1,24 @@ +namespace Zilean.Scraper.Features.Bootstrapping; + +public class EnsureMigrated(ImdbMetadataLoader metadataLoader, ILogger logger, ZileanDbContext dbContext, ZileanConfiguration configuration) : IHostedService +{ + public async Task StartAsync(CancellationToken cancellationToken) + { + logger.LogInformation("Applying Migrations..."); + await dbContext.Database.MigrateAsync(cancellationToken: cancellationToken); + logger.LogInformation("Migrations Applied."); + + if (configuration.Imdb.EnableImportMatching) + { + var imdbLoadedResult = await metadataLoader.Execute(cancellationToken); + + if (imdbLoadedResult == 1) + { + Environment.ExitCode = 1; + Process.GetCurrentProcess().Kill(); + } + } + } + + public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask; +} diff --git a/src/Zilean.Scraper/Features/Bootstrapping/HostingExtensions.cs b/src/Zilean.Scraper/Features/Bootstrapping/HostingExtensions.cs new file mode 100644 index 0000000..490dbb0 --- /dev/null +++ b/src/Zilean.Scraper/Features/Bootstrapping/HostingExtensions.cs @@ -0,0 +1,47 @@ +namespace Zilean.Scraper.Features.Bootstrapping; + +public static class 
HostingExtensions +{ + public static IServiceCollection AddCommandLine( + this IServiceCollection services, + Action configurator) + { + var app = new CommandApp(new TypeRegistrar(services)); + app.Configure(configurator); + services.AddSingleton(app); + + return services; + } + + public static IServiceCollection AddCommandLine( + this IServiceCollection services, + Action configurator) + where TDefaultCommand : class, ICommand + { + var app = new CommandApp(new TypeRegistrar(services)); + app.Configure(configurator); + services.AddSingleton(app); + + return services; + } + + public static async Task RunAsync(this IHost host, string[] args) + { + ArgumentNullException.ThrowIfNull(host); + + await host.StartAsync(); + + try + { + var app = host.Services.GetService() ?? + throw new InvalidOperationException("Command application has not been configured."); + + return await app.RunAsync(args); + } + finally + { + await host.StopAsync(); + await ((IAsyncDisposable)host).DisposeAsync(); + } + } +} diff --git a/src/Zilean.Scraper/Features/Bootstrapping/ServiceCollectionExtensions.cs b/src/Zilean.Scraper/Features/Bootstrapping/ServiceCollectionExtensions.cs index f5c3338..12c3ec1 100644 --- a/src/Zilean.Scraper/Features/Bootstrapping/ServiceCollectionExtensions.cs +++ b/src/Zilean.Scraper/Features/Bootstrapping/ServiceCollectionExtensions.cs @@ -6,13 +6,14 @@ public static void AddScrapers(this IServiceCollection services, IConfiguration { var zileanConfiguration = configuration.GetZileanConfiguration(); + services.AddHttpClient(); services.AddSingleton(zileanConfiguration); services.AddImdbServices(); services.AddDmmServices(); services.AddGenericServices(); services.AddZileanDataServices(zileanConfiguration); services.AddSingleton(); - services.AddHostedService(); + services.AddHostedService(); } private static void AddDmmServices(this IServiceCollection services) diff --git a/src/Zilean.Scraper/Features/Bootstrapping/ServiceLifetime.cs 
b/src/Zilean.Scraper/Features/Bootstrapping/ServiceLifetime.cs deleted file mode 100644 index f24c2f2..0000000 --- a/src/Zilean.Scraper/Features/Bootstrapping/ServiceLifetime.cs +++ /dev/null @@ -1,45 +0,0 @@ -namespace Zilean.Scraper.Features.Bootstrapping; - -public class ServiceLifetime(ImdbMetadataLoader metadataLoader, DmmScraping dmmScraper, IServiceProvider serviceProvider, ILogger logger, ZileanConfiguration configuration) : IHostedLifecycleService -{ - public Task StartAsync(CancellationToken cancellationToken) => Task.CompletedTask; - - public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask; - - public async Task StartingAsync(CancellationToken cancellationToken) - { -#if DEBUG - logger.LogInformation("Applying Migrations..."); - await using var asyncScope = serviceProvider.CreateAsyncScope(); - var dbContext = asyncScope.ServiceProvider.GetRequiredService(); - await dbContext.Database.MigrateAsync(cancellationToken); - logger.LogInformation("Migrations Applied."); -#else - await Task.Delay(1); -#endif - } - - public Task StoppedAsync(CancellationToken cancellationToken) => Task.CompletedTask; - - public Task StoppingAsync(CancellationToken cancellationToken) => Task.CompletedTask; - - public async Task StartedAsync(CancellationToken cancellationToken) - { - if (configuration.Imdb.EnableImportMatching) - { - var imdbLoadedResult = await metadataLoader.Execute(cancellationToken); - - if (imdbLoadedResult == 1) - { - Environment.ExitCode = 1; - Process.GetCurrentProcess().Kill(); - return; - } - } - - var dmmScrapedResult = await dmmScraper.Execute(cancellationToken); - - Environment.ExitCode = dmmScrapedResult; - Process.GetCurrentProcess().Kill(); - } -} diff --git a/src/Zilean.Scraper/Features/Bootstrapping/TypeRegistrar.cs b/src/Zilean.Scraper/Features/Bootstrapping/TypeRegistrar.cs new file mode 100644 index 0000000..31acc97 --- /dev/null +++ b/src/Zilean.Scraper/Features/Bootstrapping/TypeRegistrar.cs @@ -0,0 +1,12 @@ 
+namespace Zilean.Scraper.Features.Bootstrapping; + +internal sealed class TypeRegistrar(IServiceCollection provider) : ITypeRegistrar +{ + public ITypeResolver Build() => new TypeResolver(provider.BuildServiceProvider()); + + public void Register(Type service, Type implementation) => provider.AddSingleton(service, implementation); + + public void RegisterInstance(Type service, object implementation) => provider.AddSingleton(service, implementation); + + public void RegisterLazy(Type service, Func factory) => provider.AddSingleton(service, _ => factory()); +} diff --git a/src/Zilean.Scraper/Features/Bootstrapping/TypeResolver.cs b/src/Zilean.Scraper/Features/Bootstrapping/TypeResolver.cs new file mode 100644 index 0000000..67974ea --- /dev/null +++ b/src/Zilean.Scraper/Features/Bootstrapping/TypeResolver.cs @@ -0,0 +1,7 @@ +namespace Zilean.Scraper.Features.Bootstrapping; + +internal sealed class TypeResolver(IServiceProvider provider) : ITypeResolver +{ + public object? Resolve(Type? type) => + type == null ? 
null : provider.GetService(type); +} diff --git a/src/Zilean.Scraper/Features/Commands/DefaultCommand.cs b/src/Zilean.Scraper/Features/Commands/DefaultCommand.cs new file mode 100644 index 0000000..82537f9 --- /dev/null +++ b/src/Zilean.Scraper/Features/Commands/DefaultCommand.cs @@ -0,0 +1,14 @@ +namespace Zilean.Scraper.Features.Commands; + +public sealed class DefaultCommand(ILogger logger) : Command +{ + public sealed class Settings : CommandSettings + { + } + + public override int Execute(CommandContext context, Settings settings) + { + logger.LogInformation("Zilean Scraper: Execution Completed"); + return 0; + } +} diff --git a/src/Zilean.Scraper/Features/Commands/DmmSyncCommand.cs b/src/Zilean.Scraper/Features/Commands/DmmSyncCommand.cs new file mode 100644 index 0000000..09c0686 --- /dev/null +++ b/src/Zilean.Scraper/Features/Commands/DmmSyncCommand.cs @@ -0,0 +1,7 @@ +namespace Zilean.Scraper.Features.Commands; + +public class DmmSyncCommand(DmmScraping dmmScraping) : Command +{ + public override int Execute(CommandContext context) => + dmmScraping.Execute(CancellationToken.None).GetAwaiter().GetResult(); +} diff --git a/src/Zilean.Scraper/Features/Commands/GenericSyncCommand.cs b/src/Zilean.Scraper/Features/Commands/GenericSyncCommand.cs new file mode 100644 index 0000000..61368ad --- /dev/null +++ b/src/Zilean.Scraper/Features/Commands/GenericSyncCommand.cs @@ -0,0 +1,7 @@ +namespace Zilean.Scraper.Features.Commands; + +public class GenericSyncCommand(GenericIngestionScraping genericIngestion) : Command +{ + public override int Execute(CommandContext context) => + genericIngestion.Execute(CancellationToken.None).GetAwaiter().GetResult(); +} diff --git a/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs index f8b9f96..443cbf8 100644 --- a/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs +++ 
b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs @@ -4,7 +4,7 @@ public class GenericIngestionProcessor( IHttpClientFactory clientFactory, ILogger logger, ParseTorrentNameService parseTorrentNameService, - TorrentInfoService torrentInfoService, + ITorrentInfoService torrentInfoService, ZileanConfiguration configuration) { public async Task ProcessTorrentsAsync(string url, CancellationToken cancellationToken = default) diff --git a/src/Zilean.Scraper/GlobalUsings.cs b/src/Zilean.Scraper/GlobalUsings.cs index 8450040..80e83af 100644 --- a/src/Zilean.Scraper/GlobalUsings.cs +++ b/src/Zilean.Scraper/GlobalUsings.cs @@ -19,12 +19,16 @@ global using Microsoft.Extensions.Hosting; global using Microsoft.Extensions.Logging; global using Python.Runtime; +global using Serilog; +global using Serilog.Sinks.Spectre; global using SimCube.Aspire.Features.Otlp; global using Spectre.Console; +global using Spectre.Console.Cli; global using Zilean.Database; global using Zilean.Database.Bootstrapping; global using Zilean.Database.Services; global using Zilean.Scraper.Features.Bootstrapping; +global using Zilean.Scraper.Features.Commands; global using Zilean.Scraper.Features.Ingestion; global using Zilean.Scraper.Features.Imdb; global using Zilean.Scraper.Features.LzString; diff --git a/src/Zilean.Scraper/Program.cs b/src/Zilean.Scraper/Program.cs index b01823f..d254a7f 100644 --- a/src/Zilean.Scraper/Program.cs +++ b/src/Zilean.Scraper/Program.cs @@ -1,11 +1,32 @@ -var builder = Host.CreateApplicationBuilder(args); +var builder = Host.CreateDefaultBuilder(); -builder.Configuration.AddConfigurationFiles(); +builder.ConfigureAppConfiguration(configuration => +{ + configuration.AddConfigurationFiles(); +}); -builder.AddOtlpServiceDefaults(); +builder.ConfigureLogging((context, logging) => +{ + logging.ClearProviders(); + var loggingConfiguration = context.Configuration.GetLoggerConfiguration(); + Log.Logger = loggingConfiguration.CreateLogger(); + logging.AddSerilog(); 
+}); -builder.Services.AddScrapers(builder.Configuration); +builder.ConfigureServices((context, services) => +{ + services.AddScrapers(context.Configuration); + services.AddCommandLine(config => + { + config.SetApplicationName("zilean-scraper"); -var scraper = builder.Build(); + config.AddCommand("dmm-sync") + .WithDescription("Sync DMM Hashlists from Github."); -await scraper.RunAsync(); + config.AddCommand("generic-sync") + .WithDescription("Sync data from Zurg and Zilean instances."); + }); +}); + +var host = builder.Build(); +return await host.RunAsync(args); diff --git a/src/Zilean.Scraper/Properties/launchSettings.json b/src/Zilean.Scraper/Properties/launchSettings.json new file mode 100644 index 0000000..72e9a91 --- /dev/null +++ b/src/Zilean.Scraper/Properties/launchSettings.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json.schemastore.org/launchsettings.json", + "profiles": { + "Zilean.Scraper: DMM Sync": { + "commandName": "Project", + "environmentVariables": { + "ZILEAN_PYTHON_VENV": "C:\\Python311", + "Zilean__Database__ConnectionString": "Host=localhost;Database=zilean;Username=postgres;Password=postgres;Include Error Detail=true;Timeout=300;CommandTimeout=300;" + }, + "commandLineArgs": "dmm-sync" + }, + "Zilean.Scraper: Generic Sync": { + "commandName": "Project", + "environmentVariables": { + "ZILEAN_PYTHON_VENV": "C:\\Python311", + "Zilean__Database__ConnectionString": "Host=localhost;Database=zilean;Username=postgres;Password=postgres;Include Error Detail=true;Timeout=300;CommandTimeout=300;" + }, + "commandLineArgs": "generic-sync" + } + } +} diff --git a/src/Zilean.Scraper/Zilean.Scraper.csproj b/src/Zilean.Scraper/Zilean.Scraper.csproj index 0654320..ecb6ccb 100644 --- a/src/Zilean.Scraper/Zilean.Scraper.csproj +++ b/src/Zilean.Scraper/Zilean.Scraper.csproj @@ -22,6 +22,7 @@ + From dceb6cba9d205eb82ed8a680e1eac3d5e73159a5 Mon Sep 17 00:00:00 2001 From: PromKnight Date: Sat, 16 Nov 2024 16:14:00 +0000 Subject: [PATCH 6/8] refactor: wire up 
secondary service, with separate schedule Ensure they share the same overlap identifier, so they are staggered. --- Zilean.sln.DotSettings.user | 3 ++ .../ServiceCollectionExtensions.cs | 18 ++++++++---- ...rapIndexesService.cs => StartupService.cs} | 6 ++-- .../Features/Search/SearchEndpoints.cs | 10 +++---- .../Sync/{SyncJob.cs => DmmSyncJob.cs} | 11 ++++++-- .../Features/Sync/GenericSyncJob.cs | 28 +++++++++++++++++++ src/Zilean.ApiService/Program.cs | 2 +- .../Properties/launchSettings.json | 4 ++- .../Ingestion/GenericIngestionScraping.cs | 4 +-- .../Configuration/IngestionConfiguration.cs | 5 ++-- 10 files changed, 69 insertions(+), 22 deletions(-) rename src/Zilean.ApiService/Features/Bootstrapping/{BootstrapIndexesService.cs => StartupService.cs} (87%) rename src/Zilean.ApiService/Features/Sync/{SyncJob.cs => DmmSyncJob.cs} (51%) create mode 100644 src/Zilean.ApiService/Features/Sync/GenericSyncJob.cs diff --git a/Zilean.sln.DotSettings.user b/Zilean.sln.DotSettings.user index 14ae35e..ca94492 100644 --- a/Zilean.sln.DotSettings.user +++ b/Zilean.sln.DotSettings.user @@ -1,13 +1,16 @@  ForceIncluded ForceIncluded + ForceIncluded ForceIncluded ForceIncluded ForceIncluded ForceIncluded ForceIncluded + ForceIncluded ForceIncluded ForceIncluded + ForceIncluded ForceIncluded diff --git a/src/Zilean.ApiService/Features/Bootstrapping/ServiceCollectionExtensions.cs b/src/Zilean.ApiService/Features/Bootstrapping/ServiceCollectionExtensions.cs index 0a8eee4..c764399 100644 --- a/src/Zilean.ApiService/Features/Bootstrapping/ServiceCollectionExtensions.cs +++ b/src/Zilean.ApiService/Features/Bootstrapping/ServiceCollectionExtensions.cs @@ -9,14 +9,15 @@ public static IServiceCollection AddSwaggerSupport(this IServiceCollection servi public static IServiceCollection AddSchedulingSupport(this IServiceCollection services) => services.AddScheduler(); - public static IServiceCollection AddDataBootStrapping(this IServiceCollection services) => - services.AddHostedService(); 
+ public static IServiceCollection AddStartupHostedService(this IServiceCollection services) => + services.AddHostedService(); public static IServiceCollection ConditionallyRegisterDmmJob(this IServiceCollection services, ZileanConfiguration configuration) { if (configuration.Dmm.EnableScraping) { - services.AddTransient(); + services.AddTransient(); + services.AddTransient(); services.AddSingleton(); } @@ -29,9 +30,16 @@ public static IServiceProvider SetupScheduling(this IServiceProvider provider, Z { if (configuration.Dmm.EnableScraping) { - scheduler.Schedule() + scheduler.Schedule() .Cron(configuration.Dmm.ScrapeSchedule) - .PreventOverlapping(nameof(SyncJob)); + .PreventOverlapping("SyncJobs"); + } + + if (configuration.Ingestion.EnableScraping) + { + scheduler.Schedule() + .Cron(configuration.Ingestion.ScrapeSchedule) + .PreventOverlapping("SyncJobs"); } }) .LogScheduledTaskProgress(); diff --git a/src/Zilean.ApiService/Features/Bootstrapping/BootstrapIndexesService.cs b/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs similarity index 87% rename from src/Zilean.ApiService/Features/Bootstrapping/BootstrapIndexesService.cs rename to src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs index 0c0956e..8e54433 100644 --- a/src/Zilean.ApiService/Features/Bootstrapping/BootstrapIndexesService.cs +++ b/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs @@ -1,6 +1,6 @@ namespace Zilean.ApiService.Features.Bootstrapping; -public class BootstrapIndexesService( +public class StartupService( ZileanConfiguration configuration, IShellExecutionService executionService, IServiceProvider serviceProvider, @@ -12,7 +12,7 @@ public class BootstrapIndexesService( public async Task StartingAsync(CancellationToken cancellationToken) { - var logger = loggerFactory.CreateLogger(); + var logger = loggerFactory.CreateLogger(); logger.LogInformation("Applying Migrations..."); await using var asyncScope = serviceProvider.CreateAsyncScope(); var 
dbContext = asyncScope.ServiceProvider.GetRequiredService(); @@ -30,7 +30,7 @@ public async Task StartedAsync(CancellationToken cancellationToken) { await using var asyncScope = serviceProvider.CreateAsyncScope(); var dbContext = asyncScope.ServiceProvider.GetRequiredService(); - var dmmJob = new SyncJob(executionService, loggerFactory.CreateLogger(), dbContext); + var dmmJob = new DmmSyncJob(executionService, loggerFactory.CreateLogger(), dbContext); var shouldRun = await dmmJob.ShouldRunOnStartup(); if (shouldRun) { diff --git a/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs b/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs index 2086de1..18ab311 100644 --- a/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs +++ b/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs @@ -34,7 +34,7 @@ private static RouteGroupBuilder Dmm(this RouteGroupBuilder group) return group; } - private static async Task PerformOnDemandScrape(HttpContext context, ILogger logger, IShellExecutionService executionService, ILogger syncLogger, IMutex mutex, SyncOnDemandState state, ZileanDbContext dbContext) + private static async Task PerformOnDemandScrape(HttpContext context, ILogger logger, IShellExecutionService executionService, ILogger syncLogger, IMutex mutex, SyncOnDemandState state, ZileanDbContext dbContext) { if (state.IsRunning) { @@ -42,9 +42,9 @@ private static async Task PerformOnDemandScrape(HttpContext context, ILogger logger, ZileanDbContext dbContext) : IInvocable, ICancellableInvocable +public class DmmSyncJob(IShellExecutionService shellExecutionService, ILogger logger, ZileanDbContext dbContext) : IInvocable, ICancellableInvocable { public CancellationToken CancellationToken { get; set; } + private const string DmmSyncArg = "dmm-sync"; public async Task Invoke() { - logger.LogInformation("SyncJob started"); + logger.LogInformation("Dmm SyncJob started"); + + var argumentBuilder = ArgumentsBuilder.Create(); + 
argumentBuilder.AppendArgument(DmmSyncArg, string.Empty, false, false); await shellExecutionService.ExecuteCommand(new ShellCommandOptions { Command = Path.Combine(AppContext.BaseDirectory, "scraper"), + ArgumentsBuilder = argumentBuilder, ShowOutput = true, CancellationToken = CancellationToken }); - logger.LogInformation("SyncJob completed"); + logger.LogInformation("Dmm SyncJob completed"); } // ReSharper disable once MethodSupportsCancellation diff --git a/src/Zilean.ApiService/Features/Sync/GenericSyncJob.cs b/src/Zilean.ApiService/Features/Sync/GenericSyncJob.cs new file mode 100644 index 0000000..3bd4be2 --- /dev/null +++ b/src/Zilean.ApiService/Features/Sync/GenericSyncJob.cs @@ -0,0 +1,28 @@ +namespace Zilean.ApiService.Features.Sync; + +public class GenericSyncJob(IShellExecutionService shellExecutionService, ILogger logger, ZileanDbContext dbContext) : IInvocable, ICancellableInvocable +{ + public CancellationToken CancellationToken { get; set; } + private const string GenericSyncArg = "generic-sync"; + + public async Task Invoke() + { + logger.LogInformation("Generic SyncJob started"); + + var argumentBuilder = ArgumentsBuilder.Create(); + argumentBuilder.AppendArgument(GenericSyncArg, string.Empty, false, false); + + await shellExecutionService.ExecuteCommand(new ShellCommandOptions + { + Command = Path.Combine(AppContext.BaseDirectory, "scraper"), + ArgumentsBuilder = argumentBuilder, + ShowOutput = true, + CancellationToken = CancellationToken + }); + + logger.LogInformation("Generic SyncJob completed"); + } + + // ReSharper disable once MethodSupportsCancellation + public Task ShouldRunOnStartup() => dbContext.ParsedPages.AnyAsync(); +} diff --git a/src/Zilean.ApiService/Program.cs b/src/Zilean.ApiService/Program.cs index b427632..caea6ac 100644 --- a/src/Zilean.ApiService/Program.cs +++ b/src/Zilean.ApiService/Program.cs @@ -15,7 +15,7 @@ .AddShellExecutionService() .ConditionallyRegisterDmmJob(zileanConfiguration) 
.AddZileanDataServices(zileanConfiguration) - .AddDataBootStrapping(); + .AddStartupHostedService(); var app = builder.Build(); diff --git a/src/Zilean.ApiService/Properties/launchSettings.json b/src/Zilean.ApiService/Properties/launchSettings.json index 169b92c..e13dac4 100644 --- a/src/Zilean.ApiService/Properties/launchSettings.json +++ b/src/Zilean.ApiService/Properties/launchSettings.json @@ -6,7 +6,9 @@ "environmentVariables": { "ASPNETCORE_ENVIRONMENT": "Development", "ASPNETCORE_URLS": "http://+:8181", - "Zilean__Torrents__EnableEndpoint": "true" + "Zilean__Torrents__EnableEndpoint": "true", + "Zilean__Ingestion__EnableScraping": "true", + "Zilean__Dmm__EnableScraping": "true" } } } diff --git a/src/Zilean.Scraper/Features/Ingestion/GenericIngestionScraping.cs b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionScraping.cs index 99b4a52..17efb04 100644 --- a/src/Zilean.Scraper/Features/Ingestion/GenericIngestionScraping.cs +++ b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionScraping.cs @@ -20,13 +20,13 @@ public async Task Execute(CancellationToken cancellationToken) urlsToProcess.AddRange(urls); } - if (configuration.Ingestion.EnableZurgIngestion) + if (configuration.Ingestion.ZurgInstances.Count > 0) { logger.LogInformation("Adding Zurg instances to the list of URLs to process"); urlsToProcess.AddRange(configuration.Ingestion.ZurgInstances); } - if (configuration.Ingestion.EnableZileanIngestion) + if (configuration.Ingestion.ZileanInstances.Count > 0) { logger.LogInformation("Adding Zilean instances to the list of URLs to process"); urlsToProcess.AddRange(configuration.Ingestion.ZileanInstances); diff --git a/src/Zilean.Shared/Features/Configuration/IngestionConfiguration.cs b/src/Zilean.Shared/Features/Configuration/IngestionConfiguration.cs index eaeb452..a1204b8 100644 --- a/src/Zilean.Shared/Features/Configuration/IngestionConfiguration.cs +++ b/src/Zilean.Shared/Features/Configuration/IngestionConfiguration.cs @@ -4,9 +4,10 @@ public 
class IngestionConfiguration { public List ZurgInstances { get; set; } = []; public List ZileanInstances { get; set; } = []; - public bool EnableZurgIngestion { get; set; } = false; - public bool EnableZileanIngestion { get; set; } = false; + public bool EnableScraping { get; set; } = false; public KubernetesConfiguration Kubernetes { get; set; } = new(); public int BatchSize { get; set; } = 1000; public int MaxChannelSize { get; set; } = 5000; + + public string ScrapeSchedule { get; set; } = "0 * * * *"; } From 3bce11ec8bedd34eae66dc6c8f381485a0fcffc5 Mon Sep 17 00:00:00 2001 From: PromKnight Date: Sat, 16 Nov 2024 17:20:39 +0000 Subject: [PATCH 7/8] refactor: collection of selectors in case they don't all match Also implements the generic endpoint type in the discovery service. --- Zilean.sln.DotSettings.user | 1 + .../Services/ITorrentInfoService.cs | 1 + .../Services/TorrentInfoService.cs | 13 ++++ .../Ingestion/GenericIngestionProcessor.cs | 41 ++++++++++-- .../Ingestion/GenericIngestionScraping.cs | 64 +++++++++++++------ .../Ingestion/KubernetesServiceDiscovery.cs | 39 +++++++---- .../Configuration/IngestionConfiguration.cs | 5 +- .../Configuration/KubernetesConfiguration.cs | 3 +- .../Configuration/KubernetesSelector.cs | 8 +++ src/Zilean.Shared/Features/Dmm/DmmRecords.cs | 2 - .../Features/Scraping/GenericEndpoint.cs | 7 ++ .../Features/Scraping/GenericEndpointType.cs | 7 ++ src/Zilean.Shared/GlobalUsings.cs | 1 + 13 files changed, 147 insertions(+), 45 deletions(-) create mode 100644 src/Zilean.Shared/Features/Configuration/KubernetesSelector.cs create mode 100644 src/Zilean.Shared/Features/Scraping/GenericEndpoint.cs create mode 100644 src/Zilean.Shared/Features/Scraping/GenericEndpointType.cs diff --git a/Zilean.sln.DotSettings.user b/Zilean.sln.DotSettings.user index ca94492..bd69b9e 100644 --- a/Zilean.sln.DotSettings.user +++ b/Zilean.sln.DotSettings.user @@ -1,4 +1,5 @@  + ForceIncluded ForceIncluded ForceIncluded ForceIncluded diff --git
a/src/Zilean.Database/Services/ITorrentInfoService.cs b/src/Zilean.Database/Services/ITorrentInfoService.cs index 59af965..fd4f897 100644 --- a/src/Zilean.Database/Services/ITorrentInfoService.cs +++ b/src/Zilean.Database/Services/ITorrentInfoService.cs @@ -5,4 +5,5 @@ public interface ITorrentInfoService Task StoreTorrentInfo(List torrents, int batchSize = 10000); Task SearchForTorrentInfoByOnlyTitle(string query); Task SearchForTorrentInfoFiltered(TorrentInfoFilter filter, int? limit = null); + Task> GetExistingInfoHashesAsync(List infoHashes); } diff --git a/src/Zilean.Database/Services/TorrentInfoService.cs b/src/Zilean.Database/Services/TorrentInfoService.cs index c81a8f2..5185d0d 100644 --- a/src/Zilean.Database/Services/TorrentInfoService.cs +++ b/src/Zilean.Database/Services/TorrentInfoService.cs @@ -189,5 +189,18 @@ FROM search_imdb_meta( return imdbRecord?.ImdbId; } + public async Task> GetExistingInfoHashesAsync(List infoHashes) + { + await using var serviceScope = serviceProvider.CreateAsyncScope(); + await using var dbContext = serviceScope.ServiceProvider.GetRequiredService(); + + var existingHashes = await dbContext.Torrents + .Where(t => infoHashes.Contains(t.InfoHash)) + .Select(t => t.InfoHash) + .ToListAsync(); + + return [..existingHashes]; + } + private void WriteProgress(decimal @decimal) => logger.LogInformation("Storing torrent info: {Percentage:P}", @decimal); } diff --git a/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs index 443cbf8..47c6475 100644 --- a/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs +++ b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs @@ -7,9 +7,14 @@ public class GenericIngestionProcessor( ITorrentInfoService torrentInfoService, ZileanConfiguration configuration) { - public async Task ProcessTorrentsAsync(string url, CancellationToken cancellationToken = default) + private int _processedCount; 
+ + public async Task ProcessTorrentsAsync(GenericEndpoint endpoint, CancellationToken cancellationToken = default) { - logger.LogInformation("Processing URL: {Url}", url); + var sw = Stopwatch.StartNew(); + logger.LogInformation("Processing URL: {@Url}", endpoint); + + Interlocked.Exchange(ref _processedCount, 0); var channel = Channel.CreateBounded>(new BoundedChannelOptions(configuration.Ingestion.MaxChannelSize) { @@ -18,17 +23,27 @@ public async Task ProcessTorrentsAsync(string url, CancellationToken cancellatio FullMode = BoundedChannelFullMode.Wait }); - var producerTask = ProduceAsync(url, channel.Writer, cancellationToken); + var producerTask = ProduceAsync(endpoint, channel.Writer, cancellationToken); var consumerTask = ConsumeAsync(channel.Reader, configuration.Ingestion.BatchSize, cancellationToken); await Task.WhenAll(producerTask, consumerTask); + + logger.LogInformation("Processed {Count} torrents for endpoint '{@Endpoint}' in {TimeTaken}s", _processedCount, endpoint, sw.Elapsed.TotalSeconds); + sw.Stop(); } - private async Task ProduceAsync(string url, ChannelWriter> writer, CancellationToken cancellationToken = default) + private async Task ProduceAsync(GenericEndpoint endpoint, ChannelWriter> writer, CancellationToken cancellationToken = default) { try { var httpClient = clientFactory.CreateClient(); - var response = await httpClient.GetAsync(url, HttpCompletionOption.ResponseHeadersRead, cancellationToken); + var fullUrl = endpoint.EndpointType switch + { + GenericEndpointType.Zurg => $"{endpoint.Url}/debug/torrents", + GenericEndpointType.Zilean => $"{endpoint.Url}/torrents/all", + _ => throw new InvalidOperationException("Unknown endpoint type") + }; + + var response = await httpClient.GetAsync(fullUrl, HttpCompletionOption.ResponseHeadersRead, cancellationToken); response.EnsureSuccessStatusCode(); var stream = await response.Content.ReadAsStreamAsync(cancellationToken); @@ -89,6 +104,7 @@ private async Task ProcessBatch(List> batch, 
CancellationTok { var current = await result; torrents.Add(ExtractedDmmEntry.FromStreamedEntry(current)); + Interlocked.Increment(ref _processedCount); } if (torrents.Count == 0 || cancellationToken.IsCancellationRequested) @@ -96,11 +112,22 @@ private async Task ProcessBatch(List> batch, CancellationTok return; } - logger.LogInformation("Processing batch of {Count} torrents", torrents.Count); + var infoHashes = torrents.Select(t => t.InfoHash!).ToList(); + + var existingInfoHashes = await torrentInfoService.GetExistingInfoHashesAsync(infoHashes); + + var newTorrents = torrents.Where(t => !existingInfoHashes.Contains(t.InfoHash)).ToList(); + logger.LogInformation("Filtered out {Count} torrents already in the database", torrents.Count - newTorrents.Count); + + if (newTorrents.Count == 0) + { + logger.LogInformation("No new torrents to process in this batch."); + return; + } if (torrents.Count != 0) { - var parsedTorrents = await parseTorrentNameService.ParseAndPopulateAsync(torrents); + var parsedTorrents = await parseTorrentNameService.ParseAndPopulateAsync(newTorrents); var finalizedTorrents = parsedTorrents.Where(torrentInfo => torrentInfo.WipeSomeTissue()).ToList(); logger.LogInformation("Parsed {Count} torrents", finalizedTorrents.Count); await torrentInfoService.StoreTorrentInfo(finalizedTorrents); diff --git a/src/Zilean.Scraper/Features/Ingestion/GenericIngestionScraping.cs b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionScraping.cs index 17efb04..e3e718c 100644 --- a/src/Zilean.Scraper/Features/Ingestion/GenericIngestionScraping.cs +++ b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionScraping.cs @@ -10,27 +10,16 @@ public async Task Execute(CancellationToken cancellationToken) { logger.LogInformation("Starting ingestion scraping"); - List urlsToProcess = []; + List urlsToProcess = []; - if (configuration.Ingestion.Kubernetes.EnableServiceDiscovery) - { - logger.LogInformation("Discovering URLs from Kubernetes services"); - var urls = await 
kubernetesServiceDiscovery.DiscoverUrlsAsync(cancellationToken); - logger.LogInformation("Discovered {Count} URLs from Kubernetes services", urls.Count); - urlsToProcess.AddRange(urls); - } + configuration.Ingestion.ZileanInstances.Add("http://localhost:8181"); + configuration.Ingestion.ZurgInstances.Add("http://experiments:19999"); - if (configuration.Ingestion.ZurgInstances.Count > 0) - { - logger.LogInformation("Adding Zurg instances to the list of URLs to process"); - urlsToProcess.AddRange(configuration.Ingestion.ZurgInstances); - } + await DiscoverUrlsFromKubernetesServices(cancellationToken, urlsToProcess); - if (configuration.Ingestion.ZileanInstances.Count > 0) - { - logger.LogInformation("Adding Zilean instances to the list of URLs to process"); - urlsToProcess.AddRange(configuration.Ingestion.ZileanInstances); - } + AddZurgInstancesToUrls(urlsToProcess); + + AddZileanInstancesToUrls(urlsToProcess); if (urlsToProcess.Count == 0) { @@ -56,7 +45,7 @@ public async Task Execute(CancellationToken cancellationToken) } catch (Exception ex) { - logger.LogError(ex, "Error processing URL: {Url}", url); + logger.LogError(ex, "Error processing URL: {@Url}", url); } } @@ -64,4 +53,41 @@ public async Task Execute(CancellationToken cancellationToken) return 0; } + + private void AddZileanInstancesToUrls(List urlsToProcess) + { + if (configuration.Ingestion.ZileanInstances.Count > 0) + { + logger.LogInformation("Adding Zilean instances to the list of URLs to process"); + urlsToProcess.AddRange(configuration.Ingestion.ZileanInstances.Select(url => new GenericEndpoint + { + EndpointType = GenericEndpointType.Zilean, + Url = url, + })); + } + } + + private void AddZurgInstancesToUrls(List urlsToProcess) + { + if (configuration.Ingestion.ZurgInstances.Count > 0) + { + logger.LogInformation("Adding Zurg instances to the list of URLs to process"); + urlsToProcess.AddRange(configuration.Ingestion.ZurgInstances.Select(url => new GenericEndpoint + { + EndpointType = 
GenericEndpointType.Zurg, + Url = url, + })); + } + } + + private async Task DiscoverUrlsFromKubernetesServices(CancellationToken cancellationToken, List urlsToProcess) + { + if (configuration.Ingestion.Kubernetes.EnableServiceDiscovery) + { + logger.LogInformation("Discovering URLs from Kubernetes services"); + var endpoints = await kubernetesServiceDiscovery.DiscoverUrlsAsync(cancellationToken); + logger.LogInformation("Discovered {Count} URLs from Kubernetes services", endpoints.Count); + urlsToProcess.AddRange(endpoints); + } + } } diff --git a/src/Zilean.Scraper/Features/Ingestion/KubernetesServiceDiscovery.cs b/src/Zilean.Scraper/Features/Ingestion/KubernetesServiceDiscovery.cs index d83021d..7b86ddf 100644 --- a/src/Zilean.Scraper/Features/Ingestion/KubernetesServiceDiscovery.cs +++ b/src/Zilean.Scraper/Features/Ingestion/KubernetesServiceDiscovery.cs @@ -4,9 +4,11 @@ public class KubernetesServiceDiscovery( ILogger logger, ZileanConfiguration configuration) { - public async Task> DiscoverUrlsAsync(CancellationToken cancellationToken = default) + private record DiscoveredService(V1Service Service, KubernetesSelector Selector); + + public async Task> DiscoverUrlsAsync(CancellationToken cancellationToken = default) { - var urls = new List(); + var urls = new List(); try { @@ -14,44 +16,55 @@ public async Task> DiscoverUrlsAsync(CancellationToken cancellation KubernetesClientConfiguration.BuildConfigFromConfigFile(configuration.Ingestion.Kubernetes.KubeConfigFile); var kubernetesClient = new Kubernetes(clientConfig); - var services = await kubernetesClient.CoreV1.ListServiceForAllNamespacesAsync( - labelSelector: configuration.Ingestion.Kubernetes.LabelSelector, - cancellationToken: cancellationToken); + List discoveredServices = []; + + foreach (var selector in configuration.Ingestion.Kubernetes.KubernetesSelectors) + { + var services = await kubernetesClient.CoreV1.ListServiceForAllNamespacesAsync( + labelSelector: selector.LabelSelector, + cancellationToken: 
cancellationToken); + + discoveredServices.AddRange(services.Items.Select(service => new DiscoveredService(service, selector))); + } - foreach (var service in services.Items) + foreach (var service in discoveredServices) { try { var url = BuildUrlFromService(service); if (!string.IsNullOrEmpty(url)) { - urls.Add(url); + urls.Add(new GenericEndpoint + { + EndpointType = service.Selector.EndpointType, + Url = url, + }); logger.LogInformation("Discovered service URL: {Url}", url); } } catch (Exception ex) { logger.LogError(ex, "Failed to build URL for service {ServiceName} in namespace {Namespace}", - service.Metadata.Name, service.Metadata.NamespaceProperty); + service.Service.Metadata.Name, service.Service.Metadata.NamespaceProperty); } } } catch (Exception ex) { - logger.LogError(ex, "Failed to list services with label selector {LabelSelector}", configuration.Ingestion.Kubernetes.LabelSelector); + logger.LogError(ex, "Failed to list services with label selectors {@LabelSelector}", configuration.Ingestion.Kubernetes.KubernetesSelectors); } return urls; } - private string BuildUrlFromService(V1Service service) + private static string BuildUrlFromService(DiscoveredService service) { - if (service.Metadata?.NamespaceProperty == null) + if (service.Service.Metadata?.NamespaceProperty == null) { throw new InvalidOperationException("Service metadata or namespace is missing."); } - var namespaceName = service.Metadata.NamespaceProperty; - return string.Format(configuration.Ingestion.Kubernetes.ZurgUrlTemplate, namespaceName); + var namespaceName = service.Service.Metadata.NamespaceProperty; + return string.Format(service.Selector.UrlTemplate, namespaceName); } } diff --git a/src/Zilean.Shared/Features/Configuration/IngestionConfiguration.cs b/src/Zilean.Shared/Features/Configuration/IngestionConfiguration.cs index a1204b8..dbb533f 100644 --- a/src/Zilean.Shared/Features/Configuration/IngestionConfiguration.cs +++ 
b/src/Zilean.Shared/Features/Configuration/IngestionConfiguration.cs @@ -6,8 +6,9 @@ public class IngestionConfiguration public List ZileanInstances { get; set; } = []; public bool EnableScraping { get; set; } = false; public KubernetesConfiguration Kubernetes { get; set; } = new(); - public int BatchSize { get; set; } = 1000; + public int BatchSize { get; set; } = 500; public int MaxChannelSize { get; set; } = 5000; - public string ScrapeSchedule { get; set; } = "0 * * * *"; + public string ZurgEndpointSuffix { get; set; } = "/debug/torrents"; + public string ZileanEndpointSuffix { get; set; } = "/torrents/all"; } diff --git a/src/Zilean.Shared/Features/Configuration/KubernetesConfiguration.cs b/src/Zilean.Shared/Features/Configuration/KubernetesConfiguration.cs index 27810af..8964c76 100644 --- a/src/Zilean.Shared/Features/Configuration/KubernetesConfiguration.cs +++ b/src/Zilean.Shared/Features/Configuration/KubernetesConfiguration.cs @@ -3,8 +3,7 @@ public class KubernetesConfiguration { public bool EnableServiceDiscovery { get; set; } = false; - public string ZurgUrlTemplate { get; set; } = "http://zurg.{0}:9999/debug/torrents"; - public string LabelSelector { get; set; } = "app.elfhosted.com/name=zurg"; + public List KubernetesSelectors { get; set; } = [new()]; public string KubeConfigFile { get; set; } = "/$HOME/.kube/config"; public bool IsConfigFile { get; set; } = false; } diff --git a/src/Zilean.Shared/Features/Configuration/KubernetesSelector.cs b/src/Zilean.Shared/Features/Configuration/KubernetesSelector.cs new file mode 100644 index 0000000..cfdf9cf --- /dev/null +++ b/src/Zilean.Shared/Features/Configuration/KubernetesSelector.cs @@ -0,0 +1,8 @@ +namespace Zilean.Shared.Features.Configuration; + +public class KubernetesSelector +{ + public string UrlTemplate { get; set; } = "http://zurg.{0}:9999/debug/torrents"; + public string LabelSelector { get; set; } = "app.elfhosted.com/name=zurg"; + public GenericEndpointType EndpointType { get; set; } = 
GenericEndpointType.Zurg; +} diff --git a/src/Zilean.Shared/Features/Dmm/DmmRecords.cs b/src/Zilean.Shared/Features/Dmm/DmmRecords.cs index 80bbd63..8305b40 100644 --- a/src/Zilean.Shared/Features/Dmm/DmmRecords.cs +++ b/src/Zilean.Shared/Features/Dmm/DmmRecords.cs @@ -1,5 +1,3 @@ -using Zilean.Shared.Features.Scraping; - namespace Zilean.Shared.Features.Dmm; public class ExtractedDmmEntry(string? infoHash, string? filename, long filesize, TorrentInfo? parseResponse) diff --git a/src/Zilean.Shared/Features/Scraping/GenericEndpoint.cs b/src/Zilean.Shared/Features/Scraping/GenericEndpoint.cs new file mode 100644 index 0000000..998135c --- /dev/null +++ b/src/Zilean.Shared/Features/Scraping/GenericEndpoint.cs @@ -0,0 +1,7 @@ +namespace Zilean.Shared.Features.Scraping; + +public class GenericEndpoint +{ + public required string Url { get; set; } + public required GenericEndpointType EndpointType { get; set; } +} diff --git a/src/Zilean.Shared/Features/Scraping/GenericEndpointType.cs b/src/Zilean.Shared/Features/Scraping/GenericEndpointType.cs new file mode 100644 index 0000000..2a0c04a --- /dev/null +++ b/src/Zilean.Shared/Features/Scraping/GenericEndpointType.cs @@ -0,0 +1,7 @@ +namespace Zilean.Shared.Features.Scraping; + +public enum GenericEndpointType +{ + Zilean, + Zurg +} diff --git a/src/Zilean.Shared/GlobalUsings.cs b/src/Zilean.Shared/GlobalUsings.cs index 97ca155..36e1084 100644 --- a/src/Zilean.Shared/GlobalUsings.cs +++ b/src/Zilean.Shared/GlobalUsings.cs @@ -17,6 +17,7 @@ global using Zilean.Shared.Features.Configuration; global using Zilean.Shared.Features.Dmm; global using Zilean.Shared.Features.Imdb; +global using Zilean.Shared.Features.Scraping; global using Zilean.Shared.Features.Torznab.Categories; global using Zilean.Shared.Features.Torznab.Info; global using Zilean.Shared.Features.Torznab.Parameters; From 0db28bad3c60308beb1942f18884b1c816518b55 Mon Sep 17 00:00:00 2001 From: PromKnight Date: Sat, 16 Nov 2024 17:58:16 +0000 Subject: [PATCH 8/8] 
feat: completed generic ingestion with service discovery --- Zilean.sln | 1 + Zilean.sln.DotSettings.user | 1 + eng/dev-settings.json | 51 +++++++++++++++++++ .../Ingestion/GenericIngestionProcessor.cs | 5 -- .../Ingestion/GenericIngestionScraping.cs | 3 -- .../Ingestion/KubernetesServiceDiscovery.cs | 2 +- .../Configuration/KubernetesConfiguration.cs | 1 - .../Features/Scraping/GenericEndpointType.cs | 4 +- 8 files changed, 56 insertions(+), 12 deletions(-) create mode 100644 eng/dev-settings.json diff --git a/Zilean.sln b/Zilean.sln index 846c93c..2bc7c1b 100644 --- a/Zilean.sln +++ b/Zilean.sln @@ -17,6 +17,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "eng", "eng", "{49EC8AF3-9A9 README.md = README.md eng\compose-dev.yaml = eng\compose-dev.yaml eng\.dockerignore = eng\.dockerignore + eng\dev-settings.json = eng\dev-settings.json EndProjectSection EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "k6", "k6", "{3F62995D-FA0F-434F-B90A-5E59C5960A4A}" diff --git a/Zilean.sln.DotSettings.user b/Zilean.sln.DotSettings.user index bd69b9e..d07a3e3 100644 --- a/Zilean.sln.DotSettings.user +++ b/Zilean.sln.DotSettings.user @@ -6,6 +6,7 @@ ForceIncluded ForceIncluded ForceIncluded + ForceIncluded ForceIncluded ForceIncluded ForceIncluded diff --git a/eng/dev-settings.json b/eng/dev-settings.json new file mode 100644 index 0000000..6a5de0b --- /dev/null +++ b/eng/dev-settings.json @@ -0,0 +1,51 @@ +{ + "Zilean": { + "Dmm": { + "EnableScraping": true, + "EnableEndpoint": true, + "ScrapeSchedule": "0 * * * *", + "MinimumReDownloadIntervalMinutes": 30, + "MaxFilteredResults": 200, + "MinimumScoreMatch": 0.85, + "ImportBatched": false + }, + "Torznab": { + "EnableEndpoint": true + }, + "Database": { + "ConnectionString": "Host=localhost;Database=zilean;Username=postgres;Password=postgres;Include Error Detail=true;Timeout=300;CommandTimeout=300;" + }, + "Prowlarr": { + "EnableEndpoint": true + }, + "Torrents": { + "EnableEndpoint": false + }, + "Imdb": { 
+ "EnableImportMatching": true, + "EnableEndpoint": true, + "MinimumScoreMatch": 0.85 + }, + "Ingestion": { + "ZurgInstances": [], + "ZileanInstances": [], + "EnableScraping": false, + "Kubernetes": { + "EnableServiceDiscovery": false, + "KubernetesSelectors": [ + { + "UrlTemplate": "http://zurg.{0}:9999", + "LabelSelector": "app.elfhosted.com/name=zurg", + "EndpointType": 1 + } + ], + "KubeConfigFile": "/$HOME/.kube/config" + }, + "BatchSize": 500, + "MaxChannelSize": 5000, + "ScrapeSchedule": "0 * * * *", + "ZurgEndpointSuffix": "/debug/torrents", + "ZileanEndpointSuffix": "/torrents/all" + } + } +} diff --git a/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs index 47c6475..32c0843 100644 --- a/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs +++ b/src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs @@ -60,11 +60,6 @@ private async Task ProduceAsync(GenericEndpoint endpoint, ChannelWriter Execute(CancellationToken cancellationToken) List urlsToProcess = []; - configuration.Ingestion.ZileanInstances.Add("http://localhost:8181"); - configuration.Ingestion.ZurgInstances.Add("http://experiments:19999"); - await DiscoverUrlsFromKubernetesServices(cancellationToken, urlsToProcess); AddZurgInstancesToUrls(urlsToProcess); diff --git a/src/Zilean.Scraper/Features/Ingestion/KubernetesServiceDiscovery.cs b/src/Zilean.Scraper/Features/Ingestion/KubernetesServiceDiscovery.cs index 7b86ddf..14b15d4 100644 --- a/src/Zilean.Scraper/Features/Ingestion/KubernetesServiceDiscovery.cs +++ b/src/Zilean.Scraper/Features/Ingestion/KubernetesServiceDiscovery.cs @@ -57,7 +57,7 @@ public async Task> DiscoverUrlsAsync(CancellationToken can return urls; } - private static string BuildUrlFromService(DiscoveredService service) + private string BuildUrlFromService(DiscoveredService service) { if (service.Service.Metadata?.NamespaceProperty == null) { diff --git 
a/src/Zilean.Shared/Features/Configuration/KubernetesConfiguration.cs b/src/Zilean.Shared/Features/Configuration/KubernetesConfiguration.cs index 8964c76..378f9c6 100644 --- a/src/Zilean.Shared/Features/Configuration/KubernetesConfiguration.cs +++ b/src/Zilean.Shared/Features/Configuration/KubernetesConfiguration.cs @@ -5,5 +5,4 @@ public class KubernetesConfiguration public bool EnableServiceDiscovery { get; set; } = false; public List KubernetesSelectors { get; set; } = [new()]; public string KubeConfigFile { get; set; } = "/$HOME/.kube/config"; - public bool IsConfigFile { get; set; } = false; } diff --git a/src/Zilean.Shared/Features/Scraping/GenericEndpointType.cs b/src/Zilean.Shared/Features/Scraping/GenericEndpointType.cs index 2a0c04a..44ef40b 100644 --- a/src/Zilean.Shared/Features/Scraping/GenericEndpointType.cs +++ b/src/Zilean.Shared/Features/Scraping/GenericEndpointType.cs @@ -2,6 +2,6 @@ public enum GenericEndpointType { - Zilean, - Zurg + Zilean = 0, + Zurg = 1 }