From 5617ac0bcb105ba0a7564712789e02c7a96fa0f3 Mon Sep 17 00:00:00 2001 From: Jacob Mansfield Date: Sat, 30 Nov 2019 13:01:44 +0000 Subject: [PATCH 1/2] Allow sync through YouTube's RSS feed --- .../management/jobs/synchronize.py | 80 ++++++++++++++----- 1 file changed, 60 insertions(+), 20 deletions(-) diff --git a/app/YtManagerApp/management/jobs/synchronize.py b/app/YtManagerApp/management/jobs/synchronize.py index c937e52..4026fa7 100644 --- a/app/YtManagerApp/management/jobs/synchronize.py +++ b/app/YtManagerApp/management/jobs/synchronize.py @@ -3,11 +3,12 @@ import datetime from threading import Lock +import requests +from xml.etree import ElementTree from apscheduler.triggers.cron import CronTrigger from django.db.models import Max, F from django.conf import settings - from YtManagerApp.management.appconfig import appconfig from YtManagerApp.management.downloader import fetch_thumbnail, downloader_process_subscription from YtManagerApp.models import * @@ -28,7 +29,7 @@ def __init__(self, job_execution, subscription: Optional[Subscription] = None): super().__init__(job_execution) self.__subscription = subscription self.__api = youtube.YoutubeAPI.build_public() - self.__new_vids = [] + self.__new_videos = [] def get_description(self): if self.__subscription is not None: @@ -40,9 +41,6 @@ def get_subscription_list(self): return [self.__subscription] return Subscription.objects.all().order_by(F('last_synchronised').desc(nulls_first=True)) - def get_videos_list(self, subs): - return Video.objects.filter(subscription__in=subs) - def run(self): self.__lock.acquire(blocking=True) SynchronizeJob.running = True @@ -51,27 +49,25 @@ def run(self): # Build list of work items work_subs = self.get_subscription_list() - work_vids = self.get_videos_list(work_subs) + work_videos = Video.objects.filter(subscription__in=work_subs) - self.set_total_steps(len(work_subs) + len(work_vids)) + self.set_total_steps(len(work_subs) + len(work_videos)) # Remove the 'new' flag - work_vids.update(new=False) + work_videos.update(new=False) # Process subscriptions for sub in work_subs: - self.progress_advance(1, "Synchronizing subscription " + sub.name) + self.progress_advance(progress_msg="Synchronizing subscription " + sub.name) self.check_new_videos(sub) self.fetch_missing_thumbnails(sub) # Add new videos to progress calculation - self.set_total_steps(len(work_subs) + len(work_vids) + len(self.__new_vids)) + self.set_total_steps(len(work_subs) + len(work_videos) + len(self.__new_videos)) # Process videos - all_videos = itertools.chain(work_vids, self.__new_vids) + all_videos = itertools.chain(work_videos, self.__new_videos) for batch in iterate_chunks(all_videos, 50): - video_stats = {} - if _ENABLE_UPDATE_STATS: batch_ids = [video.video_id for video in batch] video_stats = {v.id: v for v in self.__api.videos(batch_ids, part='id,statistics,contentDetails')} @@ -80,14 +76,13 @@ def run(self): video_stats = {v.id: v for v in self.__api.videos(batch_ids, part='id,statistics,contentDetails')} for video in batch: - self.progress_advance(1, "Updating video " + video.name) + self.progress_advance(progress_msg="Updating video " + video.name) self.check_video_deleted(video) self.fetch_missing_thumbnails(video) if video.video_id in video_stats: self.update_video_stats(video, video_stats[video.video_id]) - # Start downloading videos for sub in work_subs: downloader_process_subscription(sub) @@ -97,6 +92,51 @@ def run(self): self.__lock.release() def check_new_videos(self, sub: Subscription): + if sub.last_synchronised is None: + self.check_all_videos(sub) + else: + self.check_rss_videos(sub) + sub.last_synchronised = datetime.datetime.now() + sub.save() + + def check_rss_videos(self, sub: Subscription): + found_existing_video = False + + rss_request = requests.get("https://www.youtube.com/feeds/videos.xml?channel_id="+sub.channel_id) + rss_request.raise_for_status() + + rss = ElementTree.fromstring(rss_request.content) + for entry in rss.findall("{http://www.w3.org/2005/Atom}entry"): + video_id = entry.find("{http://www.youtube.com/xml/schemas/2015}videoId").text + results = Video.objects.filter(video_id=video_id, subscription=sub) + if results.exists(): + found_existing_video = True + else: + video_title = entry.find("{http://www.w3.org/2005/Atom}title").text + + self.log.info('New video for subscription %s: %s %s"', sub, video_id, video_title) + + video = Video() + video.video_id = video_id + video.name = video_title + video.description = entry.find("{http://search.yahoo.com/mrss/}group").find("{http://search.yahoo.com/mrss/}description").text + video.watched = False + video.new = True + video.downloaded_path = None + video.subscription = sub + video.playlist_index = 0 + video.publish_date = datetime.datetime.fromisoformat(entry.find("{http://www.w3.org/2005/Atom}published").text) + video.thumbnail = entry.find("{http://search.yahoo.com/mrss/}group").find("{http://search.yahoo.com/mrss/}thumbnail").get("url") + video.rating = entry.find("{http://search.yahoo.com/mrss/}group").find("{http://search.yahoo.com/mrss/}community").find("{http://search.yahoo.com/mrss/}starRating").get("average") + video.views = entry.find("{http://search.yahoo.com/mrss/}group").find("{http://search.yahoo.com/mrss/}community").find("{http://search.yahoo.com/mrss/}statistics").get("views") + video.save() + + self.__new_videos.append(video) + + if not found_existing_video: + self.check_all_videos(sub) + + def check_all_videos(self, sub: Subscription): playlist_items = self.__api.playlist_items(sub.playlist_id) if sub.rewrite_playlist_indices: playlist_items = sorted(playlist_items, key=lambda x: x.published_at) @@ -114,11 +154,10 @@ def check_new_videos(self, sub: Subscription): highest = Video.objects.filter(subscription=sub).aggregate(Max('playlist_index'))['playlist_index__max'] item.position = 1 + (highest or -1) - self.__new_vids.append(Video.create(item, sub)) - sub.last_synchronised = datetime.datetime.now() - sub.save() + self.__new_videos.append(Video.create(item, sub)) - def fetch_missing_thumbnails(self, obj: Union[Subscription, Video]): + @staticmethod + def fetch_missing_thumbnails(obj: Union[Subscription, Video]): if obj.thumbnail.startswith("http"): if isinstance(obj, Subscription): obj.thumbnail = fetch_thumbnail(obj.thumbnail, 'sub', obj.playlist_id, settings.THUMBNAIL_SIZE_SUBSCRIPTION) @@ -163,7 +202,8 @@ def check_video_deleted(self, video: Video): video.save() - def update_video_stats(self, video: Video, yt_video): + @staticmethod + def update_video_stats(video: Video, yt_video): if yt_video.n_likes is not None \ and yt_video.n_dislikes is not None \ and yt_video.n_likes + yt_video.n_dislikes > 0: From 1329c91b640a7d389427370d0f0cf1d5ecf8a43c Mon Sep 17 00:00:00 2001 From: Jacob Mansfield Date: Sat, 30 Nov 2019 13:12:55 +0000 Subject: [PATCH 2/2] Video description can be null in RSS --- app/YtManagerApp/management/jobs/synchronize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/YtManagerApp/management/jobs/synchronize.py b/app/YtManagerApp/management/jobs/synchronize.py index 4026fa7..87eda27 100644 --- a/app/YtManagerApp/management/jobs/synchronize.py +++ b/app/YtManagerApp/management/jobs/synchronize.py @@ -119,7 +119,7 @@ def check_rss_videos(self, sub: Subscription): video = Video() video.video_id = video_id video.name = video_title - video.description = entry.find("{http://search.yahoo.com/mrss/}group").find("{http://search.yahoo.com/mrss/}description").text + video.description = entry.find("{http://search.yahoo.com/mrss/}group").find("{http://search.yahoo.com/mrss/}description").text or "" video.watched = False video.new = True video.downloaded_path = None