From f46fd4f287b4d8a48ebce2382e3393fb43f3a8c9 Mon Sep 17 00:00:00 2001 From: Joseph Milazzo Date: Thu, 5 Nov 2020 17:58:35 -0600 Subject: [PATCH] Added a post run information output for successful/failed/skipped. Cleaned up log output to be more clear. --- README.md | 1 - lyricscraper/scrapers/azlyrics_scraper.py | 4 +- lyricscraper/scrapers/genius_scraper.py | 4 +- lyricscraper/scrapers/musixmatch_scraper.py | 4 +- lyricscraper/scrapers/songlyrics_scraper.py | 4 +- main.py | 50 +++++++++++++++++++-- 6 files changed, 55 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index d2d502f..f27a362 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,6 @@ players, but nice to have. This program was only tested and built for Windows, but should work on linux if built that way () # TODO -- Post summary screen with missed, successful scrapes. - Add version number so users can compare with latest from Github (compiled versions) # Add Scrapers: diff --git a/lyricscraper/scrapers/azlyrics_scraper.py b/lyricscraper/scrapers/azlyrics_scraper.py index 46bdb3c..092f4d9 100644 --- a/lyricscraper/scrapers/azlyrics_scraper.py +++ b/lyricscraper/scrapers/azlyrics_scraper.py @@ -24,7 +24,7 @@ def scrape(self, song): query = song.artist + ' - ' + song.title url = self.QUERY_URL % query - logger.info('[AZLyrics] Searching for {}'.format(url)) + logger.info('\t[AZLyrics] Searching for {}'.format(url)) search_results = requests.get(url, headers=self.request_headers).content soup = BeautifulSoup(search_results, 'html.parser') @@ -33,7 +33,7 @@ def scrape(self, song): # Validate results found if soup.find('div', {'class': 'alert alert-warning'}) is not None: - logger.info('[AZLyrics] No match found') + logger.debug('[AZLyrics] No match found') return lyrics diff --git a/lyricscraper/scrapers/genius_scraper.py b/lyricscraper/scrapers/genius_scraper.py index 1e91917..e77ee45 100644 --- a/lyricscraper/scrapers/genius_scraper.py +++ b/lyricscraper/scrapers/genius_scraper.py @@ -30,14 
+30,14 @@ def scrape(self, song,): url = self.QUERY_URL % query lyrics = '' - logger.info('[Genius] Searching for {}'.format(url)) + logger.info('\t[Genius] Searching for {}'.format(url)) self.driver.get_url(url) # Validate results found try: if self.driver.verify_elem('div[ng-if="$ctrl.sections && !$ctrl.has_results"]'): - logger.info('[Genius] No match found') + logger.debug('[Genius] No match found') return lyrics except: pass diff --git a/lyricscraper/scrapers/musixmatch_scraper.py b/lyricscraper/scrapers/musixmatch_scraper.py index b9f1988..dd6b853 100644 --- a/lyricscraper/scrapers/musixmatch_scraper.py +++ b/lyricscraper/scrapers/musixmatch_scraper.py @@ -25,7 +25,7 @@ def scrape(self, song): query = song.artist.replace(' ', '-') url = self.QUERY_URL % query - logger.info('[MusixMatch] Searching for {} - {}'.format(song.artist, song.title)) + logger.info('\t[MusixMatch] Searching for {}'.format(url)) search_results = requests.get(url, headers=self.request_headers).content soup = BeautifulSoup(search_results, 'html.parser') @@ -34,7 +34,7 @@ def scrape(self, song): # Validate results found if soup.find('div', {'class': 'error-page'}) is not None: - logger.info('[MusixMatch] No match found') + logger.debug('[MusixMatch] No match found') return lyrics diff --git a/lyricscraper/scrapers/songlyrics_scraper.py b/lyricscraper/scrapers/songlyrics_scraper.py index 7a8e1a5..8249d3a 100644 --- a/lyricscraper/scrapers/songlyrics_scraper.py +++ b/lyricscraper/scrapers/songlyrics_scraper.py @@ -25,7 +25,7 @@ def scrape(self, song): query = query.replace(' ', '+') url = self.QUERY_URL % query - logger.info('[SongLyrics] Searching for {}'.format(url)) + logger.info('\t[SongLyrics] Searching for {}'.format(url)) search_results = requests.get(url, headers=self.request_headers).content soup = BeautifulSoup(search_results, 'html.parser') @@ -34,7 +34,7 @@ def scrape(self, song): # Validate results found if soup.find('div', {'class': 'alert alert-warning'}) is not None: - 
logger.info('[AZLyrics] No match found') + logger.debug('[SongLyrics] No match found') return lyrics diff --git a/main.py b/main.py index 88883d1..3bf0254 100644 --- a/main.py +++ b/main.py @@ -63,33 +63,77 @@ def setup_scrapers(): def random_scraper(): return random.choice(scraper_list) +def print_post_play_info(artist_matches): + successful_index = 0 + failed_index = 1 + skipped_index = 2 + max_characters_per_line = 68 + template_length = 27 + + print('\nPost Run Information:') + if len(artist_matches) == 0: + print('\tNo artists found') + return + + for artist in artist_matches: + match = artist_matches[artist] + #num_of_dots = max_characters_per_line - len(artist) - template_length - len(str(match[successful_index])) - len(str(match[failed_index])) - len(str(match[skipped_index])) + #dots = ''.join(['_' * num_of_dots * 2]) + print('\t{}\t\t{} Success, {} Failed, {} Skipped'.format(artist, match[successful_index], match[failed_index], match[skipped_index])) + + successful_sum = sum([item[successful_index] for item in artist_matches.values()]) + failed_sum = sum([item[failed_index] for item in artist_matches.values()]) + skipped_sum = sum([item[skipped_index] for item in artist_matches.values()]) + + print('Total: {} Success, {} Failed, {} Skipped'.format(successful_sum, failed_sum, skipped_sum)) + + def scan_dir(root_dir, scrapers, force_overwrite, embed_lyrics): + artist_matches = {} # Stores a list of [successful, failed, skipped] + successful_index = 0 + failed_index = 1 + skipped_index = 2 + logger.info('Scanning {} for songs...'.format(root_dir)) for dirpath, _, files in os.walk(root_dir, topdown=True): for file in files: if parser.is_song(file): full_filepath = os.path.join(dirpath, file) + song = Song(full_filepath) + + if song.artist not in artist_matches: + artist_matches[song.artist] = [0, 0, 0] + if parser.has_lyrics(full_filepath) and not force_overwrite: - print('Skipping {}, has existing lyric file', file) + logger.debug('Skipping {}, has existing
lyric file'.format(file)) + artist_matches[song.artist][skipped_index] += 1 continue - song = Song(full_filepath) - + logger.info('Scraping: {} - {}'.format(song.artist, song.title)) lyrics = '' for scraper in scrapers: lyrics = scraper.scrape(song) if len(lyrics) > 0: break # Write lyrics to file + # TODO: Should I move this all into write_lyrics method? if len(lyrics) > 0: if embed_lyrics: song.write_lyrics(lyrics, force_overwrite) with codecs.open(os.path.join(dirpath, parser.clean_file_extension(file) + '.txt'), 'w+', 'utf-8') as file: file.write(lyrics.strip()) + logger.info('\tSuccess: Lyrics written') + artist_matches[song.artist][successful_index] += 1 + else: + artist_matches[song.artist][failed_index] += 1 + + # Print information about the run + print_post_play_info(artist_matches) + @Gooey def main(program_name='Test Readiness Updater', program_description='This program automates updating test readiness status from iTrack'): args = init_args() @@ -103,3 +147,4 @@ def main(program_name='Test Readiness Updater', program_description='This progra if __name__ == '__main__': main() +