Added a post run information output for successful/failed/skipped. Cleaned up log output to be more clear.
majora2007 committed Nov 5, 2020
1 parent 222524f commit f46fd4f
Showing 6 changed files with 55 additions and 12 deletions.
1 change: 0 additions & 1 deletion README.md
@@ -8,7 +8,6 @@ players, but nice to have.
This program was only tested and built for Windows, but should work on linux if built that way ()

# TODO
- Post summary screen with missed, successful scrapes.
- Add version number so users can compare with latest from Github (compiled versions)

# Add Scrapers:
4 changes: 2 additions & 2 deletions lyricscraper/scrapers/azlyrics_scraper.py
@@ -24,7 +24,7 @@ def scrape(self, song):
query = song.artist + ' - ' + song.title
url = self.QUERY_URL % query

logger.info('[AZLyrics] Searching for {}'.format(url))
logger.info('\t[AZLyrics] Searching for {}'.format(url))

search_results = requests.get(url, headers=self.request_headers).content
soup = BeautifulSoup(search_results, 'html.parser')
@@ -33,7 +33,7 @@ def scrape(self, song):

# Validate results found
if soup.find('div', {'class': 'alert alert-warning'}) is not None:
logger.info('[AZLyrics] No match found')
logger.debug('[AZLyrics] No match found')
return lyrics


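Across the scrapers this commit indents the per-site "Searching for" lines with a tab and demotes "No match found" from `logger.info` to `logger.debug`, so the distinction only pays off if the logging configuration treats the two levels differently. A minimal sketch of such a setup, assuming the project uses Python's standard logging module; the logger name, handlers, and format strings here are illustrative rather than taken from this repository:

```python
import logging

# Illustrative split: the console stays at INFO so per-run progress is visible,
# while DEBUG chatter such as per-scraper misses goes only to a log file.
logger = logging.getLogger('lyricscraper')  # assumed logger name
logger.setLevel(logging.DEBUG)

console = logging.StreamHandler()
console.setLevel(logging.INFO)
console.setFormatter(logging.Formatter('%(message)s'))

logfile = logging.FileHandler('lyricscraper.log', encoding='utf-8')
logfile.setLevel(logging.DEBUG)
logfile.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))

logger.addHandler(console)
logger.addHandler(logfile)

logger.info('\t[AZLyrics] Searching for %s', 'https://search.azlyrics.com/...')
logger.debug('[AZLyrics] No match found')  # kept out of the console, written to the file
```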
4 changes: 2 additions & 2 deletions lyricscraper/scrapers/genius_scraper.py
@@ -30,14 +30,14 @@ def scrape(self, song,):
url = self.QUERY_URL % query
lyrics = ''

logger.info('[Genius] Searching for {}'.format(url))
logger.info('\t[Genius] Searching for {}'.format(url))

self.driver.get_url(url)

# Validate results found
try:
if self.driver.verify_elem('div[ng-if="$ctrl.sections && !$ctrl.has_results"]'):
logger.info('[Genius] No match found')
logger.debug('[Genius] No match found')
return lyrics
except:
pass
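Here `get_url` and `verify_elem` are the project's own Selenium wrapper, whose internals the diff does not show; the surrounding try/except suggests `verify_elem` can raise when the selector is absent. A hedged sketch of the same "no results" check written directly against Selenium, reusing the selector from the diff; the function name and flow are assumptions, and the bare `except:` is narrowed to `WebDriverException` for illustration:

```python
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By

NO_RESULTS_SELECTOR = 'div[ng-if="$ctrl.sections && !$ctrl.has_results"]'

def genius_has_results(driver: webdriver.Chrome, url: str) -> bool:
    """Return False when the Genius search page shows its 'no results' section."""
    driver.get(url)
    try:
        # find_elements returns an empty list rather than raising, so a missing
        # element does not need a blanket except clause.
        return len(driver.find_elements(By.CSS_SELECTOR, NO_RESULTS_SELECTOR)) == 0
    except WebDriverException:
        # Driver-level failures (dead session, timeout) are treated as no results.
        return False
```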
4 changes: 2 additions & 2 deletions lyricscraper/scrapers/musixmatch_scraper.py
@@ -25,7 +25,7 @@ def scrape(self, song):
query = song.artist.replace(' ', '-')
url = self.QUERY_URL % query

logger.info('[MusixMatch] Searching for {} - {}'.format(song.artist, song.title))
logger.info('\t[MusixMatch] Searching for {}'.format(url))

search_results = requests.get(url, headers=self.request_headers).content
soup = BeautifulSoup(search_results, 'html.parser')
@@ -34,7 +34,7 @@ def scrape(self, song):

# Validate results found
if soup.find('div', {'class': 'error-page'}) is not None:
logger.info('[MusixMatch] No match found')
logger.debug('[MusixMatch] No match found')
return lyrics


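The MusixMatch log line now reports the query URL like the other scrapers, and the URL itself is still built by hyphenating spaces in the artist name. A hedged sketch of doing that substitution with percent-encoding as well, so punctuation and non-ASCII characters survive; the QUERY_URL value and helper name are assumptions that only mirror the `%s` substitution visible in the diff:

```python
from urllib.parse import quote

QUERY_URL = 'https://www.musixmatch.com/artist/%s'  # assumed shape

def build_artist_url(artist: str) -> str:
    """Hyphenate spaces the way the scraper does, then percent-encode the rest."""
    slug = artist.strip().replace(' ', '-')
    return QUERY_URL % quote(slug, safe='')

print(build_artist_url('Sigur Rós'))  # .../artist/Sigur-R%C3%B3s
print(build_artist_url('AC/DC'))      # the slash is encoded instead of adding a path segment
```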
4 changes: 2 additions & 2 deletions lyricscraper/scrapers/songlyrics_scraper.py
@@ -25,7 +25,7 @@ def scrape(self, song):
query = query.replace(' ', '+')
url = self.QUERY_URL % query

logger.info('[SongLyrics] Searching for {}'.format(url))
logger.info('\t[SongLyrics] Searching for {}'.format(url))

search_results = requests.get(url, headers=self.request_headers).content
soup = BeautifulSoup(search_results, 'html.parser')
@@ -34,7 +34,7 @@ def scrape(self, song):

# Validate results found
if soup.find('div', {'class': 'alert alert-warning'}) is not None:
logger.info('[AZLyrics] No match found')
logger.debug('[AZLyrics] No match found')
return lyrics


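Note that the "No match found" message in songlyrics_scraper.py still carries the "[AZLyrics]" prefix, so as committed this scraper logs its misses under another scraper's tag. One way to keep prefixes from drifting is to derive them from a single per-scraper name; the base class below is a hypothetical sketch, not the project's actual class hierarchy:

```python
import logging

logger = logging.getLogger('lyricscraper')  # assumed logger name

class BaseScraper:
    """Hypothetical base class: every log line is prefixed with the scraper's own name."""
    name = 'Base'

    def log_info(self, message):
        logger.info('\t[%s] %s', self.name, message)

    def log_debug(self, message):
        logger.debug('[%s] %s', self.name, message)

class SongLyricsScraper(BaseScraper):
    name = 'SongLyrics'

    def scrape(self, song):
        self.log_info('Searching for {}'.format(song))
        self.log_debug('No match found')  # the prefix can no longer read AZLyrics
        return ''
```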
50 changes: 47 additions & 3 deletions main.py
@@ -63,33 +63,76 @@ def setup_scrapers():
def random_scraper():
return random.choice(scraper_list)

def print_post_play_info(artist_matches):
successful_index = 0
failed_index = 1
skipped_index = 2
max_characters_per_line = 68
template_length = 27

print('\nPost Run Information:')
if len(artist_matches) == 0:
print('\tNo artists found')
return

for artist in artist_matches:
match = artist_matches[artist]
#num_of_dots = max_characters_per_line - len(artist) - template_length - len(str(match[successful_index])) - len(str(match[failed_index])) - len(str(match[skipped_index]))
#dots = ''.join(['_' * num_of_dots * 2])
print('\t{}\t\t{} Success, {} Failed, {} Skipped'.format(artist, match[successful_index], match[failed_index], match[skipped_index]))

successful_sum = sum([item[successful_index] for item in artist_matches.values()])
failed_sum = sum([item[failed_index] for item in artist_matches.values()])
skipped_sum = sum([item[skipped_index] for item in artist_matches.values()])

print('Total: {} Success, {} Failed, {} Skipped'.format(successful_sum, failed_sum, skipped_sum))


def scan_dir(root_dir, scrapers, force_overwrite, embed_lyrics):
artist_matches = {} # Stores a list of [successful, failed, skipped]
successful_index = 0
failed_index = 1
skipped_index = 2

logger.info('Scanning {} for songs...'.format(root_dir))
for dirpath, _, files in os.walk(root_dir, topdown=True):
for file in files:
if parser.is_song(file):
full_filepath = os.path.join(dirpath, file)

song = Song(full_filepath)

if song.artist not in artist_matches:
artist_matches[song.artist] = [0, 0, 0]

if parser.has_lyrics(full_filepath) and not force_overwrite:
print('Skipping {}, has existing lyric file', file)
logger.debug('Skipping {}, has existing lyric file'.format(file))
artist_matches[song.artist][skipped_index] += 1
continue

song = Song(full_filepath)

logger.info('Scraping: {} - {}'.format(song.artist, song.title))
lyrics = ''
for scraper in scrapers:
lyrics = scraper.scrape(song)

if len(lyrics) > 0:
artist_matches[song.artist][failed_index] += 1
break

# Write lyrics to file
# TODO: Should I move this all into write_lyrics method?
if len(lyrics) > 0:
if embed_lyrics:
song.write_lyrics(lyrics, force_overwrite)

with codecs.open(os.path.join(dirpath, parser.clean_file_extension(file) + '.txt'), 'w+', 'utf-8') as file:
file.write(lyrics.strip())
logger.info('\tSuccess: Lyrics written')
artist_matches[song.artist][successful_index] += 1

# Print information about the run
print_post_play_info(artist_matches)

@Gooey
def main(program_name='Test Readiness Updater', program_description='This program automates updating test readiness status from iTrack'):
args = init_args()
@@ -103,3 +146,4 @@ def main(program_name='Test Readiness Updater', program_description='This progra

if __name__ == '__main__':
main()
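As the scan_dir diff reads, the failed counter is incremented inside `if len(lyrics) > 0:` just before the `break`, while the success counter is incremented later for the same successful case, so a song whose lyrics are found bumps both tallies and a song no scraper matches bumps neither. If "failed" is meant to be "no scraper returned lyrics", the tally can be taken once after the scraper loop; the helper below is a hedged reworking under that assumption, keeping the `[successful, failed, skipped]` list layout from the diff while everything else is illustrative:

```python
SUCCESSFUL, FAILED, SKIPPED = 0, 1, 2

def scrape_and_tally(artist_matches, song, scrapers):
    """Try each scraper in order and record exactly one outcome per song (sketch)."""
    counts = artist_matches.setdefault(song.artist, [0, 0, 0])

    lyrics = ''
    for scraper in scrapers:
        lyrics = scraper.scrape(song)
        if lyrics:
            break  # first scraper with a hit wins

    if lyrics:
        counts[SUCCESSFUL] += 1
    else:
        counts[FAILED] += 1
    return lyrics
```

For the summary itself, a fixed-width field such as `'\t{:<30}{} Success, {} Failed, {} Skipped'.format(artist, *match)` would line the columns up without the tab guessing or the commented-out underscore filler.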
