diff --git a/python-selenium/README.md b/python-selenium/README.md new file mode 100644 index 0000000000..99f3604943 --- /dev/null +++ b/python-selenium/README.md @@ -0,0 +1,44 @@ +# Modern Web Automation With Python and Selenium + +This repository contains the module `bandcamp`, which is the sample app built in the Real Python tutorial [Modern Web Automation With Python and Selenium](https://realpython.com/modern-web-automation-with-python-and-selenium/). + +## Installation and Setup + +Create and activate a [Python virtual environment](https://realpython.com/python-virtual-environments-a-primer/). + +Then, install the requirements: + +```sh +(venv) $ python -m pip install -r requirements.txt +``` + +The only direct dependency for this project is [Selenium](https://selenium-python.readthedocs.io/). You should use a Python version of at least 3.10, which is necessary to support [structural pattern matching](https://realpython.com/structural-pattern-matching/). + +You'll need a [Firefox Selenium driver](https://selenium-python.readthedocs.io/installation.html#drivers) called `geckodriver` to run the project as-is. Make sure to [download and install](https://github.com/mozilla/geckodriver/releases) it before running the project. + +## Run the Bandcamp Discover Player + +To run the music player, install the package, then use the entry point command from your command line: + +```sh +(venv) $ python -m pip install . +(venv) $ bandcamp-player +``` + +You'll see a text-based user interface that allows you to interact with the music player: + +``` +Type: play [<track_number>] | tracks | more | exit +> +``` + +Type one of the available commands to interact with Bandcamp's Discover section through your headless browser. Listen to songs with `play` (optionally followed by a track number), list available tracks with `tracks`, and load more songs using `more`. You can exit the music player by typing `exit`. 
# python-selenium/src/bandcamp/__main__.py


def main():
    """Provide the main entry point for the app.

    Delegates to ``interact``, which runs the text-based player loop.
    """
    interact()


# Run the player when the package is executed with ``python -m bandcamp``.
# Without this guard that invocation only defines ``main`` and then exits.
if __name__ == "__main__":
    main()
# python-selenium/src/bandcamp/app/player.py

BANDCAMP_FRONTPAGE_URL = "https://bandcamp.com/"


class Player:
    """Play tracks from Bandcamp's Discover section.

    The player owns a headless Firefox session. Use it as a context manager
    so that the browser session is shut down when the ``with`` block ends.
    """

    def __init__(self) -> None:
        self._driver = self._set_up_driver()
        try:
            self.home = HomePage(self._driver)
            self.discover = self.home.discover_tracklist
            self._current_track = TrackElement(
                self.discover.available_tracks[0], self._driver
            )
        except Exception:
            # ``__exit__`` never runs when construction fails, so shut the
            # browser down here instead of leaking the session.
            self._driver.quit()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        """Shut down the headless browser session."""
        # ``quit()`` ends the whole driver session; ``close()`` would only
        # close the current window and leave the driver running.
        self._driver.quit()

    def play(self, track_number=None):
        """Play the current track, or one of the available numbered tracks.

        Args:
            track_number: 1-based position in the list of available tracks.
                When ``None``, the current track is played.

        Raises:
            ValueError: If ``track_number`` is smaller than 1.
            IndexError: If ``track_number`` is larger than the number of
                available tracks.
        """
        if track_number is not None:
            if track_number < 1:
                # Reject 0 and negatives explicitly: 0 used to be ignored
                # and negative numbers silently indexed from the end.
                raise ValueError(
                    f"Track numbers start at 1, got {track_number}."
                )
            tracks = self.home.discover_tracklist.available_tracks
            self._current_track = TrackElement(
                tracks[track_number - 1], self._driver
            )
        self._current_track.play()

    def _set_up_driver(self):
        """Create a headless browser pointing to Bandcamp."""
        options = Options()
        options.add_argument("--headless")
        browser = Firefox(options=options)
        try:
            browser.get(BANDCAMP_FRONTPAGE_URL)
        except Exception:
            # Don't leave a browser behind if the page cannot be loaded.
            browser.quit()
            raise
        return browser
play [] | tracks | more | exit") + match input("> ").strip().lower().split(): + case ["play"]: + play(player) + case ["play", track]: + try: + track_number = int(track) + play(player, track_number) + except ValueError: + print("Please provide a valid track number.") + case ["tracks"]: + display_tracks(player) + case ["more"]: + player.discover.load_more() + display_tracks(player) + case ["exit"]: + print("Exiting the player...") + break + case _: + print("Unknown command. Try again.") + + +def play(player, track_number=None): + """Play a track and show info about the track.""" + player.play(track_number) + print(player._current_track._get_track_info()) + + +def display_tracks(player): + """Display information about the currently playable tracks.""" + header = ( + f"{'#':<5} {'Album':<{CW}} " f"{'Artist':<{CW}} " f"{'Genre':<{CW}}" + ) + print(header) + print("-" * 100) + for track_number, track in enumerate( + player.discover.available_tracks, start=1 + ): + album, artist, *genre = track.text.split("\n") + album = _truncate(album, CW) + artist = _truncate(artist, CW) + genre = _truncate(genre[0], CW) if genre else "" + print( + f"{track_number:<5} {album:<{CW}} " f"{artist:<{CW}} {genre:<{CW}}" + ) + + +def _truncate(text, width): + """Truncate track information.""" + return text[: width - 3] + "..." 
# python-selenium/src/bandcamp/web/base.py

MAX_WAIT_SECONDS = 10.0


@dataclass
class Track:
    """Hold the displayable information about a single track."""

    album: str
    artist: str
    genre: str
    url: str


class WebPage:
    """Base class for page objects that share a driver and an explicit wait."""

    def __init__(self, driver: WebDriver) -> None:
        # One wait object per page, limited to MAX_WAIT_SECONDS.
        self._wait = WebDriverWait(driver, timeout=MAX_WAIT_SECONDS)
        self._driver = driver


class WebComponent(WebPage):
    """Base class for objects that also keep a parent element."""

    def __init__(self, parent: WebElement, driver: WebDriver) -> None:
        self._parent = parent
        super().__init__(driver)
# python-selenium/src/bandcamp/web/element.py


class TrackElement(WebComponent, TrackLocator):
    """Model a playable track in Bandcamp's Discover section."""

    def play(self) -> None:
        """Play the track, unless it is already playing."""
        if not self.is_playing():
            self._get_play_button().click()
            # Block until the play button reports the playing state.
            self._wait.until(lambda _: self.is_playing())

    def is_playing(self) -> bool:
        """Tell whether the play button's class attribute has "playing"."""
        # get_attribute() returns None when the attribute is absent; fall
        # back to "" so the membership test cannot raise TypeError.
        css_classes = self._get_play_button().get_attribute("class") or ""
        return "playing" in css_classes

    def _get_track_info(self) -> Track:
        """Create a representation of the track's relevant information."""
        # Look the album element up once; it supplies both URL and title.
        album_element = self._parent.find_element(*self.ALBUM)
        full_url = album_element.get_attribute("href")
        # Cut off the referrer query parameter
        clean_url = full_url.split("?")[0] if full_url else ""
        return Track(
            album=album_element.text,
            artist=self._parent.find_element(*self.ARTIST).text,
            genre=self._parent.find_element(*self.GENRE).text,
            url=clean_url,
        )

    def _get_play_button(self):
        """Locate the play button inside this track's parent element."""
        return self._parent.find_element(*self.PLAY_BUTTON)


class DiscoverTrackList(WebComponent, HomePageLocator):
    """Model the track list in Bandcamp's Discover section."""

    def __init__(
        self, parent: WebElement, driver: WebDriver | None = None
    ) -> None:
        # NOTE(review): the default of None is kept for compatibility, but
        # the methods below call self._driver, so a driver is required.
        super().__init__(parent, driver)
        self.available_tracks = self._get_available_tracks()

    def load_more(self) -> None:
        """Load additional tracks in the Discover section."""
        self._get_next_page_button().click()
        # NOTE(review): the list is re-read right after the click, with no
        # wait - confirm the new tracks are already displayed by then.
        self.available_tracks = self._get_available_tracks()

    def _get_available_tracks(self) -> list:
        """Find all currently displayed tracks in the Discover section."""
        all_tracks = self._driver.find_elements(*self.TRACK)
        return [track for track in all_tracks if track.is_displayed()]

    def _get_next_page_button(self):
        """Return the last pagination button found on the page.

        Raises:
            IndexError: If no pagination button is found.
        """
        return self._driver.find_elements(*self.PAGINATION_BUTTON)[-1]


# python-selenium/src/bandcamp/web/locators.py


class HomePageLocator:
    """Locators for elements on the home page."""

    DISCOVER_RESULTS = (By.CLASS_NAME, "discover-results")
    TRACK = (By.CLASS_NAME, "discover-item")
    PAGINATION_BUTTON = (By.CLASS_NAME, "item-page")


class TrackLocator:
    """Locators for elements inside a single track item."""

    PLAY_BUTTON = (By.CSS_SELECTOR, "a")
    ALBUM = (By.CLASS_NAME, "item-title")
    GENRE = (By.CLASS_NAME, "item-genre")
    ARTIST = (By.CLASS_NAME, "item-artist")
# python-selenium/src/bandcamp/web/page.py


class HomePage(WebPage, HomePageLocator):
    """Model the relevant parts of the Bandcamp home page."""

    def __init__(self, driver: WebDriver) -> None:
        super().__init__(driver)
        # Find the Discover results container, then wrap it in a track list.
        results_container = self._driver.find_element(*self.DISCOVER_RESULTS)
        self.discover_tracklist = DiscoverTrackList(
            results_container, self._driver
        )