Skip to content

Commit

Permalink
Fixed obtaining character name when character is a unit
Browse files Browse the repository at this point in the history
  • Loading branch information
danielpontello committed Jan 6, 2023
1 parent 3736369 commit 74fe63a
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 7 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,6 @@ a simple scraper to download all card images from https://mikucolle.gamerch.com/

- Install requirements from requirements.txt: ```pip install -r requirements.txt```
- Run main.py: ```python main.py```
- The downloaded images are saved in the ```out``` folder, in one subfolder per character:

![scraper screenshot](docs/folders.png)
Binary file added docs/folders.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
24 changes: 17 additions & 7 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,29 @@ def download_images(url_list):

if html is not None:
bs = BeautifulSoup(html, "html.parser")
image = bs.find("img", {"class": "ui_wikidb_main_img"})
character = bs.find("a", {"class": "ui_page_match"})
name = bs.find("h2", {"id": "js_wikidb_main_name"})

# image link
image = bs.find("img", {"class": "ui_wikidb_main_img"})
image_url = image['src']
character_name = character['title']
filename = f"{name.text}.jpg"
character_dir = os.path.join(OUTPUT_FOLDER, character_name)

# character
character = bs.select_one('.ui_wikidb_top_pc > p:nth-child(2) > span:nth-child(1)')
next_elem = character.findNext()
if next_elem.name == "a":
character_name = next_elem['title'].strip()
else:
character_name = next_elem.previous_sibling.text.strip()

# card name
card_name = bs.find("h2", {"id": "js_wikidb_main_name"})

# output path
filename = f"{card_name.text.strip()}.jpg"
character_dir = os.path.join(OUTPUT_FOLDER, character_name)
if not os.path.exists(character_dir):
os.makedirs(character_dir, exist_ok=True)

output_file = os.path.join(character_dir, filename)

urllib.request.urlretrieve(image_url, filename=output_file)


Expand Down

0 comments on commit 74fe63a

Please sign in to comment.