Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VaspDoc.get_incar_tags: Use Mediawiki API #4141

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 29 additions & 12 deletions src/pymatgen/io/vasp/help.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import json
import re

import requests
Expand Down Expand Up @@ -68,16 +69,32 @@ def get_help(cls, tag: str, fmt: str = "text") -> str:
@classmethod
def get_incar_tags(cls) -> list[str]:
    """Get a list of all INCAR tags from the VASP wiki.

    Queries the MediaWiki API of the VASP wiki for the members of
    ``Category:INCAR_tag`` and keeps requesting further batches for as
    long as the response carries a ``continue`` object.

    Returns:
        list[str]: The ``title`` of every category member, in the order
            the API returns them.

    Raises:
        requests.HTTPError: If the wiki answers with an HTTP error status.
    """
    # MediaWiki API, documented at
    # https://www.vasp.at/wiki/api.php?action=help&modules=query%2Bcategorymembers
    api_url = "https://www.vasp.at/wiki/api.php"
    # Passing the query as ``params`` lets requests URL-encode every value,
    # including the opaque continuation token added below.
    params = {
        "action": "query",
        "list": "categorymembers",
        "cmtitle": "Category:INCAR_tag",
        "cmlimit": "500",
        "format": "json",
    }

    tags: list[str] = []
    while True:
        response = requests.get(api_url, params=params, timeout=60)
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        data = response.json()
        tags.extend(member["title"] for member in data["query"]["categorymembers"])

        # A 'continue' object is present whenever more results are available
        # (see https://www.mediawiki.org/wiki/API:Continue). All of its
        # key/value pairs are merged into the next request unchanged.
        if "continue" not in data:
            return tags
        params.update(data["continue"])