parsing level into a class-level map

main
Nicolas FRANCO 2 years ago
parent 197d8fcc64
commit 880135a884

File diff suppressed because it is too large Load Diff

@ -0,0 +1,79 @@
import urllib.request
import requests
import bs4
from bs4 import BeautifulSoup
from lxml import html
import yaml
from tqdm import tqdm
# Index page that lists the spells; the <li> entries collected below are
# later iterated to reach each linked page.
URL = "https://www.d20pfsrd.com/magic/all-spells/"
response = requests.get(URL)
soup = BeautifulSoup(response.content, 'lxml')
# Bound to `spell_list` rather than `list` so the builtin `list` type is not
# shadowed for the rest of the module.
spell_list = soup.find(id='article-content').find_next('div', class_="flexbox")
lis = spell_list.find_all('li')
###################
### METHODS
###################
def getStringSiblings(array, content, stop):
    """Collect the text that follows ``content`` up to the next ``stop`` tag.

    Walks ``content.next_siblings`` in order. The text of every ``<a>``
    sibling is appended to ``array``. Bare text nodes are appended after
    stripping surrounding whitespace and any trailing ``;``; nodes that are
    empty after stripping are skipped. Other tags are ignored. The walk ends
    at the first sibling whose tag name equals ``stop``.

    Args:
        array: List receiving the collected fragments; mutated in place.
        content: Node to start from, or a falsy value (e.g. ``None``) when
            the starting node was not found.
        stop: Tag name that terminates the walk.

    Returns:
        Everything in ``array`` joined with single spaces, or ``None`` when
        ``content`` is falsy.
    """
    # Guard clause: nothing to walk when the starting node is missing.
    if not content:
        return None
    for sibling in content.next_siblings:
        if sibling.name == stop:
            break
        if sibling.name == 'a':
            array.append(sibling.text)
        elif isinstance(sibling, bs4.element.NavigableString):
            component_text = sibling.string.strip()
            if component_text:
                array.append(component_text.rstrip(';'))
    return ' '.join(array)
# Maps spell name -> {'school': ..., 'level': ...}; dumped to YAML at the end.
spellz = {}
# Size the progress bar from the scraped index instead of a hard-coded count.
pbar = tqdm(total=len(lis), desc="[Processing]", unit=" spell")
for li in lis:
    url = li.a['href']
    ## get html of details page
    responseDetails = requests.get(url)
    spellSoup = BeautifulSoup(responseDetails.content, 'lxml')
    # get article content which contains all info about spells
    spellContent = spellSoup.find(id='article-content')
    pbar.update(1)
    # Pages without an article body cannot be parsed; skip them.
    if not spellContent:
        continue
    # get name
    spell_name = spellContent.find('h1').text
    # get school and level: both are read from the paragraph located via the
    # bold "School" label, with the text split around the word "Level".
    school_levels = spellContent.find('b', string="School").find_previous('p')
    text = school_levels.text
    parts = text.split("Level")
    spell_school = parts[0].replace("School", "").strip().strip(";")
    # Keep only what precedes the first ';' after "Level".
    spell_level = parts[1].replace("Level", "").strip().split(";")[0]
    spellz[spell_name] = {
        'school': spell_school,
        'level': spell_level,
    }
with open('outputs/spells.yaml', 'w') as f:
    # Dump the collected mapping (the previous argument referenced undefined
    # names and raised NameError after the whole crawl had finished).
    yaml.dump(spellz, f)
pbar.close()

@ -9,12 +9,28 @@ import bs4
from bs4 import BeautifulSoup
from lxml import html
# NOTE(review): URL is assigned twice in a row; the second value overrides the
# first, so only the ant-haul page is fetched. The first line is presumably a
# leftover (or the removed side of the diff) - confirm and drop it.
URL = "https://www.d20pfsrd.com/magic/all-spells/a/accelerate-poison/"
URL = "https://www.d20pfsrd.com/magic/all-spells/a/ant-haul/"
# Fetch the single page and parse it with the lxml parser.
responseDetails = requests.get(URL)
spellSoup = BeautifulSoup(responseDetails.content, 'lxml')
# Element with id 'article-content'; None if the page has no such element.
spellContent = spellSoup.find(id='article-content')
def parseLevelAndGetClass(spell_level):
    """Parse a spell's level text into a ``{class name: level}`` mapping.

    The text is a comma-separated list of entries such as
    ``"sorcerer/wizard 3, bard 2"``. The last whitespace-separated token of
    an entry is its level; class names joined with ``/`` share that level.
    An entry without a space is recorded with level ``"1"``.

    Args:
        spell_level: Raw level text.

    Returns:
        dict mapping each class name to its level, kept as a string. Empty
        entries (empty input, trailing comma, stray ``/``) are skipped, so
        the result never contains an empty key.
    """
    class_dict = {}
    for class_level in spell_level.split(","):
        class_level = class_level.strip()
        if not class_level:
            # Empty segment: would otherwise produce a bogus "" -> "1" entry.
            continue
        if " " in class_level:
            class_name, level = class_level.rsplit(maxsplit=1)
            # split("/") yields a single name when there is no "/", so one
            # loop covers both the shared-level and the single-class case.
            for name in class_name.split("/"):
                name = name.strip()
                if name:
                    class_dict[name] = level
        else:
            class_dict[class_level] = "1"
    return class_dict
###################
### LEVELS
###################
@ -27,6 +43,9 @@ spell_level = parts[1].replace("Level","").strip().split(";")[0]
# Debug output of the raw level and school text.
print("level: ", spell_level)
print("school: ", spell_school)
# Turn the raw level text into a {class name: level} dict and show it.
class_levels = parseLevelAndGetClass(spell_level)
print(class_levels)
###################
### DESCRIPTION
###################

@ -28,6 +28,22 @@ lis = list.find_all('li')
###################
### METHODS
###################
def parseLevelAndGetClass(spell_level):
    """Parse a spell's level text into a ``{class name: level}`` mapping.

    The text is a comma-separated list of entries such as
    ``"sorcerer/wizard 3, bard 2"``. The last whitespace-separated token of
    an entry is its level; class names joined with ``/`` share that level.
    An entry without a space is recorded with level ``"1"``.

    Args:
        spell_level: Raw level text.

    Returns:
        dict mapping each class name to its level, kept as a string. Empty
        entries (empty input, trailing comma, stray ``/``) are skipped, so
        the result never contains an empty key.
    """
    class_dict = {}
    for class_level in spell_level.split(","):
        class_level = class_level.strip()
        if not class_level:
            # Empty segment: would otherwise produce a bogus "" -> "1" entry.
            continue
        if " " in class_level:
            class_name, level = class_level.rsplit(maxsplit=1)
            # split("/") yields a single name when there is no "/", so one
            # loop covers both the shared-level and the single-class case.
            for name in class_name.split("/"):
                name = name.strip()
                if name:
                    class_dict[name] = level
        else:
            class_dict[class_level] = "1"
    return class_dict
def getStringSiblings(array, content, stop):
if content:
for sibling in content.next_siblings:
@ -76,6 +92,8 @@ for li in lis:
spell_school = parts[0].replace("School","").strip().strip(";")
spell_level = parts[1].replace("Level","").strip()
spell_class_and_level = parseLevelAndGetClass(spell_level)
# print("School: ",spell_school)
# print("Level:",spell_level)
@ -162,7 +180,7 @@ for li in lis:
# print("no: ",cpt)
spellz[spell_name] = {
'school': spell_school,
'level': spell_level,
'level': spell_class_and_level,
'casting_time': spell_castTime,
'components': spell_components,
'range': spell_range,
@ -174,6 +192,7 @@ for li in lis:
'effect': spell_effect,
'description': spell_paragraphs
}
break
with open('outputs/spells.yaml', 'w') as f:
yaml.dump(spellz, f)

Loading…
Cancel
Save