new description working + updated outputs

description
Nicolas FRANCO 2 years ago
parent 3bde5a9363
commit 936d185bcd

Binary file not shown.

File diff suppressed because it is too large Load Diff

@ -11,17 +11,21 @@ from lxml import html
# URL = "https://www.d20pfsrd.com/magic/all-spells/m/magic-circle-against-evil/"
# URL = "https://www.d20pfsrd.com/magic/all-spells/p/prophetic-lore/"
URL = "https://www.d20pfsrd.com/magic/all-spells/t/time-stop/"
URL = "https://www.d20pfsrd.com/magic/all-spells/d/death-from-below/"
responseDetails = requests.get(URL)
spellSoup = BeautifulSoup(responseDetails.content, 'lxml')
spellContent = spellSoup.find(id='article-content')
description = spellContent.find('p', string='DESCRIPTION')
if description is None:
description = spellContent.find_next_sibling()
next_elem = description.find_next_sibling()
html = ''
next_elem = description.find_next('p')
html= ''
while next_elem and not (next_elem.name == 'div' and 'section15' in next_elem.get('class', [])):
print("ne;", next_elem )
print()
html += str(next_elem)
next_elem = next_elem.find_next_sibling()

@ -44,8 +44,7 @@ def getStringSiblings(array, content, stop):
return ' '.join(array)
spellz = {}
# pbar = tqdm(total=2650, desc="[Processing]", unit=" spell")
cpt = 0
pbar = tqdm(total=2650, desc="[Processing]", unit=" spell")
for li in lis: # now we loop over all spells, get the page link and scrap all attributes
url = li.a['href']
@ -137,8 +136,11 @@ for li in lis: # now we loop over all spells, get the page link and scrap all at
# get description
description = spellContent.find('p', string='DESCRIPTION')
if(description):
if description is None:
description = spellContent.find_next_sibling()
else:
next_elem = description.find_next_sibling()
html = ''
while next_elem and not (next_elem.name == 'div' and 'section15' in next_elem.get('class', [])):
html += str(next_elem)
@ -179,7 +181,7 @@ for li in lis: # now we loop over all spells, get the page link and scrap all at
with open('outputs/spells.yaml', 'w') as f:
yaml.dump(spellz, f)
# pbar.close()
pbar.close()

Loading…
Cancel
Save