removed extra attributes from spell

main
Nicolas FRANCO 2 years ago
parent 2b6f7a8783
commit 197d8fcc64

@ -9,50 +9,62 @@ import bs4
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from lxml import html from lxml import html
URL = "https://www.d20pfsrd.com/magic/all-spells/d/death-from-below/" URL = "https://www.d20pfsrd.com/magic/all-spells/a/accelerate-poison/"
responseDetails = requests.get(URL) responseDetails = requests.get(URL)
spellSoup = BeautifulSoup(responseDetails.content, 'lxml') spellSoup = BeautifulSoup(responseDetails.content, 'lxml')
spellContent = spellSoup.find(id='article-content') spellContent = spellSoup.find(id='article-content')
###################
### LEVELS
###################
# Extract the spell's school and level from the paragraph that carries the
# bold "School" label. The text is expected to look roughly like
# "School <school>; Level <class levels>; ..." (assumption based on the
# parsing below -- confirm against the live page markup).
school_label = spellContent.find('b', string="School")
# find() returns None when the label is missing; guard so the script does
# not die with an AttributeError on pages that lack a "School" line.
school_levels = school_label.find_previous('p') if school_label else None
text = school_levels.text if school_levels else ""
# partition() never raises: when "Level" is absent the level part is simply
# empty, whereas indexing the result of split("Level") would raise IndexError.
school_part, _, level_part = text.partition("Level")
spell_school = school_part.replace("School", "").strip().strip(";")
# Only keep what precedes the first ';' after the "Level" label.
spell_level = level_part.strip().split(";")[0]
print("level: ", spell_level)
print("school: ", spell_school)
################### ###################
### DESCRIPTION ### DESCRIPTION
################### ###################
spell_description = spellContent.find('p',string='DESCRIPTION') # spell_description = spellContent.find('p',string='DESCRIPTION')
print("Desc separator: ", spell_description) # print("Desc separator: ", spell_description)
spell_paragraphs = [] # spell_paragraphs = []
spell_description = spellContent.find('p',string='DESCRIPTION') # spell_description = spellContent.find('p',string='DESCRIPTION')
if not spell_description: # if not spell_description:
spell_description = spellSoup.find('div', {'class': 'page-center'}).find('p',string='DESCRIPTION') # spell_description = spellSoup.find('div', {'class': 'page-center'}).find('p',string='DESCRIPTION')
spell_description = spell_description.find_next('p') # spell_description = spell_description.find_next('p')
while spell_description and not spell_description.find_previous('div', {'class': 'section15'}): # while spell_description and not spell_description.find_previous('div', {'class': 'section15'}):
if spell_description.has_attr('class'): # if spell_description.has_attr('class'):
spell_paragraphs.append(spell_description.text) # spell_paragraphs.append(spell_description.text)
else: # else:
spell_paragraphs.append(spell_description.text) # spell_paragraphs.append(spell_description.text)
spell_description = spell_description.find_next('p') # spell_description = spell_description.find_next('p')
if spell_description and spell_description.parent.name == 'div': # if spell_description and spell_description.parent.name == 'div':
break # break
print("Spell description:\n", '\n\n'.join(spell_paragraphs)) # print("Spell description:\n", '\n\n'.join(spell_paragraphs))
def getStringSiblings(array, content, stop): # def getStringSiblings(array, content, stop):
if content: # if content:
for sibling in content.next_siblings: # for sibling in content.next_siblings:
print(sibling) # print(sibling)
if sibling.name == stop: # if sibling.name == stop:
break # break
if sibling.name == 'a': # if sibling.name == 'a':
array.append(sibling.text) # array.append(sibling.text)
elif isinstance((sibling), bs4.element.NavigableString): # elif isinstance((sibling), bs4.element.NavigableString):
component_text = sibling.string.strip() # component_text = sibling.string.strip()
if component_text: # if component_text:
array.append(component_text.rstrip(';')) # array.append(component_text.rstrip(';'))
else: # else:
return None # return None
return ' '.join(array) # return ' '.join(array)
################### ###################
### TARGET ### TARGET
@ -62,23 +74,6 @@ def getStringSiblings(array, content, stop):
# spell_target = getStringSiblings(target, spell_target, 'b') # spell_target = getStringSiblings(target, spell_target, 'b')
# print("Target: ", spell_target) # print("Target: ", spell_target)
###################
### LEVELS
###################
# p = spellContent.find('b',string="School")
# print(p)
# p = p.find_previous('p')
# print(p)
# text = p.text
# print(text)
# parts = text.split("Level")
# school = parts[0].replace("School","").strip().strip(";")
# level = parts[1].replace("Level","").strip()
# print("---")
# print("School:", school)
# print("Level:", level)
# def getDescription(array, content): # def getDescription(array, content):
# if content: # if content:
# content = content.find_next() # content = content.find_next()

Loading…
Cancel
Save