|
|
@ -9,62 +9,76 @@ import bs4
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from lxml import html
|
|
|
|
from lxml import html
|
|
|
|
|
|
|
|
|
|
|
|
# Spell page to scrape. The commented lines are alternate pages; uncomment
# exactly one assignment to scrape that page instead.
# URL = "https://www.d20pfsrd.com/magic/all-spells/a/ant-haul/"
# URL = "https://www.d20pfsrd.com/magic/all-spells/m/magic-circle-against-evil/"
# URL = "https://www.d20pfsrd.com/magic/all-spells/p/prophetic-lore/"
URL = "https://www.d20pfsrd.com/magic/all-spells/t/time-stop/"

# Download the page once. The timeout keeps the script from hanging on an
# unresponsive server, and raise_for_status() stops us from parsing an HTTP
# error page as if it were a spell.
responseDetails = requests.get(URL, timeout=30)
responseDetails.raise_for_status()

# Parse the raw bytes with the lxml parser.
spellSoup = BeautifulSoup(responseDetails.content, 'lxml')

# Everything scraped below is looked up inside the element whose id is
# 'article-content'; fail here with a clear message if it is missing.
spellContent = spellSoup.find(id='article-content')
if spellContent is None:
    raise SystemExit("No element with id 'article-content' found at {}".format(URL))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Locate the <p> whose text is exactly "DESCRIPTION"; the elements collected
# below are the siblings that follow it.
description = spellContent.find('p', string='DESCRIPTION')
if description is None:
    raise SystemExit("No <p>DESCRIPTION</p> separator found in the spell content")

# Serialise each following sibling until a <div> carrying the 'section15'
# class is reached (or the siblings run out). The fragments are gathered in a
# list rather than in a variable named `html`, which would rebind the `html`
# module imported from lxml at the top of the file.
description_parts = []
next_elem = description.find_next_sibling()
while next_elem and not (next_elem.name == 'div' and 'section15' in next_elem.get('class', [])):
    description_parts.append(str(next_elem))
    next_elem = next_elem.find_next_sibling()

htmlString = ''.join(description_parts)

# Keep only the markup that precedes the first "<h4"; split() returns the
# whole string unchanged when no "<h4" is present.
htmlString = htmlString.split("<h4")[0]

# When a closing </p> is immediately followed by a <div>, cut the string just
# after its LAST "</p>". The containing check guarantees rfind() finds one.
# NOTE(review): the cut is at the last "</p>" in the string, not at the
# "</p><div>" boundary itself - confirm that is the intended trim point.
if "</p><div>" in htmlString:
    last_p_index = htmlString.rfind("</p>")
    htmlString = htmlString[:last_p_index + len("</p>")]

# Re-parse the trimmed markup and print it.
trimmed_html = BeautifulSoup(htmlString, 'html.parser')
print(trimmed_html)
|
|
|
|
|
|
|
|
|
|
|
|
###################
|
|
|
|
###################
|
|
|
|
### LEVELS
|
|
|
|
### LEVELS
|
|
|
|
###################
|
|
|
|
###################
|
|
|
|
# Find the bold "School" label, then the nearest <p> that precedes it in
# document order; that paragraph's text is parsed for school and level.
school_label = spellContent.find('b', string="School")
if school_label is None:
    raise SystemExit("No bold 'School' label found in the spell content")

school_levels = school_label.find_previous('p')
if school_levels is None:
    raise SystemExit("No <p> found before the 'School' label")

text = school_levels.text

# Text before the first "Level" holds the school; text after it holds the
# level list.
parts = text.split("Level")
if len(parts) < 2:
    raise SystemExit("No 'Level' entry found in stat line: {!r}".format(text))

# Drop the "School" label, surrounding whitespace and the ';' delimiter.
spell_school = parts[0].replace("School", "").strip().strip(";")

# split() already removed every "Level", so only trimming is needed; keep
# what comes before the first ';'.
spell_level = parts[1].strip().split(";")[0]

print("level: ", spell_level)
print("school: ", spell_school)
|
|
|
|
# print("school: ", spell_school)
|
|
|
|
|
|
|
|
|
|
|
|
###################
|
|
|
|
###################
|
|
|
|
### DESCRIPTION
|
|
|
|
### DESCRIPTION
|
|
|
|
###################
|
|
|
|
###################
|
|
|
|
# spell_description = spellContent.find('p',string='DESCRIPTION')
|
|
|
|
|
|
|
|
# print("Desc separator: ", spell_description)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# spell_paragraphs = []
|
|
|
|
# spell_paragraphs = []
|
|
|
|
# spell_description = spellContent.find('p',string='DESCRIPTION')
|
|
|
|
# spell_description = spellContent.find('p',string='DESCRIPTION')
|
|
|
|
# if not spell_description:
|
|
|
|
# if not spell_description:
|
|
|
|
# spell_description = spellSoup.find('div', {'class': 'page-center'}).find('p',string='DESCRIPTION')
|
|
|
|
# spell_description = spellSoup.find('div', {'class': 'page-center'}).find('p',string='DESCRIPTION')
|
|
|
|
# spell_description = spell_description.find_next('p')
|
|
|
|
# if not spell_description:
|
|
|
|
|
|
|
|
# spell_description = None
|
|
|
|
|
|
|
|
# exit # change to continue
|
|
|
|
|
|
|
|
# spell_description = spell_description.find_next()
|
|
|
|
|
|
|
|
# # check if spell description is a table
|
|
|
|
|
|
|
|
# if spell_description.name == 'table':
|
|
|
|
|
|
|
|
# spell_paragraphs.append(spell_description)
|
|
|
|
|
|
|
|
# spell_description = spell_description.find_next()
|
|
|
|
|
|
|
|
|
|
|
|
# while spell_description and not spell_description.find_previous('div', {'class': 'section15'}):
|
|
|
|
# print("paragraphs: ", spell_paragraphs)
|
|
|
|
# if spell_description.has_attr('class'):
|
|
|
|
# print("description: ", spell_description)
|
|
|
|
# spell_paragraphs.append(spell_description.text)
|
|
|
|
# Find the parent tag of the <p> tag with text "DESCRIPTION"
|
|
|
|
# else:
|
|
|
|
|
|
|
|
# spell_paragraphs.append(spell_description.text)
|
|
|
|
|
|
|
|
# spell_description = spell_description.find_next('p')
|
|
|
|
|
|
|
|
# if spell_description and spell_description.parent.name == 'div':
|
|
|
|
|
|
|
|
# break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# print("Spell description:\n", '\n\n'.join(spell_paragraphs))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def getStringSiblings(array, content, stop):
|
|
|
|
# prettify
|
|
|
|
# if content:
|
|
|
|
|
|
|
|
# for sibling in content.next_siblings:
|
|
|
|
|
|
|
|
# print(sibling)
|
|
|
|
|
|
|
|
# if sibling.name == stop:
|
|
|
|
|
|
|
|
# break
|
|
|
|
|
|
|
|
# if sibling.name == 'a':
|
|
|
|
|
|
|
|
# array.append(sibling.text)
|
|
|
|
|
|
|
|
# elif isinstance((sibling), bs4.element.NavigableString):
|
|
|
|
|
|
|
|
# component_text = sibling.string.strip()
|
|
|
|
|
|
|
|
# if component_text:
|
|
|
|
|
|
|
|
# array.append(component_text.rstrip(';'))
|
|
|
|
|
|
|
|
# else:
|
|
|
|
|
|
|
|
# return None
|
|
|
|
|
|
|
|
# return ' '.join(array)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
###################
|
|
|
|
###################
|
|
|
|
### TARGET
|
|
|
|
### TARGET
|
|
|
|