parent
7f6bb4a6f5
commit
d43d67c8b7
@ -0,0 +1,57 @@
|
|||||||
|
import yaml
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
def createDatabase(db_path='assets/spells.db'):
    """Create an empty ``spell`` table, replacing any existing one.

    Args:
        db_path: SQLite database file to (re)initialise. The default is
            the file that insertSpells() writes to, so running the two
            functions back to back fills the table that was just created.
            (The previous hard-coded path, 'database/spells.db', differed
            from the insert target.)

    Raises:
        sqlite3.Error: if the database cannot be opened or a statement
            fails. The connection is closed in every case.
    """
    connexion = sqlite3.connect(db_path)
    try:
        cursor = connexion.cursor()

        # Start from a clean slate so repeated runs do not accumulate rows.
        cursor.execute('''DROP TABLE IF EXISTS spell''')
        cursor.execute('''CREATE TABLE spell(
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT,
            level TEXT,
            school TEXT,
            casting_time TEXT,
            components TEXT,
            range TEXT,
            target TEXT,
            area TEXT,
            effect TEXT,
            duration TEXT,
            saving_throw TEXT,
            spell_resistance TEXT,
            description TEXT)''')

        connexion.commit()
    finally:
        # Release the file handle even when a statement above raised.
        connexion.close()
|
||||||
|
|
||||||
|
def insertSpells(yaml_path='outputs/spells.yaml', db_path='assets/spells.db'):
    """Load the scraped spells from YAML and insert them into ``spell``.

    The YAML document is expected to be a mapping of spell name to a
    mapping of attributes; ``description`` is a list of paragraphs that is
    stored as one newline-joined string. Attributes missing from an entry
    are stored as NULL.

    Args:
        yaml_path: YAML file to read.
        db_path: SQLite database file that already contains the ``spell``
            table.

    Raises:
        OSError: if the YAML file cannot be opened.
        sqlite3.Error: if the insert fails (for example when the table
            does not exist). The connection is closed in every case.
    """
    with open(yaml_path, 'r', encoding='utf-8') as file:
        # An empty YAML document loads as None; treat it as "no spells".
        spells = yaml.safe_load(file) or {}

    # Attribute keys in the same order as the INSERT column list below
    # (between ``name`` and ``description``).
    attribute_keys = (
        'level', 'school', 'casting_time', 'components', 'range', 'target',
        'area', 'effect', 'duration', 'saving_throw', 'spell_resistance',
    )

    rows = []
    for name, spell in spells.items():
        # ``description`` may be absent or explicitly null in the YAML.
        paragraphs = spell.get('description') or []
        rows.append(
            (name, *(spell.get(key) for key in attribute_keys),
             '\n'.join(paragraphs))
        )

    connexion = sqlite3.connect(db_path)
    try:
        cursor = connexion.cursor()
        cursor.executemany(
            '''INSERT INTO spell(name, level, school, casting_time, components, range, target, area, effect, duration, saving_throw, spell_resistance, description)
            VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
            rows,
        )
        connexion.commit()
    finally:
        # Release the file handle even when the insert raised.
        connexion.close()
|
||||||
|
|
||||||
|
# Script entry: recreate the spell table, then load the scraped YAML into it.
createDatabase()
|
||||||
|
insertSpells()
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,119 @@
|
|||||||
|
# This file was used for debugging purposes.
# It helped test attributes separately in
# order to better parse them or to optimize/
# refactor some code.
|
||||||
|
|
||||||
|
import urllib.request
|
||||||
|
import requests
|
||||||
|
import bs4
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
# Single spell page used to exercise the description parser in isolation.
URL = "https://www.d20pfsrd.com/magic/all-spells/d/death-from-below/"

# A timeout keeps the debug run from hanging forever on a stalled server.
responseDetails = requests.get(URL, timeout=30)
spellSoup = BeautifulSoup(responseDetails.content, 'lxml')
spellContent = spellSoup.find(id='article-content')

###################
### DESCRIPTION
###################
# The paragraph whose text is exactly 'DESCRIPTION' separates the stat
# block from the descriptive paragraphs.
spell_description = (
    spellContent.find('p', string='DESCRIPTION') if spellContent else None
)
print("Desc separator: ", spell_description)

spell_paragraphs = []
if not spell_description:
    # Fallback: look for the separator in the wider page-center container.
    page_center = spellSoup.find('div', {'class': 'page-center'})
    if page_center is not None:
        spell_description = page_center.find('p', string='DESCRIPTION')

# Without a separator there is nothing to collect; the loop below is then
# skipped instead of failing on None.
if spell_description:
    spell_description = spell_description.find_next('p')

# Collect paragraphs until one appears after a div.section15 element, or
# until the next paragraph is a direct child of a div.
while spell_description and not spell_description.find_previous('div', {'class': 'section15'}):
    spell_paragraphs.append(spell_description.text)
    spell_description = spell_description.find_next('p')
    if spell_description and spell_description.parent.name == 'div':
        break

print("Spell description:\n", '\n\n'.join(spell_paragraphs))
|
||||||
|
|
||||||
|
def getStringSiblings(array, content, stop):
    """Collect the text that follows ``content`` among its siblings.

    Debug variant: every sibling visited is printed before it is examined.

    Args:
        array: list the collected pieces are appended to (mutated in place).
        content: node whose ``next_siblings`` are walked; a falsy value
            means the label was not found.
        stop: tag name at which the walk ends.

    Returns:
        The elements of ``array`` joined with single spaces, or None when
        ``content`` is falsy.
    """
    if not content:
        return None

    for node in content.next_siblings:
        print(node)
        tag_name = node.name
        if tag_name == stop:
            break
        if tag_name == 'a':
            # Links contribute their visible text.
            array.append(node.text)
            continue
        if not isinstance(node, bs4.element.NavigableString):
            # Any other tag is skipped.
            continue
        stripped = node.string.strip()
        if stripped:
            array.append(stripped.rstrip(';'))

    return ' '.join(array)
|
||||||
|
|
||||||
|
###################
|
||||||
|
### TARGET
|
||||||
|
###################
|
||||||
|
# target = []
|
||||||
|
# spell_target = spellContent.find('b',string="Target")
|
||||||
|
# spell_target = getStringSiblings(target, spell_target, 'b')
|
||||||
|
# print("Target: ", spell_target)
|
||||||
|
|
||||||
|
###################
|
||||||
|
### LEVELS
|
||||||
|
###################
|
||||||
|
# p = spellContent.find('b',string="School")
|
||||||
|
# print(p)
|
||||||
|
# p = p.find_previous('p')
|
||||||
|
# print(p)
|
||||||
|
# text = p.text
|
||||||
|
# print(text)
|
||||||
|
# parts = text.split("Level")
|
||||||
|
# school = parts[0].replace("School","").strip().strip(";")
|
||||||
|
# level = parts[1].replace("Level","").strip()
|
||||||
|
|
||||||
|
# print("---")
|
||||||
|
# print("School:", school)
|
||||||
|
# print("Level:", level)
|
||||||
|
|
||||||
|
# def getDescription(array, content):
|
||||||
|
# if content:
|
||||||
|
# content = content.find_next()
|
||||||
|
# array.append(content.text)
|
||||||
|
# if
|
||||||
|
|
||||||
|
# def getStringSiblings(array, content, stop):
|
||||||
|
# if content:
|
||||||
|
# for sibling in content.next_siblings:
|
||||||
|
# if sibling.name == stop:
|
||||||
|
# break
|
||||||
|
# if sibling.name == 'a':
|
||||||
|
# array.append(sibling.text)
|
||||||
|
# elif isinstance((sibling), bs4.element.NavigableString):
|
||||||
|
# component_text = sibling.string.strip()
|
||||||
|
# if component_text:
|
||||||
|
# array.append(component_text.rstrip(';'))
|
||||||
|
# else:
|
||||||
|
# return None
|
||||||
|
# return ' '.join(array)
|
||||||
|
|
||||||
|
###################
|
||||||
|
### DURATION
|
||||||
|
###################
|
||||||
|
|
||||||
|
# spell_duration = spellContent.find('b',string='Duration')
|
||||||
|
# if spell_duration:
|
||||||
|
# if spell_duration.next_sibling is not None:
|
||||||
|
# spell_duration = spell_duration.next_sibling.text.strip()
|
||||||
|
# else :
|
||||||
|
# print("fix here ---")
|
||||||
|
# print("first: ", spell_duration)
|
||||||
|
# spell_duration = spell_duration.find_next('br')
|
||||||
|
# print(spell_duration)
|
||||||
|
# else :
|
||||||
|
# spell_duration = None
|
||||||
|
# print("Duration: ",spell_duration)
|
||||||
|
# print("Duration: ",spell_duration)
|
@ -0,0 +1,187 @@
|
|||||||
|
import urllib.request
|
||||||
|
import requests
|
||||||
|
import bs4
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from lxml import html
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
### -------------------------------------
|
||||||
|
# GET ALL THE SPELLS FROM THE PAGE
|
||||||
|
### -------------------------------------
|
||||||
|
|
||||||
|
## GET <li> ELEMENTS NAME + URL TO DETAIL PAGE
|
||||||
|
# URL of the index page that links to every spell
URL = "https://www.d20pfsrd.com/magic/all-spells/"

# Fetch the index page. The timeout prevents an indefinite hang and
# raise_for_status() turns an HTTP error into a clear exception instead of
# letting an error page be parsed as if it were the spell index.
response = requests.get(URL, timeout=30)
response.raise_for_status()

# Parse the HTML with the lxml parser.
soup = BeautifulSoup(response.content, 'lxml')

# The spell links live in the first div.flexbox that follows the
# article-content element. (Named spell_list rather than ``list`` so the
# builtin is not shadowed for the rest of the script.)
article = soup.find(id='article-content')
spell_list = article.find_next('div', class_="flexbox") if article else None
if spell_list is None:
    raise RuntimeError("spell list container not found on " + URL)

# Every <li> holds one spell: its name and the link to its detail page.
lis = spell_list.find_all('li')
|
||||||
|
|
||||||
|
###################
|
||||||
|
### METHODS
|
||||||
|
###################
|
||||||
|
def getStringSiblings(array, content, stop):
    """Collect the text that follows ``content`` among its siblings.

    Args:
        array: list the collected pieces are appended to (mutated in place).
        content: node whose ``next_siblings`` are walked; a falsy value
            means the label was not found.
        stop: tag name at which the walk ends.

    Returns:
        The elements of ``array`` joined with single spaces, or None when
        ``content`` is falsy.
    """
    if not content:
        return None

    for node in content.next_siblings:
        tag_name = node.name
        if tag_name == stop:
            break
        if tag_name == 'a':
            # Links contribute their visible text.
            array.append(node.text)
            continue
        if not isinstance(node, bs4.element.NavigableString):
            # Any other tag is skipped.
            continue
        stripped = node.string.strip()
        if stripped:
            array.append(stripped.rstrip(';'))

    return ' '.join(array)
|
||||||
|
|
||||||
|
|
||||||
|
def _labelled_field(content, label, stop='b'):
    """Return the text following the bold ``label`` tag, or None if absent."""
    return getStringSiblings([], content.find('b', string=label), stop)


def _find_description_marker(spellSoup, spellContent):
    """Return the 'DESCRIPTION' separator paragraph, or None if not found."""
    marker = spellContent.find('p', string='DESCRIPTION')
    if marker:
        return marker
    # Fallback: look in the wider page-center container.
    page_center = spellSoup.find('div', {'class': 'page-center'})
    if page_center is None:
        return None
    return page_center.find('p', string='DESCRIPTION')


# Maps spell name -> attribute dict; dumped to YAML once the crawl is done.
spellz = {}

for li in lis:
    # A list item without a link cannot be followed; skip it rather than
    # aborting the whole crawl.
    if li.a is None or not li.a.get('href'):
        continue
    url = li.a['href']

    ## get html of details page
    try:
        # The timeout keeps one stalled page from hanging the crawl.
        responseDetails = requests.get(url, timeout=30)
    except requests.RequestException as error:
        print("skipped: ", url, error)
        continue
    spellSoup = BeautifulSoup(responseDetails.content, 'lxml')

    # get article content which contains all info about spells
    spellContent = spellSoup.find(id='article-content')
    if not spellContent:
        continue

    ###################
    ### ATTRIBUTES
    ###################

    # get name
    heading = spellContent.find('h1')
    if heading is None:
        print("skipped: ", url, "no <h1> heading")
        continue
    spell_name = heading.text
    print("name: ",spell_name)

    # get school and level: both sit in the paragraph found by
    # find_previous('p') from the bold "School" tag, as "School ...; Level ..."
    school_tag = spellContent.find('b',string="School")
    school_levels = school_tag.find_previous('p') if school_tag else None
    if school_levels is None:
        print("skipped: ", url, "no School line")
        continue
    # partition() keeps everything after the first "Level", so a later
    # occurrence of the word no longer truncates the level text.
    school_part, separator, level_part = school_levels.text.partition("Level")
    spell_school = school_part.replace("School","").strip().strip(";")
    spell_level = level_part.strip() if separator else None

    print("School: ",spell_school)
    print("Level:",spell_level)

    # get casting time
    spell_castTime = _labelled_field(spellContent, "Casting Time")
    print("Cast time: ", spell_castTime)

    # get components (walk stops at a 'p' tag instead of the next bold label)
    spell_components = _labelled_field(spellContent, 'Components', 'p')
    print ("Components: ", spell_components)

    # get range
    spell_range = _labelled_field(spellContent, "Range")
    print("Range: ", spell_range)

    # get target
    spell_target = _labelled_field(spellContent, "Target")
    print("Target: ", spell_target)

    # get duration
    spell_duration = _labelled_field(spellContent, "Duration")
    print("Duration: ",spell_duration)

    # get saving throw
    spell_saving_throw = _labelled_field(spellContent, 'Saving Throw')
    print("Saving throw: ", spell_saving_throw)

    # get resistance
    spell_resistance = _labelled_field(spellContent, 'Spell Resistance')
    print("Spell Resistance: ", spell_resistance)

    # get area
    spell_area = _labelled_field(spellContent, 'Area')
    print("Area:", spell_area )

    # get effect
    spell_effect = _labelled_field(spellContent, 'Effect')
    print('Effect: ',spell_effect)

    # get description: spells without a DESCRIPTION separator are skipped
    marker = _find_description_marker(spellSoup, spellContent)
    if not marker:
        continue

    spell_paragraphs = []
    spell_description = marker.find_next('p')
    # Collect paragraphs until one appears after a div.section15 element,
    # or until the next paragraph is a direct child of a div.
    while spell_description and not spell_description.find_previous('div', {'class': 'section15'}):
        spell_paragraphs.append(spell_description.text)
        spell_description = spell_description.find_next('p')
        if spell_description and spell_description.parent.name == 'div':
            break

    print("Spell description:\n", '\n\n'.join(spell_paragraphs))

    spellz[spell_name] = {
        'school': spell_school,
        'level': spell_level,
        'casting_time': spell_castTime,
        'components': spell_components,
        'range': spell_range,
        'target': spell_target,
        'duration': spell_duration,
        'saving_throw': spell_saving_throw,
        'spell_resistance': spell_resistance,
        'area': spell_area,
        'effect': spell_effect,
        'description': spell_paragraphs
    }

# Write everything that was collected in one go.
with open('outputs/spells.yaml', 'w') as f:
    yaml.dump(spellz, f)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in new issue