parent
7f6bb4a6f5
commit
d43d67c8b7
@ -0,0 +1,57 @@
|
||||
import yaml
|
||||
import sqlite3
|
||||
|
||||
def createDatabase(db_path='database/spells.db'):
    """(Re)create the ``spell`` table in the SQLite database at *db_path*.

    An existing ``spell`` table is dropped first, so any rows it held are
    discarded. Every column except the auto-incremented ``id`` is TEXT.

    Args:
        db_path: Path of the SQLite file to open. Defaults to
            ``'database/spells.db'`` (the path this function always used).

    Raises:
        sqlite3.Error: If the database cannot be opened or a statement fails.

    NOTE(review): ``insertSpells`` in this file opens ``'assets/spells.db'``
    by default, not ``'database/spells.db'`` — confirm whether both are meant
    to target the same file.
    """
    connexion = sqlite3.connect(db_path)
    try:
        cursor = connexion.cursor()

        cursor.execute('''DROP TABLE IF EXISTS spell''')
        cursor.execute('''CREATE TABLE spell(
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT,
            level TEXT,
            school TEXT,
            casting_time TEXT,
            components TEXT,
            range TEXT,
            target TEXT,
            area TEXT,
            effect TEXT,
            duration TEXT,
            saving_throw TEXT,
            spell_resistance TEXT,
            description TEXT)''')

        connexion.commit()
    finally:
        # Close even when a statement raises so the file handle is not leaked.
        connexion.close()
|
||||
|
||||
def insertSpells(yaml_path='outputs/spells.yaml', db_path='assets/spells.db'):
    """Load spells from a YAML file and insert them into the ``spell`` table.

    The YAML document is read as a mapping of spell name to a mapping of
    attributes. Attributes that are absent are stored as NULL; the
    ``description`` list is joined with newlines into a single string.

    Args:
        yaml_path: YAML file to read. Defaults to ``'outputs/spells.yaml'``.
        db_path: SQLite file to write to. Defaults to ``'assets/spells.db'``.

    Raises:
        OSError: If the YAML file cannot be opened.
        sqlite3.Error: If the database cannot be opened or the insert fails
            (for example when the ``spell`` table does not exist).
    """
    with open(yaml_path, 'r') as file:
        # An empty YAML document loads as None; treat it as "no spells".
        spells = yaml.safe_load(file) or {}

    rows = []
    for name, spell in spells.items():
        spell = spell or {}  # a key with a null value carries no attributes
        rows.append((
            name,
            spell.get('level'),
            spell.get('school'),
            spell.get('casting_time'),
            spell.get('components'),
            spell.get('range'),
            spell.get('target'),
            spell.get('area'),
            spell.get('effect'),
            spell.get('duration'),
            spell.get('saving_throw'),
            spell.get('spell_resistance'),
            # `or []` also covers a description that is present but null.
            '\n'.join(spell.get('description') or []),
        ))

    connexion = sqlite3.connect(db_path)
    try:
        cursor = connexion.cursor()
        cursor.executemany(
            '''INSERT INTO spell(name, level, school, casting_time, components, range, target, area, effect, duration, saving_throw, spell_resistance, description)
            VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
            rows,
        )
        connexion.commit()
    finally:
        # Close even when the insert raises so the file handle is not leaked.
        connexion.close()
|
||||
|
||||
# Script entry point: rebuild the spell table, then load the scraped spells.
# NOTE(review): createDatabase() defaults to 'database/spells.db' while
# insertSpells() defaults to 'assets/spells.db' — confirm both calls are
# meant to operate on the same database file.
createDatabase()
insertSpells()
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,119 @@
|
||||
# This file was used for debugging purposes.
|
||||
# It helped test attributes separately in
|
||||
# order to better parse them or to optimize/
|
||||
# factor some code.
|
||||
|
||||
import urllib.request
|
||||
import requests
|
||||
import bs4
|
||||
from bs4 import BeautifulSoup
|
||||
from lxml import html
|
||||
|
||||
URL = "https://www.d20pfsrd.com/magic/all-spells/d/death-from-below/"

# Fetch a single spell page and isolate its article body.
responseDetails = requests.get(URL)
spellSoup = BeautifulSoup(responseDetails.content, 'lxml')
spellContent = spellSoup.find(id='article-content')

###################
### DESCRIPTION
###################
# The description starts after a <p> whose text is exactly 'DESCRIPTION'.
# Look inside the article first (guarding against a missing article).
spell_description = spellContent.find('p', string='DESCRIPTION') if spellContent else None
print("Desc separator: ", spell_description)

spell_paragraphs = []
if not spell_description:
    # Fallback: search the 'page-center' div for the same separator.
    page_center = spellSoup.find('div', {'class': 'page-center'})
    if page_center:
        spell_description = page_center.find('p', string='DESCRIPTION')

# Without a separator there is nothing to collect; skip instead of
# dereferencing None.
if spell_description:
    spell_description = spell_description.find_next('p')

# Collect paragraphs until one is preceded by a 'section15' div.
while spell_description and not spell_description.find_previous('div', {'class': 'section15'}):
    spell_paragraphs.append(spell_description.text)
    spell_description = spell_description.find_next('p')
    # Also stop at a <p> that sits directly inside a <div>.
    if spell_description and spell_description.parent.name == 'div':
        break

print("Spell description:\n", '\n\n'.join(spell_paragraphs))
|
||||
|
||||
def getStringSiblings(array, content, stop):
    """Gather the text following *content* until a *stop* tag is reached.

    Walks ``content.next_siblings`` in order, printing each one as a debug
    trace. ``<a>`` tags contribute their text; bare strings contribute their
    stripped text with trailing ``;`` removed (blank strings are skipped);
    any other node is ignored.

    Args:
        array: List that receives the collected pieces (mutated in place).
        content: Node whose following siblings are scanned; may be falsy.
        stop: Tag name at which scanning ends.

    Returns:
        The items of *array* joined with single spaces, or ``None`` when
        *content* is falsy.
    """
    if not content:
        return None

    for node in content.next_siblings:
        print(node)
        if node.name == stop:
            break
        if node.name == 'a':
            array.append(node.text)
            continue
        if not isinstance(node, bs4.element.NavigableString):
            continue
        stripped = node.string.strip()
        if stripped:
            array.append(stripped.rstrip(';'))

    return ' '.join(array)
|
||||
|
||||
###################
|
||||
### TARGET
|
||||
###################
|
||||
# target = []
|
||||
# spell_target = spellContent.find('b',string="Target")
|
||||
# spell_target = getStringSiblings(target, spell_target, 'b')
|
||||
# print("Target: ", spell_target)
|
||||
|
||||
###################
|
||||
### LEVELS
|
||||
###################
|
||||
# p = spellContent.find('b',string="School")
|
||||
# print(p)
|
||||
# p = p.find_previous('p')
|
||||
# print(p)
|
||||
# text = p.text
|
||||
# print(text)
|
||||
# parts = text.split("Level")
|
||||
# school = parts[0].replace("School","").strip().strip(";")
|
||||
# level = parts[1].replace("Level","").strip()
|
||||
|
||||
# print("---")
|
||||
# print("School:", school)
|
||||
# print("Level:", level)
|
||||
|
||||
# def getDescription(array, content):
|
||||
# if content:
|
||||
# content = content.find_next()
|
||||
# array.append(content.text)
|
||||
# if
|
||||
|
||||
# def getStringSiblings(array, content, stop):
|
||||
# if content:
|
||||
# for sibling in content.next_siblings:
|
||||
# if sibling.name == stop:
|
||||
# break
|
||||
# if sibling.name == 'a':
|
||||
# array.append(sibling.text)
|
||||
# elif isinstance((sibling), bs4.element.NavigableString):
|
||||
# component_text = sibling.string.strip()
|
||||
# if component_text:
|
||||
# array.append(component_text.rstrip(';'))
|
||||
# else:
|
||||
# return None
|
||||
# return ' '.join(array)
|
||||
|
||||
###################
|
||||
### DURATION
|
||||
###################
|
||||
|
||||
# spell_duration = spellContent.find('b',string='Duration')
|
||||
# if spell_duration:
|
||||
# if spell_duration.next_sibling is not None:
|
||||
# spell_duration = spell_duration.next_sibling.text.strip()
|
||||
# else :
|
||||
# print("fix here ---")
|
||||
# print("first: ", spell_duration)
|
||||
# spell_duration = spell_duration.find_next('br')
|
||||
# print(spell_duration)
|
||||
# else :
|
||||
# spell_duration = None
|
||||
# print("Duration: ",spell_duration)
|
||||
# print("Duration: ",spell_duration)
|
@ -0,0 +1,187 @@
|
||||
import urllib.request
|
||||
import requests
|
||||
import bs4
|
||||
from bs4 import BeautifulSoup
|
||||
from lxml import html
|
||||
import yaml
|
||||
|
||||
### -------------------------------------
|
||||
# GET ALL THE SPELLS FROM THE PAGE
|
||||
### -------------------------------------
|
||||
|
||||
## GET <li> ELEMENTS NAME + URL TO DETAIL PAGE
|
||||
# url with all spells
|
||||
URL = "https://www.d20pfsrd.com/magic/all-spells/"

# get the page content using GET to url
response = requests.get(URL)

# parse the html with the lxml parser
soup = BeautifulSoup(response.content, 'lxml')
# Named spell_list rather than `list` so the builtin is not shadowed.
spell_list = soup.find(id='article-content').find_next('div', class_="flexbox")

# this gets all the <li> elements from the "flexbox" div found after
# article-content, which contain all of the spells (name and link to
# detail page)
lis = spell_list.find_all('li')
|
||||
|
||||
###################
|
||||
### METHODS
|
||||
###################
|
||||
def getStringSiblings(array, content, stop):
    """Gather the text following *content* until a *stop* tag is reached.

    Walks ``content.next_siblings`` in order. ``<a>`` tags contribute their
    text; bare strings contribute their stripped text with trailing ``;``
    removed (blank strings are skipped); any other node is ignored.

    Args:
        array: List that receives the collected pieces (mutated in place).
        content: Node whose following siblings are scanned; may be falsy.
        stop: Tag name at which scanning ends.

    Returns:
        The items of *array* joined with single spaces, or ``None`` when
        *content* is falsy.
    """
    if not content:
        return None

    for node in content.next_siblings:
        if node.name == stop:
            break
        if node.name == 'a':
            array.append(node.text)
            continue
        if not isinstance(node, bs4.element.NavigableString):
            continue
        stripped = node.string.strip()
        if stripped:
            array.append(stripped.rstrip(';'))

    return ' '.join(array)
|
||||
|
||||
|
||||
cpt = 0  # leftover debug counter; nothing below reads it
spellz = {}

# One entry per attribute that is read the same way: find the bold label,
# then collect the siblings that follow it.
# (key in the saved record, <b> label on the page, tag that ends the value,
#  prefix used for the progress print)
_FIELD_SPECS = (
    ('casting_time', "Casting Time", 'b', "Cast time: "),
    ('components', 'Components', 'p', "Components: "),
    ('range', "Range", 'b', "Range: "),
    ('target', "Target", 'b', "Target: "),
    ('duration', "Duration", 'b', "Duration: "),
    ('saving_throw', 'Saving Throw', 'b', "Saving throw: "),
    ('spell_resistance', 'Spell Resistance', 'b', "Spell Resistance: "),
    ('area', 'Area', 'b', "Area:"),
    ('effect', 'Effect', 'b', 'Effect: '),
)

for li in lis:
    link = li.a
    if link is None or not link.get('href'):
        # List item without a usable link: there is no detail page to fetch.
        continue
    url = link['href']

    ## get html of details page
    responseDetails = requests.get(url)
    spellSoup = BeautifulSoup(responseDetails.content, 'lxml')

    # get article content which contains all info about spells
    spellContent = spellSoup.find(id='article-content')
    if not spellContent:
        continue

    ###################
    ### ATTRIBUTES
    ###################

    # get name (skip pages without a heading instead of crashing on None)
    heading = spellContent.find('h1')
    if heading is None:
        continue
    spell_name = heading.text
    print("name: ", spell_name)

    # get school and level: both live in the <p> preceding the "School"
    # label, with the text before "Level" being the school.
    spell_school = None
    spell_level = None
    school_label = spellContent.find('b', string="School")
    school_levels = school_label.find_previous('p') if school_label else None
    if school_levels is not None:
        parts = school_levels.text.split("Level")
        spell_school = parts[0].replace("School", "").strip().strip(";")
        if len(parts) > 1:
            spell_level = parts[1].strip()

    print("School: ", spell_school)
    print("Level:", spell_level)

    # get casting time, components, range, target, duration, saving throw,
    # spell resistance, area and effect
    attributes = {}
    for key, label, stop, prefix in _FIELD_SPECS:
        anchor = spellContent.find('b', string=label)
        attributes[key] = getStringSiblings([], anchor, stop)
        print(prefix, attributes[key])

    # get description: it starts after a <p> whose text is 'DESCRIPTION',
    # searched in the article first and in the 'page-center' div otherwise.
    spell_paragraphs = []
    spell_description = spellContent.find('p', string='DESCRIPTION')
    if not spell_description:
        page_center = spellSoup.find('div', {'class': 'page-center'})
        if page_center:
            spell_description = page_center.find('p', string='DESCRIPTION')
    if not spell_description:
        # No description separator: the spell is not recorded.
        continue
    spell_description = spell_description.find_next('p')

    # Collect paragraphs until one is preceded by a 'section15' div.
    while spell_description and not spell_description.find_previous('div', {'class': 'section15'}):
        spell_paragraphs.append(spell_description.text)
        spell_description = spell_description.find_next('p')
        # Also stop at a <p> that sits directly inside a <div>.
        if spell_description and spell_description.parent.name == 'div':
            break

    print("Spell description:\n", '\n\n'.join(spell_paragraphs))

    record = {
        'school': spell_school,
        'level': spell_level,
        'description': spell_paragraphs,
    }
    record.update(attributes)
    spellz[spell_name] = record

with open('outputs/spells.yaml', 'w') as f:
    yaml.dump(spellz, f)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in new issue