removed extra attributes from spell

2 years ago · 197d8fcc64
parent 2b6f7a8783
commit 197d8fcc64
2 changed files with 44 additions and 49 deletions
--- a/scrapping/one_attribute_scrap.py
+++ b/scrapping/one_attribute_scrap.py
--- a/scrapping/one_page_scrap.py
+++ b/scrapping/one_page_scrap.py
@@ -9,50 +9,62 @@ import bs4
 from bs4 import BeautifulSoup
 from lxml import html

-URL = "https://www.d20pfsrd.com/magic/all-spells/d/death-from-below/"
+URL = "https://www.d20pfsrd.com/magic/all-spells/a/accelerate-poison/"

 responseDetails = requests.get(URL)
 spellSoup = BeautifulSoup(responseDetails.content, 'lxml')
 spellContent = spellSoup.find(id='article-content')

+###################
+### LEVELS
+###################
+school_levels = spellContent.find('b',string="School").find_previous('p')
+text = school_levels.text
+parts = text.split("Level")
+spell_school = parts[0].replace("School","").strip().strip(";")
+spell_level = parts[1].replace("Level","").strip().split(";")[0]
+
+print("level: ", spell_level)
+print("school: ", spell_school)
+
 ###################
 ### DESCRIPTION
 ###################
-spell_description = spellContent.find('p',string='DESCRIPTION')
-print("Desc separator: ", spell_description)      
+# spell_description = spellContent.find('p',string='DESCRIPTION')
+# print("Desc separator: ", spell_description)      

-spell_paragraphs = []
-spell_description = spellContent.find('p',string='DESCRIPTION')
-if not spell_description:
-    spell_description = spellSoup.find('div', {'class': 'page-center'}).find('p',string='DESCRIPTION')
-spell_description = spell_description.find_next('p')
+# spell_paragraphs = []
+# spell_description = spellContent.find('p',string='DESCRIPTION')
+# if not spell_description:
+#     spell_description = spellSoup.find('div', {'class': 'page-center'}).find('p',string='DESCRIPTION')
+# spell_description = spell_description.find_next('p')

-while spell_description and not spell_description.find_previous('div', {'class': 'section15'}):
-    if spell_description.has_attr('class'):
-        spell_paragraphs.append(spell_description.text)
-    else:
-        spell_paragraphs.append(spell_description.text)
-    spell_description = spell_description.find_next('p')
-    if spell_description and spell_description.parent.name == 'div':
-        break
+# while spell_description and not spell_description.find_previous('div', {'class': 'section15'}):
+#     if spell_description.has_attr('class'):
+#         spell_paragraphs.append(spell_description.text)
+#     else:
+#         spell_paragraphs.append(spell_description.text)
+#     spell_description = spell_description.find_next('p')
+#     if spell_description and spell_description.parent.name == 'div':
+#         break

-print("Spell description:\n", '\n\n'.join(spell_paragraphs))
+# print("Spell description:\n", '\n\n'.join(spell_paragraphs))

-def getStringSiblings(array, content, stop):
-    if content:
-        for sibling in content.next_siblings:
-            print(sibling)
-            if sibling.name == stop:
-                break
-            if sibling.name == 'a':
-                array.append(sibling.text)
-            elif isinstance((sibling), bs4.element.NavigableString):
-                component_text = sibling.string.strip()
-                if component_text:
-                    array.append(component_text.rstrip(';'))
-    else:
-        return None
-    return ' '.join(array)
+# def getStringSiblings(array, content, stop):
+#     if content:
+#         for sibling in content.next_siblings:
+#             print(sibling)
+#             if sibling.name == stop:
+#                 break
+#             if sibling.name == 'a':
+#                 array.append(sibling.text)
+#             elif isinstance((sibling), bs4.element.NavigableString):
+#                 component_text = sibling.string.strip()
+#                 if component_text:
+#                     array.append(component_text.rstrip(';'))
+#     else:
+#         return None
+#     return ' '.join(array)

 ###################
 ### TARGET
@@ -62,23 +74,6 @@ def getStringSiblings(array, content, stop):
 # spell_target = getStringSiblings(target, spell_target, 'b')
 # print("Target: ", spell_target)

-###################
-### LEVELS
-###################
-# p = spellContent.find('b',string="School")
-# print(p)
-# p = p.find_previous('p')
-# print(p)
-# text = p.text
-# print(text)
-# parts = text.split("Level")
-# school = parts[0].replace("School","").strip().strip(";")
-# level = parts[1].replace("Level","").strip()
-
-# print("---")
-# print("School:", school)
-# print("Level:", level)
-
 # def getDescription(array, content):
 #     if content:
 #         content = content.find_next()