From 3c3902d570384f62ec1e87a20f10fd55cf1d8144 Mon Sep 17 00:00:00 2001 From: "nicolas.franco" Date: Sat, 11 Mar 2023 03:52:53 +0100 Subject: [PATCH] removed parsing algorithm + added extra level split --- scrapping/one_page_scrap.py | 19 ------------------- scrapping/scrap-spells.py | 3 +-- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/scrapping/one_page_scrap.py b/scrapping/one_page_scrap.py index 1610544..c3e673d 100644 --- a/scrapping/one_page_scrap.py +++ b/scrapping/one_page_scrap.py @@ -15,22 +15,6 @@ responseDetails = requests.get(URL) spellSoup = BeautifulSoup(responseDetails.content, 'lxml') spellContent = spellSoup.find(id='article-content') -def parseLevelAndGetClass(spell_level): - class_dict = {} - for class_level in spell_level.split(","): - class_level = class_level.strip() - if " " in class_level: - class_name, level = class_level.rsplit(maxsplit=1) - if "/" in class_name: - class_names = class_name.split("/") - for name in class_names: - class_dict[name.strip()] = level.strip() - else: - class_dict[class_name.strip()] = level.strip() - else: - class_dict[class_level.strip()] = "1" - return class_dict - ################### ### LEVELS ################### @@ -43,9 +27,6 @@ spell_level = parts[1].replace("Level","").strip().split(";")[0] print("level: ", spell_level) print("school: ", spell_school) -class_levels = parseLevelAndGetClass(spell_level) -print(class_levels) - ################### ### DESCRIPTION ################### diff --git a/scrapping/scrap-spells.py b/scrapping/scrap-spells.py index 6228a19..ab812ca 100644 --- a/scrapping/scrap-spells.py +++ b/scrapping/scrap-spells.py @@ -90,7 +90,7 @@ for li in lis: text = school_levels.text parts = text.split("Level") spell_school = parts[0].replace("School","").strip().strip(";") - spell_level = parts[1].replace("Level","").strip() + spell_level = parts[1].replace("Level","").strip().split(";")[0] spell_class_and_level = parseLevelAndGetClass(spell_level) @@ -192,7 +192,6 @@ for li in 
lis: 'effect': spell_effect, 'description': spell_paragraphs } - break with open('outputs/spells.yaml', 'w') as f: yaml.dump(spellz, f)