79 lines
1.9 KiB
Python
79 lines
1.9 KiB
Python
import json
|
||
|
||
import requests
|
||
from bs4 import BeautifulSoup, ResultSet, Tag
|
||
|
||
from WikiNavigationCategory import WikiNavigationCategory, NavigationItem
|
||
|
||
|
||
def load_config():
    """Load the scraper configuration from ``config.json``.

    The file is looked up relative to the current working directory and is
    decoded as UTF-8 rather than with the platform's locale encoding, so
    non-ASCII values are read the same way on every system.

    Returns:
        The parsed JSON document (whatever ``json.load`` produces for it).

    Raises:
        FileNotFoundError: if ``config.json`` does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open('config.json', 'r', encoding='utf-8') as f:
        return json.load(f)
|
||
|
||
|
||
def get_page(url, sub, timeout=10):
    """Download ``url + sub`` and return the response body as text.

    Args:
        url: Base URL. It is concatenated with ``sub`` as-is; no separator
            is inserted.
        sub: Page path appended to ``url``.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the script forever.

    Returns:
        The response text, or ``None`` when the server replies with an HTTP
        error status (the error is printed in that case).

    Raises:
        requests.exceptions.RequestException: failures other than an HTTP
            error status (e.g. connection problems, timeouts) are not
            handled here and propagate to the caller.
    """
    try:
        response = requests.get(url + sub, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        print(e)
        # Make the "no page" outcome explicit instead of falling off the end.
        return None
    return response.text
|
||
|
||
|
||
def process_page(page):
    """Parse *page* (HTML markup) with the ``lxml`` parser.

    Returns the resulting ``BeautifulSoup`` document.
    """
    soup = BeautifulSoup(page, features='lxml')
    return soup
|
||
|
||
|
||
def save_page(page_name, page_content):
    """Write *page_content* to ``<page_name>.html``.

    Args:
        page_name: Output path without the ``.html`` extension.
        page_content: Text to write. An existing file is overwritten.

    The file is opened in a ``with`` block so the handle is closed even when
    the write fails, and it is encoded as UTF-8 so content with non-ASCII
    characters does not depend on the platform's default encoding.
    """
    with open("{}.html".format(page_name), 'w', encoding='utf-8') as file:
        file.write(page_content)
|
||
|
||
|
||
def process_item(category: Tag):
    """Print a ``NavigationItem`` for every entry found in *category*.

    Every ``<div>`` under *category* except the first one is inspected.
    An entry is reported only when it contains both an ``<img>`` and an
    ``<a>``; entries missing either are skipped silently.
    """
    # The first <div> is skipped; presumably it is the category header
    # rather than an entry -- confirm against the page markup.
    for entry in category.find_all('div')[1:]:
        icon = entry.find('img')
        anchor = entry.find('a')
        if not (icon and anchor):
            continue

        nav = NavigationItem()
        nav.name = anchor.get_text(strip=True)
        nav.icon_url = icon.get('src')
        # Fall back to a placeholder description when the image has no alt.
        nav.icon_alt = icon.get('alt', '无描述')
        nav.link = anchor.get('href', '#')

        print(nav)
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: load the config, fetch the wiki main page and
    # print the navigation items of every category in its main menu.
    config = load_config()
    base_url = config['base_url']
    subpages = config['subpages']
    print(subpages)

    page = get_page(base_url, 'Stardew_Valley_Wiki')
    if page is None:
        # get_page returns None after an HTTP error; stop with a clear
        # message instead of handing None to the HTML parser.
        raise SystemExit("Failed to fetch the wiki main page; nothing to parse.")
    soup = process_page(page)

    # Locate the div that contains all menus.
    main_menu = soup.find('div', id='mainmenu')
    if main_menu:
        # Each direct child div is one category.
        categories = main_menu.find_all('div', recursive=False)

        for category in categories:
            # Only categories that have a header div are processed.
            header_div = category.find('div')
            if header_div:
                process_item(category)
|