"""Scrape the main navigation menu of the Chinese Stardew Valley Wiki.

Reconstructed from commit 8e799d54d08125f82f3d2683f9f7efe2a66e2dfa
("stardewwiki", author 雨霖铃, Wed Oct 22 00:15:56 2025 +0800), which added
``WikiNavigationCategory.py`` (the two model classes below) and ``main.py``
(the functions and script entry point below).

Companion files added by the same commit:

* ``.gitignore``::

      /.idea

* ``config.json``::

      {
        "base_url": "https://zh.stardewvalleywiki.com/",
        "subpages": [
          "Stardew_Valley_Wiki",
          "",
          ""
        ]
      }

* ``requirements.txt``: requests, html2text, markdown, bs4, lxml,
  fake-useragent
* ``Stardew_Valley_Wiki.html``: a 13-line saved page stub whose only visible
  text is "来自 ConcernedApe".
"""
import json
from typing import TYPE_CHECKING, List, Optional

if TYPE_CHECKING:
    # Needed for annotations only. The runtime imports of ``requests`` and
    # ``bs4`` live inside the functions that use them, so the models and the
    # file helpers stay importable without the scraping dependencies.
    from bs4 import BeautifulSoup, Tag


class NavigationItem:
    """One entry (icon + link) inside a navigation category."""

    def __init__(self, name: str = "", icon_url: str = "", icon_alt: str = "",
                 link: str = "", is_subcategory: bool = False):
        self.name: str = name  # item name (the link text)
        self.icon_url: str = icon_url  # icon address
        self.icon_alt: str = icon_alt  # icon description (alt text)
        self.link: str = link  # link path
        # Whether this entry is a sub-category; nothing in this module sets it
        # to True yet.
        self.is_subcategory: bool = is_subcategory

    def __str__(self):
        return (f" ├── 项目: {self.name}\n"
                f" │ ├── 图标: {self.icon_alt}\n"
                f" │ ├── 图标URL: {self.icon_url}\n"
                f" │ ├── 链接: {self.link}\n"
                f" │ └── 是否子分类: {self.is_subcategory}")


class WikiNavigationCategory:
    """A named group of navigation items taken from the wiki main menu."""

    def __init__(self, category_name: str = "",
                 items: Optional[List[NavigationItem]] = None):
        self.category_name: str = category_name  # category name
        # A fresh list per instance, so categories never share item storage.
        self.items: List[NavigationItem] = [] if items is None else items


def load_config(path: str = 'config.json') -> dict:
    """Load the JSON configuration.

    Args:
        path: Location of the configuration file; defaults to ``config.json``
            in the current working directory.

    Returns:
        The parsed JSON document.
    """
    # JSON files are UTF-8 by specification; do not depend on the locale.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def get_page(url: str, sub: str, timeout: float = 10.0) -> Optional[str]:
    """Download ``url + sub`` and return the response body.

    Args:
        url: Base URL, concatenated with ``sub`` as-is.
        sub: Sub-page name appended to ``url``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response text, or ``None`` when the request fails. The error is
        printed in that case.
    """
    import requests

    try:
        # Without a timeout a stalled server would block the script forever.
        response = requests.get(url + sub, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # RequestException also covers connection errors and timeouts, which
        # previously escaped as uncaught exceptions.
        print(e)
        return None
    return response.text


def process_page(page: str) -> "BeautifulSoup":
    """Parse HTML text into a BeautifulSoup tree using the lxml parser."""
    from bs4 import BeautifulSoup

    return BeautifulSoup(page, 'lxml')


def save_page(page_name: str, page_content: str) -> None:
    """Write ``page_content`` to ``<page_name>.html``.

    The file is written as UTF-8 so that non-ASCII page text is stored the
    same way on every platform, and it is closed even if the write fails.
    """
    with open(f"{page_name}.html", 'w', encoding='utf-8') as file:
        file.write(page_content)


def process_item(category: "Tag") -> List[NavigationItem]:
    """Extract the navigation items of one category element.

    Every ``div`` under ``category`` except the first is inspected; the first
    one is skipped because the caller reads the category title from it. A
    ``div`` yields an item only when it contains both an ``img`` and an ``a``.
    Each item is printed as it is found.

    Args:
        category: The element of a single menu category.

    Returns:
        The extracted items, in document order.
    """
    found: List[NavigationItem] = []

    for item in category.find_all('div')[1:]:
        img = item.find('img')
        link = item.find('a')
        # Regular item: needs both an icon and a link.
        if img is None or link is None:
            continue

        new_item = NavigationItem(
            name=link.get_text(strip=True),
            # Fall back to "" so the field is always a string.
            icon_url=img.get('src', ''),
            icon_alt=img.get('alt', '无描述'),
            link=link.get('href', '#'),
        )
        print(new_item)
        found.append(new_item)

    return found


def main() -> List[WikiNavigationCategory]:
    """Fetch the wiki front page and collect its navigation categories.

    Returns:
        One ``WikiNavigationCategory`` per menu category that has a title
        element; an empty list when the page cannot be downloaded or has no
        ``div`` with id ``mainmenu``.
    """
    config = load_config()
    base_url = config['base_url']
    subpages = config['subpages']
    print(subpages)

    page = get_page(base_url, 'Stardew_Valley_Wiki')
    if page is None:
        # The download failed and the error was already printed.
        return []

    soup = process_page(page)

    # Find the div that contains all menus.
    main_menu = soup.find('div', id='mainmenu')
    if main_menu is None:
        return []

    result: List[WikiNavigationCategory] = []
    # Each direct child div is one category.
    for category in main_menu.find_all('div', recursive=False):
        # The first div inside a category holds its title.
        header_div = category.find('div')
        if header_div is None:
            continue
        result.append(WikiNavigationCategory(
            category_name=header_div.get_text(strip=True),
            items=process_item(category),
        ))
    return result


if __name__ == "__main__":
    main()