stardewwiki
This commit is contained in:
commit
8e799d54d0
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/.idea
|
||||
13
Stardew_Valley_Wiki.html
Normal file
13
Stardew_Valley_Wiki.html
Normal file
@ -0,0 +1,13 @@
|
||||
|
||||
|
||||
来自 ConcernedApe
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
25
WikiNavigationCategory.py
Normal file
25
WikiNavigationCategory.py
Normal file
@ -0,0 +1,25 @@
|
||||
from typing import List
|
||||
|
||||
from bs4 import ResultSet, Tag
|
||||
|
||||
|
||||
class WikiNavigationCategory:
    """One category of the wiki navigation menu together with its entries."""

    def __init__(self):
        # Navigation entries listed under this category (fresh list per instance).
        self.items: List[NavigationItem] = []
        # Display name of the category.
        self.category_name: str = ""
|
||||
|
||||
|
||||
class NavigationItem:
    """A single entry of a navigation category: label, icon and link target."""

    def __init__(self):
        # Text label of the entry.
        self.name: str = ""
        # Address of the entry's icon image.
        self.icon_url: str = ""
        # Alternative (descriptive) text of the icon.
        self.icon_alt: str = ""
        # Link path the entry points to.
        self.link: str = ""
        # Whether the entry is a sub-category.
        self.is_subcategory: bool = False

    def __str__(self):
        """Render the entry as a small multi-line tree, one attribute per row."""
        rows = [
            f" ├── 项目: {self.name}\n",
            f" │ ├── 图标: {self.icon_alt}\n",
            f" │ ├── 图标URL: {self.icon_url}\n",
            f" │ ├── 链接: {self.link}\n",
            f" │ └── 是否子分类: {self.is_subcategory}",
        ]
        return "".join(rows)
|
||||
8
config.json
Normal file
8
config.json
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"base_url": "https://zh.stardewvalleywiki.com/",
|
||||
"subpages": [
|
||||
"Stardew_Valley_Wiki",
|
||||
"",
|
||||
""
|
||||
]
|
||||
}
|
||||
78
main.py
Normal file
78
main.py
Normal file
@ -0,0 +1,78 @@
|
||||
import json
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup, ResultSet, Tag
|
||||
|
||||
from WikiNavigationCategory import WikiNavigationCategory, NavigationItem
|
||||
|
||||
|
||||
def load_config():
    """Load the scraper configuration from ``config.json``.

    The file is looked up relative to the current working directory and is
    decoded explicitly as UTF-8, so the result does not depend on the
    platform's locale encoding.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open('config.json', 'r', encoding='utf-8') as f:
        return json.load(f)
|
||||
|
||||
|
||||
def get_page(url, sub, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Base URL; ``sub`` is appended to it verbatim.
        sub: Sub-page path appended to ``url``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The decoded response body, or ``None`` when the request failed. The
        error is printed in that case.
    """
    try:
        response = requests.get(url + sub, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # RequestException is the base class of HTTPError, ConnectionError and
        # Timeout, so every request failure takes the same print-and-None path.
        print(e)
        return None
    return response.text
|
||||
|
||||
|
||||
def process_page(page):
    """Parse HTML markup with the ``lxml`` parser and return the soup."""
    soup = BeautifulSoup(page, features='lxml')
    return soup
|
||||
|
||||
|
||||
def save_page(page_name, page_content):
    """Write ``page_content`` to ``<page_name>.html``.

    The file is created (or overwritten) relative to the current working
    directory. It is written as UTF-8 so that non-ASCII page text is stored
    the same way on every platform, and the ``with`` block closes the handle
    even if the write fails.

    Args:
        page_name: File name without the ``.html`` suffix.
        page_content: Text to write.
    """
    with open("{}.html".format(page_name), 'w', encoding='utf-8') as file:
        file.write(page_content)
|
||||
|
||||
|
||||
def process_item(category: Tag):
    """Extract, print and return the navigation entries of one category.

    Every ``<div>`` below ``category`` except the first one is inspected (the
    caller in this file treats the first ``<div>`` as the category header).
    A ``<div>`` yields an entry only when it contains both an ``<img>`` and
    an ``<a>``. ``is_subcategory`` is left at its default.

    Args:
        category: The element holding one navigation category.

    Returns:
        The list of ``NavigationItem`` objects that were found, in document
        order. Each one is also printed as it is built.
    """
    collected = []

    for item in category.find_all('div')[1:]:
        img = item.find('img')
        link = item.find('a')

        # Only regular entries carry both an icon and a link.
        if img is None or link is None:
            continue

        new_item = NavigationItem()
        new_item.name = link.get_text(strip=True)
        new_item.icon_url = img.get('src')
        new_item.icon_alt = img.get('alt', '无描述')
        new_item.link = link.get('href', '#')

        print(new_item)
        collected.append(new_item)

    return collected
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: load the configuration, download the wiki front
    # page and print every entry of its main navigation menu.
    config = load_config()
    base_url = config['base_url']
    subpages = config['subpages']
    print(subpages)

    page = get_page(base_url, 'Stardew_Valley_Wiki')
    if page is None:
        # get_page has already printed the error; parsing None would only
        # fail later with an unrelated TypeError.
        raise SystemExit(1)

    soup = process_page(page)

    # The <div> that wraps all navigation menus.
    main_menu = soup.find('div', id='mainmenu')
    if main_menu:
        # Each direct child <div> is one category.
        for category in main_menu.find_all('div', recursive=False):
            # Categories without a header <div> are skipped.
            if category.find('div'):
                process_item(category)
|
||||
6
requirements.txt
Normal file
6
requirements.txt
Normal file
@ -0,0 +1,6 @@
|
||||
requests
|
||||
html2text
|
||||
markdown
|
||||
beautifulsoup4
|
||||
lxml
|
||||
fake-useragent
|
||||
Loading…
Reference in New Issue
Block a user