import os
import re
import html2text as ht
import requests
from bs4 import BeautifulSoup
def addTitle(root_dir, ext=None):
    """Prepend a YAML front-matter header to every matching file in a folder tree.

    Walks ``root_dir`` recursively. Inside each directory the files are
    visited in ascending order of the first 1-3 digit number found in their
    name (names without a number sort as 0). Hidden files (leading ``.``)
    and files whose name starts with ``readme`` (case-insensitive) are
    skipped. Every remaining file whose name ends with one of the requested
    suffixes is rewritten in place: a header holding its title (the file
    name without its last extension), a running ``order`` counter and a
    fixed category is inserted in front of the existing content.

    The ``order`` counter is shared by the whole walk; it is not reset per
    directory.

    :param root_dir: folder to walk.
    :param ext: a suffix (``"md"``) or an iterable of suffixes
        (``("md", "markdown")``) selecting the files to modify. When it is
        ``None`` or empty, no file is modified.
    :return: None
    """
    if not ext:
        # No suffix filter given: nothing is selected, nothing is rewritten.
        return

    # A bare string must be treated as ONE suffix. tuple("md") would yield
    # ("m", "d") and match every file name ending in "m" or "d".
    suffixes = (ext,) if isinstance(ext, str) else tuple(ext)

    number_pattern = re.compile(r"[0-9]{1,3}")

    def leading_number(file_name):
        # Sort key: first run of up to three digits in the name, else 0.
        match = number_pattern.search(file_name)
        return int(match.group(0)) if match else 0

    order = 0
    for parent, _, file_names in os.walk(root_dir):
        file_names.sort(key=leading_number)
        for file_name in file_names:
            if file_name.startswith('.'):  # hidden file
                continue
            if file_name.lower().startswith('readme'):
                continue
            if not file_name.endswith(suffixes):
                continue

            order += 1
            title = file_name.rsplit(".", 1)[0]
            header = (
                "---\n"
                f"title: {title}\n"
                f"order: {order}\n"
                "category:\n"
                "- AE表达式\n"
                "---\n"
            )
            file_path = os.path.join(parent, file_name)
            with open(file_path, "r+", encoding='utf-8') as f:
                body = f.read()
                # Rewind and clear the file so the header lands before the
                # original content instead of being appended after it.
                f.seek(0)
                f.truncate()
                f.write(header + body)
def getCat(URL):
    """Print every category block found on a documentation page.

    Downloads ``URL``, parses the response with the ``html.parser`` backend
    and, for each element carrying the CSS class ``docs-single-cat-wrap``,
    prints the element's text followed by the list of ``<a>`` tags found
    inside it.

    :param URL: address of the page to download.
    :return: None; the results are only printed.
    :raises requests.exceptions.RequestException: if the download fails or
        does not complete within the timeout.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(URL, timeout=30)
    # Force UTF-8 decoding of the body regardless of what the headers declare.
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    for category in soup.find_all(class_='docs-single-cat-wrap'):
        print(category.text)
        anchors = category.find_all("a")
        print(str(anchors))
        # os.mkdir(category.text)
def getContent(url):
    """Placeholder for retrieving the content of a page.

    The function has no implementation yet: ``url`` is ignored and nothing
    is fetched.

    :param url: address of the page (currently unused).
    :return: intended to be the page text; currently always ``None``.
    """
    return None
# Script entry point: list the categories of one documentation page.
if __name__ == '__main__':
    # Raw string so the backslashes of the Windows path are never read as
    # escape sequences (\P, \d, \z and \e are invalid escapes otherwise).
    path = r"E:\Project\docs_yuelili_com\docs\zh\exp"
    # addTitle(path,"md")

    URL = "https://www.yuelili.com/docs/ae-effect/arithmetic/"
    getCat(URL)

    # Draft, kept for reference: convert the main content of a page to
    # Markdown and save it to a file.
    # text_maker = ht.HTML2Text()
    # text_maker.bypass_tables = False
    #
    # htmlfile = requests.get(URL)
    # htmlfile.encoding = 'utf-8'
    # soup = BeautifulSoup(htmlfile.text,'html.parser')
    # content = str(soup.find(class_='betterdocs-content'))
    #
    # text = text_maker.handle(content)
    #
    # with open("12121.md","w+",encoding="utf-8") as f:
    #     f.write(text)
# No discussion yet; share your thoughts. (Residue from the web page this file was copied from.)