【python】爬取大众脸插件发布时间和标题

import re
import time

import requests
from lxml import etree

# Get the total number of pages
def get_page_num(url):
    """Return the number of listing pages advertised by the page at ``url``.

    The page is searched for a pager label of the form ``共 N 页``
    ("N pages in total") and ``N`` is returned as an int.

    Args:
        url: Address of the first listing page.

    Returns:
        The page count taken from the pager label, or 1 when no such label
        is found (presumably a listing short enough to have no pager).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get(url, timeout=10)
    res.raise_for_status()
    # Capture digits only: a greedy ``.+`` can swallow unrelated text up to a
    # later "页" on the same line, which would make int() fail.
    match = re.search(r'共\s*(\d+)\s*页', res.text)
    if match is None:
        return 1
    return int(match.group(1))
# Get article titles and dates
_TITLE_XPATH = '/html/body/section/div[2]/div/article[*]/h2/a/text()'
_DATE_XPATH = '/html/body/section/div[2]/div/article[*]/footer/time/text()'
_REQUEST_DELAY = 0.5  # seconds to wait between consecutive page requests


def _scrape_listing_page(page_url):
    """Fetch one listing page and return its ``(titles, dates)`` text lists."""
    r = requests.get(page_url, timeout=10)
    r.raise_for_status()
    html = etree.HTML(r.content)
    return html.xpath(_TITLE_XPATH), html.xpath(_DATE_XPATH)


def get_data(pages_num, url):
    """Collect article titles and publication dates from every listing page.

    Pages are visited from the highest page number down to page 2
    (``<url>/page/<n>/``), and the first page (``url`` itself) is fetched
    last. Within a page, entries keep the order in which they appear in the
    HTML.

    Args:
        pages_num: Total number of listing pages.
        url: Address of the first listing page.

    Returns:
        A ``(titles, dates)`` tuple of two lists of strings.

    Raises:
        requests.HTTPError: If any page answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    titles, dates = [], []
    base = url.rstrip('/')  # tolerate a base URL with or without trailing '/'
    for i in range(pages_num, 1, -1):
        page_titles, page_dates = _scrape_listing_page(
            '{}/page/{}/'.format(base, i))
        titles += page_titles
        dates += page_dates
        # Throttle between requests; sleeping once after the whole crawl
        # would not limit the request rate at all.
        time.sleep(_REQUEST_DELAY)
    # The first page has no /page/N/ suffix. It is queried with the same
    # XPaths as the other pages so that ``dates`` really receives dates.
    page_titles, page_dates = _scrape_listing_page(url)
    titles += page_titles
    dates += page_dates
    return titles, dates
# Run the scrape only when this file is executed as a script; importing the
# module must not trigger any network traffic.
if __name__ == '__main__':
    url = 'https://www.lookae.com/after-effects/aechajian/'
    data = get_data(get_page_num(url), url)
    print(data)

 

给TA充电
共{{data.count}}人
人已充电
编程

【python】爬取AE scripts的AE插件信息

2021-8-1 11:21:38

杂记编程

【python】正则表达式

2021-8-1 11:24:14

0 条回复 A文章作者 M管理员
    暂无讨论,说说你的看法吧
个人中心
今日签到
搜索