"""Download the cover image of every video published by one Bilibili uploader.

The script queries the space "arc/search" endpoint page by page, collects the
``pic`` (cover URL) and ``bvid`` of each listed video, and saves every cover
into a folder named after the uploader.
"""
import requests
import json
import os
from time import sleep, time

# Request headers: present a desktop-browser User-Agent with every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36'
}

# Uploader (UP) id whose covers are downloaded; 314480501 is "漫罗拉".
UP_ID = '314480501'

# Listing endpoint; takes the uploader id and the 1-based page number.
SEARCH_URL = 'https://api.bilibili.com/x/space/arc/search?mid=%s&ps=30&tid=0&pn=%d&keyword=&order=pubdate&jsonp=jsonp'

# (connect, read) timeouts in seconds, applied to every HTTP request.
TIMEOUT = (5, 5)


def _fetch_page(session, up_id, page_index):
    """Return the ``data`` object of one listing page.

    Raises:
        requests.RequestException: on network failure or a non-2xx status.
        RuntimeError: if the JSON payload carries no usable ``data`` object.
    """
    resp = session.get(url=SEARCH_URL % (up_id, page_index), timeout=TIMEOUT)
    resp.raise_for_status()
    payload = resp.json()
    data = payload.get('data') if isinstance(payload, dict) else None
    if not data:
        raise RuntimeError('unexpected API response: %r' % (payload,))
    return data


def _videos_of(page_info):
    """Return the video entries of a listing page (empty list if none)."""
    return page_info['list']['vlist'] or []


def _covers(videos):
    """Return ``(cover_url, bvid)`` pairs for the given video entries."""
    return [(video['pic'], video['bvid']) for video in videos]


def _page_count(total, size):
    """Return how many pages are needed to list ``total`` items, ``size`` per page.

    Uses ceiling division so an exact multiple does not yield an extra,
    empty page; always returns at least 1.
    """
    if size <= 0:
        return 1
    return max(1, -(-total // size))


def _download(session, pic_url, path):
    """Fetch ``pic_url`` and write the response body to ``path``.

    Raises:
        requests.RequestException: on network failure or a non-2xx status.
        OSError: if the file cannot be written.
    """
    resp = session.get(url=pic_url, timeout=TIMEOUT)
    resp.raise_for_status()  # never store an HTTP error body as an image
    with open(path, 'wb') as fp:
        fp.write(resp.content)


def main():
    """Collect all cover URLs of ``UP_ID`` and save them to ``./<author>/``.

    Each file is named after the video's BV id (no extension is added).
    A cover that fails to download is reported and skipped, so the
    "actually crawled" count printed at the end may be lower than the total.
    """
    start = time()

    # One session for all requests so connections to the same host are reused.
    with requests.Session() as session:
        session.headers.update(HEADERS)

        first_page = _fetch_page(session, UP_ID, 1)
        paging = first_page['page']
        total = paging['count']  # total number of videos
        size = paging['ps']  # page size reported by the API

        # Cover URL and BV id both live in the entries of this list.
        videos = _videos_of(first_page)
        if not videos:
            raise SystemExit('no videos returned for UP id %s' % UP_ID)

        author = videos[0]['author']
        folder = './' + author
        os.makedirs(folder, exist_ok=True)

        img_list = _covers(videos)
        # Fetch the remaining pages (page 1 is already processed).
        for page_index in range(2, _page_count(total, size) + 1):
            page_info = _fetch_page(session, UP_ID, page_index)
            img_list.extend(_covers(_videos_of(page_info)))

        cnt = 0
        for pic_url, name in img_list:
            try:
                _download(session, pic_url, folder + '/' + name)
            except (requests.RequestException, OSError) as err:
                print('%s skipped: %s' % (name, err))
                continue
            cnt += 1
            print('%s %d' % (name, cnt), ' 爬取结束')  # BV id and running count

    time_use = time() - start
    print('%s 共%d张' % (author, total))
    print('实际爬取 %d 张' % cnt)
    print('用时: %.2fs' % time_use)
    print(len(img_list))


if __name__ == "__main__":
    main()