# Python web crawler that automatically scrapes and downloads images
import os
import time
from lxml import etree
import requests
# 获取图片链接xpath地址
def get_href_xp(count=30):
    """Build the XPath expressions that address each gallery link.

    Args:
        count: How many child ``div`` entries of ``#infinite_scroll`` to
            address. Defaults to 30, the value that used to be hard-coded.

    Returns:
        A list of ``count`` XPath strings; the n-th one (1-based) selects the
        ``<a>`` element nested in the n-th ``div`` of ``#infinite_scroll``.
    """
    return [
        f'//*[@id="infinite_scroll"]/div[{index}]/div[1]/div/div[1]/a'
        for index in range(1, count + 1)
    ]
# 获取图片名称xpath地址
def get_img_name_xp(count=30):
    """Build the XPath expressions that address each gallery thumbnail.

    Args:
        count: How many child ``div`` entries of ``#infinite_scroll`` to
            address. Defaults to 30, the value that used to be hard-coded.

    Returns:
        A list of ``count`` XPath strings; the n-th one (1-based) selects the
        ``<img>`` element nested in the n-th ``div`` of ``#infinite_scroll``.
    """
    return [
        f'//*[@id="infinite_scroll"]/div[{index}]/div[1]/div/div[1]/a/img'
        for index in range(1, count + 1)
    ]
# 获取图片名称
def get_xp_image_name(xp, root=None):
    """Return the ``alt`` attribute of every element matched by an XPath.

    Args:
        xp: XPath expression to evaluate.
        root: Object exposing ``xpath()`` to search in. When omitted, the
            module-level ``tree`` assigned by the script entry point is used,
            which is what the function always relied on before.

    Returns:
        A list with one ``alt`` string per matched element (empty when the
        expression matches nothing).

    Raises:
        KeyError: If a matched element has no ``alt`` attribute.
    """
    if root is None:
        root = tree
    return [node.attrib['alt'] for node in root.xpath(xp)]
# 获取图片链接
def get_xp_html_url(xp, root=None, base_url=''):
    """Return the ``href`` attribute of every element matched by an XPath.

    Args:
        xp: XPath expression to evaluate.
        root: Object exposing ``xpath()`` to search in. When omitted, the
            module-level ``tree`` assigned by the script entry point is used,
            which is what the function always relied on before.
        base_url: Prefix prepended to every ``href``. Defaults to the empty
            string the code previously concatenated.
            NOTE(review): presumably a site base URL used to live here and
            was removed - confirm whether the hrefs are absolute.

    Returns:
        A list with one URL string per matched element (empty when the
        expression matches nothing).

    Raises:
        KeyError: If a matched element has no ``href`` attribute.
    """
    if root is None:
        root = tree
    return [base_url + node.attrib['href'] for node in root.xpath(xp)]
# 获取图片链接的图片数量(一个女模特的图片数量,也是链接数)
def image_number(url, timeout=10):
    """Fetch a gallery page and return its picture count as text.

    Args:
        url: Address of the gallery page to download.
        timeout: Seconds to wait on the HTTP request before giving up; the
            request previously had no timeout and could block forever.

    Returns:
        The ``text`` of the first element matching
        ``//*[@id="picnum"]/span[2]``, or ``None`` when nothing matches.
        The value is returned as found; callers convert it with ``int()``.
    """
    response = requests.get(url, timeout=timeout)
    page = etree.HTML(response.content.decode('utf-8'))
    matches = page.xpath('//*[@id="picnum"]/span[2]')
    # Only the first match is ever used; make the "no match" result explicit.
    return matches[0].text if matches else None
# 女模特图片链接地址拼接
def enlarge(url, number):
    """Expand a gallery URL into the URLs of its numbered sub-pages.

    ``_<n>`` is inserted in front of the last five characters of ``url`` for
    every n from 1 to ``number``.
    NOTE(review): the five-character split presumably targets a ``.html``
    suffix - confirm that all gallery links end that way.

    Args:
        url: Gallery page address.
        number: How many sub-pages to generate.

    Returns:
        A list of ``number`` URL strings (empty when ``number`` is 0).
    """
    url_head, url_tail = url[:-5], url[-5:]
    return [f'{url_head}_{page}{url_tail}' for page in range(1, number + 1)]


# Resolve the download address of a single picture
def image_download_url(url, timeout=10):
    """Fetch a picture page and return the link of its download anchor.

    Args:
        url: Address of the picture page to download.
        timeout: Seconds to wait on the HTTP request before giving up; the
            request previously had no timeout and could block forever.

    Returns:
        The ``href`` of the first element matching
        ``/html/body/div[2]/div[2]/div[2]/ul[1]/li[2]/a``, or ``None`` when
        nothing matches.

    Raises:
        KeyError: If the matched element has no ``href`` attribute.
    """
    response = requests.get(url, timeout=timeout)
    page = etree.HTML(response.content.decode('utf-8'))
    matches = page.xpath('/html/body/div[2]/div[2]/div[2]/ul[1]/li[2]/a')
    # Only the first match is ever used; make the "no match" result explicit.
    return matches[0].attrib['href'] if matches else None
def download(url_list, file_name, timeout=10):
    """Download every URL in ``url_list`` into a folder named ``file_name``.

    Files are written as ``1.jpg``, ``2.jpg``, ... (in list order) under
    ``./beautiful_gril/<file_name>/``. The function pauses one second after
    each picture.

    Args:
        url_list: Picture URLs to fetch.
        file_name: Name of the sub-folder that receives the pictures.
        timeout: Seconds to wait on each HTTP request before giving up.
    """
    # NOTE(review): the Host header is pinned to one image server; links
    # served from a different host presumably need a different value - confirm.
    headers = {
        'Host': 'img1.085p.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
    }
    # os.path.join keeps the same layout on Windows and also works elsewhere;
    # exist_ok lets an interrupted run be restarted without FileExistsError.
    target_dir = os.path.join('.', 'beautiful_gril', file_name)
    os.makedirs(target_dir, exist_ok=True)

    for index, link in enumerate(url_list, start=1):
        r = requests.get(link, headers=headers, timeout=timeout)
        try:
            with open(os.path.join(target_dir, f'{index}.jpg'), 'wb') as f:
                f.write(r.content)
        finally:
            # The response is what has to be closed; the ``requests`` module
            # has no ``close`` attribute, so calling it there raised.
            r.close()
        time.sleep(1)
if __name__ == "__main__":
    # Script entry point: read the listing page, collect each album's link
    # and name, then download every picture of the albums still pending.
    #
    # NOTE(review): '/' has no scheme or host, so requests is expected to
    # reject it; the real listing URL looks to have been removed - confirm.
    response = requests.get('/').content.decode('utf-8')
    # ``tree`` must remain a module-level name: get_xp_html_url() and
    # get_xp_image_name() read it as a global.
    tree = etree.HTML(response)

    href_xpath_list = get_href_xp()
    image_name_xpath_list = get_img_name_xp()

    # Each lookup returns a list; keep its first hit. Entries where either
    # lookup found nothing are dropped as a pair so links and names stay
    # aligned (indexing [0] on an empty result used to raise IndexError).
    href_list = []
    image_name_list = []
    for href_xp, name_xp in zip(href_xpath_list, image_name_xpath_list):
        hrefs = get_xp_html_url(href_xp)
        names = get_xp_image_name(name_xp)
        if not hrefs or not names:
            continue
        href_list.append(hrefs[0])
        image_name_list.append(names[0])

    # The first 13 albums are skipped, as before.
    # NOTE(review): presumably a manual resume point after an interrupted
    # run - confirm, or set to 0 to fetch everything.
    start_index = 13

    # Slice before doing any per-album requests so that no pages are fetched
    # for albums that will never be downloaded.
    pending = list(zip(href_list, image_name_list))[start_index:]
    for link, file_name in pending:
        number = image_number(link)
        if number is None:
            # int(None) used to abort the whole run here.
            print(f'skip {file_name}: picture count not found at {link}')
            continue
        image_load = [image_download_url(page) for page in enlarge(link, int(number))]
        # Drop pages whose download anchor was not found.
        image_load = [url for url in image_load if url is not None]
        download(image_load, file_name)
# Python web crawler that automatically scrapes and downloads images
import os
import time
from lxml import etree
import requests
# 获取图片链接xpath地址
def get_href_xp(count=30):
    """Build the XPath expressions that address each gallery link.

    Args:
        count: How many child ``div`` entries of ``#infinite_scroll`` to
            address. Defaults to 30, the value that used to be hard-coded.

    Returns:
        A list of ``count`` XPath strings; the n-th one (1-based) selects the
        ``<a>`` element nested in the n-th ``div`` of ``#infinite_scroll``.
    """
    return [
        f'//*[@id="infinite_scroll"]/div[{index}]/div[1]/div/div[1]/a'
        for index in range(1, count + 1)
    ]
# 获取图片名称xpath地址
def get_img_name_xp(count=30):
    """Build the XPath expressions that address each gallery thumbnail.

    Args:
        count: How many child ``div`` entries of ``#infinite_scroll`` to
            address. Defaults to 30, the value that used to be hard-coded.

    Returns:
        A list of ``count`` XPath strings; the n-th one (1-based) selects the
        ``<img>`` element nested in the n-th ``div`` of ``#infinite_scroll``.
    """
    return [
        f'//*[@id="infinite_scroll"]/div[{index}]/div[1]/div/div[1]/a/img'
        for index in range(1, count + 1)
    ]
# 获取图片名称
def get_xp_image_name(xp, root=None):
    """Return the ``alt`` attribute of every element matched by an XPath.

    Args:
        xp: XPath expression to evaluate.
        root: Object exposing ``xpath()`` to search in. When omitted, the
            module-level ``tree`` assigned by the script entry point is used,
            which is what the function always relied on before.

    Returns:
        A list with one ``alt`` string per matched element (empty when the
        expression matches nothing).

    Raises:
        KeyError: If a matched element has no ``alt`` attribute.
    """
    if root is None:
        root = tree
    return [node.attrib['alt'] for node in root.xpath(xp)]
# 获取图片链接
def get_xp_html_url(xp, root=None, base_url=''):
    """Return the ``href`` attribute of every element matched by an XPath.

    Args:
        xp: XPath expression to evaluate.
        root: Object exposing ``xpath()`` to search in. When omitted, the
            module-level ``tree`` assigned by the script entry point is used,
            which is what the function always relied on before.
        base_url: Prefix prepended to every ``href``. Defaults to the empty
            string the code previously concatenated.
            NOTE(review): presumably a site base URL used to live here and
            was removed - confirm whether the hrefs are absolute.

    Returns:
        A list with one URL string per matched element (empty when the
        expression matches nothing).

    Raises:
        KeyError: If a matched element has no ``href`` attribute.
    """
    if root is None:
        root = tree
    return [base_url + node.attrib['href'] for node in root.xpath(xp)]
# 获取图片链接的图片数量(一个女模特的图片数量,也是链接数)
def image_number(url, timeout=10):
    """Fetch a gallery page and return its picture count as text.

    Args:
        url: Address of the gallery page to download.
        timeout: Seconds to wait on the HTTP request before giving up; the
            request previously had no timeout and could block forever.

    Returns:
        The ``text`` of the first element matching
        ``//*[@id="picnum"]/span[2]``, or ``None`` when nothing matches.
        The value is returned as found; callers convert it with ``int()``.
    """
    response = requests.get(url, timeout=timeout)
    page = etree.HTML(response.content.decode('utf-8'))
    matches = page.xpath('//*[@id="picnum"]/span[2]')
    # Only the first match is ever used; make the "no match" result explicit.
    return matches[0].text if matches else None
# 女模特图片链接地址拼接
def enlarge(url, number):
    """Expand a gallery URL into the URLs of its numbered sub-pages.

    ``_<n>`` is inserted in front of the last five characters of ``url`` for
    every n from 1 to ``number``.
    NOTE(review): the five-character split presumably targets a ``.html``
    suffix - confirm that all gallery links end that way.

    Args:
        url: Gallery page address.
        number: How many sub-pages to generate.

    Returns:
        A list of ``number`` URL strings (empty when ``number`` is 0).
    """
    url_head, url_tail = url[:-5], url[-5:]
    return [f'{url_head}_{page}{url_tail}' for page in range(1, number + 1)]


# Resolve the download address of a single picture
def image_download_url(url, timeout=10):
    """Fetch a picture page and return the link of its download anchor.

    Args:
        url: Address of the picture page to download.
        timeout: Seconds to wait on the HTTP request before giving up; the
            request previously had no timeout and could block forever.

    Returns:
        The ``href`` of the first element matching
        ``/html/body/div[2]/div[2]/div[2]/ul[1]/li[2]/a``, or ``None`` when
        nothing matches.

    Raises:
        KeyError: If the matched element has no ``href`` attribute.
    """
    response = requests.get(url, timeout=timeout)
    page = etree.HTML(response.content.decode('utf-8'))
    matches = page.xpath('/html/body/div[2]/div[2]/div[2]/ul[1]/li[2]/a')
    # Only the first match is ever used; make the "no match" result explicit.
    return matches[0].attrib['href'] if matches else None
def download(url_list, file_name, timeout=10):
    """Download every URL in ``url_list`` into a folder named ``file_name``.

    Files are written as ``1.jpg``, ``2.jpg``, ... (in list order) under
    ``./beautiful_gril/<file_name>/``. The function pauses one second after
    each picture.

    Args:
        url_list: Picture URLs to fetch.
        file_name: Name of the sub-folder that receives the pictures.
        timeout: Seconds to wait on each HTTP request before giving up.
    """
    # NOTE(review): the Host header is pinned to one image server; links
    # served from a different host presumably need a different value - confirm.
    headers = {
        'Host': 'img1.085p.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
    }
    # os.path.join keeps the same layout on Windows and also works elsewhere;
    # exist_ok lets an interrupted run be restarted without FileExistsError.
    target_dir = os.path.join('.', 'beautiful_gril', file_name)
    os.makedirs(target_dir, exist_ok=True)

    for index, link in enumerate(url_list, start=1):
        r = requests.get(link, headers=headers, timeout=timeout)
        try:
            with open(os.path.join(target_dir, f'{index}.jpg'), 'wb') as f:
                f.write(r.content)
        finally:
            # The response is what has to be closed; the ``requests`` module
            # has no ``close`` attribute, so calling it there raised.
            r.close()
        time.sleep(1)
if __name__ == "__main__":
    # Script entry point: read the listing page, collect each album's link
    # and name, then download every picture of the albums still pending.
    #
    # NOTE(review): '/' has no scheme or host, so requests is expected to
    # reject it; the real listing URL looks to have been removed - confirm.
    response = requests.get('/').content.decode('utf-8')
    # ``tree`` must remain a module-level name: get_xp_html_url() and
    # get_xp_image_name() read it as a global.
    tree = etree.HTML(response)

    href_xpath_list = get_href_xp()
    image_name_xpath_list = get_img_name_xp()

    # Each lookup returns a list; keep its first hit. Entries where either
    # lookup found nothing are dropped as a pair so links and names stay
    # aligned (indexing [0] on an empty result used to raise IndexError).
    href_list = []
    image_name_list = []
    for href_xp, name_xp in zip(href_xpath_list, image_name_xpath_list):
        hrefs = get_xp_html_url(href_xp)
        names = get_xp_image_name(name_xp)
        if not hrefs or not names:
            continue
        href_list.append(hrefs[0])
        image_name_list.append(names[0])

    # The first 13 albums are skipped, as before.
    # NOTE(review): presumably a manual resume point after an interrupted
    # run - confirm, or set to 0 to fetch everything.
    start_index = 13

    # Slice before doing any per-album requests so that no pages are fetched
    # for albums that will never be downloaded.
    pending = list(zip(href_list, image_name_list))[start_index:]
    for link, file_name in pending:
        number = image_number(link)
        if number is None:
            # int(None) used to abort the whole run here.
            print(f'skip {file_name}: picture count not found at {link}')
            continue
        image_load = [image_download_url(page) for page in enlarge(link, int(number))]
        # Drop pages whose download anchor was not found.
        image_load = [url for url in image_load if url is not None]
        download(image_load, file_name)