# Source-page footer (scrape artifact): 闽公网安备 35020302035485号
# Framework: Scrapy v2.4.1
# 堆代码 duidaima.com
# -*- coding: utf-8 -*-
from multiprocessing import Process
from urllib.parse import quote_plus

import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
class BaiduSpider(scrapy.Spider):
    """Spider that fetches the Baidu search-results page for one keyword.

    The keyword is injected per crawl via the ``category`` argument so
    that each worker process can search a different term.
    """

    name = 'baidu'
    allowed_domains = ['baidu.com']

    def __init__(self, category=None, *args, **kwargs):
        """Build the search URL for *category*.

        Args:
            category: keyword to search for, or None for an empty query.
                Percent-encoded so non-ASCII terms (e.g. 'C语言') form a
                valid query string.
        """
        super().__init__(*args, **kwargs)
        # BUG FIX: the original interpolated the raw keyword (including a
        # literal "None" when category was omitted) into the URL, and
        # stored start_urls as a plain string instead of the list that
        # Scrapy's conventions (and its default start_requests) expect.
        keyword = quote_plus(category) if category else ''
        self.start_urls = [f"https://www.baidu.com/s?ie=utf-8&wd={keyword}"]

    def start_requests(self):
        """Issue the search request(s) with a desktop browser User-Agent."""
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        }
        for url in self.start_urls:
            yield scrapy.Request(url, headers=headers, callback=self.parse)

    def parse(self, response):
        """Print the HTTP status of the search-results page."""
        print(response.status)
        # print(response.text)
def start(category):
    """Run one BaiduSpider crawl for *category* in the current process.

    Intended as a multiprocessing target: a Twisted reactor (inside
    CrawlerProcess) can only be started once per process, so each
    keyword gets its own OS process.

    Args:
        category: search keyword forwarded to BaiduSpider.__init__.
    """
    print(f"category={category}")
    settings = get_project_settings()
    process = CrawlerProcess(settings)
    # BUG FIX: CrawlerProcess.crawl() requires a spider *class* (or a
    # Crawler object); the original built a BaiduSpider() instance and
    # passed it, which raises ValueError in Scrapy >= 1.4. Keyword
    # arguments are forwarded to the spider's __init__ by Scrapy.
    process.crawl(BaiduSpider, category=category)
    process.start()  # blocks until the crawl finishes
if __name__ == "__main__":
    # One process per keyword: a Twisted reactor cannot be restarted,
    # so each CrawlerProcess must live in its own OS process.
    keywords = ['java', 'python', 'C语言', 'php', 'goland']
    workers = [Process(target=start, args=(keyword,)) for keyword in keywords]
    for worker in workers:
        worker.start()
    # Wait for every crawl so the parent does not exit while children run.
    for worker in workers:
        worker.join()
# Goal of this script: run a separate Scrapy crawl for each keyword in
# ['java', 'python', 'C语言', 'php', 'goland'] — one process per keyword —
# rather than a single Scrapy run covering all search terms.