Scrapy

mac2022-06-30  98

pip install Scrapy scrapy.cfg myproject/ __init__.py items.py middlewares.py pipelines.py settings.py spiders/ __init__.py spider1.py spider2.py ... scrapy startproject myproject [project_dir] cd project_dir scrapy genspider mydomain mydomain.com import scrapy class MySpider(scrapy.Spider): name = 'example.com' allowed_domains = ['example.com'] start_urls = [ 'http://www.example.com/1.html', 'http://www.example.com/2.html', 'http://www.example.com/3.html', ] def parse(self, response): pass import scrapy from myproject.items import MyItem class MySpider(scrapy.Spider): name = 'example.com' allowed_domains = ['example.com'] def start_requests(self): yield scrapy.Request('http://www.example.com/1.html', self.parse) yield scrapy.Request('http://www.example.com/2.html', self.parse) yield scrapy.Request('http://www.example.com/3.html', self.parse) def parse(self, response): pass import scrapy class Product(scrapy.Item): name = scrapy.Field() price = scrapy.Field() stock = scrapy.Field() tags = scrapy.Field()

 

最新回复(0)