Feat: The crawler now fetches text from span and div elements
This commit is contained in:
@@ -19,7 +19,7 @@ class CrawlerSpider(scrapy.Spider):
|
||||
yield {
|
||||
"url": response.url,
|
||||
"title": response.css("title::text").get(),
|
||||
"content": " ".join(response.css("p, h1, h2, h3, h4, h5, a::text").getall()),
|
||||
"content": " ".join(response.css("p, h1, h2, h3, h4, h5, a::text, span::text, div::text").getall()),
|
||||
"links": response.css("a::attr(href)").getall()
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user