Feat: The crawler now fetches text from span and div
This commit is contained in:
@@ -19,7 +19,7 @@ class CrawlerSpider(scrapy.Spider):
|
|||||||
yield {
|
yield {
|
||||||
"url": response.url,
|
"url": response.url,
|
||||||
"title": response.css("title::text").get(),
|
"title": response.css("title::text").get(),
|
||||||
"content": " ".join(response.css("p, h1, h2, h3, h4, h5, a::text").getall()),
|
"content": " ".join(response.css("p, h1, h2, h3, h4, h5, a::text, span::text, div::text").getall()),
|
||||||
"links": response.css("a::attr(href)").getall()
|
"links": response.css("a::attr(href)").getall()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user