From c10df0ec95e996ea4d81c1b22684a5e426f53d84 Mon Sep 17 00:00:00 2001 From: Namu Date: Sun, 16 Nov 2025 01:49:55 +0100 Subject: [PATCH] Feat: The crawler now fetches text from span and div --- crawler2/spiders/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawler2/spiders/__init__.py b/crawler2/spiders/__init__.py index d942e4c..2eaaa74 100644 --- a/crawler2/spiders/__init__.py +++ b/crawler2/spiders/__init__.py @@ -19,7 +19,7 @@ class CrawlerSpider(scrapy.Spider): yield { "url": response.url, "title": response.css("title::text").get(), - "content": " ".join(response.css("p, h1, h2, h3, h4, h5, a::text").getall()), + "content": " ".join(response.css("p, h1, h2, h3, h4, h5, a::text, span::text, div::text").getall()), "links": response.css("a::attr(href)").getall() }