diff --git a/crawler2/spiders/__init__.py b/crawler2/spiders/__init__.py index d942e4c..2eaaa74 100644 --- a/crawler2/spiders/__init__.py +++ b/crawler2/spiders/__init__.py @@ -19,7 +19,7 @@ class CrawlerSpider(scrapy.Spider): yield { "url": response.url, "title": response.css("title::text").get(), - "content": " ".join(response.css("p, h1, h2, h3, h4, h5, a::text").getall()), + "content": " ".join(response.css("p, h1, h2, h3, h4, h5, a::text, span::text, div::text").getall()), "links": response.css("a::attr(href)").getall() }