from datetime import datetime
from scrapy.spiders import SitemapSpider
class FilteredSitemapSpider(SitemapSpider):
name = 'filtered_sitemap_spider'
allowed_domains = ['example.com']
sitemap_urls = ['http://example.com/sitemap.xml']
def sitemap_filter(self, entries):
for entry in entries:
date_time = datetime.strptime(entry['lastmod'], '%Y-%m-%d')
if date_time.year >= 2005:
yield entry