Hi, I tried to implement this in my code. But I get the following error: exceptions.NameError: global name 'Request' is not defined .
from scrapy.spider import BaseSpider from scrapy.selector import HtmlXPathSelector from bs4 import BeautifulSoup class spider_aicte(BaseSpider): name = "Indian_Colleges" allowed_domains = ["http://www.domain.org"] start_urls = [ "http://www.domain.org/appwebsite.html", ] def parse(self, response): filename = response.url.split("/")[-2] soup = BeautifulSoup(response.body) for link in soup.find_all('a'): download_link = link.get('href') if '.pdf' in download_link: pdf_link = "http://www.domain.org" + download_link print pdf_link class FileSpider(BaseSpider): name = "fspider" allowed_domains = ["www.domain.org"] start_urls = [ pdf_link ] for url in pdf_link: yield Request(url, callback=self.save_pdf) def save_pdf(self, response): path = self.get_path(response.url) with open(path, "wb") as f: f.write(response.body)
source share