I am trying to scrape Craigslist listings using Scrapy to extract items that are for sale.
I can extract the date, the post title, and the post URL, but I am having trouble extracting the price.
For some reason, the current code extracts all of the prices on the page for every item, but when I remove the // in front of the price span in the XPath, the price field comes back empty.
Can someone please review the code below and help me out?
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector

from craigslist_sample.items import CraigslistSampleItem


class MySpider(BaseSpider):
    """Spider that extracts for-sale listings from a Craigslist search page."""

    name = "craig"
    allowed_domains = ["craigslist.org"]
    start_urls = ["http://longisland.craigslist.org/search/sss?sort=date&query=raptor%20660&srchType=T"]

    def parse(self, response):
        """Build one item per ``<p>`` element found in the response.

        Args:
            response: The downloaded search-results page.

        Returns:
            A list of ``CraigslistSampleItem`` objects. Each field holds the
            list of strings produced by ``extract()`` for that row.
        """
        hxs = HtmlXPathSelector(response)
        items = []
        # Use a distinct loop name; the original rebound the collection
        # variable ("for titles in titles"), which was confusing.
        for row in hxs.select("//p"):
            item = CraigslistSampleItem()
            item["date"] = row.select('span[@class="itemdate"]/text()').extract()
            item["title"] = row.select("a/text()").extract()
            item["link"] = row.select("a/@href").extract()
            # A leading "//" is absolute: it searches the whole document even
            # when called on a row selector, so every item got every price.
            # A bare "span[...]" only matches direct children of the <p>,
            # which presumably is why it returned nothing (the price span is
            # likely nested deeper -- confirm against the page markup).
            # ".//" searches all descendants of the current row only.
            item["price"] = row.select('.//span[@class="itempp"]/text()').extract()
            items.append(item)
        return items
source share