I am very new to Scrapy. I am scraping a website where some anchor tags have href attributes that call a JavaScript sysSubmitForm function. When I click such a link in the browser, a page opens that contains the data I need. I used XPath to find the href of the specific anchor tags, but I cannot follow an href that contains a JavaScript function. Can someone tell me how to handle these JavaScript form-submit links in Scrapy (Python)?

My HTML:
<table class="Tbl" cellspacing="2" cellpadding="0" border="0"> <tbody> <tr> <td class="TblOddRow"> <table cellspacing="0" cellpadding="0" border="0"> <tbody> <tr> <td valign="middle" nowrap=""> <a class="Page" alt="Click to view job description" title="Click to view job description" href="javascript:sysSubmitForm('frmSR1');">Accountant </a> </td> </tr> </tbody> </table> </td> </tr> </tbody> </table>
And my spider code:
from scrapy.spider import BaseSpider
from scrapy.http import FormRequest
from scrapy.selector import HtmlXPathSelector


class MountSinaiSpider(BaseSpider):
    name = "mountsinai"
    allowed_domains = ["mountsinaicss.igreentree.com"]
    start_urls = [
        "https://mountsinaicss.igreentree.com/css_external/CSSPage_SearchAndBrowseJobs.ASP?T=20120517011617&",
    ]

    # Submit the search form on the start page to get the list of jobs.
    def parse(self, response):
        return [FormRequest.from_response(
            response,
            formdata={
                "Type": "CSS",
                "SRCH": "Search Jobs",
                "InitURL": "CSSPage_SearchAndBrowseJobs.ASP",
                "RetColsQS": "Requisition.Key¤Requisition.JobTitle¤Requisition.fk_Code_Full_Part¤[Requisition.fk_Code_Full_Part]OLD.Description(sysfk_Code_Full_PartDesc)¤Requisition.fk_Code_Location¤[Requisition.fk_Code_Location]OLD.Description(sysfk_Code_LocationDesc)¤Requisition.fk_Code_Dept¤[Requisition.fk_Code_Dept]OLD.Description(sysfk_Code_DeptDesc)¤Requisition.Req¤",
                "RetColsGR": "Requisition.Key¤Requisition.JobTitle¤Requisition.fk_Code_Full_Part¤[Requisition.fk_Code_Full_Part]OLD.Description(sysfk_Code_Full_PartDesc)¤Requisition.fk_Code_Location¤[Requisition.fk_Code_Location]OLD.Description(sysfk_Code_LocationDesc)¤Requisition.fk_Code_Dept¤[Requisition.fk_Code_Dept]OLD.Description(sysfk_Code_DeptDesc)¤Requisition.Req¤",
                "ResultSort": "",
            },
            callback=self.parse_main_list)]

    # Parse the results list and collect the hrefs of the job links.
    def parse_main_list(self, response):
        hxs = HtmlXPathSelector(response)
        firstpage_urls = hxs.select("//table[@class='Tbl']/tr/td/table/tr/td")
        for link in firstpage_urls:
            # Each href looks like "javascript:sysSubmitForm('frmSR1');"
            # -- this is where I am stuck.
            hrefs = link.select('a/@href').extract()
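For context, here is what I think I need to do (I may well be wrong): since Scrapy does not execute JavaScript, sysSubmitForm('frmSR1') presumably just submits the form named frmSR1 that is already in the page, so I would have to reproduce that submission myself. Below is a minimal sketch of that idea in the asker's old Scrapy API; the class name, the parse_job_page callback, and the use of the formname argument are my own assumptions (formname requires a Scrapy version that supports it; otherwise formnumber could be used to pick the form).

import re

from scrapy.spider import BaseSpider
from scrapy.http import FormRequest
from scrapy.selector import HtmlXPathSelector


class MountSinaiSpiderSketch(BaseSpider):
    """Sketch only: shows how parse_main_list might follow the JavaScript links."""
    name = "mountsinai_sketch"

    def parse_main_list(self, response):
        hxs = HtmlXPathSelector(response)
        # Hrefs look like "javascript:sysSubmitForm('frmSR1');"
        hrefs = hxs.select("//a[@class='Page']/@href").extract()
        for href in hrefs:
            match = re.search(r"sysSubmitForm\('([^']+)'\)", href)
            if not match:
                continue
            form_name = match.group(1)  # e.g. "frmSR1"
            # Instead of "executing" the JavaScript, re-submit the named form.
            # Assumption: submitting this form returns the job-description page.
            yield FormRequest.from_response(response,
                                            formname=form_name,
                                            callback=self.parse_job_page)

    def parse_job_page(self, response):
        # The job-description page would be parsed here.
        pass

Is something like this the right way to handle such links, or is there a standard Scrapy approach?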