How to get form submission working in Scrapy

I tried using Scrapy to log in and collect the number of projects. Here is the code:

from scrapy.item import Item, Field
from scrapy.http import FormRequest
from scrapy.spider import Spider
from scrapy.utils.response import open_in_browser


class GitSpider(Spider):
    """Spider that submits a form found on the GitHub login page.

    Starts at the URL in ``start_urls``, posts placeholder credentials via
    ``FormRequest.from_response`` and opens the resulting response in a
    browser for inspection.
    """
    name = "github"
    allowed_domains = ["github.com"]
    start_urls = ["https://www.github.com/login"]

    def parse(self, response):
        """Fill in a form from *response* and submit it.

        Yields a single ``FormRequest`` whose response is handled by
        ``parse1``.
        """
        # Placeholder credentials keyed by the form's input names.
        formdata = {'login': 'username',
                'password': 'password' }
        # NOTE(review): no ``formnumber`` is passed, so which form on the
        # page gets submitted is left to from_response's default -- confirm
        # that it is the login form and not another form on the page.
        yield FormRequest.from_response(response,
                                        formdata=formdata,
                                        clickdata={'name': 'commit'},
                                        callback=self.parse1)

    def parse1(self, response):
        """Open the response to the submitted form in a web browser."""
        open_in_browser(response)

After running the code

scrapy runspider github.py

It should show me the page that results from submitting the form, which should be a failed login on the same page, since the username and password are fake. However, it shows me a search page. The log file is on pastebin.

How should the code be fixed? Thanks in advance.

+4
source share
2 answers

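The problem is that FormRequest.from_response() is submitting a different form - the "search" form - rather than the "log in" form. Select the right form with the formnumber argument: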

# formnumber selects which form on the page is submitted.
# NOTE(review): presumably index 1 is the login form here -- confirm
# against the current markup of the page.
yield FormRequest.from_response(response,
                                formnumber=1,
                                formdata=formdata,
                                clickdata={'name': 'commit'},
                                callback=self.parse1)

This is what the browser shows after that change (using the "fake" credentials):

enter image description here

+7

A solution using Selenium WebDriver:

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
import time
from scrapy.contrib.spiders import CrawlSpider

class GitSpider(CrawlSpider):
    """Spider that logs in to GitHub by driving a real Firefox browser.

    Scrapy fetches the login URL; the response's URL is then loaded in a
    Selenium-controlled Firefox instance, where the credentials are typed
    into the form and the submit button is clicked.
    """

    name = "gitscrape"
    allowed_domains = ["github.com"]
    start_urls = ["https://www.github.com/login"]

    def __init__(self, *args, **kwargs):
        """Initialise the spider and start a Firefox WebDriver session.

        Positional and keyword arguments are forwarded unchanged to the
        base class so that its own initialisation still runs.
        """
        # Without this call the base spider's set-up is skipped entirely.
        super(GitSpider, self).__init__(*args, **kwargs)
        self.driver = webdriver.Firefox()

    def parse(self, response):
        """Load the login page in the browser and submit the credentials.

        The browser is shut down when this method finishes, whether or not
        one of the login steps raised.
        """
        try:
            self.driver.get(response.url)
            login_form = self.driver.find_element_by_name('login')
            password_form = self.driver.find_element_by_name('password')
            commit = self.driver.find_element_by_name('commit')
            login_form.send_keys("yourlogin")
            password_form.send_keys("yourpassword")
            actions = ActionChains(self.driver)
            actions.click(commit)
            actions.perform()
            # By this point you are logged in to GitHub and have access
            # to all data in the main menu.
            # Crude fixed pause before the browser is torn down.
            time.sleep(3)
        finally:
            # quit() ends the whole WebDriver session (browser and driver
            # process); close() would only close the current window.
            self.driver.quit()
+3

All Articles