How to print page source using Selenium

I have the code below, which runs a Twitter search and keeps scrolling the infinite-scroll results page. However, the line "print data" does not work for me. Any ideas?

# Import Selenium stuff
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException

# Import other needed packages
import sys
import unittest, time, re

# Call up Firefox, do the Twitter search, click the "All" link and start paging
class Sel(unittest.TestCase):
    """Scroll through a Twitter search results page and print its HTML."""

    def setUp(self):
        """Start Firefox and store the settings the test relies on."""
        self.driver = webdriver.Firefox()
        # Element lookups retry for up to 30 seconds before failing.
        self.driver.implicitly_wait(30)
        self.base_url = "https://twitter.com"
        self.verificationErrors = []
        self.accept_next_alert = True

    def tearDown(self):
        """Close the browser so no Firefox process outlives the test."""
        self.driver.quit()

    def test_sel(self):
        """Open the search, switch to the "All" tab, scroll, print the page."""
        driver = self.driver
        driver.get(self.base_url + "/search?q=storstrut&src=typd")
        # find_element(By.LINK_TEXT, ...) is the locator form that works on
        # both old and current Selenium releases.
        driver.find_element(By.LINK_TEXT, "All").click()
        for _ in range(1, 100):
            driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            # Pause before scrolling again; presumably this gives the page
            # time to append the next batch of results.
            time.sleep(4)
        html_source = driver.page_source
        data = html_source.encode('utf-8')
        # Parenthesised so the line parses on Python 2 and Python 3.
        # NOTE(review): on Python 3 this prints a bytes repr; print
        # html_source directly there.
        print(data)


# Run the test case through unittest's command-line runner when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    unittest.main() 
+4
source share
1 answer

You have a lot of unused code and some odd imports, but you are on the right track.

Here is a simplified version with explanations.

import time
from selenium import webdriver


# Seconds to wait after each scroll before scrolling again.  Presumably the
# page needs a moment to append new results; tune for your connection.
SCROLL_PAUSE_SECONDS = 0.2

# Upper bound on the number of scroll attempts.
MAX_SCROLLS = 100

# launch Firefox
driver = webdriver.Firefox()

try:
    # load Twitter page
    driver.get("https://twitter.com/search?q=storstrut&src=typd")

    # the following javascript scrolls down the entire page body.  Since
    # Twitter uses "infinite scrolling", more content will be added to the
    # bottom of the DOM as you scroll... since it is in the loop, it will
    # scroll down up to MAX_SCROLLS times, pausing after each scroll.
    for _ in range(MAX_SCROLLS):
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(SCROLL_PAUSE_SECONDS)

    # print all of the page source that was loaded (parenthesised so the
    # line parses on Python 2 and Python 3)
    # NOTE(review): on Python 3 this prints a bytes repr; drop .encode there.
    print(driver.page_source.encode("utf-8"))
finally:
    # quit and close browser, even if one of the steps above raised
    driver.quit()
+6
source

All Articles