Retrieving user comments from a news site

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def wait(dr, x):
    # wait up to 50 seconds for all elements matching the XPath to be present
    element = WebDriverWait(dr, 50).until(
        EC.presence_of_all_elements_located((By.XPATH, x))
    )
    return element


browser = webdriver.Firefox()
browser.get("http://www.dinamalar.com/user_comments.asp?uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D")

for elem in wait(browser, '//*[@id="commsec"]/div[2]/div[1]'):
    print(elem.text)

This is the link I need to extract all the comments from: http://www.dinamalar.com/user_comments.asp?uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D

But my code only extracts the first 10 comments. The remaining comments are loaded dynamically, ten at a time, each time the "load more" button is clicked. How can I extract all of the comments using Python and Selenium?

1 answer

, , " " . , , " ". :

from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver


browser = webdriver.Firefox()
wait = WebDriverWait(browser, 10)
browser.get("http://www.dinamalar.com/user_comments.asp?uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D")

# initial wait for the page to load
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".morered")))

pages = 1
while True:
    browser.find_elements(By.CSS_SELECTOR, ".morered")[-1].click()

    # wait for more "load more" buttons to be present
    try:
        wait.until(lambda browser: len(browser.find_elements(By.CSS_SELECTOR, ".morered")) > pages)
    except TimeoutException:
        break  # no more data loaded, exit the loop

    print("Comments loaded: %d" % len(browser.find_elements_by_css_selector(".dateg")))

    pages += 1

browser.close()

It prints the number of comments loaded so far after every click, until all of the comments for the given URL have been loaded.
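
If you also need the comment text itself rather than just the running count, you can collect it once the loop has finished, before calling browser.close(). Here is a minimal sketch, assuming the comment blocks are still reachable through the XPath used in the question (that selector is only an illustration and may need adjusting for the live page):

# run this between the end of the while loop and browser.close()
for comment in browser.find_elements(By.XPATH, '//*[@id="commsec"]/div[2]/div[1]'):
    # .text returns the visible text of each loaded comment block
    print(comment.text)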
