library(XML)
library(relenium)

## Download the page: drive a Firefox session via relenium and parse the
## HTML source it returns.
website <- firefoxClass$new()
website$get("http://www.machinerytrader.com/list/list.aspx?pg=1&bcatid=4&DidSearch=1&EID=1&LP=MAT&ETID=5&catid=1015&mdlx=Contains&Cond=All&SO=26&btnSearch=Search&units=imperial")
doc <- htmlParse(website$getPageSource())

## Read every HTML table on the page, keeping text as character vectors.
tables <- readHTMLTable(doc, stringsAsFactors = FALSE)

## Tables 8, 10, ..., 56 are stacked row-wise to form the main data.
listing_idx <- seq(from = 8, to = 56, by = 2)
data <- do.call("rbind", tables[listing_idx])

## Tables 9, 11, ..., 57 (the table right after each of the above): take the
## second column of each and keep only its first entry, then append the
## result as one extra column.
detail_cols <- lapply(tables[listing_idx + 1], function(tbl) tbl[[2]])
data <- cbind(data, sapply(detail_cols, function(col) col[1]))

## Drop the row names produced by rbind and label the five columns.
rownames(data) <- NULL
names(data) <- c("year.man.model", "sn", "price", "location", "auction")
This gives you the data for the first page (only the first two rows are shown here):
head(data,2)
          year.man.model       sn      price location                                              auction
1     1972 AMERICAN 5530 GS14745W US $50,100       MI                   Auction: 1/9/2013; 4,796 Hours; ..
2     AUSTIN-WESTERN 307      307  US $3,400       MT Auction: 12/18/2013; AUSTIN-WESTERN track excavator.
To get all the pages, loop over them by replacing the pg=1 parameter in the address with pg=i, where i is the page number.
source share