1. 下载 Html Agility Pack,解压并保存到本地。下载地址:http://htmlagilitypack.codeplex.com/
/// <summary>
/// Downloads one agent listing page from esf.wuxi.soufun.com, follows every
/// listing link matched by the XPath below, and prints each listing's URL and
/// title text via <c>println</c>.
/// </summary>
/// <remarks>
/// Any exception is caught and printed; nothing is propagated to the caller.
/// Missing nodes are reported and skipped rather than aborting the whole run.
/// </remarks>
void caijisoufun()
{
    try
    {
        String str = "http://esf.wuxi.soufun.com/agent/agent/AloneHouseList.aspx?agentid=160148311&housetype=esf&price=&roomtype=&district=&page=1";

        // Fetch the HTML source of the listing index page.
        String htmlstr = fhttp2(str);
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(htmlstr);

        // The element with id "right" is the container the original author targeted.
        HtmlNode navNode = doc.GetElementbyId("right");
        if (navNode == null)
        {
            println("caijisoufun: element with id 'right' not found");
            return;
        }

        // NOTE(review): an XPath starting with "//" is evaluated from the document
        // root, not relative to navNode. Kept as-is because the selector was
        // presumably tuned against the live page; confirm before changing to ".//".
        HtmlNodeCollection categoryNodeList = navNode.SelectNodes("//div[1]/table/tr[1]/td[1]/a[1]");
        if (categoryNodeList == null)
        {
            // SelectNodes yields null (not an empty collection) when nothing matches.
            println("caijisoufun: no listing links matched");
            return;
        }

        foreach (HtmlNode categoryNode in categoryNodeList)
        {
            HtmlAttribute href = categoryNode.Attributes["href"];
            if (href == null)
            {
                // An anchor without href cannot be followed; skip it.
                continue;
            }

            String url = "http://esf.wuxi.soufun.com" + href.Value;
            println(url); // per the original author, println is essentially a Response.Write

            // Fetch and parse the detail page of this listing.
            String showstr = fhttp2(url);
            HtmlAgilityPack.HtmlDocument doc2 = new HtmlAgilityPack.HtmlDocument();
            doc2.LoadHtml(showstr);

            HtmlNode cnode = doc2.GetElementbyId("wrap");
            HtmlNode title = null;
            if (cnode != null)
            {
                title = cnode.SelectSingleNode("//div[2]/div[1]/h1[1]/font[1]");
            }

            if (title != null)
            {
                println(title.InnerText);
                // From here much more can be scraped (building, floor plan, ...)
                // and imported into your own database.
            }
            else
            {
                println("caijisoufun: title not found for " + url);
            }

            flush();
            sleep(10); // pause between requests, as in the original
        }
    }
    catch (Exception ex)
    {
        println(ex);
    }
}

/// <summary>
/// Downloads <paramref name="url"/> and returns the response body decoded as GB2312.
/// </summary>
/// <param name="url">Address to request.</param>
/// <returns>
/// The body with every line terminated by "\n" (converted with <c>tostr</c>),
/// or an empty string if anything fails.
/// </returns>
String fhttp2(String url)
{
    try
    {
        StringBuilder sb = new StringBuilder();
        WebRequest rGet = WebRequest.Create(url);

        // using blocks guarantee the response, stream and reader are released
        // even when reading throws part-way through.
        using (WebResponse rSet = rGet.GetResponse())
        using (Stream s = rSet.GetResponseStream())
        using (StreamReader sr = new StreamReader(s, Encoding.GetEncoding("GB2312")))
        {
            String line;
            while ((line = sr.ReadLine()) != null)
            {
                // ReadLine strips the terminator; re-add a uniform "\n".
                sb.Append(line).Append('\n');
            }
        }

        return tostr(sb);
    }
    catch (Exception)
    {
        // Best-effort contract relied on by the caller: any failure yields "".
        return "";
    }
}
// Reposted from: https://www.cnblogs.com/LYunF/archive/2012/10/24/2737114.html
相关资源:模拟浏览器抓取网页内容(审查元素中内容)