调用远程页面并解析

mac2022-06-30  22

             一、获取远程页面数据的请求方法

        /// <summary>        /// 获取远程服务器页面文件        /// </summary>        /// <param name="Url">访问地址</param>        /// <param name="encoding">编码格式</param>        /// <returns>string</returns>        public  string GetStringByUrl(string Url, System.Text.Encoding encoding)        {            if (Url.Equals("about:blank")) return null; ;            if (!Url.StartsWith("http://") && !Url.StartsWith("https://")) { Url = "http://" + Url; }            StreamReader sreader = null;            string result = string.Empty;            try            {                HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(Url);

                //httpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)";                httpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; EmbeddedWB 14.52 from: http://www.baidu.com/ EmbeddedWB 14.52; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";                httpWebRequest.Accept = "*/*";                httpWebRequest.KeepAlive = true;                httpWebRequest.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

                HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();                if (httpWebResponse.StatusCode == HttpStatusCode.OK)                {                    sreader = new StreamReader(httpWebResponse.GetResponseStream(), encoding);                    char[] cCont = new char[256];                    int count = sreader.Read(cCont, 0, 256);                    while (count > 0)                    {                        String str = new String(cCont, 0, count);                        result += str;                        count = sreader.Read(cCont, 0, 256);                    }                }                if (null != httpWebResponse) { httpWebResponse.Close(); }                return result;            }            catch (WebException e)            {                WriteLogContent(e.ToString(), "Log");                return "";            }            finally            {                if (sreader != null)                {                    sreader.Close();                }            }        }

        /// <summary>        /// 发送post请求        /// </summary>        /// <param name="url">目标地址</param>        /// <param name="PostVars">发送参数</param>        /// <param name="encoding">编码格式</param>        /// <returns></returns>        public static string SendPostRequest(string url, System.Collections.Specialized.NameValueCollection PostVars, Encoding encoding)        {            try            {                System.Net.WebClient WebClientObj = new System.Net.WebClient();                byte[] byRemoteInfo = WebClientObj.UploadValues(url, "POST", PostVars);                string sRemoteInfo = encoding.GetString(byRemoteInfo);                return sRemoteInfo;            }            catch (Exception ex)            {                new Common().WriteLogContent(ex.ToString(), "Log");                return "";            }        }

      二、解析获取后的页面数据          例1  data = new string[2];                        PostVars = new System.Collections.Specialized.NameValueCollection();                        PostVars.Add("pageindex", "1");                        PostVars.Add("lottory", "TC22X5Data");                        PostVars.Add("pl3", "");                        PostVars.Add("name", "22选5");                        PostVars.Add("isgp", "0");

                        string content = Common.SendPostRequest(requestUrl, PostVars, Encoding.UTF8);                        Regex reg = new Regex(@">(\d{7})  </td><td align='center' style='width: 60%;'> <span id='MyGridView_ctl02_lblHao'>(\d{2} \d{2} \d{2} \d{2} \d{2})<");                        MatchCollection matchs = reg.Matches(content);                        if (matchs.Count > 0)                        {                            data[0] = matchs[0].Groups[1].Value;                            data[1] = matchs[0].Groups[2].Value.Replace(' ', ',');                        }

          例2   data = new string[2];                        requestUrl += "tjssc/";                        string content = new Common().GetStringByUrl(requestUrl, Encoding.GetEncoding("gb2312"));                        Regex reg1 = new Regex("<td class=\"qihao\">(\\d{11})期</td>");                        Regex reg2 = new Regex("<input type=\"button\" value=\"(\\d{1})\" class=\"q_orange\" />");                        MatchCollection matchs1 = reg1.Matches(content);                        if (matchs1.Count > 0)                        {                            data[0] = matchs1[0].Groups[1].Value;                        }                        MatchCollection matchs2 = reg2.Matches(content);                        if (matchs2.Count > 0)                        {                            data[1] = ""+matchs2[0].Groups[1].Value + matchs2[1].Groups[1].Value + matchs2[2].Groups[1].Value + matchs2[3].Groups[1].Value + matchs2[4].Groups[1].Value;                        }

转载于:https://www.cnblogs.com/LYunF/archive/2012/10/08/2715320.html

最新回复(0)