using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Framework.Core.Crawl;
using HtmlAgilityPack;
namespace WebCaptureSolution
{
static class Program
{
    /// <summary>
    /// P/Invoke binding for <c>UrlMkSetSessionOption</c> exported by urlmon.dll.
    /// It is called once from <see cref="Main"/> with
    /// <see cref="URLMON_OPTION_USERAGENT"/> to install <see cref="SPUserAgent"/>
    /// before the first navigation.
    /// </summary>
    /// <param name="dwOption">Option identifier (here always <see cref="URLMON_OPTION_USERAGENT"/>).</param>
    /// <param name="pBuffer">Option value, marshalled as an ANSI string.</param>
    /// <param name="dwBufferLength">Length of <paramref name="pBuffer"/> in characters.</param>
    /// <param name="dwReserved">Passed as 0 by this program.</param>
    /// <returns>An HRESULT-style status code; this program does not inspect it.</returns>
    [DllImport("urlmon.dll", CharSet = CharSet.Ansi)]
    private static extern int UrlMkSetSessionOption(int dwOption, string pBuffer, int dwBufferLength, int dwReserved);

    // Option id passed to UrlMkSetSessionOption to override the User-Agent string.
    const int URLMON_OPTION_USERAGENT = 0x10000001;

    // User-Agent string installed for the session before the first navigation.
    const string SPUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36";

    // Page that hosts the login form.
    const string LoginUrl = "http://www.handsupowo.pl/member.php?action=login";

    // Archive listing whose thread links are crawled after logging in.
    const string ThreadListUrl = "http://www.handsupowo.pl/archive/index.php?forum-13.html";

    // File that receives the collected titles and download links.
    const string OutputPath = @"D:\Nodejs\myjs\DownLoadUrl.txt";

    // Upper bound for a single page load; without it a load that never completes hangs forever.
    const double PageLoadTimeoutSeconds = 60;

    // Fixed pause after submitting the login form, giving the post-login page time to load.
    const double LoginSettleSeconds = 10;

    // Fixed pause after each thread page reports completion, before its HTML is read.
    const double ThreadSettleSeconds = 5;

    // Pause between message-pump iterations so the wait loops do not spin a CPU core.
    const int PollIntervalMilliseconds = 10;

    /// <summary>
    /// The main entry point for the application.
    /// Logs in through the embedded browser, reads the thread list of the archive
    /// page, visits every thread, and writes each thread's title followed by its
    /// download links to <see cref="OutputPath"/>.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    /// <exception cref="TimeoutException">The login page or the thread list did not finish loading in time.</exception>
    /// <exception cref="InvalidOperationException">The login form or the thread list could not be found in the page.</exception>
    [STAThread]
    static void Main(string[] args)
    {
        Application.EnableVisualStyles();
        Application.SetCompatibleTextRenderingDefault(false);

        var form = new WebCapture();

        // The User-Agent must be set before the first navigation is started.
        if (!string.IsNullOrEmpty(SPUserAgent))
        {
            UrlMkSetSessionOption(URLMON_OPTION_USERAGENT, SPUserAgent, SPUserAgent.Length, 0);
        }

        // Step 1: log in.
        if (!NavigateAndWait(form, LoginUrl))
        {
            throw new TimeoutException("Timed out loading the login page: " + LoginUrl);
        }
        SubmitLogin(form);
        PumpMessages(LoginSettleSeconds);

        // Step 2: collect the thread links from the archive listing.
        if (!NavigateAndWait(form, ThreadListUrl))
        {
            throw new TimeoutException("Timed out loading the thread list: " + ThreadListUrl);
        }
        List<string> threadUrls = ExtractThreadUrls(LoadRenderedDocument(form));
        if (threadUrls.Count == 0)
        {
            throw new InvalidOperationException(
                "No thread links were found on " + ThreadListUrl + " (the login may have failed).");
        }

        // Step 3: visit every thread and record its title and download links.
        List<string> outputLines = new List<string>();
        foreach (string threadUrl in threadUrls)
        {
            if (!NavigateAndWait(form, threadUrl))
            {
                // One unresponsive thread page should not abort the whole crawl.
                continue;
            }
            PumpMessages(ThreadSettleSeconds);

            List<string> entry = ExtractDownloadEntry(LoadRenderedDocument(form));
            if (entry == null)
            {
                continue;
            }
            outputLines.Add(string.Join(Environment.NewLine, entry.ToArray()));
            outputLines.Add(Environment.NewLine);
        }

        System.IO.File.WriteAllLines(OutputPath, outputLines.ToArray(), Encoding.UTF8);
    }

    /// <summary>
    /// Starts a navigation and pumps the message loop until the form reports the
    /// document as loaded or <see cref="PageLoadTimeoutSeconds"/> elapses.
    /// </summary>
    /// <param name="form">Browser host form; its DocumentOK flag is reset here and polled
    /// (presumably the form sets it when the document has finished loading; confirm in WebCapture).</param>
    /// <param name="url">Address to navigate to.</param>
    /// <returns><c>true</c> if DocumentOK became true in time; <c>false</c> on timeout.</returns>
    private static bool NavigateAndWait(WebCapture form, string url)
    {
        form.DocumentOK = false;
        form.Navigate(url);

        var clock = System.Diagnostics.Stopwatch.StartNew();
        while (!form.DocumentOK)
        {
            if (clock.Elapsed.TotalSeconds > PageLoadTimeoutSeconds)
            {
                return false;
            }
            Application.DoEvents();
            System.Threading.Thread.Sleep(PollIntervalMilliseconds);
        }
        return true;
    }

    /// <summary>
    /// Keeps the message loop running for a fixed amount of time.
    /// </summary>
    /// <param name="seconds">How long to keep pumping messages.</param>
    private static void PumpMessages(double seconds)
    {
        // Stopwatch is monotonic, so the wait is unaffected by system clock changes.
        var clock = System.Diagnostics.Stopwatch.StartNew();
        while (clock.Elapsed.TotalSeconds <= seconds)
        {
            Application.DoEvents();
            System.Threading.Thread.Sleep(PollIntervalMilliseconds);
        }
    }

    /// <summary>
    /// Fills in the login form inside the element with id "content" and clicks
    /// its submit button.
    /// </summary>
    /// <param name="form">Browser host form currently showing the login page.</param>
    /// <exception cref="InvalidOperationException">The page has no element with id "content".</exception>
    private static void SubmitLogin(WebCapture form)
    {
        var liveDocument = form.WebBrowser.Document;
        if (liveDocument == null)
        {
            throw new InvalidOperationException("The login page has no document loaded.");
        }

        var content = liveDocument.GetElementById("content");
        if (content == null)
        {
            throw new InvalidOperationException("The login page has no element with id 'content'.");
        }

        var inputs = content.GetElementsByTagName("input");
        for (int i = 0; i < inputs.Count; i++)
        {
            var field = inputs[i];
            string markup = field.OuterHtml ?? string.Empty;

            // Fields are recognised by a substring of their markup; "id" and
            // "password" are placeholder credentials.
            if (markup.Contains("username"))
            {
                field.SetAttribute("value", "id");
            }
            else if (markup.Contains("pass"))
            {
                field.SetAttribute("value", "password");
            }
            else if (markup.Contains("submit"))
            {
                field.InvokeMember("Click");
                break;
            }
        }
    }

    /// <summary>
    /// Parses the HTML currently rendered in the browser with HtmlAgilityPack.
    /// </summary>
    /// <param name="form">Browser host form.</param>
    /// <returns>The parsed document; it is empty when the browser has no html element.</returns>
    private static HtmlAgilityPack.HtmlDocument LoadRenderedDocument(WebCapture form)
    {
        var parsed = new HtmlAgilityPack.HtmlDocument();

        var liveDocument = form.WebBrowser.Document;
        if (liveDocument == null)
        {
            return parsed;
        }

        var roots = liveDocument.GetElementsByTagName("html");
        if (roots.Count > 0 && roots[0].OuterHtml != null)
        {
            parsed.LoadHtml(roots[0].OuterHtml);
        }
        return parsed;
    }

    /// <summary>
    /// Collects the href of every anchor inside <c>div class="threadlist"</c>.
    /// </summary>
    /// <param name="page">Parsed thread list page.</param>
    /// <returns>The hrefs in document order; empty when the page has no matching anchors.</returns>
    private static List<string> ExtractThreadUrls(HtmlAgilityPack.HtmlDocument page)
    {
        List<string> urls = new List<string>();

        // SelectNodes returns null, not an empty collection, when nothing matches.
        HtmlNodeCollection anchors = page.DocumentNode.SelectNodes("//div[@class='threadlist']//a");
        if (anchors == null)
        {
            return urls;
        }

        foreach (HtmlNode anchor in anchors)
        {
            string href = anchor.GetAttributeValue("href", string.Empty);
            if (!string.IsNullOrEmpty(href))
            {
                urls.Add(href);
            }
        }
        return urls;
    }

    /// <summary>
    /// Builds one output entry for a thread page: the title line followed by the
    /// href of every download link.
    /// </summary>
    /// <param name="page">Parsed thread page.</param>
    /// <returns>
    /// The entry lines, or <c>null</c> when the page has neither
    /// <c>rel="nofollow"</c> nor <c>target="_blank"</c> anchors.
    /// </returns>
    private static List<string> ExtractDownloadEntry(HtmlAgilityPack.HtmlDocument page)
    {
        HtmlNodeCollection links = page.DocumentNode.SelectNodes("//a[@rel='nofollow']");
        if (links == null)
        {
            // Fall back to anchors that open in a new window.
            links = page.DocumentNode.SelectNodes("//a[@target='_blank']");
        }
        if (links == null)
        {
            return null;
        }

        HtmlNode title = page.DocumentNode.SelectSingleNode("//div[@id='fullversion']//a");

        List<string> entry = new List<string>();
        // Keep the "title first" layout even when the title anchor is missing.
        entry.Add(title == null ? string.Empty : title.InnerText);

        // Every link is recorded, including the first one.
        foreach (HtmlNode link in links)
        {
            string href = link.GetAttributeValue("href", string.Empty);
            if (!string.IsNullOrEmpty(href))
            {
                entry.Add(href);
            }
        }
        return entry;
    }
}
}
// Reposted from: https://www.cnblogs.com/c-x-a/p/6400796.html