// Crawls list pages 1..33 and, for every album linked from a list page, downloads
// all of the album's images into D:\temp\1\<album title>\<n>.jpg.
// ListUrl is declared outside this snippet; it is used as a string.Format template
// whose {0} receives "" for page 1 and the page number for every other page.
System.Net.WebClient wc = new System.Net.WebClient();
HtmlAgilityPack.HtmlDocument listdoc = new HtmlAgilityPack.HtmlDocument();
HtmlAgilityPack.HtmlNode listbody = null;
HtmlAgilityPack.HtmlDocument cotentdoc = new HtmlAgilityPack.HtmlDocument();
HtmlAgilityPack.HtmlNode cotentbody = null;
for (int i = 1; i <= 33; i++)
{
    string tempList = string.Format(ListUrl, i == 1 ? "" : i.ToString());
    string htmlStr;
    try
    {
        htmlStr = wc.DownloadString(tempList);
    }
    catch (System.Net.WebException ex)
    {
        // DownloadString reports failure by throwing, not by returning null;
        // a list page that cannot be fetched is skipped instead of ending the crawl.
        System.Diagnostics.Trace.WriteLine("List page skipped: " + tempList + " (" + ex.Message + ")");
        continue;
    }
    if (string.IsNullOrEmpty(htmlStr)) continue;
    listdoc.LoadHtml(htmlStr);
    listbody = listdoc.DocumentNode;
    HtmlAgilityPack.HtmlNodeCollection lists = listbody.SelectNodes("//div/ul/li");
    // SelectNodes returns null (not an empty collection) when nothing matches.
    if (lists == null) continue;
    foreach (HtmlAgilityPack.HtmlNode list in lists)
    {
        // Skip list items that do not carry a titled link.
        HtmlAgilityPack.HtmlNode link = list.SelectSingleNode("./a");
        if (link == null || link.Attributes["title"] == null || link.Attributes["href"] == null) continue;
        var listname = link.Attributes["title"].Value;
        var contenturl = "http://XXXX.cc/" + link.Attributes["href"].Value;
        try
        {
            int end = 1;    // number of pages in the album; read from page 1
            int tonum = 1;  // running image number, continues across pages
            string willdir = @"D:\temp\1\" + listname;
            for (int m = 1; m <= end; m++)
            {
                if (m == 2)
                {
                    // Pages after the first are named <name>_<m>.htm: turn the URL into a template once.
                    contenturl = contenturl.Substring(0, contenturl.LastIndexOf(".htm")) + "_{0}.htm";
                }
                string pageUrl = m >= 2 ? string.Format(contenturl, m.ToString()) : contenturl;
                cotentdoc.LoadHtml(wc.DownloadString(pageUrl));
                cotentbody = cotentdoc.DocumentNode;
                if (m == 1)
                {
                    end = int.Parse(cotentbody.SelectSingleNode("//div[@class='pp']/font[@color='red']").InnerHtml.Trim());
                    System.IO.Directory.CreateDirectory(willdir);
                }
                var contentlists = cotentbody.SelectNodes("//div[@class='pp']//img");
                // A page without images must not abandon the remaining pages of the album.
                if (contentlists == null) continue;
                foreach (var n in contentlists)
                {
                    var src = n.Attributes["src"];
                    if (src == null) continue;
                    wc.DownloadFile(src.Value, System.IO.Path.Combine(willdir, tonum.ToString() + ".jpg"));
                    tonum++;
                }
            }
        }
        catch (System.Exception ex)
        {
            // Best effort: a broken album is reported and skipped, the crawl goes on.
            System.Diagnostics.Trace.WriteLine("Album skipped: " + listname + " (" + ex.Message + ")");
        }
    }
}
9 个解决方案
#1
如果弄成多线程的会更好吧。呵呵
#2
what are you doing?
#3
lz,你这个帖子发得.....
#4
啥玩意呀,报错
#5
lists对象为null:SelectNodes在XPath没有匹配到任何节点时返回null而不是空集合,foreach之前需要先判空。
#6
菜鸟路过,这个真不懂。。。
#7
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using HtmlAgilityPack;
using System.Threading;
using System.Collections;
namespace AutoPicDownLoad
{
public partial class frmMain : Form
{
/// <summary>
/// Builds the form and hooks the background worker up to its handlers.
/// </summary>
public frmMain()
{
    InitializeComponent();
    bk.WorkerSupportsCancellation = true;
    bk.DoWork += bk_DoWork;
    bk.RunWorkerCompleted += bk_RunWorkerCompleted;
}

/// <summary>Worker whose DoWork handler (bk_DoWork) runs the download job.</summary>
BackgroundWorker bk = new BackgroundWorker();

/// <summary>Shared object; nothing in this part of the class references it.</summary>
public static object wobj = new object();
/// <summary>
/// Closes the form when the background worker raises RunWorkerCompleted.
/// </summary>
void bk_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    Close();
}
/// <summary>
/// Thread-pool work item: takes one list-page URL from the shared queue and downloads
/// every album linked from that page.
/// </summary>
/// <param name="state">The <see cref="AutoResetEvent"/> to signal when this work item is done.</param>
/// <remarks>
/// The event is signalled on every exit path. bk_DoWork blocks in WaitHandle.WaitAll until
/// all handles are set, so a work item that left without signalling would hang the job.
/// </remarks>
static void PooledFunc(object state)
{
    AutoResetEvent are = (AutoResetEvent)state;
    try
    {
        string listUrl = GetListUrl;
        if (listUrl == null)
        {
            // Queue already drained by the other work items.
            return;
        }

        using (System.Net.WebClient wc = new System.Net.WebClient())
        {
            string htmlStr = wc.DownloadString(listUrl);
            if (string.IsNullOrEmpty(htmlStr))
            {
                return;
            }

            HtmlAgilityPack.HtmlDocument listdoc = new HtmlAgilityPack.HtmlDocument();
            listdoc.LoadHtml(htmlStr);
            HtmlAgilityPack.HtmlNodeCollection lists = listdoc.DocumentNode.SelectNodes("//div/ul/li");
            // SelectNodes returns null (not an empty collection) when nothing matches.
            if (lists == null)
            {
                return;
            }

            foreach (HtmlNode list in lists)
            {
                // Skip list items that do not carry a titled link.
                HtmlNode link = list.SelectSingleNode("./a");
                if (link == null || link.Attributes["title"] == null || link.Attributes["href"] == null)
                {
                    continue;
                }

                string listname = link.Attributes["title"].Value;
                try
                {
                    DownloadAlbum(wc, listname, "http://ccrt.cc" + link.Attributes["href"].Value);
                }
                catch (Exception ex)
                {
                    // Best effort: a broken album is reported and skipped.
                    System.Diagnostics.Trace.WriteLine("Album skipped: " + listname + " (" + ex.Message + ")");
                }
            }
        }
    }
    catch (Exception ex)
    {
        // An exception escaping a thread-pool work item would take the process down.
        System.Diagnostics.Trace.WriteLine("List page failed: " + ex.Message);
    }
    finally
    {
        are.Set();
    }
}

/// <summary>
/// Downloads all images of one album, page by page, into D:\temp\1\&lt;listname&gt;.
/// Images are numbered 1.jpg, 2.jpg, ... across pages; files that already exist are kept.
/// </summary>
/// <param name="wc">Client used for both page and image downloads.</param>
/// <param name="listname">Album title; used as the directory name.</param>
/// <param name="contenturl">URL of the album's first page.</param>
private static void DownloadAlbum(System.Net.WebClient wc, string listname, string contenturl)
{
    HtmlAgilityPack.HtmlDocument cotentdoc = new HtmlAgilityPack.HtmlDocument();
    string willdir = @"D:\temp\1\" + listname;
    int end = 1;    // number of pages; read from page 1
    int tonum = 1;  // running image number, continues across pages
    for (int m = 1; m <= end; m++)
    {
        if (m == 2)
        {
            // Pages after the first are named <name>_<m>.htm: turn the URL into a template once.
            contenturl = contenturl.Substring(0, contenturl.LastIndexOf(".htm")) + "_{0}.htm";
        }

        // Only format once contenturl really is a template (m >= 2).
        string pageUrl = m >= 2 ? string.Format(contenturl, m.ToString()) : contenturl;
        cotentdoc.LoadHtml(wc.DownloadString(pageUrl));
        HtmlNode cotentbody = cotentdoc.DocumentNode;
        if (m == 1)
        {
            end = int.Parse(cotentbody.SelectSingleNode("//div[@class='pp']/font[@color='red']").InnerHtml.Trim());
            System.IO.Directory.CreateDirectory(willdir);
        }

        var contentlists = cotentbody.SelectNodes("//div[@class='pp']//img");
        // A page without images must not abandon the remaining pages of the album.
        if (contentlists == null)
        {
            continue;
        }

        foreach (var n in contentlists)
        {
            var src = n.Attributes["src"];
            if (src == null)
            {
                continue;
            }

            string target = System.IO.Path.Combine(willdir, tonum.ToString() + ".jpg");
            // Already present (e.g. from an earlier run): keep the number, skip the download.
            if (!System.IO.File.Exists(target))
            {
                wc.DownloadFile(src.Value, target);
            }
            tonum++;
        }
    }
}
// Five handles: five work items are started per round, each downloading on its own.
static WaitHandle[] waitHandles = new WaitHandle[]
{
    new AutoResetEvent(false),
    new AutoResetEvent(false),
    new AutoResetEvent(false),
    new AutoResetEvent(false),
    new AutoResetEvent(false)
};

// Pending list-page URLs; filled by bk_DoWork, consumed through GetListUrl.
static System.Collections.Queue q = new System.Collections.Queue();

// Lock taken by GetListUrl around Dequeue.
public static object obj = new object();
/// <summary>
/// Removes and returns the next list-page URL from the shared queue,
/// or null when the queue is empty.
/// </summary>
/// <remarks>Reading this property has a side effect: every read dequeues one item.</remarks>
public static string GetListUrl
{
    get
    {
        // System.Collections.Queue instance members are not thread-safe, so Count is
        // read under the same lock as Dequeue rather than outside it.
        lock (obj)
        {
            return q.Count > 0 ? q.Dequeue().ToString() : null;
        }
    }
}
/// <summary>
/// DoWork handler: queues the URLs of list pages 17..33, then repeatedly starts one
/// pool work item per wait handle and waits for all of them, until the queue is empty.
/// </summary>
void bk_DoWork(object sender, DoWorkEventArgs e)
{
    const string listUrlTemplate = "http://ccrt.cc/html/yazhou/index{0}.html";

    // i = 4 would be the fourth list page.
    for (int page = 17; page <= 33; page++)
    {
        // Page 1 carries no number in its URL.
        string suffix;
        if (page == 1)
        {
            suffix = "";
        }
        else
        {
            suffix = page.ToString();
        }
        q.Enqueue(string.Format(listUrlTemplate, suffix));
    }

    WaitCallback worker = PooledFunc;
    while (q.Count != 0)
    {
        WriteLine("线程池开始" + q.Count.ToString());
        foreach (WaitHandle handle in waitHandles)
        {
            ThreadPool.QueueUserWorkItem(worker, handle);
        }
        WaitHandle.WaitAll(waitHandles);
        WriteLine("线程池结束" + q.Count.ToString());
    }
}
/// <summary>
/// Appends one line to the progress log D:\temp\info.log.
/// </summary>
/// <param name="line">Text to write; a line terminator is added.</param>
public void WriteLine(string line)
{
    // The log is written before any download creates D:\temp\..., so make sure it exists.
    System.IO.Directory.CreateDirectory(@"D:\temp");

    // append: true - the single-argument constructor truncates the file, which
    // would leave only the most recent message in the log.
    using (System.IO.StreamWriter sw = new System.IO.StreamWriter(@"D:\temp\info.log", true))
    {
        sw.WriteLine(line);
    }
}
/// <summary>
/// Start button handler: disables the button, then starts the background worker.
/// </summary>
private void btnStart_Click(object sender, EventArgs e)
{
    btnStart.Enabled = false;
    bk.RunWorkerAsync();
}
}
}
#8
需下载HtmlAgilityPack 第三方DLL~
#9
using System; using System.Collections.Generic; using System.ComponentModel; using System.Data; using System.Drawing; using System.Text; using System.Windows.Forms; using HtmlAgilityPack; using System.Threading; using System.Collections; namespace AutoPicDownLoad { 应用我的http://www.ymrt.net不知是否可以?
#1
如果弄成多线程的会更好吧。呵呵
#2
what are you doing?
#3
lz,你这个帖子发得.....
#4
啥玩意呀,报错
#5
lists对象为null:SelectNodes在XPath没有匹配到任何节点时返回null而不是空集合,foreach之前需要先判空。
#6
菜鸟路过,这个真不懂。。。
#7
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using HtmlAgilityPack;
using System.Threading;
using System.Collections;
namespace AutoPicDownLoad
{
public partial class frmMain : Form
{
/// <summary>
/// Builds the form and hooks the background worker up to its handlers.
/// </summary>
public frmMain()
{
    InitializeComponent();
    bk.WorkerSupportsCancellation = true;
    bk.DoWork += bk_DoWork;
    bk.RunWorkerCompleted += bk_RunWorkerCompleted;
}

/// <summary>Worker whose DoWork handler (bk_DoWork) runs the download job.</summary>
BackgroundWorker bk = new BackgroundWorker();

/// <summary>Shared object; nothing in this part of the class references it.</summary>
public static object wobj = new object();
/// <summary>
/// Closes the form when the background worker raises RunWorkerCompleted.
/// </summary>
void bk_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    Close();
}
/// <summary>
/// Thread-pool work item: takes one list-page URL from the shared queue and downloads
/// every album linked from that page.
/// </summary>
/// <param name="state">The <see cref="AutoResetEvent"/> to signal when this work item is done.</param>
/// <remarks>
/// The event is signalled on every exit path. bk_DoWork blocks in WaitHandle.WaitAll until
/// all handles are set, so a work item that left without signalling would hang the job.
/// </remarks>
static void PooledFunc(object state)
{
    AutoResetEvent are = (AutoResetEvent)state;
    try
    {
        string listUrl = GetListUrl;
        if (listUrl == null)
        {
            // Queue already drained by the other work items.
            return;
        }

        using (System.Net.WebClient wc = new System.Net.WebClient())
        {
            string htmlStr = wc.DownloadString(listUrl);
            if (string.IsNullOrEmpty(htmlStr))
            {
                return;
            }

            HtmlAgilityPack.HtmlDocument listdoc = new HtmlAgilityPack.HtmlDocument();
            listdoc.LoadHtml(htmlStr);
            HtmlAgilityPack.HtmlNodeCollection lists = listdoc.DocumentNode.SelectNodes("//div/ul/li");
            // SelectNodes returns null (not an empty collection) when nothing matches.
            if (lists == null)
            {
                return;
            }

            foreach (HtmlNode list in lists)
            {
                // Skip list items that do not carry a titled link.
                HtmlNode link = list.SelectSingleNode("./a");
                if (link == null || link.Attributes["title"] == null || link.Attributes["href"] == null)
                {
                    continue;
                }

                string listname = link.Attributes["title"].Value;
                try
                {
                    DownloadAlbum(wc, listname, "http://ccrt.cc" + link.Attributes["href"].Value);
                }
                catch (Exception ex)
                {
                    // Best effort: a broken album is reported and skipped.
                    System.Diagnostics.Trace.WriteLine("Album skipped: " + listname + " (" + ex.Message + ")");
                }
            }
        }
    }
    catch (Exception ex)
    {
        // An exception escaping a thread-pool work item would take the process down.
        System.Diagnostics.Trace.WriteLine("List page failed: " + ex.Message);
    }
    finally
    {
        are.Set();
    }
}

/// <summary>
/// Downloads all images of one album, page by page, into D:\temp\1\&lt;listname&gt;.
/// Images are numbered 1.jpg, 2.jpg, ... across pages; files that already exist are kept.
/// </summary>
/// <param name="wc">Client used for both page and image downloads.</param>
/// <param name="listname">Album title; used as the directory name.</param>
/// <param name="contenturl">URL of the album's first page.</param>
private static void DownloadAlbum(System.Net.WebClient wc, string listname, string contenturl)
{
    HtmlAgilityPack.HtmlDocument cotentdoc = new HtmlAgilityPack.HtmlDocument();
    string willdir = @"D:\temp\1\" + listname;
    int end = 1;    // number of pages; read from page 1
    int tonum = 1;  // running image number, continues across pages
    for (int m = 1; m <= end; m++)
    {
        if (m == 2)
        {
            // Pages after the first are named <name>_<m>.htm: turn the URL into a template once.
            contenturl = contenturl.Substring(0, contenturl.LastIndexOf(".htm")) + "_{0}.htm";
        }

        // Only format once contenturl really is a template (m >= 2).
        string pageUrl = m >= 2 ? string.Format(contenturl, m.ToString()) : contenturl;
        cotentdoc.LoadHtml(wc.DownloadString(pageUrl));
        HtmlNode cotentbody = cotentdoc.DocumentNode;
        if (m == 1)
        {
            end = int.Parse(cotentbody.SelectSingleNode("//div[@class='pp']/font[@color='red']").InnerHtml.Trim());
            System.IO.Directory.CreateDirectory(willdir);
        }

        var contentlists = cotentbody.SelectNodes("//div[@class='pp']//img");
        // A page without images must not abandon the remaining pages of the album.
        if (contentlists == null)
        {
            continue;
        }

        foreach (var n in contentlists)
        {
            var src = n.Attributes["src"];
            if (src == null)
            {
                continue;
            }

            string target = System.IO.Path.Combine(willdir, tonum.ToString() + ".jpg");
            // Already present (e.g. from an earlier run): keep the number, skip the download.
            if (!System.IO.File.Exists(target))
            {
                wc.DownloadFile(src.Value, target);
            }
            tonum++;
        }
    }
}
// Five handles: five work items are started per round, each downloading on its own.
static WaitHandle[] waitHandles = new WaitHandle[]
{
    new AutoResetEvent(false),
    new AutoResetEvent(false),
    new AutoResetEvent(false),
    new AutoResetEvent(false),
    new AutoResetEvent(false)
};

// Pending list-page URLs; filled by bk_DoWork, consumed through GetListUrl.
static System.Collections.Queue q = new System.Collections.Queue();

// Lock taken by GetListUrl around Dequeue.
public static object obj = new object();
/// <summary>
/// Removes and returns the next list-page URL from the shared queue,
/// or null when the queue is empty.
/// </summary>
/// <remarks>Reading this property has a side effect: every read dequeues one item.</remarks>
public static string GetListUrl
{
    get
    {
        // System.Collections.Queue instance members are not thread-safe, so Count is
        // read under the same lock as Dequeue rather than outside it.
        lock (obj)
        {
            return q.Count > 0 ? q.Dequeue().ToString() : null;
        }
    }
}
/// <summary>
/// DoWork handler: queues the URLs of list pages 17..33, then repeatedly starts one
/// pool work item per wait handle and waits for all of them, until the queue is empty.
/// </summary>
void bk_DoWork(object sender, DoWorkEventArgs e)
{
    const string listUrlTemplate = "http://ccrt.cc/html/yazhou/index{0}.html";

    // i = 4 would be the fourth list page.
    for (int page = 17; page <= 33; page++)
    {
        // Page 1 carries no number in its URL.
        string suffix;
        if (page == 1)
        {
            suffix = "";
        }
        else
        {
            suffix = page.ToString();
        }
        q.Enqueue(string.Format(listUrlTemplate, suffix));
    }

    WaitCallback worker = PooledFunc;
    while (q.Count != 0)
    {
        WriteLine("线程池开始" + q.Count.ToString());
        foreach (WaitHandle handle in waitHandles)
        {
            ThreadPool.QueueUserWorkItem(worker, handle);
        }
        WaitHandle.WaitAll(waitHandles);
        WriteLine("线程池结束" + q.Count.ToString());
    }
}
/// <summary>
/// Appends one line to the progress log D:\temp\info.log.
/// </summary>
/// <param name="line">Text to write; a line terminator is added.</param>
public void WriteLine(string line)
{
    // The log is written before any download creates D:\temp\..., so make sure it exists.
    System.IO.Directory.CreateDirectory(@"D:\temp");

    // append: true - the single-argument constructor truncates the file, which
    // would leave only the most recent message in the log.
    using (System.IO.StreamWriter sw = new System.IO.StreamWriter(@"D:\temp\info.log", true))
    {
        sw.WriteLine(line);
    }
}
/// <summary>
/// Start button handler: disables the button, then starts the background worker.
/// </summary>
private void btnStart_Click(object sender, EventArgs e)
{
    btnStart.Enabled = false;
    bk.RunWorkerAsync();
}
}
}
#8
需下载HtmlAgilityPack 第三方DLL~
#9
using System; using System.Collections.Generic; using System.ComponentModel; using System.Data; using System.Drawing; using System.Text; using System.Windows.Forms; using HtmlAgilityPack; using System.Threading; using System.Collections; namespace AutoPicDownLoad { 应用我的http://www.ymrt.net不知是否可以?