图片采集,你懂的~

时间:2022-07-21 21:29:17
// Crawls list pages 1..33 of the site, and for every album linked from a list
// page downloads all of its images into D:\temp\1\<album title>\<n>.jpg.
string ListUrl = "http://XXXX.cc/html/yazhou/index{0}.html";

// WebClient is IDisposable; release it once the whole crawl is finished.
using (System.Net.WebClient wc = new System.Net.WebClient())
{
    HtmlAgilityPack.HtmlDocument listdoc = new HtmlAgilityPack.HtmlDocument();
    HtmlAgilityPack.HtmlDocument cotentdoc = new HtmlAgilityPack.HtmlDocument();

    for (int i = 1; i <= 33; i++)
    {
        // Page 1 is "index.html"; page N (N >= 2) is "indexN.html".
        string tempList = string.Format(ListUrl, i == 1 ? "" : i.ToString());
        string htmlStr = wc.DownloadString(tempList);
        if (string.IsNullOrEmpty(htmlStr))
        {
            continue;
        }

        listdoc.LoadHtml(htmlStr);
        HtmlAgilityPack.HtmlNodeCollection lists = listdoc.DocumentNode.SelectNodes("//div/ul/li");
        if (lists == null)
        {
            // SelectNodes yields null (not an empty collection) when nothing matches.
            continue;
        }

        foreach (HtmlAgilityPack.HtmlNode list in lists)
        {
            // Not every <li> on the page is an album entry; skip the ones
            // without a direct <a title=... href=...> child.
            HtmlAgilityPack.HtmlNode anchor = list.SelectSingleNode("./a");
            if (anchor == null)
            {
                continue;
            }
            var titleAttr = anchor.Attributes["title"];
            var hrefAttr = anchor.Attributes["href"];
            if (titleAttr == null || hrefAttr == null)
            {
                continue;
            }

            // The title becomes a directory name, so replace characters that
            // are not allowed in a file-system name.
            string listname = titleAttr.Value;
            foreach (char bad in System.IO.Path.GetInvalidFileNameChars())
            {
                listname = listname.Replace(bad, '_');
            }
            listname = listname.Trim();
            if (listname.Length == 0)
            {
                continue;
            }

            string firstPageUrl = "http://XXXX.cc/" + hrefAttr.Value;
            string willdir = @"D:\temp\1\" + listname;

            try
            {
                // Follow-up pages are "<name>_<m>.htm", derived from the first page URL.
                int extIndex = firstPageUrl.LastIndexOf(".htm", System.StringComparison.Ordinal);
                int end = 1;    // number of pages in the album, read from page 1
                int tonum = 1;  // running image number across all pages of the album

                for (int m = 1; m <= end; m++)
                {
                    string pageUrl = firstPageUrl;
                    if (m >= 2)
                    {
                        if (extIndex < 0)
                        {
                            break; // cannot build follow-up page URLs
                        }
                        pageUrl = firstPageUrl.Substring(0, extIndex) + "_" + m.ToString() + ".htm";
                    }

                    cotentdoc.LoadHtml(wc.DownloadString(pageUrl));
                    HtmlAgilityPack.HtmlNode cotentbody = cotentdoc.DocumentNode;

                    if (m == 1)
                    {
                        HtmlAgilityPack.HtmlNode countNode =
                            cotentbody.SelectSingleNode("//div[@class='pp']/font[@color='red']");
                        if (countNode == null || !int.TryParse(countNode.InnerHtml.Trim(), out end))
                        {
                            break; // page count unknown: skip this album
                        }
                        System.IO.Directory.CreateDirectory(willdir);
                    }

                    HtmlAgilityPack.HtmlNodeCollection contentlists =
                        cotentbody.SelectNodes("//div[@class='pp']//img");
                    if (contentlists == null)
                    {
                        continue; // no images on this page; try the next one
                    }

                    foreach (HtmlAgilityPack.HtmlNode n in contentlists)
                    {
                        var srcAttr = n.Attributes["src"];
                        string target = willdir + "\\" + tonum.ToString() + ".jpg";
                        tonum++;
                        if (srcAttr == null)
                        {
                            continue;
                        }
                        wc.DownloadFile(srcAttr.Value, target);
                    }
                }
            }
            catch (System.Exception ex)
            {
                // Best effort: one broken album must not stop the crawl, but
                // leave a trace instead of swallowing the failure silently.
                System.Diagnostics.Trace.WriteLine("Album '" + listname + "' skipped: " + ex.Message);
                continue;
            }
        }
    }
}

9 个解决方案

#1


如果弄成多线程的会更好吧。呵呵

#2


what are you doing?

#3


lz,你这个帖子发得.....

#4


啥玩意呀,报错

#5


lists 对象为 null:SelectNodes 在没有匹配到任何节点时返回 null,foreach 之前需要先判空。

#6


菜鸟路过,这个真不懂。。。

#7



using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using HtmlAgilityPack;
using System.Threading;
using System.Collections;
namespace AutoPicDownLoad
{
    public partial class frmMain : Form
    {
        public frmMain()
        {
            InitializeComponent();
            bk.DoWork += new DoWorkEventHandler(bk_DoWork);
            bk.RunWorkerCompleted += new RunWorkerCompletedEventHandler(bk_RunWorkerCompleted);
            bk.WorkerSupportsCancellation = true;
        }
        BackgroundWorker bk = new BackgroundWorker();
        public static object wobj = new object();
        void bk_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
        {
            this.Close();
        }
        static void PooledFunc(object state)
        {
            AutoResetEvent are = (AutoResetEvent)state;
            System.Net.WebClient wc = new System.Net.WebClient();
            HtmlAgilityPack.HtmlDocument listdoc = new HtmlAgilityPack.HtmlDocument();
            HtmlAgilityPack.HtmlNode listbody = null;
            HtmlAgilityPack.HtmlDocument cotentdoc = new HtmlAgilityPack.HtmlDocument();
            HtmlAgilityPack.HtmlNode cotentbody = null;
            string ListUrl = GetListUrl;
            if (ListUrl == null)
            {
                are.Set();
                return;
            }
            string htmlStr = wc.DownloadString(ListUrl.ToString());
            if (htmlStr == null) 
            {
                are.Set();
                return; 
            }
            listdoc.LoadHtml(htmlStr);
            listbody = listdoc.DocumentNode;
            HtmlAgilityPack.HtmlNodeCollection lists = listbody.SelectNodes("//div/ul/li");
            foreach (HtmlNode list in lists)
            {
                var listname = list.SelectSingleNode("./a").Attributes["title"].Value;
                var contenturl = "http://ccrt.cc" + list.SelectSingleNode("./a").Attributes["href"].Value;
                try
                {
                    int end = 1;
                    int tonum = 1;
                    for (int m = 1; m <= end; m++)
                    {

                        string willdir = @"D:\temp\1\" + listname;
                        string contentHtml = null;
                        if (m == 2)
                        {
                            contenturl = contenturl.Substring(0, contenturl.LastIndexOf(".htm")) + "_{0}.htm";
                        }
                        string tempContentUrl = string.Format(contenturl, m.ToString());
                        if (m >= 2)
                        {
                            contentHtml = wc.DownloadString(tempContentUrl);
                        }
                        else
                        {
                            contentHtml = wc.DownloadString(contenturl);
                        }
                        cotentdoc.LoadHtml(contentHtml);
                        if (cotentdoc == null) continue;
                        cotentbody = cotentdoc.DocumentNode;
                        if (m == 1)
                        {
                            end = int.Parse(cotentbody.SelectSingleNode("//div[@class='pp']/font[@color='red']").InnerHtml.Trim());
                            System.IO.Directory.CreateDirectory(willdir);
                        }
                        var contentlists = cotentbody.SelectNodes("//div[@class='pp']//img");
                        foreach (var n in contentlists)
                        {
                            var picname = n.Attributes["src"].Value;
                            if (System.IO.File.Exists(willdir + "\\" + tonum.ToString() + ".jpg"))
                            {
                                tonum++;
                                continue;
                            }
                            wc.DownloadFile(picname, willdir + "\\" + tonum.ToString() + ".jpg");
                            tonum++;
                        }
                    }
                }
                catch
                {
                    continue;
                }
                finally
                {
                }
            }
            are.Set();
        }
        //开启5个线程分别下载
        static WaitHandle[] waitHandles = new WaitHandle[5] { new AutoResetEvent(false), new AutoResetEvent(false), new AutoResetEvent(false), new AutoResetEvent(false), new AutoResetEvent(false) };
        static System.Collections.Queue q = new System.Collections.Queue();
        public static object obj = new object();
        public static string GetListUrl
        {
            get
            {
                if (q.Count > 0)
                {
                    lock (obj)
                    {
                        if (q.Count > 0)
                        {
                            return q.Dequeue().ToString();
                        }
                    }
                }
                return null;
            }
        }
        void bk_DoWork(object sender, DoWorkEventArgs e)
        {

            string ListUrl = "http://ccrt.cc/html/yazhou/index{0}.html";
            for (int i = 17; i <= 33; i++)//i=4  列表第四页
            {
                string tempList = string.Format(ListUrl.ToString(), i == 1 ? "" : i.ToString());
                q.Enqueue(tempList);
            }
            WaitCallback callBack = new WaitCallback(PooledFunc);
            while (q.Count != 0)
            {
                WriteLine("线程池开始"+q.Count.ToString());
                for (int ccc = 0; ccc < waitHandles.Length; ccc++)
                {
                    ThreadPool.QueueUserWorkItem(callBack, waitHandles[ccc]);
                }
                WaitHandle.WaitAll(waitHandles);
                 WriteLine("线程池结束"+q.Count.ToString());
            }
        }
        public void WriteLine(string line)
        {
            using (System.IO.StreamWriter sw = new System.IO.StreamWriter(@"D:\temp\info.log"))
            {
                sw.WriteLine(line);
            }
        }
        private void btnStart_Click(object sender, EventArgs e)
        {
            this.btnStart.Enabled=false;
            bk.RunWorkerAsync();
        }
    }
}


#8


需下载HtmlAgilityPack 第三方DLL~

#9


using System; using System.Collections.Generic; using System.ComponentModel; using System.Data; using System.Drawing; using System.Text; using System.Windows.Forms; using HtmlAgilityPack; using System.Threading; using System.Collections; namespace AutoPicDownLoad { 应用我的http://www.ymrt.net不知是否可以?

#1


如果弄成多线程的会更好吧。呵呵

#2


what are you doing?

#3


lz,你这个帖子发得.....

#4


啥玩意呀,报错

#5


lists 对象为 null:SelectNodes 在没有匹配到任何节点时返回 null,foreach 之前需要先判空。

#6


菜鸟路过,这个真不懂。。。

#7



using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using HtmlAgilityPack;
using System.Threading;
using System.Collections;
namespace AutoPicDownLoad
{
    public partial class frmMain : Form
    {
        public frmMain()
        {
            InitializeComponent();
            bk.DoWork += new DoWorkEventHandler(bk_DoWork);
            bk.RunWorkerCompleted += new RunWorkerCompletedEventHandler(bk_RunWorkerCompleted);
            bk.WorkerSupportsCancellation = true;
        }
        BackgroundWorker bk = new BackgroundWorker();
        public static object wobj = new object();
        void bk_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
        {
            this.Close();
        }
        static void PooledFunc(object state)
        {
            AutoResetEvent are = (AutoResetEvent)state;
            System.Net.WebClient wc = new System.Net.WebClient();
            HtmlAgilityPack.HtmlDocument listdoc = new HtmlAgilityPack.HtmlDocument();
            HtmlAgilityPack.HtmlNode listbody = null;
            HtmlAgilityPack.HtmlDocument cotentdoc = new HtmlAgilityPack.HtmlDocument();
            HtmlAgilityPack.HtmlNode cotentbody = null;
            string ListUrl = GetListUrl;
            if (ListUrl == null)
            {
                are.Set();
                return;
            }
            string htmlStr = wc.DownloadString(ListUrl.ToString());
            if (htmlStr == null) 
            {
                are.Set();
                return; 
            }
            listdoc.LoadHtml(htmlStr);
            listbody = listdoc.DocumentNode;
            HtmlAgilityPack.HtmlNodeCollection lists = listbody.SelectNodes("//div/ul/li");
            foreach (HtmlNode list in lists)
            {
                var listname = list.SelectSingleNode("./a").Attributes["title"].Value;
                var contenturl = "http://ccrt.cc" + list.SelectSingleNode("./a").Attributes["href"].Value;
                try
                {
                    int end = 1;
                    int tonum = 1;
                    for (int m = 1; m <= end; m++)
                    {

                        string willdir = @"D:\temp\1\" + listname;
                        string contentHtml = null;
                        if (m == 2)
                        {
                            contenturl = contenturl.Substring(0, contenturl.LastIndexOf(".htm")) + "_{0}.htm";
                        }
                        string tempContentUrl = string.Format(contenturl, m.ToString());
                        if (m >= 2)
                        {
                            contentHtml = wc.DownloadString(tempContentUrl);
                        }
                        else
                        {
                            contentHtml = wc.DownloadString(contenturl);
                        }
                        cotentdoc.LoadHtml(contentHtml);
                        if (cotentdoc == null) continue;
                        cotentbody = cotentdoc.DocumentNode;
                        if (m == 1)
                        {
                            end = int.Parse(cotentbody.SelectSingleNode("//div[@class='pp']/font[@color='red']").InnerHtml.Trim());
                            System.IO.Directory.CreateDirectory(willdir);
                        }
                        var contentlists = cotentbody.SelectNodes("//div[@class='pp']//img");
                        foreach (var n in contentlists)
                        {
                            var picname = n.Attributes["src"].Value;
                            if (System.IO.File.Exists(willdir + "\\" + tonum.ToString() + ".jpg"))
                            {
                                tonum++;
                                continue;
                            }
                            wc.DownloadFile(picname, willdir + "\\" + tonum.ToString() + ".jpg");
                            tonum++;
                        }
                    }
                }
                catch
                {
                    continue;
                }
                finally
                {
                }
            }
            are.Set();
        }
        //开启5个线程分别下载
        static WaitHandle[] waitHandles = new WaitHandle[5] { new AutoResetEvent(false), new AutoResetEvent(false), new AutoResetEvent(false), new AutoResetEvent(false), new AutoResetEvent(false) };
        static System.Collections.Queue q = new System.Collections.Queue();
        public static object obj = new object();
        public static string GetListUrl
        {
            get
            {
                if (q.Count > 0)
                {
                    lock (obj)
                    {
                        if (q.Count > 0)
                        {
                            return q.Dequeue().ToString();
                        }
                    }
                }
                return null;
            }
        }
        void bk_DoWork(object sender, DoWorkEventArgs e)
        {

            string ListUrl = "http://ccrt.cc/html/yazhou/index{0}.html";
            for (int i = 17; i <= 33; i++)//i=4  列表第四页
            {
                string tempList = string.Format(ListUrl.ToString(), i == 1 ? "" : i.ToString());
                q.Enqueue(tempList);
            }
            WaitCallback callBack = new WaitCallback(PooledFunc);
            while (q.Count != 0)
            {
                WriteLine("线程池开始"+q.Count.ToString());
                for (int ccc = 0; ccc < waitHandles.Length; ccc++)
                {
                    ThreadPool.QueueUserWorkItem(callBack, waitHandles[ccc]);
                }
                WaitHandle.WaitAll(waitHandles);
                 WriteLine("线程池结束"+q.Count.ToString());
            }
        }
        public void WriteLine(string line)
        {
            using (System.IO.StreamWriter sw = new System.IO.StreamWriter(@"D:\temp\info.log"))
            {
                sw.WriteLine(line);
            }
        }
        private void btnStart_Click(object sender, EventArgs e)
        {
            this.btnStart.Enabled=false;
            bk.RunWorkerAsync();
        }
    }
}


#8


需下载HtmlAgilityPack 第三方DLL~

#9


using System; using System.Collections.Generic; using System.ComponentModel; using System.Data; using System.Drawing; using System.Text; using System.Windows.Forms; using HtmlAgilityPack; using System.Threading; using System.Collections; namespace AutoPicDownLoad { 应用我的http://www.ymrt.net不知是否可以?