+
80
-

C# 如何获取网页里所有链接的完整 URL 地址?

请问 C# 如何获取网页里所有链接的完整 URL 地址?

网友回复

+
0
-
using System;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

namespace HelloWorldApplication
{
    /// <summary>
    /// Console sample that downloads one web page and prints every hyperlink
    /// found on it as a complete (absolute) URL, one per line.
    /// </summary>
    class HelloWorld
    {
        // Page to scan. Relative links are resolved against this address.
        private const string PageUrl = "http://www.bfw.wiki";

        // Matches href="..." or href='...'. Group 1 is the opening quote (the
        // back-reference forces the same closing quote); group 2 is the raw value.
        // Built once and reused instead of being re-created per run of the loop.
        private static readonly Regex HrefPattern = new Regex(
            "href\\s*=\\s*([\"'])(.*?)\\1",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Downloads <see cref="PageUrl"/>, extracts every href attribute value and
        /// writes the resolved absolute http/https URL of each one to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args) {
            string content;
            // Dispose the client deterministically so its resources are released
            // even when the download throws.
            using (WebClient client = new WebClient()) {
                byte[] page = client.DownloadData(PageUrl);
                content = Encoding.UTF8.GetString(page);
            }

            Uri baseUri = new Uri(PageUrl);
            foreach (Match match in HrefPattern.Matches(content)) {
                // Attribute values are HTML-encoded in the markup (e.g. "&amp;").
                string href = WebUtility.HtmlDecode(match.Groups[2].Value).Trim();
                if (href.Length == 0) {
                    continue;
                }

                // Turns "/path", "./page" or "page.html" into a full URL; values
                // that are already absolute are returned unchanged.
                Uri absolute;
                if (!Uri.TryCreate(baseUri, href, out absolute)) {
                    continue;
                }

                // Skip non-web targets such as "javascript:" or "mailto:".
                if (absolute.Scheme != Uri.UriSchemeHttp && absolute.Scheme != Uri.UriSchemeHttps) {
                    continue;
                }

                Console.WriteLine(absolute.AbsoluteUri);
            }
        }
    }
}

+
0
-

这样也可以

using System;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace HelloWorldApplication
{
    /// <summary>
    /// Console sample that fetches a page through a stream reader and prints every
    /// hyperlink found on it as a complete (absolute) URL, one per line.
    /// </summary>
    class HelloWorld
    {
        // Page to scan. Relative links are resolved against this address.
        private const string PageUrl = "http://www.bfw.wiki";

        // Matches href="..." or href='...'. Group 1 is the opening quote (the
        // back-reference forces the same closing quote); group 2 is the raw value.
        private static readonly Regex HrefPattern = new Regex(
            "href\\s*=\\s*([\"'])(.*?)\\1",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Downloads the resource at <paramref name="url"/> and returns its body as text.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The response body decoded as UTF-8.</returns>
        public static string GetHtml(string url) {
            // The using blocks release the client, the response stream and the
            // reader even when the request or the read throws.
            using (WebClient client = new WebClient())
            using (Stream stream = client.OpenRead(url))
            // Decode as UTF-8 rather than Encoding.Default, which is the machine's
            // ANSI code page on .NET Framework and garbles UTF-8 pages.
            using (StreamReader sr = new StreamReader(stream, Encoding.UTF8)) {
                return sr.ReadToEnd();
            }
        }

        /// <summary>
        /// Fetches <see cref="PageUrl"/>, extracts every href attribute value and
        /// writes the resolved absolute http/https URL of each one to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args) {
            // Fetch the page's HTML source.
            string data = GetHtml(PageUrl);

            Uri baseUri = new Uri(PageUrl);
            // Match every link address with the regular expression.
            foreach (Match match in HrefPattern.Matches(data)) {
                // Attribute values are HTML-encoded in the markup (e.g. "&amp;").
                string href = WebUtility.HtmlDecode(match.Groups[2].Value).Trim();
                if (href.Length == 0) {
                    continue;
                }

                // Turns "/path", "./page" or "page.html" into a full URL; values
                // that are already absolute are returned unchanged.
                Uri absolute;
                if (!Uri.TryCreate(baseUri, href, out absolute)) {
                    continue;
                }

                // Skip non-web targets such as "javascript:" or "mailto:".
                if (absolute.Scheme != Uri.UriSchemeHttp && absolute.Scheme != Uri.UriSchemeHttps) {
                    continue;
                }

                // Print the address.
                Console.WriteLine(absolute.AbsoluteUri);
            }
        }
    }
}

我知道答案,我要回答