闽公网安备 35020302035485号
2.使用 HtmlAgilityPack 解析 HTML 文档
2.HtmlAgilityPack
/// <summary>
/// Loads <paramref name="url"/> in a headless Chrome instance driven by Selenium
/// and returns the page source after a fixed delay, so that content rendered by
/// client-side scripts has a chance to appear in the markup.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>
/// The page source reported by the driver, or <see cref="string.Empty"/> when a
/// <see cref="NoSuchElementException"/> is raised while loading the page.
/// </returns>
private static string GetHtml(string url)
{
    ChromeOptions options = new ChromeOptions();
    // Do not show a browser window.
    options.AddArgument("--headless");
    // GPU acceleration can cause black screens and high CPU usage in Chrome.
    // The switch Chrome recognizes for this is "--disable-gpu"; "--nogpu" is not
    // a Chrome switch and would be ignored.
    options.AddArgument("--disable-gpu");
    // Window size Chrome starts with.
    options.AddArgument("--window-size=10,10");

    // The using statement disposes the driver on every exit path.
    using (var driver = new ChromeDriver(options))
    {
        try
        {
            // NOTE(review): minimizing is presumably redundant in headless mode — confirm.
            driver.Manage().Window.Minimize();
            driver.Navigate().GoToUrl(url);
            // Fixed wait for dynamically loaded content; nothing here verifies
            // that the content has actually finished loading.
            Thread.Sleep(5000);
            // Return the markup as it stands after the wait.
            return driver.PageSource;
        }
        catch (NoSuchElementException)
        {
            // NOTE(review): no element lookup happens in the try block above, so
            // this handler looks unlikely to fire — confirm which failures
            // (navigation errors, timeouts) should be handled here.
            Console.WriteLine("找不到该元素");
            return string.Empty;
        }
    }
}
// Parsing the HTML document

/// <summary>
/// Holds the data extracted for a single video entry on the page.
/// </summary>
class VideoInfo
{
    /// <summary>Text of the entry's title link.</summary>
    public string Title { get; set; }

    /// <summary>Target of the entry's title link.</summary>
    public string Href { get; set; }

    /// <summary>Address of the entry's cover image.</summary>
    public string ImgUrl { get; set; }
}
// Parsing function: returns the list of video entries.

/// <summary>
/// Fetches the rendered page at <paramref name="url"/> via <c>GetHtml</c>, parses it
/// with HtmlAgilityPack and extracts one <see cref="VideoInfo"/> per video entry.
/// </summary>
/// <param name="url">Address of the page that lists the videos.</param>
/// <returns>
/// The extracted entries. The list is empty when the video-list XPath matches
/// nothing, for example because the page failed to load or its layout changed.
/// </returns>
private static List<VideoInfo> GetVideoInfos(string url)
{
    List<VideoInfo> videoInfos = new List<VideoInfo>();

    // Load the rendered markup into a document.
    var html = GetHtml(url) ?? string.Empty;
    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(html);

    // Locate the video-list entries first. The absolute path is tied to the
    // page's current layout.
    var xpath = "/html/body/div[2]/div[4]/div/div/div[1]/div[2]/div/div";
    var htmlNodes = htmlDoc.DocumentNode.SelectNodes(xpath);

    // SelectNodes yields null, not an empty collection, when nothing matches.
    if (htmlNodes == null)
    {
        return videoInfos;
    }

    // Extract the video data from each entry's child nodes.
    foreach (var node in htmlNodes)
    {
        var titleNode = node.SelectSingleNode("a[2]");
        if (titleNode == null)
        {
            // No title link: this node is not a usable video entry, skip it.
            continue;
        }

        // An entry without a cover image is kept, with an empty ImgUrl.
        var imgNode = node.SelectSingleNode("a[1]/div[1]/picture/source[1]");
        var srcset = imgNode == null
            ? string.Empty
            : imgNode.GetAttributeValue("srcset", string.Empty);

        videoInfos.Add(new VideoInfo
        {
            Title = titleNode.InnerText,
            // Strip leading/trailing slashes from the link.
            Href = titleNode.GetAttributeValue("href", string.Empty).Trim('/'),
            // Keep only the part of srcset before the first '@', then strip slashes.
            ImgUrl = srcset.Split('@')[0].Trim('/')
        });
    }

    return videoInfos;
}
视频列表标签的 XPath 路径可以通过浏览器调试工具获得:在指定标签上右键,选择"复制完整的 XPath"。在主函数中调用解析函数并输出结果:
/// <summary>
/// Entry point: scrapes the video list of one space page and writes each
/// entry's title, link and cover image address to the console, with a blank
/// line after every entry, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    List<VideoInfo> videos = GetVideoInfos(@"https://space.bilibili.com/401315430");

    for (int index = 0; index < videos.Count; index++)
    {
        VideoInfo video = videos[index];
        Console.WriteLine(video.Title);
        Console.WriteLine(video.Href);
        Console.WriteLine(video.ImgUrl);
        // Blank separator line between entries.
        Console.WriteLine();
    }

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
结果如下:
等一下,好妹妹
www.bilibili.com/video/BV1uyxLeJEM9
i0.hdslb.com/bfs/archive/46a15065d1b6722a04696ffaaa2235287ceaa452.jpg

一口一个?你的超甜辣椒
www.bilibili.com/video/BV1AQsDeiEn1
i0.hdslb.com/bfs/archive/d93d47d67323ee284483e963ffed34fb9884cf61.jpg

这里只是演示爬取动态页面的方法,如果想获取B站UP主的视频信息,建议直接使用 API 请求数据。