首页 新闻 论坛 群组 Blog 文档 下载 读书 Tag 网摘 搜索 开源 FAQ 第二书店 博文视点 程序员
频道: 研发 数据库 中间件 信息化 视频 .NET Java 游戏 移动 服务: 人才 外包 培训
    图书品种:235680
       
热门搜索: ASP.NET Ajax Spring Hibernate Java

13.4  各搜索引擎专用类

搜索引擎专用类用来完成具体的搜索任务,例如,Baidu类用来执行Baidu搜索,Google类用来执行Google搜索。它们都继承自ISearch类,主要是执行一些正则表达式操作,把搜索结果匹配出来。搜索结果作为数据,保存在了XML文件中。最后,这个XML文件按照格式化文件result.xsl的格式把搜索结果显示出来。

Search()方法的大致思路是:首先调用GetPageString()方法把搜索的关键字、页码等信息发送到特定搜索引擎,并接收搜索引擎返回的信息。然后对这个信息进行解析,分析出搜索结果的每个记录,并记录在XML文件中。然后再对搜索结果的分页导航进行分析,转换成本系统的形式,并采用Base64编码,把结果记录在XML文件中。在Search()方法执行的最后,XML文件被格式化输出到客户端浏览器显示出来。下面是6大搜索引擎专用类的具体实现代码:

//Google搜索类 google.cs

using System;

using System.Collections.Generic;

using System.Text;

using System.Text.RegularExpressions;

using System.Xml;

/// <summary>
/// Google search class (google.cs). Fetches a Google result page through
/// <c>GetPageString()</c> and extracts it, via regular expressions, into an XML document.
/// </summary>
public class Google : ISearch
{
    /// <summary>
    /// Parses the Google result page into an XML document.
    /// </summary>
    /// <returns>
    /// A document rooted at <c>search</c> containing a <c>head</c> node and a <c>body</c>
    /// node; <c>body</c> holds <c>key</c>, <c>count</c>, one <c>item</c> per matched result
    /// (<c>url</c>, <c>desc</c>, <c>title</c>, two <c>sour</c> nodes, <c>begin</c>,
    /// <c>end</c>) and finally <c>pageSite</c> with the rewritten paging links.
    /// </returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString();                // string of the fetched search page

        // Plain XmlDocument: the method's return type is XmlDocument and the sibling
        // engine classes (Baidu, Yahoo, Zhongsou) use it as well.
        XmlDocument document = new XmlDocument();       // page to be returned
        document.LoadXml("<search/>");

        // Head section: <head> content with script/meta/title passed through delTagArray.
        // NOTE(review): the boolean presumably selects "remove content too" - confirm in Tools.
        string style = Tools.Match(xmlstr, "<head>[\\s\\S]*?</head>").Value;
        style = Tools.delTagArray(style, "script,meta,title", true);
        XmlNode xn = Tools.CreateNode(document, document.DocumentElement, "head");
        xn.InnerText = Tools.delTagArray(style, "head", false);     // delete the head tag

        // Create body.
        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // Description of the search: text between "符合<b>" and "</b>的查询" goes to
        // <key>, text after "约有<b>" goes to <count>.
        XmlNode txt = Tools.CreateNode(document, body, "key");
        XmlNode count = Tools.CreateNode(document, body, "count");
        string sou = Tools.Match(xmlstr, "(?<=符合<b>)[\\s\\S]*?(?=</b>的查询)").Value;
        string count2 = Tools.Match(xmlstr, "(?<=约有<b>)[\\s\\S]*?(?=</b>)").Value;
        count.InnerText = count2;
        txt.InnerText = sou;

        // Walk every result block and insert it into the XML document.
        MatchCollection mtc = Tools.MatchCollection(xmlstr, "<div class[\\s\\S]*?</div>");
        foreach (Match mt in mtc)
        {
            XmlNode item = Tools.CreateNode(document, body, "item");    // one search record
            XmlNode link = Tools.CreateNode(document, item, "url");
            XmlNode desc = Tools.CreateNode(document, item, "desc");
            XmlNode title = Tools.CreateNode(document, item, "title");

            // Content of the first table cell and of the <h2> heading; both are used
            // several times below, so they are extracted once.
            string cell = Tools.Match(mt.Value, "(?<=<td[^>]*?>)[\\s\\S]*?(?=</td>)").Value;
            string heading = Tools.Match(mt.Value, "(?<=<h2[^>]*?>)[\\s\\S]*?(?=</h2>)").Value;
            MatchCollection itemc = Tools.MatchCollection(cell, "[\\s\\S]*?<br[^>]*?>");

            // Link and title come from the first anchor inside the heading.
            string ul = Tools.Match(heading, "<a[\\s\\S]*?</a>").Value;
            string u_li = Tools.Match(ul, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
            link.InnerText = Tools.Replace(u_li, "^\"", "");
            string u_t = Tools.delTagArray(ul, "a", false);
            title.InnerText = Tools.delHtml(u_t);                       // delete html tags

            // Description: first <br>-terminated segment of the cell, if there is one.
            if (itemc.Count > 0)
            {
                desc.InnerText = Tools.delHtml(itemc[0].Value);
            }

            // Page information found by the search: raw heading, then the cell content.
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            sour1.InnerText = heading;

            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            sour2.InnerText = Tools.delTagArray(cell, "td", false);

            // Markup that opens a single search result.
            XmlNode begin = Tools.CreateNode(document, item, "begin");
            begin.InnerText = Tools.Match(mt.Value, "(?=<div[^>]*?>)[\\s\\S]*?(?=<h2)").Value;

            // Markup that closes a single search result.
            XmlNode end = Tools.CreateNode(document, item, "end");
            end.InnerText = Tools.Match(mt.Value, "(?<=</table>)[\\s\\S]*?(?<=</div>)").Value;
        }

        // Paging navigation: rewrite every page link into this system's own form, with
        // the engine's query string Base64-encoded into nav_go_post.
        string page = Tools.Match(xmlstr, "(?=<div id=navbar class=n>)[\\s\\S]*?(?=<center>)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "<a[^>]*?>[\\s\\S]*?</a>");
        foreach (Match mt in mcpage)
        {
            // Parameters related to the search.
            string s2 = Tools.Match(mt.Value, @"(?<=href=/search\?)[^\s>]*").Value;

            // An anchor without a "/search?" href yields an empty query; replacing the
            // bare prefix would rewrite every remaining link and break later replacements.
            if (string.IsNullOrEmpty(s2))
            {
                continue;
            }

            page = page.Replace("/search?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");  // insert paging navigation
        page = Tools.delTagArray(page, "img", false);
        pageNv.InnerText = page;                                        // navigation content

        return document;
    }
}

//百度搜索类 baidu.cs

using System;

using System.Collections.Generic;

using System.Text;

using System.Xml;

using System.Text.RegularExpressions;

     public class Baidu:ISearch

     {

          public override System.Xml.XmlDocument Search()

          {

               string xmlstr = GetPageString();     //获取搜索页

               XmlDocument document = new XmlDocument(); //返回的页面

               document.LoadXml("<search/>");

               string style = Tools.Match(xmlstr, "<head>[\\s\\S]*?</head>").Value;
                    //css

               style = Tools.delTagArray(style, "script", true); //删除脚本

               //创建头部

               XmlNode xn = Tools.CreateNode(document, document.DocumentElement,
                    "head");

               //删除head标记

               xn.InnerText = Tools.delTagArray(style, "head", false);

               //创建body

               XmlNode body = Tools.CreateNode(document, document.DocumentElement,
                    "body");

               //搜索记录数的描述

               XmlNode txt = Tools.CreateNode(document, body, "key");

               XmlNode count = Tools.CreateNode(document, body, "count");

                                                            //记录总数

               string sou = Tools.Match(xmlstr, "(?<=<input name=wd size=\"35\" class=
                    \"i\" value=\")[\\s\\S]*?(?=\" maxlength=\"100\")").Value;

               string count2 = Tools.Match(xmlstr, "(?<=找到相关网页[^\\d])[\\s\\S]*?
                    (?=篇)").Value;

               count.InnerText = count2;

               txt.InnerText = sou                              ;//记录总数

               //搜索结果的记录集

               MatchCollection mtc = Tools.MatchCollection(xmlstr, "<table border=\"0\"
                    cellpadding=\"0\" cellspacing=\"0\">[\\s\\S]*?</table>");

               StringBuilder sb = new StringBuilder(1000);

               //遍历每个结果,把搜索结果插入xml文档中

               foreach (Match mt in mtc)

               {

                     XmlNode item = Tools.CreateNode(document, body, "item");

                                                    //在xml中插入一条搜索记录

                     XmlNode link = Tools.CreateNode(document, item, "url");                                                              //单条超链接

                     XmlNode desc = Tools.CreateNode(document, item, "desc");                                                             //单条搜索结果的描述

                     XmlNode title = Tools.CreateNode(document, item, "title");                                                           //单条搜索结果的标题

                     sb.Remove(0, sb.Length);         //清空

                     sb.Append(Tools.Match(mt.Value, "(?<=<td[^>]*?>)[\\s\\S]*?(?=
                          </td>)").Value);

                     MatchCollection itemc = Tools.MatchCollection(sb.ToString(),
                          "[\\s\\S]*?<br[^>]*?>");

                     if(itemc.Count>=3)

                     {

                            string u1 = Tools.Match(itemc[0].Value, "<a[\\s\\S]*?</
                                 a>").Value;

                            string u_li=Tools.Match(u1,"(?<=href=[\"]?).*?(?=[\"]?
                                 [\\s>])").Value;

                            link.InnerText = Tools.Replace(u_li, "^\"", "");

                            string u_t = Tools.delTagArray(u1, "a", false);

                            title.InnerText = Tools.delHtml(u_t);   //删除html标记

                            u1 = Tools.delHtml(itemc[1].Value);

                            desc.InnerText = u1;

                            foreach (Match mt1 in itemc)

                            {

                                 //搜索的网页信息

                                 XmlNode sour1 = Tools.CreateNode(document, item,
                                      "sour");

                                 string da = Tools.delTagArray(mt1.Value, "br", false);                                                              //删除br

                                 sour1.InnerText=da;

                            }

                     }

                     XmlNode end = Tools.CreateNode(document, item, "end");                                                                   //搜索结果结束

                     end.InnerText ="</font><br/>";

               }

               string page = Tools.Match(xmlstr, "<div class=\"p\">[\\s\\S]*?</div>").
                    Value;  //分页

               MatchCollection mcpage = Tools.MatchCollection(page,"<a[^>]*?>[\\s
                    \\S]*?</a>");

               //遍历每个页码,替换为本系统的形式

               foreach (Match mt in mcpage)

               {

                     string s2 = mt.Value;

                     s2 = Tools.Match(s2, @"(?<=href=s\?)[^\s>]*").Value;

                     page = page.Replace("s?" + s2,"?nav_go_post="+Tools.ToBase64(s2)
                          +"&itemtype=" + ItemType);   //替换超链接

               }

               XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");                                                                       //插入分页导航

               pageNv.InnerText = page;                         //赋予导航内容

               return document;

          }

     }

//搜狗类 sogou.cs

using System;

using System.Collections.Generic;

using System.Text;

using System.Text.RegularExpressions;

using System.Xml;

using System.Web;

/// <summary>
/// Sogou search class (sogou.cs). Fetches a Sogou result page through
/// <c>GetPageString()</c> and extracts it, via regular expressions, into an XML document.
/// </summary>
public class Sogou : ISearch
{
    /// <summary>
    /// Parses the Sogou result page into an XML document.
    /// </summary>
    /// <returns>
    /// A document rooted at <c>search</c> containing a <c>head</c> node (the page's style
    /// block) and a <c>body</c> node; <c>body</c> holds <c>count</c>, <c>key</c>, one
    /// <c>item</c> per matched result (<c>url</c>, <c>desc</c>, <c>title</c>,
    /// <c>begin</c>, <c>end</c>, two <c>sour</c> nodes) and finally <c>pageSite</c> with
    /// the rewritten paging links.
    /// </returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString();                        // fetched search page
        xmlstr = Tools.delTagArray(xmlstr, "script", true);     // delete scripts

        // Plain XmlDocument: the method's return type is XmlDocument and the sibling
        // engine classes (Baidu, Yahoo, Zhongsou) use it as well.
        XmlDocument document = new XmlDocument();
        document.LoadXml("<search/>");

        // Head section: the page's css block.
        string style = Tools.Match(xmlstr, "(?=<style[^>]*?)[\\s\\S]*?(?<=</style>)").Value;
        XmlNode head = Tools.CreateNode(document, document.DocumentElement, "head");
        head.InnerText = style;

        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // Description of the search: value of the "query" input box goes to <key>, the
        // text between "找到" and "个网页" goes to <count>.
        XmlNode count = Tools.CreateNode(document, body, "count");
        XmlNode txt = Tools.CreateNode(document, body, "key");
        string sou = Tools.Match(xmlstr,
            "(?<=<input name=\"query\" type=\"text\" class=\"query\" size=\"35\" tabindex=\"1\" value=\")[\\s\\S]*?(?=\"/>)").Value;
        string count2 = Tools.Match(xmlstr, "(?<=找到)[\\s\\S]*?(?=个网页)").Value;
        count.InnerText = count2;
        txt.InnerText = sou;

        // Result region: from the content div up to the page bar; one <div> per result.
        string xmlstr2 = Tools.Match(xmlstr,
            "(?<=<div id=\"content\">)[\\s\\S]*?(?<=<div id=\"pagebar\">)").Value;
        MatchCollection mtc = Tools.MatchCollection(xmlstr2, "(<div>)[\\s\\S]*?(</div>)");

        // NOTE(review): this pattern is reproduced exactly as published. As written, the
        // '^' anchor follows "<br", so the group can only match the empty string and the
        // replacement changes nothing. It looks like a typo for "<br[^>]*?>" (strip br
        // tags) - confirm the intended output before correcting it.
        const string BrPattern = "(<br^[>]*?)*";

        // Walk every result and insert it into the XML document.
        foreach (Match mt in mtc)
        {
            XmlNode item = Tools.CreateNode(document, body, "item");
            XmlNode url = Tools.CreateNode(document, item, "url");
            XmlNode desc = Tools.CreateNode(document, item, "desc");
            XmlNode title = Tools.CreateNode(document, item, "title");
            XmlNode begin = Tools.CreateNode(document, item, "begin");
            XmlNode end = Tools.CreateNode(document, item, "end");

            // The <h2> heading carries the link and title; it is also emitted as the
            // first <sour> node below, so it is extracted once.
            string ul = Tools.Match(mt.Value, "(?<=<h2>)[\\s\\S]*?(?=</h2>)").Value;
            string ul_li = Tools.Match(ul, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
            url.InnerText = Tools.Replace(ul_li, "[\"']", "");

            string u_t = Tools.delTagArray(ul, "a", false);     // delete the hyperlink tag
            title.InnerText = Tools.delHtml(u_t);

            // Description: content of the first <p> element, quotes removed.
            MatchCollection itemc = Tools.MatchCollection(mt.Value,
                "(?<=<p[^>]*?>)[\\s\\S]*?(?=</p>)");
            if (itemc.Count > 0)
            {
                string de = Tools.delHtml(itemc[0].Value);
                desc.InnerText = Tools.Replace(de, "[\"']", "");
            }

            // Page content, part 1: the heading with its onclick handler stripped.
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            string sout1str = Tools.Replace(ul, BrPattern, "");
            sour1.InnerText = Tools.Replace(sout1str, "(?=onclick=)[\\s\\S]*?(?<=;\")", "");

            // Page content, part 2: everything after the heading, stored via CreateCData.
            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            string content = Tools.Match(mt.Value, "(?<=</h2>)[\\s\\S]*?(?=</div>)").Value;
            content = Tools.Replace(content, BrPattern, "");
            Tools.CreateCData(document, sour2, content);        // add one search record

            begin.InnerText = "<div>";                          // start of each record
            end.InnerText = "</div>";                           // end of each record
        }

        // Paging navigation: rewrite every page link into this system's own form, with
        // the engine's query string Base64-encoded into nav_go_post.
        string page = Tools.Match(xmlstr,
            "(?=<!-- begin of page up/down -->)[\\s\\S]*?(?=<!-- end of page up/down -->)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page,
            "(?=<a[\\s\\S]*?>)[\\s\\S]*?(?<=</a>)");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Replace(mt.Value, "\"", "");
            s2 = Tools.Match(s2, @"(?<=href=\?)[\s\S]*?(?=>)").Value;

            // With an empty query the search text would be just "?", and every question
            // mark in the navigation (including links already rewritten) would be replaced.
            if (string.IsNullOrEmpty(s2))
            {
                continue;
            }

            page = page.Replace("?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");  // insert paging navigation
        pageNv.InnerText = page;                                        // navigation content

        return document;
    }
}

//爱问搜索类 iask.cs

using System;

using System.Collections.Generic;

using System.Text;

using System.Text.RegularExpressions;

using System.Xml;

/// <summary>
/// Iask search class (iask.cs). Fetches an Iask result page through
/// <c>GetPageString()</c> and extracts it, via regular expressions, into an XML document.
/// </summary>
public class Iask : ISearch
{
    /// <summary>
    /// Parses the Iask result page into an XML document.
    /// </summary>
    /// <returns>
    /// A document rooted at <c>search</c> containing a <c>head</c> node (the page's style
    /// block) and a <c>body</c> node; <c>body</c> holds <c>count</c>, <c>key</c>, one
    /// <c>item</c> per matched result table (<c>url</c>, <c>desc</c>, <c>title</c>, two
    /// <c>sour</c> nodes) and finally <c>pageSite</c> with the rewritten paging links.
    /// </returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString();                    // fetched search page

        // Plain XmlDocument: the method's return type is XmlDocument and the sibling
        // engine classes (Baidu, Yahoo, Zhongsou) use it as well.
        XmlDocument document = new XmlDocument();           // page to be returned
        document.LoadXml("<search/>");

        XmlNode head = Tools.CreateNode(document, document.DocumentElement, "head");
        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // Description of the search: the page title up to " - 爱问搜索" goes to <key>,
        // the number inside the "ar" span goes to <count>.
        XmlNode count = Tools.CreateNode(document, body, "count");
        XmlNode txt = Tools.CreateNode(document, body, "key");
        string sou = Tools.Match(xmlstr, "(?<=<title>)[\\s\\S]*?(?= - 爱问搜索)").Value;
        string count2 = Tools.Match(xmlstr,
            "(?<=找到 <span class=\"ar\">)[\\s\\S]*?(?=</span> 篇网页)").Value;
        count.InnerText = count2;
        txt.InnerText = sou;

        // Head section: the first style block of the whole page, passed through
        // delTagArray for script.
        string style = Tools.Match(xmlstr, "(?=<style[^>]*?>)[\\s\\S]*?(?<=</style>)").Value;
        style = Tools.delTagArray(style, "script", true);
        head.InnerText = style;

        // Result region is delimited by HTML comments; one table per result.
        string xmlstr2 = Tools.Match(xmlstr,
            "(?<=<!-- 网页搜索结果 begin -->)[\\s\\S]*?(?=<!-- 网页搜索结果 end -->)").Value;
        MatchCollection mtc = Tools.MatchCollection(xmlstr2, "<table[^>]*?>[\\s\\S]*?</table>");

        // Walk every result and insert it into the XML document.
        foreach (Match mt in mtc)
        {
            XmlNode item = Tools.CreateNode(document, body, "item");
            XmlNode url = Tools.CreateNode(document, item, "url");
            XmlNode desc = Tools.CreateNode(document, item, "desc");
            XmlNode title = Tools.CreateNode(document, item, "title");

            // Split the table into <br>-terminated segments. A table without any <br>
            // yields no segments, so every index below is guarded instead of assuming
            // that at least two segments exist.
            MatchCollection itemc = Tools.MatchCollection(mt.Value,
                "[\\s\\S]*?(?<=<br[^>]*?>)");
            string firstSegment = itemc.Count > 0 ? itemc[0].Value : string.Empty;

            // Link and title come from the first anchor of the first segment.
            string ul = Tools.Match(firstSegment, "<a[\\s\\S]*?</a>").Value;
            string u_li = Tools.Match(ul, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
            url.InnerText = Tools.Replace(u_li, "^\"", "");

            string u_t = Tools.delTagArray(ul, "a", false);
            title.InnerText = Tools.delHtml(u_t);

            // Description: the second segment, when present.
            if (itemc.Count > 1)
            {
                desc.InnerText = Tools.delHtml(itemc[1].Value);
            }

            // Page information, part 1: the raw anchor.
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            sour1.InnerText = ul;

            // Page information, part 2: all segments after the first, concatenated.
            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            StringBuilder rest = new StringBuilder();
            for (int j = 1; j < itemc.Count; j++)
            {
                rest.Append(itemc[j].Value);
            }
            sour2.InnerText = rest.ToString();
        }

        // Paging navigation: rewrite every page link into this system's own form, with
        // the engine's query string Base64-encoded into nav_go_post.
        string page = Tools.Match(xmlstr,
            "(?<=<!-- 左侧搜索结果 end -->)[\\s\\S]*?(?<=</table>)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "<a[^>]*?>[\\s\\S]*?</a>");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Replace(mt.Value, "\"", "");
            s2 = Tools.Match(s2, @"(?<=href=/s\?)[^\s>]*").Value;

            // An anchor without a "/s?" href yields an empty query; replacing the bare
            // prefix would rewrite every remaining link and break later replacements.
            if (string.IsNullOrEmpty(s2))
            {
                continue;
            }

            page = page.Replace("/s?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");  // insert paging navigation
        pageNv.InnerText = page;                                        // navigation content

        return document;
    }
}

//雅虎类 yahoo.cs

using System;

using System.Collections.Generic;

using System.Text;

using System.Xml;

using System.Text.RegularExpressions;

/// <summary>
/// Yahoo search class (yahoo.cs). Fetches a Yahoo result page through
/// <c>GetPageString()</c> and extracts it, via regular expressions, into an XML document.
/// </summary>
public class Yahoo : ISearch
{
    /// <summary>
    /// Parses the Yahoo result page into an XML document.
    /// </summary>
    /// <returns>
    /// A document rooted at <c>search</c> containing a <c>head</c> node (the page's style
    /// block) and a <c>body</c> node; <c>body</c> holds <c>count</c>, <c>key</c>, one
    /// <c>item</c> per matched result block and finally <c>pageSite</c> with the
    /// rewritten paging links. An <c>item</c> is left empty when no link is found in it;
    /// otherwise it holds <c>url</c>, <c>desc</c>, <c>title</c>, <c>begin</c>,
    /// <c>end</c> and two <c>sour</c> nodes.
    /// </returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString();                    // fetched search page

        // Cut the span from the "pm r" div up to the following </table> out of the page.
        xmlstr = Tools.Replace(xmlstr, "(?=<div class=\"pm r\">)[\\s\\S]*?(?=</table>)", "");

        XmlDocument document = new XmlDocument();           // page to be returned
        document.LoadXml("<search/>");

        XmlNode head = Tools.CreateNode(document, document.DocumentElement, "head");
        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");

        // Description of the search: page title after "雅虎搜索_" goes to <key>, the text
        // between "共返回" and "项" goes to <count>.
        XmlNode count = Tools.CreateNode(document, body, "count");
        XmlNode txt = Tools.CreateNode(document, body, "key");
        string sou = Tools.Match(xmlstr, "(?<=<title>雅虎搜索_)[\\s\\S]*?(?=</title>)").Value;
        count.InnerText = Tools.Match(xmlstr, "(?<=共返回[^\\d])[\\s\\S]*?(?=项)").Value;
        txt.InnerText = sou;

        // Head section (css): the style block found inside the page head.
        string style = Tools.Match(xmlstr, "(?<=<head>*?)[\\s\\S]*?(?<=</head>)").Value;
        style = Tools.delTagArray(style, "script,title", true);     // delete scripts and title
        style = Tools.delTagArray(style, "meta", false);            // delete meta
        style = Tools.Match(style, "(?=<style>)[\\s\\S]*?(?<=</style>)").Value;
        head.InnerText = style;

        // One block per result: from the "i" div to the end of its table.
        MatchCollection mtc = Tools.MatchCollection(xmlstr,
            "(<div class=\"i\">)[\\s\\S]*?(</table>)");

        // Walk every result and insert it into the XML document.
        foreach (Match mt in mtc)
        {
            XmlNode item = Tools.CreateNode(document, body, "item");

            // Heading part of the block (before the inner table) and the link inside it.
            string link = Tools.Match(mt.Value,
                "(?<=<div class=\"i\">)[\\s\\S]*?(?=<table cellspacing=\"0\">)").Value;
            string ul_li = Tools.Match(link, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;

            // Blocks without a link get an empty <item> and nothing else.
            if (string.IsNullOrEmpty(ul_li))
            {
                continue;
            }

            // Information nodes.
            XmlNode url = Tools.CreateNode(document, item, "url");
            XmlNode desc = Tools.CreateNode(document, item, "desc");
            XmlNode title = Tools.CreateNode(document, item, "title");

            url.InnerText = Tools.Replace(ul_li, "^\"", "");

            string u_t = Tools.delTagArray(link, "a", false);
            title.InnerText = Tools.delHtml(u_t);

            string de = Tools.Match(mt.Value,
                "(?=<td class=\"d\">)[\\s\\S]*?(?<=<div class=\"rel\">)").Value;
            desc.InnerText = Tools.delHtml(de);

            // Markup that opens / closes a single search result.
            XmlNode begin = Tools.CreateNode(document, item, "begin");
            begin.InnerText = "<div class=\"i\">";

            XmlNode end = Tools.CreateNode(document, item, "end");
            end.InnerText = "</div>";

            // Content of the result, part 1: the heading with its onclick handler stripped.
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            sour1.InnerText = Tools.Replace(link, "(?=onclick=)[\\s\\S]*?(?<=;\")", "");

            // Content of the result, part 2: the inner table, with the span from the first
            // anchor through "-&nbsp;" and any onclick handlers removed, stored via
            // CreateCData.
            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            string sourstr2 = Tools.Match(mt.Value,
                "(?=<table cellspacing=\"0\">)[\\s\\S]*?(?<=</table>)").Value;
            sourstr2 = Tools.Replace(sourstr2, "(<a[^>]*?)[\\s\\S]*?(?<=-&nbsp;)", "");
            Tools.CreateCData(document, sour2,
                Tools.Replace(sourstr2, "(?=onclick=)[\\s\\S]*?(?<=;\")", ""));
        }

        // Paging navigation: rewrite every page link into this system's own form, with
        // the engine's query string Base64-encoded into nav_go_post.
        string page = Tools.Match(xmlstr, "(<div id=\"pg\">)[\\s\\S]*?(?<=</div>)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page,
            "(?=<a[\\s\\S]*?>)[\\s\\S]*?(?<=</a>)");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Replace(mt.Value, "\"", "");
            s2 = Tools.Match(s2, @"(?<=href=\?)[\s\S]*?(?=>)").Value;

            // With an empty query the search text would be just "?", and every question
            // mark in the navigation (including links already rewritten) would be replaced.
            if (string.IsNullOrEmpty(s2))
            {
                continue;
            }

            page = page.Replace("?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");  // insert paging navigation
        pageNv.InnerText = page;                                        // navigation content

        return document;
    }
}

//中搜类 zhongsou.cs

using System;

using System.Collections.Generic;

using System.Text;

using System.Text.RegularExpressions;

using System.Xml;

/// <summary>
/// Zhongsou search class (zhongsou.cs). Fetches a Zhongsou result page through
/// <c>GetPageString()</c> and extracts it, via regular expressions, into an XML document.
/// </summary>
public class Zhongsou : ISearch
{
    /// <summary>
    /// Parses the Zhongsou result page into an XML document.
    /// </summary>
    /// <returns>
    /// A document rooted at <c>search</c> containing a <c>head</c> node (the page's style
    /// block) and a <c>body</c> node; <c>body</c> holds <c>count</c>, <c>key</c>, one
    /// <c>item</c> per matched result table (<c>url</c>, <c>desc</c>, <c>title</c>, two
    /// <c>sour</c> nodes, <c>begin</c>, <c>end</c>) and finally <c>pageSite</c> with the
    /// rewritten paging links.
    /// </returns>
    public override System.Xml.XmlDocument Search()
    {
        string xmlstr = GetPageString();                    // fetched search page

        XmlDocument document = new XmlDocument();           // xml to be returned
        document.LoadXml("<search/>");

        XmlNode head = Tools.CreateNode(document, document.DocumentElement, "head");    // create head
        XmlNode body = Tools.CreateNode(document, document.DocumentElement, "body");    // create body

        // Description of the search: page title after "中搜网页_" goes to <key>, the text
        // between "找到" and "条结果" (total number of results) goes to <count>.
        XmlNode count = Tools.CreateNode(document, body, "count");
        XmlNode txt2 = Tools.CreateNode(document, body, "key");
        string sou = Tools.Match(xmlstr, "(?<=<title>中搜网页_)[\\s\\S]*?(?=</title>)").Value;
        string count2 = Tools.Match(xmlstr, "(?<=找到)[\\s\\S]*?(?=条结果)").Value;
        count.InnerText = count2;
        txt2.InnerText = sou;

        // Head section (style): the style block inside the page head, passed through
        // delTagArray for script.
        string style = Tools.Match(xmlstr, "(?=<head>)[\\s\\S]*?(?<=</head>)").Value;
        style = Tools.Match(style, "(?=<style[^>]*?>)[\\s\\S]*?(?<=</style>)").Value;
        style = Tools.delTagArray(style, "script", true);
        head.InnerText = style;

        // One table per result.
        MatchCollection mtc = Tools.MatchCollection(xmlstr,
            "(?=<table cellspacing=\"0\" cellpadding=\"0\">)[\\s\\S]*?(?<=</table>)");

        // Walk every result and insert it into the XML document.
        foreach (Match mt in mtc)
        {
            XmlNode item = Tools.CreateNode(document, body, "item");    // one search record
            XmlNode url = Tools.CreateNode(document, item, "url");      // hyperlink of the record
            XmlNode desc = Tools.CreateNode(document, item, "desc");    // description of the record
            XmlNode title = Tools.CreateNode(document, item, "title");  // title of the record

            // Split the table into <br>-terminated segments. A table without any <br>
            // yields no segments, so the first segment is read through a guard instead
            // of indexing unconditionally.
            MatchCollection itemc = Tools.MatchCollection(mt.Value, "[\\s\\S]*?<br[^>]*?>");
            string firstSegment = itemc.Count > 0 ? itemc[0].Value : string.Empty;

            // Link and title come from the first anchor of the first segment.
            string ul = Tools.Match(firstSegment, "<a[\\s\\S]*?</a>").Value;
            string ul_li = Tools.Match(ul, "(?<=href=[\"]?).*?(?=[\"]?[\\s>])").Value;
            url.InnerText = Tools.Replace(ul_li, "^\"", "");

            string u_t = Tools.delTagArray(ul, "a", false);
            title.InnerText = Tools.delHtml(u_t);                       // delete html

            // Description: the second segment, when present.
            if (itemc.Count > 1)
            {
                desc.InnerText = Tools.delHtml(itemc[1].Value);
            }

            // Display information, part 1: the anchor with its onmousedown handler stripped.
            XmlNode sour1 = Tools.CreateNode(document, item, "sour");
            sour1.InnerText = Tools.Replace(ul, "(?=onmousedown=)[\\s\\S]*?(?<=\\)\")", "");

            // Display information, part 2: the table cell without hyperlinks and td tags.
            XmlNode sour2 = Tools.CreateNode(document, item, "sour");
            string txt = Tools.Match(mt.Value, "(?=<td[^>]*?)[\\s\\S]*?(?<=</td>)").Value;
            txt = Tools.Replace(txt, "<a[\\s\\S]*?</a>", "");           // delete hyperlinks
            txt = Tools.delTagArray(txt, "td", false);
            sour2.InnerText = txt;                                      // content of the record

            // Markup that opens / closes a single record.
            XmlNode begin = Tools.CreateNode(document, item, "begin");
            begin.InnerText = "<table cellspacing=\"0\" cellpadding=\"0\"><tr><td class=\"f\">";

            XmlNode end = Tools.CreateNode(document, item, "end");
            end.InnerText = "</td></tr></table>";
        }

        // Paging navigation: rewrite every page link into this system's own form, with
        // the engine's query string Base64-encoded into nav_go_post.
        string page = Tools.Match(xmlstr,
            "(<table ><tr><td class=db>)[\\s\\S]*?(</table>)").Value;
        MatchCollection mcpage = Tools.MatchCollection(page, "<a[^>]*?>[\\s\\S]*?</a>");
        foreach (Match mt in mcpage)
        {
            string s2 = Tools.Replace(mt.Value, "\"", "");
            s2 = Tools.Match(s2, @"(?<=href=p\?)[^\s>]*").Value;

            // An anchor without a "p?" href yields an empty query; replacing the bare
            // "p?" prefix would rewrite every remaining link and break later replacements.
            if (string.IsNullOrEmpty(s2))
            {
                continue;
            }

            page = page.Replace("p?" + s2,
                "?nav_go_post=" + Tools.ToBase64(s2) + "&itemtype=" + ItemType);
        }

        XmlNode pageNv = Tools.CreateNode(document, body, "pageSite");  // insert paging navigation
        pageNv.InnerText = page;                                        // navigation content

        return document;
    }
}

【查看所有评论(0)条】

最近评论



正在载入评论列表...
热点评论