运行平台:vs2005\vs2008 软件评级:
文件大小:1k 软件语言:简体中文
软件授权:免费版 演示地址:官方演示地址
下载次数:1 软件类别:seo工具
下载地址:下载地址一

软件简介:

以下是自动生成 sitemap 的核心代码,它会列出网站上的所有内部链接。程序使用多线程和 WebRequest 加载网站的页面,并对每个页面单独解析:从页面中提取锚标签(链接),再把每个链接放入队列,交由线程池继续处理。所有找到的链接都会输出到 outputText 窗格。

 

using System;
using System.Windows.Forms;
using FormLib;
using MSD.WebSpiderLib;


namespace MSD.SiteMap { 

/// <summary>
/// The layer between the presentation layer (a WinForm) and the application.
/// It reacts to the scan check box and the URL text box of the main form,
/// starts/stops a <see cref="PagePool"/> scan, and copies the scan report
/// into the form's output panes.
/// </summary>
public class AppDispatcher : AppDispatcherBase {

    /// <summary>Value held by <c>oldResults</c> while a scan is in progress.</summary>
    const string ScanningState = "scanning";

    /// <summary>
    /// Private lock target.  Used instead of <c>lock( this )</c> so that code
    /// outside this class can never contend for the same monitor.
    /// </summary>
    readonly object gate = new object();

    MainForm mainForm;
    PagePool pool;
    RegExPageScannerFactory factory;

    /// <summary>
    /// Scan state: <see cref="ScanningState"/> while scanning, otherwise the
    /// text of the last pool (or empty when no scan has run).
    /// </summary>
    string oldResults = "";


    /// <summary>
    /// Remembers the form and subscribes to the two UI events that drive a scan.
    /// </summary>
    /// <param name="mainForm">the form whose controls start, stop and display the scan</param>
    public AppDispatcher( MainForm mainForm ) : base( mainForm ) {
        this.mainForm = mainForm;
        mainForm.scanBox.CheckedChanged += new System.EventHandler( this.OnStartCheck );
        mainForm.urlText.KeyPress += new KeyPressEventHandler( this.urlText_KeyPress );
    }

    /// <summary>
    ///  Called on form load event; fills the URL and pattern boxes from the configuration.
    /// </summary>
    public override void OnLoad() {
        mainForm.urlText.Text = WebSpiderConfig.config.StartURL;
        mainForm.patternText.Text = WebSpiderConfig.config.SiteRegex;
    }

    /// <summary>
    ///  Called on form dispose event; releases the page pool.
    /// </summary>
    public override void OnDispose() {
        if( pool != null ) {
            pool.Clear();
            pool = null;
        }
        // Leave the "scanning" state so that a late OnUpdateDisplay() call
        // does not try to query the pool that was just released.
        oldResults = "";
    }


    /// <summary>
    /// Handler for the scan check box: checking it starts a scan, unchecking it
    /// stops the scan and shows the report gathered so far.
    /// </summary>
    /// <param name="sender">event source (unused)</param>
    /// <param name="e">event data (unused)</param>
    public void OnStartCheck(object sender, System.EventArgs e) {
        lock( gate ) {
            if( mainForm.scanBox.Checked ) {
                StartScan();
            } else {
                StopScan();
                ShowReport();
            }
        }
    }

    /// <summary>
    /// This event occurs after the KeyDown event and can be used to prevent
    /// characters from entering the control.  Here, pressing Enter in the URL
    /// box checks the scan box if it is not checked yet.
    /// </summary>
    /// <param name="sender">event source (unused)</param>
    /// <param name="e">carries the character that was typed</param>
    void urlText_KeyPress(object sender, System.Windows.Forms.KeyPressEventArgs e) {
        lock( gate ) {
            if( e.KeyChar == '\r' && !mainForm.scanBox.Checked ) {
                mainForm.scanBox.Checked = true; // calls OnStartCheck() delegate
            }
        }
    }

    /// <summary>
    ///   Start the scan with a new PagePool
    /// </summary>
    void StartScan() {

        // You can have your own customized page scrap by deriving from PageScanner
        // and passing in your own factory.  The factory makes one scanner per page to
        // be scanned.  Each page is then parsed in its own thread with the scanner.

        // pass mainForm.patternText.Text to the factory - this regex expression limits
        // the search to a given site or set of sites, since we would like the search to terminate
        factory = new RegExPageScannerFactory( mainForm.patternText.Text );

        // the seed page
        // assume that the site is a connected graph and
        // you can reach all pages from the seed page
        UrlItem item = new UrlItem( "", mainForm.urlText.Text );
        Vertex vertex;
        bool isNovel;
        factory.graph.AddVertex( "", item.url.AbsoluteUri, "pending", out vertex, out isNovel );
        pool = new PagePool( factory );
        pool.FirstEnqueue( item );

        oldResults = ScanningState;
        mainForm.scanBox.Text = "Scanning";
    }


    /// <summary>
    ///  Abort a scan.  All pending threads will be aborted.
    ///  Does nothing to the pool when no pool exists (no scan was ever
    ///  started, or the pool was already released by OnDispose()).
    /// </summary>
    void StopScan() {
        if( pool != null ) {
            oldResults = pool.ToString();
            pool.Clear();
        } else {
            // nothing to stop; just make sure we are not left in the scanning state
            oldResults = "";
        }
        mainForm.scanBox.Text = "Done";
    }

    /// <summary>
    ///  Copy the current graph report into the output and XML panes.
    ///  Does nothing when no scan has been started yet (no factory).
    /// </summary>
    void ShowReport() {
        if( factory == null ) {
            return;
        }

        string outputResult;
        string xmlResult;
        factory.graph.Report( out outputResult, out xmlResult );

        // FIXME: assigning Text forces the scroll bar to the top.
        // After searching google there seems no better way than talking to
        // MFC and manually setting the scroll bars through COM interop,
        // but it looks like that will be fixed in Whidbey.
        mainForm.outputText.Text = outputResult;
        mainForm.xmlText.Text = xmlResult;
    }

    /// <summary>
    ///   The Form controls should only be updated from inside the Form thread.
    ///
    ///   Our base class AppDispatcherBase has a thread to manually
    ///   Invoke a form update every UpdateDisplayInterval milliseconds
    ///   which calls this delegate.
    /// </summary>
    public override void OnUpdateDisplay() {
        lock( gate ) {
            try {
                if( !oldResults.Equals( ScanningState ) || pool == null ) {
                    mainForm.statusLabel.Text = "Ready";
                    return;
                }

                string status;
                bool isWorkLeft;
                pool.ReportStatus( out status, out isWorkLeft );
                mainForm.statusLabel.Text = status;

                if( !isWorkLeft ) {
                    // no more pages left to scan: unchecking raises CheckedChanged,
                    // whose handler OnStartCheck() stops the scan
                    mainForm.scanBox.Checked = false;
                }

                if( !isWorkLeft || mainForm.isActiveDisplay.Checked ) {
                    ShowReport();
                }
            } catch( Exception ex ) {
                // best effort: surface the problem in the output pane instead of
                // letting it escape into the periodic update call
                mainForm.outputText.Text = ex.Message;
            }
        }
    }


    /// <summary>
    /// The main entry point for the application.
    /// </summary>
    [STAThread]
    static void Main() {
        try {
            // this config file will set various scan parameters
            WebSpiderConfig.DeserializeFile( "WebSpiderConfig.xml" );

            MainForm form = new MainForm();

            // the dispatcher hooks itself onto the form's events in its constructor
            AppDispatcher dispatcher = new AppDispatcher( form );

            Application.Run( form );

        } catch( Exception ex ) {
            MessageBox.Show( ex.Message
                + "\r\n" + ex.StackTrace
            );
        }
    }


}  //public class AppDispatcher


} // namespace SiteMap

 



上一篇:将Gmail的联系人导入asp.net 下一篇:没有了.. 【加入收藏】【打印此文】