المساعد الشخصي الرقمي

مشاهدة النسخة كاملة : Help needed with simple Web Spider Code



C# Programming
08-26-2009, 06:28 AM
Hi folks I know there are lots of different free ones out there.
But I wanted to have a go at somthing quick as a learning exercise.

I got as far as the code below, but am having trouble putting found URL's (i.e. 'eachLineValue') from a web page back into the WebRequest.Create(URL)

Do i need some sort of function around this that I can call recursively with different URL's ?

thanks Mark


using System;
using System.Collections.Generic;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Net;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;


namespace ShoppingDataExtractor
{
public partial class spider : System.Web.UI.Page
{
protected void Page_Load(object sender, EventArgs e)
{
// used to build entire input
StringBuilder sb = new StringBuilder();

// used on each read operation
byte[] buf = new byte[8192];

string initialURL = "http://www.bestpricedirectory.com.au/";

try
{
WebRequest myRequest = WebRequest.Create(initialURL);

// Return the response.
WebResponse myResponse = myRequest.GetResponse();

Response.Write(myResponse);
// Code to use the WebResponse goes here.

Stream resStream = myResponse.GetResponseStream();

string tempString = null;
int count = 0;

do
{
// fill the buffer with data
count = resStream.Read(buf, 0, buf.Length);

// make sure we read some data
if (count != 0)
{
// translate from bytes to ASCII text
tempString = Encoding.ASCII.GetString(buf, 0, count);

// continue building the string
sb.Append(tempString);
}
}
while (count > 0); // any more data to read?


Response.Write(sb.ToString());

// regex to find all html links

// next - to use recursion to follow all links and write out


Regex r = new Regex(@"href=\""(.*?)\""",

RegexOptions.IgnoreCase | RegexOptions.Compiled);

MatchCollection matches = r.Matches(sb.ToString());

foreach (Match match in matches)
{
Array values = match.Value.ToCharArray();

foreach (System.Char line in values)
{
//Response.Write("{0} ", line);
File.AppendAllText(@"C:/Documents and Settings/Administrator/My Documents/Visual Studio 2008/Projects/ShoppingDataExtractor/spider.txt", ({0}) ,line);
sb.ToString();

//File.AppendAllText(@"C:/Documents and Settings/Administrator/My Documents/Visual Studio 2008/Projects/ShoppingDataExtractor/spider.txt", match.Value + Environment.NewLine);
}

// Close the response to free resources.
}
myResponse.Close();
resStream.Close();
}
catch (Exception ex)
{
Response.Write(ex.ToString());
}

}
}
}