This piece of code shows how to extract the SRC URL from every IMG element in a piece of HTML, using a regular expression (Regex). Each matched URL is added to an ArrayList, which is returned to the caller.
/// <summary>
/// Extracts the value of the <c>src</c> attribute from every <c>&lt;img&gt;</c> tag
/// found in the supplied HTML.
/// </summary>
/// <param name="lv_HTML">The HTML text to scan. May be null or empty.</param>
/// <returns>
/// An <see cref="ArrayList"/> of strings, one per matched <c>&lt;img&gt;</c> tag, in
/// document order. The list is empty when the input is null, empty, or has no matches.
/// </returns>
public static ArrayList ExtractAllImagesFromHTMLbyURL(string lv_HTML)
{
    ArrayList images = new ArrayList();

    // Nothing to scan; Regex.Matches would throw on a null input.
    if (string.IsNullOrEmpty(lv_HTML))
    {
        return images;
    }

    // Find the SRC URL inside each IMG tag:
    //   <img\b          the tag name, not merely a prefix of a longer name
    //   [^>]*?\ssrc     a whitespace-separated src attribute (so "data-src" is skipped)
    //   [\""\']?        an optional opening quote (single or double)
    //   (?<ImageFile>)  the URL itself: everything up to a quote, '>' or whitespace
    //   [^>]*>          the remainder of the tag
    // HTML tag and attribute names are case-insensitive, hence IgnoreCase.
    Regex imageTagPattern = new Regex(
        @"<img\b[^>]*?\ssrc\s*=\s*[\""\']?(?<ImageFile>[^""'>\s]*)[\""\']?[^>]*>",
        RegexOptions.IgnoreCase);

    // Collect the captured URL of every match.
    foreach (Match imageTag in imageTagPattern.Matches(lv_HTML))
    {
        images.Add(imageTag.Groups["ImageFile"].Value);
    }

    return images;
}
Posted by Xander Zelders

0 Comments:
Post a Comment
<< Home