直接用 PHP 正则来写更好,这样就不用转这个了。例子:
<?php
// Download the sample page. file_get_contents() returns false on failure,
// so stop here instead of handing a non-string to the extractor.
$str = file_get_contents("http://www.27.cn/kutu/2475/1613221.html");
if ($str === false) {
    exit("Failed to download the page.\n");
}

// Dump every image path found in the downloaded markup.
var_dump(gPic_Url($str));
/**
 * Extract the href value of every <a ...>...</a> element in an HTML string.
 *
 * Quoted href values are returned first (in document order), followed by
 * unquoted ones; this keeps the ordering of the original two-pass loops.
 *
 * @param string $content HTML markup to scan.
 * @return array List of href strings; an empty array when nothing matches.
 */
function gFile_Url($content){
    // Group 1 captures an optional opening quote. The conditional (?(1)...)
    // then reads either a quoted value (group 2, terminated by the same
    // quote) or an unquoted value (group 3, terminated by whitespace or '>').
    $pattern = "'<\s*a\s.*?href\s*=\s*([\"\'])?(?(1)(.*?)\\1|([^\s\>]+))[^>]*>?(.*?)</a>'isx";

    // preg_match_all() yields 0 for "no match" and false on a PCRE error;
    // both cases mean there is nothing to return.
    if (!preg_match_all($pattern, $content, $links)) {
        return array();
    }

    $match = array();
    // each() was removed in PHP 8.0, so iterate with foreach instead.
    foreach (array(2, 3) as $group) {
        foreach ($links[$group] as $val) {
            // Non-participating groups are reported as ''. Compare against ''
            // rather than using empty() so that a literal href="0" is kept.
            if ($val !== '') {
                $match[] = $val;
            }
        }
    }
    return $match;
}
/**
 * Extract the src value of every <img> tag whose path ends in an allowed
 * image extension (optionally followed by a query string or fragment).
 *
 * Tag name, attribute name and extension are matched case-insensitively.
 *
 * @param string $content    HTML markup to scan.
 * @param array  $extensions Accepted file extensions, without the leading dot.
 *                           Defaults to gif and jpg.
 * @return array List of src strings in document order; an empty array when
 *               nothing matches.
 */
function gPic_Url($content, $extensions = array('gif', 'jpg')){
    // Escape each extension so it is matched literally inside the pattern.
    $alternatives = array();
    foreach ($extensions as $extension) {
        $alternatives[] = preg_quote($extension, '/');
    }
    // An empty alternation would accept any path ending in a dot.
    if ($alternatives === array()) {
        return array();
    }

    // Group 1 is the opening quote and must be closed by the same quote (\1).
    // Group 2 is the path; it may not contain quotes, so the capture cannot
    // spill into neighbouring attributes. "\ssrc" (rather than "\bsrc") keeps
    // attributes such as data-src from being mistaken for src.
    $pattern = '/<img\b[^>]*?\ssrc\s*=\s*(["\'])([^"\']*?\.(?:'
        . implode('|', $alternatives)
        . ')(?:[?#][^"\']*)?)\1[^>]*>/i';

    // preg_match_all() yields 0 for "no match" and false on a PCRE error.
    if (!preg_match_all($pattern, $content, $match)) {
        return array();
    }
    return $match[2];
}
?>