本文实例讲述了 PHP 解析 HTML 字符串里所有链接(URL)地址的方法,分享给大家供大家参考。具体如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
<?php
/**
 * Extracts the href targets of all <a> tags found in an HTML string.
 *
 * Targets that start with "javascript:" or "#" are skipped. When $repath is
 * true, every relative target is converted to an absolute URL using
 * $current_url as the base (../x, ./x, x, /x, //host/x and ?query forms).
 * Targets that already carry a scheme (http:, https:, mailto:, ...) are
 * returned unchanged.
 *
 * @param string $html        The HTML of the page.
 * @param string $current_url The full URL the HTML came from
 *                            (only needed when $repath is true).
 * @param bool   $repath      Whether to convert relative links to full URLs.
 *
 * @return array The link targets, in document order.
 */
function pageLinks($html, $current_url = "", $repath = false)
{
    // Group 1 is the opening quote, group 2 the href value. The value ends at
    // the SAME quote character that opened it, so href="it's.html" is kept
    // whole. "<a\s+" keeps <area>/<abbr> out, and [^>] keeps the match inside
    // a single tag.
    $pattern = '/<a\s+(?:[^>]*?\s)?href\s*=\s*(["\'])(?!javascript:|#)((?:(?!\1).)+)\1/i';

    $links = array();
    if (preg_match_all($pattern, $html, $matches)) {
        $links = $matches[2];
    }

    if (!$repath || count($links) === 0 || strlen($current_url) === 0) {
        return $links;
    }

    $base = parse_url($current_url);
    if (!is_array($base) || !isset($base["scheme"]) || !isset($base["host"])) {
        // Without an absolute base URL there is nothing to resolve against.
        return $links;
    }

    foreach ($links as $k => $link) {
        // Each link is resolved independently; no state is shared between
        // iterations.
        $links[$k] = pageLinksResolve($link, $base);
    }

    return $links;
}

/**
 * Resolves a single link against the components of a base URL.
 *
 * @param string $link The link target as found in the href attribute.
 * @param array  $base The result of parse_url() on the base URL; must contain
 *                     at least "scheme" and "host".
 *
 * @return string The absolute URL.
 */
function pageLinksResolve($link, array $base)
{
    // Already absolute: anything that starts with a URI scheme.
    if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $link)) {
        return $link;
    }

    // Protocol-relative: only the scheme is taken from the base.
    if (substr($link, 0, 2) === "//") {
        return $base["scheme"] . ":" . $link;
    }

    // scheme://[user[:pass]@]host[:port]
    $authority = $base["scheme"] . "://";
    if (isset($base["user"])) {
        $authority .= $base["user"];
        if (isset($base["pass"])) {
            $authority .= ":" . $base["pass"];
        }
        $authority .= "@";
    }
    $authority .= $base["host"];
    if (isset($base["port"])) {
        $authority .= ":" . $base["port"];
    }

    $basePath = (isset($base["path"]) && $base["path"] !== "") ? $base["path"] : "/";

    // Separate the path from any trailing ?query / #fragment so that dot
    // segments are only collapsed inside the path.
    $cut    = strcspn($link, "?#");
    $path   = (string) substr($link, 0, $cut);
    $suffix = (string) substr($link, $cut);

    if ($path === "") {
        // Query-only link: keep the base document's path.
        return $authority . $basePath . $suffix;
    }

    if ($path[0] !== "/") {
        // Relative path: merge with the directory of the base path, i.e.
        // everything up to and including its last "/".
        $path = substr($basePath, 0, strrpos($basePath, "/") + 1) . $path;
    }

    return $authority . pageLinksRemoveDotSegments($path) . $suffix;
}

/**
 * Collapses "." and ".." segments in an absolute path.
 *
 * @param string $path A path that starts with "/".
 *
 * @return string The normalised path; ".." never climbs above the root.
 */
function pageLinksRemoveDotSegments($path)
{
    $segments = explode("/", $path);
    $last     = count($segments) - 1;
    $out      = array();

    foreach ($segments as $i => $segment) {
        if ($segment === "." || $segment === "..") {
            // $out[0] is the empty segment before the leading "/"; keep it.
            if ($segment === ".." && count($out) > 1) {
                array_pop($out);
            }
            // A trailing dot segment refers to a directory: keep the slash.
            if ($i === $last) {
                $out[] = "";
            }
            continue;
        }
        $out[] = $segment;
    }

    return implode("/", $out);
}
// Example usage: fetch a page and print the links found in it as plain text.
// $url must hold the address of the page to scan. It was never assigned in
// this script, so a placeholder is used unless an including script set it.
if (!isset($url)) {
    $url = "http://www.example.com/";
}
header("content-type: text/plain");
$html = file_get_contents($url);
if ($html === false) {
    // file_get_contents() returns false on failure; do not parse that.
    exit("Unable to fetch " . $url . "\n");
}
// Gets links from the page:
print_r(pageLinks($html));
// Gets links from the page and formats them to a full valid url:
print_r(pageLinks($html, $url, true));
|
希望本文所述对大家的 PHP 程序设计有所帮助。