css扒皮
/**
 * Collects, from the stylesheets a page links to, only the CSS rules the page
 * appears to use: every rule whose selector does not start with "." or "#"
 * (treated as base/"reset" rules) plus every rule whose selector mentions a
 * class or id found in the HTML.
 *
 * Parsing is regex based. Nested blocks such as "@media { ... }" are not
 * understood and will be split at the first closing brace.
 *
 * The instance accumulates state across calls; use a fresh instance per page.
 */
class Grabstyle
{
    /** @var array selector => declarations; entries are removed once selected */
    public $styles = array();
    /** @var array snapshot of $styles taken before selection starts */
    public $styles1 = array();
    /** @var array stylesheet hrefs (ending in ".css") found in the HTML */
    public $links = array();
    /** @var array individual class names found in the HTML */
    public $class = array();
    /** @var array selectors that were selected for output */
    public $CSS = array();
    /** @var array id values found in the HTML */
    public $id = array();
    /** @var array selector => formatted "selector { declarations }" rule */
    public $newStyles = array();
    /** @var string HTML of the page last loaded by parseUrl() */
    public $html = '';
    /** @var string location last passed to parseUrl() */
    public $url = '';

    /**
     * Parses an HTML document, loads its linked stylesheets and returns the
     * rules that apply to the classes/ids used in the document.
     *
     * @param string $html HTML source.
     * @return string Selected rules, one per line.
     */
    public function parseHtml($html)
    {
        $html = (string) $html;

        // Stylesheet links: href value ending in ".css", either quote style;
        // an optional query string / fragment is matched but not captured.
        preg_match_all('~href\s*=\s*(["\'])([^"\'>]+?\.css)(?:[?#][^"\']*)?\1~i', $html, $links);
        // De-duplicate so the same stylesheet is not parsed twice.
        $this->links = array_values(array_unique($links[2]));

        // When the page location is known, resolve the hrefs against it so
        // relative links are fetched from the right place.
        $targets = ((string) $this->url !== '')
            ? $this->_expandlinks($this->links, $this->url)
            : $this->links;

        foreach ((array) $targets as $link) {
            $cssContent = file_get_contents($link);
            if ($cssContent === false) {
                // Unreadable stylesheet: skip it instead of parsing "false".
                continue;
            }

            // Collect every url(...) / src="..." reference inside the stylesheet.
            preg_match_all("/url\(\s*(?:[\"\'])?(.*?)\s*(?:[\"\'])?\)|src\=(?:[\"\'])([^'\"]+)/isx", $cssContent, $found);
            $references = array_unique(array_filter(array_merge((array) $found[1], (array) $found[2])));

            if ($references) {
                // strtr() replaces in a single pass (longest key first), so an
                // already expanded URL is never rewritten a second time.
                $map = array();
                foreach ((array) $this->_expandlinks($references, $link) as $key => $absolute) {
                    $map[$references[$key]] = $absolute;
                }
                $cssContent = strtr($cssContent, $map);
            }

            $this->parseStyle($cssContent);
        }

        $this->styles1 = $this->styles;
        $this->getResetStyle();

        // Parse class attributes; one attribute may carry several class names.
        preg_match_all('~\sclass\s*=\s*(["\'])(.*?)\1~is', $html, $all);
        $this->class = array_values(array_unique(
            preg_split('~\s+~', implode(' ', $all[2]), -1, PREG_SPLIT_NO_EMPTY)
        ));
        foreach ($this->class as $index => $name) {
            $this->getStyle($name, $index, 'class');
        }

        // Parse id attributes.
        preg_match_all('~\sid\s*=\s*(["\'])(.*?)\1~is', $html, $all);
        $this->id = array_values(array_unique(array_filter(array_map('trim', $all[2]), 'strlen')));
        foreach ($this->id as $index => $name) {
            $this->getStyle($name, $index);
        }

        foreach (array_unique($this->CSS) as $key) {
            if (isset($this->styles1[$key])) {
                $this->newStyles[$key] = "$key { " . $this->styles1[$key] . " }";
            }
        }

        return implode("\n", $this->newStyles);
    }

    /**
     * Selects every rule whose selector does not start with "." or "#"
     * and removes it from the pool of unclaimed rules.
     */
    public function getResetStyle()
    {
        foreach ($this->styles1 as $key => $val) {
            $key = (string) $key;
            // Bracket offset instead of the "{0}" form, which PHP 8 no longer parses.
            $first = isset($key[0]) ? $key[0] : '';
            if ($first !== '.' && $first !== '#') {
                $this->CSS[] = $key;
                unset($this->styles[$key]);
            }
        }
    }

    /**
     * Selects every unclaimed rule whose selector references the given class or id.
     *
     * The signature matches an array_walk() callback: (value, key, extra).
     *
     * @param string $key  Class name or id value (without "." / "#").
     * @param mixed  $key2 Array key supplied by array_walk(); unused.
     * @param string $type 'class' to match ".name", anything else to match "#name".
     */
    public function getStyle($key, $key2, $type = '')
    {
        $key = (string) $key;
        if ($key === '') {
            return;
        }

        $needle = ($type === 'class' ? '.' : '#') . $key;
        // The name must end where the token ends, so ".btn" does not match ".btn-primary".
        $pattern = '~' . preg_quote($needle, '~') . '(?![\w\-])~';

        foreach (array_keys($this->styles) as $selector) {
            if (preg_match($pattern, (string) $selector)) {
                $this->CSS[] = $selector;
                unset($this->styles[$selector]);
            }
        }
    }

    /**
     * Loads a page and returns the CSS rules it uses.
     *
     * @param string $url Location readable by file_get_contents().
     * @return string Selected rules, one per line (empty if the page cannot be read).
     */
    public function parseUrl($url)
    {
        $this->url = $url;
        $html = file_get_contents($this->url);
        // An unreadable page is treated as an empty document.
        $this->html = ($html === false) ? '' : $html;

        return $this->parseHtml($this->html);
    }

    /**
     * Splits CSS text into "selector => declarations" pairs and adds them to $styles.
     *
     * Comments are removed and selectors trimmed so the first character of a
     * selector is meaningful. Declarations of a repeated selector are appended
     * to the existing ones instead of replacing them.
     *
     * @param string $css Stylesheet text.
     */
    public function parseStyle($css)
    {
        $css = (string) $css;
        $withoutComments = preg_replace('~/\*.*?\*/~s', '', $css);
        if ($withoutComments !== null) {
            $css = $withoutComments;
        }

        preg_match_all("~(.*?)\{(.*?)\}~is", $css, $rules, PREG_SET_ORDER);
        foreach ($rules as $rule) {
            $selector = trim($rule[1]);
            $body = trim($rule[2]);
            if ($selector === '') {
                continue;
            }

            if (!isset($this->styles[$selector])) {
                $this->styles[$selector] = $body;
            } elseif ($this->styles[$selector] !== $body) {
                $this->styles[$selector] = rtrim($this->styles[$selector], "; \t\r\n") . '; ' . $body;
            }
        }
    }

    /**
     * Expands links relative to a base URI.
     *
     * The last path segment of $URI is treated as a file name when it contains
     * a dot. Links that already carry a scheme (http:, https:, data:, mailto:,
     * ...), fragment-only links and empty links are not re-based.
     *
     * @param string|array $links One link or a list of links (keys are preserved).
     * @param string       $URI   Base URI; may itself be relative.
     * @return string|array Expanded link(s), same shape as $links.
     */
    public function _expandlinks($links, $URI)
    {
        // The query string / fragment of the base plays no part in resolution.
        $base = (string) preg_replace('~[?#].*$~s', '', (string) $URI);

        // Split "scheme://host" from the path so the host is never mistaken for a file name.
        $root = '';
        $path = $base;
        if (preg_match('~^([a-z][a-z0-9+.\-]*://[^/]*)(.*)$~is', $base, $m)) {
            $root = $m[1];
            $path = $m[2];
        }

        $slash = strrpos($path, '/');
        $last = ($slash === false) ? $path : (string) substr($path, $slash + 1);
        $isFile = strpos($last, '.') !== false && $last !== '.' && $last !== '..';

        // $prefix is what gets prepended to a relative link (with trailing slash).
        if ($isFile) {
            $prefix = ($slash === false) ? '' : (string) substr($path, 0, $slash + 1);
        } elseif ($path === '') {
            $prefix = '';
        } else {
            $prefix = (substr($path, -1) === '/') ? $path : $path . '/';
        }
        if ($root !== '') {
            $prefix = $root . ($prefix === '' ? '/' : $prefix);
        }

        if (is_array($links)) {
            $expanded = array();
            foreach ($links as $key => $link) {
                $expanded[$key] = $this->resolveLink((string) $link, $root, $prefix);
            }
            return $expanded;
        }

        return $this->resolveLink((string) $links, $root, $prefix);
    }

    /**
     * Resolves one link against a pre-split base.
     *
     * @param string $link   Link to resolve.
     * @param string $root   "scheme://host" of the base, or '' when the base has none.
     * @param string $prefix Base directory including trailing slash, or ''.
     * @return string
     */
    private function resolveLink($link, $root, $prefix)
    {
        if ($link === '' || $link[0] === '#') {
            return $link;
        }
        if (preg_match('~^[a-z][a-z0-9+.\-]*://~i', $link)) {
            return $this->collapseDotSegments($link);
        }
        if (preg_match('~^[a-z][a-z0-9+.\-]*:~i', $link)) {
            // Non-hierarchical scheme (data:, mailto:, ...): leave untouched.
            return $link;
        }
        if (substr($link, 0, 2) === '//') {
            // Protocol-relative link: borrow the scheme of the base when it has one.
            $colon = strpos($root, ':');
            return ($colon === false) ? $link : substr($root, 0, $colon + 1) . $link;
        }
        if ($link[0] === '/') {
            // Root-relative link: cannot be resolved without a host.
            return ($root === '') ? $link : $this->collapseDotSegments($root . $link);
        }

        return $this->collapseDotSegments($prefix . $link);
    }

    /**
     * Removes "/./" and "segment/../" sequences from the path part of a URL,
     * leaving "scheme://host" and any query string / fragment untouched.
     *
     * @param string $url
     * @return string
     */
    private function collapseDotSegments($url)
    {
        $head = '';
        if (preg_match('~^(?:[a-z][a-z0-9+.\-]*:)?//[^/?#]*~i', $url, $m)) {
            $head = $m[0];
            $url = (string) substr($url, strlen($head));
        }

        $tail = '';
        if (preg_match('~[?#].*$~s', $url, $m)) {
            $tail = $m[0];
            $url = (string) substr($url, 0, strlen($url) - strlen($tail));
        }

        // Repeat until stable: every replacement shortens the path, so this terminates.
        do {
            $previous = $url;
            $url = preg_replace('~/\./~', '/', $url);
            $url = preg_replace('~(^|/)(?!\.\.?/)[^/]+/\.\./~', '$1', $url, 1);
        } while ($url !== $previous);

        return $head . $url . $tail;
    }
}
/**
 * Debug helper: prints a human-readable dump of a value followed by a newline.
 *
 * NOTE(review): the original empty leading echo and the bare newline look like
 * remnants of "<pre>" / "</pre>" wrappers lost when the snippet was copied — confirm.
 *
 * @param mixed $var Value to dump with print_r().
 */
function d($var)
{
    // Capture the dump as a string so it can be emitted in a single echo.
    $dump = print_r($var, true);
    echo $dump . '
';
}
//<\s*a\s.*?href\s*=\s*([\"\'])?(?(1)(.*?)\\1|([^\s\>]+)) [^>]*>?(.*?)</a>
// Demo run: load the local page "t.htm" and print the CSS rules it uses.
$obj = new Grabstyle;
print $obj->parseUrl('t.htm');
//$c = $obj->_expandlinks('./baidu/ss.html','http://a.com/tttttt/aa.html');
//d($c);
</pre>