Rename the proxy option from "proxies" to "proxy" and bump VERSION to 2.1.1.

  * core/phpspider.php: the config key and the per-link key are now "proxy",
    defaulting to false. request_url() keeps an empty() guard because its
    $link argument defaults to an empty array, so the key may be absent.
  * core/requests.php: set_proxies() becomes set_proxy(); a non-array argument
    is wrapped in an array before being stored in self::$proxies.
  * core/selector.php: a CSS class selector is translated to an XPath test
    that matches one whole whitespace-separated class token, so ".nav" does
    not match class="navbar".

NOTE(review): leading whitespace of the context lines was reconstructed from a
flattened copy of this patch; apply with `git apply --ignore-whitespace` or
regenerate against the real tree.

diff --git a/core/phpspider.php b/core/phpspider.php
index d330270..a92ea5c 100644
--- a/core/phpspider.php
+++ b/core/phpspider.php
@@ -38,7 +38,7 @@ class phpspider
      * 版本号
      * @var string
      */
-    const VERSION = '2.1.0';
+    const VERSION = '2.1.1';
 
     /**
      * 爬虫爬取每个网页的时间间隔,0表示不延时, 单位: 毫秒
@@ -156,7 +156,7 @@ class phpspider
         'headers'     => array(), // 此url的Headers, 可以为空
         'params'      => array(), // 发送请求时需添加的参数, 可以为空
         'context_data'=> '',      // 此url附加的数据, 可以为空
-        'proxies'     => false,   // 是否使用代理
+        'proxy'       => false,   // proxy for this url (string or array), false for none
         'try_num'     => 0        // 抓取次数
         'max_try'     => 0        // 允许抓取失败次数
     )
@@ -361,7 +361,7 @@ function __construct($configs = array())
         }
 
         $configs['name'] = isset($configs['name']) ? $configs['name'] : 'phpspider';
-        $configs['proxies'] = isset($configs['proxies']) ? $configs['proxies'] : array();
+        $configs['proxy'] = isset($configs['proxy']) ? $configs['proxy'] : false;
         $configs['user_agent'] = isset($configs['user_agent']) ? $configs['user_agent'] : self::AGENT_PC;
         $configs['client_ip'] = isset($configs['client_ip']) ? $configs['client_ip'] : array();
         $configs['interval'] = isset($configs['interval']) ? $configs['interval'] : self::INTERVAL;
@@ -1233,9 +1233,9 @@ public function request_url($url, $link = array())
         }
 
         // 是否设置了代理
-        if (!empty($link['proxies']))
+        if (!empty($link['proxy']))
         {
-            requests::set_proxies($link['proxies']);
+            requests::set_proxy($link['proxy']);
         }
 
         // 如何设置了 HTTP Headers
@@ -1610,9 +1610,9 @@ public function link_compress($link)
             unset($link['context_data']);
         }
 
-        if (empty($link['proxies']))
+        if (empty($link['proxy']))
         {
-            unset($link['proxies']);
+            unset($link['proxy']);
         }
 
         if (empty($link['try_num']))
@@ -1651,7 +1651,7 @@ public function link_uncompress($link)
             'headers' => isset($link['headers']) ? $link['headers'] : array(),
             'params' => isset($link['params']) ? $link['params'] : array(),
             'context_data' => isset($link['context_data']) ? $link['context_data'] : '',
-            'proxies' => isset($link['proxies']) ? $link['proxies'] : self::$configs['proxies'],
+            'proxy' => isset($link['proxy']) ? $link['proxy'] : self::$configs['proxy'],
             'try_num' => isset($link['try_num']) ? $link['try_num'] : 0,
             'max_try' => isset($link['max_try']) ? $link['max_try'] : self::$configs['max_try'],
             'depth' => isset($link['depth']) ? $link['depth'] : 0,
diff --git a/core/requests.php b/core/requests.php
index 183b2b3..4a32b75 100644
--- a/core/requests.php
+++ b/core/requests.php
@@ -82,9 +82,9 @@ public static function set_timeout($timeout)
      * @author seatle
      * @created time :2016-09-18 10:17
      */
-    public static function set_proxies($proxies)
+    public static function set_proxy($proxy)
     {
-        self::$proxies = $proxies;
+        self::$proxies = is_array($proxy) ? $proxy : array($proxy);
     }
 
     /**
diff --git a/core/selector.php b/core/selector.php
index c32e446..4d8d001 100644
--- a/core/selector.php
+++ b/core/selector.php
@@ -177,6 +177,8 @@ private static function _xpath_select($html, $selector, $remove = false)
     private static function _css_select($html, $selector, $remove = false)
     {
         $selector = self::css_to_xpath($selector);
+        //echo $selector."\n";
+        //exit("\n");
         return self::_xpath_select($html, $selector, $remove);
         // 如果加载的不是之前的HTML内容,替换一下验证标识
         //if (self::$dom_auth['css'] != md5($html))
@@ -271,6 +273,7 @@ public static function css_to_xpath($selectors)
                     {
                         $xquery .= '*';
                     }
+                    // IDs are matched exactly
                     $xquery .= "[@id='".substr($s, 1)."']";
                 }
                 // CLASSES
@@ -280,7 +283,8 @@ public static function css_to_xpath($selectors)
                     {
                         $xquery .= '*';
                     }
-                    $xquery .= '[@class]';
+                    // Match one whole class token: pad the normalized class list with spaces so ".nav" does not hit "navbar"
+                    $xquery .= "[contains(concat(' ', normalize-space(@class), ' '), ' ".substr($s, 1)." ')]";
                 }
                 // ATTRIBUTES
                 else if ($s[0] == '[')