You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
69 lines
1.9 KiB
69 lines
1.9 KiB
<?php |
|
/** |
|
* Created by IntelliJ IDEA. |
|
* User: luwei |
|
* Date: 2017/9/9 |
|
* Time: 11:53 |
|
*/ |
|
|
|
namespace app\models; |
|
|
|
use Curl\Curl; |
|
use yii\helpers\VarDumper; |
|
|
|
/** |
|
* 数据采集 |
|
*/ |
|
class DataAcquisition |
|
{ |
|
public static function getData($url) |
|
{ |
|
preg_match("/^https{0,1}:\/\/[\w\.]*\//", $url, $res); |
|
if (!(is_array($res) && count($res) > 0 && $res[0])) { |
|
return null; |
|
} |
|
$host = $res[0]; |
|
$host = preg_replace("/https{0,1}:\/\//", "", $host); |
|
$host = preg_replace("/\//", "", $host); |
|
$res = null; |
|
if ($host == 'item.taobao.com') { |
|
$res = self::getTaobaoData($url); |
|
} |
|
//VarDumper::dump($res, 3, 1); |
|
} |
|
|
|
public static function getTaobaoData($url) |
|
{ |
|
$curl = new Curl(); |
|
$curl->setOpt(CURLOPT_SSL_VERIFYHOST, false); |
|
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false); |
|
$curl->setOpt(CURLOPT_HEADER, ''); |
|
$curl->get($url); |
|
$res = mb_convert_encoding($curl->response, 'utf-8', 'gbk'); |
|
$pattern_list = [ |
|
'title' => "/<h3.*>.*<\/h3>/", |
|
]; |
|
$data = []; |
|
foreach ($pattern_list as $key => $pattern) { |
|
preg_match_all($pattern, $res, $val); |
|
$data[$key] = $val; |
|
} |
|
|
|
VarDumper::dump($data, 5, 1); |
|
//return $res; |
|
} |
|
|
|
private static function gbkToUtf8($str) |
|
{ |
|
$pregstr = "/[\x{4e00}-\x{9fa5}]+/u";//UTF-8中文正则 |
|
if (preg_match_all($pregstr, $str, $matchArray)) {//匹配中文,返回数组 |
|
foreach ($matchArray[0] as $key => $val) { |
|
$url = str_replace($val, urlencode($val), $str);//将转译替换中文 |
|
} |
|
if (strpos($str, ' ')) {//若存在空格 |
|
$url = str_replace(' ', '%20', $str); |
|
} |
|
} |
|
return $str; |
|
} |
|
}
|
|
|