百度、微博热点监控:可写入数据库做数据分析,方便吃瓜。

一个渣渣PHP自学者 2021-02-16 PM 292℃ 0条

**代码没啥难度,就不做解释了,直接上代码。**
baidu.png

百度的代码

<?php
// The only output of this script is the JSON string echoed at the bottom, so it
// is labelled as JSON. Serving it as text/html would let a browser interpret
// any markup contained in the scraped keywords.
header("Content-Type: application/json; charset=utf-8");
/**
 * Downloads a Baidu hot-search page and converts its entries to JSON.
 */
class Baidu
{
    /**
     * Fetch $url with cURL and extract the hot-search entries from the HTML.
     *
     * @param string $url    Address of the page to download.
     * @param array  $header HTTP request header lines handed to CURLOPT_HTTPHEADER.
     *
     * @return string Pretty-printed JSON list of objects with the keys "keyword",
     *                "ranking" and "url". The list is empty ("[]") when the
     *                download fails or no entry is found.
     */
    public function get_url($url, $header)
    {
        $jsonFlags = JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT; // == 448

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
        // Keep the response headers out of the returned body.
        curl_setopt($ch, CURLOPT_HEADER, false);
        // Return the body as a string instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // NOTE(review): certificate and host verification are switched off, as in
        // the original script. This permits man-in-the-middle tampering; enable
        // both if the host has a usable CA bundle.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);

        $data = curl_exec($ch);
        curl_close($ch);

        // Always start from a list so callers get "[]" rather than "null".
        $arr = [];

        // curl_exec() yields false on failure; there is nothing to parse then.
        if (!is_string($data) || $data === '') {
            return json_encode($arr, $jsonFlags);
        }

        $data = mb_convert_encoding($data, 'UTF-8', 'UTF-8,GBK,GB2312,BIG5');

        // Entry titles.
        preg_match_all('/<a class="list-title" .*?>(.*?)<\/a>/s', $data, $baidu_hots);
        // Search index. The former /U modifier turned ".*?" back into a greedy
        // quantifier, which merged several badges on one line into one match.
        preg_match_all('/<span class="icon-(.*?)>(.*?)<\/span>/', $data, $baidu_sum);
        // Entry links.
        preg_match_all('/<a class="list-title" target="_blank" href=\"(.*?)\".*?>(.*?)<\/a>/s', $data, $baidu_url);

        foreach ($baidu_url[1] as $i => $link) {
            // The three patterns are matched independently, so an index found by
            // one of them is not guaranteed to exist in the other two.
            $keyword = isset($baidu_hots[1][$i]) ? htmlspecialchars_decode($baidu_hots[1][$i]) : '';
            $ranking = isset($baidu_sum[2][$i]) ? $baidu_sum[2][$i] : null;

            $arr[] = ["keyword" => $keyword, "ranking" => $ranking, "url" => $link];
        }

        return json_encode($arr, $jsonFlags);
    }
}

// Download the Baidu hot-search board without extra request headers and print
// the JSON produced by the scraper.
$hotBoardUrl = "http://top.baidu.com/buzz?b=1&fr=topindex";
echo (new Baidu())->get_url($hotBoardUrl, []);

微博的代码

weibo.png

<?php
/**
 * Downloads the Weibo hot-search page and converts its entries to JSON.
 */
class Curl
{
    /**
     * Fetch $url with cURL and extract titles, heat values and search links.
     *
     * @param string $url    Address of the page to download.
     * @param array  $header HTTP request header lines handed to CURLOPT_HTTPHEADER.
     *
     * @return string Pretty-printed JSON object with three parallel lists:
     *                "Hot_search" (titles), "Hotspot" (heat values) and
     *                "Hoturl" (absolute search links). All three lists are
     *                empty when the download fails or nothing matches.
     */
    public function Get_url_wb($url, $header)
    {
        // Prefix that turns the relative query captured from the page into a full link.
        $wburl = "https://s.weibo.com/weibo?q=";
        $useragent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0';

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
        // Keep the response headers out of the returned body.
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
        // Return the body as a string instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // NOTE(review): certificate and host verification are switched off, as in
        // the original script. This permits man-in-the-middle tampering; enable
        // both if the host has a usable CA bundle.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);

        $data = curl_exec($ch);
        curl_close($ch);

        // curl_exec() yields false on failure; parse an empty document instead.
        if (!is_string($data)) {
            $data = '';
        }

        // The former /U modifier turned ".*?" back into a greedy quantifier, so
        // several entries on one line were swallowed by a single match. Without
        // it each ".*?" stops at the first possible terminator.
        preg_match_all('/top" target="_blank">(.*?)<\/a>/', $data, $Hot_search); // titles
        preg_match_all('/<span>(.*?)<\/span>/', $data, $Hotspot);                // heat values
        preg_match_all('/href="\/weibo\?q=(.*?)Refer=top/', $data, $Hoturl);     // relative queries

        $links = [];
        foreach ($Hoturl[1] as $query) {
            $links[] = $wburl . $query;
        }

        $arr = ["Hot_search" => $Hot_search[1], "Hotspot" => $Hotspot[1], "Hoturl" => $links];

        return json_encode($arr, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT); // == 448
    }
}

// The response body is the JSON string echoed below, so label it as JSON.
// Without this header PHP's default text/html is used and a browser would
// interpret any markup contained in the scraped titles.
header('Content-Type: application/json; charset=utf-8');

$a = new Curl();
// No extra request headers are sent.
$header = array();

echo $a->Get_url_wb("https://s.weibo.com/top/summary", $header);

微博前端的代码

<!DOCTYPE html>
<!-- The visible page text is Chinese, so the document language is declared as zh-CN
     (it was "en", which misleads screen readers and browser translation prompts). -->
<html lang="zh-CN">

<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <!-- Local copy of ECharts; the chart is drawn into the #main element below. -->
    <script src="./echarts.min.js"></script>
    <title>Hot</title>
</head>

<body>
    <div style="text-align: center;margin-top: 20px;">微博热榜</div>
    <!-- Chart container looked up by id in the page script. -->
    <div id="main" style="width: 100%;height:1000px;"></div>
    <hr>
</body>
<script src="https://cdn.staticfile.org/jquery/2.2.4/jquery.min.js"></script>
<script>
    // Handle of the single refresh timer; stays null until the first successful load.
    var refreshTimer = null;

    // Load the data once right away.
    GetData();

    /**
     * Request the hot-search JSON from weibo.php and redraw the bar chart.
     *
     * After the first successful response a single 30-second refresh timer is
     * started. Previously a new setInterval was registered on every successful
     * response, so the number of timers (and requests) doubled every cycle.
     */
    function GetData() {
        $.ajax({
            type: "get",         // HTTP method
            url: "weibo.php",    // endpoint that returns the scraped data
            dataType: "json",    // jQuery parses the response as JSON
            success: function (msg) {
                // Ignore responses that do not carry both lists.
                if (!msg || !Array.isArray(msg.Hot_search) || !Array.isArray(msg.Hotspot)) {
                    return;
                }
                var keywords = msg.Hot_search;
                var value = msg.Hotspot;
                // Both lists are reversed before being handed to the chart.
                CreateBar(keywords.reverse(), value.reverse());
                if (refreshTimer === null) {
                    refreshTimer = setInterval(GetData, 30000); // refresh every 30 s
                }
            }
        });
    }
    /**
     * Draw (or redraw) the bar chart inside the #main element.
     *
     * @param {Array} keywords Category labels for the y axis.
     * @param {Array} value    Bar values, in the same order as keywords.
     */
    function CreateBar(keywords, value) {
        var container = document.getElementById('main');
        // Reuse the chart that is already attached to the container. Calling
        // echarts.init again on every refresh created a second instance on the
        // same node and stacked one more click handler each time.
        var myChart = echarts.getInstanceByDom(container);
        if (!myChart) {
            myChart = echarts.init(container);
            myChart.on('click', function (param) {
                window.open('#');
            });
        }
        // Chart configuration and data.
        var option = {
            title: {
                text: ''
            },
            tooltip: {},
            grid: {
                top: "5%",
                left: "16%",
                bottom: "5%"
            },
            // NOTE(review): this legend entry differs from the series name below;
            // confirm which label is intended.
            legend: {
                data: ['热搜词']
            },
            xAxis: {
            },
            yAxis: {
                data: keywords
            },
            series: [{
                name: '搜索量',
                type: 'bar',
                itemStyle: {
                    normal: {
                        color: '#ff9406'
                    }
                },
                data: value
            }]
        };
        myChart.setOption(option);
    }
</script>
</html>

微博后端的代码引用自九霄道长的博客。

非特殊说明,本博所有文章均为博主原创。

评论啦~