# news-classifier/crawler-module/tencent-serach-ai.py

import requests

def _extract_links(data):
    """Collect article links from a decoded search response.

    Walks ``data["secList"][*]["newsList"][*]`` and takes each item's ``url``,
    falling back to ``surl`` when ``url`` is missing or empty. Any level that
    is absent or not of the expected container type is skipped instead of
    raising, so one malformed section does not discard the remaining results.

    Args:
        data: The JSON-decoded response body (expected to be a dict).

    Returns:
        A list of link values in the order they appear in the response.
    """
    if not isinstance(data, dict):
        return []

    sections = data.get("secList")
    if not isinstance(sections, list):
        return []

    links = []
    for section in sections:
        if not isinstance(section, dict):
            continue
        news_items = section.get("newsList")
        if not isinstance(news_items, list):
            continue
        for item in news_items:
            if not isinstance(item, dict):
                continue
            # Prefer "url"; fall back to "surl" when "url" is absent or empty.
            # No content-type filtering is applied here.
            link = item.get("url") or item.get("surl")
            if link:
                links.append(link)
    return links


def fetch_qq_news_links(query="AI", page=1, limit=10, timeout=10):
    """Search the Tencent News PC search endpoint and print result links.

    Sends one form-encoded POST request and prints every link found under
    ``secList -> newsList`` in the JSON response, one per line. Failures are
    reported on stdout rather than raised.

    Args:
        query: Search keyword sent as the ``query`` form field.
        page: Result page number sent as the ``page`` form field.
        limit: Value sent as the ``search_count_limit`` form field.
        timeout: Seconds passed to ``requests.post`` as ``timeout`` so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        None. Links and error messages are written to stdout.
    """
    url = "https://i.news.qq.com/gw/pc_search/result"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Referer": "https://new.qq.com/",
        "Origin": "https://new.qq.com",
        "Content-Type": "application/x-www-form-urlencoded",
    }

    # Keep search_type="all"; the original author notes this keeps the
    # request success rate up.
    payload = {
        "page": str(page),
        "query": query,
        "is_pc": "1",
        "hippy_custom_version": "25",
        "search_type": "all",
        "search_count_limit": str(limit),
        "appver": "15.5_qqnews_7.1.80",
    }

    try:
        response = requests.post(
            url, data=payload, headers=headers, timeout=timeout
        )
        if response.status_code != 200:
            print(f"请求失败: {response.status_code}")
            return
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors and timeouts; ValueError
        # covers a body that is not valid JSON.
        print(f"发生错误: {e}")
        return

    for link in _extract_links(data):
        print(link)

# When executed as a script (not imported), run the search once and print the links.
if __name__ == "__main__":
    fetch_qq_news_links()