news-classifier/crawler-module/tencent-serach-ai.py

50 lines
1.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
def _iter_news_links(data):
    """Yield every article link found under ``secList`` -> ``newsList``.

    Anything that does not have the expected shape (non-dict payload,
    missing or null lists, non-dict entries) is skipped instead of raising,
    so a partially malformed reply still yields the links it does contain.
    """
    sections = data.get("secList") if isinstance(data, dict) else None
    if not isinstance(sections, list):
        return
    for section in sections:
        if not isinstance(section, dict):
            continue
        news_items = section.get("newsList")
        if not isinstance(news_items, list):
            continue
        for item in news_items:
            if not isinstance(item, dict):
                continue
            # Prefer "url"; fall back to "surl" when "url" is missing or empty.
            link = item.get("url") or item.get("surl")
            # NOTE: video entries are not filtered out. If that is ever
            # wanted, skip items that carry a 'video_channel' key here.
            if link:
                yield link


def fetch_qq_news_links(query="AI", page=1, timeout=10):
    """Search Tencent News and print the link of each returned article.

    Sends a single form-encoded POST to the PC search endpoint, then walks
    ``secList`` -> ``newsList`` in the JSON reply and prints one link per
    line. On a non-200 status or a request/decoding failure a message is
    printed instead; no exception is propagated for those cases.

    Args:
        query: Search keyword. Defaults to ``"AI"``, the value that was
            previously hard-coded.
        page: Result page to request. Defaults to ``1``, the value that was
            previously hard-coded.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` would wait indefinitely on a stalled
            connection.

    Returns:
        None. Links and error messages are written to stdout.
    """
    url = "https://i.news.qq.com/gw/pc_search/result"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Referer": "https://new.qq.com/",
        "Origin": "https://new.qq.com",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    # search_type stays "all": per the original author's note this keeps the
    # request success rate up.
    payload = {
        "page": str(page),
        "query": query,
        "is_pc": "1",
        "hippy_custom_version": "25",
        "search_type": "all",
        "search_count_limit": "10",
        "appver": "15.5_qqnews_7.1.80",
    }

    try:
        response = requests.post(
            url, data=payload, headers=headers, timeout=timeout
        )
        if response.status_code != 200:
            print(f"请求失败: {response.status_code}")
            return
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors and timeouts; ValueError
        # covers a body that is not valid JSON.
        print(f"发生错误: {e}")
        return

    for link in _iter_news_links(data):
        print(link)
# Run the crawler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    fetch_qq_news_links()