news-classifier/crawler-module/kr36-health.txt


这是用于爬取 36kr 上健康相关新闻的代码：
```python
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Search-results page for the keyword "健康" (health); the path segment is the
# percent-encoded UTF-8 form of that keyword.
URL = "https://www.36kr.com/search/articles/%E5%81%A5%E5%BA%B7"
# Site root used to resolve the article links found in the result list.
TARGET_URL = "https://www.36kr.com"
# Send a desktop-browser User-Agent with the request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )
}

# Fetch the live page; raise_for_status() aborts the script on an HTTP error.
resp = requests.get(URL, headers=headers, timeout=10)
resp.raise_for_status()
resp.encoding = "utf-8"

# NOTE(review): the live response above is fetched but not parsed; the script
# parses a saved snapshot instead. To parse the live page, build the soup from
# resp.text rather than from the file contents -- confirm which is intended.
with open("example/example-11.html", "r", encoding="utf-8") as f:
    html = f.read()

soup = BeautifulSoup(html, "lxml")
# One <li> per search result inside the main result list.
li_list = soup.select(
    "div.kr-layout div.kr-layout-main div.kr-layout-content "
    "div.kr-search-result-list ul.kr-search-result-list-main > li"
)

for item in li_list:
    a = item.select_one("div.kr-shadow-content a")
    # select_one() returns None when nothing matches, and get() returns None
    # when the attribute is absent; skip such items instead of crashing.
    link = a.get("href") if a is not None else None
    if not link:
        continue
    # urljoin resolves relative paths against the site root and leaves
    # already-absolute URLs untouched.
    href = urljoin(TARGET_URL, link)
    print(href)

```