[實作教學]使用Line Notify收到Dcard最新文章通知
透過官方帳號進行推播,發送成本會隨著人數越來越多而增加;Line Notify 則是官方提供、專門用來傳送通知的帳號,重要的是免費!!
首先登入到 LINE Notify 並進入到個人頁面
到這邊 Line Notify 的設定就完成了,接著看程式碼:https://github.com/hardy1234554321/shockuccu-linenotify
使用到的 python 模組
- requests:建立與 Dcard 之間連線,獲取我們要的網頁資料
- bs4:BeautifulSoup是一個用來解析HTML結構的Python套件
- re:利用正則表達式分析url
- time:用 time.sleep() 進行睡眠,降低發送 request 頻率
- datetime:取得當前時間
- sqlite3:簡單紀錄文章資訊
Line Notify 參數設定
- token:傳送權證
- message:傳送要發送的訊息
- img:傳送要發送的圖片url
- isNotificationDisabled:是否關閉通知(設為 True 時,收到訊息不會跳出推播通知)
def lineNotifyMessage(token, message, img, isNotificationDisabled=False):
    """Send one message through the LINE Notify API.

    Args:
        token: LINE Notify access token, sent as a Bearer credential.
        message: Text of the notification.
        img: Image URL used for both the thumbnail and the full-size
            image. A falsy value ('' or None) sends a text-only message.
        isNotificationDisabled: When true, the message is delivered with
            the ``notificationDisabled`` flag set.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            does not complete within the timeout.
    """
    headers = {
        "Authorization": "Bearer " + token,
        "Content-Type": "application/x-www-form-urlencoded"
    }
    payload = {
        'message': message,
        # Send the lowercase form instead of Python's str(bool)
        # ("True"/"False") so the flag is unambiguous on the wire.
        'notificationDisabled': 'true' if isNotificationDisabled else 'false',
    }
    # Only attach the image fields when there is an image; blank
    # imageThumbnail/imageFullsize values are not valid URLs.
    if img:
        payload['imageThumbnail'] = img
        payload['imageFullsize'] = img
    # The fields go in the request body (data=) to match the declared
    # form-urlencoded Content-Type; the timeout keeps a stalled
    # connection from blocking the polling loop forever.
    r = requests.post("https://notify-api.line.me/api/notify",
                      headers=headers, data=payload, timeout=10)
    print(r.status_code)
    # Pause between sends to keep the request rate low.
    time.sleep(1)
分析 Dcard 網頁資訊
def spider_dcard_sex(token):
    """Scan the Dcard board listing and push unseen posts to LINE Notify.

    For every post link found on the listing page that is not yet in the
    ``articles`` table, the post page is fetched, its title and image
    links are extracted, a text notification is sent, the post is
    recorded, and each image is then sent as a silent follow-up message.

    Uses the module-level ``c`` (sqlite3 cursor) and ``conn`` (sqlite3
    connection), which must exist before this function is called.

    Args:
        token: LINE Notify access token passed to ``lineNotifyMessage``.
    """
    my_headers = {
        'cookie': 'over18=1;'
    }
    list_url = 'https://www.dcard.tw/f/sex?latest=true'
    response = requests.get(list_url, headers=my_headers, timeout=10)
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    # Same pattern as before, written as a raw string (so "\S" is not an
    # invalid string escape) and compiled once outside the loops.
    img_pattern = re.compile(
        r'https?://imgur.dcard.tw?\S+?/\S+?\.(?:jpg|gif|png)')

    for ar in soup.find_all('a'):
        article_url = 'https://www.dcard.tw%s' % ar.get('href')
        if '/f/sex/p/' not in article_url:
            continue

        # Skip posts that were already sent. Placeholders are used
        # because the URL comes from scraped HTML.
        c.execute("SELECT * FROM articles WHERE a_url = ?", (article_url,))
        if c.fetchone() is not None:
            print('暫無新文章 %s' % datetime.datetime.now())
            time.sleep(1)
            continue

        # Load the post page.
        response = requests.get(article_url, timeout=10)
        article_soup = bs4.BeautifulSoup(response.text, "html.parser")

        # Skip posts that have been removed.
        missing = article_soup.find('h1', 'sc-7mzcsk-2')
        if missing and missing.string == 'Oh!文章不見了':
            continue

        # Title. If the expected heading is absent, skip the post
        # instead of failing on ``None.text``.
        h1_title = article_soup.find('h1', 'sc-1932jlp-0')
        if h1_title is None:
            print('title not found, skipped: %s' % article_url)
            continue
        title = h1_title.text

        # Collect image links only; <img> tags without a src are ignored.
        img_list = []
        for img_tag in article_soup.find_all('img'):
            src = img_tag.get('src')
            if not src:
                continue
            matches = img_pattern.findall(src)
            if matches:
                img_list.append(matches[0])

        # Announce the post.
        msg = '\n'
        msg += '\n標題:%s' % title
        msg += '\n網址:%s' % article_url
        print(msg)
        lineNotifyMessage(token=token, message=msg, img='')

        # Record the post. The title is scraped text and may contain
        # quotes, so it is bound as a parameter rather than interpolated.
        c.execute(
            "INSERT INTO 'articles' ('id','a_url','a_author','a_title','a_type') "
            "VALUES (NULL,?,?,?,?)",
            (article_url, 'dcard', title, 'DCARD'))
        conn.commit()

        # Send each image as its own message with notifications disabled.
        for img_url in img_list:
            img_msg = '\n%s' % (img_url)
            print(img_msg)
            lineNotifyMessage(token=token, message=img_msg, img=img_url,
                              isNotificationDisabled=True)

        # Pause before moving on to the next post.
        time.sleep(1)
把 <access_token> 替換為剛剛的權證就完成了
if __name__ == "__main__":
    import os

    # sqlite3.connect() does not create missing parent directories, so
    # make sure the database folder exists before the first connection.
    os.makedirs('db', exist_ok=True)

    # Poll forever; each pass opens its own connection. ``conn`` and
    # ``c`` are module-level names read by spider_dcard_sex().
    while (1):
        # Open (or create) the database.
        conn = sqlite3.connect('db/shockuccu.db')
        try:
            c = conn.cursor()
            # Create the table on first use.
            sql = "CREATE TABLE IF NOT EXISTS 'articles' (\
            'id' INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\
            'a_url' TEXT NOT NULL,\
            'a_author' TEXT NOT NULL,\
            'a_title' TEXT NOT NULL,\
            'a_type' TEXT NOT NULL\
            )"
            c.execute(sql)
            conn.commit()
            # Dcard board crawl; replace <access_token> with the real
            # LINE Notify token.
            token = '<access_token>'
            spider_dcard_sex(token)
        finally:
            # Close the connection even when the crawl raises.
            conn.close()
留言
張貼留言