爬取东航机票信息

多图预警

听闻最近国外飞国内的机票水涨船高,直到有一天

image-20200514144120051

image-20200514153912180

收益不错

image-20200514145153392

​ 红圈如果有数字 代表有机票

1.0版本

  • 想法很简单,模拟页面点击嘛,用selenium配合chrome驱动来搞

需要关注的页面元素

image-20200514150034355

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import random
import smtplib
import time
from email.mime.text import MIMEText
from email.utils import formataddr

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Mailbox used for the SMTP login and as the envelope sender in send_email().
sender_addr = '[email protected]'
# Envelope recipient handed to sendmail() in send_email().
receiver_addr = '****@gmail.com'

# Incremented by mock() after it has created the browser; 0 means "no browser yet".
cnt = 0

# Chrome webdriver instance shared between mock() calls; None until mock() creates it.
dd = None


def send_email(subj='机票来了', detail='机票来了', sender_nick='水盆羊肉', receiver_nick='收件人昵称'):
    """Send a plain-text UTF-8 notification e-mail over SMTP-over-SSL.

    The message is submitted with the module-level ``sender_addr`` as the
    envelope sender and ``receiver_addr`` as the only envelope recipient.

    Args:
        subj: Value of the ``Subject`` header.
        detail: Plain-text body of the message.
        sender_nick: Display name placed in the ``From`` header.
        receiver_nick: Display name placed in the ``To`` header.

    Raises:
        Exception: Whatever was raised while building or sending the
            message; it is re-raised unchanged after "发送失败" is printed.
    """
    try:
        msg = MIMEText(detail, 'plain', 'utf-8')
        msg['From'] = formataddr((sender_nick, '[email protected]'))
        msg['To'] = formataddr((receiver_nick, '*****@gmail.com'))
        msg['Subject'] = subj
        # The context manager closes the connection even when login() or
        # sendmail() raises; an explicit quit() only ran on the success path.
        with smtplib.SMTP_SSL("mail.takfu.cf", 465) as server:
            server.login(sender_addr, password='*******')
            server.sendmail(sender_addr, [receiver_addr], msg.as_string())
    except Exception:
        print("发送失败")
        # Bare raise keeps the original traceback intact for the caller.
        raise
    else:
        print("发送成功")


def run(sp, p):
    """Inspect a parsed result page and e-mail an alert if a fare is shown.

    A fare counts as shown when any ``dd`` inside a ``section.detail`` of an
    ``article.flight`` carries a class token that starts with ``price``.

    Args:
        sp: Parsed page (the BeautifulSoup object returned by ``mock``).
        p: URL of the tracked page; it is embedded in the alert e-mail.

    Raises:
        Exception: Propagated from ``send_email`` when the alert cannot be
            sent.
    """
    # One decision per poll: the earlier nested loops bailed out after the
    # first class token of the first cell, so a ``price*`` class that was not
    # listed first went unnoticed.
    if _has_price_cell(sp):
        print('需要起来抢票了TCC')
        send_email(subj='机票来了', detail='需要起来抢票了TCC,地址:{}'.format(p))
    else:
        print('票还没到,安心睡觉')


def _has_price_cell(sp):
    """Return True if any flight detail section contains a priced ``dd``."""
    for flight in sp.find_all('article', attrs={'class': 'flight'}):
        for section in flight.find_all('section', attrs={'class': 'detail'}):
            for cell in section.find_all('dd'):
                # str() guards against non-string entries in the class list.
                if any(str(token).startswith('price')
                       for token in cell.get_attribute_list('class')):
                    return True
    return False


def mock(p, driverPath):
    """Load ``p`` in the shared Chrome session and return the parsed page.

    On the first call (module-level ``cnt`` is 0) a Chrome webdriver is
    created and stored in the module-level ``dd``; later calls reuse it.

    Args:
        p: URL of the page to open.
        driverPath: Path to the chromedriver executable, used only when
            Chrome cannot be started without an explicit path.

    Returns:
        A BeautifulSoup tree built from the page source with the
        ``html5lib`` parser.

    Raises:
        selenium.common.exceptions.TimeoutException: If the awaited elements
            do not appear within 10 seconds.
    """
    global cnt
    global dd

    if cnt == 0:
        try:
            dd = webdriver.Chrome()
        except Exception:
            # Fall back to the user-supplied driver. ``Exception`` rather than
            # a bare ``except`` so Ctrl-C / SystemExit are not swallowed.
            dd = webdriver.Chrome(executable_path=driverPath)
        cnt = cnt + 1
    else:
        dd.refresh()

    print("正在打开网页...")
    dd.get(p)
    print("等待网页响应...")
    wait = WebDriverWait(dd, 10)
    # Block until the elements with class ``summary`` are present in the DOM.
    wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'summary')))
    print("等待网页响应...")

    # Then wait for the ``font-size12`` elements before taking the snapshot.
    wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'font-size12')))

    return BeautifulSoup(dd.page_source, "html5lib")


if __name__ == '__main__':
    # Interactive entry point: ask for the driver path, the polling interval
    # and the page to track, then poll until interrupted.
    import traceback

    # Alternative route: 'http://www.ceair.com/booking/sha-kmg-200424_CNY.html'
    path = r'http://www.ceair.com/booking/hnd-sha-200424_CNY.html'
    print(
        '请输入GoogleDriver存储地址!driver下载地址为:\nhttps://sites.google.com/a/chromium.org/chromedriver/downloads \n下载完以后记录存放地址'
        '\n请核对当前chrome浏览器得版本,下载对应版本的driver!')
    driverPath = input('请输入GoogleDriver存储地址')
    print("你输入的内容是: ", driverPath)
    print(r'请输入每次模拟刷新页面得频率范围输入最小与最大2个数字,列入 1 60,系统会从1秒到60秒中随机挑选时间模拟访问网站')
    mn = input(r'输入最小秒数: ')
    mx = input(r'输入最大秒数: ')
    print(r'输入要跟踪的机票网页,例如: http://www.ceair.com/booking/hnd-sha-200424_CNY.html')
    trackurl = input(r'输入跟踪网页,直接回车跟踪东京-上海4月24航班: ')

    if trackurl:
        path = trackurl

    # Show the URL that will actually be tracked (the default when the prompt
    # was left empty), not the raw, possibly empty, input.
    print("driver地址: %s \r\n最小: %s \r\n最大: %s \r\n跟踪地址: %s" % (driverPath, mn, mx, path))

    confirm = input(r'回车确认开始任务')

    # Parse the interval once, up front: a non-numeric value now fails
    # immediately instead of being swallowed on every iteration, and swapped
    # bounds no longer make randint() raise.
    low, high = sorted((int(mn), int(mx)))

    try:
        while True:
            try:
                soup = mock(path, driverPath)
                run(soup, path)
            except Exception:
                # Keep polling, but make the failure visible.
                traceback.print_exc()
            # Sleep after failures too, so an error never turns into a
            # zero-delay retry loop against the site.
            time.sleep(random.randint(low, high))
    finally:
        # Reached on Ctrl-C or a fatal error: close the browser mock() opened.
        if dd is not None:
            dd.quit()

打包命令

1
pyinstaller -F -w -i gen.ico EasternHacker.py

运行效果

image-20200514152759321

image-20200514152330960

用户反馈

image-20200514151808122

image-20200514152503593

代码做了随机请求处理,防止封IP,但是这么实现有几个问题

  1. 窗口无法多开,要同时监控多个航线困难。
  2. 使用难度高,需要输入driver存储地址,朋友就只要一个应用程序,反复双击 ,双击几次监控几条线路。。。
  3. 要多开,要不占资源,就一台机器还得打游戏

显然这样不合适。

image-20200514154223979

image-20200514155440764

可以抢票,程序是对的,但是要进行多线路跟踪,这个套路无法复用,得从页面找出查询ticket API,进行模拟请求。

2.0版本

image-20200514155914753

​ 只要出现salePrice并且冒号后面有一个非0数字,那就说明有票了

image-20200514160239579

​ 模拟测试成功

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import json
import random
import smtplib
import time
from email.mime.text import MIMEText
from email.utils import formataddr

import requests

# Mailbox used for the SMTP login and as the envelope sender in send_email().
sender_addr = '[email protected]'
# Envelope recipient handed to sendmail() in send_email().
receiver_addr = '[email protected]'


def run(url, param, px):
    """Query the fare-search endpoint once and e-mail any fares on sale.

    Args:
        url: Endpoint that receives the POST request.
        param: Form fields sent as the request body.
        px: Truthy to send the request through the local proxy at
            ``127.0.0.1:1081``; falsy to connect directly.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            response.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response has no ``searchProduct`` entry or a fare on
            sale lacks one of the reported fields.
    """
    proxies = None
    if px:
        proxies = {'http': '127.0.0.1:1081',
                   'https': '127.0.0.1:1081'}
    # Without a timeout a stalled connection would block the poller forever.
    r = requests.post(url, data=param, proxies=proxies, timeout=30)
    r.raise_for_status()

    air_info = json.loads(r.text)
    offers = []
    for air in air_info['searchProduct']:
        offer = _extract_offer(air)
        if offer is not None:
            print('grab ok!')
            offers.append(offer)

    print(offers)
    if offers:
        print('prepare send email')
        send_email(subj='机票来了', detail='需要起来抢票了TCC,地址:{%s},信息\r\n %s' % (url, str(offers)))
    else:
        print('无票 安耽睡觉')


def _extract_offer(air):
    """Return the summary dict for one fare entry, or None if it is not on sale."""
    price = air['salePrice']
    if not price:
        return None
    try:
        # float() also accepts decimal strings such as "540.00"; int() did not.
        on_sale = float(price) > 0
    except (TypeError, ValueError):
        # An unparsable price must not abort the whole poll.
        return None
    if not on_sale:
        return None
    return {'售价': price, '限重': air['baggageAllowance'], '仓位': air['cabin']['baseCabinCode'],
            '折扣': air['discount']}


def send_email(subj='机票来了', detail='机票来了', sender_nick='水盆羊肉', receiver_nick='收件人昵称'):
    """Send a plain-text UTF-8 notification e-mail over SMTP-over-SSL.

    The message is submitted with the module-level ``sender_addr`` as the
    envelope sender and ``receiver_addr`` as the only envelope recipient.

    Args:
        subj: Value of the ``Subject`` header.
        detail: Plain-text body of the message.
        sender_nick: Display name placed in the ``From`` header.
        receiver_nick: Display name placed in the ``To`` header.

    Raises:
        Exception: Whatever was raised while building or sending the
            message; it is re-raised unchanged after "发送失败" is printed.
    """
    try:
        msg = MIMEText(detail, 'plain', 'utf-8')
        msg['From'] = formataddr((sender_nick, '[email protected]'))
        msg['To'] = formataddr((receiver_nick, '*****@gmail.com'))
        msg['Subject'] = subj
        # Using the connection as a context manager guarantees it is closed
        # when login() or sendmail() fails, not only after a successful send.
        with smtplib.SMTP_SSL("mail.takfu.cf", 465) as server:
            server.login(sender_addr, password='*******')
            server.sendmail(sender_addr, [receiver_addr], msg.as_string())
    except Exception:
        print("发送失败")
        # Bare raise keeps the original traceback intact for the caller.
        raise
    else:
        print("发送成功")


if __name__ == '__main__':
    # Interactive entry point: ask for the polling interval and the proxy
    # switch, then poll the search endpoint until interrupted.
    import traceback

    url = r'http://www.ceair.com/otabooking/flight-search!doFlightSearch.shtml?Cookie=Webtrends=b8a4785e.5a3afb412c4bd; path=/; expires=Sat, 17-Oct-20 ;JSESSIONID=ng4birPwneWpQDtLDipjQbth.laputaServer7; Path=/'

    # Tokyo (HND) -> Shanghai (SHA)
    param = {'_': '5ed26ab082ae11eaab73474c66149b97',
             'searchCond': '{"adtCount":1,"chdCount":0,"infCount":0,"currency":"CNY","tripType":"OW","recommend":false,"reselect":"","page":"0","sortType":"a","sortExec":"a","segmentList":[{"deptCd":"HND","arrCd":"SHA","deptDt":"2020-04-24","deptAirport":"","arrAirport":"","deptCdTxt":"东京","arrCdTxt":"上海","deptCityCode":"TYO","arrCityCode":"SHA"}],"version":"A.1.0"}'}

    mn = input(r'输入轮询区间最小秒数: ')
    mx = input(r'输入轮询区间最大秒数: ')

    print("轮询区间最小[/秒]: %s \r\n轮询区间最大[/秒]: %s " % (mn, mx))

    px = input(r'是否需要开启代理?输入任意字符开启代理,直接回车不需要代理:')
    confirm = input(r'回车确认开始任务')

    # Parse the interval once, up front: a non-numeric value now fails
    # immediately instead of being swallowed on every iteration, and swapped
    # bounds no longer make randint() raise.
    low, high = sorted((int(mn), int(mx)))

    while True:
        try:
            run(url, param, px)
        except Exception:
            # ``Exception`` (not a bare except) so Ctrl-C still stops the job.
            print("some error occur ,job still running on")
            traceback.print_exc()
        # Sleep after failures too, so an error never turns into a zero-delay
        # retry loop against the endpoint.
        time.sleep(random.randint(low, high))

​ 可以通过控制param具体的请求参数来改变查询航班,可以再次更新版本,暂时写死不封装

体验&反馈

image-20200514160917500

image-20200514161057639

image-20200514161150299

结尾

抢票还在继续,设定的阈值范围不能太过分,被封了自己组扶墙使用代理,2.0客户端开启后可以连接代理监听端口。