酷航
大數學堂
網路爬蟲(Crawler)基礎教學
http://www.largitdata.com/course_list/1
http://www.largitdata.com/course_list/8
網路爬蟲(Crawler)實戰教學
- requirement
sudo apt-get install python-pip
sudo pip install requests
sudo pip install beautifulsoup4
import requests
from bs4 import BeautifulSoup as bs
payload = {
'availabilitySearch.SearchInfo.SearchStations[0].DepartureStationCode':'TPE',
'availabilitySearch.SearchInfo.SearchStations[0].ArrivalStationCode':'NRT',
'availabilitySearch.SearchInfo.SearchStations[0].DepartureDate':'8/17/2016',
'availabilitySearch.SearchInfo.SearchStations[1].DepartureStationCode':'',
'availabilitySearch.SearchInfo.SearchStations[1].ArrivalStationCode':'',
'availabilitySearch.SearchInfo.SearchStations[1].DepartureDate':'9/13/2016',
'availabilitySearch.SearchInfo.Direction':'Return',
'availabilitySearch.SearchInfo.AdultCount':'1',
'availabilitySearch.SearchInfo.ChildrenCount':'0',
'availabilitySearch.SearchInfo.InfantCount':'0',
'availabilitySearch.SearchInfo.PromoCode':'',
}
rs = requests.session()
res = rs.post("http://makeabooking.flyscoot.com/?culture=zh-TW", data=payload, verify=False)
res2 = rs.get("http://makeabooking.flyscoot.com/Flight/Select", verify=False)
#print res2.text
soup = bs(res2.text, "lxml")
for li in soup.select('.departure li'):
if len(li.select('.flightdate')) > 0:
print li.select('.flightdate')[0].text, li.select('.flight_price')[0].text