-
Notifications
You must be signed in to change notification settings - Fork 1
/
booking_room_facility.py
118 lines (93 loc) · 3.97 KB
/
booking_room_facility.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python
# coding=utf-8
import pprint
import csv
import click
import requests
import datetime as datetime
# from splinter import Browser
import time
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import ElementNotVisibleException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import WebDriverException
def validate_d(date_text):
    """Check that *date_text* is a calendar date written as YYYY-MM-DD.

    Args:
        date_text: The string to validate, e.g. ``"2018-03-21"``.

    Returns:
        None. The function only signals failure, by raising.

    Raises:
        ValueError: If *date_text* cannot be parsed with the ``%Y-%m-%d``
            format. The parser's own error is attached as ``__cause__``.
    """
    try:
        datetime.datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError as err:
        # Chain explicitly so the traceback shows why parsing failed.
        raise ValueError(
            "Incorrect data format, should be YYYY-MM-DD"
        ) from err
def daterange(start_date, end_date):
    """Yield each day from *start_date* up to, but excluding, *end_date*.

    Both arguments must support subtraction yielding an object with a
    ``days`` attribute (as ``datetime.date`` / ``datetime.datetime`` do).
    Nothing is yielded when *end_date* is not after *start_date*.
    """
    total_days = (end_date - start_date).days
    offset = 0
    while offset < total_days:
        yield start_date + datetime.timedelta(days=offset)
        offset += 1
def check_exists_by_css_selector(element, selector):
    """Report whether *element* contains a node matching a CSS selector.

    Args:
        element: Any object exposing Selenium's ``find_element(by, value)``
            (a WebElement or a WebDriver).
        selector: The CSS selector to look up beneath *element*.

    Returns:
        True if the lookup succeeds, False if Selenium raises
        ``NoSuchElementException``. Other exceptions propagate unchanged.
    """
    try:
        # find_element(By.CSS_SELECTOR, ...) replaces the
        # find_element_by_css_selector shortcut, which newer Selenium
        # releases no longer provide.
        element.find_element(By.CSS_SELECTOR, selector)
    except NoSuchElementException:
        return False
    return True
# Input used when --input-csv is not given (the file the script was last
# hard-wired to read).
_DEFAULT_INPUT_CSV = 'output_booking_hotel_href_phuket2.csv'

# Seconds to pause after each page load before reading the room table.
_PAGE_SETTLE_SECONDS = 10

# One table cell per room type on a hotel page.
_ROOM_CELL_SELECTOR = (
    'td.roomType.room-type-container.rt__room-detail'
    '.rt__room-detail--legibility'
)


def _scrape_room_facilities(driver, url):
    """Load the hotel page at *url* and return one dict per listed room.

    Each dict holds ``hotel_name``, ``hotel_href`` and ``room_name`` plus
    one ``<facility text>: 'Y'`` item for every facility span found in the
    room's cell. An empty list is returned when the page has no room cells.
    """
    driver.get(url)
    # Wait (up to 20 s) until the page's `$.active` counter reads zero.
    WebDriverWait(driver, 20).until(
        lambda drv: drv.execute_script("return $.active == 0")
    )
    time.sleep(_PAGE_SETTLE_SECONDS)

    room_cells = driver.find_elements(By.CSS_SELECTOR, _ROOM_CELL_SELECTOR)
    if not room_cells:
        # Skip the hotel-name lookup: it is only needed when rooms exist.
        return []

    # The hotel name is the same for every room, so read it once per page.
    hotel_name = driver.find_element(
        By.CSS_SELECTOR, 'h2#hp_hotel_name'
    ).text

    rows = []
    for cell in room_cells:
        row = {
            'hotel_name': hotel_name,
            'hotel_href': url,
            'room_name': cell.find_element(
                By.CSS_SELECTOR,
                'a.jqrt.togglelink.js-track-hp-rt-room-name',
            ).text,
        }

        # find_elements returns an empty list on a miss, so no separate
        # existence probe is needed before iterating.
        for icon in cell.find_elements(
            By.CSS_SELECTOR, 'div.iconfont_wrapper > span'
        ):
            row[icon.text] = 'Y'

        more_links = cell.find_elements(
            By.CSS_SELECTOR, 'a.more_facilities.rt-show-more-facilities'
        )
        if more_links:
            # Click the "more facilities" link before reading the spans
            # under the extra-facilities container.
            more_links[0].click()
            for icon in cell.find_elements(
                By.CSS_SELECTOR, 'div.rt-all-facilities-hidden > span'
            ):
                row[icon.text] = 'Y'

        rows.append(row)
    return rows


def _write_facility_csv(rows, path):
    """Write *rows* to *path* as CSV with a stable, first-seen column order."""
    # dict.fromkeys de-duplicates while keeping insertion order, so the
    # three identifying columns lead and facility columns follow in the
    # order they were first encountered (a set would shuffle them per run).
    field_names = list(dict.fromkeys(key for row in rows for key in row))
    # newline='' lets the csv module control line endings (no blank rows
    # on Windows).
    with open(path, 'w', encoding='utf-8', newline='') as output_file:
        writer = csv.DictWriter(output_file, field_names)
        writer.writeheader()
        writer.writerows(rows)


@click.command()
@click.option(
    '--input-csv',
    'input_csv',
    default=_DEFAULT_INPUT_CSV,
    show_default=True,
    type=click.Path(exists=True, dir_okay=False),
    help='CSV file with a "hotel_href" column of hotel page URLs.',
)
def booking(input_csv=_DEFAULT_INPUT_CSV):
    """Scrape per-room facilities for every hotel URL in INPUT_CSV.

    Opens each non-empty ``hotel_href`` in Firefox, collects one record per
    room (hotel name, URL, room name and a 'Y' flag per facility) and writes
    all records to ``output_hotel_facilities_room_<yymmdd_HHMM>.csv`` in the
    current directory. Rooms lacking a facility get an empty cell in that
    facility's column.

    When nothing was scraped, no output file is created and a notice is
    printed to stderr instead.
    """
    rows = []
    driver = webdriver.Firefox()
    try:
        driver.implicitly_wait(10)
        with open(input_csv, encoding='utf-8-sig', newline='') as csvfile:
            for record in csv.DictReader(csvfile):
                url = record['hotel_href']
                if url:
                    rows.extend(_scrape_room_facilities(driver, url))
    finally:
        # Always close the browser, even if a page fails mid-run.
        driver.quit()

    if not rows:
        click.echo('No room facilities were scraped; nothing written.',
                   err=True)
        return

    output_path = (
        'output_hotel_facilities_room_'
        + datetime.datetime.now().strftime('%y%m%d_%H%M')
        + '.csv'
    )
    _write_facility_csv(rows, output_path)
# Run the click command only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    booking()