1
1
import os
2
2
import time
3
+
3
4
from selenium import webdriver
5
+ from selenium .webdriver .chrome .options import Options
4
6
from selenium .webdriver .chrome .service import Service
5
7
from selenium .webdriver .common .by import By
6
- from selenium .webdriver .support .ui import WebDriverWait
7
8
from selenium .webdriver .support import expected_conditions as EC
8
- from selenium .webdriver .chrome . options import Options
9
+ from selenium .webdriver .support . ui import WebDriverWait
9
10
10
11
11
def setup_driver(cookie_value):
    """Create a headless Chrome driver with the session cookie installed.

    The driver first loads ``https://adventofcode.com/robots.txt`` so that a
    page on the target domain is open when the cookie is added, clears any
    existing cookies, and then stores ``cookie_value`` under the name
    ``session``.

    Args:
        cookie_value: Value to store in the ``session`` cookie.

    Returns:
        The initialized ``webdriver.Chrome`` instance.

    Raises:
        ValueError: If ``cookie_value`` is empty or ``None``.
        Exception: Any error raised while starting Chrome or installing the
            cookie is logged and re-raised; the browser is shut down first
            so no Chrome process is leaked.
    """
    # Fail fast, before a browser process is launched.
    if not cookie_value:
        raise ValueError('Cookie value is empty')

    print('Setting up Chrome driver...')
    chrome_options = Options()

    # Basic Chrome options
    chrome_options.add_argument('--headless=new')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--window-size=1920,1080')

    # Add headers
    # NOTE(review): these two are passed as command-line switches; they do
    # not appear to be documented Chromium switches, so they may have no
    # effect on the request headers actually sent -- confirm.
    chrome_options.add_argument('--accept-encoding=gzip, deflate, br, zstd')
    chrome_options.add_argument('--accept-language=en,cs;q=0.9')
    chrome_options.add_argument(
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    )

    print('Chrome options configured, initializing driver...')

    driver = None
    try:
        service = Service()
        driver = webdriver.Chrome(service=service, options=chrome_options)
        driver.set_page_load_timeout(30)

        # Set up cookie before navigating to the real target page. A page on
        # the domain is loaded first so the cookie is added for that domain.
        print('Setting up cookie...')
        driver.get('https://adventofcode.com/robots.txt')
        driver.delete_all_cookies()

        driver.add_cookie(
            {
                'name': 'session',
                'value': cookie_value,
            }
        )

        # Verify cookie. The summary is built separately so the f-string
        # below needs no nested quotes (nested same-type quotes only parse
        # on Python 3.12+).
        cookies = driver.get_cookies()
        cookie_summary = [f"{c['name']}={c['domain']}" for c in cookies]
        print(f'Cookies after setting: {cookie_summary}')

        print('Chrome driver initialized successfully')
        return driver
    except Exception as e:
        print(f'Failed to initialize Chrome driver: {e!s}')
        # The caller never receives the driver on failure, so it cannot
        # clean up; quit here to avoid leaving a Chrome process behind.
        if driver is not None:
            try:
                driver.quit()
            except Exception as quit_error:
                print(f'Failed to shut down Chrome driver: {quit_error!s}')
        raise
63
57
64
58
65
def _dump_page_state(driver):
    """Print the current URL and the first 1000 characters of page source.

    Best-effort only: any error while reading the state is reported and
    swallowed so it cannot replace the exception being handled by the caller.
    """
    try:
        print(f'Current URL: {driver.current_url}')
        print('Page source:')
        print(driver.page_source[:1000])  # Print first 1000 characters of source
    except Exception as debug_error:
        print(f'Could not collect page debug info: {debug_error!s}')


def take_screenshot(driver, url, selector, output_name, output_dir='screenshots'):
    """Save a PNG of the element matching ``selector`` on the page at ``url``.

    Two files are written into ``output_dir``: ``<output_name>_full.png``
    (the viewport right after loading, for debugging) and
    ``<output_name>.png`` (the matched element only).

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Page to navigate to.
        selector: CSS selector of the element to capture.
        output_name: Base file name (without extension) for the images.
        output_dir: Directory the images are written to; created if missing.

    Raises:
        Exception: Any error from navigation, waiting for the element or
            taking the screenshot is logged together with page debug
            information and then re-raised unchanged.
    """
    print(f'Attempting to take screenshot of {selector} at {url}')
    os.makedirs(output_dir, exist_ok=True)

    try:
        print(f'Navigating to target URL: {url}')
        driver.get(url)
        time.sleep(3)  # Give the page time to load

        # Take full page screenshot for debugging
        driver.save_screenshot(f'{output_dir}/{output_name}_full.png')
        print('Saved full page screenshot for debugging')

        print('Waiting for element...')
        element = WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, selector))
        )

        print('Element found, taking screenshot...')
        driver.execute_script('arguments[0].scrollIntoView();', element)
        time.sleep(2)  # Pause after scrolling before capturing the element
        element.screenshot(f'{output_dir}/{output_name}.png')
        print(f'Screenshot saved as {output_dir}/{output_name}.png')

    except Exception as e:
        print(f'Error during screenshot process: {e!s}')
        # Reading driver state can itself fail (e.g. a dead session); the
        # helper guards against that so the original error is what propagates.
        _dump_page_state(driver)
        raise
122
89
123
90
124
91
def main():
    """Run the screenshot job using the session cookie from ``COOKIE``.

    Reads the ``COOKIE`` environment variable, starts the browser with that
    cookie, captures the ``body > main > pre`` element of the Advent of Code
    2024 page as ``aoc-content``, and always shuts the browser down.

    Raises:
        ValueError: If ``COOKIE`` is unset or empty.
        Exception: Any failure from driver setup or the screenshot step is
            logged and re-raised.
    """
    print('Starting screenshot process...')

    # The session cookie is supplied through the environment.
    session_cookie = os.environ.get('COOKIE')
    if not session_cookie:
        raise ValueError('COOKIE environment variable not set')

    browser = None
    try:
        browser = setup_driver(session_cookie)
        take_screenshot(
            driver=browser,
            url='https://adventofcode.com/2024',
            selector='body > main > pre',
            output_name='aoc-content',
        )
    except Exception as exc:
        print(f'Fatal error in main: {exc!s}')
        raise
    finally:
        # Only quit when a driver was actually created.
        if browser is not None:
            print('Cleaning up driver...')
            browser.quit()
150
116
151
117
152
# Run the screenshot job only when executed as a script, not on import.
if __name__ == '__main__':
    main()
0 commit comments