1
1
import os
2
- from typing import Optional , Sequence , Union
2
+ import httpx
3
+ import time
4
+ from typing import Optional , Sequence , Union , List
5
+ from enum import Enum
6
+ from pydantic import BaseModel , Field
3
7
from playwright .sync_api import sync_playwright
4
8
5
9
10
class BrowserType(str, Enum):
    """Browser names accepted in ``Fingerprint.browsers``.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    CHROME = "chrome"
    FIREFOX = "firefox"
    EDGE = "edge"
    SAFARI = "safari"
15
+
16
+
17
class DeviceType(str, Enum):
    """Device classes accepted in ``Fingerprint.devices``.

    Members are also ``str`` instances equal to their lowercase values.
    """

    DESKTOP = "desktop"
    MOBILE = "mobile"
20
+
21
+
22
class OperatingSystem(str, Enum):
    """Operating-system names accepted in ``Fingerprint.operating_systems``.

    Members are also ``str`` instances equal to their lowercase values.
    """

    WINDOWS = "windows"
    MACOS = "macos"
    LINUX = "linux"
    IOS = "ios"
    ANDROID = "android"
28
+
29
+
30
class SessionStatus(str, Enum):
    """Status values used by ``Session.status`` and ``UpdateSessionOptions.status``.

    Unlike the other enums in this module the values are upper-case and
    identical to the member names.
    """

    NEW = "NEW"
    CREATED = "CREATED"
    ERROR = "ERROR"
    RUNNING = "RUNNING"
    REQUEST_RELEASE = "REQUEST_RELEASE"
    RELEASING = "RELEASING"
    COMPLETED = "COMPLETED"
38
+
39
+
40
class Screen(BaseModel):
    """Optional screen-size bounds used by ``Fingerprint.screen``.

    Every field defaults to ``None`` and is exposed under a camelCase alias.
    NOTE(review): units are presumably pixels -- confirm against the API docs.
    """

    max_height: Optional[int] = Field(None, alias="maxHeight")
    max_width: Optional[int] = Field(None, alias="maxWidth")
    min_height: Optional[int] = Field(None, alias="minHeight")
    min_width: Optional[int] = Field(None, alias="minWidth")
45
+
46
+
47
class Fingerprint(BaseModel):
    """Optional fingerprint settings carried by ``CreateSessionOptions``.

    Every field defaults to ``None``; multi-word fields are exposed under
    camelCase aliases.
    """

    browser_list_query: Optional[str] = Field(None, alias="browserListQuery")
    http_version: Optional[int] = Field(None, alias="httpVersion")
    browsers: Optional[List[BrowserType]] = None
    devices: Optional[List[DeviceType]] = None
    locales: Optional[List[str]] = None
    operating_systems: Optional[List[OperatingSystem]] = Field(
        None, alias="operatingSystems"
    )
    screen: Optional[Screen] = None
57
+
58
+
59
class CreateSessionOptions(BaseModel):
    """Optional settings merged into the payload of ``Browserbase.create_session``.

    Fields left as ``None`` are dropped from the payload, because
    ``create_session`` dumps this model with ``exclude_none=True``.
    """

    project_id: Optional[str] = Field(None, alias="projectId")
    extension_id: Optional[str] = Field(None, alias="extensionId")
    fingerprint: Optional[Fingerprint] = None
63
+
64
+
65
class Session(BaseModel):
    """A session object as parsed from the sessions API JSON responses.

    ``id``, ``createdAt``, ``startedAt``, ``endedAt`` and ``projectId`` must be
    present in the input; all remaining fields default to ``None``.
    """

    id: str
    created_at: str = Field(..., alias="createdAt")
    started_at: str = Field(..., alias="startedAt")
    # Required key, but its value may be null.
    ended_at: Optional[str] = Field(..., alias="endedAt")
    project_id: str = Field(..., alias="projectId")
    status: Optional[SessionStatus] = None
    task_id: Optional[str] = Field(None, alias="taskId")
    proxy_bytes: Optional[int] = Field(None, alias="proxyBytes")
    expires_at: Optional[str] = Field(None, alias="expiresAt")
    # Alias is the snake_case field name itself, unlike the camelCase aliases above.
    avg_cpu_usage: Optional[float] = Field(None, alias="avg_cpu_usage")
    memory_usage: Optional[int] = None
    details: Optional[str] = None
    logs: Optional[str] = None
79
+
80
+
81
class UpdateSessionOptions(BaseModel):
    """Optional fields merged into the payload of ``Browserbase.update_session``.

    Fields left as ``None`` are dropped from the payload, because
    ``update_session`` dumps this model with ``exclude_none=True``.
    """

    project_id: Optional[str] = Field(None, alias="projectId")
    status: Optional[SessionStatus] = None
84
+
85
+
86
class SessionRecording(BaseModel):
    """One item of the list returned by ``Browserbase.get_session_recording``.

    All fields are optional and default to ``None``.
    """

    # ``type`` and ``time`` mirror the JSON keys; they only shadow the builtin
    # and the ``time`` module inside this class body.
    type: Optional[str] = None
    time: Optional[str] = None
    data: Optional[dict] = None
90
+
91
+
92
class DebugConnectionURLs(BaseModel):
    """URLs parsed from the response of ``Browserbase.get_debug_connection_urls``.

    All fields are optional, default to ``None`` and use camelCase aliases.
    """

    debugger_fullscreen_url: Optional[str] = Field(None, alias="debuggerFullscreenUrl")
    debugger_url: Optional[str] = Field(None, alias="debuggerUrl")
    ws_url: Optional[str] = Field(None, alias="wsUrl")
96
+
97
+
98
class Request(BaseModel):
    """Request half of a ``SessionLog`` entry.

    Every field defaults to ``None``. Under pydantic v2 an ``Optional[...]``
    annotation without a default is still a required field, so explicit
    defaults are needed for payloads that omit a key to validate.
    """

    timestamp: Optional[str] = None
    params: Optional[dict] = None
    raw_body: Optional[str] = Field(None, alias="rawBody")
102
+
103
+
104
class Response(BaseModel):
    """Response half of a ``SessionLog`` entry.

    Every field defaults to ``None``. Under pydantic v2 an ``Optional[...]``
    annotation without a default is still a required field, so explicit
    defaults are needed for payloads that omit a key to validate.
    """

    timestamp: Optional[str] = None
    result: Optional[dict] = None
    raw_body: Optional[str] = Field(None, alias="rawBody")
108
+
109
+
110
class SessionLog(BaseModel):
    """One item of the list returned by ``Browserbase.get_session_logs``.

    Only ``id`` is required. The other fields default to ``None`` so that
    entries omitting a key still validate (under pydantic v2 an
    ``Optional[...]`` annotation without a default is a required field).
    """

    session_id: Optional[str] = Field(None, alias="sessionId")
    id: str
    timestamp: Optional[str] = None
    method: Optional[str] = None
    request: Optional[Request] = None
    response: Optional[Response] = None
    page_id: Optional[str] = Field(None, alias="pageId")
118
+
119
+
6
120
class Browserbase :
7
- def __init__ (self , api_key : Optional [str ] = None ):
121
def __init__(
    self,
    api_key: Optional[str] = None,
    project_id: Optional[str] = None,
    api_url: Optional[str] = None,
    connect_url: Optional[str] = None,
):
    """Create new Browserbase SDK client instance.

    Args:
        api_key: API key; when falsy, ``BROWSERBASE_API_KEY`` is read from
            the environment.
        project_id: Project id; when falsy, ``BROWSERBASE_PROJECT_ID`` is
            read from the environment.
        api_url: Base URL of the REST API; defaults to
            ``https://www.browserbase.com``.
        connect_url: Base URL of the browser connection endpoint; defaults
            to ``wss://connect.browserbase.com``.

    Raises:
        KeyError: If ``api_key`` or ``project_id`` is not given and the
            corresponding environment variable is not set.
    """
    # ``os.environ[...]`` is only evaluated when the argument is falsy.
    self.api_key = api_key or os.environ["BROWSERBASE_API_KEY"]
    self.project_id = project_id or os.environ["BROWSERBASE_PROJECT_ID"]
    self.connect_url = connect_url or "wss://connect.browserbase.com"
    self.api_url = api_url or "https://www.browserbase.com"
133
+
134
def list_sessions(self) -> List[Session]:
    """Return the sessions listed by ``GET {api_url}/v1/sessions``.

    Returns:
        One ``Session`` per item of the JSON array in the response.

    Raises:
        httpx.HTTPStatusError: If the response status is 4xx or 5xx.
    """
    response = httpx.get(
        f"{self.api_url}/v1/sessions",
        headers={
            "x-bb-api-key": self.api_key,
            "Content-Type": "application/json",
        },
    )

    response.raise_for_status()
    return [Session(**item) for item in response.json()]
146
+
147
def create_session(self, options: Optional[CreateSessionOptions] = None) -> Session:
    """Create a session via ``POST {api_url}/v1/sessions``.

    Args:
        options: Optional extra settings. Non-``None`` fields are dumped by
            alias and merged over the default payload, so an explicit
            ``project_id`` replaces the client's own project id.

    Returns:
        The ``Session`` parsed from the JSON response.

    Raises:
        httpx.HTTPStatusError: If the response status is 4xx or 5xx.
    """
    payload = {"projectId": self.project_id}
    if options:
        payload.update(options.model_dump(by_alias=True, exclude_none=True))

    response = httpx.post(
        f"{self.api_url}/v1/sessions",
        headers={
            "x-bb-api-key": self.api_key,
            "Content-Type": "application/json",
        },
        json=payload,
    )

    response.raise_for_status()
    return Session(**response.json())
163
+
164
def get_session(self, session_id: str) -> Session:
    """Fetch one session via ``GET {api_url}/v1/sessions/{session_id}``.

    Args:
        session_id: Identifier interpolated into the request path.

    Returns:
        The single ``Session`` parsed from the JSON response (the return
        annotation previously claimed a list).

    Raises:
        httpx.HTTPStatusError: If the response status is 4xx or 5xx.
    """
    response = httpx.get(
        f"{self.api_url}/v1/sessions/{session_id}",
        headers={
            "x-bb-api-key": self.api_key,
            "Content-Type": "application/json",
        },
    )

    response.raise_for_status()
    return Session(**response.json())
175
+
176
def update_session(
    self, session_id: str, options: Optional[UpdateSessionOptions] = None
) -> Session:
    """Update a session via ``POST {api_url}/v1/sessions/{session_id}``.

    Args:
        session_id: Identifier interpolated into the request path.
        options: Optional fields to send. Non-``None`` fields are dumped by
            alias and merged over the default ``{"projectId": ...}`` payload.

    Returns:
        The ``Session`` parsed from the JSON response.

    Raises:
        httpx.HTTPStatusError: If the response status is 4xx or 5xx.
    """
    payload = {"projectId": self.project_id}
    if options:
        payload.update(options.model_dump(by_alias=True, exclude_none=True))

    response = httpx.post(
        f"{self.api_url}/v1/sessions/{session_id}",
        headers={
            "x-bb-api-key": self.api_key,
            "Content-Type": "application/json",
        },
        json=payload,
    )

    response.raise_for_status()
    return Session(**response.json())
194
+
195
def get_session_recording(self, session_id: str) -> List[SessionRecording]:
    """Fetch ``GET {api_url}/v1/sessions/{session_id}/recording``.

    Args:
        session_id: Identifier interpolated into the request path.

    Returns:
        One ``SessionRecording`` per item of the JSON array in the response.

    Raises:
        httpx.HTTPStatusError: If the response status is 4xx or 5xx.
    """
    response = httpx.get(
        f"{self.api_url}/v1/sessions/{session_id}/recording",
        headers={
            "x-bb-api-key": self.api_key,
            "Content-Type": "application/json",
        },
    )

    response.raise_for_status()
    return [SessionRecording(**item) for item in response.json()]
207
+
208
def get_session_downloads(
    self, session_id: str, retry_interval: int = 2000, retry_count: int = 2
) -> Optional[bytes]:
    """Fetch the body of ``GET {api_url}/v1/sessions/{session_id}/downloads``.

    The request is repeated while the response body is empty.

    Args:
        session_id: Identifier interpolated into the request path.
        retry_interval: Pause between attempts, in milliseconds.
        retry_count: Maximum number of attempts; at least one request is
            always made, even for values below 1.

    Returns:
        The first non-empty response body, or ``None`` if every attempt
        returned an empty body.
    """
    # NOTE(review): the HTTP status is not checked here (the other methods
    # call raise_for_status), so a non-empty error body is returned as-is.
    attempts_left = retry_count
    while True:
        response = httpx.get(
            f"{self.api_url}/v1/sessions/{session_id}/downloads",
            headers={
                "x-bb-api-key": self.api_key,
            },
        )
        content = response.read()
        if content:
            return content

        attempts_left -= 1
        if attempts_left <= 0:
            return None
        time.sleep(retry_interval / 1000)
231
+
232
def get_debug_connection_urls(self, session_id: str) -> DebugConnectionURLs:
    """Fetch ``GET {api_url}/v1/sessions/{session_id}/debug``.

    Args:
        session_id: Identifier interpolated into the request path.

    Returns:
        The ``DebugConnectionURLs`` parsed from the JSON response.

    Raises:
        httpx.HTTPStatusError: If the response status is 4xx or 5xx.
    """
    response = httpx.get(
        f"{self.api_url}/v1/sessions/{session_id}/debug",
        headers={
            "x-bb-api-key": self.api_key,
            "Content-Type": "application/json",
        },
    )

    response.raise_for_status()
    return DebugConnectionURLs(**response.json())
243
+
244
def get_session_logs(self, session_id: str) -> List[SessionLog]:
    """Fetch ``GET {api_url}/v1/sessions/{session_id}/logs``.

    Args:
        session_id: Identifier interpolated into the request path.

    Returns:
        One ``SessionLog`` per item of the JSON array in the response.

    Raises:
        httpx.HTTPStatusError: If the response status is 4xx or 5xx.
    """
    response = httpx.get(
        f"{self.api_url}/v1/sessions/{session_id}/logs",
        headers={
            "x-bb-api-key": self.api_key,
            "Content-Type": "application/json",
        },
    )

    response.raise_for_status()
    return [SessionLog(**item) for item in response.json()]
256
+
257
def get_connect_url(
    self, session_id: Optional[str] = None, proxy: Optional[bool] = False
) -> str:
    """Build the connection URL handed to ``connect_over_cdp``.

    Args:
        session_id: When truthy, appended as the ``sessionId`` parameter.
        proxy: When truthy, appends ``enableProxy=true``.

    Returns:
        ``connect_url`` followed by a query string that always carries
        ``apiKey``. Values are percent-encoded, so characters such as ``&``
        or ``=`` inside a value cannot add or alter parameters.
    """
    # Local import keeps this method self-contained.
    from urllib.parse import urlencode

    # Insertion order fixes the parameter order: apiKey, sessionId, enableProxy.
    params = {"apiKey": self.api_key}
    if session_id:
        params["sessionId"] = session_id
    if proxy:
        params["enableProxy"] = "true"
    return f"{self.connect_url}?{urlencode(params)}"
10
264
11
265
def load (self , url : Union [str , Sequence [str ]], ** args ):
12
266
if isinstance (url , str ):
@@ -16,14 +270,20 @@ def load(self, url: Union[str, Sequence[str]], **args):
16
270
else :
17
271
raise TypeError ("Input must be a URL string or a Sequence of URLs" )
18
272
19
- def load_url (self , url : str , text_content : bool = False ):
273
+ def load_url (
274
+ self ,
275
+ url : str ,
276
+ session_id : Optional [str ] = None ,
277
+ proxy : Optional [bool ] = None ,
278
+ text_content : bool = False ,
279
+ ):
20
280
"""Load a page in a headless browser and return the contents"""
21
281
if not url :
22
282
raise ValueError ("Page URL was not provided" )
23
283
24
284
with sync_playwright () as p :
25
285
browser = p .chromium .connect_over_cdp (
26
- f"wss://api.browserbase.com?apiKey= { self .api_key } "
286
+ self .get_connect_url ( session_id , proxy )
27
287
)
28
288
default_context = browser .contexts [0 ]
29
289
page = default_context .pages [0 ]
@@ -40,14 +300,20 @@ def load_url(self, url: str, text_content: bool = False):
40
300
41
301
return html
42
302
43
- def load_urls (self , urls : Sequence [str ], text_content : bool = False ):
303
+ def load_urls (
304
+ self ,
305
+ urls : Sequence [str ],
306
+ session_id : Optional [str ] = None ,
307
+ proxy : Optional [bool ] = None ,
308
+ text_content : bool = False ,
309
+ ):
44
310
"""Load multiple pages in a headless browser and return the contents"""
45
311
if not urls :
46
312
raise ValueError ("Page URL was not provided" )
47
313
48
314
with sync_playwright () as p :
49
315
browser = p .chromium .connect_over_cdp (
50
- f"wss://api.browserbase.com?apiKey= { self .api_key } "
316
+ self .get_connect_url ( session_id , proxy )
51
317
)
52
318
53
319
default_context = browser .contexts [0 ]
@@ -67,14 +333,20 @@ def load_urls(self, urls: Sequence[str], text_content: bool = False):
67
333
68
334
browser .close ()
69
335
70
- def screenshot (self , url : str , full_page : bool = False ):
336
+ def screenshot (
337
+ self ,
338
+ url : str ,
339
+ session_id : Optional [str ] = None ,
340
+ proxy : Optional [bool ] = None ,
341
+ full_page : bool = False ,
342
+ ):
71
343
"""Load a page in a headless browser and return a screenshot as bytes"""
72
344
if not url :
73
345
raise ValueError ("Page URL was not provided" )
74
346
75
347
with sync_playwright () as p :
76
348
browser = p .chromium .connect_over_cdp (
77
- f"wss://api.browserbase.com?apiKey= { self .api_key } "
349
+ self .get_connect_url ( session_id , proxy )
78
350
)
79
351
80
352
page = browser .new_page ()
0 commit comments