Merge pull request #1759 from hlohaus/goo
.har files
hlohaus authored Mar 26, 2024
2 parents cf3f8cc + fd92918 commit dd08125
Showing 9 changed files with 86 additions and 62 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -53,4 +53,5 @@ info.txt
local.py
*.gguf
image.py
.buildozer
.buildozer
hardir
35 changes: 25 additions & 10 deletions README.md
@@ -89,7 +89,7 @@ As per the survey, here is a list of improvements to come

```sh
docker pull hlohaus789/g4f
docker run -p 8080:8080 -p 1337:1337 -p 7900:7900 --shm-size="2g" hlohaus789/g4f:latest
docker run -p 8080:8080 -p 1337:1337 -p 7900:7900 --shm-size="2g" -v ${PWD}/hardir:/app/hardir hlohaus789/g4f:latest
```
3. Open the included client at: [http://localhost:8080/chat/](http://localhost:8080/chat/)
or set the API base in your client to: [http://localhost:1337/v1](http://localhost:1337/v1)
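For example, any OpenAI-compatible client can point at the local interference API. A minimal sketch using the official `openai` Python package (the API key, model name, and prompt are placeholders):

```python
from openai import OpenAI

# Placeholder key, assuming the local API does not require a real one.
client = OpenAI(api_key="not-needed", base_url="http://localhost:1337/v1")

chat = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
)
print(chat.choices[0].message.content)
```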
@@ -218,30 +218,45 @@ See: [/docs/interference](/docs/interference.md)

### Configuration

##### Cookies / Access Token
#### Cookies

For generating images with Bing and for the OpenAI Chat you need cookies or a token from your browser session. From Bing you need the "_U" cookie and from OpenAI you need the "access_token". You can pass the cookies / the access token in the create function or you use the `set_cookies` setter before you run G4F:
You need cookies for BingCreateImages and the Gemini Provider.
From Bing you need the "_U" cookie, and from Gemini you need the "__Secure-1PSID" cookie.
Sometimes the "__Secure-1PSID" cookie is not needed, but other auth cookies are.
You can pass the cookies in the create function, or use the `set_cookies` setter before you run G4F:

```python
from g4f.cookies import set_cookies

set_cookies(".bing.com", {
"_U": "cookie value"
})
set_cookies("chat.openai.com", {
"access_token": "token value"
})
set_cookies(".google.com", {
"__Secure-1PSID": "cookie value"
})

...
```
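Passing the cookies directly in the create call also works. A minimal sketch, assuming the `cookies` keyword argument is forwarded to the provider (the cookie value is a placeholder):

```python
import g4f
from g4f.Provider import Gemini

response = g4f.ChatCompletion.create(
    model=g4f.models.default,
    provider=Gemini,
    messages=[{"role": "user", "content": "Hello"}],
    cookies={"__Secure-1PSID": "cookie value"},  # placeholder value
)
print(response)
```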

Alternatively, G4F reads the cookies with `browser_cookie3` from your browser
or it starts a browser instance with selenium `webdriver` for logging in.
#### .HAR File for OpenaiChat Provider

##### Generating a .HAR File

To utilize the OpenaiChat provider, a .har file is required from https://chat.openai.com/. Follow the steps below to create a valid .har file:

1. Navigate to https://chat.openai.com/ using your preferred web browser and log in with your credentials.
2. Access the Developer Tools in your browser. This can typically be done by right-clicking the page and selecting "Inspect," or by pressing F12 or Ctrl+Shift+I (Cmd+Option+I on a Mac).
3. With the Developer Tools open, switch to the "Network" tab.
4. Reload the website to capture the loading process within the Network tab.
5. Initiate an action in the chat that can be captured in the .har file.
6. Right-click any of the network activities listed and select "Save all as HAR with content" to export the .har file.

##### Storing the .HAR File

- Place the exported .har file in the `./hardir` directory if you are using Docker. Alternatively, you can store it in any preferred location within your current working directory.

Note: Ensure that your .har file is stored securely, as it may contain sensitive information.
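Once the .har file is in place, the provider can be selected explicitly. A minimal sketch, assuming the usual `g4f.ChatCompletion.create` entry point and a working directory from which the .har file can be found (for Docker, the mounted `./hardir`):

```python
import g4f
from g4f.Provider import OpenaiChat

# Sketch only: the .har file must be reachable from the current working directory.
response = g4f.ChatCompletion.create(
    model="gpt-3.5-turbo",
    provider=OpenaiChat,
    messages=[{"role": "user", "content": "Hello"}],
)
print(response)
```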

##### Using Proxy
#### Using Proxy

If you want to hide or change your IP address for the providers, you can set a proxy globally via an environment variable:
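A sketch, assuming the variable is named `G4F_PROXY` (the proxy URL is a placeholder):

```python
import os

# Assumed variable name; set it before any G4F requests are made.
os.environ["G4F_PROXY"] = "http://host:port"
```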

9 changes: 8 additions & 1 deletion docker/Dockerfile
@@ -81,7 +81,14 @@ WORKDIR $G4F_DIR
COPY requirements.txt $G4F_DIR

# Upgrade pip for the latest features and install the project's Python dependencies.
RUN pip install --break-system-packages --upgrade pip && pip install --break-system-packages -r requirements.txt
RUN pip install --break-system-packages --upgrade pip \
&& pip install --break-system-packages -r requirements.txt

# Install selenium driver and uninstall webdriver
RUN pip install --break-system-packages \
undetected-chromedriver selenium-wire \
&& pip uninstall -y --break-system-packages \
webdriver plyer

# Copy the entire package into the container.
ADD --chown=$G4F_USER:$G4F_USER g4f $G4F_DIR/g4f
5 changes: 3 additions & 2 deletions g4f/Provider/bing/conversation.py
@@ -3,8 +3,9 @@
from aiohttp import ClientSession
from ...requests import raise_for_status
from ...errors import RateLimitError
from ...providers.conversation import BaseConversation

class Conversation:
class Conversation(BaseConversation):
"""
Represents a conversation with specific attributes.
"""
@@ -32,7 +33,7 @@ async def create_conversation(session: ClientSession, headers: dict, tone: str)
Returns:
Conversation: An instance representing the created conversation.
"""
if tone == "copilot":
if tone == "Copilot":
url = "https://copilot.microsoft.com/turing/conversation/create?bundleVersion=1.1634.3-nodesign2"
else:
url = "https://www.bing.com/turing/conversation/create?bundleVersion=1.1626.1"
50 changes: 21 additions & 29 deletions g4f/Provider/needs_auth/OpenaiChat.py
@@ -3,10 +3,10 @@
import asyncio
import uuid
import json
import os
import base64
import time
from aiohttp import ClientWebSocketResponse
from copy import copy

try:
import webview
@@ -22,13 +22,13 @@
pass

from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin
from ..helper import get_cookies
from ...webdriver import get_browser
from ...typing import AsyncResult, Messages, Cookies, ImageType, Union, AsyncIterator
from ...requests import get_args_from_browser, raise_for_status
from ...requests.aiohttp import StreamSession
from ...image import to_image, to_bytes, ImageResponse, ImageRequest
from ...errors import MissingRequirementsError, MissingAuthError, ProviderNotWorkingError
from ...errors import MissingAuthError
from ...providers.conversation import BaseConversation
from ..openai.har_file import getArkoseAndAccessToken
from ... import debug

@@ -56,11 +56,6 @@ async def create(
prompt: str = None,
model: str = "",
messages: Messages = [],
history_disabled: bool = False,
action: str = "next",
conversation_id: str = None,
parent_id: str = None,
image: ImageType = None,
**kwargs
) -> Response:
"""
@@ -89,12 +84,7 @@
generator = cls.create_async_generator(
model,
messages,
history_disabled=history_disabled,
action=action,
conversation_id=conversation_id,
parent_id=parent_id,
image=image,
response_fields=True,
return_conversation=True,
**kwargs
)
return Response(
@@ -209,7 +199,7 @@ def create_messages(cls, messages: Messages, image_request: ImageRequest = None)
} for message in messages]

# Check if there is an image response
if image_request:
if image_request is not None:
# Change content in last user message
messages[-1]["content"] = {
"content_type": "multimodal_text",
@@ -308,10 +298,11 @@ async def create_async_generator(
history_disabled: bool = True,
action: str = "next",
conversation_id: str = None,
conversation: Conversation = None,
parent_id: str = None,
image: ImageType = None,
image_name: str = None,
response_fields: bool = False,
return_conversation: bool = False,
**kwargs
) -> AsyncResult:
"""
@@ -330,7 +321,7 @@
conversation_id (str): ID of the conversation.
parent_id (str): ID of the parent message.
image (ImageType): Image to include in the conversation.
response_fields (bool): Flag to include response fields in the output.
return_conversation (bool): Flag to yield the conversation object in the output.
**kwargs: Additional keyword arguments.
Yields:
@@ -387,6 +378,8 @@
arkose_token, api_key, cookies = await getArkoseAndAccessToken(proxy)
cls._create_request_args(cookies)
cls._set_api_key(api_key)
if arkose_token is None:
raise MissingAuthError("No arkose token found in .har file")

try:
image_request = await cls.upload_image(session, cls._headers, image, image_name) if image else None
@@ -396,7 +389,8 @@
print(f"{e.__class__.__name__}: {e}")

model = cls.get_model(model).replace("gpt-3.5-turbo", "text-davinci-002-render-sha")
fields = ResponseFields()
fields = Conversation() if conversation is None else copy(conversation)
fields.finish_reason = None
while fields.finish_reason is None:
conversation_id = conversation_id if fields.conversation_id is None else fields.conversation_id
parent_id = parent_id if fields.message_id is None else fields.message_id
@@ -409,7 +403,7 @@
"conversation_id": conversation_id,
"parent_message_id": parent_id,
"model": model,
"history_and_training_disabled": history_disabled and not auto_continue,
"history_and_training_disabled": history_disabled and not auto_continue and not return_conversation,
"websocket_request_id": websocket_request_id
}
if action != "continue":
@@ -422,8 +416,6 @@
}
if need_arkose:
headers["OpenAI-Sentinel-Arkose-Token"] = arkose_token
headers["OpenAI-Sentinel-Chat-Requirements-Token"] = chat_token

async with session.post(
f"{cls.url}/backend-api/conversation",
json=data,
@@ -432,15 +424,15 @@
cls._update_request_args(session)
await raise_for_status(response)
async for chunk in cls.iter_messages_chunk(response.iter_lines(), session, fields):
if response_fields:
response_fields = False
if return_conversation:
return_conversation = False
yield fields
yield chunk
if not auto_continue:
break
action = "continue"
await asyncio.sleep(5)
if history_disabled and auto_continue:
if history_disabled and auto_continue and not return_conversation:
await cls.delete_conversation(session, cls._headers, fields.conversation_id)

@staticmethod
@@ -458,7 +450,7 @@ async def iter_messages_chunk(
cls,
messages: AsyncIterator,
session: StreamSession,
fields: ResponseFields
fields: Conversation
) -> AsyncIterator:
last_message: int = 0
async for message in messages:
@@ -487,7 +479,7 @@ async def iter_messages_chunk(
break

@classmethod
async def iter_messages_line(cls, session: StreamSession, line: bytes, fields: ResponseFields) -> AsyncIterator:
async def iter_messages_line(cls, session: StreamSession, line: bytes, fields: Conversation) -> AsyncIterator:
if not line.startswith(b"data: "):
return
elif line.startswith(b"data: [DONE]"):
@@ -618,7 +610,7 @@ def _create_request_args(cls, cookies: Union[Cookies, None]):
@classmethod
def _update_request_args(cls, session: StreamSession):
for c in session.cookie_jar if hasattr(session, "cookie_jar") else session.cookies.jar:
cls._cookies[c.name if hasattr(c, "name") else c.key] = c.value
cls._cookies[c.key if hasattr(c, "key") else c.name] = c.value
cls._update_cookie_header()

@classmethod
@@ -631,7 +623,7 @@ def _set_api_key(cls, api_key: str):
def _update_cookie_header(cls):
cls._headers["Cookie"] = cls._format_cookies(cls._cookies)

class ResponseFields:
class Conversation(BaseConversation):
"""
Class to encapsulate response fields.
"""
@@ -664,7 +656,7 @@ async def generator(self) -> AsyncIterator:
self._generator = None
chunks = []
async for chunk in self._generator:
if isinstance(chunk, ResponseFields):
if isinstance(chunk, Conversation):
self._fields = chunk
else:
yield chunk
36 changes: 21 additions & 15 deletions g4f/Provider/openai/har_file.py
@@ -11,11 +11,6 @@
from .crypt import decrypt, encrypt
from ...requests import StreamSession

arkPreURL = "https://tcr9i.chat.openai.com/fc/gt2/public_key/35536E1E-65B4-4D96-9D97-6ADB7EFF8147"
sessionUrl = "https://chat.openai.com/api/auth/session"
chatArk = None
accessToken = None

class arkReq:
def __init__(self, arkURL, arkBx, arkHeader, arkBody, arkCookies, userAgent):
self.arkURL = arkURL
@@ -25,21 +20,30 @@ def __init__(self, arkURL, arkBx, arkHeader, arkBody, arkCookies, userAgent):
self.arkCookies = arkCookies
self.userAgent = userAgent

arkPreURL = "https://tcr9i.chat.openai.com/fc/gt2/public_key/35536E1E-65B4-4D96-9D97-6ADB7EFF8147"
sessionUrl = "https://chat.openai.com/api/auth/session"
chatArk: arkReq = None
accessToken: str = None
cookies: dict = None

def readHAR():
dirPath = "./"
harPath = []
chatArks = []
accessToken = None
cookies = {}
for root, dirs, files in os.walk(dirPath):
for file in files:
if file.endswith(".har"):
harPath.append(os.path.join(root, file))
if harPath:
break
if not harPath:
raise RuntimeError("No .har file found")
for path in harPath:
with open(path, 'r') as file:
with open(path, 'rb') as file:
try:
harFile = json.load(file)
harFile = json.loads(file.read())
except json.JSONDecodeError:
# Error: not a HAR file!
continue
@@ -48,19 +52,20 @@ def readHAR():
chatArks.append(parseHAREntry(v))
elif v['request']['url'] == sessionUrl:
accessToken = json.loads(v["response"]["content"]["text"]).get("accessToken")
if not chatArks:
RuntimeError("No arkose requests found in .har files")
cookies = {c['name']: c['value'] for c in v['request']['cookies']}
if not accessToken:
RuntimeError("No accessToken found in .har files")
return chatArks.pop(), accessToken
if not chatArks:
return None, accessToken, cookies
return chatArks.pop(), accessToken, cookies

def parseHAREntry(entry) -> arkReq:
tmpArk = arkReq(
arkURL=entry['request']['url'],
arkBx="",
arkHeader={h['name'].lower(): h['value'] for h in entry['request']['headers'] if h['name'].lower() not in ['content-length', 'cookie'] and not h['name'].startswith(':')},
arkBody={p['name']: unquote(p['value']) for p in entry['request']['postData']['params'] if p['name'] not in ['rnd']},
arkCookies=[{'name': c['name'], 'value': c['value'], 'expires': c['expires']} for c in entry['request']['cookies']],
arkCookies={c['name']: c['value'] for c in entry['request']['cookies']},
userAgent=""
)
tmpArk.userAgent = tmpArk.arkHeader.get('user-agent', '')
@@ -81,7 +86,6 @@ def genArkReq(chatArk: arkReq) -> arkReq:
tmpArk.arkBody['bda'] = base64.b64encode(bda.encode()).decode()
tmpArk.arkBody['rnd'] = str(random.random())
tmpArk.arkHeader['x-ark-esync-value'] = bw
tmpArk.arkCookies = {cookie['name']: cookie['value'] for cookie in tmpArk.arkCookies}
return tmpArk

async def sendRequest(tmpArk: arkReq, proxy: str = None):
@@ -117,8 +121,10 @@ def getN() -> str:
return base64.b64encode(timestamp.encode()).decode()

async def getArkoseAndAccessToken(proxy: str):
global chatArk, accessToken
global chatArk, accessToken, cookies
if chatArk is None or accessToken is None:
chatArk, accessToken = readHAR()
chatArk, accessToken, cookies = readHAR()
if chatArk is None:
return None, accessToken, cookies
newReq = genArkReq(chatArk)
return await sendRequest(newReq, proxy), accessToken, newReq.arkCookies
return await sendRequest(newReq, proxy), accessToken, cookies