Merge branch 'main' into 79-technique-guest-user-access
AvishaiEZen authored Sep 29, 2024
2 parents 34bd648 + 9de7cbf commit a8b217d
Showing 42 changed files with 462 additions and 34 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/pr-validation.yaml
@@ -25,3 +25,23 @@ jobs:
- name: Run pre-commit
run: pre-commit run --all-files

pytest:
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v2

- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Run pytest
run: pytest tests
1 change: 1 addition & 0 deletions .gitignore
@@ -4,3 +4,4 @@ build/
book/
bin/
.DS_Store
__pycache__/
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
@@ -3,10 +3,12 @@ repos:
rev: v4.6.0
hooks:
- id: check-json
exclude: '^\.vscode/settings\.json$'
- id: end-of-file-fixer
- id: trailing-whitespace
- id: check-added-large-files
- id: pretty-format-json
exclude: '^\.vscode/settings\.json$'
args: ["--autofix"]
- id: requirements-txt-fixer
- repo: https://github.com/psf/black
@@ -40,3 +42,7 @@ repos:
name: "procedure schema"
files: ^procedure/.*\.json$
args: ["--schemafile", "schema/procedure.schema.json"]
- id: check-jsonschema
name: "mitigation schema"
files: ^mitigation/.*\.json$
args: ["--schemafile", "schema/mitigation.schema.json"]
8 changes: 7 additions & 1 deletion .vscode/settings.json
@@ -26,5 +26,11 @@
"tamir",
"tapowerplatform",
"willison"
]
],
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.autoTestDiscoverOnSaveEnabled": true,
}
1 change: 1 addition & 0 deletions book.toml
@@ -9,3 +9,4 @@ build-dir = "book"

[output.html]
edit-url-template = "https://github.com/mbrg/genai-attacks/edit/main/{path}"
fold.enable = true
98 changes: 87 additions & 11 deletions build_scripts/generate_content_as_md.py
@@ -69,6 +69,7 @@ def load_json_files(base_dir, version):
os.path.join(base_dir, "procedure"),
os.path.join(base_dir, "platform"),
os.path.join(base_dir, "entity"),
os.path.join(base_dir, "mitigation"),
]

for directory in search_dirs:
@@ -145,7 +146,7 @@ def generate_main_page(tactics, techniques, matrix):

# Find the maximum number of techniques for any tactic
max_techniques = max(len(matrix[tactic["$id"]]) for tactic in sorted_tactics)
logger.debug(f"Found max techniques-per-tactic: {max_techniques}")
logger.info(f"Found max techniques-per-tactic: {max_techniques}")

# Generate rows for techniques
for i in range(max_techniques):
@@ -185,19 +186,55 @@ def generate_object_page(obj, all_objects, base_dir):
for ref in obj.get("external_references", []):
content += f"- [{ref['title']}]({ref['href']}), {ref['source']}\n"

content += "\n### Reference To Other Objects\n"
if obj["$type"] == "procedure":
content += "\n### Techniques\n"
content += "\n| Tactic | Technique | Details |\n"
content += "| -- | -- | -- |\n"

techniques = []
for ref in obj.get("object_references", []):
if ref["$id"] in all_objects and ref["$type"] == "technique":
technique_obj = all_objects[ref["$id"]]

# tactic id is either the '$tactic_id' property, or the first related tactic
for technique_ref in technique_obj.get("object_references", []):
if (
technique_ref["$id"] in all_objects
and technique_ref["$type"] == "tactic"
):
tactic_id = technique_ref["$id"]
break
tactic_id = technique_obj.get("$tactic_id", tactic_id)
tactic_obj = all_objects[tactic_id]

techniques.append(
(
tactic_obj["tactic_order"],
f"[{tactic_obj['name']}](../{tactic_obj['$type']}/{tactic_obj['$id'].split('/')[-1]}.md)",
f"[{technique_obj['name']}](../{technique_obj['$type']}/{technique_obj['$id'].split('/')[-1]}.md)",
ref["description"],
)
)

for _, tactic_name, technique_name, description in sorted(
techniques, key=lambda x: x[0]
):
content += f"| {tactic_name} | {technique_name} | {description} |\n"

content += "\n### Related Objects\n"
for ref in obj.get("object_references", []):
# filter out techniques in procedure pages
if ref["$type"] == "technique" and obj["$type"] == "procedure":
continue
if ref["$id"] in all_objects:
referenced_obj = all_objects[ref["$id"]]
content += f"- [{referenced_obj['name']}](../{referenced_obj['$type']}/{ref['$id'].split('/')[-1]}.md) ({referenced_obj['$type']}): {ref['description']}\n"
content += f"- --> [{referenced_obj['name']}](../{referenced_obj['$type']}/{ref['$id'].split('/')[-1]}.md) ({referenced_obj['$type']}){': ' if ref['description'] else ''}{ref['description']}\n"
else:
content += f"- {ref['$id']} ({ref['$type']}): {ref['description']} (Reference not found)\n"

content += "\n### Referenced By Other Objects\n"
logger.warning(f"{ref['$id']} ({ref['$type']}): Reference not found")
for other_obj in all_objects.values():
for ref in other_obj.get("object_references", []):
if ref["$id"] == obj["$id"]:
content += f"- [{other_obj['name']}](../{other_obj['$type']}/{other_obj['$id'].split('/')[-1]}.md) ({other_obj['$type']}): {ref['description']}\n"
content += f"- <-- [{other_obj['name']}](../{other_obj['$type']}/{other_obj['$id'].split('/')[-1]}.md) ({other_obj['$type']}){': ' if ref['description'] else ''}{ref['description']}\n"

content += "\n### Related Frameworks\n"
for ref in obj.get("framework_references", []):
@@ -208,7 +245,9 @@ def generate_object_page(obj, all_objects, base_dir):
return content


def generate_summary_page(tactics, techniques, procedures, platforms, entities, matrix):
def generate_summary_page(
tactics, techniques, procedures, platforms, entities, mitigations, matrix
):
logger.debug("Generating summary page content")
content = "# GenAI Attacks\n\n"
content += "* [Attacks Matrix](matrix.md)\n"
@@ -234,6 +273,11 @@ def generate_summary_page(tactics, techniques, procedures, platforms, entities,
for platform in platforms.values():
content += f" * [{platform['name']}](platform/{platform['$id'].split('/')[-1]}.md)\n"

content += "\n## Mitigations\n"
content += "* [Mitigations](mitigations.md)\n"
for mitigation in mitigations.values():
content += f" * [{mitigation['name']}](mitigation/{mitigation['$id'].split('/')[-1]}.md)\n"

content += "\n## Entities\n"
content += "* [Entities](entities.md)\n"
for entity in entities.values():
@@ -244,6 +288,16 @@ def main():
return content


def generate_object_list_page(objects, title):
logger.debug(f"Generating object list page for {title}")
content = f"# {title}\n\n"

for obj in objects:
content += f"- [{obj['name']}]({obj['$type']}/{obj['$id'].split('/')[-1]}.md)\n"

return content


def main():
parser = argparse.ArgumentParser(
description="Generate documentation for GenAI Attacks Matrix"
@@ -281,9 +335,10 @@ def main():
procedures = {k: v for k, v in all_objects.items() if v["$type"] == "procedure"}
platforms = {k: v for k, v in all_objects.items() if v["$type"] == "platform"}
entities = {k: v for k, v in all_objects.items() if v["$type"] == "entity"}
mitigations = {k: v for k, v in all_objects.items() if v["$type"] == "mitigation"}

logger.info(
f"Found {len(tactics)} tactics, {len(techniques)} techniques, {len(procedures)} procedures, {len(platforms)} platforms, and {len(entities)} entities"
f"Found {len(tactics)} tactics, {len(techniques)} techniques, {len(procedures)} procedures, {len(platforms)} platforms, {len(entities)} entities, {len(mitigations)} mitigations"
)

if not tactics or not techniques:
@@ -299,7 +354,14 @@ def main():
with open(matrix_path, "w") as f:
f.write(matrix_content)

object_types = ["tactic", "technique", "procedure", "platform", "entity"]
object_types = [
"tactic",
"technique",
"procedure",
"platform",
"entity",
"mitigation",
]

for obj_type in object_types:
type_dir = os.path.join(build_dir, obj_type)
@@ -318,6 +380,20 @@ def main():
except Exception as e:
logger.error(f"Error writing file {file_path}: {str(e)}")

# generate object list pages
for objects, title in (
(tactics.values(), "tactics"),
(techniques.values(), "techniques"),
(procedures.values(), "procedures"),
(platforms.values(), "platforms"),
(entities.values(), "entities"),
(mitigations.values(), "mitigations"),
):
page_content = generate_object_list_page(objects, title.capitalize())
page_path = os.path.join(build_dir, f"{title}.md")
with open(page_path, "w") as f:
f.write(page_content)

# Copy repo md files to build directory
intro_dir = os.path.join(build_dir, "intro")
logger.info(f"Creating directory: {intro_dir}")
@@ -330,7 +406,7 @@ def main():

# Generate summary page (SUMMARY.md)
summary_content = generate_summary_page(
tactics, techniques, procedures, platforms, entities, matrix
tactics, techniques, procedures, platforms, entities, mitigations, matrix
)
summary_path = os.path.join(build_dir, "SUMMARY.md")
logger.info(f"Writing summary page to: {summary_path}")
2 changes: 2 additions & 0 deletions build_scripts/local.sh
@@ -7,3 +7,5 @@ cp book_theme/head.hbs build/theme/head.hbs
bin/mdbook build
# rewrite book URLs
python build_scripts/rewrite_mdbook_links.py --book-dir book/
# echo book location for easy access
echo "book/index.html"
1 change: 1 addition & 0 deletions build_scripts/rewrite_mdbook_links.py
@@ -29,6 +29,7 @@ def update_edit_urls(book_dir):
"techniques.html",
"platforms.html",
"tactics.html",
"mitigations.html",
):
# Remove Change links from the identified pages
change_links = soup.find_all("a", title="Suggest an edit")
5 changes: 5 additions & 0 deletions entity/gal_malka.json
@@ -9,6 +9,11 @@
"href": "https://twitter.com/GalMalka6",
"source": "Twitter",
"title": "Gal Malka on Twitter"
},
{
"href": "https://labs.zenity.io/authors/cd14ef65-1ce7-4fff-804d-73d883cb38e3",
"source": "Zenity Labs",
"title": "Gal Malka on Zenity Labs"
}
],
"name": "Gal Malka"
5 changes: 5 additions & 0 deletions entity/lana_salameh.json
@@ -9,6 +9,11 @@
"href": "https://twitter.com/@lana__salameh",
"source": "Twitter",
"title": "Lana Salameh on Twitter"
},
{
"href": "https://labs.zenity.io/authors/ed0835b8-ae63-4d4f-b924-23d801808cac",
"source": "Zenity Labs",
"title": "Lana Salameh on Zenity Labs"
}
],
"name": "Lana Salameh"
10 changes: 10 additions & 0 deletions entity/michael_bargury.json
@@ -9,6 +9,16 @@
"href": "https://twitter.com/mbrg0",
"source": "Twitter",
"title": "Michael Bargury on Twitter"
},
{
"href": "https://labs.zenity.io/authors/c9e6a5a0-1122-4d65-a5bd-7426d4aa65af",
"source": "Zenity Labs",
"title": "Michael Bargury on Zenity Labs"
},
{
"href": "https://mbgsec.com",
"source": "mbgsec.com",
"title": "Personal blog"
}
],
"name": "Michael Bargury"
4 changes: 2 additions & 2 deletions entity/pliny.json
@@ -8,8 +8,8 @@
{
"href": "https://twitter.com/elder_plinius",
"source": "Twitter",
"title": "Pliny the Liberator on Twitter"
"title": "Pliny on Twitter"
}
],
"name": "Pliny the Liberator"
"name": "Pliny"
}
2 changes: 1 addition & 1 deletion entity/simon_willison.json
@@ -12,7 +12,7 @@
},
{
"href": "https://simonwillison.net",
"source": "Simon Willison",
"source": "simonwillison.net",
"title": "Personal blog"
}
],
5 changes: 5 additions & 0 deletions entity/tamir_ishay_sharbat.json
@@ -9,6 +9,11 @@
"href": "https://twitter.com/tamirishaysh",
"source": "Twitter",
"title": "Tamir Ishay Sharbat on Twitter"
},
{
"href": "https://labs.zenity.io/authors/15e910e0-0161-49aa-acf4-a83ec581e372",
"source": "Zenity Labs",
"title": "Tamir Ishay Sharbat on Zenity Labs"
}
],
"name": "Tamir Ishay Sharbat"
22 changes: 22 additions & 0 deletions mitigation/content_security_policy.json
@@ -0,0 +1,22 @@
{
"$id": "$gai-mitigation/content_security_policy",
"$schema": "../schema/mitigation.schema.json",
"$type": "mitigation",
"description": "A defense mechanism that helps protect against web browsing tools and markdown rendering for data exfiltration.\nWhen a user asks the AI system to access a URL, it will only access it if the URL is from a limited set of trusted domains.\n",
"external_references": [
{
"description": "The mechanism is explained in the context of Microsoft Copilot (previously Bing Chat). The page you are reading draws a lot of content from this reference.",
"href": "https://embracethered.com/blog/posts/2023/bing-chat-data-exfiltration-poc-and-fix/",
"source": "Embrace The Red",
"title": "Bing Chat: Data Exfiltration Exploit Explained"
}
],
"name": "Content Security Policy",
"object_references": [
{
"$id": "$gai-platform/microsoft_copilot",
"$type": "platform",
"description": "Microsoft Copilot can render URLs and links if they fall under these trusted domains:\n\n```\nth.bing.com\nwww.bing.com\nedgeservices.bing.com\nr.bing.com\n```"
}
]
}
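The trusted-domain list quoted above is the core of this mitigation: before a URL is fetched or rendered, its host is compared against an allowlist. A minimal sketch of that check in Python, using the Copilot domain list from content_security_policy.json (the function name is illustrative and not part of the project):

```python
# Illustrative content-security-policy style URL check, using the trusted
# Bing domains quoted in content_security_policy.json.
from urllib.parse import urlparse

TRUSTED_DOMAINS = {
    "th.bing.com",
    "www.bing.com",
    "edgeservices.bing.com",
    "r.bing.com",
}

def is_url_allowed(url: str) -> bool:
    """Return True only if the URL's host is on the trusted-domain allowlist."""
    host = urlparse(url).hostname or ""
    return host.lower() in TRUSTED_DOMAINS

# An attacker-controlled exfiltration URL is rejected, a trusted one is not.
print(is_url_allowed("https://www.bing.com/images/search?q=cat"))   # True
print(is_url_allowed("https://attacker.example/leak?data=secret"))  # False
```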
21 changes: 21 additions & 0 deletions mitigation/index_based_browsing.json
@@ -0,0 +1,21 @@
{
"$id": "$gai-mitigation/index_based_browsing",
"$schema": "../schema/mitigation.schema.json",
"$type": "mitigation",
"description": "A defense mechanism that helps protect against use of web browser tools data exfiltration and initial access.\nWhen a user asks the AI system to access a URL, it will ask a web search and have access only to information that the index provides. The URL will is not accessed by the AI System at all.",
"external_references": [
{
"href": "https://learn.microsoft.com/en-us/copilot/microsoft-365/manage-public-web-access",
"source": "Microsoft",
"title": "Data, privacy, and security for web queries in Microsoft 365 Copilot and Microsoft Copilot"
}
],
"name": "Index-Based Browsing",
"object_references": [
{
"$id": "$gai-platform/microsoft_copilot_for_m365",
"$type": "platform",
"description": "Microsoft Copilot for M365 cannot browse websites. Instead, its web browsing tool has access to the data available on Bing's index for the relevant website page."
}
]
}
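Index-based browsing avoids live fetches entirely: the web tool only sees whatever the search index already holds for a page. A rough sketch of the idea, with a hypothetical in-memory index standing in for Bing's (none of these names come from the project):

```python
# Illustrative sketch of index-based browsing: the AI system never requests the
# URL itself; it can only read what the search index has already stored.
from typing import Optional

# Hypothetical stand-in for a search engine's index (e.g. Bing's).
SEARCH_INDEX: dict[str, str] = {
    "https://example.com/docs": "Cached snippet of the docs page from the index.",
}

def browse(url: str) -> Optional[str]:
    """Return indexed content for the URL, or None; never issue a live request."""
    return SEARCH_INDEX.get(url)

print(browse("https://example.com/docs"))         # indexed snippet
print(browse("https://attacker.example/beacon"))  # None -- the URL is never hit
```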