Merge branch 'main' into 79-technique-guest-user-access
AvishaiEZen authored Sep 29, 2024
2 parents 34bd648 + 9de7cbf commit a8b217d
Showing 42 changed files with 462 additions and 34 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/pr-validation.yaml
@@ -25,3 +25,23 @@ jobs:
- name: Run pre-commit
run: pre-commit run --all-files

pytest:
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v2

- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Run pytest
run: pytest tests
1 change: 1 addition & 0 deletions .gitignore
@@ -4,3 +4,4 @@ build/
book/
bin/
.DS_Store
__pycache__/
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
@@ -3,10 +3,12 @@ repos:
rev: v4.6.0
hooks:
- id: check-json
exclude: '^\.vscode/settings\.json$'
- id: end-of-file-fixer
- id: trailing-whitespace
- id: check-added-large-files
- id: pretty-format-json
exclude: '^\.vscode/settings\.json$'
args: ["--autofix"]
- id: requirements-txt-fixer
- repo: https://github.com/psf/black
@@ -40,3 +42,7 @@ repos:
name: "procedure schema"
files: ^procedure/.*\.json$
args: ["--schemafile", "schema/procedure.schema.json"]
- id: check-jsonschema
name: "mitigation schema"
files: ^mitigation/.*\.json$
args: ["--schemafile", "schema/mitigation.schema.json"]
8 changes: 7 additions & 1 deletion .vscode/settings.json
@@ -26,5 +26,11 @@
"tamir",
"tapowerplatform",
"willison"
]
],
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.autoTestDiscoverOnSaveEnabled": true,
}
1 change: 1 addition & 0 deletions book.toml
@@ -9,3 +9,4 @@ build-dir = "book"

[output.html]
edit-url-template = "https://github.com/mbrg/genai-attacks/edit/main/{path}"
fold.enable = true
98 changes: 87 additions & 11 deletions build_scripts/generate_content_as_md.py
@@ -69,6 +69,7 @@ def load_json_files(base_dir, version):
os.path.join(base_dir, "procedure"),
os.path.join(base_dir, "platform"),
os.path.join(base_dir, "entity"),
os.path.join(base_dir, "mitigation"),
]

for directory in search_dirs:
@@ -145,7 +146,7 @@ def generate_main_page(tactics, techniques, matrix):

# Find the maximum number of techniques for any tactic
max_techniques = max(len(matrix[tactic["$id"]]) for tactic in sorted_tactics)
logger.debug(f"Found max techniques-per-tactic: {max_techniques}")
logger.info(f"Found max techniques-per-tactic: {max_techniques}")

# Generate rows for techniques
for i in range(max_techniques):
@@ -185,19 +186,55 @@ def generate_object_page(obj, all_objects, base_dir):
for ref in obj.get("external_references", []):
content += f"- [{ref['title']}]({ref['href']}), {ref['source']}\n"

content += "\n### Reference To Other Objects\n"
if obj["$type"] == "procedure":
content += "\n### Techniques\n"
content += "\n| Tactic | Technique | Details |\n"
content += "| -- | -- | -- |\n"

techniques = []
for ref in obj.get("object_references", []):
if ref["$id"] in all_objects and ref["$type"] == "technique":
technique_obj = all_objects[ref["$id"]]

# tactic id is either the '$tactic_id' property, or the first related tactic
for technique_ref in technique_obj.get("object_references", []):
if (
technique_ref["$id"] in all_objects
and technique_ref["$type"] == "tactic"
):
tactic_id = technique_ref["$id"]
break
tactic_id = technique_obj.get("$tactic_id", tactic_id)
tactic_obj = all_objects[tactic_id]

techniques.append(
(
tactic_obj["tactic_order"],
f"[{tactic_obj['name']}](../{tactic_obj['$type']}/{tactic_obj['$id'].split('/')[-1]}.md)",
f"[{technique_obj['name']}](../{technique_obj['$type']}/{technique_obj['$id'].split('/')[-1]}.md)",
ref["description"],
)
)

for _, tactic_name, technique_name, description in sorted(
techniques, key=lambda x: x[0]
):
content += f"| {tactic_name} | {technique_name} | {description} |\n"

content += "\n### Related Objects\n"
for ref in obj.get("object_references", []):
# filter out techniques in procedure pages
if ref["$type"] == "technique" and obj["$type"] == "procedure":
continue
if ref["$id"] in all_objects:
referenced_obj = all_objects[ref["$id"]]
content += f"- [{referenced_obj['name']}](../{referenced_obj['$type']}/{ref['$id'].split('/')[-1]}.md) ({referenced_obj['$type']}): {ref['description']}\n"
content += f"- --> [{referenced_obj['name']}](../{referenced_obj['$type']}/{ref['$id'].split('/')[-1]}.md) ({referenced_obj['$type']}){': ' if ref['description'] else ''}{ref['description']}\n"
else:
content += f"- {ref['$id']} ({ref['$type']}): {ref['description']} (Reference not found)\n"

content += "\n### Referenced By Other Objects\n"
logger.warning(f"{ref['$id']} ({ref['$type']}): Reference not found")
for other_obj in all_objects.values():
for ref in other_obj.get("object_references", []):
if ref["$id"] == obj["$id"]:
content += f"- [{other_obj['name']}](../{other_obj['$type']}/{other_obj['$id'].split('/')[-1]}.md) ({other_obj['$type']}): {ref['description']}\n"
content += f"- <-- [{other_obj['name']}](../{other_obj['$type']}/{other_obj['$id'].split('/')[-1]}.md) ({other_obj['$type']}){': ' if ref['description'] else ''}{ref['description']}\n"

content += "\n### Related Frameworks\n"
for ref in obj.get("framework_references", []):
@@ -208,7 +245,9 @@ def generate_object_page(obj, all_objects, base_dir):
return content


def generate_summary_page(tactics, techniques, procedures, platforms, entities, matrix):
def generate_summary_page(
tactics, techniques, procedures, platforms, entities, mitigations, matrix
):
logger.debug("Generating summary page content")
content = "# GenAI Attacks\n\n"
content += "* [Attacks Matrix](matrix.md)\n"
@@ -234,6 +273,11 @@ def generate_summary_page(tactics, techniques, procedures, platforms, entities,
for platform in platforms.values():
content += f" * [{platform['name']}](platform/{platform['$id'].split('/')[-1]}.md)\n"

content += "\n## Mitigations\n"
content += "* [Mitigations](mitigations.md)\n"
for mitigation in mitigations.values():
content += f" * [{mitigation['name']}](mitigation/{mitigation['$id'].split('/')[-1]}.md)\n"

content += "\n## Entities\n"
content += "* [Entities](entities.md)\n"
for entity in entities.values():
@@ -244,6 +288,16 @@ def main():
return content


def generate_object_list_page(objects, title):
logger.debug(f"Generating object list page for {title}")
content = f"# {title}\n\n"

for obj in objects:
content += f"- [{obj['name']}]({obj['$type']}/{obj['$id'].split('/')[-1]}.md)\n"

return content


def main():
parser = argparse.ArgumentParser(
description="Generate documentation for GenAI Attacks Matrix"
@@ -281,9 +335,10 @@ def main():
procedures = {k: v for k, v in all_objects.items() if v["$type"] == "procedure"}
platforms = {k: v for k, v in all_objects.items() if v["$type"] == "platform"}
entities = {k: v for k, v in all_objects.items() if v["$type"] == "entity"}
mitigations = {k: v for k, v in all_objects.items() if v["$type"] == "mitigation"}

logger.info(
f"Found {len(tactics)} tactics, {len(techniques)} techniques, {len(procedures)} procedures, {len(platforms)} platforms, and {len(entities)} entities"
f"Found {len(tactics)} tactics, {len(techniques)} techniques, {len(procedures)} procedures, {len(platforms)} platforms, {len(entities)} entities, {len(mitigations)} mitigations"
)

if not tactics or not techniques:
@@ -299,7 +354,14 @@ def main():
with open(matrix_path, "w") as f:
f.write(matrix_content)

object_types = ["tactic", "technique", "procedure", "platform", "entity"]
object_types = [
"tactic",
"technique",
"procedure",
"platform",
"entity",
"mitigation",
]

for obj_type in object_types:
type_dir = os.path.join(build_dir, obj_type)
@@ -318,6 +380,20 @@ def main():
except Exception as e:
logger.error(f"Error writing file {file_path}: {str(e)}")

# generate object list pages
for objects, title in (
(tactics.values(), "tactics"),
(techniques.values(), "techniques"),
(procedures.values(), "procedures"),
(platforms.values(), "platforms"),
(entities.values(), "entities"),
(mitigations.values(), "mitigations"),
):
page_content = generate_object_list_page(objects, title.capitalize())
page_path = os.path.join(build_dir, f"{title}.md")
with open(page_path, "w") as f:
f.write(page_content)

# Copy repo md files to build directory
intro_dir = os.path.join(build_dir, "intro")
logger.info(f"Creating directory: {intro_dir}")
@@ -330,7 +406,7 @@ def main():

# Generate summary page (SUMMARY.md)
summary_content = generate_summary_page(
tactics, techniques, procedures, platforms, entities, matrix
tactics, techniques, procedures, platforms, entities, mitigations, matrix
)
summary_path = os.path.join(build_dir, "SUMMARY.md")
logger.info(f"Writing summary page to: {summary_path}")
2 changes: 2 additions & 0 deletions build_scripts/local.sh
@@ -7,3 +7,5 @@ cp book_theme/head.hbs build/theme/head.hbs
bin/mdbook build
# rewrite book URLs
python build_scripts/rewrite_mdbook_links.py --book-dir book/
# echo book location for easy access
echo "book/index.html"
1 change: 1 addition & 0 deletions build_scripts/rewrite_mdbook_links.py
@@ -29,6 +29,7 @@ def update_edit_urls(book_dir):
"techniques.html",
"platforms.html",
"tactics.html",
"mitigations.html",
):
# Remove Change links from the identified pages
change_links = soup.find_all("a", title="Suggest an edit")
5 changes: 5 additions & 0 deletions entity/gal_malka.json
@@ -9,6 +9,11 @@
"href": "https://twitter.com/GalMalka6",
"source": "Twitter",
"title": "Gal Malka on Twitter"
},
{
"href": "https://labs.zenity.io/authors/cd14ef65-1ce7-4fff-804d-73d883cb38e3",
"source": "Zenity Labs",
"title": "Gal Malka on Zenity Labs"
}
],
"name": "Gal Malka"
5 changes: 5 additions & 0 deletions entity/lana_salameh.json
@@ -9,6 +9,11 @@
"href": "https://twitter.com/@lana__salameh",
"source": "Twitter",
"title": "Lana Salameh on Twitter"
},
{
"href": "https://labs.zenity.io/authors/ed0835b8-ae63-4d4f-b924-23d801808cac",
"source": "Zenity Labs",
"title": "Lana Salameh on Zenity Labs"
}
],
"name": "Lana Salameh"
10 changes: 10 additions & 0 deletions entity/michael_bargury.json
@@ -9,6 +9,16 @@
"href": "https://twitter.com/mbrg0",
"source": "Twitter",
"title": "Michael Bargury on Twitter"
},
{
"href": "https://labs.zenity.io/authors/c9e6a5a0-1122-4d65-a5bd-7426d4aa65af",
"source": "Zenity Labs",
"title": "Michael Bargury on Zenity Labs"
},
{
"href": "https://mbgsec.com",
"source": "mbgsec.com",
"title": "Personal blog"
}
],
"name": "Michael Bargury"
4 changes: 2 additions & 2 deletions entity/pliny.json
@@ -8,8 +8,8 @@
{
"href": "https://twitter.com/elder_plinius",
"source": "Twitter",
"title": "Pliny the Liberator on Twitter"
"title": "Pliny on Twitter"
}
],
"name": "Pliny the Liberator"
"name": "Pliny"
}
2 changes: 1 addition & 1 deletion entity/simon_willison.json
@@ -12,7 +12,7 @@
},
{
"href": "https://simonwillison.net",
"source": "Simon Willison",
"source": "simonwillison.net",
"title": "Personal blog"
}
],
5 changes: 5 additions & 0 deletions entity/tamir_ishay_sharbat.json
@@ -9,6 +9,11 @@
"href": "https://twitter.com/tamirishaysh",
"source": "Twitter",
"title": "Tamir Ishay Sharbat on Twitter"
},
{
"href": "https://labs.zenity.io/authors/15e910e0-0161-49aa-acf4-a83ec581e372",
"source": "Zenity Labs",
"title": "Tamir Ishay Sharbat on Zenity Labs"
}
],
"name": "Tamir Ishay Sharbat"
22 changes: 22 additions & 0 deletions mitigation/content_security_policy.json
@@ -0,0 +1,22 @@
{
"$id": "$gai-mitigation/content_security_policy",
"$schema": "../schema/mitigation.schema.json",
"$type": "mitigation",
"description": "A defense mechanism that helps protect against web browsing tools and markdown rendering for data exfiltration.\nWhen a user asks the AI system to access a URL, it will only access it if the URL is from a limited set of trusted domains.\n",
"external_references": [
{
"description": "The mechanism is explained in the context of Microsoft Copilot (previously Bing Chat). The page you are reading draws a lot of content from this reference.",
"href": "https://embracethered.com/blog/posts/2023/bing-chat-data-exfiltration-poc-and-fix/",
"source": "Embrace The Red",
"title": "Bing Chat: Data Exfiltration Exploit Explained"
}
],
"name": "Content Security Policy",
"object_references": [
{
"$id": "$gai-platform/microsoft_copilot",
"$type": "platform",
"description": "Microsoft Copilot can render URLs and links if they fall under these trusted domains:\n\n```\nth.bing.com\nwww.bing.com\nedgeservices.bing.com\nr.bing.com\n```"
}
]
}
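The trusted-domain list quoted above is the core of this mitigation: before a URL is fetched or rendered, its host is compared against an allowlist. A minimal sketch of that check in Python, using the Copilot domain list from content_security_policy.json (the function name is illustrative and not part of the project):

```python
# Illustrative content-security-policy style URL check, using the trusted
# Bing domains quoted in content_security_policy.json.
from urllib.parse import urlparse

TRUSTED_DOMAINS = {
    "th.bing.com",
    "www.bing.com",
    "edgeservices.bing.com",
    "r.bing.com",
}

def is_url_allowed(url: str) -> bool:
    """Return True only if the URL's host is on the trusted-domain allowlist."""
    host = urlparse(url).hostname or ""
    return host.lower() in TRUSTED_DOMAINS

# An attacker-controlled exfiltration URL is rejected, a trusted one is not.
print(is_url_allowed("https://www.bing.com/images/search?q=cat"))   # True
print(is_url_allowed("https://attacker.example/leak?data=secret"))  # False
```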
21 changes: 21 additions & 0 deletions mitigation/index_based_browsing.json
@@ -0,0 +1,21 @@
{
"$id": "$gai-mitigation/index_based_browsing",
"$schema": "../schema/mitigation.schema.json",
"$type": "mitigation",
"description": "A defense mechanism that helps protect against use of web browser tools data exfiltration and initial access.\nWhen a user asks the AI system to access a URL, it will ask a web search and have access only to information that the index provides. The URL will is not accessed by the AI System at all.",
"external_references": [
{
"href": "https://learn.microsoft.com/en-us/copilot/microsoft-365/manage-public-web-access",
"source": "Microsoft",
"title": "Data, privacy, and security for web queries in Microsoft 365 Copilot and Microsoft Copilot"
}
],
"name": "Index-Based Browsing",
"object_references": [
{
"$id": "$gai-platform/microsoft_copilot_for_m365",
"$type": "platform",
"description": "Microsoft Copilot for M365 cannot browse websites. Instead, its web browsing tool has access to the data available on Bing's index for the relevant website page."
}
]
}
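Index-based browsing avoids live fetches entirely: the web tool only sees whatever the search index already holds for a page. A rough sketch of the idea, with a hypothetical in-memory index standing in for Bing's (none of these names come from the project):

```python
# Illustrative sketch of index-based browsing: the AI system never requests the
# URL itself; it can only read what the search index has already stored.
from typing import Optional

# Hypothetical stand-in for a search engine's index (e.g. Bing's).
SEARCH_INDEX: dict[str, str] = {
    "https://example.com/docs": "Cached snippet of the docs page from the index.",
}

def browse(url: str) -> Optional[str]:
    """Return indexed content for the URL, or None; never issue a live request."""
    return SEARCH_INDEX.get(url)

print(browse("https://example.com/docs"))         # indexed snippet
print(browse("https://attacker.example/beacon"))  # None -- the URL is never hit
```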