Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New internal module "unarchive" #1918

Open
wants to merge 34 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
57622c2
Added an internal module to always extract different file types
domwhewell-sage Nov 4, 2024
2665bd9
Fix some tests
domwhewell-sage Nov 6, 2024
f9777a7
Merge branch 'dev' into extract_internal_module
domwhewell-sage Nov 6, 2024
f329ecb
Add `extra_opts` to ansible unarchive
domwhewell-sage Nov 6, 2024
95b4cbb
Ugh have to include the libs
domwhewell-sage Nov 6, 2024
363cd6d
Merge branch 'dev' into extract_internal_module
domwhewell-sage Nov 12, 2024
f72315f
Add a map of the different compression types, comment them until a te…
domwhewell-sage Nov 12, 2024
d679d0c
Merge branch 'dev' into extract_internal_module
domwhewell-sage Nov 21, 2024
9536b57
Added more compression formats to extract
domwhewell-sage Nov 21, 2024
3a68fbc
Add imports
domwhewell-sage Nov 21, 2024
d935444
Change to use OS commands
domwhewell-sage Dec 2, 2024
6c6a511
Made changes to the tests
domwhewell-sage Dec 8, 2024
b71841a
Remove jadx compatible types from compression map
domwhewell-sage Dec 8, 2024
bf8a1b3
Merge branch 'dev' into extract_internal_module
domwhewell-sage Dec 8, 2024
7db38fd
Rename to unarchive, move jar exclusions into module and restore helper
domwhewell-sage Dec 15, 2024
892663d
Change lzma to 7zip
domwhewell-sage Dec 19, 2024
9238d66
Merge branch 'dev' into extract_internal_module
domwhewell-sage Dec 20, 2024
3dfe07b
Remove apt_deps
domwhewell-sage Dec 20, 2024
12c68fb
Move file creation to setup_after_prep
domwhewell-sage Dec 20, 2024
0aa69b2
Swap unarchive and speculate
domwhewell-sage Dec 20, 2024
4e22bb2
Add rar to the `CORE_DEPS`
domwhewell-sage Dec 20, 2024
a4a3712
Remove rar install and move it to /test
domwhewell-sage Jan 1, 2025
7f3f222
include the test.rar file in the python test
domwhewell-sage Jan 6, 2025
5589a02
Dont create the directory without checking the compression type first
domwhewell-sage Jan 11, 2025
9a78768
List out the files in the rar folder to see why ubuntu is failing in …
domwhewell-sage Jan 11, 2025
3340882
List the parent folder
domwhewell-sage Jan 11, 2025
a50a775
Dont accept files that are not compressed
domwhewell-sage Jan 11, 2025
849924b
We dont want trufflehog re-scanning folders it has already done
domwhewell-sage Jan 11, 2025
fe4f456
Merge branch 'dev' into extract_internal_module
domwhewell-sage Jan 12, 2025
b58288b
Remove failing line
domwhewell-sage Jan 12, 2025
91b71f5
Try with an older rar file created on ubuntu:22.04
domwhewell-sage Jan 12, 2025
2548289
Use 7z to extract the rar file instead, as ubuntu:22.04 has a …
domwhewell-sage Jan 12, 2025
bad3a44
Adding 7zip plugins for fedora as without it uses 7za
domwhewell-sage Jan 13, 2025
b21ab37
Add p7zip-plugins on fedora
domwhewell-sage Jan 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion bbot/core/helpers/depsinstaller/installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ class DepsInstaller:
"gcc": "gcc",
"bash": "bash",
"which": "which",
"unrar": "unrar-free",
"tar": "tar",
# debian why are you like this
"7z": [
Expand All @@ -47,6 +46,12 @@ class DepsInstaller:
"become": True,
"when": "ansible_facts['os_family'] != 'Debian'",
},
{
"name": "Install p7zip-plugins (Fedora)",
"package": {"name": ["p7zip-plugins"], "state": "present"},
"become": True,
"when": "ansible_facts['distribution'] == 'Fedora'",
},
],
}

Expand Down
6 changes: 6 additions & 0 deletions bbot/modules/filedownload.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class filedownload(BaseModule):
"swp", # Swap File (temporary file, often Vim)
"sxw", # OpenOffice.org Writer document
"tar.gz", # Gzip-Compressed Tar Archive
"tgz", # Gzip-Compressed Tar Archive
"tar", # Tar Archive
"txt", # Plain Text Document
"vbs", # Visual Basic Script
Expand All @@ -76,6 +77,11 @@ class filedownload(BaseModule):
"yaml", # YAML Ain't Markup Language
"yml", # YAML Ain't Markup Language
"zip", # Zip Archive
"lzma", # LZMA Compressed File
"rar", # RAR Compressed File
"7z", # 7-Zip Compressed File
"xz", # XZ Compressed File
"bz2", # Bzip2 Compressed File
],
"max_filesize": "10MB",
"base_64_encoded_file": "false",
Expand Down
79 changes: 79 additions & 0 deletions bbot/modules/internal/unarchive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from pathlib import Path
from bbot.modules.internal.base import BaseInternalModule
from bbot.core.helpers.libmagic import get_magic_info, get_compression


class unarchive(BaseInternalModule):
    """Unpack archive files announced through FILESYSTEM events.

    For every accepted file event the archive is extracted into a sibling
    directory (named after the file, with dots replaced by underscores) and a
    new FILESYSTEM event tagged ``folder`` / ``unarchived-folder`` is emitted
    for that directory. Files found at the top level of the extraction
    directory are themselves run through the extractor, so archives nested
    directly inside an archive are unpacked as well.
    """

    watched_events = ["FILESYSTEM"]
    produced_events = ["FILESYSTEM"]
    flags = ["passive", "safe"]
    meta = {
        "description": "Extract different types of files into folders on the filesystem",
        "created_date": "2024-12-08",
        "author": "@domwhewell-sage",
    }

    async def setup(self):
        """Build the lookup tables used by filtering and extraction.

        Returns:
            True, unconditionally.
        """
        # MIME types that are never unpacked by this module.
        self.ignore_compressions = ["application/java-archive", "application/vnd.android.package-archive"]
        # Maps a compression name (as returned by get_compression) to an argv
        # template. "{filename}" and "{extract_dir}" are substituted with
        # str.format() in extract_file().
        # NOTE(review): the '-p""' argument is passed as-is in an argv list, so
        # if run_process does not go through a shell the quote characters reach
        # 7z literally - confirm this is the intended way to avoid a password prompt.
        self.compression_methods = {
            "zip": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
            "bzip2": ["tar", "--overwrite", "-xvjf", "{filename}", "-C", "{extract_dir}/"],
            "xz": ["tar", "--overwrite", "-xvJf", "{filename}", "-C", "{extract_dir}/"],
            "7z": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
            "rar": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
            "lzma": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
            "tar": ["tar", "--overwrite", "-xvf", "{filename}", "-C", "{extract_dir}/"],
            "gzip": ["tar", "--overwrite", "-xvzf", "{filename}", "-C", "{extract_dir}/"],
        }
        return True

    async def filter_event(self, event):
        """Accept only file events whose compression format has an extraction command.

        Args:
            event: The incoming FILESYSTEM event; its ``tags`` and ``data``
                (``path``, ``magic_mime_type``, ``compression``) are inspected.

        Returns:
            ``True`` when the event should be handled, otherwise a
            ``(False, reason)`` tuple describing why it was rejected.
        """
        if "file" not in event.tags:
            return False, "Event is not a file"

        # .get() so that an event without MIME information is rejected by the
        # checks below instead of raising KeyError here.
        mime_type = event.data.get("magic_mime_type")
        if mime_type in self.ignore_compressions:
            return False, f"Ignoring file type: {mime_type}, {event.data['path']}"

        if "compression" not in event.data:
            return False, f"Event is not a compressed file: {event.data['path']}"

        compression = event.data["compression"]
        if compression not in self.compression_methods:
            return False, f"Extract unable to handle file type: {compression}, {event.data['path']}"

        return True

    async def handle_event(self, event):
        """Extract the archive referenced by ``event`` and emit the resulting folder.

        Args:
            event: A FILESYSTEM event whose ``data["path"]`` points at the archive.
        """
        path = Path(event.data["path"])
        # e.g. "/tmp/test.tar.gz" is extracted into "/tmp/test_tar_gz"
        output_dir = path.parent / path.name.replace(".", "_")

        # Use the appropriate extraction method based on the file type
        self.info(f"Extracting {path} to {output_dir}")
        success = await self.extract_file(path, output_dir)

        # If the extraction was successful, emit the event
        if success:
            await self.emit_event(
                {"path": str(output_dir)},
                "FILESYSTEM",
                tags=["folder", "unarchived-folder"],
                parent=event,
                context=f'extracted "{path}" to: {output_dir}',
            )
            return

        # Best-effort cleanup of the (empty) output directory. Path.rmdir()
        # raises if the directory was never created or if a failed extraction
        # left files behind; neither case should crash the handler.
        try:
            output_dir.rmdir()
        except OSError as e:
            self.debug(f"Could not remove {output_dir}: {e}")

    async def extract_file(self, path, output_dir):
        """Extract ``path`` into ``output_dir``, then try the same on each extracted file.

        The compression format is detected from the file itself via
        ``get_magic_info`` / ``get_compression``. After a successful
        extraction every regular file directly inside ``output_dir`` is passed
        back to this method with ``output_dir / <file stem>`` as its target;
        files that are not a supported archive are left untouched.

        Args:
            path: Path of the file to extract.
            output_dir: Directory to extract into; created if it does not exist.

        Returns:
            ``True`` if the extraction command for ``path`` ran successfully,
            ``False`` if the format is unsupported or an error occurred. The
            result of nested extractions does not affect the return value.
        """
        _, mime_type, _, _ = get_magic_info(path)
        compression_format = get_compression(mime_type)
        cmd_list = self.compression_methods.get(compression_format, [])
        if not cmd_list:
            # No extraction command for this format: report failure explicitly
            # rather than falling through and returning None.
            return False

        if not output_dir.exists():
            self.helpers.mkdir(output_dir)
        command = [s.format(filename=path, extract_dir=output_dir) for s in cmd_list]
        try:
            await self.run_process(command, check=True)
            for item in output_dir.iterdir():
                if item.is_file():
                    await self.extract_file(item, output_dir / item.stem)
        except Exception as e:
            self.warning(f"Error extracting {path}. Error: {e}")
            return False
        return True
4 changes: 2 additions & 2 deletions bbot/modules/trufflehog.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ async def filter_event(self, event):
else:
return False, "Deleted forks is not enabled"
else:
if "parsed-folder" in event.tags:
return False, "Not accepting parsed-folder events"
if "unarchived-folder" in event.tags:
return False, "Not accepting unarchived-folder events"
return True

async def handle_event(self, event):
Expand Down
6 changes: 3 additions & 3 deletions bbot/test/test_step_1/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,17 +342,17 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config):
monkeypatch.setattr("sys.argv", ["bbot", "-y"])
result = await cli._main()
assert result is True
assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,speculate)" in caplog.text
assert "Loaded 6/6 internal modules (aggregate,cloudcheck,dnsresolve,excavate,speculate,unarchive)" in caplog.text
caplog.clear()
monkeypatch.setattr("sys.argv", ["bbot", "-em", "excavate", "speculate", "-y"])
result = await cli._main()
assert result is True
assert "Loaded 3/3 internal modules (aggregate,cloudcheck,dnsresolve)" in caplog.text
assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,unarchive)" in caplog.text
caplog.clear()
monkeypatch.setattr("sys.argv", ["bbot", "-c", "speculate=false", "-y"])
result = await cli._main()
assert result is True
assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,excavate)" in caplog.text
assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,unarchive)" in caplog.text

# custom target type
out, err = capsys.readouterr()
Expand Down
10 changes: 9 additions & 1 deletion bbot/test/test_step_1/test_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,14 @@ def test_preset_module_resolution(clean_default_config):
# make sure we have the expected defaults
assert not preset.scan_modules
assert set(preset.output_modules) == {"python", "csv", "txt", "json"}
assert set(preset.internal_modules) == {"aggregate", "excavate", "speculate", "cloudcheck", "dnsresolve"}
assert set(preset.internal_modules) == {
"aggregate",
"excavate",
"unarchive",
"speculate",
"cloudcheck",
"dnsresolve",
}
assert preset.modules == set(preset.output_modules).union(set(preset.internal_modules))

# make sure dependency resolution works as expected
Expand Down Expand Up @@ -553,6 +560,7 @@ def test_preset_module_resolution(clean_default_config):
"dnsresolve",
"aggregate",
"excavate",
"unarchive",
"txt",
"httpx",
"csv",
Expand Down
Loading
Loading