From 4f400212ba8fb58291518957cd5af23e06a4b0f5 Mon Sep 17 00:00:00 2001 From: Erik Abair Date: Mon, 7 Feb 2022 21:58:17 -0800 Subject: [PATCH] Add auto formatting and reduce pylint errors --- KickFIFO.py | 40 ++ README.md | 8 + Texture.py | 361 ++++++----- Trace.py | 1454 +++++++++++++++++++++++-------------------- XboxHelper.py | 229 +++++++ githooks/pre-commit | 65 ++ helper.py | 189 ------ nv2a-trace.py | 483 +++++++------- 8 files changed, 1591 insertions(+), 1238 deletions(-) create mode 100644 KickFIFO.py create mode 100644 XboxHelper.py create mode 100755 githooks/pre-commit delete mode 100644 helper.py diff --git a/KickFIFO.py b/KickFIFO.py new file mode 100644 index 0000000..67bce96 --- /dev/null +++ b/KickFIFO.py @@ -0,0 +1,40 @@ +"""Manages the kick_fifo.asm patch.""" + +# pylint: disable=consider-using-f-string +# pylint: disable=too-few-public-methods + +import struct +import XboxHelper + + +class _KickFIFO: + """Manages the kick_fifo.asm patch.""" + + def __init__(self, verbose=True): + self.kick_fifo_addr = None + self.verbose = verbose + + def _install_kicker(self, xbox): + if self.kick_fifo_addr is not None: + return + + with open("kick_fifo", "rb") as patch_file: + data = patch_file.read() + + self.kick_fifo_addr = XboxHelper.load_binary(xbox, data) + if self.verbose: + print("kick_fifo installed at 0x%08X" % self.kick_fifo_addr) + + def call(self, xbox, expected_put): + """Calls the kicker with the given argument.""" + self._install_kicker(xbox) + eax = xbox.call(self.kick_fifo_addr, struct.pack("> offset) & mask - - width = size[0] - height = size[1] - - if (len(channel_sizes) == 3): - mode = 'RGB' - elif (len(channel_sizes) == 4): - mode = 'RGBA' - else: - raise Exception("Unsupported channel_sizes %d" % len(channel_sizes)) - - img = Image.new(mode, (width, height)) - - #FIXME: Unswizzle data on the fly instead - if swizzled: - data = nv2a.Unswizzle(data, bits_per_pixel, (width, height), pitch) - - pixels = img.load() # create the pixel map - - assert(bits_per_pixel % 8 == 0) - - for y in range(height): - for x in range(width): - - pixel_offset = y * pitch + x * bits_per_pixel // 8 - pixel_bytes = data[pixel_offset:pixel_offset + bits_per_pixel // 8] - pixel_bits = int.from_bytes(pixel_bytes, byteorder='little') - - pixel_channels = () - for channel_offset, channel_size in zip(channel_offsets, channel_sizes): - channel_value = get_bits(pixel_bits, channel_offset, channel_size) - - # Normalize channel - if channel_size > 0: - channel_value /= (1 << channel_size) - 1 - channel_value = int(channel_value * 0xFF) - else: - channel_value = 0x00 - - pixel_channels += (channel_value,) - pixels[x, y] = pixel_channels - - return img - - -def surfaceColorFormatToTextureFormat(fmt, swizzled): - """Convert nv2a draw format to the equivalent Texture format.""" - if fmt == 0x3: # ARGB1555 - return 0x3 if swizzled else 0x1C - elif fmt == 0x5: # RGB565 - return 0x5 if swizzled else 0x11 - elif fmt == 0x7 or fmt == 0x8: # XRGB8888 - return 0x7 if swizzled else 0x1E - elif fmt == 0xC: # ARGB8888 - return 0x6 if swizzled else 0x12 - else: - raise Exception("Unknown color fmt %d (0x%X) %s" % (fmt, fmt, "swizzled" if swizzled else "unswizzled")) - - -def surfaceZetaFormatToTextureFormat(fmt, swizzled, is_float): - """Convert nv2a zeta format to the equivalent Texture format.""" - if fmt == 0x1: # Z16 - if is_float: - return 0x2D if swizzled else 0x31 +R5G6B5 = TextureDescription(16, (5, 6, 5), (11, 5, 0)) +A4R4G4B4 = TextureDescription(16, (4, 4, 4, 4), (8, 4, 0, 12)) +A1R5G5B5 = TextureDescription(16, (5, 5, 5, 1), (10, 5, 0, 15)) +X1R5G5B5 = TextureDescription(16, (5, 5, 5), (10, 5, 0)) +A8R8G8B8 = TextureDescription(32, (8, 8, 8, 8), (16, 8, 0, 24)) +X8R8G8B8 = TextureDescription(32, (8, 8, 8), (16, 8, 0)) + + +def _decode_texture( + data, size, pitch, swizzled, bits_per_pixel, channel_sizes, channel_offsets +): + """Convert the given texture data into a PIL.Image.""" + + # Check argument sanity + assert len(size) == 2 # FIXME: Support 1D and 3D? + assert len(channel_offsets) == len(channel_sizes) + + # Helper function to extract integer at bit offset with bit size + def get_bits(bits, offset, length): + mask = (1 << length) - 1 + return (bits >> offset) & mask + + width = size[0] + height = size[1] + + if len(channel_sizes) == 3: + mode = "RGB" + elif len(channel_sizes) == 4: + mode = "RGBA" else: - return 0x2C if swizzled else 0x30 - elif fmt == 0x2: # Z24S8 - if is_float: - return 0x2B if swizzled else 0x2F + raise Exception("Unsupported channel_sizes %d" % len(channel_sizes)) + + img = Image.new(mode, (width, height)) + + # FIXME: Unswizzle data on the fly instead + if swizzled: + data = nv2a.Unswizzle(data, bits_per_pixel, (width, height), pitch) + + pixels = img.load() # create the pixel map + + assert bits_per_pixel % 8 == 0 + + for y in range(height): + for x in range(width): + + pixel_offset = y * pitch + x * bits_per_pixel // 8 + pixel_bytes = data[pixel_offset : pixel_offset + bits_per_pixel // 8] + pixel_bits = int.from_bytes(pixel_bytes, byteorder="little") + + pixel_channels = () + for channel_offset, channel_size in zip(channel_offsets, channel_sizes): + channel_value = get_bits(pixel_bits, channel_offset, channel_size) + + # Normalize channel + if channel_size > 0: + channel_value /= (1 << channel_size) - 1 + channel_value = int(channel_value * 0xFF) + else: + channel_value = 0x00 + + pixel_channels += (channel_value,) + pixels[x, y] = pixel_channels + + return img + + +def surface_color_format_to_texture_format(fmt, swizzled): + """Convert nv2a draw format to the equivalent Texture format.""" + if fmt == 0x3: # ARGB1555 + return 0x3 if swizzled else 0x1C + + if fmt == 0x5: # RGB565 + return 0x5 if swizzled else 0x11 + + if fmt in [0x7, 0x08]: # XRGB8888 + return 0x7 if swizzled else 0x1E + + if fmt == 0xC: # ARGB8888 + return 0x6 if swizzled else 0x12 + + raise Exception( + "Unknown color fmt %d (0x%X) %s" + % (fmt, fmt, "swizzled" if swizzled else "unswizzled") + ) + + +def surface_zeta_format_to_texture_format(fmt, swizzled, is_float): + """Convert nv2a zeta format to the equivalent Texture format.""" + if fmt == 0x1: # Z16 + if is_float: + return 0x2D if swizzled else 0x31 + return 0x2C if swizzled else 0x30 + + if fmt == 0x2: # Z24S8 + if is_float: + return 0x2B if swizzled else 0x2F + return 0x2A if swizzled else 0x2E + + raise Exception( + "Unknown zeta fmt %d (0x%X) %s %s" + % ( + fmt, + fmt, + "float" if is_float else "fixed", + "swizzled" if swizzled else "unswizzled", + ) + ) + + +def dump_texture(xbox, offset, pitch, fmt_color, width, height): + """Convert the texture at the given offset into a PIL.Image.""" + img = None + + if fmt_color == 0x0: + tex_info = (True, Y8) + elif fmt_color == 0x1: + tex_info = (True, AY8) + elif fmt_color == 0x2: + tex_info = (True, A1R5G5B5) + elif fmt_color == 0x3: + tex_info = (True, X1R5G5B5) + elif fmt_color == 0x4: + tex_info = (True, A4R4G4B4) + elif fmt_color == 0x5: + tex_info = (True, R5G6B5) + elif fmt_color == 0x6: + tex_info = (True, A8R8G8B8) + elif fmt_color == 0x7: + tex_info = (True, X8R8G8B8) + elif fmt_color == 0xB: + img = Image.new( + "RGB", (width, height), (255, 0, 255, 255) + ) # FIXME! Palette mode! + elif fmt_color == 0xC: # DXT1 + data = xbox.read(0x80000000 | offset, width * height // 2) + img = Image.frombytes("RGBA", (width, height), data, "bcn", 1) # DXT1 + elif fmt_color == 0xE: # DXT3 + data = xbox.read(0x80000000 | offset, width * height * 1) + img = Image.frombytes("RGBA", (width, height), data, "bcn", 2) # DXT3 + elif fmt_color == 0xF: # DXT5 + data = xbox.read(0x80000000 | offset, width * height * 1) + img = Image.frombytes("RGBA", (width, height), data, "bcn", 3) # DXT5 + elif fmt_color == 0x10: + tex_info = (False, A1R5G5B5) + elif fmt_color == 0x11: + tex_info = (False, R5G6B5) + elif fmt_color == 0x12: + tex_info = (False, A8R8G8B8) + elif fmt_color == 0x19: + tex_info = (True, A8) + elif fmt_color == 0x1A: + tex_info = (True, A8Y8) + elif fmt_color == 0x1C: + tex_info = (False, X1R5G5B5) + elif fmt_color == 0x1D: + tex_info = (False, A4R4G4B4) + elif fmt_color == 0x1E: + tex_info = (False, X8R8G8B8) + elif fmt_color == 0x2E: + img = Image.new( + "RGB", (width, height), (255, 0, 255, 255) + ) # FIXME! Depth format + elif fmt_color == 0x30: + img = Image.new( + "RGB", (width, height), (255, 0, 255, 255) + ) # FIXME! Depth format + elif fmt_color == 0x31: + img = Image.new( + "RGB", (width, height), (255, 0, 255, 255) + ) # FIXME! Depth format else: - return 0x2A if swizzled else 0x2E - else: - raise Exception("Unknown zeta fmt %d (0x%X) %s %s" % (fmt, fmt, "float" if is_float else "fixed", "swizzled" if swizzled else "unswizzled")) - - -def dumpTexture(xbox, offset, pitch, fmt_color, width, height): - """Convert the texture at the given offset into a PIL.Image.""" - img = None - - if fmt_color == 0x0: tex_info = (True, Y8) - elif fmt_color == 0x1: tex_info = (True, AY8) - elif fmt_color == 0x2: tex_info = (True, A1R5G5B5) - elif fmt_color == 0x3: tex_info = (True, X1R5G5B5) - elif fmt_color == 0x4: tex_info = (True, A4R4G4B4) - elif fmt_color == 0x5: tex_info = (True, R5G6B5) - elif fmt_color == 0x6: tex_info = (True, A8R8G8B8) - elif fmt_color == 0x7: tex_info = (True, X8R8G8B8) - elif fmt_color == 0xB: img = Image.new('RGB', (width, height), (255, 0, 255, 255)) #FIXME! Palette mode! - elif fmt_color == 0xC: # DXT1 - data = xbox.read(0x80000000 | offset, width * height // 2) - img = Image.frombytes('RGBA', (width, height), data, 'bcn', 1) # DXT1 - elif fmt_color == 0xE: # DXT3 - data = xbox.read(0x80000000 | offset, width * height * 1) - img = Image.frombytes('RGBA', (width, height), data, 'bcn', 2) # DXT3 - elif fmt_color == 0xF: # DXT5 - data = xbox.read(0x80000000 | offset, width * height * 1) - img = Image.frombytes('RGBA', (width, height), data, 'bcn', 3) # DXT5 - elif fmt_color == 0x10: tex_info = (False, A1R5G5B5) - elif fmt_color == 0x11: tex_info = (False, R5G6B5) - elif fmt_color == 0x12: tex_info = (False, A8R8G8B8) - elif fmt_color == 0x19: tex_info = (True, A8) - elif fmt_color == 0x1A: tex_info = (True, A8Y8) - elif fmt_color == 0x1C: tex_info = (False, X1R5G5B5) - elif fmt_color == 0x1D: tex_info = (False, A4R4G4B4) - elif fmt_color == 0x1E: tex_info = (False, X8R8G8B8) - elif fmt_color == 0x2E: img = Image.new('RGB', (width, height), (255, 0, 255, 255)) #FIXME! Depth format - elif fmt_color == 0x30: img = Image.new('RGB', (width, height), (255, 0, 255, 255)) #FIXME! Depth format - elif fmt_color == 0x31: img = Image.new('RGB', (width, height), (255, 0, 255, 255)) #FIXME! Depth format - else: - raise Exception("Unknown texture format: 0x%X" % fmt_color) - - # Some formats might have been parsed already - if img == None: - - swizzled = tex_info[0] - format_info = tex_info[1] - - # Parse format info - bits_per_pixel, channel_sizes, channel_offsets = format_info - - #FIXME: Avoid this nasty ~~convience feature~~ hack - if pitch == 0: - pitch = width * bits_per_pixel // 8 - - #FIXME: Might want to skip the empty area if pitch and width diverge? - data = xbox.read(0x80000000 | offset, pitch * height) - img = _decodeTexture(data, (width, height), pitch, swizzled, bits_per_pixel, channel_sizes, channel_offsets) - - return img - - + raise Exception("Unknown texture format: 0x%X" % fmt_color) + + # Some formats might have been parsed already + if img is None: + + swizzled = tex_info[0] + format_info = tex_info[1] + + # Parse format info + bits_per_pixel, channel_sizes, channel_offsets = format_info + + # FIXME: Avoid this nasty ~~convience feature~~ hack + if pitch == 0: + pitch = width * bits_per_pixel // 8 + + # FIXME: Might want to skip the empty area if pitch and width diverge? + data = xbox.read(0x80000000 | offset, pitch * height) + img = _decode_texture( + data, + (width, height), + pitch, + swizzled, + bits_per_pixel, + channel_sizes, + channel_offsets, + ) + + return img diff --git a/Trace.py b/Trace.py index d24bcac..0a59431 100644 --- a/Trace.py +++ b/Trace.py @@ -1,713 +1,823 @@ +"""Provides methods to trace nv2a commands.""" + +# FIXME: DONOTSUBMIT REMOVE BELOW +# pylint: disable=fixme + +# pylint: disable=consider-using-f-string +# pylint: disable=missing-function-docstring +# pylint: disable=too-many-arguments +# pylint: disable=too-many-instance-attributes +# pylint: disable=too-many-locals +# pylint: disable=too-many-statements + import atexit import os import struct import time import traceback +import KickFIFO import Texture +import XboxHelper -from helper import * +class MaxFlipExceeded(Exception): + """Exception to indicate the maximum number of buffer flips has been reached.""" + + +# pylint: disable=invalid-name OutputDir = "out" PixelDumping = True TextureDumping = True SurfaceDumping = True DebugPrint = False MaxFrames = 0 -class MaxFlipExceeded(Exception): - pass +# pylint: enable=invalid-name -pgraph_dump = None +# pylint: disable=invalid-name exchange_u32_addr = None -kick_fifo_addr = None -debugLog = os.path.join(OutputDir, "debug.html") - - -def _addHTML(xx): - f = open(debugLog,"a") - f.write("") - for x in xx: - f.write("%s" % x) - f.write("\n") - f.close() -f = open(debugLog,"w") -f.write("\n") -#FIXME: atexit close tags.. but yolo! -f.close() -_addHTML(["#", "Opcode / Method", "..."]) - - -def _htmlPrint(s): - print(s) - _addHTML([s]) - - -def _loadBinary(xbox, code): - code_addr = xbox.ke.MmAllocateContiguousMemory(len(code)) - print("Allocated 0x%08X" % code_addr) - def free_code(): - print("Free'ing 0x%08X" % code_addr) - xbox.ke.MmFreeContiguousMemory(code_addr) - atexit.register(free_code) - xbox.write(code_addr, code) - return code_addr - - -def _exchangeU32Call(xbox, address, value): - global exchange_u32_addr - if exchange_u32_addr is None: - - with open("exchange_u32", "rb") as infile: - data = infile.read() - - exchange_u32_addr = _loadBinary(xbox, data) - print("exchange_u32 installed at 0x%08X" % exchange_u32_addr) - return xbox.call(exchange_u32_addr, struct.pack("> 8) & 0x7F - width_shift = (fmt >> 20) & 0xF - height_shift = (fmt >> 24) & 0xF - width = 1 << width_shift - height = 1 << height_shift - - if True: - pass - #FIXME: self.out("tex-%d.bin" % (i), xbox.read(0x80000000 | offset, pitch * height)) - - - print("Texture %d [0x%08X, %d x %d (pitch: 0x%X), format 0x%X]" % (i, offset, width, height, pitch, fmt_color)) - img = Texture.dumpTexture(xbox, offset, pitch, fmt_color, width, height) - - if img != None: - img.save(os.path.join(OutputDir, path)) - extraHTML += ['%s' % (path, path)] - - return extraHTML - - def DumpSurfaces(self, xbox, data, *args): - global PixelDumping - global SurfaceDumping - if not PixelDumping or not SurfaceDumping: - return [] - - color_pitch = xbox.read_u32(0xFD400858) - depth_pitch = xbox.read_u32(0xFD40085C) - - color_offset = xbox.read_u32(0xFD400828) - depth_offset = xbox.read_u32(0xFD40082C) - - color_base = xbox.read_u32(0xFD400840) - depth_base = xbox.read_u32(0xFD400844) - - #FIXME: Is this correct? pbkit uses _base, but D3D seems to use _offset? - color_offset += color_base - depth_offset += depth_base - - surface_clip_x = xbox.read_u32(0xFD4019B4) - surface_clip_y = xbox.read_u32(0xFD4019B8) - - draw_format = xbox.read_u32(0xFD400804) - surface_type = xbox.read_u32(0xFD400710) - swizzle_unk = xbox.read_u32(0xFD400818) - - swizzle_unk2 = xbox.read_u32(0xFD40086c) - - clip_x = (surface_clip_x >> 0) & 0xFFFF - clip_y = (surface_clip_y >> 0) & 0xFFFF - - clip_w = (surface_clip_x >> 16) & 0xFFFF - clip_h = (surface_clip_y >> 16) & 0xFFFF - - surface_anti_aliasing = (surface_type >> 4) & 3 - - clip_x, clip_y = apply_anti_aliasing_factor(surface_anti_aliasing, clip_x, clip_y) - clip_w, clip_h = apply_anti_aliasing_factor(surface_anti_aliasing, clip_w, clip_h) - - width = clip_x + clip_w - height = clip_y + clip_h - - #FIXME: 128 x 128 [pitch = 256 (0x100)], at 0x01AA8000 [PGRAPH: 0x01AA8000?], format 0x5, type: 0x21000002, swizzle: 0x7070000 [used 0] - - #FIXME: This does not seem to be a good field for this - #FIXME: Patched to give 50% of coolness - swizzled = ((surface_type & 3) == 2) - #FIXME: if surface_type is 0, we probably can't even draw.. - - format_color = (draw_format >> 12) & 0xF - format_depth = (draw_format >> 18) & 0x3 - - fmt_color = Texture.surfaceColorFormatToTextureFormat(format_color, swizzled) - #fmt_depth = Texture.surface_zeta_format_to_texture_format(format_depth) - - # Dump stuff we might care about - if True: - self.out("pgraph.bin", _dumpPGRAPH(xbox)) - self.out("pfb.bin", _dumpPFB(xbox)) - if color_offset != 0x00000000: - self.out("mem-2.bin", xbox.read(0x80000000 | color_offset, color_pitch * height)) - if depth_offset != 0x00000000: - self.out("mem-3.bin", xbox.read(0x80000000 | depth_offset, depth_pitch * height)) - self.out("pgraph-rdi-vp-instructions.bin", _readPGRAPHRDI(xbox, 0x100000, 136 * 4)) - self.out("pgraph-rdi-vp-constants0.bin", _readPGRAPHRDI(xbox, 0x170000, 192 * 4)) - self.out("pgraph-rdi-vp-constants1.bin", _readPGRAPHRDI(xbox, 0xCC0000, 192 * 4)) - - #FIXME: Respect anti-aliasing - - path = "command%d--color.png" % (self.commandCount) - extraHTML = [] - extraHTML += ['%s' % (path, path)] - extraHTML += ['%d x %d [pitch = %d (0x%X)], at 0x%08X, format 0x%X, type: 0x%X, swizzle: 0x%08X, 0x%08X [used %d]' % (width, height, color_pitch, color_pitch, color_offset, format_color, surface_type, swizzle_unk, swizzle_unk2, swizzled)] - print(extraHTML[-1]) - - try: - if color_offset == 0x00000000: - print("Color offset is null") - raise Exception() - else: - print("Attempting to dump surface; swizzle: %s" % (str(swizzled))) - img = Texture.dumpTexture(xbox, color_offset, color_pitch, fmt_color, width, height) - except: - img = None - print("Failed to dump color surface") - traceback.print_exc() - - if img != None: - - # Hack to remove alpha channel - if True: - img = img.convert('RGB') - - img.save(os.path.join(OutputDir, path)) - - return extraHTML - - def HandleBegin(self, xbox, data, *args): - - # Avoid handling End - if data == 0: - return [] - - extraHTML = [] - #extraHTML += self.DumpSurfaces(xbox, data, *args) - #extraHTML += self.DumpTextures(xbox, data, *args) - return extraHTML - - def HandleEnd(self, xbox, data, *args): - - # Avoid handling Begin - if data != 0: - return [] - - extraHTML = [] - extraHTML += self.DumpSurfaces(xbox, data, *args) - return extraHTML - - def beginPGRAPHRecord(self, xbox, data, *args): - global pgraph_dump - pgraph_dump = _dumpPGRAPH(xbox) - _addHTML(["", "", "", "", "Dumped PGRAPH for later"]) - return [] - - def endPGRAPHRecord(self, xbox, data, *args): - global pgraph_dump - - # Debug feature to understand PGRAPH - if pgraph_dump != None: - new_pgraph_dump = _dumpPGRAPH(xbox) - - # This blacklist was created from a CLEAR_COLOR, CLEAR - blacklist = [] - blacklist += [0x0040000C] # 0xF3DF0479 → 0xF3DE04F9 - blacklist += [0x0040002C] # 0xF3DF37FF → 0xF3DE37FF - blacklist += [0x0040010C] # 0x03DF0000 → 0x020000F1 - blacklist += [0x0040012C] # 0x13DF379F → 0x131A37FF - blacklist += [0x00400704] # 0x00001D9C → 0x00001D94 - blacklist += [0x00400708] # 0x01DF0000 → 0x000000F1 - blacklist += [0x0040070C] # 0x01DF0000 → 0x000000F1 - blacklist += [0x0040072C] # 0x01DF2700 → 0x000027F1 - blacklist += [0x00400740] # 0x01DF37DD → 0x01DF37FF - blacklist += [0x00400744] # 0x18111D9C → 0x18111D94 - blacklist += [0x00400748] # 0x01DF0011 → 0x000000F1 - blacklist += [0x0040074C] # 0x01DF0097 → 0x000000F7 - blacklist += [0x00400750] # 0x00DF005C → 0x00DF0064 - blacklist += [0x00400760] # 0x000000CC → 0x000000FF - blacklist += [0x00400764] # 0x08001D9C → 0x08001D94 - blacklist += [0x00400768] # 0x01DF0000 → 0x000000F1 - blacklist += [0x0040076C] # 0x01DF0000 → 0x000000F1 - blacklist += [0x00400788] # 0x01DF110A → 0x000011FB - blacklist += [0x004007A0] # 0x00200100 → 0x00201D70 - blacklist += [0x004007A4] # 0x00200100 → 0x00201D70 - blacklist += [0x004007A8] # 0x00200100 → 0x00201D70 - blacklist += [0x004007AC] # 0x00200100 → 0x00201D70 - blacklist += [0x004007B0] # 0x00200100 → 0x00201D70 - blacklist += [0x004007B4] # 0x00200100 → 0x00201D70 - blacklist += [0x004007B8] # 0x00200100 → 0x00201D70 - blacklist += [0x004007BC] # 0x00200100 → 0x00201D70 - blacklist += [0x004007C0] # 0x00000000 → 0x000006C9 - blacklist += [0x004007C4] # 0x00000000 → 0x000006C9 - blacklist += [0x004007C8] # 0x00000000 → 0x000006C9 - blacklist += [0x004007CC] # 0x00000000 → 0x000006C9 - blacklist += [0x004007D0] # 0x00000000 → 0x000006C9 - blacklist += [0x004007D4] # 0x00000000 → 0x000006C9 - blacklist += [0x004007D8] # 0x00000000 → 0x000006C9 - blacklist += [0x004007DC] # 0x00000000 → 0x000006C9 - blacklist += [0x004007E0] # 0x00000000 → 0x000006C9 - blacklist += [0x004007E4] # 0x00000000 → 0x000006C9 - blacklist += [0x004007E8] # 0x00000000 → 0x000006C9 - blacklist += [0x004007EC] # 0x00000000 → 0x000006C9 - blacklist += [0x004007F0] # 0x00000000 → 0x000006C9 - blacklist += [0x004007F4] # 0x00000000 → 0x000006C9 - blacklist += [0x004007F8] # 0x00000000 → 0x000006C9 - blacklist += [0x004007FC] # 0x00000000 → 0x000006C9 - blacklist += [0x00400D6C] # 0x00000000 → 0xFF000000 - blacklist += [0x0040110C] # 0x03DF0000 → 0x020000F1 - blacklist += [0x0040112C] # 0x13DF379F → 0x131A37FF - blacklist += [0x00401704] # 0x00001D9C → 0x00001D94 - blacklist += [0x00401708] # 0x01DF0000 → 0x000000F1 - blacklist += [0x0040170C] # 0x01DF0000 → 0x000000F1 - blacklist += [0x0040172C] # 0x01DF2700 → 0x000027F1 - blacklist += [0x00401740] # 0x01DF37FD → 0x01DF37FF - blacklist += [0x00401744] # 0x18111D9C → 0x18111D94 - blacklist += [0x00401748] # 0x01DF0011 → 0x000000F1 - blacklist += [0x0040174C] # 0x01DF0097 → 0x000000F7 - blacklist += [0x00401750] # 0x00DF0064 → 0x00DF006C - blacklist += [0x00401760] # 0x000000CC → 0x000000FF - blacklist += [0x00401764] # 0x08001D9C → 0x08001D94 - blacklist += [0x00401768] # 0x01DF0000 → 0x000000F1 - blacklist += [0x0040176C] # 0x01DF0000 → 0x000000F1 - blacklist += [0x00401788] # 0x01DF110A → 0x000011FB - blacklist += [0x004017A0] # 0x00200100 → 0x00201D70 - blacklist += [0x004017A4] # 0x00200100 → 0x00201D70 - blacklist += [0x004017A8] # 0x00200100 → 0x00201D70 - blacklist += [0x004017AC] # 0x00200100 → 0x00201D70 - blacklist += [0x004017B0] # 0x00200100 → 0x00201D70 - blacklist += [0x004017B4] # 0x00200100 → 0x00201D70 - blacklist += [0x004017B8] # 0x00200100 → 0x00201D70 - blacklist += [0x004017BC] # 0x00200100 → 0x00201D70 - blacklist += [0x004017C0] # 0x00000000 → 0x000006C9 - blacklist += [0x004017C4] # 0x00000000 → 0x000006C9 - blacklist += [0x004017C8] # 0x00000000 → 0x000006C9 - blacklist += [0x004017CC] # 0x00000000 → 0x000006C9 - blacklist += [0x004017D0] # 0x00000000 → 0x000006C9 - blacklist += [0x004017D4] # 0x00000000 → 0x000006C9 - blacklist += [0x004017D8] # 0x00000000 → 0x000006C9 - blacklist += [0x004017DC] # 0x00000000 → 0x000006C9 - blacklist += [0x004017E0] # 0x00000000 → 0x000006C9 - blacklist += [0x004017E4] # 0x00000000 → 0x000006C9 - blacklist += [0x004017E8] # 0x00000000 → 0x000006C9 - blacklist += [0x004017EC] # 0x00000000 → 0x000006C9 - blacklist += [0x004017F0] # 0x00000000 → 0x000006C9 - blacklist += [0x004017F4] # 0x00000000 → 0x000006C9 - blacklist += [0x004017F8] # 0x00000000 → 0x000006C9 - blacklist += [0x004017FC] # 0x00000000 → 0x000006C9 - #blacklist += [0x0040186C] # 0x00000000 → 0xFF000000 # CLEAR COLOR - blacklist += [0x0040196C] # 0x00000000 → 0xFF000000 - blacklist += [0x00401C6C] # 0x00000000 → 0xFF000000 - blacklist += [0x00401D6C] # 0x00000000 → 0xFF000000 - - for i in range(len(pgraph_dump) // 4): - off = 0x00400000 + i * 4 - if off in blacklist: - continue - word = struct.unpack_from("= MaxFrames: - raise MaxFlipExceeded() - return [] - - def HandleSetTexture(self, xbox, data, *args): - pass - #FIXME: Dump texture here? - - def filterPGRAPHMethod(self, xbox, method): - # Do callback for pre-method - if method in self.method_callbacks: - return self.method_callbacks[method] - return [], [] - - def recordPGRAPHMethod(self, xbox, method_info, data, pre_info, post_info): - if data is not None: - dataf = struct.unpack(""] + pre_info + post_info) - - - - def WritePUT(self, xbox, target): - - prev_target = self.target_dma_put_addr - prev_real = self.real_dma_put_addr - - real = _exchangeU32(xbox, dma_put_addr, target) - self.target_dma_put_addr = target - - # It must point where we pointed previously, otherwise something is broken - if (real != prev_target): - _htmlPrint("New real PUT (0x%08X -> 0x%08X) while changing hook 0x%08X -> 0x%08X" % (prev_real, real, prev_target, target)) - s1 = xbox.read_u32(put_state) - if s1 & 1: - print("PUT was modified and pusher was already active!") - time.sleep(60.0) - self.real_dma_put_addr = real - #traceback.print_stack() - - def run_fifo(self, xbox, xbox_helper, put_target): - global DebugPrint - - # Queue the commands - self.WritePUT(xbox, put_target) - - #FIXME: we can avoid this read in some cases, as we should know where we are - self.real_dma_get_addr = xbox.read_u32(dma_get_addr) - - - _addHTML(["WARNING", "Running FIFO (GET: 0x%08X -- PUT: 0x%08X / 0x%08X)" % (self.real_dma_get_addr, put_target, self.real_dma_put_addr)]) - - # Loop while this command is being ran. - # This is necessary because a whole command might not fit into CACHE. - # So we have to process it chunk by chunk. - command_base = self.real_dma_get_addr - #FIXME: This used to be a check which made sure that `v_dma_get_addr` did - # never leave the known PB. - while self.real_dma_get_addr != put_target: - if DebugPrint: print("At 0x%08X, target is 0x%08X (Real: 0x%08X)" % (self.real_dma_get_addr, put_target, self.real_dma_put_addr)) - - # Disable PGRAPH, so it can't run anything from CACHE. - xbox_helper.disable_pgraph_fifo() - xbox_helper.wait_until_pgraph_idle() - - # This scope should be atomic. - if True: - - # Avoid running bad code, if the PUT was modified sometime during - # this command. - self.WritePUT(xbox, self.target_dma_put_addr) - - # Kick commands into CACHE. - _kickFifo(xbox, self.target_dma_put_addr) - - #print("PUT STATE 0x%08X" % xbox.read_u32(0xFD003220)) - - - # Run the commands we have moved to CACHE, by enabling PGRAPH. - xbox_helper.enable_pgraph_fifo() - - # Get the updated PB address. - self.real_dma_get_addr = xbox.read_u32(dma_get_addr) - - # This is just to confirm that nothing was modified in the final chunk - self.WritePUT(xbox, self.target_dma_put_addr) - - def parsePushBufferCommand(self, xbox, get_addr): - global DebugPrint - - # Retrieve command type from Xbox - word = xbox.read_u32(0x80000000 | get_addr) - _addHTML(["", "", "", "@0x%08X: DATA: 0x%08X" % (get_addr, word)]) - - #FIXME: Get where this command ends - next_parser_addr = parseCommand(get_addr, word, DebugPrint) - - # If we don't know where this command ends, we have to abort. - if next_parser_addr == 0: - return None, 0 - - # Check which method it is. - if ((word & 0xe0030003) == 0) or ((word & 0xe0030003) == 0x40000000): - # methods - method = word & 0x1fff; - subchannel = (word >> 13) & 7; - method_count = (word >> 18) & 0x7ff; - method_nonincreasing = word & 0x40000000; - - method_info = {} - method_info['address'] = get_addr - method_info['method'] = method - method_info['nonincreasing'] = method_nonincreasing - method_info['subchannel'] = subchannel - - # Download this command from Xbox - if (method_count == 0): - # Halo: CE has cases where method_count is 0?! - html_print("Warning: Command 0x%X with method_count == 0\n" % method) - data = [] - else: - command = xbox.read(0x80000000 | (get_addr + 4), method_count * 4) - - #FIXME: Unpack all of them? - data = struct.unpack("<%dL" % method_count, command) - assert(len(data) == method_count) - method_info['data'] = data - else: - method_info = None - - return method_info, next_parser_addr - - def filterPushBufferCommand(self, xbox, method_info): - - pre_callbacks = [] - post_callbacks = [] - - method = method_info['method'] - for data in method_info['data']: - pre_callbacks_this, post_callbacks_this = self.filterPGRAPHMethod(xbox, method) - - # Queue the callbacks - pre_callbacks += pre_callbacks_this - post_callbacks += post_callbacks_this - - if not method_info['nonincreasing']: - method += 4 - - return pre_callbacks, post_callbacks - - def recordPushBufferCommand(self, xbox, address, method_info, pre_info, post_info): - orig_method = method_info['method'] - - # Put info in debug HTML - _addHTML(["%d" % self.commandCount, "%s" % method_info]) - for data in method_info['data']: - - self.recordPGRAPHMethod(xbox, method_info, data, pre_info, post_info) - - if not method_info['nonincreasing']: - method_info['method'] += 4 - - # Handle special case from Halo: CE where there are commands with no data. - if not method_info['data']: - self.recordPGRAPHMethod(xbox, method_info, None, pre_info, post_info) - - method_info['method'] = orig_method - - self.commandCount += 1 - - return - - def processPushBufferCommand(self, xbox, xbox_helper, parser_addr): - _addHTML(["WARNING", "Starting FIFO parsing from 0x%08X -- 0x%08X" % (parser_addr, self.real_dma_put_addr)]) +# pylint: enable=invalid-name - if parser_addr == self.real_dma_put_addr: - unprocessed_bytes = 0 - else: - # Filter commands and check where it wants to go to - method_info, post_addr = self.parsePushBufferCommand(xbox, parser_addr) +def _exchange_u32(xbox, address, value): + global exchange_u32_addr + if exchange_u32_addr is None: + with open("exchange_u32", "rb") as infile: + data = infile.read() - # We have a problem if we can't tell where to go next - assert(post_addr != 0) + exchange_u32_addr = XboxHelper.load_binary(xbox, data) + print("exchange_u32 installed at 0x%08X" % exchange_u32_addr) + return xbox.call(exchange_u32_addr, struct.pack(" 0: - - # Go where we want to go - self.run_fifo(xbox, xbox_helper, parser_addr) + # Return the PFB dump + assert len(buffer) == 0x1000 + return bytes(buffer) - # Do the pre callbacks before running the command - #FIXME: assert we are where we wanted to be - for callback in pre_callbacks: - pre_info += callback(xbox, method_info['data'][0]) +def _read_pgraph_rdi(xbox, offset, count): + # FIXME: Assert pusher access is disabled + # FIXME: Assert PGRAPH idle + + NV10_PGRAPH_RDI_INDEX = 0xFD400750 + NV10_PGRAPH_RDI_DATA = 0xFD400754 + + xbox.write_u32(NV10_PGRAPH_RDI_INDEX, offset) + data = bytearray() + for _ in range(count): + word = xbox.read_u32(NV10_PGRAPH_RDI_DATA) + data += struct.pack(" 0: + # FIXME: Restore original RDI? + # FIXME: Assert the conditions from entry have not changed + return data - # If we reached target, we can't step again without leaving valid buffer - assert(parser_addr != self.real_dma_put_addr) - # Go where we want to go (equivalent to step) - self.run_fifo(xbox, xbox_helper, post_addr) +class HTMLLog: + """Manages the HTML log file.""" - # We have processed all bytes now - unprocessed_bytes = 0 + def __init__(self, path): + self.path = path - # Do all post callbacks - for callback in post_callbacks: - post_info += callback(xbox, method_info['data'][0]) + with open(path, "w", encoding="utf8") as logfile: + # pylint: disable=line-too-long + logfile.write( + "" + "
\n" + ) + # pylint: enable=line-too-long + + self.log(["#", "Opcode / Method", "..."]) + atexit.register(self._close_tags) - # Add the pushbuffer command to log - self.recordPushBufferCommand(xbox, parser_addr, method_info, pre_info, post_info) + def _close_tags(self): + with open(self.path, "a", encoding="utf8") as logfile: + logfile.write("
") + + def log(self, values): + """Append the given values to the HTML log.""" + with open(self.path, "a", encoding="utf8") as logfile: + logfile.write("") + for val in values: + logfile.write("%s" % val) + logfile.write("\n") + + def print_log(self, message): + """Print the given string and append it to the HTML log.""" + print(message) + self.log([message]) + + +class NV2ALog: + """Manages the nv2a log file.""" + + def __init__(self, path): + self.path = path + + with open(self.path, "w", encoding="utf8") as logfile: + logfile.write("xemu style NV2A log from nv2a-trace.py") + + def log(self, message): + """Append the given string to the nv2a log.""" + with open(self.path, "a", encoding="utf8") as logfile: + logfile.write(message) + + def log_method(self, method_info, data, pre_info, post_info): + """Append a line describing the given pgraph call to the nv2a log.""" + with open(self.path, "a", encoding="utf8") as logfile: + logfile.write( + "nv2a: pgraph method (%d): 0x97 -> 0x%x (0x%x)\n" + % (method_info["subchannel"], method_info["method"], data) + ) + + if Nv2aLogMethodDetails: + logfile.write("Method info:\n") + logfile.write("Address: 0x%X\n" % method_info["address"]) + logfile.write("Method: 0x%X\n" % method_info["method"]) + logfile.write("Nonincreasing: %d\n" % method_info["nonincreasing"]) + logfile.write("Subchannel: 0x%X\n" % method_info["subchannel"]) + logfile.write("data:\n") + logfile.write(str(data)) + logfile.write("\n\n") + logfile.write("pre_info: %s\n" % pre_info) + logfile.write("post_info: %s\n" % post_info) + + +class Tracer: + """Performs tracing of the xbox nv2a state.""" + + def __init__(self, dma_get_addr, dma_put_addr): + self.html_log = HTMLLog(os.path.join(OutputDir, "debug.html")) + self.nv2a_log = NV2ALog(os.path.join(OutputDir, "nv2a_log.txt")) + self.flip_stall_count = 0 + self.command_count = 0 + + self.real_dma_get_addr = dma_get_addr + self.real_dma_put_addr = dma_put_addr + self.target_dma_put_addr = dma_get_addr + + self.pgraph_dump = None + + self.method_callbacks = {} + self._hook_methods() + + def hook_method(self, _obj, method, pre_hooks, post_hooks): + """Registers pre- and post-run hooks for the given method.""" + # TODO: Respect object parameter. + print("Registering method hook for 0x%04X" % method) + self.method_callbacks[method] = pre_hooks, post_hooks + + @property + def recorded_flip_stall_count(self): + return self.flip_stall_count + + @property + def recorded_command_count(self): + return self.command_count + + def run_fifo(self, xbox, xbox_helper, put_target): + # Queue the commands + self._write_put(xbox, put_target) + + # FIXME: we can avoid this read in some cases, as we should know where we are + self.real_dma_get_addr = xbox.read_u32(XboxHelper.DMA_GET_ADDR) + + self.html_log.log( + [ + "WARNING", + "Running FIFO (GET: 0x%08X -- PUT: 0x%08X / 0x%08X)" + % (self.real_dma_get_addr, put_target, self.real_dma_put_addr), + ] + ) + + # Loop while this command is being ran. + # This is necessary because a whole command might not fit into CACHE. + # So we have to process it chunk by chunk. + # FIXME: This used to be a check which made sure that `v_dma_get_addr` did + # never leave the known PB. + while self.real_dma_get_addr != put_target: + if DebugPrint: + print( + "At 0x%08X, target is 0x%08X (Real: 0x%08X)" + % (self.real_dma_get_addr, put_target, self.real_dma_put_addr) + ) + + # Disable PGRAPH, so it can't run anything from CACHE. + xbox_helper.disable_pgraph_fifo() + xbox_helper.wait_until_pgraph_idle() - # Move parser to the next instruction - parser_addr = post_addr + # This scope should be atomic. + # Avoid running bad code, if the PUT was modified sometime during + # this command. + self._write_put(xbox, self.target_dma_put_addr) + + # Kick commands into CACHE. + KickFIFO.kick(xbox, self.target_dma_put_addr) + + # print("PUT STATE 0x%08X" % xbox.read_u32(0xFD003220)) + + # Run the commands we have moved to CACHE, by enabling PGRAPH. + xbox_helper.enable_pgraph_fifo() + + # Get the updated PB address. + self.real_dma_get_addr = xbox.read_u32(XboxHelper.DMA_GET_ADDR) + + # This is just to confirm that nothing was modified in the final chunk + self._write_put(xbox, self.target_dma_put_addr) + + def dump_textures(self, xbox, data, *args): + if not PixelDumping or not TextureDumping: + return [] + + extra_html = [] + + for i in range(4): + path = "command%d--tex_%d.png" % (self.command_count, i) + + offset = xbox.read_u32(0xFD401A24 + i * 4) # NV_PGRAPH_TEXOFFSET0 + pitch = ( + 0 # xbox.read_u32(0xFD4019DC + i * 4) # NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH + ) + fmt = xbox.read_u32(0xFD401A04 + i * 4) # NV_PGRAPH_TEXFMT0 + fmt_color = (fmt >> 8) & 0x7F + width_shift = (fmt >> 20) & 0xF + height_shift = (fmt >> 24) & 0xF + width = 1 << width_shift + height = 1 << height_shift + + # FIXME: self.out("tex-%d.bin" % (i), xbox.read(0x80000000 | offset, pitch * height)) + + print( + "Texture %d [0x%08X, %d x %d (pitch: 0x%X), format 0x%X]" + % (i, offset, width, height, pitch, fmt_color) + ) + img = Texture.dump_texture(xbox, offset, pitch, fmt_color, width, height) + + if img: + img.save(os.path.join(OutputDir, path)) + extra_html += ['%s' % (path, path)] + + return extra_html + + def dump_surfaces(self, xbox, data, *args): + if not PixelDumping or not SurfaceDumping: + return [] + + color_pitch = xbox.read_u32(0xFD400858) + depth_pitch = xbox.read_u32(0xFD40085C) + + color_offset = xbox.read_u32(0xFD400828) + depth_offset = xbox.read_u32(0xFD40082C) + + color_base = xbox.read_u32(0xFD400840) + depth_base = xbox.read_u32(0xFD400844) + + # FIXME: Is this correct? pbkit uses _base, but D3D seems to use _offset? + color_offset += color_base + depth_offset += depth_base + + surface_clip_x = xbox.read_u32(0xFD4019B4) + surface_clip_y = xbox.read_u32(0xFD4019B8) + + draw_format = xbox.read_u32(0xFD400804) + surface_type = xbox.read_u32(0xFD400710) + swizzle_unk = xbox.read_u32(0xFD400818) + + swizzle_unk2 = xbox.read_u32(0xFD40086C) + + clip_x = (surface_clip_x >> 0) & 0xFFFF + clip_y = (surface_clip_y >> 0) & 0xFFFF + + clip_w = (surface_clip_x >> 16) & 0xFFFF + clip_h = (surface_clip_y >> 16) & 0xFFFF + + surface_anti_aliasing = (surface_type >> 4) & 3 + + clip_x, clip_y = XboxHelper.apply_anti_aliasing_factor( + surface_anti_aliasing, clip_x, clip_y + ) + clip_w, clip_h = XboxHelper.apply_anti_aliasing_factor( + surface_anti_aliasing, clip_w, clip_h + ) + + width = clip_x + clip_w + height = clip_y + clip_h + + # FIXME: 128 x 128 [pitch = 256 (0x100)], at 0x01AA8000 [PGRAPH: 0x01AA8000?], format 0x5, type: 0x21000002, swizzle: 0x7070000 [used 0] + + # FIXME: This does not seem to be a good field for this + # FIXME: Patched to give 50% of coolness + swizzled = (surface_type & 3) == 2 + # FIXME: if surface_type is 0, we probably can't even draw.. + + format_color = (draw_format >> 12) & 0xF + # FIXME: Support 3D surfaces. + format_depth = (draw_format >> 18) & 0x3 + + fmt_color = Texture.surface_color_format_to_texture_format( + format_color, swizzled + ) + # fmt_depth = Texture.surface_zeta_format_to_texture_format(format_depth) + + # Dump stuff we might care about + self._write("pgraph.bin", _dump_pgraph(xbox)) + self._write("pfb.bin", _dump_pfb(xbox)) + if color_offset != 0x00000000: + self._write( + "mem-2.bin", + xbox.read(0x80000000 | color_offset, color_pitch * height), + ) + if depth_offset != 0x00000000: + self._write( + "mem-3.bin", + xbox.read(0x80000000 | depth_offset, depth_pitch * height), + ) + self._write( + "pgraph-rdi-vp-instructions.bin", + _read_pgraph_rdi(xbox, 0x100000, 136 * 4), + ) + self._write( + "pgraph-rdi-vp-constants0.bin", + _read_pgraph_rdi(xbox, 0x170000, 192 * 4), + ) + self._write( + "pgraph-rdi-vp-constants1.bin", + _read_pgraph_rdi(xbox, 0xCC0000, 192 * 4), + ) + + # FIXME: Respect anti-aliasing + + path = "command%d--color.png" % (self.command_count) + extra_html = [] + extra_html += ['%s' % (path, path)] + extra_html += [ + "%d x %d [pitch = %d (0x%X)], at 0x%08X, format 0x%X, type: 0x%X, swizzle: 0x%08X, 0x%08X [used %d]" + % ( + width, + height, + color_pitch, + color_pitch, + color_offset, + format_color, + surface_type, + swizzle_unk, + swizzle_unk2, + swizzled, + ) + ] + print(extra_html[-1]) + + try: + if color_offset == 0x00000000: + print("Color offset is null") + raise Exception() + + print("Attempting to dump surface; swizzle: %s" % (str(swizzled))) + img = Texture.dump_texture( + xbox, color_offset, color_pitch, fmt_color, width, height + ) + except: # pylint: disable=bare-except + img = None + print("Failed to dump color surface") + traceback.print_exc() + + if img: + # FIXME: Make this configurable or save an alpha preserving variant. + # Hack to remove alpha channel + img = img.convert("RGB") + + img.save(os.path.join(OutputDir, path)) + + return extra_html + + def _hook_methods(self): + """Installs hooks for methods interpreted by this class.""" + NV097_CLEAR_SURFACE = 0x1D94 + self.hook_method(0x97, NV097_CLEAR_SURFACE, [], [self.dump_surfaces]) + + NV097_SET_BEGIN_END = 0x17FC + self.hook_method( + 0x97, NV097_SET_BEGIN_END, [self._handle_begin], [self._handle_end] + ) + + # Check for texture address changes + # for i in range(4): + # methodHooks(0x1B00 + 64 * i, [], [HandleSetTexture], i) + + # Add the list of commands which might trigger CPU actions + NV097_FLIP_STALL = 0x0130 + self.hook_method(0x97, NV097_FLIP_STALL, [], [self._handle_flip_stall]) + + NV097_BACK_END_WRITE_SEMAPHORE_RELEASE = 0x1D70 + self.hook_method( + 0x97, NV097_BACK_END_WRITE_SEMAPHORE_RELEASE, [], [self.dump_surfaces] + ) + + def _handle_begin(self, xbox, data, *args): + + # Avoid handling End + if data == 0: + return [] + + print("BEGIN %d" % self.command_count) + + extra_html = [] + # extra_html += self.DumpSurfaces(xbox, data, *args) + # extra_html += self.DumpTextures(xbox, data, *args) + return extra_html + + def _handle_end(self, xbox, data, *args): + + # Avoid handling Begin + if data != 0: + return [] + + extra_html = [] + extra_html += self.dump_surfaces(xbox, data, *args) + return extra_html + + def _begin_pgraph_recording(self, xbox, data, *args): + self.pgraph_dump = _dump_pgraph(xbox) + self.html_log.log(["", "", "", "", "Dumped PGRAPH for later"]) + return [] + + def _end_pgraph_recording(self, xbox, data, *args): + # Debug feature to understand PGRAPH + if self.pgraph_dump is not None: + new_pgraph_dump = _dump_pgraph(xbox) + + # This blacklist was created from a CLEAR_COLOR, CLEAR + blacklist = [ + 0x0040000C, # 0xF3DF0479 → 0xF3DE04F9 + 0x0040002C, # 0xF3DF37FF → 0xF3DE37FF + 0x0040010C, # 0x03DF0000 → 0x020000F1 + 0x0040012C, # 0x13DF379F → 0x131A37FF + 0x00400704, # 0x00001D9C → 0x00001D94 + 0x00400708, # 0x01DF0000 → 0x000000F1 + 0x0040070C, # 0x01DF0000 → 0x000000F1 + 0x0040072C, # 0x01DF2700 → 0x000027F1 + 0x00400740, # 0x01DF37DD → 0x01DF37FF + 0x00400744, # 0x18111D9C → 0x18111D94 + 0x00400748, # 0x01DF0011 → 0x000000F1 + 0x0040074C, # 0x01DF0097 → 0x000000F7 + 0x00400750, # 0x00DF005C → 0x00DF0064 + 0x00400760, # 0x000000CC → 0x000000FF + 0x00400764, # 0x08001D9C → 0x08001D94 + 0x00400768, # 0x01DF0000 → 0x000000F1 + 0x0040076C, # 0x01DF0000 → 0x000000F1 + 0x00400788, # 0x01DF110A → 0x000011FB + 0x004007A0, # 0x00200100 → 0x00201D70 + 0x004007A4, # 0x00200100 → 0x00201D70 + 0x004007A8, # 0x00200100 → 0x00201D70 + 0x004007AC, # 0x00200100 → 0x00201D70 + 0x004007B0, # 0x00200100 → 0x00201D70 + 0x004007B4, # 0x00200100 → 0x00201D70 + 0x004007B8, # 0x00200100 → 0x00201D70 + 0x004007BC, # 0x00200100 → 0x00201D70 + 0x004007C0, # 0x00000000 → 0x000006C9 + 0x004007C4, # 0x00000000 → 0x000006C9 + 0x004007C8, # 0x00000000 → 0x000006C9 + 0x004007CC, # 0x00000000 → 0x000006C9 + 0x004007D0, # 0x00000000 → 0x000006C9 + 0x004007D4, # 0x00000000 → 0x000006C9 + 0x004007D8, # 0x00000000 → 0x000006C9 + 0x004007DC, # 0x00000000 → 0x000006C9 + 0x004007E0, # 0x00000000 → 0x000006C9 + 0x004007E4, # 0x00000000 → 0x000006C9 + 0x004007E8, # 0x00000000 → 0x000006C9 + 0x004007EC, # 0x00000000 → 0x000006C9 + 0x004007F0, # 0x00000000 → 0x000006C9 + 0x004007F4, # 0x00000000 → 0x000006C9 + 0x004007F8, # 0x00000000 → 0x000006C9 + 0x004007FC, # 0x00000000 → 0x000006C9 + 0x00400D6C, # 0x00000000 → 0xFF000000 + 0x0040110C, # 0x03DF0000 → 0x020000F1 + 0x0040112C, # 0x13DF379F → 0x131A37FF + 0x00401704, # 0x00001D9C → 0x00001D94 + 0x00401708, # 0x01DF0000 → 0x000000F1 + 0x0040170C, # 0x01DF0000 → 0x000000F1 + 0x0040172C, # 0x01DF2700 → 0x000027F1 + 0x00401740, # 0x01DF37FD → 0x01DF37FF + 0x00401744, # 0x18111D9C → 0x18111D94 + 0x00401748, # 0x01DF0011 → 0x000000F1 + 0x0040174C, # 0x01DF0097 → 0x000000F7 + 0x00401750, # 0x00DF0064 → 0x00DF006C + 0x00401760, # 0x000000CC → 0x000000FF + 0x00401764, # 0x08001D9C → 0x08001D94 + 0x00401768, # 0x01DF0000 → 0x000000F1 + 0x0040176C, # 0x01DF0000 → 0x000000F1 + 0x00401788, # 0x01DF110A → 0x000011FB + 0x004017A0, # 0x00200100 → 0x00201D70 + 0x004017A4, # 0x00200100 → 0x00201D70 + 0x004017A8, # 0x00200100 → 0x00201D70 + 0x004017AC, # 0x00200100 → 0x00201D70 + 0x004017B0, # 0x00200100 → 0x00201D70 + 0x004017B4, # 0x00200100 → 0x00201D70 + 0x004017B8, # 0x00200100 → 0x00201D70 + 0x004017BC, # 0x00200100 → 0x00201D70 + 0x004017C0, # 0x00000000 → 0x000006C9 + 0x004017C4, # 0x00000000 → 0x000006C9 + 0x004017C8, # 0x00000000 → 0x000006C9 + 0x004017CC, # 0x00000000 → 0x000006C9 + 0x004017D0, # 0x00000000 → 0x000006C9 + 0x004017D4, # 0x00000000 → 0x000006C9 + 0x004017D8, # 0x00000000 → 0x000006C9 + 0x004017DC, # 0x00000000 → 0x000006C9 + 0x004017E0, # 0x00000000 → 0x000006C9 + 0x004017E4, # 0x00000000 → 0x000006C9 + 0x004017E8, # 0x00000000 → 0x000006C9 + 0x004017EC, # 0x00000000 → 0x000006C9 + 0x004017F0, # 0x00000000 → 0x000006C9 + 0x004017F4, # 0x00000000 → 0x000006C9 + 0x004017F8, # 0x00000000 → 0x000006C9 + 0x004017FC, # 0x00000000 → 0x000006C9 + # 0x0040186C, # 0x00000000 → 0xFF000000 # CLEAR COLOR + 0x0040196C, # 0x00000000 → 0xFF000000 + 0x00401C6C, # 0x00000000 → 0xFF000000 + 0x00401D6C, # 0x00000000 → 0xFF000000 + ] + + for i in range(len(self.pgraph_dump) // 4): + off = 0x00400000 + i * 4 + if off in blacklist: + continue + word = struct.unpack_from("= MaxFrames: + raise MaxFlipExceeded() + return [] + + def _filter_pgraph_method(self, xbox, method): + # Do callback for pre-method + if method in self.method_callbacks: + return self.method_callbacks[method] + return [], [] + + def _record_pgraph_method(self, xbox, method_info, data, pre_info, post_info): + if data is not None: + dataf = struct.unpack("", + ] + + pre_info + + post_info + ) + + def _write_put(self, xbox, target): + + prev_target = self.target_dma_put_addr + prev_real = self.real_dma_put_addr + + real = _exchange_u32(xbox, XboxHelper.DMA_PUT_ADDR, target) + self.target_dma_put_addr = target + + # It must point where we pointed previously, otherwise something is broken + if real != prev_target: + self.html_log.print_log( + "New real PUT (0x%08X -> 0x%08X) while changing hook 0x%08X -> 0x%08X" + % (prev_real, real, prev_target, target) + ) + s1 = xbox.read_u32(XboxHelper.PUT_STATE) + if s1 & 1: + print("PUT was modified and pusher was already active!") + time.sleep(60.0) + self.real_dma_put_addr = real + # traceback.print_stack() + + def _parse_push_buffer_command(self, xbox, get_addr): + global DebugPrint + + # Retrieve command type from Xbox + word = xbox.read_u32(0x80000000 | get_addr) + self.html_log.log(["", "", "", "@0x%08X: DATA: 0x%08X" % (get_addr, word)]) + + # FIXME: Get where this command ends + next_parser_addr = XboxHelper.parse_command(get_addr, word, DebugPrint) + + # If we don't know where this command ends, we have to abort. + if next_parser_addr == 0: + return None, 0 + + # Check which method it is. + if ((word & 0xE0030003) == 0) or ((word & 0xE0030003) == 0x40000000): + # methods + method = word & 0x1FFF + subchannel = (word >> 13) & 7 + method_count = (word >> 18) & 0x7FF + method_nonincreasing = word & 0x40000000 + + method_info = {} + method_info["address"] = get_addr + method_info["method"] = method + method_info["nonincreasing"] = method_nonincreasing + method_info["subchannel"] = subchannel + + # Download this command from Xbox + if method_count == 0: + # Halo: CE has cases where method_count is 0?! + self.html_log.print_log( + "Warning: Command 0x%X with method_count == 0\n" % method + ) + data = [] + else: + command = xbox.read(0x80000000 | (get_addr + 4), method_count * 4) + + # FIXME: Unpack all of them? + data = struct.unpack("<%dL" % method_count, command) + assert len(data) == method_count + method_info["data"] = data + else: + method_info = None + + return method_info, next_parser_addr + + def _get_method_hooks(self, xbox, method_info): + + pre_callbacks = [] + post_callbacks = [] + + method = method_info["method"] + for data in method_info["data"]: + pre_callbacks_this, post_callbacks_this = self._filter_pgraph_method( + xbox, method + ) + + # Queue the callbacks + pre_callbacks += pre_callbacks_this + post_callbacks += post_callbacks_this + + if not method_info["nonincreasing"]: + method += 4 + + return pre_callbacks, post_callbacks + + def _record_push_buffer_command( + self, xbox, address, method_info, pre_info, post_info + ): + orig_method = method_info["method"] + + self.html_log.log(["%d" % self.command_count, "%s" % method_info]) + # Handle special case from Halo: CE where there are commands with no data. + if not method_info["data"]: + self._record_pgraph_method(xbox, method_info, None, pre_info, post_info) + else: + for data in method_info["data"]: + self._record_pgraph_method(xbox, method_info, data, pre_info, post_info) + if not method_info["nonincreasing"]: + method_info["method"] += 4 + + method_info["method"] = orig_method + self.command_count += 1 + + def process_push_buffer_command(self, xbox, xbox_helper, parser_addr): + self.html_log.log( + [ + "WARNING", + "Starting FIFO parsing from 0x%08X -- 0x%08X" + % (parser_addr, self.real_dma_put_addr), + ] + ) + + if parser_addr == self.real_dma_put_addr: + unprocessed_bytes = 0 + else: + + # Filter commands and check where it wants to go to + method_info, post_addr = self._parse_push_buffer_command(xbox, parser_addr) + + # We have a problem if we can't tell where to go next + assert post_addr != 0 + + # If we have a method, work with it + if method_info is None: + + self.html_log.log(["WARNING", "No method. Going to 0x%08X" % post_addr]) + unprocessed_bytes = 4 + + else: + + # Check what method this is + pre_callbacks, post_callbacks = self._get_method_hooks( + xbox, method_info + ) + + # Count number of bytes in instruction + unprocessed_bytes = 4 * (1 + len(method_info["data"])) + + # Go where we can do pre-callback + pre_info = [] + if len(pre_callbacks) > 0: + + # Go where we want to go + self.run_fifo(xbox, xbox_helper, parser_addr) + + # Do the pre callbacks before running the command + # FIXME: assert we are where we wanted to be + for callback in pre_callbacks: + pre_info += callback(xbox, method_info["data"][0]) + + # Go where we can do post-callback + post_info = [] + if len(post_callbacks) > 0: + + # If we reached target, we can't step again without leaving valid buffer + assert parser_addr != self.real_dma_put_addr + + # Go where we want to go (equivalent to step) + self.run_fifo(xbox, xbox_helper, post_addr) - _addHTML(["WARNING", "Sucessfully finished FIFO parsing 0x%08X -- 0x%08X (%d bytes unprocessed)" % (parser_addr, self.real_dma_put_addr, unprocessed_bytes)]) + # We have processed all bytes now + unprocessed_bytes = 0 - return parser_addr, unprocessed_bytes + # Do all post callbacks + for callback in post_callbacks: + post_info += callback(xbox, method_info["data"][0]) - def recordedFlipStallCount(self): - return self.flipStallCount + # Add the pushbuffer command to log + self._record_push_buffer_command( + xbox, parser_addr, method_info, pre_info, post_info + ) + + # Move parser to the next instruction + parser_addr = post_addr - def recordedPushBufferCommandCount(self): - return self.commandCount + self.html_log.log( + [ + "WARNING", + "Sucessfully finished FIFO parsing 0x%08X -- 0x%08X (%d bytes unprocessed)" + % (parser_addr, self.real_dma_put_addr, unprocessed_bytes), + ] + ) - def methodHooks(self, obj, method, pre_hooks, post_hooks, user = None): - print("Registering method hook for 0x%04X" % method) - self.method_callbacks[method] = (pre_hooks, post_hooks) - return + return parser_addr, unprocessed_bytes + + def _write(self, suffix, contents): + out_path = os.path.join(OutputDir, "command%d_" % self.command_count) + suffix + with open(out_path, "wb") as dumpfile: + dumpfile.write(contents) diff --git a/XboxHelper.py b/XboxHelper.py new file mode 100644 index 0000000..9ff6a6b --- /dev/null +++ b/XboxHelper.py @@ -0,0 +1,229 @@ +"""Various helper methods""" + +# pylint: disable=missing-function-docstring +# pylint: disable=consider-using-f-string +# pylint: disable=chained-comparison + +import atexit + +DMA_STATE = 0xFD003228 +DMA_PUT_ADDR = 0xFD003240 +DMA_GET_ADDR = 0xFD003244 +DMA_SUBROUTINE = 0xFD00324C + +PUT_ADDR = 0xFD003210 +PUT_STATE = 0xFD003220 +GET_ADDR = 0xFD003270 +GET_STATE = 0xFD003250 + +PGRAPH_STATE = 0xFD400720 +PGRAPH_STATUS = 0xFD400700 + + +def load_binary(xbox, data): + """Loads arbitrary data into a new contiguous memory block on the xbox.""" + code_addr = xbox.ke.MmAllocateContiguousMemory(len(data)) + print("load_binary: Allocated 0x%08X" % code_addr) + + def free_allocation(): + print("load_binary: Free'ing 0x%08X" % code_addr) + xbox.ke.MmFreeContiguousMemory(code_addr) + + atexit.register(free_allocation) + xbox.write(code_addr, data) + return code_addr + + +def parse_command(addr, word, display=False): + + prefix = "0x%08X: Opcode: 0x%08X" % (addr, word) + + if (word & 0xE0000003) == 0x20000000: + # state->get_jmp_shadow = control->dma_get; + # NV2A_DPRINTF("pb OLD_JMP 0x%" HWADDR_PRIx "\n", control->dma_get); + addr = word & 0x1FFFFFFC + print(prefix + "; old jump 0x%08X" % addr) + return addr + + if (word & 3) == 1: + addr = word & 0xFFFFFFFC + print(prefix + "; jump 0x%08X" % addr) + # state->get_jmp_shadow = control->dma_get; + return addr + + if (word & 3) == 2: + print(prefix + "; unhandled opcode type: call") + # if (state->subroutine_active) { + # state->error = NV_PFIFO_CACHE1_DMA_STATE_ERROR_CALL; + # break; + # } + # state->subroutine_return = control->dma_get; + # state->subroutine_active = true; + # control->dma_get = word & 0xfffffffc; + return 0 + + if word == 0x00020000: + # return + print(prefix + "; unhandled opcode type: return") + return 0 + + if not (word & 0xE0030003) or (word & 0xE0030003) == 0x40000000: + # methods + method = word & 0x1FFF + # subchannel = (word >> 13) & 7 + method_count = (word >> 18) & 0x7FF + # method_nonincreasing = word & 0x40000000 + # state->dcount = 0; + if display: + print(prefix + "; Method: 0x%04X (%d times)" % (method, method_count)) + addr += 4 + method_count * 4 + return addr + + print(prefix + "; unknown opcode type") + return addr + + +class XboxHelper: + """Provides various functions for interaction with XBOX""" + + def __init__(self, xbox): + self.xbox = xbox + + def delay(self): + # FIXME: if this returns `True`, the functions below should have their own + # loops which check for command completion + # time.sleep(0.01) + return False + + def disable_pgraph_fifo(self): + state_s1 = self.xbox.read_u32(PGRAPH_STATE) + self.xbox.write_u32(PGRAPH_STATE, state_s1 & 0xFFFFFFFE) + + def wait_until_pgraph_idle(self): + while self.xbox.read_u32(PGRAPH_STATUS) & 0x00000001: + pass + + def enable_pgraph_fifo(self): + state_s1 = self.xbox.read_u32(PGRAPH_STATE) + self.xbox.write_u32(PGRAPH_STATE, state_s1 | 0x00000001) + if self.delay(): + pass + + def wait_until_pusher_idle(self): + while self.xbox.read_u32(GET_STATE) & (1 << 4): + pass + + def pause_fifo_puller(self): + # Idle the puller and pusher + state_s1 = self.xbox.read_u32(GET_STATE) + self.xbox.write_u32(GET_STATE, state_s1 & 0xFFFFFFFE) + if self.delay(): + pass + # print("Puller State was 0x" + format(state_s1, '08X')) + + def pause_fifo_pusher(self): + state_s1 = self.xbox.read_u32(PUT_STATE) + self.xbox.write_u32(PUT_STATE, state_s1 & 0xFFFFFFFE) + if self.delay(): + pass + if False: + state_s1 = self.xbox.read_u32(0xFD003200) + self.xbox.write_u32(0xFD003200, state_s1 & 0xFFFFFFFE) + if self.delay(): + pass + # print("Pusher State was 0x" + format(state_s1, '08X')) + + def resume_fifo_puller(self): + # Resume puller and pusher + state_s2 = self.xbox.read_u32(GET_STATE) + self.xbox.write_u32( + GET_STATE, (state_s2 & 0xFFFFFFFE) | 1 + ) # Recover puller state + if self.delay(): + pass + + def resume_fifo_pusher(self): + if False: + state_s2 = self.xbox.read_u32(0xFD003200) + self.xbox.write_u32(0xFD003200, state_s2 & 0xFFFFFFFE | 1) + if self.delay(): + pass + state_s2 = self.xbox.read_u32(PUT_STATE) + self.xbox.write_u32( + PUT_STATE, (state_s2 & 0xFFFFFFFE) | 1 + ) # Recover pusher state + if self.delay(): + pass + + def _dump_pb(self, start, end): + offset = start + while offset != end: + word = self.xbox.read_u32(0x80000000 | offset) + offset = parse_command(offset, word, True) + if offset == 0: + break + + # FIXME: This works poorly if the method count is not 0 + def print_pb_state(self): + v_dma_get_addr = self.xbox.read_u32(DMA_GET_ADDR) + v_dma_put_addr = self.xbox.read_u32(DMA_PUT_ADDR) + v_dma_subroutine = self.xbox.read_u32(DMA_SUBROUTINE) + + print( + "PB-State: 0x%08X / 0x%08X / 0x%08X" + % (v_dma_get_addr, v_dma_put_addr, v_dma_subroutine) + ) + self._dump_pb(v_dma_get_addr, v_dma_put_addr) + print() + + def print_cache_state(self): + v_get_addr = self.xbox.read_u32(GET_ADDR) + v_put_addr = self.xbox.read_u32(PUT_ADDR) + + v_get_state = self.xbox.read_u32(GET_STATE) + v_put_state = self.xbox.read_u32(PUT_STATE) + + print("CACHE-State: 0x%X / 0x%X" % (v_get_addr, v_put_addr)) + + print("Put / Pusher enabled: %s" % ("Yes" if (v_put_state & 1) else "No")) + print("Get / Puller enabled: %s" % ("Yes" if (v_get_state & 1) else "No")) + + print("Cache:") + for i in range(128): + + cache1_method = self.xbox.read_u32(0xFD003800 + i * 8) + cache1_data = self.xbox.read_u32(0xFD003804 + i * 8) + + output = " [0x%02X] 0x%04X (0x%08X)" % (i, cache1_method, cache1_data) + v_get_offset = i * 8 - v_get_addr + if v_get_offset >= 0 and v_get_offset < 8: + output += " < get[%d]" % v_get_offset + v_put_offset = i * 8 - v_put_addr + if v_put_offset >= 0 and v_put_offset < 8: + output += " < put[%d]" % v_put_offset + + print(output) + print() + + def print_dma_state(self): + v_dma_state = self.xbox.read_u32(DMA_STATE) + v_dma_method = v_dma_state & 0x1FFC + # v_dma_subchannel = (v_dma_state >> 13) & 7 + v_dma_method_count = (v_dma_state >> 18) & 0x7FF + # v_dma_method_nonincreasing = v_dma_state & 1 + # higher bits are for error signalling? + + print("v_dma_method: 0x%04X (count: %d)" % (v_dma_method, v_dma_method_count)) + + +def apply_anti_aliasing_factor(surface_anti_aliasing, x, y): + if surface_anti_aliasing == 0: + return x, y + + if surface_anti_aliasing == 1: + return x * 2, y + + if surface_anti_aliasing == 2: + return x * 2, y * 2 + + assert False diff --git a/githooks/pre-commit b/githooks/pre-commit new file mode 100755 index 0000000..75eb042 --- /dev/null +++ b/githooks/pre-commit @@ -0,0 +1,65 @@ +#!/bin/bash +# +# To enable this hook, rename this file to "pre-commit" and copy into the +# ../.git/hooks directory. + + +function check_no_diffmarkers_or_whitespace_errors { + # If there are whitespace errors, print the offending file names and fail. + set -e + git diff-index --check --cached "${1}" -- + set +e +} + + +function run_black { + # Run black against any changed python files. + if ! which black > /dev/null; then + cat <<\EOF +Warning: black is not installed or is not in the PATH. + +Please install and amend this commit. + +Try: + pip3 install black +EOF + return + fi + + # Reformat the files in-place and re-add any that were changed. + # + # Note that this has the side effect of incorporating changes to staged files + # that were not themselves staged. E.g., if you edit a file, `git add`, then + # edit some more, then commit, all of the changes will be committed, not just + # the staged ones. Depending on typical workflows it might be better to do + # something more complicated here, or to just have the hook fail instead of + # perform an in-place fix. + files_to_format="$(echo "${changed_python_filenames}" | grep -v '3rdparty')" + if [[ -n "${files_to_format}" ]]; then + echo "${files_to_format}" | xargs black + echo "${files_to_format}" | xargs git add + fi +} + + +if git rev-parse --verify HEAD >/dev/null 2>&1; then + against=HEAD +else + # Initial commit: diff against an empty tree object + against=$(git hash-object -t tree /dev/null) +fi + +# Redirect output to stderr. +exec 1>&2 + + +added_and_modified_filenames="$(git diff --cached --name-only --diff-filter=d)" +changed_python_filenames="$(echo "${added_and_modified_filenames}" | \ + grep -E '.*\.py$')" + + +# Allow blank line at EOF. +git config --local core.whitespace -blank-at-eof + +run_black +check_no_diffmarkers_or_whitespace_errors "${against}" diff --git a/helper.py b/helper.py deleted file mode 100644 index dcce592..0000000 --- a/helper.py +++ /dev/null @@ -1,189 +0,0 @@ -dma_state = 0xFD003228 -dma_put_addr = 0xFD003240 -dma_get_addr = 0xFD003244 -dma_subroutine = 0xFD00324C - -put_addr = 0xFD003210 -put_state = 0xFD003220 -get_addr = 0xFD003270 -get_state = 0xFD003250 - -pgraph_state = 0xFD400720 -pgraph_status = 0xFD400700 - - -def parseCommand(addr, word, display=False): - - s = "0x%08X: Opcode: 0x%08X" % (addr, word) - - if ((word & 0xe0000003) == 0x20000000): - #state->get_jmp_shadow = control->dma_get; - #NV2A_DPRINTF("pb OLD_JMP 0x%" HWADDR_PRIx "\n", control->dma_get); - addr = word & 0x1ffffffc - print(s + "; old jump 0x%08X" % addr) - elif ((word & 3) == 1): - addr = word & 0xfffffffc - print(s + "; jump 0x%08X" % addr) - #state->get_jmp_shadow = control->dma_get; - elif ((word & 3) == 2): - print(s + "; unhandled opcode type: call") - #if (state->subroutine_active) { - # state->error = NV_PFIFO_CACHE1_DMA_STATE_ERROR_CALL; - # break; - #} - #state->subroutine_return = control->dma_get; - #state->subroutine_active = true; - #control->dma_get = word & 0xfffffffc; - addr = 0 - elif (word == 0x00020000): - # return - print(s + "; unhandled opcode type: return") - addr = 0 - elif ((word & 0xe0030003) == 0) or ((word & 0xe0030003) == 0x40000000): - # methods - method = word & 0x1fff; - subchannel = (word >> 13) & 7; - method_count = (word >> 18) & 0x7ff; - method_nonincreasing = word & 0x40000000; - #state->dcount = 0; - if display: - print(s + "; Method: 0x%04X (%d times)" % (method, method_count)) - addr += 4 + method_count * 4 - - else: - print(s + "; unknown opcode type") - - return addr - - - -class XboxHelper(): - - def __init__(self, xbox): - self.xbox = xbox - - def delay(self): - #FIXME: if this returns `True`, the functions below should have their own - # loops which check for command completion - #time.sleep(0.01) - return False - - def disable_pgraph_fifo(self): - s1 = self.xbox.read_u32(pgraph_state) - self.xbox.write_u32(pgraph_state, s1 & 0xFFFFFFFE) - - def wait_until_pgraph_idle(self): - while(self.xbox.read_u32(pgraph_status) & 0x00000001): - pass - - def enable_pgraph_fifo(self): - s1 = self.xbox.read_u32(pgraph_state) - self.xbox.write_u32(pgraph_state, s1 | 0x00000001) - if self.delay(): pass - - def wait_until_pusher_idle(self): - while(self.xbox.read_u32(get_state) & (1 << 4)): - pass - - def pause_fifo_puller(self): - # Idle the puller and pusher - s1 = self.xbox.read_u32(get_state) - self.xbox.write_u32(get_state, s1 & 0xFFFFFFFE) - if self.delay(): pass - #print("Puller State was 0x" + format(s1, '08X')) - - def pause_fifo_pusher(self): - s1 = self.xbox.read_u32(put_state) - self.xbox.write_u32(put_state, s1 & 0xFFFFFFFE) - if self.delay(): pass - if False: - s1 = self.xbox.read_u32(0xFD003200) - self.xbox.write_u32(0xFD003200, s1 & 0xFFFFFFFE) - if self.delay(): pass - #print("Pusher State was 0x" + format(s1, '08X')) - - def resume_fifo_puller(self): - # Resume puller and pusher - s2 = self.xbox.read_u32(get_state) - self.xbox.write_u32(get_state, (s2 & 0xFFFFFFFE) | 1) # Recover puller state - if self.delay(): pass - - def resume_fifo_pusher(self): - if False: - s2 = self.xbox.read_u32(0xFD003200) - self.xbox.write_u32(0xFD003200, s2 & 0xFFFFFFFE | 1) - if self.delay(): pass - s2 = self.xbox.read_u32(put_state) - self.xbox.write_u32(put_state, (s2 & 0xFFFFFFFE) | 1) # Recover pusher state - if self.delay(): pass - - def dumpPB(self, start, end): - offset = start - while(offset != end): - word = self.xbox.read_u32(0x80000000 | offset) - offset = parseCommand(offset, word, True) - if offset == 0: - break - - #FIXME: This works poorly if the method count is not 0 - def dumpPBState(self): - v_dma_get_addr = self.xbox.read_u32(dma_get_addr) - v_dma_put_addr = self.xbox.read_u32(dma_put_addr) - v_dma_subroutine = self.xbox.read_u32(dma_subroutine) - - print("PB-State: 0x%08X / 0x%08X / 0x%08X" % (v_dma_get_addr, v_dma_put_addr, v_dma_subroutine)) - self.dumpPB(v_dma_get_addr, v_dma_put_addr) - print() - - def dumpCacheState(self): - v_get_addr = self.xbox.read_u32(get_addr) - v_put_addr = self.xbox.read_u32(put_addr) - - v_get_state = self.xbox.read_u32(get_state) - v_put_state = self.xbox.read_u32(put_state) - - print("CACHE-State: 0x%X / 0x%X" % (v_get_addr, v_put_addr)) - - print("Put / Pusher enabled: %s" % ("Yes" if (v_put_state & 1) else "No")) - print("Get / Puller enabled: %s" % ("Yes" if (v_get_state & 1) else "No")) - - print("Cache:") - for i in range(128): - - cache1_method = self.xbox.read_u32(0xFD003800 + i * 8) - cache1_data = self.xbox.read_u32(0xFD003804 + i * 8) - - s = " [0x%02X] 0x%04X (0x%08X)" % (i, cache1_method, cache1_data) - v_get_offset = i * 8 - v_get_addr - if v_get_offset >= 0 and v_get_offset < 8: - s += " < get[%d]" % v_get_offset - v_put_offset = i * 8 - v_put_addr - if v_put_offset >= 0 and v_put_offset < 8: - s += " < put[%d]" % v_put_offset - - print(s) - print() - - return - - def printDMAstate(self): - - v_dma_state = self.xbox.read_u32(dma_state) - v_dma_method = v_dma_state & 0x1FFC - v_dma_subchannel = (v_dma_state >> 13) & 7 - v_dma_method_count = (v_dma_state >> 18) & 0x7ff - v_dma_method_nonincreasing = v_dma_state & 1 - # higher bits are for error signalling? - - print("v_dma_method: 0x%04X (count: %d)" % (v_dma_method, v_dma_method_count)) - -def apply_anti_aliasing_factor(surface_anti_aliasing, x, y): - if surface_anti_aliasing == 0: - return x, y - elif surface_anti_aliasing == 1: - return x*2, y - elif surface_anti_aliasing == 2: - return x*2, y*2 - else: - assert(False) - return None diff --git a/nv2a-trace.py b/nv2a-trace.py index 2278e61..6f4d903 100755 --- a/nv2a-trace.py +++ b/nv2a-trace.py @@ -1,279 +1,310 @@ #!/usr/bin/env python3 -from xboxpy import * -from helper import * +"""Tool to capture nv2a activity from an xbox.""" + +# pylint: disable=missing-function-docstring +# pylint: disable=consider-using-f-string +# pylint: disable=too-few-public-methods +# pylint: disable=too-many-locals + import argparse import os import signal import sys -import struct import time import traceback -from helper import * +import xboxpy +import XboxHelper import Trace - -abortNow = False +# pylint: disable=invalid-name +abort_now = False _enable_experimental_disable_z_compression_and_tiling = True +# pylint: enable=invalid-name -def signal_handler(signal, frame): - global abortNow - if abortNow == False: - print('Got first SIGINT! Aborting..') - abortNow = True - else: - print('Got second SIGINT! Forcing exit') - sys.exit(0) +def signal_handler(_signal, _frame): + global abort_now + if not abort_now: + print("Got first SIGINT! Aborting..") + abort_now = True + else: + print("Got second SIGINT! Forcing exit") + sys.exit(0) + signal.signal(signal.SIGINT, signal_handler) -# Hack to pretend we have a better API in xboxpy class Xbox: - def __init__(self): - self.read_u32 = read_u32 - self.write_u32 = write_u32 - self.read = read - self.write = write - self.call = api.call - self.ke = ke -xbox = Xbox() + """Trivial wrapper around xboxpy""" -xbox_helper = XboxHelper(xbox) + def __init__(self): + self.read_u32 = xboxpy.read_u32 + self.write_u32 = xboxpy.write_u32 + self.read = xboxpy.read + self.write = xboxpy.write + self.call = xboxpy.api.call + self.ke = xboxpy.ke -def main(args): - # Create output folder - try: - os.mkdir(args.out) - Trace.OutputDir = args.out - except FileExistsError: - pass - - if args.no_surface: - Trace.SurfaceDumping = False +def _wait_for_stable_push_buffer_state(xbox, xbox_helper): + """Blocks until the push buffer reaches a stable state.""" - if args.no_texture: - Trace.TextureDumping = False + v_dma_get_addr = 0 + v_dma_put_addr_real = 0 - if args.no_pixel: - Trace.PixelDumping = False + while not abort_now: + # Stop consuming CACHE entries. + xbox_helper.disable_pgraph_fifo() + xbox_helper.wait_until_pgraph_idle() - Trace.MaxFrames = args.max_flip - - global abortNow + # Kick the pusher, so that it fills the CACHE. + xbox_helper.resume_fifo_pusher() + xbox_helper.pause_fifo_pusher() - print("\n\nSearching stable PB state\n\n") - - while True: + # Now drain the CACHE. + xbox_helper.enable_pgraph_fifo() - # Stop consuming CACHE entries. - xbox_helper.disable_pgraph_fifo() - xbox_helper.wait_until_pgraph_idle() + # Check out where the PB currently is and where it was supposed to go. + v_dma_put_addr_real = xbox.read_u32(XboxHelper.DMA_PUT_ADDR) + v_dma_get_addr = xbox.read_u32(XboxHelper.DMA_GET_ADDR) - # Kick the pusher, so that it fills the cache CACHE. - xbox_helper.resume_fifo_pusher() - xbox_helper.pause_fifo_pusher() + # Check if we have any methods left to run and skip those. + v_dma_state = xbox.read_u32(XboxHelper.DMA_STATE) + v_dma_method_count = (v_dma_state >> 18) & 0x7FF + v_dma_get_addr += v_dma_method_count * 4 - # Now drain the CACHE. - xbox_helper.enable_pgraph_fifo() + # Hide all commands from the PB by setting PUT = GET. + v_dma_put_addr_target = v_dma_get_addr + xbox.write_u32(XboxHelper.DMA_PUT_ADDR, v_dma_put_addr_target) - # Check out where the PB currently is and where it was supposed to go. - v_dma_put_addr_real = xbox.read_u32(dma_put_addr) - v_dma_get_addr = xbox.read_u32(dma_get_addr) + # Resume pusher - The PB can't run yet, as it has no commands to process. + xbox_helper.resume_fifo_pusher() - # Check if we have any methods left to run and skip those. - v_dma_state = xbox.read_u32(dma_state) - v_dma_method_count = (v_dma_state >> 18) & 0x7ff - v_dma_get_addr += v_dma_method_count * 4 + # We might get issues where the pusher missed our PUT (miscalculated). + # This can happen as `v_dma_method_count` is not the most accurate. + # Probably because the DMA is halfway through a transfer. + # So we pause the pusher again to validate our state + xbox_helper.pause_fifo_pusher() - # Hide all commands from the PB by setting PUT = GET. - v_dma_put_addr_target = v_dma_get_addr - xbox.write_u32(dma_put_addr, v_dma_put_addr_target) + time.sleep(1.0) - # Resume pusher - The PB can't run yet, as it has no commands to process. - xbox_helper.resume_fifo_pusher() - - # We might get issues where the pusher missed our PUT (miscalculated). - # This can happen as `v_dma_method_count` is not the most accurate. - # Probably because the DMA is halfway through a transfer. - # So we pause the pusher again to validate our state - xbox_helper.pause_fifo_pusher() - - time.sleep(1.0) + v_dma_put_addr_target_check = xbox.read_u32(XboxHelper.DMA_PUT_ADDR) + v_dma_get_addr_check = xbox.read_u32(XboxHelper.DMA_GET_ADDR) - v_dma_put_addr_target_check = xbox.read_u32(dma_put_addr) - v_dma_get_addr_check = xbox.read_u32(dma_get_addr) + # We want the PB to be paused + if v_dma_get_addr_check != v_dma_put_addr_target_check: + print( + "Oops GET (0x%08X) did not reach PUT (0x%08X)!" + % (v_dma_get_addr_check, v_dma_put_addr_target_check) + ) + continue - # We want the PB to be paused - if v_dma_get_addr_check != v_dma_put_addr_target_check: - print("Oops GET (0x%08X) did not reach PUT (0x%08X)!" % (v_dma_get_addr_check, v_dma_put_addr_target_check)) - continue + # Ensure that we are at the correct offset + if v_dma_put_addr_target_check != v_dma_put_addr_target: + print( + "Oops PUT was modified; got 0x%08X but expected 0x%08X!" + % (v_dma_put_addr_target_check, v_dma_put_addr_target) + ) + continue - # Ensure that we are at the correct offset - if v_dma_put_addr_target_check != v_dma_put_addr_target: - print("Oops PUT was modified; got 0x%08X but expected 0x%08X!" % (v_dma_put_addr_target_check, v_dma_put_addr_target)) - continue + break - break - - print("\n\nStepping through PB\n\n") + return v_dma_get_addr, v_dma_put_addr_real - # Start measuring time - begin_time = time.monotonic() - bytes_queued = 0 +def _run(xbox, xbox_helper, v_dma_get_addr, trace): + """Traces the push buffer until aborted.""" + global abort_now + bytes_queued = 0 - def ExperimentalDisableZCompressionAndTiling(): + while not abort_now: + try: + v_dma_get_addr, unprocessed_bytes = trace.process_push_buffer_command( + xbox, xbox_helper, v_dma_get_addr + ) + bytes_queued += unprocessed_bytes + + # time.sleep(0.5) + + # Avoid queuing up too many bytes: while the buffer is being processed, + # D3D might fixup the buffer if GET is still too far away. + if v_dma_get_addr == trace.real_dma_put_addr or bytes_queued >= 200: + print("Flushing buffer until (0x%08X)" % v_dma_get_addr) + trace.run_fifo(xbox, xbox_helper, v_dma_get_addr) + bytes_queued = 0 + + if v_dma_get_addr == trace.real_dma_put_addr: + print("Reached end of buffer?!") + # break + + # Verify we are where we think we are + if bytes_queued == 0: + v_dma_get_addr_real = xbox.read_u32(XboxHelper.DMA_GET_ADDR) + print( + "Verifying hw (0x%08X) is at parser (0x%08X)" + % (v_dma_get_addr_real, v_dma_get_addr) + ) + try: + assert v_dma_get_addr_real == v_dma_get_addr + except: + xbox_helper.print_pb_state() + raise + + except Trace.MaxFlipExceeded: + print("Max flip count reached") + abort_now = True + except: # pylint: disable=bare-except + traceback.print_exc() + abort_now = True + + +def experimental_disable_z_compression_and_tiling(xbox): # Disable Z-buffer compression and Tiling # FIXME: This is a dirty dirty hack which breaks PFB and PGRAPH state! NV10_PGRAPH_RDI_INDEX = 0xFD400750 NV10_PGRAPH_RDI_DATA = 0xFD400754 for i in range(8): - # This is from a discussion on nouveau IRC: - # mwk: the RDI copy is for texturing - # mwk: the mmio PGRAPH copy is for drawing to the framebuffer - - # Disabling Z-Compression seems to work fine - if True: - zcomp = xbox.read_u32(0xFD100300 + 4 * i) - zcomp &= 0x7FFFFFFF - xbox.write_u32(0xFD100300 + 4 * i, zcomp) # PFB - xbox.write_u32(0xFD400980 + 4 * i, zcomp) # PGRAPH - if True: # PGRAPH RDI - #FIXME: This scope should be atomic - xbox.write_u32(NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + 4 * i) - xbox.write_u32(NV10_PGRAPH_RDI_DATA, zcomp) - - # Disabling tiling entirely - if True: - tile_addr = xbox.read_u32(0xFD100240 + 16 * i) - tile_addr &= 0xFFFFFFFE - xbox.write_u32(0xFD100240 + 16 * i, tile_addr) # PFB - xbox.write_u32(0xFD400900 + 16 * i, tile_addr) # PGRAPH - if True: # PGRAPH RDI - #FIXME: This scope should be atomic - xbox.write_u32(NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i) - xbox.write_u32(NV10_PGRAPH_RDI_DATA, tile_addr) - #xbox.write_u32(NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i) - #xbox.write_u32(NV10_PGRAPH_RDI_DATA, tile_limit) - #xbox.write_u32(NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i) - #xbox.write_u32(NV10_PGRAPH_RDI_DATA, tile_pitch) - - if _enable_experimental_disable_z_compression_and_tiling: - # TODO: Enable after removing FIXME above. - ExperimentalDisableZCompressionAndTiling() - - # Create a new trace object - trace = Trace.Tracer(v_dma_get_addr, v_dma_put_addr_real) - - # Record initial state - trace.commandCount = -1 - trace.DumpSurfaces(xbox, None) - trace.commandCount = 0 - - # Step through the PB until we abort - while not abortNow: - - try: - v_dma_get_addr, unprocessed_bytes = trace.processPushBufferCommand(xbox, xbox_helper, v_dma_get_addr) - bytes_queued += unprocessed_bytes - - #time.sleep(0.5) - - # Avoid queuing up too many bytes: while the buffer is being processed, - # D3D might fixup the buffer if GET is still too far away. - if v_dma_get_addr == trace.real_dma_put_addr or bytes_queued >= 200: - print("Flushing buffer until (0x%08X)" % v_dma_get_addr) - trace.run_fifo(xbox, xbox_helper, v_dma_get_addr) - bytes_queued = 0 - if False: - xbox_helper.dumpPBState() - X = 4 - print(["PRE "] + ["%08X" % x for x in struct.unpack("<" + "L" * X, xbox.read(0x80000000 | (v_dma_get_addr - X * 4), X * 4))]) - print(["POST"] + ["%08X" % x for x in struct.unpack("<" + "L" * X, xbox.read(0x80000000 | (v_dma_get_addr ), X * 4))]) - - if v_dma_get_addr == trace.real_dma_put_addr: - print("Reached end of buffer?!") - #break - - # Verify we are where we think we are - if bytes_queued == 0: - v_dma_get_addr_real = xbox.read_u32(dma_get_addr) - print("Verifying hw (0x%08X) is at parser (0x%08X)" % (v_dma_get_addr_real, v_dma_get_addr)) - try: - assert(v_dma_get_addr_real == v_dma_get_addr) - except: - xbox_helper.dumpPBState() - raise - - except Trace.MaxFlipExceeded: - print("Max flip count reached") - abortNow = True - except: - traceback.print_exc() - abortNow = True - - # Recover the real address - xbox.write_u32(dma_put_addr, trace.real_dma_put_addr) - - print("\n\nFinished PB\n\n") - - # We can continue the cache updates now. - xbox_helper.resume_fifo_pusher() - - # Finish measuring time - end_time = time.monotonic() - duration = end_time - begin_time - - flipStallCount = trace.recordedFlipStallCount() - commandCount = trace.recordedPushBufferCommandCount() - - print("Recorded %d flip stalls and %d PB commands (%.2f commands / second)" % (flipStallCount, commandCount, commandCount / duration)) - - -if __name__ == '__main__': - def _parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument( - "-o", - "--out", - metavar="path", - default="out", - help="Set the output directory." - ) - - parser.add_argument( - "--no-surface", - help="Disable dumping of surfaces.", - action='store_true' - ) + # This is from a discussion on nouveau IRC: + # mwk: the RDI copy is for texturing + # mwk: the mmio PGRAPH copy is for drawing to the framebuffer + + # Disabling Z-Compression seems to work fine + def disable_z_compression(index): + zcomp = xbox.read_u32(0xFD100300 + 4 * index) + zcomp &= 0x7FFFFFFF + xbox.write_u32(0xFD100300 + 4 * index, zcomp) # PFB + xbox.write_u32(0xFD400980 + 4 * index, zcomp) # PGRAPH + # PGRAPH RDI + # FIXME: This scope should be atomic + xbox.write_u32(NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + 4 * index) + xbox.write_u32(NV10_PGRAPH_RDI_DATA, zcomp) + + disable_z_compression(i) + + # Disabling tiling entirely + def disable_tiling(index): + tile_addr = xbox.read_u32(0xFD100240 + 16 * index) + tile_addr &= 0xFFFFFFFE + xbox.write_u32(0xFD100240 + 16 * index, tile_addr) # PFB + xbox.write_u32(0xFD400900 + 16 * index, tile_addr) # PGRAPH + # PGRAPH RDI + # FIXME: This scope should be atomic + xbox.write_u32(NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * index) + xbox.write_u32(NV10_PGRAPH_RDI_DATA, tile_addr) + # xbox.write_u32(NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i) + # xbox.write_u32(NV10_PGRAPH_RDI_DATA, tile_limit) + # xbox.write_u32(NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i) + # xbox.write_u32(NV10_PGRAPH_RDI_DATA, tile_pitch) + + disable_tiling(i) - parser.add_argument( - "--no-texture", - help="Disable dumping of textures.", - action='store_true' - ) - parser.add_argument( - "--no-pixel", - help="Disable dumping of all graphical resources (surfaces, textures).", - action='store_true' +def main(args): + + os.makedirs(args.out, exist_ok=True) + + if args.no_surface: + Trace.SurfaceDumping = False + + if args.no_texture: + Trace.TextureDumping = False + + if args.no_pixel: + Trace.PixelDumping = False + + Trace.MaxFrames = args.max_flip + + global abort_now # pylint: disable=C0103 + xbox = Xbox() + xbox_helper = XboxHelper.XboxHelper(xbox) + + print("\n\nSearching stable PB state\n\n") + v_dma_get_addr, v_dma_put_addr_real = _wait_for_stable_push_buffer_state( + xbox, xbox_helper ) - parser.add_argument( - "--max-flip", - metavar='frames', - default=0, - type=int, - help="Exit tracing after the given number of frame swaps." + if not v_dma_get_addr or not v_dma_put_addr_real: + if not abort_now: + print("\n\nFailed to reach stable state.\n\n") + return + + print("\n\nStepping through PB\n\n") + + # Start measuring time + begin_time = time.monotonic() + + if _enable_experimental_disable_z_compression_and_tiling: + # TODO: Enable after removing FIXME above. + experimental_disable_z_compression_and_tiling(xbox) + + # Create a new trace object + trace = Trace.Tracer(v_dma_get_addr, v_dma_put_addr_real) + + # Dump the initial state + trace.command_count = -1 + trace.dump_surfaces(xbox, None) + trace.command_count = 0 + + _run(xbox, xbox_helper, v_dma_get_addr, trace) + + # Recover the real address + xbox.write_u32(XboxHelper.DMA_PUT_ADDR, trace.real_dma_put_addr) + + print("\n\nFinished PB\n\n") + + # We can continue the cache updates now. + xbox_helper.resume_fifo_pusher() + + # Finish measuring time + end_time = time.monotonic() + duration = end_time - begin_time + + command_count = trace.recorded_command_count + print( + "Recorded %d flip stalls and %d PB commands (%.2f commands / second)" + % (trace.recorded_flip_stall_count, command_count, command_count / duration) ) - return parser.parse_args() - - sys.exit(main(_parse_args())) +if __name__ == "__main__": + + def _parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument( + "-o", + "--out", + metavar="path", + default="out", + help="Set the output directory.", + ) + + parser.add_argument( + "--no-surface", help="Disable dumping of surfaces.", action="store_true" + ) + + parser.add_argument( + "--no-texture", help="Disable dumping of textures.", action="store_true" + ) + + parser.add_argument( + "--no-pixel", + help="Disable dumping of all graphical resources (surfaces, textures).", + action="store_true", + ) + + parser.add_argument( + "--max-flip", + metavar="frames", + default=0, + type=int, + help="Exit tracing after the given number of frame swaps.", + ) + + return parser.parse_args() + + sys.exit(main(_parse_args()))