Skip to content

Commit

Permalink
Patching a few bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathanolson committed Aug 10, 2024
1 parent 4a6eb7b commit 957abcd
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 67 deletions.
9 changes: 7 additions & 2 deletions js/webgpu/FaceRasterizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ export const FACE_RASTERIZER_RUN_DEFAULT_OPTIONS = {

export type FaceRasterizerExecutionInfo = {
config: TwoPassConfig;
numTiles: number;
numBins: number;
initialRenderableFaces: TwoPassInitialRenderableFace[];
initialEdges: LinearEdge[];
Expand Down Expand Up @@ -154,8 +155,9 @@ export default class FaceRasterizer {
// Pick the opposite of the storage format, in case we can't write to it directly, and need to blit it over
const potentialBlitFormat = this.deviceContext.preferredStorageFormat === 'bgra8unorm' ? 'rgba8unorm' : 'bgra8unorm';
const blitShader = new BlitShader( this.deviceContext.device, potentialBlitFormat );
const wrapBlitModule = new CompositeModule( [ mainModule ], ( context, data: { numBins: number; numInitialRenderableFaces: number; textureBlit: [ GPUTextureView, GPUTextureView ] | null } ) => {
const wrapBlitModule = new CompositeModule( [ mainModule ], ( context, data: { numTiles: number; numBins: number; numInitialRenderableFaces: number; textureBlit: [ GPUTextureView, GPUTextureView ] | null } ) => {
mainModule.execute( context, {
numTiles: data.numTiles,
numBins: data.numBins,
numInitialRenderableFaces: data.numInitialRenderableFaces
} );
Expand All @@ -182,6 +184,7 @@ export default class FaceRasterizer {
context.setTypedBufferValue( renderProgramInstructionsSlot, input.renderProgramInstructions );

execute( context, {
numTiles: input.numTiles,
numBins: input.numBins,
numInitialRenderableFaces: input.initialRenderableFaces.length,
textureBlit: input.textureBlit
Expand Down Expand Up @@ -284,7 +287,8 @@ export default class FaceRasterizer {
const binWidth = Math.ceil( rasterWidth / binSize );
const binHeight = Math.ceil( rasterHeight / binSize );

const numBins = 256 * tileWidth * tileHeight;
const numTiles = tileWidth * tileHeight;
const numBins = 256 * numTiles;

const initialRenderableFaces: TwoPassInitialRenderableFace[] = [];
const initialEdges: LinearEdge[] = [];
Expand Down Expand Up @@ -386,6 +390,7 @@ export default class FaceRasterizer {
initialEdges: initialEdges,
renderProgramInstructions: renderProgramInstructions,
textureBlit: canOutputToCanvas ? null : [ fineOutputTextureView, canvasTextureView ],
numTiles: numTiles,
numBins: numBins
} );
}
Expand Down
3 changes: 2 additions & 1 deletion js/webgpu/modules/rasterize-two-pass/TiledTwoPassModule.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ export const TILED_TWO_PASS_MODULE_DEFAULTS = {
} as const;

/**
 * Run-size data handed to the TiledTwoPassModule execute callback (the `runSize` argument).
 *
 * The callback uses these values to size the tile dispatch and forwards numBins to the two-pass module.
 */
export type TiledTwoPassRunSize = {
// Multiplied by numInitialRenderableFaces to size the tile module's execution.
// NOTE(review): the visible FaceRasterizer caller computes this as tileWidth * tileHeight.
numTiles: number;
// Forwarded unchanged to the two-pass module's execute call.
// NOTE(review): visible callers compute this as 256 * tileWidth * tileHeight (256 bins per tile) - confirm
// that invariant if it is relied upon.
numBins: number;
// Count of initial renderable faces; visible callers pass initialRenderableFaces.length.
numInitialRenderableFaces: number;
};
Expand Down Expand Up @@ -123,7 +124,7 @@ export default class TiledTwoPassModule extends CompositeModule<TiledTwoPassRunS
twoPassModule
], ( context, runSize: TiledTwoPassRunSize ) => {
initializeAddressesModule.execute( context, 0 );
tileModule.execute( context, runSize.numInitialRenderableFaces * runSize.numBins );
tileModule.execute( context, runSize.numInitialRenderableFaces * runSize.numTiles );
twoPassModule.execute( context, {
numBins: runSize.numBins,
numCoarseRenderableFaces: options.maxCoarseRenderableFaces
Expand Down
4 changes: 3 additions & 1 deletion js/webgpu/tests/rasterize-two-pass/TwoPassFaceRasterizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ export const evaluateTwoPassFaceRasterizer = async (
}

const filterType = PolygonFilterType.Bilinear;
const filterScale = LOOP ? ( 1 + Math.cos( elapsedTime / 100 ) * 0.5 ) * 30 + 1 : 50; // 25 box, 17 bilinear (comparison)
// const filterScale = LOOP ? randomNumbers[ index % ( randomNumbers.length ) ] : 50; // 25 box, 17 bilinear (comparison)
const filterScale = ( 1 + Math.cos( elapsedTime / 100 ) * 0.5 ) * 30 + 1; // 25 box, 17 bilinear (comparison)
// 50.51805795015657

const clippableFace = testPolygonalFace;

Expand Down
5 changes: 4 additions & 1 deletion js/webgpu/tests/rasterize-two-pass/TwoPassTiled.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ export const evaluateTwoPassTiled = async (
const binWidth = Math.ceil( rasterWidth / binSize );
const binHeight = Math.ceil( rasterHeight / binSize );

const numTiles = tileWidth * tileHeight;
const numBins = 256 * tileWidth * tileHeight;

const initialRenderableFaces: TwoPassInitialRenderableFace[] = [];
Expand Down Expand Up @@ -151,8 +152,9 @@ export const evaluateTwoPassTiled = async (
// Pick the opposite of the storage format, in case we can't write to it directly, and need to blit it over
const potentialBlitFormat = deviceContext.preferredStorageFormat === 'bgra8unorm' ? 'rgba8unorm' : 'bgra8unorm';
const blitShader = new BlitShader( deviceContext.device, potentialBlitFormat );
const wrapBlitModule = new CompositeModule( [ mainModule ], ( context, data: { numBins: number; numInitialRenderableFaces: number; textureBlit: [ GPUTextureView, GPUTextureView ] | null } ) => {
const wrapBlitModule = new CompositeModule( [ mainModule ], ( context, data: { numTiles: number; numBins: number; numInitialRenderableFaces: number; textureBlit: [ GPUTextureView, GPUTextureView ] | null } ) => {
mainModule.execute( context, {
numTiles: data.numTiles,
numBins: data.numBins,
numInitialRenderableFaces: data.numInitialRenderableFaces
} );
Expand Down Expand Up @@ -184,6 +186,7 @@ export const evaluateTwoPassTiled = async (
context.setTypedBufferValue( renderProgramInstructionsSlot, input.renderProgramInstructions );

execute( context, {
numTiles: numTiles,
numBins: numBins,
numInitialRenderableFaces: input.initialRenderableFaces.length,
textureBlit: input.textureBlit
Expand Down
6 changes: 4 additions & 2 deletions js/webgpu/wgsl/rasterize-two-pass/mainTwoPassCoarseWGSL.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
/**
* TODO: doc
*
* Should be dispatched with one workgroup PER coarse renderable face (one thread per face-X-bin).
*
* @author Jonathan Olson <jonathan.olson@colorado.edu>
*/

Expand Down Expand Up @@ -51,7 +53,7 @@ const mainTwoPassCoarseWGSL = (
fineEdgesSlot,
addressesSlot
], wgsl`
const low_area_multiplier = 1e-4f;
const low_area_multiplier = 0.002f;
var<workgroup> coarse_face: ${TwoPassCoarseRenderableFaceWGSL};
var<workgroup> scratch_data: array<vec2u, 256>;
Expand Down Expand Up @@ -145,7 +147,7 @@ const mainTwoPassCoarseWGSL = (
// TODO: don't use low_area_multiplier with full area!
let is_full_area = is_source_full_area || area + low_area_multiplier >= max_area;
let needs_write_face = area > low_area_multiplier;
let needs_write_face = area > low_area_multiplier && ( num_clipped_edges > 0u || clipped_clip_counts[ 0u ] != 0i || clipped_clip_counts[ 1u ] != 0i || clipped_clip_counts[ 2u ] != 0i || clipped_clip_counts[ 3u ] != 0i );
let needs_write_edges = needs_write_face && !is_full_area;
let required_edge_count = select( 0u, num_clipped_edges, needs_write_edges );
Expand Down
61 changes: 4 additions & 57 deletions js/webgpu/wgsl/rasterize-two-pass/mainTwoPassFineWGSL.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
* a full 16x16 grid of integrals (and colors), which will then be combined into the proper (e.g. 15x15) set of pixels.
* Thus the bin size can be 15x15 (if bilinear and filter_scale=1), or 13x13 (if Mitchell-Netravali and filter_scale=1).
*
* Should be dispatched with one workgroup PER bin (one thread per grid "pixel").
*
* TODO: optimize is_constant
*
* @author Jonathan Olson <jonathan.olson@colorado.edu>
*/

import { blend_composeWGSL, bounds_clip_edgeWGSL, BufferBindingType, BufferSlot, decimalS, extend_f32WGSL, f32S, F32Type, gamut_map_linear_displayP3WGSL, gamut_map_linear_sRGBWGSL, linear_displayP3_to_linear_sRGBWGSL, linear_sRGB_to_linear_displayP3WGSL, linear_sRGB_to_oklabWGSL, linear_sRGB_to_sRGBWGSL, LinearEdge, LinearEdgeWGSL, logValueWGSL, oklab_to_linear_sRGBWGSL, premultiplyWGSL, RadialGradientType, RenderInstruction, sRGB_to_linear_sRGBWGSL, StorageTextureBindingType, TextureViewSlot, TwoPassConfig, TwoPassFineRenderableFace, TwoPassFineRenderableFaceWGSL, u32S, U32Type, unpremultiplyWGSL, wgsl, wgslBlueprint, WGSLExpressionU32, WGSLMainModule, WGSLSlot } from '../../../imports.js';
import { blend_composeWGSL, bounds_clip_edgeWGSL, BufferBindingType, BufferSlot, decimalS, extend_f32WGSL, f32S, gamut_map_linear_displayP3WGSL, gamut_map_linear_sRGBWGSL, linear_displayP3_to_linear_sRGBWGSL, linear_sRGB_to_linear_displayP3WGSL, linear_sRGB_to_oklabWGSL, linear_sRGB_to_sRGBWGSL, LinearEdge, LinearEdgeWGSL, oklab_to_linear_sRGBWGSL, premultiplyWGSL, RadialGradientType, RenderInstruction, sRGB_to_linear_sRGBWGSL, StorageTextureBindingType, TextureViewSlot, TwoPassConfig, TwoPassFineRenderableFace, TwoPassFineRenderableFaceWGSL, u32S, unpremultiplyWGSL, wgsl, wgslBlueprint, WGSLExpressionU32, WGSLMainModule, WGSLSlot } from '../../../imports.js';
import { optionize3 } from '../../../../../phet-core/js/optionize.js';

export type mainTwoPassFineWGSLOptions = {
Expand Down Expand Up @@ -64,10 +66,6 @@ const mainTwoPassFineWGSL = (
const stackSize = 10;
const instructionStackSize = 8;

// const logIndex = Math.floor( Math.random() * 1000 );
const logIndex = 4794;
console.log( logIndex );

const getInstructionWGSL = ( index: WGSLExpressionU32 ) => wgsl`render_program_instructions[ ${index} ]`;

// TODO: find a way so that this isn't needed(!)
Expand All @@ -87,7 +85,7 @@ const mainTwoPassFineWGSL = (
], wgsl`
const oops_inifinite_loop_code = vec4f( 0.5f, 0.5f, 0f, 0.5f );
const low_area_multiplier = 1e-4f;
const low_area_multiplier = 0.002f;
var<workgroup> bin_xy: vec2<u32>;
var<workgroup> workgroup_exit: bool;
Expand Down Expand Up @@ -134,30 +132,9 @@ const mainTwoPassFineWGSL = (
let pixel_xy = bin_xy * config.bin_size + vec2( local_id.x % 16u, local_id.x / 16u );
// 21, 13 ish
${logValueWGSL( {
value: 'pixel_xy.x',
type: U32Type,
lineToLog: line => line.dataArray.flat()[ logIndex ]
} )}
${logValueWGSL( {
value: 'pixel_xy.y',
type: U32Type,
lineToLog: line => line.dataArray.flat()[ logIndex ]
} )}
let skip_pixel = pixel_xy.x >= config.raster_width || pixel_xy.y >= config.raster_height;
var accumulation = vec4f( 0f, 0f, 0f, 0f );
//accumulation = vec4( f32( bin_xy.x ) / 16f, 0f, f32( bin_xy.y ) / 16f, 1f ); // TODO: remove
${logValueWGSL( {
value: 'next_address',
type: U32Type,
lineToLog: line => line.dataArray.flat()[ logIndex ]
} )}
var oops_count = 0u;
while ( workgroupUniformLoad( &next_address ) != 0xffffffffu ) {
Expand All @@ -174,12 +151,6 @@ const mainTwoPassFineWGSL = (
workgroupBarrier();
${logValueWGSL( {
value: 'select( 0u, 1u, skip_pixel )',
type: U32Type,
lineToLog: line => line.dataArray.flat()[ logIndex ]
} )}
let needs_centroid = ( current_face.bits & 0x10000000u ) != 0u;
let needs_face = ( current_face.bits & 0x20000000u ) != 0u;
let is_full_area = ( current_face.bits & 0x80000000u ) != 0u;
Expand Down Expand Up @@ -236,30 +207,6 @@ const mainTwoPassFineWGSL = (
// TODO: stuff integrals + color in workgroup memory, barrier, then have each pixel (subset of threads) sum up
}
${logValueWGSL( {
value: 'accumulation.r',
type: F32Type,
lineToLog: line => line.dataArray.flat()[ logIndex ]
} )}
${logValueWGSL( {
value: 'accumulation.g',
type: F32Type,
lineToLog: line => line.dataArray.flat()[ logIndex ]
} )}
${logValueWGSL( {
value: 'accumulation.b',
type: F32Type,
lineToLog: line => line.dataArray.flat()[ logIndex ]
} )}
${logValueWGSL( {
value: 'accumulation.a',
type: F32Type,
lineToLog: line => line.dataArray.flat()[ logIndex ]
} )}
var will_store_pixel = !skip_pixel;
${options.supportsGridFiltering ? wgsl`
Expand Down
7 changes: 4 additions & 3 deletions js/webgpu/wgsl/rasterize-two-pass/mainTwoPassTileWGSL.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ const mainTwoPassTileWGSL = (
coarseEdgesSlot,
addressesSlot
], wgsl`
const low_area_multiplier = 1e-4f;
const low_area_multiplier = 0.002f;
var<workgroup> scratch_data: array<vec2u, 256>;
var<workgroup> base_indices: vec2u;
Expand Down Expand Up @@ -108,7 +108,8 @@ const mainTwoPassTileWGSL = (
let p0 = edge.startPoint;
let p1 = edge.endPoint;
area += ( p1.x + p0.x ) * ( p1.y - p0.y );
// Offset by the centroid, so that our bounds computations are more accurate.
area += ( p1.x + p0.x - 2f * bounds_centroid.x ) * ( p1.y - p0.y );
if ( is_edge_clipped_count( p0, p1, min, max ) ) {
// TODO: consider NOT writing the clip counts in this (hopefully faster) loop?
Expand All @@ -132,7 +133,7 @@ const mainTwoPassTileWGSL = (
// TODO: don't use low_area_multiplier with full area!
let is_full_area = is_source_full_area || area + low_area_multiplier >= max_area;
let needs_write_face = !skip_tile && area > low_area_multiplier;
let needs_write_face = !skip_tile && area > low_area_multiplier && ( num_clipped_edges > 0u || clipped_clip_counts[ 0u ] != 0i || clipped_clip_counts[ 1u ] != 0i || clipped_clip_counts[ 2u ] != 0i || clipped_clip_counts[ 3u ] != 0i );
let needs_write_edges = needs_write_face && !is_full_area;
let required_edge_count = select( 0u, num_clipped_edges, needs_write_edges );
Expand Down

0 comments on commit 957abcd

Please sign in to comment.