host_shaders: Simplifying a calculation

This commit is contained in:
¥IGA 2025-02-22 15:44:18 +01:00 committed by Xphalnos
parent 0aaeea4837
commit ad723492d0
9 changed files with 28 additions and 28 deletions

View File

@@ -31,10 +31,10 @@ const uint lut_64bpp[16] = {
0x37363332, 0x3f3e3b3a,
};
#define MICRO_TILE_DIM (8)
#define MICRO_TILE_SZ (512)
#define TEXELS_PER_ELEMENT (1)
#define BPP (64)
#define MICRO_TILE_DIM 8
#define MICRO_TILE_SZ 512
#define TEXELS_PER_ELEMENT 1
#define BPP 64
void main() {
uint x = gl_GlobalInvocationID.x % info.pitch;

View File

@@ -64,10 +64,10 @@ const uint lut_32bpp[][16] = {
}
};
#define MICRO_TILE_DIM (8)
#define MICRO_TILE_SZ (1024)
#define TEXELS_PER_ELEMENT (1)
#define BPP (32)
#define MICRO_TILE_DIM 8
#define MICRO_TILE_SZ 1024
#define TEXELS_PER_ELEMENT 1
#define BPP 32
void main() {
uint x = gl_GlobalInvocationID.x % info.pitch;

View File

@@ -63,10 +63,10 @@ const uint lut_64bpp[][16] = {
},
};
#define MICRO_TILE_DIM (8)
#define MICRO_TILE_SZ (2048)
#define TEXELS_PER_ELEMENT (1)
#define BPP (64)
#define MICRO_TILE_DIM 8
#define MICRO_TILE_SZ 2048
#define TEXELS_PER_ELEMENT 1
#define BPP 64
void main() {
uint x = gl_GlobalInvocationID.x % info.pitch;
@@ -80,7 +80,7 @@ void main() {
uint byte_ofs = gl_LocalInvocationID.x & 3u;
uint idx = bitfieldExtract(idx_dw >> (8 * byte_ofs), 0, 8);
uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ;
uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ;
uint tile_row = y / MICRO_TILE_DIM;
uint tile_column = x / MICRO_TILE_DIM;
uint tile_offs = ((tile_row * info.c0) + tile_column) * MICRO_TILE_SZ;

View File

@@ -63,10 +63,10 @@ const uint lut_8bpp[][16] = {
},
};
#define MICRO_TILE_DIM (8)
#define MICRO_TILE_SZ (256)
#define TEXELS_PER_ELEMENT (1)
#define BPP (8)
#define MICRO_TILE_DIM 8
#define MICRO_TILE_SZ 256
#define TEXELS_PER_ELEMENT 1
#define BPP 8
shared uint scratch[16];

View File

@@ -32,7 +32,7 @@ const uint rmort[16] = {
0x57475646, 0x77677666,
};
#define MICRO_TILE_DIM (8)
#define MICRO_TILE_DIM 8
void main() {
uint block_ofs = 4 * gl_GlobalInvocationID.x;
@@ -57,7 +57,7 @@ void main() {
uint dw_ofs_x = (target_tile_x * MICRO_TILE_DIM) + 4 * col;
uint dw_ofs_y = ((target_tile_y * tiles_per_pitch) * 64u) + ((row * tiles_per_pitch) * MICRO_TILE_DIM);
out_data[dw_ofs_x + dw_ofs_y] = p0;
out_data[dw_ofs_x + dw_ofs_y + 1] = p1;
out_data[dw_ofs_x + dw_ofs_y + 2] = p2;
out_data[dw_ofs_x + dw_ofs_y + 3] = p3;
out_data[dw_ofs_x + dw_ofs_y + 1] = p1;
out_data[dw_ofs_x + dw_ofs_y + 2] = p2;
out_data[dw_ofs_x + dw_ofs_y + 3] = p3;
}

View File

@@ -52,7 +52,7 @@ void main() {
uint mip = 0u;
for (int m = 0; m < info.num_levels; ++m) {
mip += (gl_GlobalInvocationID.x * 4) >= info.sizes[m] ? 1 : 0;
}
}
uint tiles_per_pitch = max(((info.pitch >> mip) / 8u), 1u);
uint target_tile_x = gl_WorkGroupID.x % tiles_per_pitch;
uint target_tile_y = gl_WorkGroupID.x / tiles_per_pitch;

View File

@@ -32,8 +32,8 @@ const uint rmort[16] = {
0x57475646, 0x77677666,
};
#define MICRO_TILE_DIM (8)
#define TEXELS_PER_ELEMENT (1)
#define MICRO_TILE_DIM 8
#define TEXELS_PER_ELEMENT 1
void main() {
uint tile_base = gl_GlobalInvocationID.x - gl_LocalInvocationID.x; // WG*16

View File

@@ -32,13 +32,13 @@ const uint rmort[16] = {
0x57475646, 0x77677666,
};
#define MICRO_TILE_DIM (8)
#define MICRO_TILE_DIM 8
void main() {
uint block_ofs = 2 * gl_GlobalInvocationID.x;
uint p0 = in_data[block_ofs + 0];
uint p1 = in_data[block_ofs + 1];
uint bit_ofs = 8 * (gl_LocalInvocationID.x % 4);
uint packed_pos = rmort[gl_LocalInvocationID.x >> 2] >> bit_ofs;
uint col = bitfieldExtract(packed_pos, 4, 4);
@@ -55,5 +55,5 @@ void main() {
uint dw_ofs_x = target_tile_x * MICRO_TILE_DIM + col * 2;
uint dw_ofs_y = (target_tile_y * tiles_per_pitch * 64) + row * tiles_per_pitch * MICRO_TILE_DIM;
out_data[dw_ofs_x + dw_ofs_y] = p0;
out_data[dw_ofs_x + dw_ofs_y + 1] = p1;
out_data[dw_ofs_x + dw_ofs_y + 1] = p1;
}

View File

@@ -16,7 +16,7 @@ layout(push_constant) uniform settings {
const float cutoff = 0.0031308, a = 1.055, b = 0.055, d = 12.92;
vec3 gamma(vec3 rgb) {
return mix(
a * pow(rgb, vec3(1.0 / (2.4 + 1.0 - pp.gamma))) - b,
a * pow(rgb, vec3(1.0 / (3.4 - pp.gamma))) - b,
d * rgb / pp.gamma,
lessThan(rgb, vec3(cutoff))
);