From f780626c35c7c3cac2e9aa7c2ec77ca587d6ab95 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Thu, 16 Dec 2010 18:41:13 +0100 Subject: [PATCH] [g3dvl] move scaling completely into matrix and use less temp registers --- src/gallium/auxiliary/vl/vl_idct.c | 89 ++++++++++--------- .../auxiliary/vl/vl_mpeg12_mc_renderer.c | 25 +++--- 2 files changed, 56 insertions(+), 58 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c index 5a32573dd66..168c2d7d945 100644 --- a/src/gallium/auxiliary/vl/vl_idct.c +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -42,9 +42,6 @@ #define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f) -#define STAGE1_SCALE 4.0f -#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE / STAGE1_SCALE) - #define NR_RENDER_TARGETS 4 enum VS_INPUT @@ -171,24 +168,27 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage) } static void -increment_addr(struct ureg_program *shader, struct ureg_dst addr[2], - bool right_side, bool transposed, float size) +increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2], + struct ureg_src saddr[2], bool right_side, bool transposed, + int pos, float size) { + unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; unsigned wm_tc = (right_side == transposed) ? 
TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; - /* addr[0..1]++ */ - ureg_ADD(shader, ureg_writemask(addr[0], wm_tc), - ureg_src(addr[0]), ureg_imm1f(shader, 1.0f / size)); - ureg_ADD(shader, ureg_writemask(addr[1], wm_tc), - ureg_src(addr[1]), ureg_imm1f(shader, 1.0f / size)); + /* + * daddr[0..1].(start) = saddr[0..1].(start) + * daddr[0..1].(tc) = saddr[0..1].(tc) offset by (pos / size) + */ + + ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]); + ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size)); + ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]); + ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size)); } static void fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler) { - m[0] = ureg_DECL_temporary(shader); - m[1] = ureg_DECL_temporary(shader); - ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler); ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler); } @@ -218,9 +218,9 @@ create_matrix_frag_shader(struct vl_idct *idct) { struct ureg_program *shader; - struct ureg_src l_addr[2], r_addr[2], saddr[2]; + struct ureg_src l_addr[2], r_addr[2]; - struct ureg_dst addr[2], l[4][2], r[2]; + struct ureg_dst l[4][2], r[2]; struct ureg_dst fragment[NR_RENDER_TARGETS]; unsigned i, j; @@ -229,12 +229,6 @@ create_matrix_frag_shader(struct vl_idct *idct) if (!shader) return NULL; - addr[0] = ureg_DECL_temporary(shader); - addr[1] = ureg_DECL_temporary(shader); - - saddr[0] = ureg_src(addr[0]); - saddr[1] = ureg_src(addr[1]); - l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); @@ -245,37 +239,44 @@ create_matrix_frag_shader(struct vl_idct *idct) fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i); for (i = 0; i < 4; ++i) { - if(i == 0) { - ureg_MOV(shader, addr[0],
l_addr[0]); - ureg_MOV(shader, addr[1], l_addr[1]); - } else - increment_addr(shader, addr, false, false, idct->buffer_height); + l[i][0] = ureg_DECL_temporary(shader); + l[i][1] = ureg_DECL_temporary(shader); + } - fetch_four(shader, l[i], saddr, ureg_DECL_sampler(shader, 1)); + r[0] = ureg_DECL_temporary(shader); + r[1] = ureg_DECL_temporary(shader); + + for (i = 1; i < 4; ++i) { + increment_addr(shader, l[i], l_addr, false, false, i, idct->buffer_height); + } + + for (i = 0; i < 4; ++i) { + struct ureg_src s_addr[2]; + s_addr[0] = i == 0 ? l_addr[0] : ureg_src(l[i][0]); + s_addr[1] = i == 0 ? l_addr[1] : ureg_src(l[i][1]); + fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1)); } for (i = 0; i < NR_RENDER_TARGETS; ++i) { - if(i == 0) { - ureg_MOV(shader, addr[0], r_addr[0]); - ureg_MOV(shader, addr[1], r_addr[1]); - } else - increment_addr(shader, addr, true, true, BLOCK_HEIGHT); + if(i > 0) + increment_addr(shader, r, r_addr, true, true, i, BLOCK_HEIGHT); - fetch_four(shader, r, saddr, ureg_DECL_sampler(shader, 0)); + struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) }; + s_addr[0] = i == 0 ? r_addr[0] : ureg_src(r[0]); + s_addr[1] = i == 0 ? 
r_addr[1] : ureg_src(r[1]); + fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0)); for (j = 0; j < 4; ++j) { matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r); } - ureg_release_temporary(shader, r[0]); - ureg_release_temporary(shader, r[1]); } for (i = 0; i < 4; ++i) { ureg_release_temporary(shader, l[i][0]); ureg_release_temporary(shader, l[i][1]); } - ureg_release_temporary(shader, addr[0]); - ureg_release_temporary(shader, addr[1]); + ureg_release_temporary(shader, r[0]); + ureg_release_temporary(shader, r[1]); ureg_END(shader); @@ -290,7 +291,7 @@ create_transpose_frag_shader(struct vl_idct *idct) struct ureg_src l_addr[2], r_addr[2]; struct ureg_dst l[2], r[2]; - struct ureg_dst tmp, fragment; + struct ureg_dst fragment; shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (!shader) @@ -302,16 +303,18 @@ create_transpose_frag_shader(struct vl_idct *idct) r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); + l[0] = ureg_DECL_temporary(shader); + l[1] = ureg_DECL_temporary(shader); + r[0] = ureg_DECL_temporary(shader); + r[1] = ureg_DECL_temporary(shader); + fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0)); fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1)); fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); - tmp = ureg_DECL_temporary(shader); - matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), l, r); - ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE)); + matrix_mul(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), l, r); - ureg_release_temporary(shader, tmp); ureg_release_temporary(shader, l[0]); ureg_release_temporary(shader, l[1]); ureg_release_temporary(shader, r[0]); @@ -542,7 +545,7 @@ vl_idct_upload_matrix(struct pipe_context *pipe) for(i = 0; i < BLOCK_HEIGHT; ++i) for(j = 0; j < BLOCK_WIDTH; ++j) // 
transpose and scale - f[i * pitch + j] = const_matrix[j][i] * STAGE1_SCALE; + f[i * pitch + j] = const_matrix[j][i] * sqrtf(SCALE_FACTOR_16_TO_9); pipe->transfer_unmap(pipe, buf_transfer); pipe->transfer_destroy(pipe, buf_transfer); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index eec6a65ee79..25e97efec04 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -273,13 +273,12 @@ static struct ureg_dst fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field) { struct ureg_src tc[3], sampler[3], eb[2]; - struct ureg_dst texel, t_tc, t_eb_info, tmp; + struct ureg_dst texel, t_tc, t_eb_info; unsigned i, label; texel = ureg_DECL_temporary(shader); t_tc = ureg_DECL_temporary(shader); t_eb_info = ureg_DECL_temporary(shader); - tmp = ureg_DECL_temporary(shader); tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR); tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR); @@ -326,7 +325,6 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_release_temporary(shader, t_tc); ureg_release_temporary(shader, t_eb_info); - ureg_release_temporary(shader, tmp); return texel; } @@ -336,7 +334,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field) { struct ureg_src ref_frames, bkwd_pred; struct ureg_src tc[4], sampler[2]; - struct ureg_dst ref[2], tmp, result; + struct ureg_dst ref[2], result; unsigned i, intra_label, bi_label, label; ref_frames = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES, TGSI_INTERPOLATE_CONSTANT); @@ -350,14 +348,13 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field) ref[i] = ureg_DECL_temporary(shader); } - tmp = ureg_DECL_temporary(shader); result = ureg_DECL_temporary(shader); ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f)); - 
ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ref_frames, ureg_imm1f(shader, 0.0f)); - ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), &intra_label); - ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), + ureg_SGE(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_X), ref_frames, ureg_imm1f(shader, 0.0f)); + ureg_IF(shader, ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X), &intra_label); + ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY), ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), tc[1], tc[0]); @@ -367,10 +364,10 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field) * result = tex(field.z ? tc[1] : tc[0], sampler[bkwd_pred ? 1 : 0]) */ ureg_IF(shader, bkwd_pred, &label); - ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(tmp), sampler[1]); + ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[1]); ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); ureg_ELSE(shader, &label); - ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(tmp), sampler[0]); + ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]); ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); ureg_ENDIF(shader); @@ -383,12 +380,11 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field) * else * ref[0..1] = tex(tc[2..3], sampler[0..1]) */ - ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(tmp), sampler[0]); - - ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), + ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY), ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), tc[3], tc[2]); - ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(tmp), sampler[1]); + ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]); + ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]); ureg_LRP(shader, result, ureg_imm1f(shader, 0.5f), ureg_src(ref[0]), ureg_src(ref[1])); @@ -400,7 +396,6 @@ fetch_ref(struct ureg_program *shader, 
struct ureg_dst field) for (i = 0; i < 2; ++i) ureg_release_temporary(shader, ref[i]); - ureg_release_temporary(shader, tmp); return result; } -- 2.30.2