X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fi915%2Fi915_fpc_translate.c;h=70016ed51e6a0b6bb3b60e4123cdb6e26bb804d7;hb=31da39ddc92e780dc539bf34d2de7f82fc65fa86;hp=c75f6c5fd63ffe0ddc919c2086c625db49127db4;hpb=f8e6d19f3f40931be741b44d3edf210c38e13f0f;p=mesa.git diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index c75f6c5fd63..70016ed51e6 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -31,6 +31,7 @@ #include "i915_reg.h" #include "i915_context.h" #include "i915_fpc.h" +#include "i915_debug_private.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" @@ -49,24 +50,27 @@ * Simple pass-through fragment shader to use when we don't have * a real shader (or it fails to compile for some reason). */ -static unsigned passthrough[] = +static unsigned passthrough_decl[] = { _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), /* declare input color: */ - (D0_DCL | - (REG_TYPE_T << D0_TYPE_SHIFT) | - (T_DIFFUSE << D0_NR_SHIFT) | + (D0_DCL | + (REG_TYPE_T << D0_TYPE_SHIFT) | + (T_DIFFUSE << D0_NR_SHIFT) | D0_CHANNEL_ALL), 0, 0, +}; +static unsigned passthrough_program[] = +{ /* move to output color: */ - (A0_MOV | - (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | - A0_DEST_CHANNEL_ALL | + (A0_MOV | + (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | + A0_DEST_CHANNEL_ALL | (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | (T_DIFFUSE << A0_SRC0_NR_SHIFT)), 0x01230000, /* .xyzw */ @@ -107,7 +111,7 @@ static const float cos_constants[4] = { 1.0, /** * component-wise negation of ureg */ -static INLINE int +static inline int negate(int reg, int x, int y, int z, int w) { /* Another neat thing about the UREG representation */ @@ -125,10 +129,13 @@ negate(int reg, int x, int y, int z, int w) static void i915_use_passthrough_shader(struct i915_fragment_shader *fs) { - fs->program = (uint *) MALLOC(sizeof(passthrough)); + fs->program = (uint *) MALLOC(sizeof(passthrough_program)); + fs->decl = (uint *) MALLOC(sizeof(passthrough_decl)); if (fs->program) { - memcpy(fs->program, passthrough, sizeof(passthrough)); - fs->program_len = Elements(passthrough); + memcpy(fs->program, passthrough_program, sizeof(passthrough_program)); + memcpy(fs->decl, passthrough_decl, sizeof(passthrough_decl)); + fs->program_len = ARRAY_SIZE(passthrough_program); + fs->decl_len = ARRAY_SIZE(passthrough_decl); } fs->num_constants = 0; } @@ -173,7 +180,7 @@ static uint get_mapping(struct i915_fragment_shader* fs, int unit) static uint src_vector(struct i915_fp_compile *p, const struct i915_full_src_register *source, - struct i915_fragment_shader* fs) + struct i915_fragment_shader *fs) { uint index = source->Register.Index; uint src = 0, sem_name, sem_ind; @@ -322,7 +329,7 @@ get_result_flags(const struct i915_full_instruction *inst) = inst->Dst[0].Register.WriteMask; uint flags = 0x0; - if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) + if (inst->Instruction.Saturate) flags |= A0_DEST_SATURATE; if (writeMask & TGSI_WRITEMASK_X) @@ -372,6 +379,33 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex) } } +/** + * Return the number of coords needed to access a given TGSI_TEXTURE_* + */ +uint +i915_num_coords(uint tex) +{ + switch (tex) { + case TGSI_TEXTURE_SHADOW1D: + case TGSI_TEXTURE_1D: + return 1; + + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_RECT: + return 2; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + return 3; + + default: + debug_printf("Unknown texture target for num coords"); + return 2; + } +} + /** * Generate texel lookup instruction. @@ -393,7 +427,8 @@ emit_tex(struct i915_fp_compile *p, get_result_flags( inst ), sampler, coord, - opcode); + opcode, + i915_num_coords(texture) ); } @@ -406,7 +441,7 @@ static void emit_simple_arith(struct i915_fp_compile *p, const struct i915_full_instruction *inst, uint opcode, uint numArgs, - struct i915_fragment_shader* fs) + struct i915_fragment_shader *fs) { uint arg1, arg2, arg3; @@ -431,7 +466,7 @@ static void emit_simple_arith_swap2(struct i915_fp_compile *p, const struct i915_full_instruction *inst, uint opcode, uint numArgs, - struct i915_fragment_shader* fs) + struct i915_fragment_shader *fs) { struct i915_full_instruction inst2; @@ -454,7 +489,7 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, * SIN, COS -- could use another taylor step? * LIT -- results seem a little different to sw mesa * LOG -- different to mesa on negative numbers, but this is conformant. - */ + */ static void i915_translate_instruction(struct i915_fp_compile *p, const struct i915_full_instruction *inst, @@ -478,13 +513,29 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_ADD, 2, fs); break; + case TGSI_OPCODE_CEIL: + src0 = src_vector(p, &inst->Src[0], fs); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + i915_emit_arith(p, + A0_FLR, + tmp, + flags & A0_DEST_CHANNEL_ALL, 0, + negate(src0, 1, 1, 1, 1), 0, 0); + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, &inst->Dst[0]), + flags, 0, + negate(tmp, 1, 1, 1, 1), 0, 0); + break; + case TGSI_OPCODE_CMP: src0 = src_vector(p, &inst->Src[0], fs); src1 = src_vector(p, &inst->Src[1], fs); src2 = src_vector(p, &inst->Src[2], fs); - i915_emit_arith(p, A0_CMP, + i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]), - get_result_flags(inst), + get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */ break; @@ -500,7 +551,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); /* - * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 * result = DP4 t0, cos_constants @@ -612,7 +663,7 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_FRC, 1, fs); break; - case TGSI_OPCODE_KIL: + case TGSI_OPCODE_KILL_IF: /* kill if src[0].x < 0 || src[0].y < 0 ... */ src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); @@ -622,11 +673,21 @@ i915_translate_instruction(struct i915_fp_compile *p, A0_DEST_CHANNEL_ALL, /* dest writemask */ 0, /* sampler */ src0, /* coord*/ - T0_TEXKILL); /* opcode */ + T0_TEXKILL, /* opcode */ + 1); /* num_coord */ break; - case TGSI_OPCODE_KILP: - assert(0); /* not tested yet */ + case TGSI_OPCODE_KILL: + /* unconditional kill */ + tmp = i915_get_utemp(p); + + i915_emit_texld(p, + tmp, /* dest reg: a dummy reg */ + A0_DEST_CHANNEL_ALL, /* dest writemask */ + 0, /* sampler */ + negate(swizzle(0, ONE, ONE, ONE, ONE), 1, 1, 1, 1), /* coord */ + T0_TEXKILL, /* opcode */ + 1); /* num_coord */ break; case TGSI_OPCODE_LG2: @@ -703,21 +764,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MIN: - src0 = src_vector(p, &inst->Src[0], fs); - src1 = src_vector(p, &inst->Src[1], fs); - tmp = i915_get_utemp(p); - flags = get_result_flags(inst); - - i915_emit_arith(p, - A0_MAX, - tmp, flags & A0_DEST_CHANNEL_ALL, 0, - negate(src0, 1, 1, 1, 1), - negate(src1, 1, 1, 1, 1), 0); - - i915_emit_arith(p, - A0_MOV, - get_result_vector(p, &inst->Dst[0]), - flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); + emit_simple_arith(p, inst, A0_MIN, 2, fs); break; case TGSI_OPCODE_MOV: @@ -751,11 +798,11 @@ i915_translate_instruction(struct i915_fp_compile *p, get_result_vector(p, &inst->Dst[0]), flags, 0, swizzle(tmp, X, X, X, X), 0, 0); break; - + case TGSI_OPCODE_RET: /* XXX: no-op? */ break; - + case TGSI_OPCODE_RCP: src0 = src_vector(p, &inst->Src[0], fs); @@ -1048,7 +1095,7 @@ i915_translate_instruction(struct i915_fp_compile *p, static void i915_translate_token(struct i915_fp_compile *p, - const union i915_full_token* token, + const union i915_full_token *token, struct i915_fragment_shader *fs) { struct i915_fragment_shader *ifs = p->shader; @@ -1067,7 +1114,7 @@ static void i915_translate_token(struct i915_fp_compile *p, == TGSI_FILE_CONSTANT) { uint i; for (i = token->FullDeclaration.Range.First; - i <= token->FullDeclaration.Range.Last; + i <= MIN2(token->FullDeclaration.Range.Last, I915_MAX_CONSTANT - 1); i++) { assert(ifs->constant_flags[i] == 0x0); ifs->constant_flags[i] = I915_CONSTFLAG_USER; @@ -1237,18 +1284,27 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) /* Copy compilation results to fragment program struct: */ + assert(!ifs->decl); assert(!ifs->program); + + ifs->decl + = (uint *) MALLOC(decl_size * sizeof(uint)); ifs->program - = (uint *) MALLOC((program_size + decl_size) * sizeof(uint)); - if (ifs->program) { - ifs->program_len = program_size + decl_size; + = (uint *) MALLOC(program_size * sizeof(uint)); - memcpy(ifs->program, - p->declarations, + if (ifs->decl) { + ifs->decl_len = decl_size; + + memcpy(ifs->decl, + p->declarations, decl_size * sizeof(uint)); + } + + if (ifs->program) { + ifs->program_len = program_size; - memcpy(ifs->program + decl_size, - p->program, + memcpy(ifs->program, + p->program, program_size * sizeof(uint)); } } @@ -1313,4 +1369,8 @@ i915_translate_fragment_program( struct i915_context *i915, i915_fini_compile(i915, p); i915_optimize_free(i_tokens); + +#if 0 + i915_disassemble_program(NULL, fs->program, fs->program_len); +#endif }