From: Corbin Simpson Date: Sun, 2 Aug 2009 23:56:52 +0000 (-0700) Subject: Merge commit 'nha/r300-compiler-gallium' X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=30bca7a4e6ad4e5a9fc74aba2eb854bb1341cca7;hp=d0c398a8e2985b855f923aec3470cef8734a622a;p=mesa.git Merge commit 'nha/r300-compiler-gallium' --- diff --git a/src/egl/main/eglcompiler.h b/src/egl/main/eglcompiler.h index 0b19afedfd3..6b639b75c66 100644 --- a/src/egl/main/eglcompiler.h +++ b/src/egl/main/eglcompiler.h @@ -2,6 +2,39 @@ #define EGLCOMPILER_INCLUDED +/** + * Get standard integer types + */ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) +# include +#elif defined(_MSC_VER) + typedef __int8 int8_t; + typedef unsigned __int8 uint8_t; + typedef __int16 int16_t; + typedef unsigned __int16 uint16_t; +# ifndef __eglplatform_h_ + typedef __int32 int32_t; +# endif + typedef unsigned __int32 uint32_t; + typedef __int64 int64_t; + typedef unsigned __int64 uint64_t; + +# if defined(_WIN64) + typedef __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +# else + typedef __int32 intptr_t; + typedef unsigned __int32 uintptr_t; +# endif + +# define INT64_C(__val) __val##i64 +# define UINT64_C(__val) __val##ui64 +#else +/* hope the best instead of adding a bunch of ifdef's */ +# include +#endif + + /** * Function inlining */ diff --git a/src/egl/main/eglconfig.c b/src/egl/main/eglconfig.c index f2f32585c73..bbc585b55e9 100644 --- a/src/egl/main/eglconfig.c +++ b/src/egl/main/eglconfig.c @@ -34,7 +34,7 @@ void _eglInitConfig(_EGLConfig *config, EGLint id) { memset(config, 0, sizeof(*config)); - config->Handle = (EGLConfig) id; + config->Handle = (EGLConfig) _eglUIntToPointer((unsigned int) id); _eglSetConfigAttrib(config, EGL_CONFIG_ID, id); _eglSetConfigAttrib(config, EGL_BIND_TO_TEXTURE_RGB, EGL_DONT_CARE); _eglSetConfigAttrib(config, EGL_BIND_TO_TEXTURE_RGBA, EGL_DONT_CARE); diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c index 89de609d0b4..5304b84a26e 100644 --- a/src/egl/main/egldisplay.c +++ b/src/egl/main/egldisplay.c @@ -53,7 +53,7 @@ _eglLinkDisplay(_EGLDisplay *dpy) assert(key); /* "link" the display to the hash table */ _eglHashInsert(_eglGlobal.Displays, key, dpy); - dpy->Handle = (EGLDisplay) key; + dpy->Handle = (EGLDisplay) _eglUIntToPointer(key); return dpy->Handle; } @@ -66,7 +66,8 @@ _eglLinkDisplay(_EGLDisplay *dpy) void _eglUnlinkDisplay(_EGLDisplay *dpy) { - _eglHashRemove(_eglGlobal.Displays, (EGLuint) dpy->Handle); + EGLuint key = _eglPointerToUInt((void *) dpy->Handle); + _eglHashRemove(_eglGlobal.Displays, key); dpy->Handle = EGL_NO_DISPLAY; } @@ -91,7 +92,7 @@ _eglGetDisplayHandle(_EGLDisplay *display) _EGLDisplay * _eglLookupDisplay(EGLDisplay dpy) { - EGLuint key = (EGLuint) dpy; + EGLuint key = _eglPointerToUInt((void *) dpy); return (_EGLDisplay *) _eglHashLookup(_eglGlobal.Displays, key); } @@ -224,7 +225,7 @@ _eglUnlinkContext(_EGLContext *ctx) EGLContext _eglGetContextHandle(_EGLContext *ctx) { - return (EGLContext) (ctx && ctx->Display) ? ctx : EGL_NO_CONTEXT; + return (EGLContext) ((ctx && ctx->Display) ? ctx : EGL_NO_CONTEXT); } @@ -257,7 +258,7 @@ _eglLinkSurface(_EGLSurface *surf, _EGLDisplay *dpy) assert(key); _eglHashInsert(_eglGlobal.Surfaces, key, surf); - surf->Handle = (EGLSurface) key; + surf->Handle = (EGLSurface) _eglUIntToPointer(key); return surf->Handle; } @@ -270,8 +271,9 @@ void _eglUnlinkSurface(_EGLSurface *surf) { _EGLSurface *prev; + EGLuint key = _eglPointerToUInt((void *) surf->Handle); - _eglHashRemove(_eglGlobal.Surfaces, (EGLuint) surf->Handle); + _eglHashRemove(_eglGlobal.Surfaces, key); surf->Handle = EGL_NO_SURFACE; prev = surf->Display->SurfaceList; @@ -314,7 +316,6 @@ _eglGetSurfaceHandle(_EGLSurface *surface) _EGLSurface * _eglLookupSurface(EGLSurface surf) { - _EGLSurface *c = (_EGLSurface *) _eglHashLookup(_eglGlobal.Surfaces, - (EGLuint) surf); - return c; + EGLuint key = _eglPointerToUInt((void *) surf); + return (_EGLSurface *) _eglHashLookup(_eglGlobal.Surfaces, key); } diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 372ed3cd79a..2ef5db8a184 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -125,4 +125,25 @@ _eglIsSurfaceLinked(_EGLSurface *surf) } +/** + * Cast an unsigned int to a pointer. + */ +static INLINE void * +_eglUIntToPointer(unsigned int v) +{ + return (void *) ((uintptr_t) v); +} + + +/** + * Cast a pointer to an unsigned int. The pointer must be one that is + * returned by _eglUIntToPointer. + */ +static INLINE unsigned int +_eglPointerToUInt(const void *p) +{ + return (unsigned int) ((uintptr_t) p); +} + + #endif /* EGLDISPLAY_INCLUDED */ diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 8d885e48be6..bf84401e112 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -516,7 +516,7 @@ translate_instruction(llvm::Module *module, return; //just update the state } break; - case TGSI_OPCODE_LOOP: + case TGSI_OPCODE_BGNFOR: break; case TGSI_OPCODE_REP: break; @@ -532,7 +532,7 @@ translate_instruction(llvm::Module *module, return; //just update the state } break; - case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDFOR: break; case TGSI_OPCODE_ENDREP: break; @@ -574,7 +574,7 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_ENDPRIM: break; - case TGSI_OPCODE_BGNLOOP2: { + case TGSI_OPCODE_BGNLOOP: { instr->beginLoop(); storage->setCurrentBlock(instr->currentBlock()); return; @@ -587,7 +587,7 @@ translate_instruction(llvm::Module *module, return; } break; - case TGSI_OPCODE_ENDLOOP2: { + case TGSI_OPCODE_ENDLOOP: { instr->endLoop(); storage->setCurrentBlock(instr->currentBlock()); return; @@ -890,7 +890,7 @@ translate_instructionir(llvm::Module *module, case TGSI_OPCODE_IF: { } break; - case TGSI_OPCODE_LOOP: + case TGSI_OPCODE_BGNFOR: break; case TGSI_OPCODE_REP: break; @@ -900,7 +900,7 @@ translate_instructionir(llvm::Module *module, case TGSI_OPCODE_ENDIF: { } break; - case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDFOR: break; case TGSI_OPCODE_ENDREP: break; @@ -941,13 +941,13 @@ translate_instructionir(llvm::Module *module, break; case TGSI_OPCODE_ENDPRIM: break; - case TGSI_OPCODE_BGNLOOP2: { + case TGSI_OPCODE_BGNLOOP: { } break; case TGSI_OPCODE_BGNSUB: { } break; - case TGSI_OPCODE_ENDLOOP2: { + case TGSI_OPCODE_ENDLOOP: { } break; case TGSI_OPCODE_ENDSUB: { diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index 5f88cc2acac..802ec371189 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -665,14 +665,14 @@ TGSI Instruction Specification TBD -1.9.8 LOOP - Loop +1.9.8 BGNFOR - Begin a For-Loop dst.x = floor(src.x) dst.y = floor(src.y) dst.z = floor(src.z) if (dst.y <= 0) - pc = [matching ENDLOOP] + 1 + pc = [matching ENDFOR] + 1 endif Note: The destination must be a loop register. @@ -694,13 +694,13 @@ TGSI Instruction Specification TBD -1.9.12 ENDLOOP - End Loop +1.9.12 ENDFOR - End a For-Loop dst.x = dst.x + dst.z dst.y = dst.y - 1.0 if (dst.y > 0) - pc = [matching LOOP instruction] + 1 + pc = [matching BGNFOR instruction] + 1 endif Note: The destination must be a loop register. @@ -856,7 +856,7 @@ TGSI Instruction Specification ---------- -1.13.1 BGNLOOP2 - Begin Loop +1.13.1 BGNLOOP - Begin a Loop TBD @@ -866,7 +866,7 @@ TGSI Instruction Specification TBD -1.13.3 ENDLOOP2 - End Loop +1.13.3 ENDLOOP - End a Loop TBD diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 4ca3a16b8ae..f36b1114a95 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -473,8 +473,8 @@ iter_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_IF: case TGSI_OPCODE_ELSE: - case TGSI_OPCODE_BGNLOOP2: - case TGSI_OPCODE_ENDLOOP2: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: case TGSI_OPCODE_CAL: TXT( " :" ); UID( inst->InstructionExtLabel.Label ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 0179bba5a21..5af0a947947 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3087,9 +3087,9 @@ exec_instruction( mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; break; - case TGSI_OPCODE_LOOP: + case TGSI_OPCODE_BGNFOR: /* fall-through (for now) */ - case TGSI_OPCODE_BGNLOOP2: + case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; @@ -3097,9 +3097,9 @@ exec_instruction( mach->ContStack[mach->ContStackTop++] = mach->ContMask; break; - case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDFOR: /* fall-through (for now at least) */ - case TGSI_OPCODE_ENDLOOP2: + case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 3a47e9b84df..cd8871e32dd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -106,11 +106,11 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 1, 0, "TXL", TGSI_OPCODE_TXL }, { 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, { 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, - { 1, 1, 0, 0, "LOOP", TGSI_OPCODE_LOOP }, + { 1, 1, 0, 0, "BGNFOR", TGSI_OPCODE_BGNFOR }, { 0, 1, 0, 0, "REP", TGSI_OPCODE_REP }, { 0, 0, 0, 1, "ELSE", TGSI_OPCODE_ELSE }, { 0, 0, 0, 0, "ENDIF", TGSI_OPCODE_ENDIF }, - { 1, 0, 0, 0, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, + { 1, 0, 0, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, { 0, 0, 0, 0, "ENDREP", TGSI_OPCODE_ENDREP }, { 0, 1, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, { 1, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, @@ -130,9 +130,9 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT }, { 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT }, { 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM }, - { 0, 0, 0, 1, "BGNLOOP2", TGSI_OPCODE_BGNLOOP2 }, + { 0, 0, 0, 1, "BGNLOOP", TGSI_OPCODE_BGNLOOP }, { 0, 0, 0, 0, "BGNSUB", TGSI_OPCODE_BGNSUB }, - { 0, 0, 0, 1, "ENDLOOP2", TGSI_OPCODE_ENDLOOP2 }, + { 0, 0, 0, 1, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, { 0, 0, 0, 0, "ENDSUB", TGSI_OPCODE_ENDSUB }, { 1, 1, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 }, { 1, 1, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index cb62a95fc0a..4fe8553c423 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -242,8 +242,8 @@ iter_instruction( } switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_LOOP: - case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_BGNFOR: + case TGSI_OPCODE_ENDFOR: if (inst->FullDstRegisters[0].DstRegister.File != TGSI_FILE_LOOP || inst->FullDstRegisters[0].DstRegister.Index != 0) { report_error(ctx, "Destination register must be LOOP[0]"); @@ -252,7 +252,7 @@ iter_instruction( } switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_LOOP: + case TGSI_OPCODE_BGNFOR: if (inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_CONSTANT && inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_IMMEDIATE) { report_error(ctx, "Source register file must be either CONST or IMM"); diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 16848f7cc5e..52186770e6a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2531,7 +2531,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_LOOP: + case TGSI_OPCODE_BGNFOR: return 0; break; @@ -2547,7 +2547,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDFOR: return 0; break; diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index e5003af01d8..30e6e2f6b37 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -216,23 +216,23 @@ util_fast_exp2(float x) int32_t ipart; float fpart, mpart; union fi epart; - + if(x > 129.00000f) return 3.402823466e+38f; - + if(x < -126.99999f) return 0.0f; ipart = (int32_t) x; fpart = x - (float) ipart; - + /* same as * epart.f = (float) (1 << ipart) * but faster and without integer overflow for ipart > 31 */ epart.i = (ipart + 127 ) << 23; - + mpart = pow2_table[POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE)]; - + return epart.f * mpart; } @@ -409,6 +409,19 @@ float_to_ubyte(float f) } +/** + * Calc log base 2 + */ +static INLINE unsigned +util_logbase2(unsigned n) +{ + unsigned log2 = 0; + while (n >>= 1) + ++log2; + return log2; +} + + #define CLAMP( X, MIN, MAX ) ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) ) diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 7cd5656a7e6..58a8b5d0b0f 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1834,9 +1834,9 @@ emit_instruction(struct codegen *gen, case TGSI_OPCODE_ENDIF: return emit_ENDIF(gen, inst); - case TGSI_OPCODE_BGNLOOP2: + case TGSI_OPCODE_BGNLOOP: return emit_BGNLOOP(gen, inst); - case TGSI_OPCODE_ENDLOOP2: + case TGSI_OPCODE_ENDLOOP: return emit_ENDLOOP(gen, inst); case TGSI_OPCODE_BRK: return emit_BRK(gen, inst); diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 570553e1d68..6db9501128c 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1758,9 +1758,9 @@ exec_instruction( mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; break; - case TGSI_OPCODE_LOOP: + case TGSI_OPCODE_BGNFOR: /* fall-through (for now) */ - case TGSI_OPCODE_BGNLOOP2: + case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; @@ -1768,9 +1768,9 @@ exec_instruction( mach->ContStack[mach->ContStackTop++] = mach->ContMask; break; - case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDFOR: /* fall-through (for now at least) */ - case TGSI_OPCODE_ENDLOOP2: + case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ ASSERT(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c index ab6410aa607..db759639328 100644 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -947,7 +947,7 @@ static void brw_wm_emit_instruction( struct brw_wm_compile *c, #endif break; - case TGSI_OPCODE_LOOP: + case TGSI_OPCODE_BGNFOR: c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8); break; case TGSI_OPCODE_BRK: @@ -958,11 +958,11 @@ static void brw_wm_emit_instruction( struct brw_wm_compile *c, brw_CONT(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDFOR: c->loop_insn--; c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]); /* patch all the BREAK instructions from - last BEGINLOOP */ + last BGNFOR */ while (c->inst0 > c->loop_inst[c->loop_insn]) { c->inst0--; if (c->inst0->header.opcode == BRW_OPCODE_BREAK) { diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index c4be604e5a0..f0ba4fb308c 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -238,11 +238,11 @@ union tgsi_immediate_data #define TGSI_OPCODE_TXL 72 #define TGSI_OPCODE_BRK 73 #define TGSI_OPCODE_IF 74 -#define TGSI_OPCODE_LOOP 75 +#define TGSI_OPCODE_BGNFOR 75 #define TGSI_OPCODE_REP 76 #define TGSI_OPCODE_ELSE 77 #define TGSI_OPCODE_ENDIF 78 -#define TGSI_OPCODE_ENDLOOP 79 +#define TGSI_OPCODE_ENDFOR 79 #define TGSI_OPCODE_ENDREP 80 #define TGSI_OPCODE_PUSHA 81 #define TGSI_OPCODE_POPA 82 @@ -262,9 +262,9 @@ union tgsi_immediate_data #define TGSI_OPCODE_CONT 96 #define TGSI_OPCODE_EMIT 97 #define TGSI_OPCODE_ENDPRIM 98 -#define TGSI_OPCODE_BGNLOOP2 99 +#define TGSI_OPCODE_BGNLOOP 99 #define TGSI_OPCODE_BGNSUB 100 -#define TGSI_OPCODE_ENDLOOP2 101 +#define TGSI_OPCODE_ENDLOOP 101 #define TGSI_OPCODE_ENDSUB 102 #define TGSI_OPCODE_NOISE1 103 #define TGSI_OPCODE_NOISE2 104 diff --git a/src/gallium/state_trackers/vega/asm_filters.h b/src/gallium/state_trackers/vega/asm_filters.h index 49807b9ab41..9a49f2e12d0 100644 --- a/src/gallium/state_trackers/vega/asm_filters.h +++ b/src/gallium/state_trackers/vega/asm_filters.h @@ -60,7 +60,7 @@ static const char convolution_asm[] = "DCL SAMP[0], CONSTANT\n" "0: MOV TEMP[0], CONST[0].xxxx\n" "1: MOV TEMP[1], CONST[0].xxxx\n" - "2: BGNLOOP2 :14\n" + "2: BGNLOOP :14\n" "3: SGE TEMP[0].z, TEMP[0].yyyy, CONST[1].xxxx\n" "4: IF TEMP[0].zzzz :7\n" "5: BRK\n" @@ -72,7 +72,7 @@ static const char convolution_asm[] = "11: MOV TEMP[3], CONST[ADDR[0]+%d]\n" "12: MAD TEMP[1], TEMP[2], TEMP[3], TEMP[1]\n" "13: ADD TEMP[0].y, TEMP[0].yyyy, CONST[0].yyyy\n" - "14: ENDLOOP2 :2\n" + "14: ENDLOOP :2\n" "15: MAD OUT[0], TEMP[1], CONST[1].yyyy, CONST[1].zzzz\n" "16: END\n"; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 19f777fe32e..85a4237d5a7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -3007,7 +3007,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) loop_depth--; inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); - /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + /* patch all the BREAK/CONT instructions from last BGNLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; if (inst0->header.opcode == BRW_OPCODE_BREAK) { diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index af535037d06..bd46f9acf2e 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -164,47 +164,46 @@ static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) r300ContextPtr r300 = R300_CONTEXT(ctx); BATCH_LOCALS(&r300->radeon); int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd); - int notexture = 0; - - if (numtmus) { - int i; - - for(i = 0; i < numtmus; ++i) { - radeonTexObj *t = r300->hw.textures[i]; - - if (!t) - notexture = 1; - } - - if (r300->radeon.radeonScreen->kernel_mm && notexture) { - return; - } - for(i = 0; i < numtmus; ++i) { - radeonTexObj *t = r300->hw.textures[i]; - if (t && !t->image_override) { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - END_BATCH(); - } else if (!t) { - /* Texture unit hasn't a texture bound nothings to do */ - } else { /* override cases */ - if (t->bo) { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - END_BATCH(); - } else if (!r300->radeon.radeonScreen->kernel_mm) { - BEGIN_BATCH_NO_AUTOSTATE(2); - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH(t->override_offset); - END_BATCH(); - } else { - /* Texture unit hasn't a texture bound nothings to do */ - } - } + int i; + + for(i = 0; i < numtmus; ++i) { + radeonTexObj *t = r300->hw.textures[i]; + if (t && !t->image_override) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } else if (!t) { + /* Texture unit hasn't a texture bound. + * We assign the current color buffer as a fakery to make + * KIL work on KMS (without it, the CS checker will complain). + */ + if (r300->radeon.radeonScreen->kernel_mm) { + struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&r300->radeon); + if (rrb && rrb->bo) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } + } + } else { /* override cases */ + if (t->bo) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } else if (!r300->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH(t->override_offset); + END_BATCH(); + } else { + /* Texture unit hasn't a texture bound nothings to do */ + } } } } diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 6f3aab986d2..db404b38479 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -451,11 +451,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitState(r300); r300InitShaderFunctions(r300); - if (screen->chip_family == CHIP_FAMILY_RS600 || screen->chip_family == CHIP_FAMILY_RS690 || - screen->chip_family == CHIP_FAMILY_RS740) { - r300->radeon.texture_row_align = 64; - } - r300InitGLExtensions(ctx); return GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 5bded642ef8..7558f9e225b 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -168,18 +168,21 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, } #if 1 if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) { - assert(rrbd != 0); - cbpitch = (rrbd->pitch / rrbd->cpp); + uint32_t zbpitch = (rrbd->pitch / rrbd->cpp); if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ - cbpitch |= R300_DEPTHMACROTILE_ENABLE; + zbpitch |= R300_DEPTHMACROTILE_ENABLE; } if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ - cbpitch |= R300_DEPTHMICROTILE_TILED; + zbpitch |= R300_DEPTHMICROTILE_TILED; } BEGIN_BATCH_NO_AUTOSTATE(6); OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch); + OUT_BATCH_REGSEQ(R300_ZB_DEPTHPITCH, 1); + if (!r300->radeon.radeonScreen->kernel_mm) + OUT_BATCH(zbpitch); + else + OUT_BATCH_RELOC(zbpitch, rrbd->bo, zbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); } #endif diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index b868b624960..050e8cd2a7d 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1253,6 +1253,7 @@ static GLuint translate_lod_bias(GLfloat bias) return (((GLuint)b) << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK; } + static void r300SetupTextures(GLcontext * ctx) { int i, mtu; @@ -1345,6 +1346,28 @@ static void r300SetupTextures(GLcontext * ctx) } } + /* R3xx and R4xx chips require that the texture unit corresponding to + * KIL instructions is really enabled. + * + * We do some fakery here and in the state atom emit logic to enable + * the texture without tripping up the CS checker in the kernel. + */ + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) { + last_hw_tmu++; + + r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; + + r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.size.cmd[R300_TEX_VALUE_0] = 0; /* 1x1 texture */ + r300->hw.tex.format.cmd[R300_TEX_VALUE_0] = 0; /* A8 format */ + r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0] = 0; + } + } + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1); r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = @@ -1362,16 +1385,6 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); - if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { - if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) { - // The KILL operation requires the first texture unit - // to be enabled. - r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; - r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; - r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = - cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1); - } - } r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings); if (RADEON_DEBUG & DEBUG_STATE) @@ -2234,6 +2247,7 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) _mesa_update_draw_buffer_bounds(ctx); R300_STATECHANGE(r300, cb); + R300_STATECHANGE(r300, zb); } r300->radeon.NewGLState |= new_state; diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index 74fec02584b..dc2fb0144a2 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -78,8 +78,7 @@ int r600_cs_write_reloc(struct radeon_cs *cs, struct radeon_bo *bo, uint32_t read_domain, uint32_t write_domain, - uint32_t flags, - offset_modifiers* poffset_mod) + uint32_t flags) { struct r600_cs_reloc_legacy *relocs; int i; @@ -135,10 +134,6 @@ int r600_cs_write_reloc(struct radeon_cs *cs, cs->section_ndw += 2; cs->section_cdw += 2; - relocs[i].offset_mod.shift = poffset_mod->shift; - relocs[i].offset_mod.shiftbits = poffset_mod->shiftbits; - relocs[i].offset_mod.mask = poffset_mod->mask; - return 0; } } @@ -160,9 +155,6 @@ int r600_cs_write_reloc(struct radeon_cs *cs, { return -ENOMEM; } - relocs[cs->crelocs].offset_mod.shift = poffset_mod->shift; - relocs[cs->crelocs].offset_mod.shiftbits = poffset_mod->shiftbits; - relocs[cs->crelocs].offset_mod.mask = poffset_mod->mask; relocs[cs->crelocs].indices[0] = cs->cdw - 1; relocs[cs->crelocs].reloc_indices[0] = cs->section_cdw; @@ -255,65 +247,44 @@ static int r600_cs_process_relocs(struct radeon_cs *cs, csm = (struct r600_cs_manager_legacy*)cs->csm; relocs = (struct r600_cs_reloc_legacy *)cs->relocs; restart: - for (i = 0; i < cs->crelocs; i++) - { - for (j = 0; j < relocs[i].cindices; j++) - { + for (i = 0; i < cs->crelocs; i++) { uint32_t soffset, eoffset, asicoffset; r = radeon_bo_legacy_validate(relocs[i].base.bo, - &soffset, &eoffset); - if (r == -EAGAIN) - { - goto restart; + &soffset, &eoffset); + if (r == -EAGAIN) { + goto restart; } - if (r) - { - fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", - relocs[i].base.bo, soffset, eoffset); - return r; + if (r) { + fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", + relocs[i].base.bo, soffset, eoffset); + return r; } asicoffset = soffset; - if (asicoffset >= eoffset) - { - /* radeon_bo_debug(relocs[i].base.bo, 12); */ - fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", - relocs[i].base.bo, soffset, eoffset); - fprintf(stderr, "above end: %p 0x%08X 0x%08X\n", - relocs[i].base.bo, - cs->packets[relocs[i].indices[j]], - eoffset); - exit(0); - return -EINVAL; - } - /* apply offset operator */ - switch (relocs[i].offset_mod.shift) - { - case NO_SHIFT: - asicoffset = asicoffset & relocs[i].offset_mod.mask; - break; - case LEFT_SHIFT: - asicoffset = (asicoffset << relocs[i].offset_mod.shiftbits) & relocs[i].offset_mod.mask; - break; - case RIGHT_SHIFT: - asicoffset = (asicoffset >> relocs[i].offset_mod.shiftbits) & relocs[i].offset_mod.mask; - break; - default: - break; - }; - - /* pkt3 nop header in ib chunk */ - cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000; - - /* reloc index in ib chunk */ - cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw; - - /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */ - reloc_chunk[offset_dw] = asicoffset; - reloc_chunk[offset_dw + 3] = 0; - - offset_dw += 4; - } + + for (j = 0; j < relocs[i].cindices; j++) { + if (asicoffset >= eoffset) { + /* radeon_bo_debug(relocs[i].base.bo, 12); */ + fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", + relocs[i].base.bo, soffset, eoffset); + fprintf(stderr, "above end: %p 0x%08X 0x%08X\n", + relocs[i].base.bo, + cs->packets[relocs[i].indices[j]], + eoffset); + exit(0); + return -EINVAL; + } + /* pkt3 nop header in ib chunk */ + cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000; + /* reloc index in ib chunk */ + cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw; + } + + /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */ + reloc_chunk[offset_dw] = asicoffset; + reloc_chunk[offset_dw + 3] = 0; + + offset_dw += 4; } *length_dw_reloc_chunk = offset_dw; @@ -351,10 +322,7 @@ static int r600_cs_emit(struct radeon_cs *cs) struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm; struct drm_radeon_cs cs_cmd; struct drm_radeon_cs_chunk cs_chunk[2]; - drm_radeon_cmd_buffer_t cmd; - /* drm_r300_cmd_header_t age; */ uint32_t length_dw_reloc_chunk; - uint64_t ull; uint64_t chunk_ptrs[2]; uint32_t reloc_chunk[128]; int r; @@ -363,43 +331,13 @@ static int r600_cs_emit(struct radeon_cs *cs) /* TODO : put chip level things here if need. */ /* csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); */ - BATCH_LOCALS(csm->ctx); - drm_radeon_getparam_t gp; - uint32_t current_scratchx_age; - - gp.param = RADEON_PARAM_LAST_CLEAR; - gp.value = (int *)¤t_scratchx_age; - r = drmCommandWriteRead(cs->csm->fd, - DRM_RADEON_GETPARAM, - &gp, - sizeof(gp)); - if (r) - { - fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, r); - exit(1); - } - - csm->pending_age = 0; csm->pending_count = 1; - current_scratchx_age++; - csm->pending_age = current_scratchx_age; - - BEGIN_BATCH_NO_AUTOSTATE(3); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); - R600_OUT_BATCH((SCRATCH_REG2 - R600_SET_CONFIG_REG_OFFSET) >> 2); - R600_OUT_BATCH(current_scratchx_age); - END_BATCH(); - COMMIT_BATCH(); - - //TODO ioctl to get back cs id assigned in drm - //csm->pending_age = cs_id_back; - r = r600_cs_process_relocs(cs, &(reloc_chunk[0]), &length_dw_reloc_chunk); if (r) { return 0; } - + /* raw ib chunk */ cs_chunk[0].chunk_id = RADEON_CHUNK_ID_IB; cs_chunk[0].length_dw = cs->cdw; @@ -429,6 +367,8 @@ static int r600_cs_emit(struct radeon_cs *cs) return r; } + csm->pending_age = cs_cmd.cs_id; + r600_cs_set_age(cs); cs->csm->read_used = 0; @@ -514,8 +454,12 @@ struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_cont void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */ { radeonContextPtr rmesa = &r600->radeon; - GLuint size; + rmesa->hw.max_state_size = 4000; /* rough estimate */ + + rmesa->hw.all_dirty = GL_TRUE; + rmesa->hw.is_dirty = GL_TRUE; + /* Initialize command buffer */ size = 256 * driQueryOptioni(&rmesa->optionCache, "command_buffer_size"); diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h index bd1ed7fdff0..5df0cf1ab61 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h @@ -132,15 +132,13 @@ struct r600_cs_reloc_legacy { uint32_t cindices; uint32_t *indices; uint32_t *reloc_indices; - struct offset_modifiers offset_mod; }; extern int r600_cs_write_reloc(struct radeon_cs *cs, struct radeon_bo *bo, uint32_t read_domain, uint32_t write_domain, - uint32_t flags, - offset_modifiers* poffset_mod); + uint32_t flags); static inline void r600_cs_write_dword(struct radeon_cs *cs, uint32_t dword) { @@ -171,7 +169,7 @@ struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_cont /** * Write a relocated dword to the command buffer. */ -#define R600_OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags, offset_mod) \ +#define R600_OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) \ do { \ if (0 && offset) { \ fprintf(stderr, "(%s:%s:%d) offset : %d\n", \ @@ -179,7 +177,7 @@ struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_cont } \ r600_cs_write_dword(b_l_rmesa->cmdbuf.cs, offset); \ r600_cs_write_reloc(b_l_rmesa->cmdbuf.cs, \ - bo, rd, wd, flags, offset_mod); \ + bo, rd, wd, flags); \ } while(0) /* R600/R700 */ diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index bcb33e1386f..fbb8164af59 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -128,13 +128,6 @@ enum RIGHT_SHIFT = 2, }; -typedef struct offset_modifiers -{ - GLuint shift; - GLuint shiftbits; - GLuint mask; -} offset_modifiers; - /** * \brief R600 context structure. */ diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index ebd5ff106be..0abf112b55f 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1205,7 +1205,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm) +GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized) { PVSSRC * texture_coordinate_source; PVSSRC * texture_unit_source; @@ -1227,10 +1227,18 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm) tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; tex_instruction_ptr->m_Word1.f.lod_bias = 0x0; - tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED; - tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED; - tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED; - tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED; + if (normalized) { + tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED; + } else { + /* XXX: UNNORMALIZED tex coords have limited wrap modes */ + tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED; + } tex_instruction_ptr->m_Word2.f.offset_x = 0x0; tex_instruction_ptr->m_Word2.f.offset_y = 0x0; @@ -2196,11 +2204,19 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) if( GL_TRUE == IsTex(pILInst->Opcode) ) { - if( GL_FALSE == assemble_tex_instruction(pAsm) ) - { - r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction"); - return GL_FALSE; - } + if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { + if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) + { + r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction"); + return GL_FALSE; + } + } else { + if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) ) + { + r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction"); + return GL_FALSE; + } + } } else { //ALU diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index e9b21b802ec..f9c4d849c65 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -424,7 +424,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, GLboolean assemble_dst(r700_AssemblerBase *pAsm); GLboolean tex_dst(r700_AssemblerBase *pAsm); GLboolean tex_src(r700_AssemblerBase *pAsm); -GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm); +GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized); void initialize(r700_AssemblerBase *pAsm); GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, int source_index, diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index c083862f369..78779e841d7 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -38,6 +38,8 @@ #include "r700_vertprog.h" #include "r700_ioctl.h" +#include "radeon_mipmap_tree.h" + #define LINK_STATES(reg) \ do \ { \ @@ -241,6 +243,67 @@ GLboolean r700InitChipObject(context_t *context) return GL_TRUE; } +GLboolean r700SendTextureState(context_t *context) +{ + unsigned int i; + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + struct radeon_bo *bo = NULL; + BATCH_LOCALS(&context->radeon); + + for (i=0; itextures[i]; + if (t) { + if (!t->image_override) + bo = t->mt->bo; + else + bo = t->bo; + if (bo) { + + r700SyncSurf(context, bo, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, + 0, TC_ACTION_ENA_bit); + + BEGIN_BATCH_NO_AUTOSTATE(9); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + R600_OUT_BATCH(i * 7); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); + R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2, + bo, + 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3, + bo, + r700->textures[i]->SQ_TEX_RESOURCE3, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(5); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); + R600_OUT_BATCH(i * 3); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(2 + 4); + R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA); + END_BATCH(); + + COMMIT_BATCH(); + } + } + } + return GL_TRUE; +} + void r700SetupVTXConstants(GLcontext * ctx, unsigned int nStreamID, void * pAos, @@ -249,10 +312,7 @@ void r700SetupVTXConstants(GLcontext * ctx, unsigned int count) /* number of vectors in stream */ { context_t *context = R700_CONTEXT(ctx); - uint32_t *dest; struct radeon_aos * paos = (struct radeon_aos *)pAos; - offset_modifiers offset_mod = {NO_SHIFT, 0, 0xFFFFFFFF}; - BATCH_LOCALS(&context->radeon); unsigned int uSQ_VTX_CONSTANT_WORD0_0; @@ -261,6 +321,9 @@ void r700SetupVTXConstants(GLcontext * ctx, unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0; unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0; + if (!paos->bo) + return GL_FALSE; + if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) || (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) || (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) || @@ -294,7 +357,7 @@ void r700SetupVTXConstants(GLcontext * ctx, R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0, paos->bo, uSQ_VTX_CONSTANT_WORD0_0, - RADEON_GEM_DOMAIN_GTT, 0, 0, &offset_mod); + RADEON_GEM_DOMAIN_GTT, 0, 0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0); @@ -310,7 +373,6 @@ void r700SetupVTXConstants(GLcontext * ctx, int r700SetupStreams(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); - BATCH_LOCALS(&context->radeon); struct r700_vertex_program *vpc @@ -324,7 +386,7 @@ int r700SetupStreams(GLcontext * ctx) BEGIN_BATCH_NO_AUTOSTATE(6); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); - R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); R600_OUT_BATCH(0); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); @@ -417,11 +479,10 @@ GLboolean r700SendContextStates(context_t *context) return GL_TRUE; } -GLboolean r700SendDepthTargetState(context_t *context, int id) +GLboolean r700SendDepthTargetState(context_t *context) { R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); struct radeon_renderbuffer *rrb; - offset_modifiers offset_mod; BATCH_LOCALS(&context->radeon); rrb = radeon_get_depthbuffer(&context->radeon); @@ -430,10 +491,6 @@ GLboolean r700SendDepthTargetState(context_t *context, int id) return GL_FALSE; } - offset_mod.shift = NO_SHIFT; - offset_mod.shiftbits = 0; - offset_mod.mask = 0xFFFFFFFF; - BEGIN_BATCH_NO_AUTOSTATE(9); R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2); R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All); @@ -442,7 +499,7 @@ GLboolean r700SendDepthTargetState(context_t *context, int id) R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All, rrb->bo, r700->DB_DEPTH_BASE.u32All, - 0, RADEON_GEM_DOMAIN_VRAM, 0, &offset_mod); + 0, RADEON_GEM_DOMAIN_VRAM, 0); R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All); R600_OUT_BATCH(r700->DB_HTILE_DATA_BASE.u32All); END_BATCH(); @@ -479,7 +536,6 @@ GLboolean r700SendRenderTargetState(context_t *context, int id) { R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); struct radeon_renderbuffer *rrb; - offset_modifiers offset_mod; BATCH_LOCALS(&context->radeon); rrb = radeon_get_colorbuffer(&context->radeon); @@ -494,16 +550,12 @@ GLboolean r700SendRenderTargetState(context_t *context, int id) if (!r700->render_target[id].enabled) return GL_FALSE; - offset_mod.shift = NO_SHIFT; - offset_mod.shiftbits = 0; - offset_mod.mask = 0xFFFFFFFF; - BEGIN_BATCH_NO_AUTOSTATE(3); R600_OUT_BATCH_REGSEQ(CB_COLOR0_BASE + (4 * id), 1); R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, rrb->bo, r700->render_target[id].CB_COLOR0_BASE.u32All, - 0, RADEON_GEM_DOMAIN_VRAM, 0, &offset_mod); + 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) && @@ -540,16 +592,13 @@ GLboolean r700SendRenderTargetState(context_t *context, int id) GLboolean r700SendPSState(context_t *context) { R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_renderbuffer *rrb; struct radeon_bo * pbo; - offset_modifiers offset_mod; BATCH_LOCALS(&context->radeon); pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context)); - offset_mod.shift = NO_SHIFT; - offset_mod.shiftbits = 0; - offset_mod.mask = 0xFFFFFFFF; + if (!pbo) + return GL_FALSE; r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); @@ -558,7 +607,7 @@ GLboolean r700SendPSState(context_t *context) R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All, pbo, r700->ps.SQ_PGM_START_PS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0, &offset_mod); + RADEON_GEM_DOMAIN_GTT, 0, 0); END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(9); @@ -575,16 +624,13 @@ GLboolean r700SendPSState(context_t *context) GLboolean r700SendVSState(context_t *context) { R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_renderbuffer *rrb; struct radeon_bo * pbo; - offset_modifiers offset_mod; BATCH_LOCALS(&context->radeon); pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); - offset_mod.shift = NO_SHIFT; - offset_mod.shiftbits = 0; - offset_mod.mask = 0xFFFFFFFF; + if (!pbo) + return GL_FALSE; r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); @@ -593,7 +639,7 @@ GLboolean r700SendVSState(context_t *context) R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All, pbo, r700->vs.SQ_PGM_START_VS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0, &offset_mod); + RADEON_GEM_DOMAIN_GTT, 0, 0); END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(6); @@ -609,9 +655,7 @@ GLboolean r700SendVSState(context_t *context) GLboolean r700SendFSState(context_t *context) { R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_renderbuffer *rrb; struct radeon_bo * pbo; - offset_modifiers offset_mod; BATCH_LOCALS(&context->radeon); /* XXX fixme @@ -625,9 +669,8 @@ GLboolean r700SendFSState(context_t *context) r700->fs.SQ_PGM_CF_OFFSET_FS.u32All = 0; /* XXX */ - offset_mod.shift = NO_SHIFT; - offset_mod.shiftbits = 0; - offset_mod.mask = 0xFFFFFFFF; + if (!pbo) + return GL_FALSE; r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); @@ -636,7 +679,7 @@ GLboolean r700SendFSState(context_t *context) R600_OUT_BATCH_RELOC(r700->fs.SQ_PGM_START_FS.u32All, pbo, r700->fs.SQ_PGM_START_FS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0, &offset_mod); + RADEON_GEM_DOMAIN_GTT, 0, 0); END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(6); @@ -652,8 +695,6 @@ GLboolean r700SendFSState(context_t *context) GLboolean r700SendViewportState(context_t *context, int id) { R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_renderbuffer *rrb; - offset_modifiers offset_mod; BATCH_LOCALS(&context->radeon); if (id > R700_MAX_VIEWPORTS) diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 180d980442b..f382686be4b 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -262,9 +262,8 @@ void * r700GetActiveFpShaderBo(GLcontext * ctx) GLboolean r700SetupFragmentProgram(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); - BATCH_LOCALS(&context->radeon); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - + BATCH_LOCALS(&context->radeon); struct r700_fragment_program *fp = (struct r700_fragment_program *) (ctx->FragmentProgram._Current); r700_AssemblerBase *pAsm = &(fp->r700AsmCode); @@ -339,7 +338,6 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) } /* sent out shader constants. */ - paramList = fp->mesa_program.Base.Parameters; if(NULL != paramList) @@ -349,7 +347,7 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) unNumParamData = paramList->NumParameters * 4; BEGIN_BATCH_NO_AUTOSTATE(2 + unNumParamData); - + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, unNumParamData)); /* assembler map const from very beginning. */ @@ -424,5 +422,3 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) return GL_TRUE; } - - diff --git a/src/mesa/drivers/dri/r600/r700_ioctl.c b/src/mesa/drivers/dri/r600/r700_ioctl.c index c4795320012..23cc128d6db 100644 --- a/src/mesa/drivers/dri/r600/r700_ioctl.c +++ b/src/mesa/drivers/dri/r600/r700_ioctl.c @@ -43,7 +43,6 @@ static void r700Flush(GLcontext *ctx) { radeonContextPtr radeon = RADEON_CONTEXT(ctx); - context_t * context = R700_CONTEXT(ctx); if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s %d\n", __FUNCTION__, radeon->cmdbuf.cs->cdw); @@ -59,7 +58,7 @@ static void r700Flush(GLcontext *ctx) if (radeon->dma.flush) radeon->dma.flush( ctx ); - r700SendContextStates(context); + r700EmitState(ctx); if (radeon->cmdbuf.cs->cdw) rcommonFlushCmdBuf(radeon, __FUNCTION__); diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 1810f4be0ee..20376d2c36d 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -44,7 +44,6 @@ #include "tnl/t_vertex.h" #include "tnl/t_pipeline.h" -#include "radeon_mipmap_tree.h" #include "r600_context.h" #include "r600_cmdbuf.h" @@ -58,7 +57,7 @@ void r700WaitForIdle(context_t *context); void r700WaitForIdleClean(context_t *context); void r700Start3D(context_t *context); GLboolean r700SendTextureState(context_t *context); -unsigned int r700PrimitiveType(int prim); +static unsigned int r700PrimitiveType(int prim); void r600UpdateTextureState(GLcontext * ctx); GLboolean r700SyncSurf(context_t *context, struct radeon_bo *pbo, @@ -138,68 +137,15 @@ static GLboolean r700SetupShaders(GLcontext * ctx) exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift); r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1; - return GL_TRUE; -} + r600UpdateTextureState(ctx); -GLboolean r700SendTextureState(context_t *context) -{ - unsigned int i; - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - offset_modifiers offset_mod = {NO_SHIFT, 0, 0xFFFFFFFF}; - struct radeon_bo *bo = NULL; - BATCH_LOCALS(&context->radeon); + r700SendFSState(context); // FIXME just a place holder for now + r700SendPSState(context); + r700SendVSState(context); + + r700SendTextureState(context); + r700SetupStreams(ctx); - for (i=0; itextures[i]; - if (t) { - if (!t->image_override) - bo = t->mt->bo; - else - bo = t->bo; - if (bo) { - - r700SyncSurf(context, bo, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, - 0, TC_ACTION_ENA_bit); - - BEGIN_BATCH_NO_AUTOSTATE(9); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH(i * 7); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); - R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2, - bo, - 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0, &offset_mod); - R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3, - bo, - r700->textures[i]->SQ_TEX_RESOURCE3, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0, &offset_mod); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); - END_BATCH(); - - BEGIN_BATCH_NO_AUTOSTATE(5); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); - R600_OUT_BATCH(i * 3); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2); - END_BATCH(); - - BEGIN_BATCH_NO_AUTOSTATE(2 + 4); - R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4); - R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED); - R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN); - R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE); - R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA); - END_BATCH(); - - COMMIT_BATCH(); - } - } - } return GL_TRUE; } @@ -211,17 +157,15 @@ GLboolean r700SyncSurf(context_t *context, { BATCH_LOCALS(&context->radeon); uint32_t cp_coher_size; - offset_modifiers offset_mod; + + if (!pbo) + return GL_FALSE; if (pbo->size == 0xffffffff) cp_coher_size = 0xffffffff; else cp_coher_size = ((pbo->size + 255) >> 8); - offset_mod.shift = NO_SHIFT; - offset_mod.shiftbits = 0; - offset_mod.mask = 0xFFFFFFFF; - BEGIN_BATCH_NO_AUTOSTATE(5); R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3)); R600_OUT_BATCH(sync_type); @@ -229,7 +173,7 @@ GLboolean r700SyncSurf(context_t *context, R600_OUT_BATCH_RELOC(0, pbo, 0, - read_domain, write_domain, 0, &offset_mod); // ??? + read_domain, write_domain, 0); // ??? R600_OUT_BATCH(10); END_BATCH(); @@ -238,7 +182,7 @@ GLboolean r700SyncSurf(context_t *context, return GL_TRUE; } -unsigned int r700PrimitiveType(int prim) +static unsigned int r700PrimitiveType(int prim) { switch (prim & PRIM_MODE_MASK) { @@ -279,112 +223,112 @@ unsigned int r700PrimitiveType(int prim) } } -static GLboolean r700RunRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) +static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) { - context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - int lastIndex = 0; - BATCH_LOCALS(&context->radeon); + context_t *context = R700_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + int type, i, total_emit; + int num_indices = end - start; + uint32_t vgt_draw_initiator = 0; + uint32_t vgt_index_type = 0; + uint32_t vgt_primitive_type = 0; + uint32_t vgt_num_indices = 0; - unsigned int i, j; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - - r700Start3D(context); /* TODO : this is too much. */ - - r700SendSQConfig(context); - - r700UpdateShaders(ctx); - - r700SetScissor(context); - r700SetRenderTarget(context, 0); - r700SetDepthTarget(context); - - if(r700SetupStreams(ctx)) - { - return GL_TRUE; - } - - r600UpdateTextureState(ctx); - r700SendTextureState(context); - - r700SetupShaders(ctx); - - r700SendFSState(context); // FIXME just a place holder for now - r700SendPSState(context); - r700SendVSState(context); + type = r700PrimitiveType(prim); - r700SendUCPState(context); - r700SendContextStates(context); - r700SendViewportState(context, 0); - r700SendRenderTargetState(context, 0); - r700SendDepthTargetState(context); + if (type < 0 || num_indices <= 0) + return; - /* richard test code */ - for (i = 0; i < vb->PrimitiveCount; i++) - { - GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); - GLuint start = vb->Primitive[i].start; - GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; - GLuint numIndices = vb->Primitive[i].count; - GLuint numEntires; - - unsigned int VGT_DRAW_INITIATOR = 0; - unsigned int VGT_INDEX_TYPE = 0; - unsigned int VGT_PRIMITIVE_TYPE = 0; - unsigned int VGT_NUM_INDICES = 0; - - if (numIndices < 1) - continue; - - numEntires = 3 /* VGT_PRIMITIVE_TYPE */ + total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + 2 /* VGT_INDEX_TYPE */ + 2 /* NUM_INSTANCES */ - + numIndices + 3; /* DRAW_INDEX_IMMD */ - - BEGIN_BATCH_NO_AUTOSTATE(numEntires); + + num_indices + 3; /* DRAW_INDEX_IMMD */ + BEGIN_BATCH_NO_AUTOSTATE(total_emit); // prim - VGT_PRIMITIVE_TYPE |= r700PrimitiveType(prim) << VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift; + SETfield(vgt_primitive_type, type, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); - R600_OUT_BATCH(VGT_PRIMITIVE_TYPE); + R600_OUT_BATCH(vgt_primitive_type); // index type - VGT_INDEX_TYPE |= DI_INDEX_SIZE_32_BIT << INDEX_TYPE_shift; + SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); - R600_OUT_BATCH(VGT_INDEX_TYPE); + R600_OUT_BATCH(vgt_index_type); // num instances R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); R600_OUT_BATCH(1); // draw packet - VGT_NUM_INDICES = numIndices; - VGT_DRAW_INITIATOR |= DI_SRC_SEL_IMMEDIATE << SOURCE_SELECT_shift; - VGT_DRAW_INITIATOR |= DI_MAJOR_MODE_0 << MAJOR_MODE_shift; + vgt_num_indices = num_indices; + SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (numIndices + 1))); - R600_OUT_BATCH(VGT_NUM_INDICES); - R600_OUT_BATCH(VGT_DRAW_INITIATOR); + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); - for (j = lastIndex; j < lastIndex + numIndices; j++) - { - R600_OUT_BATCH(j); + for (i = start; i < end; i++) { + R600_OUT_BATCH(i); } - lastIndex += numIndices; - END_BATCH(); COMMIT_BATCH(); + +} + +void r700EmitState(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + + if (radeon->cmdbuf.cs->cdw && !radeon->hw.is_dirty && !radeon->hw.all_dirty) + return; + + rcommonEnsureCmdBufSpace(&context->radeon, + context->radeon.hw.max_state_size, __FUNCTION__); + + r700Start3D(context); + r700SendSQConfig(context); + + r700SendUCPState(context); + r700SendContextStates(context); + r700SendViewportState(context, 0); + r700SendRenderTargetState(context, 0); + r700SendDepthTargetState(context); + +} + +static GLboolean r700RunRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) +{ + context_t *context = R700_CONTEXT(ctx); + unsigned int i; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + + r700UpdateShaders(ctx); + r700SetScissor(context); + r700SetupShaders(ctx); + + r700SetRenderTarget(context, 0); + r700SetDepthTarget(context); + + r700EmitState(ctx); + + /* richard test code */ + for (i = 0; i < vb->PrimitiveCount; i++) { + GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); + GLuint start = vb->Primitive[i].start; + GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; + r700RunRenderPrimitive(ctx, start, end, prim); } /* Flush render op cached for last several quads. */ r700WaitForIdleClean(context); - radeonReleaseArrays(ctx, 0); - - rcommonFlushCmdBuf( &context->radeon, __FUNCTION__ ); + radeonReleaseArrays(ctx, ~0); return GL_FALSE; } diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index e0a57425917..e95f52400a2 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -600,14 +600,46 @@ static void r700BlendFuncSeparate(GLcontext * ctx, /** * Translate LogicOp enums into hardware representation. - * Both use a very logical bit-wise layout, but unfortunately the order - * of bits is reversed. */ static GLuint translate_logicop(GLenum logicop) { - GLuint bits = logicop - GL_CLEAR; - bits = ((bits & 1) << 3) | ((bits & 2) << 1) | ((bits & 4) >> 1) | ((bits & 8) >> 3); - return bits; + switch (logicop) { + case GL_CLEAR: + return 0x00; + case GL_SET: + return 0xff; + case GL_COPY: + return 0xcc; + case GL_COPY_INVERTED: + return 0x33; + case GL_NOOP: + return 0xaa; + case GL_INVERT: + return 0x55; + case GL_AND: + return 0x88; + case GL_NAND: + return 0x77; + case GL_OR: + return 0xee; + case GL_NOR: + return 0x11; + case GL_XOR: + return 0x66; + case GL_EQUIV: + return 0xaa; + case GL_AND_REVERSE: + return 0x44; + case GL_AND_INVERTED: + return 0x22; + case GL_OR_REVERSE: + return 0xdd; + case GL_OR_INVERTED: + return 0xbb; + default: + fprintf(stderr, "unknown blend logic operation %x\n", logicop); + return 0xcc; + } } /** @@ -1822,6 +1854,8 @@ void r700InitState(GLcontext * ctx) //------------------- /* Set up color compare mask */ r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF; + context->radeon.hw.all_dirty = GL_TRUE; + } void r700InitStateFuncs(struct dd_function_table *functions) //----------------- diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index af6a6b8c295..31e71cdfa30 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -335,17 +335,13 @@ void * r700GetActiveVpShaderBo(GLcontext * ctx) GLboolean r700SetupVertexProgram(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); - - BATCH_LOCALS(&context->radeon); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - + BATCH_LOCALS(&context->radeon); struct r700_vertex_program *vp = (struct r700_vertex_program *)ctx->VertexProgram._Current; struct gl_program_parameter_list *paramList; unsigned int unNumParamData; - unsigned int ui; if(GL_FALSE == vp->loaded) @@ -394,7 +390,6 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) */ /* sent out shader constants. */ - paramList = vp->mesa_program.Base.Parameters; if(NULL != paramList) @@ -403,8 +398,8 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) unNumParamData = paramList->NumParameters * 4; - BEGIN_BATCH_NO_AUTOSTATE(unNumParamData + 2); - + BEGIN_BATCH_NO_AUTOSTATE(unNumParamData + 2); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, unNumParamData)); /* assembler map const from very beginning. */ R600_OUT_BATCH(SQ_ALU_CONSTANT_VS_OFFSET * 4); diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index dde615a4d9d..7f503a9ff71 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -887,10 +887,11 @@ void radeonUpdatePageFlipping(radeonContextPtr radeon) void radeon_window_moved(radeonContextPtr radeon) { + /* Cliprects has to be updated before doing anything else */ + radeonSetCliprects(radeon); if (!radeon->radeonScreen->driScreen->dri2.enabled) { radeonUpdatePageFlipping(radeon); } - radeonSetCliprects(radeon); } void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height) diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 4e4eba5d94c..2a017b59cfc 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -245,9 +245,20 @@ GLboolean radeonInitContext(radeonContextPtr radeon, radeon->texture_row_align = 256; radeon->texture_rect_row_align = 256; radeon->texture_compressed_row_align = 256; - } else { + } else if (IS_R200_CLASS(radeon->radeonScreen) || + IS_R100_CLASS(radeon->radeonScreen)) { radeon->texture_row_align = 32; radeon->texture_rect_row_align = 64; + radeon->texture_compressed_row_align = 32; + } else { /* R300 - not sure this is all correct */ + int chip_family = radeon->radeonScreen->chip_family; + if (chip_family == CHIP_FAMILY_RS600 || + chip_family == CHIP_FAMILY_RS690 || + chip_family == CHIP_FAMILY_RS740) + radeon->texture_row_align = 64; + else + radeon->texture_row_align = 32; + radeon->texture_rect_row_align = 64; radeon->texture_compressed_row_align = 64; } diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index cd1986e1fc3..d7e94a68949 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -445,7 +445,6 @@ struct radeon_context { GLuint numClipRects; /* Cliprects for the draw buffer */ drm_clip_rect_t *pClipRects; unsigned int lastStamp; - GLboolean lost_context; drm_radeon_sarea_t *sarea; /* Private SAREA data */ /* Mirrors of some DRI state */ diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index 5774f7ebcf7..2f0ed1cfced 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -85,8 +85,6 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) } rmesa->vtbl.get_lock(rmesa); - - rmesa->lost_context = GL_TRUE; } void radeon_lock_hardware(radeonContextPtr radeon) diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index ad501c454ce..6a065f04688 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -951,7 +951,7 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t fprintf(stderr, " Allocate new miptree\n"); radeon_try_alloc_miptree(rmesa, t, &baseimage->base, 0, texObj->BaseLevel); if (!t->mt) { - _mesa_problem(ctx, "r300_validate_texture failed to alloc miptree"); + _mesa_problem(ctx, "radeon_validate_texture failed to alloc miptree"); return GL_FALSE; } } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 8b5094a04f3..b39403129d0 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -60,6 +60,7 @@ #include "pipe/p_inlines.h" #include "util/u_tile.h" #include "util/u_draw_quad.h" +#include "util/u_math.h" #include "shader/prog_instruction.h" #include "cso_cache/cso_context.h" @@ -341,6 +342,7 @@ make_texture(struct st_context *st, enum pipe_format pipeFormat; GLuint cpp; GLenum baseFormat; + int ptw, pth; baseFormat = _mesa_base_format(format); @@ -355,7 +357,28 @@ make_texture(struct st_context *st, if (!pixels) return NULL; - pt = st_texture_create(st, PIPE_TEXTURE_2D, pipeFormat, 0, width, height, 1, + /* Need to use POT texture? */ + ptw = width; + pth = height; + if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES)) { + int l2pt, maxSize; + + l2pt = util_logbase2(width); + if (1<screen->get_param(pipe->screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); + assert(ptw <= maxSize); + assert(pth <= maxSize); + } + + pt = st_texture_create(st, PIPE_TEXTURE_2D, pipeFormat, 0, ptw, pth, 1, PIPE_TEXTURE_USAGE_SAMPLER); if (!pt) { _mesa_unmap_drawpix_pbo(ctx, unpack); @@ -420,7 +443,7 @@ make_texture(struct st_context *st, static void draw_quad(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z, GLfloat x1, GLfloat y1, const GLfloat *color, - GLboolean invertTex) + GLboolean invertTex, GLfloat maxXcoord, GLfloat maxYcoord) { struct st_context *st = ctx->st; struct pipe_context *pipe = ctx->st->pipe; @@ -435,8 +458,9 @@ draw_quad(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z, const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f; const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f; const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f; - const GLfloat sLeft = 0.0f, sRight = 1.0f; - const GLfloat tTop = invertTex, tBot = 1.0f - tTop; + const GLfloat sLeft = 0.0f, sRight = maxXcoord; + const GLfloat tTop = invertTex ? maxYcoord : 0.0f; + const GLfloat tBot = invertTex ? 0.0f : maxYcoord; GLuint tex, i; /* upper-left */ @@ -608,7 +632,9 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, y0 = (GLfloat) y; y1 = y + height * ctx->Pixel.ZoomY; - draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex); + draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex, + (GLfloat) width / pt->width[0], + (GLfloat) height / pt->height[0]); /* restore state */ cso_restore_rasterizer(cso); @@ -648,7 +674,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, usage = PIPE_TRANSFER_READ_WRITE; else usage = PIPE_TRANSFER_WRITE; - + pt = st_cond_flush_get_tex_transfer(st_context(ctx), strb->texture, 0, 0, 0, usage, x, y, width, height); @@ -841,7 +867,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, usage = PIPE_TRANSFER_READ_WRITE; else usage = PIPE_TRANSFER_WRITE; - + ptDraw = st_cond_flush_get_tex_transfer(st_context(ctx), rbDraw->texture, 0, 0, 0, usage, dstx, dsty, @@ -849,7 +875,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, assert(ptDraw->block.width == 1); assert(ptDraw->block.height == 1); - + /* map the stencil buffer */ drawMap = screen->transfer_map(screen, ptDraw); @@ -923,6 +949,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, struct pipe_texture *pt; GLfloat *color; enum pipe_format srcFormat, texFormat; + int ptw, pth; pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); @@ -1004,13 +1031,34 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, height -= -srcy; srcy = 0; } - + if (height < 0) return; } + /* Need to use POT texture? */ + ptw = width; + pth = height; + if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES)) { + int l2pt, maxSize; + + l2pt = util_logbase2(width); + if (1<screen->get_param(pipe->screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); + assert(ptw <= maxSize); + assert(pth <= maxSize); + } + pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, texFormat, 0, - width, height, 1, + ptw, pth, 1, PIPE_TEXTURE_USAGE_SAMPLER); if (!pt) return; diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index ee71c012c64..e8d7f70ad65 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -59,6 +59,7 @@ #include "util/u_tile.h" #include "util/u_blit.h" #include "util/u_surface.h" +#include "util/u_math.h" #define DBG if (0) printf @@ -237,16 +238,6 @@ do_memcpy(void *dest, const void *src, size_t n) } -static INLINE unsigned -logbase2(unsigned n) -{ - unsigned log2 = 0; - while (n >>= 1) - ++log2; - return log2; -} - - /** * Return default texture usage bitmask for the given texture format. */ @@ -340,9 +331,9 @@ guess_and_alloc_texture(struct st_context *st, lastLevel = firstLevel; } else { - GLuint l2width = logbase2(width); - GLuint l2height = logbase2(height); - GLuint l2depth = logbase2(depth); + GLuint l2width = util_logbase2(width); + GLuint l2height = util_logbase2(height); + GLuint l2depth = util_logbase2(depth); lastLevel = firstLevel + MAX2(MAX2(l2width, l2height), l2depth); } diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index c8fb39d558f..dd7e40be088 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -368,7 +368,7 @@ compile_instruction( fullinst->Instruction.Opcode = TGSI_OPCODE_ADD; break; case OPCODE_BGNLOOP: - fullinst->Instruction.Opcode = TGSI_OPCODE_BGNLOOP2; + fullinst->Instruction.Opcode = TGSI_OPCODE_BGNLOOP; fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size; break; case OPCODE_BGNSUB: @@ -426,7 +426,7 @@ compile_instruction( fullinst->Instruction.Opcode = TGSI_OPCODE_ENDIF; break; case OPCODE_ENDLOOP: - fullinst->Instruction.Opcode = TGSI_OPCODE_ENDLOOP2; + fullinst->Instruction.Opcode = TGSI_OPCODE_ENDLOOP; fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size; break; case OPCODE_ENDSUB: