support branch and loop in pixel shader
author: Zou Nan hai <nanhai.zou@intel.com>
Thu, 21 Jun 2007 02:22:28 +0000 (10:22 +0800)
committer: Zou Nan hai <nanhai.zou@intel.com>
Thu, 21 Jun 2007 02:22:28 +0000 (10:22 +0800)
  Most of the samples work with some small modifications.

17 files changed:
progs/glsl/CH06-brick.frag.txt
progs/glsl/CH11-bumpmap.frag.txt
progs/glsl/CH11-bumpmap.vert.txt
progs/glsl/CH11-toyball.frag.txt
progs/glsl/CH11-toyball.vert.txt
src/mesa/drivers/dri/i965/Makefile
src/mesa/drivers/dri/i965/brw_eu.h
src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_vs.h
src/mesa/drivers/dri/i965/brw_vs_emit.c
src/mesa/drivers/dri/i965/brw_wm.c
src/mesa/drivers/dri/i965/brw_wm.h
src/mesa/drivers/dri/i965/brw_wm_emit.c
src/mesa/drivers/dri/i965/brw_wm_fp.c
src/mesa/drivers/dri/i965/brw_wm_glsl.c [new file with mode: 0644]
src/mesa/drivers/dri/i965/brw_wm_pass2.c
src/mesa/drivers/dri/i965/brw_wm_state.c

index 06ef04e3afb0e317f4f2eb7d0f66b2c5aa110e09..388c5f9e660adaef0ec269c82c999cec162adef3 100644 (file)
@@ -23,7 +23,11 @@ void main()
     
     position = MCposition / BrickSize;
 
-    if (fract(position.y * 0.5) > 0.5)
+//    if (fract(position.y * 0.5) > 0.5)
+//        position.x += 0.5;
+    float tmp;
+    tmp  = fract(position.y * 0.5);
+    if (tmp > 0.5)
         position.x += 0.5;
 
     position = fract(position);
index 063576f5a3c29fa9ff95a27f5f89b0a3e09d16ca..1467f767ec4ce6328b17cf690cfd0eaf21d8eb9f 100644 (file)
@@ -33,7 +33,7 @@ void main()
     litColor = SurfaceColor * max(dot(normDelta, LightDir), 0.0);
     vec3 reflectDir = reflect(LightDir, normDelta);
     
-    float spec = max(dot(EyeDir, reflectDir), 0.0);
+    float spec = max(dot(normalize(EyeDir), reflectDir), 0.0);
     spec *= SpecularFactor;
     litColor = min(litColor + spec, vec3(1.0));
 
index d3d19f62ac36be3cf77fb4e850545389a5aa4731..55cf43ba7dd749861db145b66715571b00533fbf 100644 (file)
@@ -31,8 +31,9 @@ void main()
     v.z = dot(LightPosition, n);
     LightDir = normalize(v);
 
-    v.x = dot(EyeDir, t);
-    v.y = dot(EyeDir, b);
-    v.z = dot(EyeDir, n);
-    EyeDir = normalize(v);
+/*  v.x = dot(EyeDir, t);
+  v.y = dot(EyeDir, b);
+  v.z = dot(EyeDir, n);
+    EyeDir = normalize(EyeDir);
+*/
 }
index 90ec1c27fc1824b8ad310df232da1e0eeccd44ae..f3cac62fb3e210877fd7d7edad7a595b7c716d86 100644 (file)
@@ -49,14 +49,15 @@ void main()
     inorout += dot(distance, vec4(1.0));
 
     distance.x = dot(p, HalfSpace4);
-    distance.y = StripeWidth - abs(p.z);
+//    distance.y = StripeWidth - abs(p.z);
+    distance.y = StripeWidth - abs(p.y);
     distance = smoothstep(-FWidth, FWidth, distance);
     inorout += distance.x;
 
     inorout = clamp(inorout, 0.0, 1.0);
 
-    surfColor = mix(Yellow, Red, inorout);
-    surfColor = mix(surfColor, Blue, distance.y);
+    surfColor = mix(Yellow, Blue, distance.y);
+    surfColor = mix(surfColor, Red, inorout);
 
     // normal = point on surface for sphere at (0,0,0)
     normal = p;
index b7da3ac839ee8d3442abe2e6e2de1bf77eb12a75..a3ee1b03776e375e63551cf5e21b4bc45327f5fe 100644 (file)
@@ -14,10 +14,11 @@ uniform vec4 BallCenter;   // ball center in modelling coordinates
 
 void main()
 { 
-//orig:    ECposition   = gl_ModelViewMatrix * gl_Vertex;
+    ECposition   = gl_ModelViewMatrix * gl_Vertex;
 
-    ECposition = gl_TextureMatrix[0] * gl_Vertex;
-    ECposition = gl_ModelViewMatrix * ECposition;
+//  ECposition = gl_TextureMatrix[0] * gl_Vertex;
+//  ECposition = gl_MultiTexCoord0 * gl_Vertex;
+//    ECposition = gl_ModelViewMatrix * ECposition;
 
     ECballCenter = gl_ModelViewMatrix * BallCenter;
     gl_Position  = ftransform();
index 9e4ff112dc37e13e62e35360e1ce4015b053af90..66de6f583c33bf2014fcd3aeb6381d00afdc00e3 100644 (file)
@@ -70,6 +70,7 @@ DRIVER_SOURCES = \
        brw_wm_emit.c \
        brw_wm_fp.c \
        brw_wm_iz.c \
+       brw_wm_glsl.c \
        brw_wm_pass0.c \
        brw_wm_pass1.c \
        brw_wm_pass2.c \
index 144f209edabdd7792d7f45aa66e4c91a04fecf4e..9b6581fd9328593cbef5dc91776f52285b009de7 100644 (file)
@@ -668,7 +668,10 @@ static __inline struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offse
    return ptr;
 }
 
-
+/* Address of the slot in p->store selected by the current p->nr_insn. */
+static __inline struct brw_instruction *current_insn( struct brw_compile *p)
+{
+       return p->store + p->nr_insn;
+}
 
 void brw_pop_insn_state( struct brw_compile *p );
 void brw_push_insn_state( struct brw_compile *p );
@@ -808,9 +811,10 @@ void brw_ENDIF(struct brw_compile *p,
 struct brw_instruction *brw_DO(struct brw_compile *p,
                               GLuint execute_size);
 
-void brw_WHILE(struct brw_compile *p, 
+struct brw_instruction *brw_WHILE(struct brw_compile *p, 
               struct brw_instruction *patch_insn);
 
+struct brw_instruction *brw_BREAK(struct brw_compile *p);
 /* Forward jumps:
  */
 void brw_land_fwd_jump(struct brw_compile *p, 
index 0c04bf6c62433e2c2a299959108db76483fdf6f3..7b95fbdac03e2fea0c7e8f960effad7516be1363 100644 (file)
@@ -186,7 +186,7 @@ void brw_set_src1( struct brw_instruction *insn,
        * in the future:
        */
       assert (reg.address_mode == BRW_ADDRESS_DIRECT);
-      assert (reg.file == BRW_GENERAL_REGISTER_FILE);
+      //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
 
       if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits3.da1.src1_subreg_nr = reg.subnr;
@@ -597,6 +597,20 @@ void brw_ENDIF(struct brw_compile *p,
    }
 }
 
+/* Append a BREAK instruction to the program being built in p.
+ *
+ * Destination and first source are the IP register, the second source
+ * is the immediate 0.  The instruction is emitted uncompressed, 8 wide,
+ * with the mask control disabled.  Returns the new instruction so the
+ * caller can keep a handle on it.
+ */
+struct brw_instruction *brw_BREAK(struct brw_compile *p)
+{
+   struct brw_instruction *brk = next_insn(p, BRW_OPCODE_BREAK);
+
+   brw_set_dest(brk, brw_ip_reg());
+   brw_set_src0(brk, brw_ip_reg());
+   brw_set_src1(brk, brw_imm_d(0x0));
+
+   /* Header overrides for this instruction. */
+   brk->header.mask_control = BRW_MASK_DISABLE;
+   brk->header.execution_size = BRW_EXECUTE_8;
+   brk->header.compression_control = BRW_COMPRESSION_NONE;
+
+   /* Cleared after the immediate source has been set. */
+   brk->bits3.if_else.pad0 = 0;
+   return brk;
+}
+
 /* DO/WHILE loop:
  */
 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
@@ -608,13 +622,15 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
 
       /* Override the defaults for this instruction:
        */
-      brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
-      brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
-      brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
+      brw_set_dest(insn, brw_null_reg());
+      brw_set_src0(insn, brw_null_reg());
+      brw_set_src1(insn, brw_null_reg());
 
       insn->header.compression_control = BRW_COMPRESSION_NONE;
       insn->header.execution_size = execute_size;
+      insn->header.predicate_control = BRW_PREDICATE_NONE;
       /* insn->header.mask_control = BRW_MASK_ENABLE; */
+      insn->header.mask_control = BRW_MASK_DISABLE;
 
       return insn;
    }
@@ -622,7 +638,7 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
 
 
 
-void brw_WHILE(struct brw_compile *p, 
+struct brw_instruction *brw_WHILE(struct brw_compile *p, 
               struct brw_instruction *do_insn)
 {
    struct brw_instruction *insn;
@@ -653,7 +669,9 @@ void brw_WHILE(struct brw_compile *p,
 
 /*    insn->header.mask_control = BRW_MASK_ENABLE; */
 
+   insn->header.mask_control = BRW_MASK_DISABLE;
    p->current->header.predicate_control = BRW_PREDICATE_NONE;   
+   return insn;
 }
 
 
index 912ab563f4dc3c2aafe3ff4c05a153aedd22123c..36636b5ffd6a9f457d35cfc3253fb4ca034d463b 100644 (file)
@@ -69,6 +69,11 @@ struct brw_vs_compile {
    struct brw_reg tmp;
    struct brw_reg stack;
 
+   struct {    
+       GLboolean used_in_src;
+       struct brw_reg reg;
+   } output_regs[128];
+
    struct brw_reg userplane[6];
 
 };
index ffc1a0ea09f177cc03f4167803d85302f18a391a..fa94d5b1a87cd59b57f0adfbe27b69e6578280b9 100644 (file)
@@ -135,6 +135,13 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
       reg++;
    }
 
+   for (i = 0; i < 128; i++) {
+       if (c->output_regs[i].used_in_src) {
+            c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+            reg++;
+        }
+   }
+
    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
    reg += 2;
  
@@ -686,28 +693,28 @@ static void emit_arl( struct brw_vs_compile *c,
  * account.
  */
 static struct brw_reg get_arg( struct brw_vs_compile *c,
-                              struct prog_src_register src )
+                              struct prog_src_register *src )
 {
    struct brw_reg reg;
 
-   if (src.File == PROGRAM_UNDEFINED)
+   if (src->File == PROGRAM_UNDEFINED)
       return brw_null_reg();
 
-   if (src.RelAddr) 
-      reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
+   if (src->RelAddr) 
+      reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
    else
-      reg = get_reg(c, src.File, src.Index);
+      reg = get_reg(c, src->File, src->Index);
 
    /* Convert 3-bit swizzle to 2-bit.  
     */
-   reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src.Swizzle, 0),
-                                      GET_SWZ(src.Swizzle, 1),
-                                      GET_SWZ(src.Swizzle, 2),
-                                      GET_SWZ(src.Swizzle, 3));
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
+                                      GET_SWZ(src->Swizzle, 1),
+                                      GET_SWZ(src->Swizzle, 2),
+                                      GET_SWZ(src->Swizzle, 3));
 
    /* Note this is ok for non-swizzle instructions: 
     */
-   reg.negate = src.NegateBase ? 1 : 0;   
+   reg.negate = src->NegateBase ? 1 : 0;   
 
    return reg;
 }
@@ -921,10 +928,8 @@ post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst )
        inst1 = &c->vp->program.Base.Instructions[insn];
        brw_inst1 = inst1->Data;
        switch (inst1->Opcode) {
-          case OPCODE_BRA:
-          case OPCODE_BRK:
           case OPCODE_CAL:
-          case OPCODE_ENDLOOP:
+          case OPCODE_BRA:
               target_insn = inst1->BranchTarget;
               inst2 = &c->vp->program.Base.Instructions[target_insn];
               brw_inst2 = inst2->Data;
@@ -945,12 +950,12 @@ post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst )
  */
 void brw_vs_emit(struct brw_vs_compile *c )
 {
-#define MAX_IF_DEPTH 32
+#define MAX_IFSN 32
    struct brw_compile *p = &c->func;
    GLuint nr_insns = c->vp->program.Base.NumInstructions;
    GLuint insn, if_insn = 0;
    struct brw_instruction *end_inst;
-   struct brw_instruction *if_inst[MAX_IF_DEPTH];
+   struct brw_instruction *if_inst[MAX_IFSN];
    struct brw_indirect stack_index = brw_indirect(0, 0);   
 
    if (INTEL_DEBUG & DEBUG_VS) {
@@ -962,6 +967,20 @@ void brw_vs_emit(struct brw_vs_compile *c )
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_access_mode(p, BRW_ALIGN_16);
    
+   /* Message registers can't be read, so copy the output into GRF register
+      if they are used in source registers */
+   for (insn = 0; insn < nr_insns; insn++) {
+       GLuint i;
+       struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+       for (i = 0; i < 3; i++) {
+          struct prog_src_register *src = &inst->SrcReg[i];
+          GLuint index = src->Index;
+          GLuint file = src->File;     
+          if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS)
+              c->output_regs[index].used_in_src = GL_TRUE;
+       }
+   }
+
    /* Static register allocation
     */
    brw_vs_alloc_regs(c);
@@ -977,8 +996,15 @@ void brw_vs_emit(struct brw_vs_compile *c )
        */
       inst->Data = &p->store[p->nr_insn];
       if (inst->Opcode != OPCODE_SWZ)
-        for (i = 0; i < 3; i++) 
-           args[i] = get_arg(c, inst->SrcReg[i]);
+         for (i = 0; i < 3; i++) {
+             struct prog_src_register *src = &inst->SrcReg[i];
+             GLuint index = src->Index;
+             GLuint file = src->File;  
+             if (file == PROGRAM_OUTPUT&&c->output_regs[index].used_in_src)
+                 args[i] = c->output_regs[index].reg;
+             else
+                 args[i] = get_arg(c, src);
+         }
 
       /* Get dest regs.  Note that it is possible for a reg to be both
        * dst and arg, given the static allocation of registers.  So
@@ -1085,13 +1111,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
       case OPCODE_XPD:
         emit_xpd(p, dst, args[0], args[1]);
         break;
-
-      case OPCODE_INT:
-        /* XXX TODO track type information in shader program */
-        brw_MOV(p, dst, args[0]);
-        break;
       case OPCODE_IF:
-        assert(if_insn < MAX_IF_DEPTH);
+        assert(if_insn < MAX_IFSN);
          if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
         break;
       case OPCODE_ELSE:
@@ -1101,6 +1122,11 @@ void brw_vs_emit(struct brw_vs_compile *c )
          assert(if_insn > 0);
         brw_ENDIF(p, if_inst[--if_insn]);
         break;                 
+      case OPCODE_BRA:
+         brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+         brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+         brw_set_predicate_control_flag_value(p, 0xff);
+        break;
       case OPCODE_CAL:
         brw_set_access_mode(p, BRW_ALIGN_1);
         brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
@@ -1116,13 +1142,9 @@ void brw_vs_emit(struct brw_vs_compile *c )
         brw_set_access_mode(p, BRW_ALIGN_1);
          brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0));
         brw_set_access_mode(p, BRW_ALIGN_16);
-      case OPCODE_ENDLOOP:
-      case OPCODE_BRK:
-      case OPCODE_BRA:
       case OPCODE_END: 
          brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
         break;
-      case OPCODE_BGNLOOP:
       case OPCODE_PRINT:
       case OPCODE_BGNSUB:
       case OPCODE_ENDSUB:
@@ -1131,8 +1153,12 @@ void brw_vs_emit(struct brw_vs_compile *c )
         _mesa_printf("Unsupport opcode %d in vertex shader\n", inst->Opcode);
         break;
       }
-      brw_set_predicate_control(p,
-                inst->CondUpdate?BRW_PREDICATE_NORMAL:BRW_PREDICATE_NONE);
+
+      if (inst->DstReg.File == PROGRAM_OUTPUT
+             &&inst->DstReg.Index != VERT_RESULT_HPOS
+             &&c->output_regs[inst->DstReg.Index].used_in_src)
+         brw_MOV(p, get_dst(c, inst->DstReg), dst);
+
       release_tmps(c);
    }
 
index a02a0a23af6e3db12ad73bea01a1893401316f4b..45ba4eaf322dd1cc30c0ad63348037732fc8ac92 100644 (file)
@@ -154,47 +154,49 @@ static void do_wm_prog( struct brw_context *brw,
    c->fp = fp;
    c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
 
-
-   /* Augment fragment program.  Add instructions for pre- and
-    * post-fragment-program tasks such as interpolation and fogging.
-    */
-   brw_wm_pass_fp(c);
-   
-   /* Translate to intermediate representation.  Build register usage
-    * chains.
-    */
-   brw_wm_pass0(c);
-
-   /* Dead code removal.
-    */
-   brw_wm_pass1(c);
-
-   /* Hal optimization
-    */
-   brw_wm_pass_hal (c);
-   
-   /* Register allocation.
-    */
-   c->grf_limit = BRW_WM_MAX_GRF/2;
-
-   /* This is where we start emitting gen4 code:
-    */
-   brw_init_compile(&c->func);    
-
-   brw_wm_pass2(c);
-
-   c->prog_data.total_grf = c->max_wm_grf;
-   if (c->last_scratch) {
-      c->prog_data.total_scratch =
-        c->last_scratch + 0x40;
+   if (brw_wm_is_glsl(&c->fp->program)) {
+       brw_wm_glsl_emit(c);
    } else {
-      c->prog_data.total_scratch = 0;
+       /* Augment fragment program.  Add instructions for pre- and
+       * post-fragment-program tasks such as interpolation and fogging.
+       */
+       brw_wm_pass_fp(c);
+
+       /* Translate to intermediate representation.  Build register usage
+       * chains.
+       */
+       brw_wm_pass0(c);
+
+       /* Dead code removal.
+       */
+       brw_wm_pass1(c);
+
+       /* Hal optimization
+       */
+       brw_wm_pass_hal (c);
+
+       /* Register allocation.
+       */
+       c->grf_limit = BRW_WM_MAX_GRF/2;
+
+       /* This is where we start emitting gen4 code:
+       */
+       brw_init_compile(&c->func);    
+
+       brw_wm_pass2(c);
+
+       c->prog_data.total_grf = c->max_wm_grf;
+       if (c->last_scratch) {
+          c->prog_data.total_scratch =
+              c->last_scratch + 0x40;
+       } else {
+          c->prog_data.total_scratch = 0;
+       }
+
+       /* Emit GEN4 code.
+       */
+       brw_wm_emit(c);
    }
-
-   /* Emit GEN4 code.
-    */
-   brw_wm_emit(c);
-
    /* get the program
     */
    program = brw_get_program(&c->func, &program_size);
index f5fddfdb68a7c9dd5a8170d48c7b8f3c6df8cf19..4143d5be6cb0ffa53d11d91b2f0d64ba04b88c9b 100644 (file)
@@ -231,6 +231,14 @@ struct brw_wm_compile {
    GLuint grf_limit;
    GLuint max_wm_grf;
    GLuint last_scratch;
+
+   struct {
+       GLboolean inited;
+       struct brw_reg reg;
+   } wm_regs[PROGRAM_PAYLOAD+1][256][4];
+   struct brw_reg ret_reg;
+   GLuint reg_index;
+   GLuint tmp_index;
 };
 
 
@@ -259,4 +267,6 @@ void brw_wm_lookup_iz( GLuint line_aa,
                       GLuint lookup,
                       struct brw_wm_prog_key *key );
 
+GLboolean brw_wm_is_glsl(struct gl_fragment_program *fp);
+void brw_wm_glsl_emit(struct brw_wm_compile *c);
 #endif
index 197a0ae13d2825e9421464f74f2cc535f445ebc7..5660b5551620b247a1fe1afe5eb18fa1cef59b84 100644 (file)
@@ -229,20 +229,20 @@ static void emit_cinterp( struct brw_compile *p,
                         GLuint mask,
                         const struct brw_reg *arg0 )
 {
-   struct brw_reg interp[4];
-   GLuint nr = arg0[0].nr;
-   GLuint i;
-
-   interp[0] = brw_vec1_grf(nr, 0);
-   interp[1] = brw_vec1_grf(nr, 4);
-   interp[2] = brw_vec1_grf(nr+1, 0);
-   interp[3] = brw_vec1_grf(nr+1, 4);
-
-   for(i = 0; i < 4; i++ ) {
-      if (mask & (1<<i)) {
-        brw_MOV(p, dst[i], suboffset(interp[i],3));    /* TODO: optimize away like other moves */
-      }
-   }
+       struct brw_reg interp[4];
+       GLuint nr = arg0[0].nr;
+       GLuint i;
+
+       interp[0] = brw_vec1_grf(nr, 0);
+       interp[1] = brw_vec1_grf(nr, 4);
+       interp[2] = brw_vec1_grf(nr+1, 0);
+       interp[3] = brw_vec1_grf(nr+1, 4);
+
+       for(i = 0; i < 4; i++ ) {
+               if (mask & (1<<i)) {
+                       brw_MOV(p, dst[i], suboffset(interp[i],3));     /* TODO: optimize away like other moves */
+               }
+       }
 }
 
 
index ff97d87dc45210f7f4ad1f5209b27e209dbcf62f..403160c49497889c4f927054a3777b146e52d5d6 100644 (file)
@@ -176,6 +176,7 @@ static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
 {
    struct prog_instruction *inst = get_fp_inst(c);
    *inst = *inst0;
+   inst->Data = (void *)inst0;
    return inst;
 }
 
@@ -201,7 +202,6 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c,
    inst->SrcReg[0] = src0;
    inst->SrcReg[1] = src1;
    inst->SrcReg[2] = src2;
-   
    return inst;
 }
    
@@ -907,8 +907,10 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
          */
         out->DstReg.WriteMask = 0;
         break;
-
       case OPCODE_END:
+        emit_fog(c);
+        emit_fb_write(c);
+        break;
       case OPCODE_PRINT:
         break;
         
@@ -917,15 +919,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
         break;
       }
    }
-   
-   emit_fog(c);
-   emit_fb_write(c);
-
 
    if (INTEL_DEBUG & DEBUG_WM) {
-      _mesa_printf("\n\n\npass_fp:\n");
-      print_insns( c->prog_instructions, c->nr_fp_insns );
-      _mesa_printf("\n");
+          _mesa_printf("\n\n\npass_fp:\n");
+          print_insns( c->prog_instructions, c->nr_fp_insns );
+          _mesa_printf("\n");
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
new file mode 100644 (file)
index 0000000..cdd7976
--- /dev/null
@@ -0,0 +1,1177 @@
+#include "macros.h"
+#include "shader/prog_parameter.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+
+/* Heuristic only: a proper flag in gl_fragment_program is needed later.
+ *
+ * Scans the program's instructions and returns GL_TRUE as soon as one
+ * of IF, ENDIF, CAL, BRK, RET or BGNLOOP is seen; returns GL_FALSE when
+ * none of them occurs.
+ */
+GLboolean brw_wm_is_glsl(struct gl_fragment_program *fp)
+{
+    int n;
+
+    for (n = 0; n < fp->Base.NumInstructions; n++) {
+        struct prog_instruction *cur = &fp->Base.Instructions[n];
+
+        if (cur->Opcode == OPCODE_IF ||
+            cur->Opcode == OPCODE_ENDIF ||
+            cur->Opcode == OPCODE_CAL ||
+            cur->Opcode == OPCODE_BRK ||
+            cur->Opcode == OPCODE_RET ||
+            cur->Opcode == OPCODE_BGNLOOP)
+            return GL_TRUE;
+    }
+
+    return GL_FALSE;
+}
+
+/* Record reg as the hardware register backing (file, index, component)
+ * in c->wm_regs and mark that slot as initialised.
+ */
+static void set_reg(struct brw_wm_compile *c, int file, int index, 
+       int component, struct brw_reg reg)
+{
+    c->wm_regs[file][index][component].inited = GL_TRUE;
+    c->wm_regs[file][index][component].reg = reg;
+}
+
+/* Return the index (0..3) of the lowest bit set in the instruction's
+ * destination write mask.  When none of the four low bits is set the
+ * result is 4.
+ */
+static int get_scalar_dst_index(struct prog_instruction *inst)
+{
+    int chan = 0;
+
+    while (chan < 4 && !(inst->DstReg.WriteMask & (1<<chan)))
+        chan++;
+
+    return chan;
+}
+
+/* Hand out the GRF currently named by c->tmp_index as an 8-wide
+ * temporary and step c->tmp_index down by one for the next request.
+ */
+static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
+{
+    const GLuint grf = c->tmp_index;
+
+    c->tmp_index = grf - 1;
+    return brw_vec8_grf(grf, 0);
+}
+
+/* Reset the temporary allocator: the next alloc_tmp() starts again
+ * from GRF 127.
+ */
+static void release_tmps(struct brw_wm_compile *c)
+{
+    const GLuint first_tmp_grf = 127;
+
+    c->tmp_index = first_tmp_grf;
+}
+
+/* Look up, or allocate on first use, the register for one component of
+ * a program register.
+ *
+ * PROGRAM_CONSTANT and PROGRAM_UNIFORM share the PROGRAM_STATE_VAR
+ * table; PROGRAM_UNDEFINED yields the null register.  A slot that has
+ * not been initialised yet gets the 8-wide GRF at c->reg_index, which
+ * is then advanced by one.  The result is negated when bit `component`
+ * of neg is set, and afterwards passed through brw_abs() when abs is
+ * non-zero.  The nr argument is not used here.
+ */
+static struct brw_reg 
+get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs)
+{
+    struct brw_reg reg;
+
+    if (file == PROGRAM_UNDEFINED)
+        return brw_null_reg();
+
+    if (file == PROGRAM_CONSTANT || file == PROGRAM_UNIFORM)
+        file = PROGRAM_STATE_VAR;
+
+    if (c->wm_regs[file][index][component].inited) {
+        reg = c->wm_regs[file][index][component].reg;
+    } else {
+        /* First reference: claim the next free GRF for this slot. */
+        reg = brw_vec8_grf(c->reg_index, 0);
+        set_reg(c, file, index, component, reg);
+        c->reg_index++;
+    }
+
+    if (neg & (1<< component))
+        reg = negate(reg);
+
+    if (abs)
+        reg = brw_abs(reg);
+
+    return reg;
+}
+
+/* Seed the register map before any instruction is translated.
+ *
+ * In order:
+ *  - the four PAYLOAD_DEPTH components are mapped to GRF 2*i for
+ *    i < key.nr_depth_regs and to GRF 0 otherwise;
+ *  - every program parameter component is mapped to one scalar slot,
+ *    eight slots per GRF, and its value pointer is stored in
+ *    prog_data.param;
+ *  - every input in FRAG_BIT_WPOS | fp_interp_emitted gets two GRFs,
+ *    with all four components mapped to the first of them;
+ *  - the curbe/urb fields of prog_data are filled in and one more GRF
+ *    is set aside as c->ret_reg.
+ */
+static void prealloc_reg(struct brw_wm_compile *c)
+{
+    struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
+    int nr_params = params->NumParameters;
+    GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
+    int nr_interp_regs = 0;
+    int attr, chan, slot;
+
+    /* Depth payload. */
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg depth = brw_vec8_grf(0, 0);
+
+        if (chan < c->key.nr_depth_regs)
+            depth = brw_vec8_grf(chan*2, 0);
+        set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, chan, depth);
+    }
+    c->reg_index += 2*c->key.nr_depth_regs;
+
+    /* Program parameters: slot = 4*parameter + component. */
+    c->prog_data.nr_params = 4*nr_params;
+    for (slot = 0; slot < 4*nr_params; slot++) {
+        const int vec = slot / 4;
+        const int comp = slot % 4;
+
+        c->prog_data.param[slot] = &params->ParameterValues[vec][comp];
+        set_reg(c, PROGRAM_STATE_VAR, vec, comp,
+                brw_vec1_grf(c->reg_index + slot/8, slot%8));
+    }
+    c->nr_creg = 2*((4*nr_params+15)/16);
+    c->reg_index += c->nr_creg;
+
+    /* Inputs: two GRFs each. */
+    for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
+        struct brw_reg base;
+
+        if (!(inputs & (1<<attr)))
+            continue;
+
+        nr_interp_regs++;
+        base = brw_vec8_grf(c->reg_index, 0);
+        for (chan = 0; chan < 4; chan++)
+            set_reg(c, PROGRAM_PAYLOAD, attr, chan, base);
+        c->reg_index += 2;
+    }
+
+    c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+    c->prog_data.urb_read_length = nr_interp_regs * 2;
+    c->prog_data.curb_read_length = c->nr_creg;
+
+    c->ret_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
+    c->reg_index++;
+}
+
+/* Register for one component of the instruction's destination.  No
+ * negate or abs modifier is applied.
+ */
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c, 
+       struct prog_instruction *inst, int component, int nr)
+{
+    const int dst_file = inst->DstReg.File;
+    const int dst_index = inst->DstReg.Index;
+
+    return get_reg(c, dst_file, dst_index, component, nr, 0, 0);
+}
+
+/* Register for channel `index` of a source operand.  The channel is
+ * first routed through the operand's swizzle, and the operand's
+ * NegateBase and Abs fields are forwarded to get_reg().
+ */
+static struct brw_reg get_src_reg(struct brw_wm_compile *c, 
+       struct prog_src_register *src, int index, int nr)
+{
+    return get_reg(c, src->File, src->Index,
+           GET_SWZ(src->Swizzle, index), nr,
+           src->NegateBase, src->Abs);
+}
+
+/* ABS: for every channel enabled in the write mask, move the absolute
+ * value of source 0 into the destination.  Saturation follows the
+ * instruction's SaturateMode and is switched off again afterwards.
+ */
+static void emit_abs( struct brw_wm_compile *c,
+               struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    int chan;
+
+    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst, src;
+
+        if (!(inst->DstReg.WriteMask & (1<<chan)))
+            continue;
+
+        /* Destination is looked up before the source. */
+        dst = get_dst_reg(c, inst, chan, 1);
+        src = get_src_reg(c, &inst->SrcReg[0], chan, 1);
+        brw_MOV(p, dst, brw_abs(src));
+    }
+
+    brw_set_saturate(p, 0);
+}
+
+/* MOV: copy source 0 to the destination for every channel enabled in
+ * the write mask.  Saturation follows the instruction's SaturateMode
+ * and is switched off again afterwards.
+ */
+static void emit_mov( struct brw_wm_compile *c,
+               struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint mask = inst->DstReg.WriteMask;
+    int chan;
+
+    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst, src;
+
+        if (!(mask & (1<<chan)))
+            continue;
+
+        /* Destination is looked up before the source. */
+        dst = get_dst_reg(c, inst, chan, 1);
+        src = get_src_reg(c, &inst->SrcReg[0], chan, 1);
+        brw_MOV(p, dst, src);
+    }
+
+    brw_set_saturate(p, 0);
+}
+
+/* Compute pixel centres by adding 1 or 0 to each of the micro-tile
+ * coordinates passed in r1.  X goes to destination component 0 and Y
+ * to component 1, each only when enabled in the write mask; both
+ * destination registers are looked up regardless of the mask.
+ */
+static void emit_pixel_xy(struct brw_wm_compile *c,
+               struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg payload_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
+    struct brw_reg dst_x = get_dst_reg(c, inst, 0, 1);
+    struct brw_reg dst_y = get_dst_reg(c, inst, 1, 1);
+
+    if (mask & WRITEMASK_X) {
+        struct brw_reg out = vec8(retype(dst_x, BRW_REGISTER_TYPE_UW));
+        struct brw_reg in = stride(suboffset(payload_uw, 4), 2, 4, 0);
+
+        brw_ADD(p, out, in, brw_imm_v(0x10101010));
+    }
+
+    if (mask & WRITEMASK_Y) {
+        struct brw_reg out = vec8(retype(dst_y, BRW_REGISTER_TYPE_UW));
+        struct brw_reg in = stride(suboffset(payload_uw, 5), 2, 4, 0);
+
+        brw_ADD(p, out, in, brw_imm_v(0x11001100));
+    }
+}
+
+/* Compute delta X and Y by subtracting the origin held in r1 from the
+ * pixel centres found in components 0 and 1 of source 0.  Each result
+ * is written only when its channel is enabled in the write mask; all
+ * four registers are looked up regardless of the mask.
+ */
+static void emit_delta_xy(struct brw_wm_compile *c,
+               struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg origin = brw_vec1_grf(1, 0);
+    struct brw_reg dst_x = get_dst_reg(c, inst, 0, 1);
+    struct brw_reg dst_y = get_dst_reg(c, inst, 1, 1);
+    struct brw_reg centre_x = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    struct brw_reg centre_y = get_src_reg(c, &inst->SrcReg[0], 1, 1);
+
+    if (mask & WRITEMASK_X)
+        brw_ADD(p, dst_x,
+                retype(centre_x, BRW_REGISTER_TYPE_UW),
+                negate(origin));
+
+    if (mask & WRITEMASK_Y)
+        brw_ADD(p, dst_y,
+                retype(centre_y, BRW_REGISTER_TYPE_UW),
+                negate(suboffset(origin,1)));
+}
+
+
+/* Copy r1 into message register base_reg + 1 with the execution mask
+ * disabled, then issue the framebuffer write message.
+ *
+ * base_reg: first message register of the payload.
+ * nr:       value passed on to brw_fb_WRITE after the surface index.
+ */
+static void fire_fb_write( struct brw_wm_compile *c,
+                           GLuint base_reg,
+                           GLuint nr )
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg null_uw = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+    struct brw_reg r0_uw = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+    /* Pass through control information:
+     *   mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask }
+     */
+    brw_push_insn_state(p);
+    brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+    brw_MOV(p, brw_message_reg(base_reg + 1), brw_vec8_grf(1, 0));
+    brw_pop_insn_state(p);
+
+    /* Send framebuffer write message: */
+    brw_fb_WRITE(p,
+           null_uw,
+           base_reg,
+           r0_uw,
+           0,              /* render surface always 0 */
+           nr,
+           0,
+           1);
+}
+
+/* Move the four channels of source 0 into consecutive message
+ * registers and fire the framebuffer write.
+ *
+ * The first colour register is m2, or m3 when key.aa_dest_stencil_reg
+ * is set.  The count handed to fire_fb_write() is that start value
+ * plus 8.
+ */
+static void emit_fb_write(struct brw_wm_compile *c,
+               struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    int first_msg = 2;
+    int chan;
+
+    /* Reserve a space for AA - may not be needed: */
+    if (c->key.aa_dest_stencil_reg)
+        first_msg += 1;
+
+    brw_push_insn_state(p);
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg colour = get_src_reg(c,  &inst->SrcReg[0], chan, 1);
+
+        /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
+        /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
+        brw_MOV(p, brw_message_reg(first_msg + chan), colour);
+    }
+    brw_pop_insn_state(p);
+
+    /* skip over the regs populated above: */
+    fire_fb_write(c, 0, first_msg + 8);
+}
+
+/* Compute w for the pixel into destination component 3.  Nothing is
+ * emitted, and no register is looked up, unless W is enabled in the
+ * write mask.
+ *
+ * 1/w is produced by a LINE/MAC pair on the interpolation coefficients
+ * at GRF src0.nr+1, sub-register 4, using the two deltas from source 1,
+ * and lands in message register 2.  The math unit then inverts it.
+ */
+static void emit_pixel_w( struct brw_wm_compile *c,
+               struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg dst, coeff_src, dx, dy, wcoeff;
+
+    if (!(inst->DstReg.WriteMask & WRITEMASK_W))
+        return;
+
+    dst = get_dst_reg(c, inst, 3, 1);
+    coeff_src = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    dx = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+    dy = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+
+    wcoeff = brw_vec1_grf(coeff_src.nr+1, 4);
+
+    /* Calc 1/w - just linterp wpos[3] optimized by putting the
+     * result straight into a message reg.
+     */
+    brw_LINE(p, brw_null_reg(), wcoeff, dx);
+    brw_MAC(p, brw_message_reg(2), suboffset(wcoeff, 1), dy);
+
+    /* Calc w */
+    brw_math_16( p, dst,
+            BRW_MATH_FUNCTION_INV,
+            BRW_MATH_SATURATE_NONE,
+            2, brw_null_reg(),
+            BRW_MATH_PRECISION_FULL);
+}
+
+/* Linear interpolation: for each channel enabled in the write mask,
+ * emit a LINE/MAC pair on that channel's coefficients with the two
+ * deltas taken from source 1.
+ *
+ * Channel i reads its coefficients from GRF (src0.nr + i/2),
+ * sub-register 0 for even i and 4 for odd i.
+ */
+static void emit_linterp(struct brw_wm_compile *c,
+               struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg coeff_src = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    struct brw_reg dx = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+    struct brw_reg dy = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+    const GLuint base = coeff_src.nr;
+    int chan;
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg coeff, dst;
+
+        if (!(mask & (1<<chan)))
+            continue;
+
+        coeff = brw_vec1_grf(base + chan/2, (chan%2)*4);
+        dst = get_dst_reg(c, inst, chan, 1);
+        brw_LINE(p, brw_null_reg(), coeff, dx);
+        brw_MAC(p, dst, suboffset(coeff,1), dy);
+    }
+}
+
+/* Constant interpolation: for each channel enabled in the write mask,
+ * copy element 3 of that channel's coefficient group to the
+ * destination.
+ *
+ * Channel i reads its coefficients from GRF (src0.nr + i/2),
+ * sub-register 0 for even i and 4 for odd i.
+ */
+static void emit_cinterp(struct brw_wm_compile *c,
+               struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg coeff_src = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    const GLuint base = coeff_src.nr;
+    int chan;
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg coeff, dst;
+
+        if (!(mask & (1<<chan)))
+            continue;
+
+        coeff = brw_vec1_grf(base + chan/2, (chan%2)*4);
+        dst = get_dst_reg(c, inst, chan, 1);
+        brw_MOV(p, dst, suboffset(coeff,3));
+    }
+}
+
+/* Perspective interpolation: as emit_linterp, followed by a multiply
+ * of each result by component 3 of source 2.
+ *
+ * Channel i reads its coefficients from GRF (src0.nr + i/2),
+ * sub-register 0 for even i and 4 for odd i.
+ */
+static void emit_pinterp(struct brw_wm_compile *c,
+               struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg coeff_src = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    struct brw_reg dx = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+    struct brw_reg dy = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+    struct brw_reg w = get_src_reg(c, &inst->SrcReg[2], 3, 1);
+    const GLuint base = coeff_src.nr;
+    int chan;
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg coeff, dst;
+
+        if (!(mask & (1<<chan)))
+            continue;
+
+        coeff = brw_vec1_grf(base + chan/2, (chan%2)*4);
+        dst = get_dst_reg(c, inst, chan, 1);
+        brw_LINE(p, brw_null_reg(), coeff, dx);
+        brw_MAC(p, dst, suboffset(coeff,1), dy);
+        brw_MUL(p, dst, dst, w);
+    }
+}
+
+/**
+ * Emit code for OPCODE_XPD (three-component cross product).
+ *
+ * For each of the X, Y and Z channels enabled in the write mask:
+ *     dst[i] = src0[i1] * src1[i2] - src0[i2] * src1[i1]
+ * with i1 = (i + 1) % 3 and i2 = (i + 2) % 3.  The first product is a MUL
+ * into the null register and the second is folded in by a MAC, which is the
+ * only instruction emitted with saturation enabled.
+ *
+ * Only three channels are processed: a cross product has no fourth
+ * component, and the modulo-3 indices would otherwise make a W write emit a
+ * meaningless product.
+ */
+static void emit_xpd(struct brw_wm_compile *c,
+                struct prog_instruction *inst)
+{
+    int i;
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+
+    for (i = 0; i < 3; i++) {
+        GLuint i2 = (i+2)%3;
+        GLuint i1 = (i+1)%3;
+
+        if (mask & (1<<i)) {
+            struct brw_reg src0, src1, dst;
+
+            dst = get_dst_reg(c, inst, i, 1);
+
+            /* Start with -src0[i2] * src1[i1]. */
+            src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1));
+            src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1);
+            brw_MUL(p, brw_null_reg(), src0, src1);
+
+            /* Then add src0[i1] * src1[i2] and write the result. */
+            src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1);
+            src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1);
+            brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+            brw_MAC(p, dst, src0, src1);
+            brw_set_saturate(p, 0);
+        }
+    }
+}
+
+/**
+ * Emit code for OPCODE_DP3: a three-term dot product of SrcReg[0] and
+ * SrcReg[1], written to the channel chosen by get_scalar_dst_index().
+ *
+ * The first two products go to the null register (MUL, then MAC); the last
+ * MAC writes the destination and is the only instruction emitted with
+ * saturation enabled.
+ */
+static void emit_dp3(struct brw_wm_compile *c,
+                     struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint sat = (inst->SaturateMode != SATURATE_OFF) ? 1 : 0;
+    struct brw_reg a[3], b[3], result;
+    int chan;
+
+    for (chan = 0; chan < 3; chan++) {
+        a[chan] = get_src_reg(c, &inst->SrcReg[0], chan, 1);
+        b[chan] = get_src_reg(c, &inst->SrcReg[1], chan, 1);
+    }
+    result = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+
+    brw_MUL(p, brw_null_reg(), a[0], b[0]);
+    brw_MAC(p, brw_null_reg(), a[1], b[1]);
+
+    brw_set_saturate(p, sat);
+    brw_MAC(p, result, a[2], b[2]);
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit code for OPCODE_DP4: a four-term dot product of SrcReg[0] and
+ * SrcReg[1], written to the channel chosen by get_scalar_dst_index().
+ *
+ * The first three products go to the null register (MUL, then two MACs);
+ * the last MAC writes the destination and is the only instruction emitted
+ * with saturation enabled.
+ */
+static void emit_dp4(struct brw_wm_compile *c,
+                     struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint sat = (inst->SaturateMode != SATURATE_OFF) ? 1 : 0;
+    struct brw_reg a[4], b[4], result;
+    int chan;
+
+    for (chan = 0; chan < 4; chan++) {
+        a[chan] = get_src_reg(c, &inst->SrcReg[0], chan, 1);
+        b[chan] = get_src_reg(c, &inst->SrcReg[1], chan, 1);
+    }
+    result = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+
+    brw_MUL(p, brw_null_reg(), a[0], b[0]);
+    brw_MAC(p, brw_null_reg(), a[1], b[1]);
+    brw_MAC(p, brw_null_reg(), a[2], b[2]);
+
+    brw_set_saturate(p, sat);
+    brw_MAC(p, result, a[3], b[3]);
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit code for OPCODE_DPH (homogeneous dot product):
+ *     dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w
+ * written to the channel chosen by get_scalar_dst_index().
+ *
+ * The three-term dot product is built with MUL/MAC/MAC, the last MAC landing
+ * in dst.  The final ADD then adds src1.w to that partial result; it must
+ * read dst as its first operand, otherwise the dot product is discarded and
+ * replaced by src0.w + src1.w.  Only the final ADD is emitted with
+ * saturation enabled.
+ */
+static void emit_dph(struct brw_wm_compile *c,
+                struct prog_instruction *inst)
+{
+    struct brw_reg src0[4], src1[4], dst;
+    int i;
+    struct brw_compile *p = &c->func;
+
+    for (i = 0; i < 4; i++) {
+        src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
+        src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
+    }
+    dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+
+    brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+    brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+    brw_MAC(p, dst, src0[2], src1[2]);
+
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+    brw_ADD(p, dst, dst, src1[3]);
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit a one-operand math instruction.
+ *
+ * Component 0 of SrcReg[0] is copied into message register 2, then
+ * brw_math() is issued with that message register and the result directed
+ * to the channel chosen by get_scalar_dst_index().
+ *
+ * \param func  BRW_MATH_FUNCTION_* selector passed through to brw_math().
+ */
+static void emit_math1(struct brw_wm_compile *c,
+                       struct prog_instruction *inst, GLuint func)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg arg, result;
+    GLuint sat_mode;
+
+    arg = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    result = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+
+    if (inst->SaturateMode != SATURATE_OFF)
+        sat_mode = BRW_MATH_SATURATE_SATURATE;
+    else
+        sat_mode = BRW_MATH_SATURATE_NONE;
+
+    brw_MOV(p, brw_message_reg(2), arg);
+    brw_math(p, result, func, sat_mode,
+             2, brw_null_reg(),
+             BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);
+}
+
+/** Emit OPCODE_RCP via emit_math1() with BRW_MATH_FUNCTION_INV. */
+static void
+emit_rcp(struct brw_wm_compile *c, struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
+}
+
+/** Emit OPCODE_RSQ via emit_math1() with BRW_MATH_FUNCTION_RSQ. */
+static void
+emit_rsq(struct brw_wm_compile *c, struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
+}
+
+/** Emit OPCODE_SIN via emit_math1() with BRW_MATH_FUNCTION_SIN. */
+static void
+emit_sin(struct brw_wm_compile *c, struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
+}
+
+/** Emit OPCODE_COS via emit_math1() with BRW_MATH_FUNCTION_COS. */
+static void
+emit_cos(struct brw_wm_compile *c, struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
+}
+
+/** Emit OPCODE_EX2 via emit_math1() with BRW_MATH_FUNCTION_EXP. */
+static void
+emit_ex2(struct brw_wm_compile *c, struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
+}
+
+/** Emit OPCODE_LG2 via emit_math1() with BRW_MATH_FUNCTION_LOG. */
+static void
+emit_lg2(struct brw_wm_compile *c, struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
+}
+
+/**
+ * Emit code for OPCODE_ADD: one ADD per channel enabled in the destination
+ * write mask.  Saturation is switched on for the whole sequence when the
+ * instruction requests it and switched off again afterwards.
+ */
+static void emit_add(struct brw_wm_compile *c,
+                     struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    int chan;
+
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg out, lhs, rhs;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        out = get_dst_reg(c, inst, chan, 1);
+        lhs = get_src_reg(c, &inst->SrcReg[0], chan, 1);
+        rhs = get_src_reg(c, &inst->SrcReg[1], chan, 1);
+        brw_ADD(p, out, lhs, rhs);
+    }
+
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit code for OPCODE_SUB: for each channel enabled in the destination
+ * write mask, an ADD of SrcReg[0] and the negated SrcReg[1].  Saturation is
+ * switched on for the whole sequence when the instruction requests it and
+ * switched off again afterwards.
+ */
+static void emit_sub(struct brw_wm_compile *c,
+                     struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    int chan;
+
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg out, lhs, rhs;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        out = get_dst_reg(c, inst, chan, 1);
+        lhs = get_src_reg(c, &inst->SrcReg[0], chan, 1);
+        rhs = get_src_reg(c, &inst->SrcReg[1], chan, 1);
+        brw_ADD(p, out, lhs, negate(rhs));
+    }
+
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit code for OPCODE_MUL: one MUL per channel enabled in the destination
+ * write mask.  Saturation is switched on for the whole sequence when the
+ * instruction requests it and switched off again afterwards.
+ */
+static void emit_mul(struct brw_wm_compile *c,
+                     struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    int chan;
+
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg out, lhs, rhs;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        out = get_dst_reg(c, inst, chan, 1);
+        lhs = get_src_reg(c, &inst->SrcReg[0], chan, 1);
+        rhs = get_src_reg(c, &inst->SrcReg[1], chan, 1);
+        brw_MUL(p, out, lhs, rhs);
+    }
+
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit code for OPCODE_FRC: one FRC per channel enabled in the destination
+ * write mask.  Saturation is switched on for the whole sequence when the
+ * instruction requests it and, in that case only, switched off afterwards.
+ */
+static void emit_frc(struct brw_wm_compile *c,
+                     struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const GLuint sat = (inst->SaturateMode != SATURATE_OFF) ? 1 : 0;
+    int chan;
+
+    brw_set_saturate(p, sat);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg out, arg;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        out = get_dst_reg(c, inst, chan, 1);
+        arg = get_src_reg(c, &inst->SrcReg[0], chan, 1);
+        brw_FRC(p, out, arg);
+    }
+
+    if (sat)
+        brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit code for OPCODE_FLR: one RNDD per channel enabled in the destination
+ * write mask.  Saturation is switched on for the whole sequence when the
+ * instruction requests it and switched off again afterwards.
+ */
+static void emit_flr(struct brw_wm_compile *c,
+                     struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    int chan;
+
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg out, arg;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        out = get_dst_reg(c, inst, chan, 1);
+        arg = get_src_reg(c, &inst->SrcReg[0], chan, 1);
+        brw_RNDD(p, out, arg);
+    }
+
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit code for OPCODE_MAX.
+ *
+ * For each channel enabled in the destination write mask: MOV SrcReg[0]
+ * into the destination, CMP SrcReg[0] < SrcReg[1] into the null register,
+ * then a predicated MOV of SrcReg[1] into the destination.  Both MOVs are
+ * emitted with saturation enabled when the instruction requests it.  The
+ * whole sequence is bracketed by a push/pop of the instruction state.
+ *
+ * NOTE(review): if the destination shares a register with SrcReg[1], the
+ * first MOV overwrites that operand before the CMP reads it -- confirm
+ * whether such aliasing can reach this function.
+ */
+static void emit_max(struct brw_wm_compile *c,
+                     struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const GLuint sat = (inst->SaturateMode != SATURATE_OFF) ? 1 : 0;
+    int chan;
+
+    brw_push_insn_state(p);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg out, a, b;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        out = get_dst_reg(c, inst, chan, 1);
+        a = get_src_reg(c, &inst->SrcReg[0], chan, 1);
+        b = get_src_reg(c, &inst->SrcReg[1], chan, 1);
+
+        /* out = a */
+        brw_set_saturate(p, sat);
+        brw_MOV(p, out, a);
+        brw_set_saturate(p, 0);
+
+        /* where a < b: out = b */
+        brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, a, b);
+        brw_set_saturate(p, sat);
+        brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+        brw_MOV(p, out, b);
+        brw_set_saturate(p, 0);
+        brw_set_predicate_control_flag_value(p, 0xff);
+    }
+
+    brw_pop_insn_state(p);
+}
+
+/**
+ * Emit code for OPCODE_MIN.
+ *
+ * For each channel enabled in the destination write mask: MOV SrcReg[0]
+ * into the destination, CMP SrcReg[1] < SrcReg[0] into the null register,
+ * then a predicated MOV of SrcReg[1] into the destination.  Both MOVs are
+ * emitted with saturation enabled when the instruction requests it.  The
+ * whole sequence is bracketed by a push/pop of the instruction state.
+ *
+ * NOTE(review): if the destination shares a register with SrcReg[1], the
+ * first MOV overwrites that operand before the CMP reads it -- confirm
+ * whether such aliasing can reach this function.
+ */
+static void emit_min(struct brw_wm_compile *c,
+                     struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const GLuint sat = (inst->SaturateMode != SATURATE_OFF) ? 1 : 0;
+    int chan;
+
+    brw_push_insn_state(p);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg out, a, b;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        out = get_dst_reg(c, inst, chan, 1);
+        a = get_src_reg(c, &inst->SrcReg[0], chan, 1);
+        b = get_src_reg(c, &inst->SrcReg[1], chan, 1);
+
+        /* out = a */
+        brw_set_saturate(p, sat);
+        brw_MOV(p, out, a);
+        brw_set_saturate(p, 0);
+
+        /* where b < a: out = b */
+        brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, b, a);
+        brw_set_saturate(p, sat);
+        brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+        brw_MOV(p, out, b);
+        brw_set_saturate(p, 0);
+        brw_set_predicate_control_flag_value(p, 0xff);
+    }
+
+    brw_pop_insn_state(p);
+}
+
+/**
+ * Emit code for OPCODE_POW.
+ *
+ * Component 0 of SrcReg[0] and of SrcReg[1] are copied into message
+ * registers 2 and 3, then brw_math() is issued with
+ * BRW_MATH_FUNCTION_POW and the result directed to the channel chosen by
+ * get_scalar_dst_index().
+ */
+static void emit_pow(struct brw_wm_compile *c,
+                     struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg result, base, exponent;
+    GLuint sat_mode;
+
+    result = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+    base = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    exponent = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+
+    if (inst->SaturateMode != SATURATE_OFF)
+        sat_mode = BRW_MATH_SATURATE_SATURATE;
+    else
+        sat_mode = BRW_MATH_SATURATE_NONE;
+
+    brw_MOV(p, brw_message_reg(2), base);
+    brw_MOV(p, brw_message_reg(3), exponent);
+
+    brw_math(p, result, BRW_MATH_FUNCTION_POW, sat_mode,
+             2, brw_null_reg(),
+             BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);
+}
+
+/**
+ * Emit code for OPCODE_LRP:
+ *     dst = src0 * src1 + (1 - src0) * src2
+ * for each channel enabled in the destination write mask.
+ *
+ * The sequence is ADD dst = 1 - src0, MUL (null) = dst * src2,
+ * MAC dst = src0 * src1 (+ previous product).  Because dst is written by
+ * the very first instruction, any source living in the same register as
+ * dst is first copied into a temporary -- this applies to src0 as much as
+ * to src1 and src2, since src0 is read again by the final MAC.
+ *
+ * Only the final MAC is emitted with saturation enabled.  Temporaries are
+ * released after every channel.
+ */
+static void emit_lrp(struct brw_wm_compile *c,
+                struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg dst, tmp0, tmp1, tmp2, src0, src1, src2;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        if (mask & (1<<i)) {
+            dst = get_dst_reg(c, inst, i, 1);
+
+            src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+            if (src0.nr == dst.nr) {
+                tmp0 = alloc_tmp(c);
+                brw_MOV(p, tmp0, src0);
+            } else
+                tmp0 = src0;
+
+            src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+            if (src1.nr == dst.nr) {
+                tmp1 = alloc_tmp(c);
+                brw_MOV(p, tmp1, src1);
+            } else
+                tmp1 = src1;
+
+            src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
+            if (src2.nr == dst.nr) {
+                tmp2 = alloc_tmp(c);
+                brw_MOV(p, tmp2, src2);
+            } else
+                tmp2 = src2;
+
+            brw_ADD(p, dst, negate(tmp0), brw_imm_f(1.0));
+            brw_MUL(p, brw_null_reg(), dst, tmp2);
+            brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+            brw_MAC(p, dst, tmp0, tmp1);
+            brw_set_saturate(p, 0);
+        }
+        release_tmps(c);
+    }
+}
+
+/**
+ * Emit code for OPCODE_MAD:
+ *     dst = src0 * src1 + src2
+ * for each channel enabled in the destination write mask.
+ *
+ * The product is written to dst by a MUL and the addend is folded in by a
+ * following ADD.  If src2 lives in the same register as dst (as in
+ * "a = b * c + a"), the MUL would destroy the addend before the ADD reads
+ * it, so in that case src2 is first copied into a temporary.  Only the ADD
+ * is emitted with saturation enabled.  Temporaries are released after every
+ * channel.
+ */
+static void emit_mad(struct brw_wm_compile *c,
+                struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg dst, src0, src1, src2, addend;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        if (mask & (1<<i)) {
+            dst = get_dst_reg(c, inst, i, 1);
+            src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+            src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+            src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
+
+            if (src2.nr == dst.nr) {
+                addend = alloc_tmp(c);
+                brw_MOV(p, addend, src2);
+            } else
+                addend = src2;
+
+            brw_MUL(p, dst, src0, src1);
+
+            brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+            brw_ADD(p, dst, dst, addend);
+            brw_set_saturate(p, 0);
+        }
+        release_tmps(c);
+    }
+}
+
+/**
+ * Emit a "set on comparison" operation: for each channel enabled in the
+ * destination write mask, dst becomes 1.0 where (src0 <cond> src1) holds
+ * and 0.0 elsewhere.
+ *
+ * The CMP is emitted first so that it reads both sources before dst is
+ * touched; writing the 0.0 default ahead of the comparison would corrupt
+ * the operands whenever dst shares a register with src0 or src1.  The
+ * default is then written unpredicated and the 1.0 predicated on the
+ * comparison result.
+ *
+ * \param cond  BRW_CONDITIONAL_* code passed through to brw_CMP().
+ */
+static void emit_sop(struct brw_wm_compile *c,
+                struct prog_instruction *inst, GLuint cond)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg dst, src0, src1;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        if (mask & (1<<i)) {
+            dst = get_dst_reg(c, inst, i, 1);
+            src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+            src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+
+            brw_CMP(p, brw_null_reg(), cond, src0, src1);
+
+            /* Force the default write to be unpredicated, whatever
+             * predicate state is current after the CMP. */
+            brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+            brw_MOV(p, dst, brw_imm_f(0));
+
+            brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+            brw_MOV(p, dst, brw_imm_f(1.0));
+            brw_set_predicate_control_flag_value(p, 0xff);
+        }
+    }
+}
+
+/** Emit OPCODE_SLT via emit_sop() with BRW_CONDITIONAL_L. */
+static void
+emit_slt(struct brw_wm_compile *c, struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_L);
+}
+
+/** Emit OPCODE_SLE via emit_sop() with BRW_CONDITIONAL_LE. */
+static void
+emit_sle(struct brw_wm_compile *c, struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_LE);
+}
+
+/** Emit OPCODE_SGT via emit_sop() with BRW_CONDITIONAL_G. */
+static void
+emit_sgt(struct brw_wm_compile *c, struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_G);
+}
+
+/** Emit OPCODE_SGE via emit_sop() with BRW_CONDITIONAL_GE. */
+static void
+emit_sge(struct brw_wm_compile *c, struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_GE);
+}
+
+/** Emit OPCODE_SEQ via emit_sop() with BRW_CONDITIONAL_EQ. */
+static void
+emit_seq(struct brw_wm_compile *c, struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_EQ);
+}
+
+/** Emit OPCODE_SNE via emit_sop() with BRW_CONDITIONAL_NEQ. */
+static void
+emit_sne(struct brw_wm_compile *c, struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
+}
+/**
+ * Emit code for OPCODE_TXB (texture sample with bias).
+ *
+ * TODO: BIAS on SIMD8 is not working yet.
+ *
+ * Message registers 2..4 receive the texture coordinate (components the
+ * target does not use are filled with 0), register 5 receives component 3
+ * of SrcReg[0] and register 6 receives 0.  A brw_SAMPLE with message type
+ * BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS is then issued, writing to the
+ * registers starting at the X destination.
+ */
+static void emit_txb(struct brw_wm_compile *c,
+                     struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg out[4], coord[4], payload;
+    GLuint chan;
+    int uses_coord1, uses_coord2;
+
+    payload = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
+    /* All four destination and source channels are looked up, in this
+     * order, even though only out[0] is passed to the sampler. */
+    for (chan = 0; chan < 4; chan++)
+        out[chan] = get_dst_reg(c, inst, chan, 1);
+    for (chan = 0; chan < 4; chan++)
+        coord[chan] = get_src_reg(c, &inst->SrcReg[0], chan, 1);
+
+    /* 1D uses one coordinate, 2D/RECT two, everything else three. */
+    uses_coord1 = (inst->TexSrcTarget != TEXTURE_1D_INDEX);
+    uses_coord2 = uses_coord1 &&
+                  inst->TexSrcTarget != TEXTURE_2D_INDEX &&
+                  inst->TexSrcTarget != TEXTURE_RECT_INDEX;
+
+    brw_MOV(p, brw_message_reg(2), coord[0]);
+
+    if (uses_coord1)
+        brw_MOV(p, brw_message_reg(3), coord[1]);
+    else
+        brw_MOV(p, brw_message_reg(3), brw_imm_f(0));
+
+    if (uses_coord2)
+        brw_MOV(p, brw_message_reg(4), coord[2]);
+    else
+        brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+
+    brw_MOV(p, brw_message_reg(5), coord[3]);
+    brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+
+    brw_SAMPLE(p,
+               retype(vec8(out[0]), BRW_REGISTER_TYPE_UW),
+               1,
+               retype(payload, BRW_REGISTER_TYPE_UW),
+               inst->TexSrcUnit + 1, /* surface */
+               inst->TexSrcUnit,     /* sampler */
+               inst->DstReg.WriteMask,
+               BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
+               4,
+               4,
+               0);
+}
+
+/**
+ * Emit code for OPCODE_TEX (plain texture sample).
+ *
+ * The texture coordinate components used by the target (one for 1D, two
+ * for 2D/RECT, three otherwise) are copied into consecutive message
+ * registers starting at register 2, then a brw_SAMPLE with message type
+ * BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE is issued, writing to the registers
+ * starting at the X destination.
+ */
+static void emit_tex(struct brw_wm_compile *c,
+                     struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg out[4], coord[4], payload;
+    GLuint chan, ncoords;
+
+    payload = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
+    /* All four destination and source channels are looked up, in this
+     * order, even though only out[0] is passed to the sampler. */
+    for (chan = 0; chan < 4; chan++)
+        out[chan] = get_dst_reg(c, inst, chan, 1);
+    for (chan = 0; chan < 4; chan++)
+        coord[chan] = get_src_reg(c, &inst->SrcReg[0], chan, 1);
+
+    if (inst->TexSrcTarget == TEXTURE_1D_INDEX)
+        ncoords = 1;
+    else if (inst->TexSrcTarget == TEXTURE_2D_INDEX ||
+             inst->TexSrcTarget == TEXTURE_RECT_INDEX)
+        ncoords = 2;
+    else
+        ncoords = 3;
+
+    /* Coordinate i goes to message register i + 2. */
+    for (chan = 0; chan < ncoords; chan++)
+        brw_MOV(p, brw_message_reg(chan + 2), coord[chan]);
+
+    brw_SAMPLE(p,
+               retype(vec8(out[0]), BRW_REGISTER_TYPE_UW),
+               1,
+               retype(payload, BRW_REGISTER_TYPE_UW),
+               inst->TexSrcUnit + 1, /* surface */
+               inst->TexSrcUnit,     /* sampler */
+               inst->DstReg.WriteMask,
+               BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE,
+               4,
+               4,
+               0);
+}
+
+/**
+ * Post-emission fix-up pass over the original fragment program.
+ *
+ * For every OPCODE_CAL, the hardware instruction recorded in the CAL's Data
+ * pointer gets its src1 set to an immediate equal to 16 times the distance,
+ * in hardware instructions, to the instruction recorded for the branch
+ * target.  Other opcodes are left alone.
+ */
+static void post_wm_emit( struct brw_wm_compile *c )
+{
+    const GLuint count = c->fp->program.Base.NumInstructions;
+    struct prog_instruction *insns = c->fp->program.Base.Instructions;
+    GLuint idx;
+
+    for (idx = 0; idx < count; idx++) {
+        struct prog_instruction *call = &insns[idx];
+        struct prog_instruction *callee;
+        struct brw_instruction *from, *to;
+        int distance;
+
+        if (call->Opcode != OPCODE_CAL)
+            continue;
+
+        from = call->Data;
+        callee = &insns[call->BranchTarget];
+        to = callee->Data;
+
+        distance = to - from;
+        brw_set_src1(from, brw_imm_d(distance*16));
+    }
+}
+
+/**
+ * Translate the instruction list in c->prog_instructions into hardware
+ * instructions, with support for IF/ELSE/ENDIF, loops and subroutine calls.
+ *
+ * Before each instruction is translated, if its Data pointer is non-null
+ * the instruction it points to has its own Data set to the current hardware
+ * instruction; post_wm_emit() later uses those pointers to resolve CAL
+ * targets.
+ *
+ * Open IF and loop constructs are tracked on two fixed-size stacks.  Every
+ * push and pop is guarded by an assert so that unbalanced or too deeply
+ * nested control flow cannot index outside the stacks.
+ */
+static void brw_wm_emit_glsl(struct brw_wm_compile *c)
+
+{
+#define MAX_IFSN 32
+#define MAX_LOOP_DEPTH 32
+    struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH];
+    struct brw_instruction *inst0, *inst1;
+    int i, if_insn = 0, loop_insn = 0;
+    struct brw_compile *p = &c->func;
+    brw_init_compile(&c->func);
+    c->reg_index = 0;
+    prealloc_reg(c);
+    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+    for (i = 0; i < c->nr_fp_insns; i++) {
+        struct prog_instruction *inst = &c->prog_instructions[i];
+        struct prog_instruction *orig_inst;
+
+        /* Record where the code for the originating instruction starts. */
+        if ((orig_inst = inst->Data) != 0)
+            orig_inst->Data = current_insn(p);
+
+        switch (inst->Opcode) {
+            case WM_PIXELXY:
+                emit_pixel_xy(c, inst);
+                break;
+            case WM_DELTAXY:
+                emit_delta_xy(c, inst);
+                break;
+            case WM_PIXELW:
+                emit_pixel_w(c, inst);
+                break;
+            case WM_LINTERP:
+                emit_linterp(c, inst);
+                break;
+            case WM_PINTERP:
+                emit_pinterp(c, inst);
+                break;
+            case WM_CINTERP:
+                emit_cinterp(c, inst);
+                break;
+            case WM_FB_WRITE:
+                emit_fb_write(c, inst);
+                break;
+            case OPCODE_ABS:
+                emit_abs(c, inst);
+                break;
+            case OPCODE_ADD:
+                emit_add(c, inst);
+                break;
+            case OPCODE_SUB:
+                emit_sub(c, inst);
+                break;
+            case OPCODE_FRC:
+                emit_frc(c, inst);
+                break;
+            case OPCODE_FLR:
+                emit_flr(c, inst);
+                break;
+            case OPCODE_LRP:
+                emit_lrp(c, inst);
+                break;
+            case OPCODE_MOV:
+                emit_mov(c, inst);
+                break;
+            case OPCODE_DP3:
+                emit_dp3(c, inst);
+                break;
+            case OPCODE_DP4:
+                emit_dp4(c, inst);
+                break;
+            case OPCODE_XPD:
+                emit_xpd(c, inst);
+                break;
+            case OPCODE_DPH:
+                emit_dph(c, inst);
+                break;
+            case OPCODE_RCP:
+                emit_rcp(c, inst);
+                break;
+            case OPCODE_RSQ:
+                emit_rsq(c, inst);
+                break;
+            case OPCODE_SIN:
+                emit_sin(c, inst);
+                break;
+            case OPCODE_COS:
+                emit_cos(c, inst);
+                break;
+            case OPCODE_EX2:
+                emit_ex2(c, inst);
+                break;
+            case OPCODE_LG2:
+                emit_lg2(c, inst);
+                break;
+            case OPCODE_MAX:
+                emit_max(c, inst);
+                break;
+            case OPCODE_MIN:
+                emit_min(c, inst);
+                break;
+            case OPCODE_SLT:
+                emit_slt(c, inst);
+                break;
+            case OPCODE_SLE:
+                emit_sle(c, inst);
+                break;
+            case OPCODE_SGT:
+                emit_sgt(c, inst);
+                break;
+            case OPCODE_SGE:
+                emit_sge(c, inst);
+                break;
+            case OPCODE_SEQ:
+                emit_seq(c, inst);
+                break;
+            case OPCODE_SNE:
+                emit_sne(c, inst);
+                break;
+            case OPCODE_MUL:
+                emit_mul(c, inst);
+                break;
+            case OPCODE_POW:
+                emit_pow(c, inst);
+                break;
+            case OPCODE_MAD:
+                emit_mad(c, inst);
+                break;
+            case OPCODE_TEX:
+                emit_tex(c, inst);
+                break;
+            case OPCODE_TXB:
+                emit_txb(c, inst);
+                break;
+            case OPCODE_IF:
+                assert(if_insn < MAX_IFSN);
+                if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+                break;
+            case OPCODE_ELSE:
+                /* An ELSE without an open IF would index if_inst[-1]. */
+                assert(if_insn > 0);
+                if_inst[if_insn-1]  = brw_ELSE(p, if_inst[if_insn-1]);
+                break;
+            case OPCODE_ENDIF:
+                assert(if_insn > 0);
+                brw_ENDIF(p, if_inst[--if_insn]);
+                break;
+            case OPCODE_BGNSUB:
+            case OPCODE_ENDSUB:
+                break;
+            case OPCODE_CAL:
+                brw_push_insn_state(p);
+                brw_set_mask_control(p, BRW_MASK_DISABLE);
+                brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+                /* Save the return address: IP plus two 16-byte
+                 * instructions (this ADD and the jump below). */
+                brw_ADD(p, c->ret_reg, brw_ip_reg(), brw_imm_d(2*16));
+                /* Re-point the originating CAL at the jump itself; its
+                 * offset is patched in post_wm_emit(). */
+                orig_inst = inst->Data;
+                assert(orig_inst);
+                orig_inst->Data = current_insn(p);
+                brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+                brw_pop_insn_state(p);
+                break;
+            case OPCODE_RET:
+                brw_push_insn_state(p);
+                brw_set_mask_control(p, BRW_MASK_DISABLE);
+                brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+                brw_MOV(p, brw_ip_reg(), c->ret_reg);
+                brw_pop_insn_state(p);
+                break;
+            case OPCODE_BGNLOOP:
+                assert(loop_insn < MAX_LOOP_DEPTH);
+                loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
+                break;
+            case OPCODE_BRK:
+                brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+                brw_BREAK(p);
+                brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+                break;
+            case OPCODE_ENDLOOP:
+                assert(loop_insn > 0);
+                loop_insn--;
+                inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]);
+                /* Patch all the BREAK instructions emitted since the
+                 * matching BGNLOOP so they jump past the WHILE.
+                 *
+                 * NOTE(review): this scan also revisits BREAKs that
+                 * belong to already-closed inner loops and retargets
+                 * them to the outer WHILE -- confirm whether nested
+                 * loops need a "not yet patched" check here. */
+                while (inst0 > loop_inst[loop_insn]) {
+                    inst0--;
+                    if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+                        inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
+                        inst0->bits3.if_else.pop_count = 0;
+                    }
+                }
+                break;
+            default:
+                _mesa_printf("unsupported IR in fragment shader %d\n",
+                        inst->Opcode);
+                break;
+        }
+    }
+    post_wm_emit(c);
+}
+
+/*
+ * Entry point for the GLSL fragment-program path: runs brw_wm_pass_fp(),
+ * emits code with brw_wm_emit_glsl(), then records the register usage in
+ * c->prog_data.
+ */
+void brw_wm_glsl_emit(struct brw_wm_compile *c)
+{
+    brw_wm_pass_fp(c);
+    /* Set before emission; presumably the starting index used by
+     * alloc_tmp() -- TODO confirm. */
+    c->tmp_index = 127;
+    brw_wm_emit_glsl(c);
+    /* reg_index is reset to 0 at the start of brw_wm_emit_glsl(), so this
+     * is its value once emission has finished. */
+    c->prog_data.total_grf = c->reg_index;
+    c->prog_data.total_scratch = 0;
+}
index a1edbd6168d7bafa3fe621822b6c77305e3e2584..c1ce6a9b6b554c6e71a16d6343e2736d4c524be1 100644 (file)
@@ -328,7 +328,7 @@ void brw_wm_pass2( struct brw_wm_compile *c )
    c->state = PASS2_DONE;
 
    if (INTEL_DEBUG & DEBUG_WM) {
-      brw_wm_print_program(c, "pass2/done");
+       brw_wm_print_program(c, "pass2/done");
    }
 }
 
index ff5cb31bdd148fb749c9b5b14aa2a25525482f18..401864fc4e143ecd2e89dd5dda83690ef65ed768 100644 (file)
@@ -34,6 +34,7 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "brw_wm.h"
 #include "bufmgr.h"
 
 /***********************************************************************
@@ -134,9 +135,13 @@ static void upload_wm_unit(struct brw_context *brw )
       if (fp->UsesKill || 
          brw->attribs.Color->AlphaEnabled) 
         wm.wm5.program_uses_killpixel = 1; 
+      
+      if (brw_wm_is_glsl(fp))
+         wm.wm5.enable_8_pix = 1;
+      else
+         wm.wm5.enable_16_pix = 1;
    }
 
-   wm.wm5.enable_16_pix = 1;
    wm.wm5.thread_dispatch_enable = 1;  /* AKA: color_write */
    wm.wm5.legacy_line_rast = 0;
    wm.wm5.legacy_global_depth_bias = 0;