Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
index ce4cf46cfa6fe8d4b64c0c126a96ed20ac99a907..60ea44f7a9688cf75cbd272446404084289e0781 100644 (file)
@@ -55,6 +55,9 @@ static void guess_execution_size( struct brw_instruction *insn,
 static void brw_set_dest( struct brw_instruction *insn,
                          struct brw_reg dest )
 {
+   if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(dest.nr < 128);
+
    insn->bits1.da1.dest_reg_file = dest.file;
    insn->bits1.da1.dest_reg_type = dest.type;
    insn->bits1.da1.dest_address_mode = dest.address_mode;
@@ -96,10 +99,13 @@ static void brw_set_dest( struct brw_instruction *insn,
 }
 
 static void brw_set_src0( struct brw_instruction *insn,
-                     struct brw_reg reg )
+                          struct brw_reg reg )
 {
    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
 
+   if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(reg.nr < 128);
+
    insn->bits1.da1.src0_reg_file = reg.file;
    insn->bits1.da1.src0_reg_type = reg.type;
    insn->bits2.da1.src0_abs = reg.abs;
@@ -169,10 +175,12 @@ static void brw_set_src0( struct brw_instruction *insn,
 
 
 void brw_set_src1( struct brw_instruction *insn,
-                         struct brw_reg reg )
+                   struct brw_reg reg )
 {
    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
 
+   assert(reg.nr < 128);
+
    insn->bits1.da1.src1_reg_file = reg.file;
    insn->bits1.da1.src1_reg_type = reg.type;
    insn->bits3.da1.src1_abs = reg.abs;
@@ -312,24 +320,25 @@ static void brw_set_dp_read_message( struct brw_instruction *insn,
 {
    brw_set_src1(insn, brw_imm_d(0));
 
-   insn->bits3.dp_read.binding_table_index = binding_table_index;
-   insn->bits3.dp_read.msg_control = msg_control;
-   insn->bits3.dp_read.msg_type = msg_type;
-   insn->bits3.dp_read.target_cache = target_cache;
-   insn->bits3.dp_read.response_length = response_length;
-   insn->bits3.dp_read.msg_length = msg_length;
-   insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
-   insn->bits3.dp_read.end_of_thread = end_of_thread;
+   insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
+   insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
+   insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
+   insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
+   insn->bits3.dp_read.response_length = response_length;  /*16:19*/
+   insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
+   insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
+   insn->bits3.dp_read.pad1 = 0;  /*28:30*/
+   insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
 }
 
 static void brw_set_sampler_message(struct brw_context *brw,
-                 struct brw_instruction *insn,
-                                    GLuint binding_table_index,
-                                    GLuint sampler,
-                                    GLuint msg_type,
-                                    GLuint response_length,
-                                    GLuint msg_length,
-                                    GLboolean eot)
+                                    struct brw_instruction *insn,
+                                    GLuint binding_table_index,
+                                    GLuint sampler,
+                                    GLuint msg_type,
+                                    GLuint response_length,
+                                    GLuint msg_length,
+                                    GLboolean eot)
 {
    brw_set_src1(insn, brw_imm_d(0));
 
@@ -407,7 +416,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p,
  * Convenience routines.
  */
 #define ALU1(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,                        \
+struct brw_instruction *brw_##OP(struct brw_compile *p,        \
              struct brw_reg dest,                      \
              struct brw_reg src0)                      \
 {                                                      \
@@ -415,7 +424,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p,                     \
 }
 
 #define ALU2(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,                        \
+struct brw_instruction *brw_##OP(struct brw_compile *p,        \
              struct brw_reg dest,                      \
              struct brw_reg src0,                      \
              struct brw_reg src1)                      \
@@ -439,6 +448,7 @@ ALU2(ADD)
 ALU2(MUL)
 ALU1(FRC)
 ALU1(RNDD)
+ALU1(RNDZ)
 ALU2(MAC)
 ALU2(MACH)
 ALU1(LZD)
@@ -468,9 +478,9 @@ void brw_NOP(struct brw_compile *p)
  */
 
 struct brw_instruction *brw_JMPI(struct brw_compile *p, 
-             struct brw_reg dest,
-             struct brw_reg src0,
-             struct brw_reg src1)
+                                 struct brw_reg dest,
+                                 struct brw_reg src0,
+                                 struct brw_reg src1)
 {
    struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
 
@@ -673,7 +683,7 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
 
 
 struct brw_instruction *brw_WHILE(struct brw_compile *p, 
-              struct brw_instruction *do_insn)
+                                  struct brw_instruction *do_insn)
 {
    struct brw_instruction *insn;
 
@@ -761,7 +771,7 @@ void brw_CMP(struct brw_compile *p,
  * Helpers for the various SEND message types:
  */
 
-/* Invert 8 values
+/** Extended math function, float[8].
  */
 void brw_math( struct brw_compile *p,
               struct brw_reg dest,
@@ -793,7 +803,9 @@ void brw_math( struct brw_compile *p,
                        data_type);
 }
 
-/* Use 2 send instructions to invert 16 elements
+/**
+ * Extended math function, float[16].
+ * Use 2 send instructions.
  */
 void brw_math_16( struct brw_compile *p,
                  struct brw_reg dest,
@@ -846,8 +858,11 @@ void brw_math_16( struct brw_compile *p,
 }
 
 
-
-
+/**
+ * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
+ * Scratch offset should be a multiple of 64.
+ * Used for register spilling.
+ */
 void brw_dp_WRITE_16( struct brw_compile *p,
                      struct brw_reg src,
                      GLuint msg_reg_nr,
@@ -858,6 +873,7 @@ void brw_dp_WRITE_16( struct brw_compile *p,
       brw_set_mask_control(p, BRW_MASK_DISABLE);
       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
+      /* set message header global offset field (reg 0, element 2) */
       brw_MOV(p,
              retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
              brw_imm_d(scratch_offset));
@@ -878,7 +894,7 @@ void brw_dp_WRITE_16( struct brw_compile *p,
       brw_set_src0(insn, src);
 
       brw_set_dp_write_message(insn,
-                              255, /* bti */
+                              255, /* binding table index (255=stateless) */
                               BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
                               BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
                               msg_length,
@@ -886,10 +902,14 @@ void brw_dp_WRITE_16( struct brw_compile *p,
                               0, /* response_length */
                               0); /* eot */
    }
-
 }
 
 
+/**
+ * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
+ * Scratch offset should be a multiple of 64.
+ * Used for register spilling.
+ */
 void brw_dp_READ_16( struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint msg_reg_nr,
@@ -900,6 +920,7 @@ void brw_dp_READ_16( struct brw_compile *p,
       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
       brw_set_mask_control(p, BRW_MASK_DISABLE);
 
+      /* set message header global offset field (reg 0, element 2) */
       brw_MOV(p,
              retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
              brw_imm_d(scratch_offset));
@@ -918,10 +939,10 @@ void brw_dp_READ_16( struct brw_compile *p,
       brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
 
       brw_set_dp_read_message(insn,
-                             255, /* bti */
-                             3,  /* msg_control */
+                             255, /* binding table index (255=stateless) */
+                             3,  /* msg_control (3 means 4 Owords) */
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
-                             1, /* target cache */
+                             1, /* target cache (render/scratch) */
                              1, /* msg_length */
                              2, /* response_length */
                              0); /* eot */
@@ -929,14 +950,138 @@ void brw_dp_READ_16( struct brw_compile *p,
 }
 
 
+/**
+ * Read a float[4] vector from the data port Data Cache (const buffer).
+ * Location (in buffer) should be a multiple of 16.
+ * Used for fetching shader constants.
+ * relAddr is meant to request an indirect (address register) fetch, but this body never reads it -- TODO implement or drop.
+ */
+void brw_dp_READ_4( struct brw_compile *p,
+                    struct brw_reg dest,
+                    GLuint msg_reg_nr,
+                    GLboolean relAddr,
+                    GLuint location,
+                    GLuint bind_table_index )
+{
+   {
+      brw_push_insn_state(p);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+      /* set message header global offset field (reg 0, element 2) */
+      /* Note that grf[0] will be copied to mrf[1] implicitly by the SEND instr (NOTE(review): msg_reg_nr is caller-supplied, so presumably mrf[msg_reg_nr] -- confirm) */
+      brw_MOV(p,
+             retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+             brw_imm_d(location)); /* NOTE(review): signed immediate into a UD-typed dest; brw_dp_READ_4_vs uses brw_imm_ud -- confirm intent */
+      brw_pop_insn_state(p);
+   }
+
+   {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   
+      insn->header.predicate_control = BRW_PREDICATE_NONE;
+      insn->header.compression_control = BRW_COMPRESSION_NONE; 
+      insn->header.destreg__conditonalmod = msg_reg_nr; /* this header field carries the message register number here */
+      insn->header.mask_control = BRW_MASK_DISABLE;
+  
+      /* cast dest to a uword[8] vector */
+      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); /* source is grf[0], whose element 2 was set to 'location' above */
+
+      brw_set_dp_read_message(insn,
+                             bind_table_index,
+                             0,  /* msg_control (0 means 1 Oword) */
+                             BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+                             0, /* source cache = data cache */
+                             1, /* msg_length */
+                             1, /* response_length (1 Oword) */
+                             0); /* eot */
+   }
+}
+
+
+/**
+ * Read float[4] constant(s) from VS constant buffer.
+ * For relative addressing, two float[4] constants will be read into 'dest' (NOTE(review): this body always requests response_length 1 and one Oword selected by 'oword'; presumably the caller issues two calls -- confirm).
+ * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
+ */
+void brw_dp_READ_4_vs(struct brw_compile *p,
+                      struct brw_reg dest,
+                      GLuint oword,
+                      GLboolean relAddr,
+                      struct brw_reg addrReg,
+                      GLuint location,
+                      GLuint bind_table_index)
+{
+   GLuint msg_reg_nr = 1;
+
+   assert(oword < 2); /* oword is passed as msg_control below, where only 0 (lower) and 1 (upper) are meaningful */
+   /*
+   printf("vs const read msg, location %u, msg_reg_nr %d\n",
+          location, msg_reg_nr);
+   */
+
+   /* Setup MRF[1] with location/offset into const buffer */
+   {
+      struct brw_reg b;
+
+      brw_push_insn_state(p);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      /*brw_set_access_mode(p, BRW_ALIGN_16);*/
+
+      /* XXX I think we're setting all the dwords of MRF[1] to 'location',
+       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
+       */
+      b = brw_message_reg(msg_reg_nr);
+      b = retype(b, BRW_REGISTER_TYPE_UD);
+      /*b = get_element_ud(b, 2);*/
+      if (relAddr) {
+         brw_ADD(p, b, addrReg, brw_imm_ud(location)); /* relative: offset = addrReg + location */
+      }
+      else {
+         brw_MOV(p, b, brw_imm_ud(location)); /* absolute: offset = location */
+      }
+
+      brw_pop_insn_state(p);
+   }
+
+   {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   
+      insn->header.predicate_control = BRW_PREDICATE_NONE;
+      insn->header.compression_control = BRW_COMPRESSION_NONE; 
+      insn->header.destreg__conditonalmod = msg_reg_nr; /* this header field carries the message register number here */
+      insn->header.mask_control = BRW_MASK_DISABLE;
+      /*insn->header.access_mode = BRW_ALIGN_16;*/
+  
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, brw_null_reg()); /* src0 is the null register; the offset was written directly into the message register above */
+
+      brw_set_dp_read_message(insn,
+                             bind_table_index,
+                             oword,  /* 0 = lower Oword, 1 = upper Oword */
+                             BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+                             0, /* source cache = data cache */
+                             1, /* msg_length */
+                             1, /* response_length (1 Oword) */
+                             0); /* eot */
+   }
+}
+
+
+
 void brw_fb_WRITE(struct brw_compile *p,
-                  struct brw_reg dest,
-                  GLuint msg_reg_nr,
-                  struct brw_reg src0,
-                  GLuint binding_table_index,
-                  GLuint msg_length,
-                  GLuint response_length,
-                  GLboolean eot)
+                  struct brw_reg dest,
+                  GLuint msg_reg_nr,
+                  struct brw_reg src0,
+                  GLuint binding_table_index,
+                  GLuint msg_length,
+                  GLuint response_length,
+                  GLboolean eot)
 {
    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
    
@@ -957,7 +1102,11 @@ void brw_fb_WRITE(struct brw_compile *p,
 }
 
 
-
+/**
+ * Texture sample instruction.
+ * Note: the msg_type plus msg_length values determine exactly what kind
+ * of sampling operation is performed.  See volume 4, page 161 of docs.
+ */
 void brw_SAMPLE(struct brw_compile *p,
                struct brw_reg dest,
                GLuint msg_reg_nr,
@@ -972,8 +1121,8 @@ void brw_SAMPLE(struct brw_compile *p,
 {
    GLboolean need_stall = 0;
    
-   if(writemask == 0) {
-/*       _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
+   if (writemask == 0) {
+      /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
       return;
    }
    
@@ -1005,7 +1154,7 @@ void brw_SAMPLE(struct brw_compile *p,
 
       if (newmask != writemask) {
         need_stall = 1;
-/*      _mesa_printf("need stall %x %x\n", newmask , writemask); */
+         /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
       }
       else {
         struct brw_reg m1 = brw_message_reg(msg_reg_nr);
@@ -1046,14 +1195,13 @@ void brw_SAMPLE(struct brw_compile *p,
                              eot);
    }
 
-   if (need_stall)
-   {
+   if (need_stall) {
       struct brw_reg reg = vec8(offset(dest, response_length-1));
 
       /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
        */
       brw_push_insn_state(p);
-      brw_set_compression_control(p, GL_FALSE);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
       brw_MOV(p, reg, reg);          
       brw_pop_insn_state(p);
    }