i965: implement relative addressing for VS constant buffer reads
author Brian Paul <brianp@vmware.com>
Wed, 15 Apr 2009 22:49:18 +0000 (16:49 -0600)
committer Brian Paul <brianp@vmware.com>
Thu, 16 Apr 2009 17:08:23 +0000 (11:08 -0600)
A scatter-read should be possible, but we're just using two READs for
the time being.

src/mesa/drivers/dri/i965/brw_eu.h
src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_vs_emit.c

index 66f8eb840c1ecea601adba0301402c9a27cac7d2..896e67dbfe914523c0f33e47a3d9fd5b832c5377 100644 (file)
@@ -868,6 +868,7 @@ void brw_dp_READ_4( struct brw_compile *p,
 void brw_dp_READ_4_vs( struct brw_compile *p,
                        struct brw_reg dest,
                        GLboolean relAddr,
+                       struct brw_reg addrReg,
                        GLuint location,
                        GLuint bind_table_index );
 
index c731a93a8d6d39a2f4579dc3f3191b6a2f938195..df2141660c088f63d54d1c5af1830a7ac18f83c3 100644 (file)
@@ -1003,15 +1003,18 @@ void brw_dp_READ_4( struct brw_compile *p,
 
 
 /**
- * Read float[4] constant from VS constant buffer.
+ * Read float[4] constant(s) from VS constant buffer.
+ * For relative addressing, two float[4] constants will be read into 'dest'.
+ * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
  */
 void brw_dp_READ_4_vs(struct brw_compile *p,
                       struct brw_reg dest,
                       GLboolean relAddr,
+                      struct brw_reg addrReg,
                       GLuint location,
                       GLuint bind_table_index)
 {
-   const GLuint msg_reg_nr = 1;
+   GLuint msg_reg_nr = 1;
 
    /*
    printf("vs const read msg, location %u, msg_reg_nr %d\n",
@@ -1034,7 +1037,12 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
       b = brw_message_reg(msg_reg_nr);
       b = retype(b, BRW_REGISTER_TYPE_UD);
       /*b = get_element_ud(b, 2);*/
-      brw_MOV(p, b, brw_imm_ud(location));
+      if (relAddr) {
+         brw_ADD(p, b, addrReg, brw_imm_ud(location));
+      }
+      else {
+         brw_MOV(p, b, brw_imm_ud(location));
+      }
 
       brw_pop_insn_state(p);
    }
@@ -1053,13 +1061,56 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
 
       brw_set_dp_read_message(insn,
                              bind_table_index,
-                             0,  /* msg_control (0 means 1 Oword) */
+                             0,  /* msg_control (0 means 1 Oword, lower half) */
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              0, /* source cache = data cache */
                              1, /* msg_length */
                              1, /* response_length (1 Oword) */
                              0); /* eot */
    }
+
+   if (relAddr) {
+      /* second read to get second constant */
+      msg_reg_nr++;
+      {
+         /* Setup MRF[msg_reg_nr] (now MRF[2]) with location/offset into const buffer */
+         struct brw_reg b;
+
+         brw_push_insn_state(p);
+         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+         brw_set_mask_control(p, BRW_MASK_DISABLE);
+         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+         b = brw_message_reg(msg_reg_nr);
+         b = retype(b, BRW_REGISTER_TYPE_UD);
+         addrReg = suboffset(addrReg, 1); /* upper half of addrReg */
+         brw_ADD(p, b, addrReg, brw_imm_ud(location));
+
+         brw_pop_insn_state(p);
+      }
+
+      {
+         struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+         insn->header.predicate_control = BRW_PREDICATE_NONE;
+         insn->header.compression_control = BRW_COMPRESSION_NONE; 
+         insn->header.destreg__conditonalmod = msg_reg_nr;
+         insn->header.mask_control = BRW_MASK_DISABLE;
+         /*insn->header.access_mode = BRW_ALIGN_16;*/
+
+         brw_set_dest(insn, dest);
+         brw_set_src0(insn, brw_null_reg());
+
+         brw_set_dp_read_message(insn,
+                                 bind_table_index,
+                                 1,  /* msg_control (1 means 1 Oword, upper half) */
+                                 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+                                 0, /* source cache = data cache */
+                                 1, /* msg_length */
+                                 1, /* response_length (1 Oword) */
+                                 0); /* eot */
+      }
+   }
 }
 
 
index 19ead73d8ca537066189c2cb2e535ab3e29cafa5..98fbdf5064d0afe0b2c55024e812ea848d7e5e7c 100644 (file)
@@ -709,7 +709,7 @@ get_constant(struct brw_vs_compile *c,
 
    assert(argIndex < 3);
 
-   if (c->current_const[argIndex].index != src->Index) {
+   if (c->current_const[argIndex].index != src->Index || src->RelAddr) {
 
       c->current_const[argIndex].index = src->Index;
 
@@ -722,15 +722,18 @@ get_constant(struct brw_vs_compile *c,
       brw_dp_READ_4_vs(p,
                        c->current_const[argIndex].reg, /* writeback dest */
                        src->RelAddr,                   /* relative indexing? */
+                       c->regs[PROGRAM_ADDRESS][0],    /* address register */
                        16 * src->Index,                /* byte offset */
                        SURF_INDEX_VERT_CONST_BUFFER    /* binding table index */
                        );
    }
 
-   /* replicate lower four floats into upper four floats (to get XYZWXYZW) */
    const_reg = c->current_const[argIndex].reg;
-   const_reg = stride(const_reg, 0, 4, 0);
-   const_reg.subnr = 0;
+   if (!src->RelAddr) {
+      /* replicate lower four floats into upper half (to get XYZWXYZW) */
+      const_reg = stride(const_reg, 0, 4, 0);
+      const_reg.subnr = 0;
+   }
 
    return const_reg;
 }
@@ -771,6 +774,42 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
 }
 
 
+/**
+ * Indirect addressing:  get reg[[arg] + offset] into a temp ('arg' is the base reg, 'offset' counts 16-byte slots).
+ */
+static struct brw_reg deref( struct brw_vs_compile *c,
+                            struct brw_reg arg,
+                            GLint offset)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = vec4(get_tmp(c));
+   struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+   struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
+   GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
+   struct brw_reg indirect = brw_vec4_indirect(0,0);
+
+   {
+      brw_push_insn_state(p);
+      brw_set_access_mode(p, BRW_ALIGN_1);
+
+      /* This is pretty clunky - a0 is loaded twice (vp_address + byte_offset, then
+       * suboffset(vp_address, 8) + byte_offset) and each 4-dword half of tmp is fetched
+       * through it in turn.  There must be a single-pass way, but I couldn't get it to work.
+       */
+      brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
+      brw_MOV(p, tmp, indirect);
+
+      brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
+      brw_MOV(p, suboffset(tmp, 4), indirect);
+
+      brw_pop_insn_state(p);
+   }
+   
+   /* NOTE: tmp is not released here; it is handed back to the caller widened to vec8 */
+   return vec8(tmp);
+}
+
+
 /**
  * Get brw reg corresponding to the instruction's [argIndex] src reg.
  * TODO: relative addressing!
@@ -782,19 +821,29 @@ get_src_reg( struct brw_vs_compile *c,
 {
    const GLuint file = inst->SrcReg[argIndex].File;
    const GLint index = inst->SrcReg[argIndex].Index;
+   const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr;
 
    switch (file) {
    case PROGRAM_TEMPORARY:
    case PROGRAM_INPUT:
    case PROGRAM_OUTPUT:
-      assert(c->regs[file][index].nr != 0);
-      return c->regs[file][index];
+      if (relAddr) {
+         return deref(c, c->regs[file][0], index);
+      }
+      else {
+         assert(c->regs[file][index].nr != 0);
+         return c->regs[file][index];
+      }
+
    case PROGRAM_STATE_VAR:
    case PROGRAM_CONSTANT:
    case PROGRAM_UNIFORM:
       if (c->use_const_buffer) {
          return get_constant(c, inst, argIndex);
       }
+      else if (relAddr) {
+         return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+      }
       else {
          assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
          return c->regs[PROGRAM_STATE_VAR][index];
@@ -817,42 +866,6 @@ get_src_reg( struct brw_vs_compile *c,
 }
 
 
-/**
- * Indirect addressing:  get reg[[arg] + offset].
- */
-static struct brw_reg deref( struct brw_vs_compile *c,
-                            struct brw_reg arg,
-                            GLint offset)
-{
-   struct brw_compile *p = &c->func;
-   struct brw_reg tmp = vec4(get_tmp(c));
-   struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
-   struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
-   GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
-   struct brw_reg indirect = brw_vec4_indirect(0,0);
-
-   {
-      brw_push_insn_state(p);
-      brw_set_access_mode(p, BRW_ALIGN_1);
-
-      /* This is pretty clunky - load the address register twice and
-       * fetch each 4-dword value in turn.  There must be a way to do
-       * this in a single pass, but I couldn't get it to work.
-       */
-      brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
-      brw_MOV(p, tmp, indirect);
-
-      brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
-      brw_MOV(p, suboffset(tmp, 4), indirect);
-
-      brw_pop_insn_state(p);
-   }
-   
-   /* NOTE: tmp not released */
-   return vec8(tmp);
-}
-
-
 static void emit_arl( struct brw_vs_compile *c,
                      struct brw_reg dst,
                      struct brw_reg arg0 )
@@ -864,8 +877,8 @@ static void emit_arl( struct brw_vs_compile *c,
    if (need_tmp) 
       tmp = get_tmp(c);
 
-   brw_RNDD(p, tmp, arg0);
-   brw_MUL(p, dst, tmp, brw_imm_d(16));
+   brw_RNDD(p, tmp, arg0);               /* tmp = floor(arg0) */
+   brw_MUL(p, dst, tmp, brw_imm_d(16));  /* dst = tmp * 16 */
 
    if (need_tmp)
       release_tmp(c, tmp);
@@ -888,13 +901,7 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
    if (src->File == PROGRAM_UNDEFINED)
       return brw_null_reg();
 
-   if (src->RelAddr) {
-      /* XXX fix */
-      reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
-   }
-   else {
-      reg = get_src_reg(c, inst, argIndex);
-   }
+   reg = get_src_reg(c, inst, argIndex);
 
    /* Convert 3-bit swizzle to 2-bit.  
     */
@@ -989,10 +996,7 @@ static void emit_swz( struct brw_vs_compile *c,
    if (src_mask) {
       struct brw_reg arg0;
 
-      if (src.RelAddr) 
-        arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
-      else
-        arg0 = get_src_reg(c, inst, argIndex);
+      arg0 = get_src_reg(c, inst, argIndex);
 
       arg0 = brw_swizzle(arg0, 
                         src_swz[0], src_swz[1],