src/gallium/drivers/i965simple/brw_wm_decl.c

   1
   2 #include "brw_context.h"
   3 #include "brw_eu.h"
   4 #include "brw_wm.h"
   5 #include "pipe/p_util.h"
   6 #include "pipe/p_shader_tokens.h"
   7 #include "tgsi/tgsi_parse.h"
   8
   9 static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
  10 {
  11    c->tmp_index++;
  12    c->reg_index = MAX2(c->reg_index, c->tmp_start + c->tmp_index);
  13    return brw_vec8_grf(c->tmp_start + c->tmp_index, 0);
  14 }
  15
  16 static void release_tmps(struct brw_wm_compile *c)
  17 {
  18    c->tmp_index = 0;
  19 }
  20
  21
  22
  23 static int is_null( struct brw_reg reg )
  24 {
  25    return (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
  26            reg.nr == BRW_ARF_NULL);
  27 }
  28
  29 static void emit_pixel_xy( struct brw_wm_compile *c )
  30 {
  31    if (is_null(c->pixel_xy[0])) {
  32
  33       struct brw_compile *p = &c->func;
  34       struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
  35
  36       c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW));
  37       c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW));
  38
  39       /* Calculate pixel centers by adding 1 or 0 to each of the
  40        * micro-tile coordinates passed in r1.
  41        */
  42       brw_ADD(p,
  43               c->pixel_xy[0],
  44               stride(suboffset(r1_uw, 4), 2, 4, 0),
  45               brw_imm_v(0x10101010));
  46
  47       brw_ADD(p,
  48               c->pixel_xy[1],
  49               stride(suboffset(r1_uw, 5), 2, 4, 0),
  50               brw_imm_v(0x11001100));
  51    }
  52 }
  53
  54
  55
  56
  57
  58
  59 static void emit_delta_xy( struct brw_wm_compile *c )
  60 {
  61    if (is_null(c->delta_xy[0])) {
  62       struct brw_compile *p = &c->func;
  63       struct brw_reg r1 = brw_vec1_grf(1, 0);
  64
  65       emit_pixel_xy(c);
  66
  67       c->delta_xy[0] = alloc_tmp(c);
  68       c->delta_xy[1] = alloc_tmp(c);
  69
  70       /* Calc delta X,Y by subtracting origin in r1 from the pixel
  71        * centers.
  72        */
  73       brw_ADD(p,
  74               c->delta_xy[0],
  75               retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW),
  76               negate(r1));
  77
  78       brw_ADD(p,
  79               c->delta_xy[1],
  80               retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW),
  81               negate(suboffset(r1,1)));
  82    }
  83 }
  84
  85
  86
  87 #if 0
  88 static void emit_pixel_w( struct brw_wm_compile *c )
  89 {
  90    if (is_null(c->pixel_w)) {
  91       struct brw_compile *p = &c->func;
  92
  93       struct brw_reg interp_wpos = c->coef_wpos;
  94
  95       c->pixel_w = alloc_tmp(c);
  96
  97       emit_delta_xy(c);
  98
  99       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 100        * result straight into a message reg.
 101        */
 102       struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4);
 103       brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]);
 104       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]);
 105
 106       /* Calc w */
 107       brw_math_16( p,
 108                    c->pixel_w,
 109                    BRW_MATH_FUNCTION_INV,
 110                    BRW_MATH_SATURATE_NONE,
 111                    2,
 112                    brw_null_reg(),
 113                    BRW_MATH_PRECISION_FULL);
 114    }
 115 }
 116 #endif
 117
 118
 119 static void emit_cinterp(struct brw_wm_compile *c,
 120                          int idx,
 121                          int mask )
 122 {
 123    struct brw_compile *p = &c->func;
 124    struct brw_reg interp[4];
 125    struct brw_reg coef = c->payload_coef[idx];
 126    int i;
 127
 128    interp[0] = brw_vec1_grf(coef.nr, 0);
 129    interp[1] = brw_vec1_grf(coef.nr, 4);
 130    interp[2] = brw_vec1_grf(coef.nr+1, 0);
 131    interp[3] = brw_vec1_grf(coef.nr+1, 4);
 132
 133    for(i = 0; i < 4; i++ ) {
 134       if (mask & (1<<i)) {
 135          struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i];
 136          brw_MOV(p, dst, suboffset(interp[i],3));
 137       }
 138    }
 139 }
 140
 141 static void emit_linterp(struct brw_wm_compile *c,
 142                          int idx,
 143                          int mask )
 144 {
 145    struct brw_compile *p = &c->func;
 146    struct brw_reg interp[4];
 147    struct brw_reg coef = c->payload_coef[idx];
 148    int i;
 149
 150    emit_delta_xy(c);
 151
 152    interp[0] = brw_vec1_grf(coef.nr, 0);
 153    interp[1] = brw_vec1_grf(coef.nr, 4);
 154    interp[2] = brw_vec1_grf(coef.nr+1, 0);
 155    interp[3] = brw_vec1_grf(coef.nr+1, 4);
 156
 157    for(i = 0; i < 4; i++ ) {
 158       if (mask & (1<<i)) {
 159          struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i];
 160          brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]);
 161          brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]);
 162       }
 163    }
 164 }
 165
 166 #if 0
 167 static void emit_pinterp(struct brw_wm_compile *c,
 168                          int idx,
 169                          int mask )
 170 {
 171    struct brw_compile *p = &c->func;
 172    struct brw_reg interp[4];
 173    struct brw_reg coef = c->payload_coef[idx];
 174    int i;
 175
 176    get_delta_xy(c);
 177    get_pixel_w(c);
 178
 179    interp[0] = brw_vec1_grf(coef.nr, 0);
 180    interp[1] = brw_vec1_grf(coef.nr, 4);
 181    interp[2] = brw_vec1_grf(coef.nr+1, 0);
 182    interp[3] = brw_vec1_grf(coef.nr+1, 4);
 183
 184    for(i = 0; i < 4; i++ ) {
 185       if (mask & (1<<i)) {
 186          struct brw_reg dst = allocate_reg(c, TGSI_FILE_INPUT, idx, i);
 187          brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]);
 188          brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]);
 189          brw_MUL(p, dst, dst, c->pixel_w);
 190       }
 191    }
 192 }
 193 #endif
 194
 195
 196
 197 #if 0
 198 static void emit_wpos( )
 199 {
 200    struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
 201    struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
 202    struct tgsi_full_src_register deltas = get_delta_xy(c);
 203    struct tgsi_full_src_register arg2;
 204    unsigned opcode;
 205
 206    opcode = WM_LINTERP;
 207    arg2 = src_undef();
 208
 209    /* Have to treat wpos.xy specially:
 210     */
 211    emit_op(c,
 212            WM_WPOSXY,
 213            dst_mask(dst, WRITEMASK_XY),
 214            0, 0, 0,
 215            get_pixel_xy(c),
 216            src_undef(),
 217            src_undef());
 218
 219    dst = dst_mask(dst, WRITEMASK_ZW);
 220
 221    /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
 222     */
 223    emit_op(c,
 224            WM_LINTERP,
 225            dst,
 226            0, 0, 0,
 227            interp,
 228            deltas,
 229            arg2);
 230 }
 231 #endif
 232
 233
 234
 235
 236 /* Perform register allocation:
 237  *
 238  *  -- r0???
 239  *  -- passthrough depth regs (and stencil/aa??)
 240  *  -- curbe ??
 241  *  -- inputs (coefficients)
 242  *
 243  * Use a totally static register allocation.  This will perform poorly
 244  * but is an easy way to get started (again).
 245  */
 246 static void prealloc_reg(struct brw_wm_compile *c)
 247 {
 248    int i, j;
 249    int nr_curbe_regs = 0;
 250
 251    /* R0, then some depth related regs:
 252     */
 253    for (i = 0; i < c->key.nr_depth_regs; i++) {
 254       c->payload_depth[i] =  brw_vec8_grf(i*2, 0);
 255       c->reg_index += 2;
 256    }
 257
 258
 259    /* Then a copy of our part of the CURBE entry:
 260     */
 261    {
 262       int nr_constants = c->fp->info.file_max[TGSI_FILE_CONSTANT] + 1;
 263       int index = 0;
 264
 265       /* XXX number of constants, or highest numbered constant? */
 266       assert(nr_constants == c->fp->info.file_count[TGSI_FILE_CONSTANT]);
 267
 268       c->prog_data.max_const = 4*nr_constants;
 269       for (i = 0; i < nr_constants; i++) {
 270          for (j = 0; j < 4; j++, index++)
 271             c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8,
 272                                                                 index%8);
 273       }
 274
 275       nr_curbe_regs = 2*((4*nr_constants+15)/16);
 276       c->reg_index += nr_curbe_regs;
 277    }
 278
 279    /* Adjust for parameter coefficients for position, which are
 280     * currently always provided.
 281     */
 282 //   c->position_coef[i] = brw_vec8_grf(c->reg_index, 0);
 283    c->reg_index += 2;
 284
 285    /* Next we receive the plane coefficients for parameter
 286     * interpolation:
 287     */
 288    assert(c->fp->info.file_max[TGSI_FILE_INPUT] == c->fp->info.num_inputs);
 289    for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
 290       c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0);
 291       c->reg_index += 2;
 292    }
 293
 294    c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
 295    c->prog_data.urb_read_length = (c->fp->info.num_inputs + 1) * 2;
 296    c->prog_data.curb_read_length = nr_curbe_regs;
 297
 298    /* That's the end of the payload, now we can start allocating registers.
 299     */
 300    c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
 301    c->reg_index++;
 302
 303    c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
 304    c->reg_index += 2;
 305
 306    /* Now allocate room for the interpolated inputs and staging
 307     * registers for the outputs:
 308     */
 309    /* XXX do we want to loop over the _number_ of inputs/outputs or loop
 310     * to the highest input/output index that's used?
 311     *  Probably the same, actually.
 312     */
 313    assert(c->fp->info.file_max[TGSI_FILE_INPUT] + 1 == c->fp->info.num_inputs);
 314    assert(c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1 == c->fp->info.num_outputs);
 315    for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++)
 316       for (j = 0; j < 4; j++)
 317          c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 );
 318
 319    for (i = 0; i < c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1; i++)
 320       for (j = 0; j < 4; j++)
 321          c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 );
 322
 323    /* Beyond this we should only need registers for internal temporaries:
 324     */
 325    c->tmp_start = c->reg_index;
 326 }
 327
 328
 329
 330
 331
 332 /* Need to interpolate fragment program inputs in as a preamble to the
 333  * shader.  A more sophisticated compiler would do this on demand, but
 334  * we'll do it up front:
 335  */
 336 void brw_wm_emit_decls(struct brw_wm_compile *c)
 337 {
 338    struct tgsi_parse_context parse;
 339    int done = 0;
 340
 341    prealloc_reg(c);
 342
 343    tgsi_parse_init( &parse, c->fp->program.tokens );
 344
 345    while( !done &&
 346           !tgsi_parse_end_of_tokens( &parse ) )
 347    {
 348       tgsi_parse_token( &parse );
 349
 350       switch( parse.FullToken.Token.Type ) {
 351       case TGSI_TOKEN_TYPE_DECLARATION:
 352       {
 353          const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
 354          unsigned first = decl->DeclarationRange.First;
 355          unsigned last = decl->DeclarationRange.Last;
 356          unsigned mask = decl->Declaration.UsageMask; /* ? */
 357          unsigned i;
 358
 359          if (decl->Declaration.File != TGSI_FILE_INPUT)
 360             break;
 361
 362          for( i = first; i <= last; i++ ) {
 363             switch (decl->Declaration.Interpolate) {
 364             case TGSI_INTERPOLATE_CONSTANT:
 365                emit_cinterp(c, i, mask);
 366                break;
 367
 368             case TGSI_INTERPOLATE_LINEAR:
 369                emit_linterp(c, i, mask);
 370                break;
 371
 372             case TGSI_INTERPOLATE_PERSPECTIVE:
 373                //emit_pinterp(c, i, mask);
 374                emit_linterp(c, i, mask);
 375                break;
 376             }
 377          }
 378          break;
 379       }
 380       case TGSI_TOKEN_TYPE_IMMEDIATE:
 381       case TGSI_TOKEN_TYPE_INSTRUCTION:
 382       default:
 383          done = 1;
 384          break;
 385       }
 386    }
 387
 388    tgsi_parse_free (&parse);
 389
 390    release_tmps(c);
 391 }