2 #include "brw_context.h"
5 #include "pipe/p_util.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "pipe/tgsi/util/tgsi_parse.h"
9 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
12 c
->reg_index
= MAX2(c
->reg_index
, c
->tmp_start
+ c
->tmp_index
);
13 return brw_vec8_grf(c
->tmp_start
+ c
->tmp_index
, 0);
/* Takes the compile state and returns nothing.  The body is not present in
 * this listing (the embedded line numbers jump from 16 to 23).
 *
 * NOTE(review): presumably resets the temporary-allocation state that
 * alloc_tmp() advances -- restore the body from the upstream file.
 */
16 static void release_tmps(struct brw_wm_compile
*c
)
23 static int is_null( struct brw_reg reg
)
25 return (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
26 reg
.nr
== BRW_ARF_NULL
);
/* Lazily set up c->pixel_xy[0..1].
 *
 * The work is guarded by is_null(c->pixel_xy[0]), so it runs only while that
 * slot still holds the null register.  Two temporaries are taken from
 * alloc_tmp(), retyped to BRW_REGISTER_TYPE_UW and wrapped with vec8().
 * The source operands visible below are strided views of r1 (retyped to UW)
 * at suboffsets 4 and 5, each paired with a brw_imm_v() immediate.
 *
 * NOTE(review): this listing omits source lines (the embedded numbers jump
 * 40 -> 44 and 45 -> 49), including the instruction-emit calls that consume
 * these operands, the end of the inner comment and the function's braces.
 * Restore them from the upstream file before relying on this block.
 */
29 static void emit_pixel_xy( struct brw_wm_compile
*c
)
31 if (is_null(c
->pixel_xy
[0])) {
33 struct brw_compile
*p
= &c
->func
;
34 struct brw_reg r1_uw
= retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW
);
36 c
->pixel_xy
[0] = vec8(retype(alloc_tmp(c
), BRW_REGISTER_TYPE_UW
));
37 c
->pixel_xy
[1] = vec8(retype(alloc_tmp(c
), BRW_REGISTER_TYPE_UW
));
39 /* Calculate pixel centers by adding 1 or 0 to each of the
40 * micro-tile coordinates passed in r1.
44 stride(suboffset(r1_uw
, 4), 2, 4, 0),
45 brw_imm_v(0x10101010));
49 stride(suboffset(r1_uw
, 5), 2, 4, 0),
50 brw_imm_v(0x11001100));
/* Lazily set up c->delta_xy[0..1].
 *
 * Guarded by is_null(c->delta_xy[0]).  Two temporaries are taken from
 * alloc_tmp() for the X and Y deltas.  The operands visible below are
 * c->pixel_xy[0] and c->pixel_xy[1] retyped to BRW_REGISTER_TYPE_UW, and
 * the negated view of r1 at suboffset 1.
 *
 * NOTE(review): this listing omits source lines (the embedded numbers jump
 * 63 -> 67, 70 -> 75 and 75 -> 80), including the instruction-emit calls,
 * the remainder of the inner comment and the braces.  Whether pixel_xy is
 * initialised from here cannot be seen -- confirm against the upstream file.
 */
59 static void emit_delta_xy( struct brw_wm_compile
*c
)
61 if (is_null(c
->delta_xy
[0])) {
62 struct brw_compile
*p
= &c
->func
;
63 struct brw_reg r1
= brw_vec1_grf(1, 0);
67 c
->delta_xy
[0] = alloc_tmp(c
);
68 c
->delta_xy
[1] = alloc_tmp(c
);
70 /* Calc delta X,Y by subtracting origin in r1 from the pixel
75 retype(c
->pixel_xy
[0], BRW_REGISTER_TYPE_UW
),
80 retype(c
->pixel_xy
[1], BRW_REGISTER_TYPE_UW
),
81 negate(suboffset(r1
,1)));
/* Lazily set up c->pixel_w.
 *
 * Guarded by is_null(c->pixel_w).  A temporary is taken from alloc_tmp(),
 * then the vec1 at (c->coef_wpos.nr + 1, 4) is interpolated: brw_LINE with
 * the null register as destination against c->delta_xy[0], followed by
 * brw_MAC into message register 2 against c->delta_xy[1].  The trailing
 * arguments select BRW_MATH_FUNCTION_INV with BRW_MATH_SATURATE_NONE and
 * BRW_MATH_PRECISION_FULL.
 *
 * NOTE(review): the call that owns those trailing arguments is not in this
 * listing (the embedded numbers jump 104 -> 109 and 110 -> 113), nor are the
 * braces or the end of the inner comment.  Restore from the upstream file.
 */
88 static void emit_pixel_w( struct brw_wm_compile
*c
)
90 if (is_null(c
->pixel_w
)) {
91 struct brw_compile
*p
= &c
->func
;
93 struct brw_reg interp_wpos
= c
->coef_wpos
;
95 c
->pixel_w
= alloc_tmp(c
);
99 /* Calc 1/w - just linterp wpos[3] optimized by putting the
100 * result straight into a message reg.
102 struct brw_reg interp3
= brw_vec1_grf(interp_wpos
.nr
+1, 4);
103 brw_LINE(p
, brw_null_reg(), interp3
, c
->delta_xy
[0]);
104 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), c
->delta_xy
[1]);
109 BRW_MATH_FUNCTION_INV
,
110 BRW_MATH_SATURATE_NONE
,
113 BRW_MATH_PRECISION_FULL
);
/* Emit the moves for one constant-interpolated input.
 *
 * For each of the four components, the destination is
 * c->wm_regs[TGSI_FILE_INPUT][idx][i] and the source is the i-th
 * coefficient view offset by 3 (suboffset(interp[i], 3)), copied with
 * brw_MOV.
 *
 * NOTE(review): this listing omits source lines (the embedded numbers jump
 * 119 -> 123): the parameters after `c`, the declaration of `i`, the braces
 * and anything else between the visible statements.  brw_wm_emit_decls
 * calls this as emit_cinterp(c, i, mask), so the missing parameters are
 * presumably an input index and a usage mask -- confirm against upstream.
 */
119 static void emit_cinterp(struct brw_wm_compile
*c
,
123 struct brw_compile
*p
= &c
->func
;
124 struct brw_reg interp
[4];
125 struct brw_reg coef
= c
->payload_coef
[idx
];
/* Four vec1 views into the coefficient payload: sub-registers 0 and 4 of
 * GRF coef.nr, then sub-registers 0 and 4 of GRF coef.nr + 1.
 */
128 interp
[0] = brw_vec1_grf(coef
.nr
, 0);
129 interp
[1] = brw_vec1_grf(coef
.nr
, 4);
130 interp
[2] = brw_vec1_grf(coef
.nr
+1, 0);
131 interp
[3] = brw_vec1_grf(coef
.nr
+1, 4);
133 for(i
= 0; i
< 4; i
++ ) {
135 struct brw_reg dst
= c
->wm_regs
[TGSI_FILE_INPUT
][idx
][i
];
/* Copy from the i-th view, offset by 3, into the input's register. */
136 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
/* Emit the LINE/MAC pair for one linearly interpolated input.
 *
 * For each of the four components, the destination is
 * c->wm_regs[TGSI_FILE_INPUT][idx][i].  brw_LINE is issued with the null
 * register as destination, using interp[i] and c->delta_xy[0]; brw_MAC
 * then writes dst from suboffset(interp[i], 1) and c->delta_xy[1].
 *
 * NOTE(review): this listing omits source lines (the embedded numbers jump
 * 141 -> 145): the parameters after `c`, the declaration of `i`, the braces
 * and anything else between the visible statements.  brw_wm_emit_decls
 * calls this as emit_linterp(c, i, mask).  Nothing visible here fills in
 * c->delta_xy before it is read -- confirm against the upstream file.
 */
141 static void emit_linterp(struct brw_wm_compile
*c
,
145 struct brw_compile
*p
= &c
->func
;
146 struct brw_reg interp
[4];
147 struct brw_reg coef
= c
->payload_coef
[idx
];
/* Four vec1 views into the coefficient payload: sub-registers 0 and 4 of
 * GRF coef.nr, then sub-registers 0 and 4 of GRF coef.nr + 1.
 */
152 interp
[0] = brw_vec1_grf(coef
.nr
, 0);
153 interp
[1] = brw_vec1_grf(coef
.nr
, 4);
154 interp
[2] = brw_vec1_grf(coef
.nr
+1, 0);
155 interp
[3] = brw_vec1_grf(coef
.nr
+1, 4);
157 for(i
= 0; i
< 4; i
++ ) {
159 struct brw_reg dst
= c
->wm_regs
[TGSI_FILE_INPUT
][idx
][i
];
/* NOTE(review): LINE targets the null register, so its result presumably
 * reaches the following MAC through the accumulator -- confirm in the EU
 * documentation.
 */
160 brw_LINE(p
, brw_null_reg(), interp
[i
], c
->delta_xy
[0]);
161 brw_MAC(p
, dst
, suboffset(interp
[i
],1), c
->delta_xy
[1]);
/* Emit the LINE/MAC/MUL sequence for one perspective-interpolated input.
 *
 * Same LINE/MAC pair as emit_linterp, except that each destination comes
 * from allocate_reg(c, TGSI_FILE_INPUT, idx, i) and the result is then
 * multiplied in place by c->pixel_w with brw_MUL.
 *
 * The only reference to this function in brw_wm_emit_decls is commented
 * out; perspective inputs currently go through emit_linterp instead.
 *
 * NOTE(review): this listing omits source lines (the embedded numbers jump
 * 167 -> 171): the parameters after `c`, the declaration of `i`, the braces
 * and anything else between the visible statements.  Nothing visible here
 * fills in c->pixel_w or c->delta_xy before they are read -- confirm
 * against the upstream file.
 */
167 static void emit_pinterp(struct brw_wm_compile
*c
,
171 struct brw_compile
*p
= &c
->func
;
172 struct brw_reg interp
[4];
173 struct brw_reg coef
= c
->payload_coef
[idx
];
/* Four vec1 views into the coefficient payload: sub-registers 0 and 4 of
 * GRF coef.nr, then sub-registers 0 and 4 of GRF coef.nr + 1.
 */
179 interp
[0] = brw_vec1_grf(coef
.nr
, 0);
180 interp
[1] = brw_vec1_grf(coef
.nr
, 4);
181 interp
[2] = brw_vec1_grf(coef
.nr
+1, 0);
182 interp
[3] = brw_vec1_grf(coef
.nr
+1, 4);
184 for(i
= 0; i
< 4; i
++ ) {
186 struct brw_reg dst
= allocate_reg(c
, TGSI_FILE_INPUT
, idx
, i
);
187 brw_LINE(p
, brw_null_reg(), interp
[i
], c
->delta_xy
[0]);
188 brw_MAC(p
, dst
, suboffset(interp
[i
],1), c
->delta_xy
[1]);
/* Scale the interpolated value by c->pixel_w, in place. */
189 brw_MUL(p
, dst
, dst
, c
->pixel_w
);
/* Fragment of a window-position setup routine.
 *
 * What is visible: a destination built with dst_reg(PROGRAM_INPUT, idx), a
 * source built with src_reg(PROGRAM_PAYLOAD, idx), deltas obtained from
 * get_delta_xy(c), and the destination write mask switched from
 * WRITEMASK_XY to WRITEMASK_ZW part-way through.
 *
 * NOTE(review): the parameter list is empty, yet the body uses `c` and
 * `idx`, and it mixes prog_dst_register / PROGRAM_* names with TGSI types.
 * That looks like disabled or not-yet-ported code, but no preprocessor
 * guard is visible in this listing -- confirm against the upstream file.
 * Most of the body (embedded numbers 204-235) is missing here.
 */
198 static void emit_wpos( )
200 struct prog_dst_register dst
= dst_reg(PROGRAM_INPUT
, idx
);
201 struct tgsi_full_src_register interp
= src_reg(PROGRAM_PAYLOAD
, idx
);
202 struct tgsi_full_src_register deltas
= get_delta_xy(c
);
203 struct tgsi_full_src_register arg2
;
209 /* Have to treat wpos.xy specially:
213 dst_mask(dst
, WRITEMASK_XY
),
219 dst
= dst_mask(dst
, WRITEMASK_ZW
);
221 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
/* Perform register allocation:
 *
 *   -- passthrough depth regs (and stencil/aa??)
 *   -- inputs (coefficients)
 *
 * Use a totally static register allocation.  This will perform poorly
 * but is an easy way to get started (again).
 */
/* Statically lay out GRF usage for the fragment program held in c.
 *
 * Steps visible in this listing, in order:
 *  - c->payload_depth[i] is set to GRF i*2 for each of
 *    c->key.nr_depth_regs;
 *  - every component of every TGSI_FILE_CONSTANT register is given a vec1
 *    GRF starting at c->reg_index + index/8, c->prog_data.max_const is set
 *    to 4*nr_constants, and reg_index advances by
 *    nr_curbe_regs = 2*((4*nr_constants+15)/16);
 *  - c->payload_coef[i] is set from reg_index for every TGSI_FILE_INPUT
 *    register;
 *  - prog_data.first_curbe_grf, urb_read_length and curb_read_length are
 *    filled in;
 *  - c->emit_mask_reg and c->stack are placed at reg_index;
 *  - one vec8 GRF per component is assigned to every input and every
 *    output register in c->wm_regs, post-incrementing reg_index each time;
 *  - c->tmp_start records reg_index as the start of the temporaries.
 *
 * NOTE(review): this listing omits source lines: the declarations of i, j
 * and index, the braces, the second argument of the constant
 * brw_vec1_grf() call, the ends of the inner comments, and possibly
 * reg_index updates between the steps above.  Restore them from the
 * upstream file before relying on this block.
 */
246 static void prealloc_reg(struct brw_wm_compile
*c
)
249 int nr_curbe_regs
= 0;
251 /* R0, then some depth related regs:
253 for (i
= 0; i
< c
->key
.nr_depth_regs
; i
++) {
254 c
->payload_depth
[i
] = brw_vec8_grf(i
*2, 0);
259 /* Then a copy of our part of the CURBE entry:
262 int nr_constants
= c
->fp
->info
.nr_regs
[TGSI_FILE_CONSTANT
];
265 c
->prog_data
.max_const
= 4*nr_constants
;
266 for (i
= 0; i
< nr_constants
; i
++) {
267 for (j
= 0; j
< 4; j
++, index
++)
268 c
->wm_regs
[TGSI_FILE_CONSTANT
][i
][j
] = brw_vec1_grf(c
->reg_index
+ index
/8,
272 nr_curbe_regs
= 2*((4*nr_constants
+15)/16);
273 c
->reg_index
+= nr_curbe_regs
;
276 /* Adjust for parameter coefficients for position, which are
277 * currently always provided.
279 // c->position_coef[i] = brw_vec8_grf(c->reg_index, 0);
282 /* Next we receive the plane coefficients for parameter
285 for (i
= 0; i
< c
->fp
->info
.nr_regs
[TGSI_FILE_INPUT
]; i
++) {
286 c
->payload_coef
[i
] = brw_vec8_grf(c
->reg_index
, 0);
290 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
291 c
->prog_data
.urb_read_length
= (c
->fp
->program
.num_inputs
+ 1) * 2;
292 c
->prog_data
.curb_read_length
= nr_curbe_regs
;
294 /* That's the end of the payload, now we can start allocating registers.
296 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
299 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
302 /* Now allocate room for the interpolated inputs and staging
303 * registers for the outputs:
305 for (i
= 0; i
< c
->fp
->info
.nr_regs
[TGSI_FILE_INPUT
]; i
++)
306 for (j
= 0; j
< 4; j
++)
307 c
->wm_regs
[TGSI_FILE_INPUT
][i
][j
] = brw_vec8_grf( c
->reg_index
++, 0 );
309 for (i
= 0; i
< c
->fp
->info
.nr_regs
[TGSI_FILE_OUTPUT
]; i
++)
310 for (j
= 0; j
< 4; j
++)
311 c
->wm_regs
[TGSI_FILE_OUTPUT
][i
][j
] = brw_vec8_grf( c
->reg_index
++, 0 );
313 /* Beyond this we should only need registers for internal temporaries:
315 c
->tmp_start
= c
->reg_index
;
/* Need to interpolate fragment program inputs in as a preamble to the
 * shader.  A more sophisticated compiler would do this on demand, but
 * we'll do it up front:
 */
/* Walk the TGSI token stream of c->fp->program and emit interpolation code
 * for every declared input register.
 *
 * The stream is opened with tgsi_parse_init(), stepped with
 * tgsi_parse_token() until tgsi_parse_end_of_tokens() reports the end, and
 * released with tgsi_parse_free().  For a declaration token, each register
 * index in [First, Last] is dispatched on its interpolation mode:
 * CONSTANT -> emit_cinterp, LINEAR -> emit_linterp, and PERSPECTIVE ->
 * emit_linterp as well (the emit_pinterp call is commented out).
 *
 * NOTE(review): this listing omits source lines: the declaration of `i`,
 * the loop keyword, the braces, the statement controlled by the
 * File != TGSI_FILE_INPUT test, and whatever follows each emit_* call and
 * the IMMEDIATE / INSTRUCTION case labels (no `break` is visible).
 * Restore them from the upstream file before relying on this block.
 */
326 void brw_wm_emit_decls(struct brw_wm_compile
*c
)
328 struct tgsi_parse_context parse
;
333 tgsi_parse_init( &parse
, c
->fp
->program
.tokens
);
336 !tgsi_parse_end_of_tokens( &parse
) )
338 tgsi_parse_token( &parse
);
340 switch( parse
.FullToken
.Token
.Type
) {
341 case TGSI_TOKEN_TYPE_DECLARATION
:
343 const struct tgsi_full_declaration
*decl
= &parse
.FullToken
.FullDeclaration
;
344 unsigned first
= decl
->u
.DeclarationRange
.First
;
345 unsigned last
= decl
->u
.DeclarationRange
.Last
;
346 unsigned mask
= decl
->Declaration
.UsageMask
; /* ? */
/* NOTE(review): the statement governed by this test is not in the listing;
 * presumably non-input declarations are skipped -- confirm.
 */
349 if (decl
->Declaration
.File
!= TGSI_FILE_INPUT
)
352 assert(decl
->Declaration
.Interpolate
);
/* One emit_* call per declared register index, first..last inclusive. */
354 for( i
= first
; i
<= last
; i
++ ) {
355 switch (decl
->Interpolation
.Interpolate
) {
356 case TGSI_INTERPOLATE_CONSTANT
:
357 emit_cinterp(c
, i
, mask
);
360 case TGSI_INTERPOLATE_LINEAR
:
361 emit_linterp(c
, i
, mask
);
364 case TGSI_INTERPOLATE_PERSPECTIVE
:
365 //emit_pinterp(c, i, mask);
366 emit_linterp(c
, i
, mask
);
372 case TGSI_TOKEN_TYPE_IMMEDIATE
:
373 case TGSI_TOKEN_TYPE_INSTRUCTION
:
380 tgsi_parse_free (&parse
);