2 #include "brw_context.h"
5 #include "pipe/p_util.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/tgsi_parse.h"
9 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
12 c
->reg_index
= MAX2(c
->reg_index
, c
->tmp_start
+ c
->tmp_index
);
13 return brw_vec8_grf(c
->tmp_start
+ c
->tmp_index
, 0);
16 static void release_tmps(struct brw_wm_compile
*c
)
23 static int is_null( struct brw_reg reg
)
25 return (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
26 reg
.nr
== BRW_ARF_NULL
);
29 static void emit_pixel_xy( struct brw_wm_compile
*c
)
31 if (is_null(c
->pixel_xy
[0])) {
33 struct brw_compile
*p
= &c
->func
;
34 struct brw_reg r1_uw
= retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW
);
36 c
->pixel_xy
[0] = vec8(retype(alloc_tmp(c
), BRW_REGISTER_TYPE_UW
));
37 c
->pixel_xy
[1] = vec8(retype(alloc_tmp(c
), BRW_REGISTER_TYPE_UW
));
39 /* Calculate pixel centers by adding 1 or 0 to each of the
40 * micro-tile coordinates passed in r1.
44 stride(suboffset(r1_uw
, 4), 2, 4, 0),
45 brw_imm_v(0x10101010));
49 stride(suboffset(r1_uw
, 5), 2, 4, 0),
50 brw_imm_v(0x11001100));
59 static void emit_delta_xy( struct brw_wm_compile
*c
)
61 if (is_null(c
->delta_xy
[0])) {
62 struct brw_compile
*p
= &c
->func
;
63 struct brw_reg r1
= brw_vec1_grf(1, 0);
67 c
->delta_xy
[0] = alloc_tmp(c
);
68 c
->delta_xy
[1] = alloc_tmp(c
);
70 /* Calc delta X,Y by subtracting origin in r1 from the pixel
75 retype(c
->pixel_xy
[0], BRW_REGISTER_TYPE_UW
),
80 retype(c
->pixel_xy
[1], BRW_REGISTER_TYPE_UW
),
81 negate(suboffset(r1
,1)));
88 static void emit_pixel_w( struct brw_wm_compile
*c
)
90 if (is_null(c
->pixel_w
)) {
91 struct brw_compile
*p
= &c
->func
;
93 struct brw_reg interp_wpos
= c
->coef_wpos
;
95 c
->pixel_w
= alloc_tmp(c
);
99 /* Calc 1/w - just linterp wpos[3] optimized by putting the
100 * result straight into a message reg.
102 struct brw_reg interp3
= brw_vec1_grf(interp_wpos
.nr
+1, 4);
103 brw_LINE(p
, brw_null_reg(), interp3
, c
->delta_xy
[0]);
104 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), c
->delta_xy
[1]);
109 BRW_MATH_FUNCTION_INV
,
110 BRW_MATH_SATURATE_NONE
,
113 BRW_MATH_PRECISION_FULL
);
119 static void emit_cinterp(struct brw_wm_compile
*c
,
123 struct brw_compile
*p
= &c
->func
;
124 struct brw_reg interp
[4];
125 struct brw_reg coef
= c
->payload_coef
[idx
];
128 interp
[0] = brw_vec1_grf(coef
.nr
, 0);
129 interp
[1] = brw_vec1_grf(coef
.nr
, 4);
130 interp
[2] = brw_vec1_grf(coef
.nr
+1, 0);
131 interp
[3] = brw_vec1_grf(coef
.nr
+1, 4);
133 for(i
= 0; i
< 4; i
++ ) {
135 struct brw_reg dst
= c
->wm_regs
[TGSI_FILE_INPUT
][idx
][i
];
136 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
141 static void emit_linterp(struct brw_wm_compile
*c
,
145 struct brw_compile
*p
= &c
->func
;
146 struct brw_reg interp
[4];
147 struct brw_reg coef
= c
->payload_coef
[idx
];
152 interp
[0] = brw_vec1_grf(coef
.nr
, 0);
153 interp
[1] = brw_vec1_grf(coef
.nr
, 4);
154 interp
[2] = brw_vec1_grf(coef
.nr
+1, 0);
155 interp
[3] = brw_vec1_grf(coef
.nr
+1, 4);
157 for(i
= 0; i
< 4; i
++ ) {
159 struct brw_reg dst
= c
->wm_regs
[TGSI_FILE_INPUT
][idx
][i
];
160 brw_LINE(p
, brw_null_reg(), interp
[i
], c
->delta_xy
[0]);
161 brw_MAC(p
, dst
, suboffset(interp
[i
],1), c
->delta_xy
[1]);
167 static void emit_pinterp(struct brw_wm_compile
*c
,
171 struct brw_compile
*p
= &c
->func
;
172 struct brw_reg interp
[4];
173 struct brw_reg coef
= c
->payload_coef
[idx
];
179 interp
[0] = brw_vec1_grf(coef
.nr
, 0);
180 interp
[1] = brw_vec1_grf(coef
.nr
, 4);
181 interp
[2] = brw_vec1_grf(coef
.nr
+1, 0);
182 interp
[3] = brw_vec1_grf(coef
.nr
+1, 4);
184 for(i
= 0; i
< 4; i
++ ) {
186 struct brw_reg dst
= allocate_reg(c
, TGSI_FILE_INPUT
, idx
, i
);
187 brw_LINE(p
, brw_null_reg(), interp
[i
], c
->delta_xy
[0]);
188 brw_MAC(p
, dst
, suboffset(interp
[i
],1), c
->delta_xy
[1]);
189 brw_MUL(p
, dst
, dst
, c
->pixel_w
);
198 static void emit_wpos( )
200 struct prog_dst_register dst
= dst_reg(PROGRAM_INPUT
, idx
);
201 struct tgsi_full_src_register interp
= src_reg(PROGRAM_PAYLOAD
, idx
);
202 struct tgsi_full_src_register deltas
= get_delta_xy(c
);
203 struct tgsi_full_src_register arg2
;
209 /* Have to treat wpos.xy specially:
213 dst_mask(dst
, WRITEMASK_XY
),
219 dst
= dst_mask(dst
, WRITEMASK_ZW
);
221 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
236 /* Perform register allocation:
239 * -- passthrough depth regs (and stencil/aa??)
241 * -- inputs (coefficients)
243 * Use a totally static register allocation. This will perform poorly
244 * but is an easy way to get started (again).
246 static void prealloc_reg(struct brw_wm_compile
*c
)
249 int nr_curbe_regs
= 0;
251 /* R0, then some depth related regs:
253 for (i
= 0; i
< c
->key
.nr_depth_regs
; i
++) {
254 c
->payload_depth
[i
] = brw_vec8_grf(i
*2, 0);
259 /* Then a copy of our part of the CURBE entry:
262 int nr_constants
= c
->fp
->info
.file_max
[TGSI_FILE_CONSTANT
] + 1;
265 /* XXX number of constants, or highest numbered constant? */
266 assert(nr_constants
== c
->fp
->info
.file_count
[TGSI_FILE_CONSTANT
]);
268 c
->prog_data
.max_const
= 4*nr_constants
;
269 for (i
= 0; i
< nr_constants
; i
++) {
270 for (j
= 0; j
< 4; j
++, index
++)
271 c
->wm_regs
[TGSI_FILE_CONSTANT
][i
][j
] = brw_vec1_grf(c
->reg_index
+ index
/8,
275 nr_curbe_regs
= 2*((4*nr_constants
+15)/16);
276 c
->reg_index
+= nr_curbe_regs
;
279 /* Adjust for parameter coefficients for position, which are
280 * currently always provided.
282 // c->position_coef[i] = brw_vec8_grf(c->reg_index, 0);
285 /* Next we receive the plane coefficients for parameter
288 assert(c
->fp
->info
.file_max
[TGSI_FILE_INPUT
] == c
->fp
->info
.num_inputs
);
289 for (i
= 0; i
< c
->fp
->info
.file_max
[TGSI_FILE_INPUT
] + 1; i
++) {
290 c
->payload_coef
[i
] = brw_vec8_grf(c
->reg_index
, 0);
294 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
295 c
->prog_data
.urb_read_length
= (c
->fp
->info
.num_inputs
+ 1) * 2;
296 c
->prog_data
.curb_read_length
= nr_curbe_regs
;
298 /* That's the end of the payload, now we can start allocating registers.
300 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
303 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
306 /* Now allocate room for the interpolated inputs and staging
307 * registers for the outputs:
309 /* XXX do we want to loop over the _number_ of inputs/outputs or loop
310 * to the highest input/output index that's used?
311 * Probably the same, actually.
313 assert(c
->fp
->info
.file_max
[TGSI_FILE_INPUT
] + 1 == c
->fp
->info
.num_inputs
);
314 assert(c
->fp
->info
.file_max
[TGSI_FILE_OUTPUT
] + 1 == c
->fp
->info
.num_outputs
);
315 for (i
= 0; i
< c
->fp
->info
.file_max
[TGSI_FILE_INPUT
] + 1; i
++)
316 for (j
= 0; j
< 4; j
++)
317 c
->wm_regs
[TGSI_FILE_INPUT
][i
][j
] = brw_vec8_grf( c
->reg_index
++, 0 );
319 for (i
= 0; i
< c
->fp
->info
.file_max
[TGSI_FILE_OUTPUT
] + 1; i
++)
320 for (j
= 0; j
< 4; j
++)
321 c
->wm_regs
[TGSI_FILE_OUTPUT
][i
][j
] = brw_vec8_grf( c
->reg_index
++, 0 );
323 /* Beyond this we should only need registers for internal temporaries:
325 c
->tmp_start
= c
->reg_index
;
332 /* Need to interpolate fragment program inputs in as a preamble to the
333 * shader. A more sophisticated compiler would do this on demand, but
334 * we'll do it up front:
336 void brw_wm_emit_decls(struct brw_wm_compile
*c
)
338 struct tgsi_parse_context parse
;
343 tgsi_parse_init( &parse
, c
->fp
->program
.tokens
);
346 !tgsi_parse_end_of_tokens( &parse
) )
348 tgsi_parse_token( &parse
);
350 switch( parse
.FullToken
.Token
.Type
) {
351 case TGSI_TOKEN_TYPE_DECLARATION
:
353 const struct tgsi_full_declaration
*decl
= &parse
.FullToken
.FullDeclaration
;
354 unsigned first
= decl
->DeclarationRange
.First
;
355 unsigned last
= decl
->DeclarationRange
.Last
;
356 unsigned mask
= decl
->Declaration
.UsageMask
; /* ? */
359 if (decl
->Declaration
.File
!= TGSI_FILE_INPUT
)
362 for( i
= first
; i
<= last
; i
++ ) {
363 switch (decl
->Declaration
.Interpolate
) {
364 case TGSI_INTERPOLATE_CONSTANT
:
365 emit_cinterp(c
, i
, mask
);
368 case TGSI_INTERPOLATE_LINEAR
:
369 emit_linterp(c
, i
, mask
);
372 case TGSI_INTERPOLATE_PERSPECTIVE
:
373 //emit_pinterp(c, i, mask);
374 emit_linterp(c
, i
, mask
);
380 case TGSI_TOKEN_TYPE_IMMEDIATE
:
381 case TGSI_TOKEN_TYPE_INSTRUCTION
:
388 tgsi_parse_free (&parse
);