2 #include "brw_context.h"
5 #include "util/u_math.h"
6 #include "util/u_memory.h"
7 #include "pipe/p_shader_tokens.h"
8 #include "tgsi/tgsi_parse.h"
// alloc_tmp: return a register for the compile context `c`.
//
// Visible behavior:
//   - c->reg_index is raised to at least c->tmp_start + c->tmp_index (MAX2).
//   - the result is brw_vec8_grf(c->tmp_start + c->tmp_index, 0).
//
// NOTE(review): the embedded source numbering jumps from 10 to 13, so at
// least one body line is absent from this listing.  Nothing visible here
// changes c->tmp_index, so as shown two successive calls would return the
// same register -- confirm against the complete file.
10 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
13 c
->reg_index
= MAX2(c
->reg_index
, c
->tmp_start
+ c
->tmp_index
);
14 return brw_vec8_grf(c
->tmp_start
+ c
->tmp_index
, 0);
// release_tmps: takes the compile context `c` and returns nothing.
//
// NOTE(review): only the signature is present in this listing; the body is
// missing.  Presumably the counterpart of alloc_tmp -- confirm against the
// complete file before relying on that.
17 static void release_tmps(struct brw_wm_compile
*c
)
24 static int is_null( struct brw_reg reg
)
26 return (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
27 reg
.nr
== BRW_ARF_NULL
);
// emit_pixel_xy: set up c->pixel_xy[0] and c->pixel_xy[1].
//
// The visible body runs only while is_null(c->pixel_xy[0]) holds.  In that
// case both pixel_xy entries are taken from alloc_tmp(c), retyped to
// BRW_REGISTER_TYPE_UW and passed through vec8().  r1_uw is
// brw_vec1_grf(1, 0) retyped to UW.
//
// NOTE(review): the calls that consume the stride(suboffset(r1_uw, 4|5), ...)
// and brw_imm_v(...) operands below are not present in this listing (the
// embedded numbering skips 42-44 and 47-49).  Per the existing comment they
// add 1 or 0 to the coordinates passed in r1 -- confirm the emitted
// instruction against the complete file.
30 static void emit_pixel_xy( struct brw_wm_compile
*c
)
32 if (is_null(c
->pixel_xy
[0])) {
34 struct brw_compile
*p
= &c
->func
;
35 struct brw_reg r1_uw
= retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW
);
37 c
->pixel_xy
[0] = vec8(retype(alloc_tmp(c
), BRW_REGISTER_TYPE_UW
));
38 c
->pixel_xy
[1] = vec8(retype(alloc_tmp(c
), BRW_REGISTER_TYPE_UW
));
40 /* Calculate pixel centers by adding 1 or 0 to each of the
41 * micro-tile coordinates passed in r1.
45 stride(suboffset(r1_uw
, 4), 2, 4, 0),
46 brw_imm_v(0x10101010));
50 stride(suboffset(r1_uw
, 5), 2, 4, 0),
51 brw_imm_v(0x11001100));
// emit_delta_xy: set up c->delta_xy[0] and c->delta_xy[1].
//
// The visible body runs only while is_null(c->delta_xy[0]) holds.  Both
// delta_xy entries are then taken from alloc_tmp(c); r1 is
// brw_vec1_grf(1, 0).
//
// NOTE(review): only operand fragments of the emitted instructions are
// visible: c->pixel_xy[0] and c->pixel_xy[1] retyped to UW, and
// negate(suboffset(r1, 1)).  The call heads and remaining operands are
// missing from this listing.  This code reads c->pixel_xy, but no call to
// emit_pixel_xy is visible here -- confirm who populates it.
60 static void emit_delta_xy( struct brw_wm_compile
*c
)
62 if (is_null(c
->delta_xy
[0])) {
63 struct brw_compile
*p
= &c
->func
;
64 struct brw_reg r1
= brw_vec1_grf(1, 0);
68 c
->delta_xy
[0] = alloc_tmp(c
);
69 c
->delta_xy
[1] = alloc_tmp(c
);
71 /* Calc delta X,Y by subtracting origin in r1 from the pixel
76 retype(c
->pixel_xy
[0], BRW_REGISTER_TYPE_UW
),
81 retype(c
->pixel_xy
[1], BRW_REGISTER_TYPE_UW
),
82 negate(suboffset(r1
,1)));
// emit_pixel_w: set up c->pixel_w.
//
// The visible body runs only while is_null(c->pixel_w) holds.  Steps shown:
//   1. c->pixel_w is taken from alloc_tmp(c).
//   2. interp3 = brw_vec1_grf(c->coef_wpos.nr + 1, 4).
//   3. brw_LINE writes to the null register from interp3 and c->delta_xy[0];
//      brw_MAC then writes brw_message_reg(2) from suboffset(interp3, 1) and
//      c->delta_xy[1].
//   4. A further call receives BRW_MATH_FUNCTION_INV,
//      BRW_MATH_SATURATE_NONE and BRW_MATH_PRECISION_FULL.
//
// NOTE(review): the head and the register operands of the call in step 4
// are missing from this listing, so its destination cannot be confirmed
// here (presumably c->pixel_w -- verify against the complete file).
89 static void emit_pixel_w( struct brw_wm_compile
*c
)
91 if (is_null(c
->pixel_w
)) {
92 struct brw_compile
*p
= &c
->func
;
94 struct brw_reg interp_wpos
= c
->coef_wpos
;
96 c
->pixel_w
= alloc_tmp(c
);
100 /* Calc 1/w - just linterp wpos[3] optimized by putting the
101 * result straight into a message reg.
103 struct brw_reg interp3
= brw_vec1_grf(interp_wpos
.nr
+1, 4);
104 brw_LINE(p
, brw_null_reg(), interp3
, c
->delta_xy
[0]);
105 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), c
->delta_xy
[1]);
110 BRW_MATH_FUNCTION_INV
,
111 BRW_MATH_SATURATE_NONE
,
114 BRW_MATH_PRECISION_FULL
);
// emit_cinterp: emit MOVs that fill the four components of input `idx`.
//
// coef is c->payload_coef[idx].  interp[0..3] are built with brw_vec1_grf
// from (coef.nr, 0), (coef.nr, 4), (coef.nr + 1, 0) and (coef.nr + 1, 4).
// For each i in 0..3 the destination c->wm_regs[TGSI_FILE_INPUT][idx][i]
// receives suboffset(interp[i], 3) through brw_MOV.
//
// NOTE(review): the parameter list is truncated in this listing after `c`;
// the body uses `idx`, and the caller passes (c, i, mask).  The declaration
// of the loop counter `i` is also not visible.
120 static void emit_cinterp(struct brw_wm_compile
*c
,
124 struct brw_compile
*p
= &c
->func
;
125 struct brw_reg interp
[4];
126 struct brw_reg coef
= c
->payload_coef
[idx
];
129 interp
[0] = brw_vec1_grf(coef
.nr
, 0);
130 interp
[1] = brw_vec1_grf(coef
.nr
, 4);
131 interp
[2] = brw_vec1_grf(coef
.nr
+1, 0);
132 interp
[3] = brw_vec1_grf(coef
.nr
+1, 4);
134 for(i
= 0; i
< 4; i
++ ) {
136 struct brw_reg dst
= c
->wm_regs
[TGSI_FILE_INPUT
][idx
][i
];
137 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
// emit_linterp: emit a LINE + MAC pair for each component of input `idx`.
//
// coef is c->payload_coef[idx].  interp[0..3] are built with brw_vec1_grf
// from (coef.nr, 0), (coef.nr, 4), (coef.nr + 1, 0) and (coef.nr + 1, 4).
// For each i in 0..3:
//   - brw_LINE writes to the null register from interp[i] and
//     c->delta_xy[0];
//   - brw_MAC writes c->wm_regs[TGSI_FILE_INPUT][idx][i] from
//     suboffset(interp[i], 1) and c->delta_xy[1].
//
// NOTE(review): the parameter list is truncated in this listing after `c`;
// the body uses `idx`, and the caller passes (c, i, mask).  This code reads
// c->delta_xy, but no call to emit_delta_xy is visible here -- confirm it
// is populated before this runs.
142 static void emit_linterp(struct brw_wm_compile
*c
,
146 struct brw_compile
*p
= &c
->func
;
147 struct brw_reg interp
[4];
148 struct brw_reg coef
= c
->payload_coef
[idx
];
153 interp
[0] = brw_vec1_grf(coef
.nr
, 0);
154 interp
[1] = brw_vec1_grf(coef
.nr
, 4);
155 interp
[2] = brw_vec1_grf(coef
.nr
+1, 0);
156 interp
[3] = brw_vec1_grf(coef
.nr
+1, 4);
158 for(i
= 0; i
< 4; i
++ ) {
160 struct brw_reg dst
= c
->wm_regs
[TGSI_FILE_INPUT
][idx
][i
];
161 brw_LINE(p
, brw_null_reg(), interp
[i
], c
->delta_xy
[0]);
162 brw_MAC(p
, dst
, suboffset(interp
[i
],1), c
->delta_xy
[1]);
// emit_pinterp: as emit_linterp's LINE + MAC sequence, followed by a
// multiply by c->pixel_w.
//
// coef is c->payload_coef[idx].  interp[0..3] are built with brw_vec1_grf
// from (coef.nr, 0), (coef.nr, 4), (coef.nr + 1, 0) and (coef.nr + 1, 4).
// For each i in 0..3:
//   - dst comes from allocate_reg(c, TGSI_FILE_INPUT, idx, i) rather than
//     from c->wm_regs directly;
//   - brw_LINE / brw_MAC combine interp[i] with c->delta_xy[0] and [1];
//   - brw_MUL multiplies dst in place by c->pixel_w.
//
// NOTE(review): the parameter list is truncated in this listing after `c`.
// The only visible call site of this function is commented out, and
// allocate_reg is not defined in the visible part of the file.
168 static void emit_pinterp(struct brw_wm_compile
*c
,
172 struct brw_compile
*p
= &c
->func
;
173 struct brw_reg interp
[4];
174 struct brw_reg coef
= c
->payload_coef
[idx
];
180 interp
[0] = brw_vec1_grf(coef
.nr
, 0);
181 interp
[1] = brw_vec1_grf(coef
.nr
, 4);
182 interp
[2] = brw_vec1_grf(coef
.nr
+1, 0);
183 interp
[3] = brw_vec1_grf(coef
.nr
+1, 4);
185 for(i
= 0; i
< 4; i
++ ) {
187 struct brw_reg dst
= allocate_reg(c
, TGSI_FILE_INPUT
, idx
, i
);
188 brw_LINE(p
, brw_null_reg(), interp
[i
], c
->delta_xy
[0]);
189 brw_MAC(p
, dst
, suboffset(interp
[i
],1), c
->delta_xy
[1]);
190 brw_MUL(p
, dst
, dst
, c
->pixel_w
);
// emit_wpos: window-position setup; xy is handled separately from zw
// according to the existing comments.
//
// Visible statements build dst = dst_reg(PROGRAM_INPUT, idx),
// interp = src_reg(PROGRAM_PAYLOAD, idx) and deltas = get_delta_xy(c), use
// dst_mask(dst, WRITEMASK_XY) as an operand, then narrow dst with
// dst_mask(dst, WRITEMASK_ZW).
//
// NOTE(review): the visible signature has an empty parameter list, yet the
// body uses `c` and `idx`, and it mixes prog_dst_register / PROGRAM_*
// names with tgsi_full_src_register.  Possibly unported or compiled-out
// code -- the surrounding lines are not visible in this listing, so
// confirm against the complete file.
199 static void emit_wpos( )
201 struct prog_dst_register dst
= dst_reg(PROGRAM_INPUT
, idx
);
202 struct tgsi_full_src_register interp
= src_reg(PROGRAM_PAYLOAD
, idx
);
203 struct tgsi_full_src_register deltas
= get_delta_xy(c
);
204 struct tgsi_full_src_register arg2
;
210 /* Have to treat wpos.xy specially:
214 dst_mask(dst
, WRITEMASK_XY
),
220 dst
= dst_mask(dst
, WRITEMASK_ZW
);
222 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
// prealloc_reg: static register layout for the compile context `c`.
//
// Visible steps, in order:
//   1. c->payload_depth[i] = brw_vec8_grf(i * 2, 0) for each of
//      c->key.nr_depth_regs entries.
//   2. Constants: nr_constants = file_max[TGSI_FILE_CONSTANT] + 1 (asserted
//      equal to file_count); c->prog_data.max_const = 4 * nr_constants;
//      each c->wm_regs[TGSI_FILE_CONSTANT][i][j] is a brw_vec1_grf starting
//      at c->reg_index + index / 8; c->reg_index then advances by
//      nr_curbe_regs = 2 * ((4 * nr_constants + 15) / 16).
//   3. c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0) for every input.
//   4. prog_data.first_curbe_grf, urb_read_length and curb_read_length are
//      filled in.
//   5. c->emit_mask_reg and c->stack are created at c->reg_index.
//   6. Each component of every input and output gets
//      brw_vec8_grf(c->reg_index++, 0).
//   7. c->tmp_start = c->reg_index.
//
// NOTE(review): lines are missing from this listing (gaps in the embedded
// numbering).  In particular no update of c->reg_index is visible inside
// the payload_coef loop or between emit_mask_reg and stack, and the second
// argument of the constant brw_vec1_grf call is cut off.
//
// NOTE(review): the two input-count asserts disagree.  The first requires
// file_max[TGSI_FILE_INPUT] == num_inputs, the later one requires
// file_max[TGSI_FILE_INPUT] + 1 == num_inputs.  Both cannot hold at once;
// the loops iterate file_max + 1 times, which matches the later form.
237 /* Perform register allocation:
240 * -- passthrough depth regs (and stencil/aa??)
242 * -- inputs (coefficients)
244 * Use a totally static register allocation. This will perform poorly
245 * but is an easy way to get started (again).
247 static void prealloc_reg(struct brw_wm_compile
*c
)
250 int nr_curbe_regs
= 0;
252 /* R0, then some depth related regs:
254 for (i
= 0; i
< c
->key
.nr_depth_regs
; i
++) {
255 c
->payload_depth
[i
] = brw_vec8_grf(i
*2, 0);
260 /* Then a copy of our part of the CURBE entry:
263 int nr_constants
= c
->fp
->info
.file_max
[TGSI_FILE_CONSTANT
] + 1;
266 /* XXX number of constants, or highest numbered constant? */
267 assert(nr_constants
== c
->fp
->info
.file_count
[TGSI_FILE_CONSTANT
]);
269 c
->prog_data
.max_const
= 4*nr_constants
;
270 for (i
= 0; i
< nr_constants
; i
++) {
271 for (j
= 0; j
< 4; j
++, index
++)
272 c
->wm_regs
[TGSI_FILE_CONSTANT
][i
][j
] = brw_vec1_grf(c
->reg_index
+ index
/8,
276 nr_curbe_regs
= 2*((4*nr_constants
+15)/16);
277 c
->reg_index
+= nr_curbe_regs
;
280 /* Adjust for parameter coefficients for position, which are
281 * currently always provided.
283 // c->position_coef[i] = brw_vec8_grf(c->reg_index, 0);
286 /* Next we receive the plane coefficients for parameter
// NOTE(review): this assert omits the "+ 1" used by the later
// file_max[TGSI_FILE_INPUT] + 1 == num_inputs check and by the loop bound.
289 assert(c
->fp
->info
.file_max
[TGSI_FILE_INPUT
] == c
->fp
->info
.num_inputs
);
290 for (i
= 0; i
< c
->fp
->info
.file_max
[TGSI_FILE_INPUT
] + 1; i
++) {
291 c
->payload_coef
[i
] = brw_vec8_grf(c
->reg_index
, 0);
295 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
296 c
->prog_data
.urb_read_length
= (c
->fp
->info
.num_inputs
+ 1) * 2;
297 c
->prog_data
.curb_read_length
= nr_curbe_regs
;
299 /* That's the end of the payload, now we can start allocating registers.
301 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
304 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
307 /* Now allocate room for the interpolated inputs and staging
308 * registers for the outputs:
310 /* XXX do we want to loop over the _number_ of inputs/outputs or loop
311 * to the highest input/output index that's used?
312 * Probably the same, actually.
314 assert(c
->fp
->info
.file_max
[TGSI_FILE_INPUT
] + 1 == c
->fp
->info
.num_inputs
);
315 assert(c
->fp
->info
.file_max
[TGSI_FILE_OUTPUT
] + 1 == c
->fp
->info
.num_outputs
);
316 for (i
= 0; i
< c
->fp
->info
.file_max
[TGSI_FILE_INPUT
] + 1; i
++)
317 for (j
= 0; j
< 4; j
++)
318 c
->wm_regs
[TGSI_FILE_INPUT
][i
][j
] = brw_vec8_grf( c
->reg_index
++, 0 );
320 for (i
= 0; i
< c
->fp
->info
.file_max
[TGSI_FILE_OUTPUT
] + 1; i
++)
321 for (j
= 0; j
< 4; j
++)
322 c
->wm_regs
[TGSI_FILE_OUTPUT
][i
][j
] = brw_vec8_grf( c
->reg_index
++, 0 );
324 /* Beyond this we should only need registers for internal temporaries:
326 c
->tmp_start
= c
->reg_index
;
// brw_wm_emit_decls: walk the token stream of c->fp->program.tokens and
// emit interpolation code for input declarations.
//
// Visible behavior:
//   - tgsi_parse_init / tgsi_parse_token / tgsi_parse_free bracket the walk,
//     which continues while !tgsi_parse_end_of_tokens(&parse).
//   - For TGSI_TOKEN_TYPE_DECLARATION tokens, a test on
//     decl->Declaration.File != TGSI_FILE_INPUT precedes the per-index loop
//     (its consequence is not visible in this listing).
//   - For every index i in [DeclarationRange.First, DeclarationRange.Last]
//     the interpolation mode selects the emitter:
//       TGSI_INTERPOLATE_CONSTANT    -> emit_cinterp(c, i, mask)
//       TGSI_INTERPOLATE_LINEAR      -> emit_linterp(c, i, mask)
//       TGSI_INTERPOLATE_PERSPECTIVE -> emit_linterp(c, i, mask); the
//                                       emit_pinterp call is commented out.
//   - IMMEDIATE and INSTRUCTION tokens share one case body, which is not
//     visible here.
//
// NOTE(review): the loop head, break statements and braces are missing from
// this listing, so fall-through between cases cannot be ruled out from what
// is shown -- confirm against the complete file.
333 /* Need to interpolate fragment program inputs in as a preamble to the
334 * shader. A more sophisticated compiler would do this on demand, but
335 * we'll do it up front:
337 void brw_wm_emit_decls(struct brw_wm_compile
*c
)
339 struct tgsi_parse_context parse
;
344 tgsi_parse_init( &parse
, c
->fp
->program
.tokens
);
347 !tgsi_parse_end_of_tokens( &parse
) )
349 tgsi_parse_token( &parse
);
351 switch( parse
.FullToken
.Token
.Type
) {
352 case TGSI_TOKEN_TYPE_DECLARATION
:
354 const struct tgsi_full_declaration
*decl
= &parse
.FullToken
.FullDeclaration
;
355 unsigned first
= decl
->DeclarationRange
.First
;
356 unsigned last
= decl
->DeclarationRange
.Last
;
357 unsigned mask
= decl
->Declaration
.UsageMask
; /* ? */
360 if (decl
->Declaration
.File
!= TGSI_FILE_INPUT
)
363 for( i
= first
; i
<= last
; i
++ ) {
364 switch (decl
->Declaration
.Interpolate
) {
365 case TGSI_INTERPOLATE_CONSTANT
:
366 emit_cinterp(c
, i
, mask
);
369 case TGSI_INTERPOLATE_LINEAR
:
370 emit_linterp(c
, i
, mask
);
373 case TGSI_INTERPOLATE_PERSPECTIVE
:
374 //emit_pinterp(c, i, mask);
375 emit_linterp(c
, i
, mask
);
381 case TGSI_TOKEN_TYPE_IMMEDIATE
:
382 case TGSI_TOKEN_TYPE_INSTRUCTION
:
389 tgsi_parse_free (&parse
);