2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
36 #include "brw_structs.h"
37 #include "brw_defines.h"
39 #include "pipe/p_compiler.h"
40 #include "pipe/p_shader_tokens.h"
// Swizzle helpers. BRW_SWIZZLE4 packs four 2-bit channel selectors into one
// value: a in bits 0-1, b in bits 2-3, c in bits 4-5, d in bits 6-7.
// BRW_GET_SWZ extracts the 2-bit selector stored at position idx of swz.
42 #define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
43 #define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
// Named swizzles. NOOP and XYZW expand to the same selection (0,1,2,3).
45 #define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
46 #define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
47 #define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
48 #define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
// Expands to 32. byte_offset() and brw_address() in this file multiply a
// register number by REG_SIZE and add subnr to form a linear offset.
51 #define REG_SIZE (8*4)
54 /* These aren't hardware structs, just something useful for us to pass around:
56 * Align1 operation has a lot of control over input ranges. Used in
57 * WM programs to implement shaders decomposed into "channel serial"
58 * or "structure of array" form:
65 unsigned subnr
:5; /* :1 in align16 */
66 unsigned negate
:1; /* source only */
67 unsigned abs
:1; /* source only */
68 unsigned vstride
:4; /* source only */
69 unsigned width
:3; /* src only, align1 only */
70 unsigned hstride
:2; /* src only, align1 only */
71 unsigned address_mode
:1; /* relative addressing, hopefully! */
76 unsigned swizzle
:8; /* src only, align16 only */
77 unsigned writemask
:4; /* dest only, align16 only */
78 int indirect_offset
:10; /* relative addressing offset */
79 unsigned pad1
:10; /* two dwords total */
90 unsigned addr_subnr
:4;
// Array capacities used by the declarations that follow:
// BRW_EU_MAX_INSN_STACK sizes the stack[] array of struct brw_instruction,
// BRW_EU_MAX_INSN sizes the store[] array of struct brw_instruction.
96 #define BRW_EU_MAX_INSN_STACK 5
97 #define BRW_EU_MAX_INSN 1200
100 struct brw_instruction store
[BRW_EU_MAX_INSN
];
103 /* Allow clients to push/pop instruction state:
105 struct brw_instruction stack
[BRW_EU_MAX_INSN_STACK
];
106 struct brw_instruction
*current
;
109 boolean single_program_flow
;
114 static __inline
int type_sz( unsigned type
)
117 case BRW_REGISTER_TYPE_UD
:
118 case BRW_REGISTER_TYPE_D
:
119 case BRW_REGISTER_TYPE_F
:
121 case BRW_REGISTER_TYPE_HF
:
122 case BRW_REGISTER_TYPE_UW
:
123 case BRW_REGISTER_TYPE_W
:
125 case BRW_REGISTER_TYPE_UB
:
126 case BRW_REGISTER_TYPE_B
:
// brw_reg: builds a struct brw_reg from explicit field values.
// From the statements visible in this view:
//   - subnr is stored scaled, as subnr * type_sz(type);
//   - vstride and hstride are copied from the arguments;
//   - address_mode is set to BRW_ADDRESS_DIRECT;
//   - swizzle and writemask are stored in dw1.bits;
//   - dw1.bits.indirect_offset and dw1.bits.pad1 are set to 0.
// NOTE(review): most of the parameter list, the remaining field assignments
// and the return statement are not visible here; confirm against the full file.
133 static __inline
struct brw_reg
brw_reg( unsigned file
,
148 reg
.subnr
= subnr
* type_sz(type
);
151 reg
.vstride
= vstride
;
153 reg
.hstride
= hstride
;
154 reg
.address_mode
= BRW_ADDRESS_DIRECT
;
157 /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
158 * set swizzle and writemask to W, as the lower bits of subnr will
159 * be lost when converted to align16. This is probably too much to
160 * keep track of as you'd want it adjusted by suboffset(), etc.
161 * Perhaps fix up when converting to align16?
163 reg
.dw1
.bits
.swizzle
= swizzle
;
164 reg
.dw1
.bits
.writemask
= writemask
;
165 reg
.dw1
.bits
.indirect_offset
= 0;
166 reg
.dw1
.bits
.pad1
= 0;
170 static __inline
struct brw_reg
brw_vec16_reg( unsigned file
,
178 BRW_VERTICAL_STRIDE_16
,
180 BRW_HORIZONTAL_STRIDE_1
,
182 TGSI_WRITEMASK_XYZW
);
185 static __inline
struct brw_reg
brw_vec8_reg( unsigned file
,
193 BRW_VERTICAL_STRIDE_8
,
195 BRW_HORIZONTAL_STRIDE_1
,
197 TGSI_WRITEMASK_XYZW
);
201 static __inline
struct brw_reg
brw_vec4_reg( unsigned file
,
209 BRW_VERTICAL_STRIDE_4
,
211 BRW_HORIZONTAL_STRIDE_1
,
213 TGSI_WRITEMASK_XYZW
);
217 static __inline
struct brw_reg
brw_vec2_reg( unsigned file
,
225 BRW_VERTICAL_STRIDE_2
,
227 BRW_HORIZONTAL_STRIDE_1
,
232 static __inline
struct brw_reg
brw_vec1_reg( unsigned file
,
240 BRW_VERTICAL_STRIDE_0
,
242 BRW_HORIZONTAL_STRIDE_0
,
248 static __inline
struct brw_reg
retype( struct brw_reg reg
,
255 static __inline
struct brw_reg
suboffset( struct brw_reg reg
,
258 reg
.subnr
+= delta
* type_sz(reg
.type
);
263 static __inline
struct brw_reg
offset( struct brw_reg reg
,
// byte_offset: moves reg by a linear amount.
// The current position is flattened to reg.nr * REG_SIZE + reg.subnr, bytes
// is added, and the sum is split back into nr (quotient by REG_SIZE) and
// subnr (remainder by REG_SIZE). The intermediate sum is held in an unsigned.
// NOTE(review): the declaration of bytes and the return statement are not
// visible in this view.
271 static __inline
struct brw_reg
byte_offset( struct brw_reg reg
,
274 unsigned newoffset
= reg
.nr
* REG_SIZE
+ reg
.subnr
+ bytes
;
275 reg
.nr
= newoffset
/ REG_SIZE
;
276 reg
.subnr
= newoffset
% REG_SIZE
;
// brw_uw16_reg: takes brw_vec16_reg(file, nr, 0), retypes it to
// BRW_REGISTER_TYPE_UW, then applies suboffset() with subnr, so subnr is
// scaled by the UW type rather than by the type brw_vec16_reg() produced.
281 static __inline
struct brw_reg
brw_uw16_reg( unsigned file
,
285 return suboffset(retype(brw_vec16_reg(file
, nr
, 0), BRW_REGISTER_TYPE_UW
), subnr
);
// brw_uw8_reg: takes brw_vec8_reg(file, nr, 0), retypes it to
// BRW_REGISTER_TYPE_UW, then applies suboffset() with subnr.
288 static __inline
struct brw_reg
brw_uw8_reg( unsigned file
,
292 return suboffset(retype(brw_vec8_reg(file
, nr
, 0), BRW_REGISTER_TYPE_UW
), subnr
);
// brw_uw1_reg: takes brw_vec1_reg(file, nr, 0), retypes it to
// BRW_REGISTER_TYPE_UW, then applies suboffset() with subnr.
295 static __inline
struct brw_reg
brw_uw1_reg( unsigned file
,
299 return suboffset(retype(brw_vec1_reg(file
, nr
, 0), BRW_REGISTER_TYPE_UW
), subnr
);
302 static __inline
struct brw_reg
brw_imm_reg( unsigned type
)
304 return brw_reg( BRW_IMMEDIATE_VALUE
,
308 BRW_VERTICAL_STRIDE_0
,
310 BRW_HORIZONTAL_STRIDE_0
,
315 static __inline
struct brw_reg
brw_imm_f( float f
)
317 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_F
);
322 static __inline
struct brw_reg
brw_imm_d( int d
)
324 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_D
);
329 static __inline
struct brw_reg
brw_imm_ud( unsigned ud
)
331 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_UD
);
336 static __inline
struct brw_reg
brw_imm_uw( ushort uw
)
338 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_UW
);
343 static __inline
struct brw_reg
brw_imm_w( short w
)
345 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_W
);
350 /* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
351 * numbers alias with _V and _VF below:
354 /* Vector of eight signed half-byte values:
// brw_imm_v: starts from brw_imm_reg(BRW_REGISTER_TYPE_V) and overrides the
// region fields: vstride = BRW_VERTICAL_STRIDE_0, width = BRW_WIDTH_8,
// hstride = BRW_HORIZONTAL_STRIDE_1.
// NOTE(review): the statement that stores v and the return are not visible
// in this view.
356 static __inline
struct brw_reg
brw_imm_v( unsigned v
)
358 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_V
);
359 imm
.vstride
= BRW_VERTICAL_STRIDE_0
;
360 imm
.width
= BRW_WIDTH_8
;
361 imm
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
366 /* Vector of four 8-bit float values:
// brw_imm_vf: starts from brw_imm_reg(BRW_REGISTER_TYPE_VF) and overrides the
// region fields: vstride = BRW_VERTICAL_STRIDE_0, width = BRW_WIDTH_4,
// hstride = BRW_HORIZONTAL_STRIDE_1.
// NOTE(review): the statement that stores v and the return are not visible
// in this view.
368 static __inline
struct brw_reg
brw_imm_vf( unsigned v
)
370 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_VF
);
371 imm
.vstride
= BRW_VERTICAL_STRIDE_0
;
372 imm
.width
= BRW_WIDTH_4
;
373 imm
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
380 #define VF_NEG (1<<7)
382 static __inline
struct brw_reg
brw_imm_vf4( unsigned v0
,
387 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_VF
);
388 imm
.vstride
= BRW_VERTICAL_STRIDE_0
;
389 imm
.width
= BRW_WIDTH_4
;
390 imm
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
391 imm
.dw1
.ud
= ((v0
<< 0) |
// brw_address: returns a brw_imm_uw() immediate whose value is the linear
// position of reg, computed as reg.nr * REG_SIZE + reg.subnr.
// The value passes through brw_imm_uw's ushort parameter.
399 static __inline
struct brw_reg
brw_address( struct brw_reg reg
)
401 return brw_imm_uw(reg
.nr
* REG_SIZE
+ reg
.subnr
);
// brw_vec1_grf: shorthand for brw_vec1_reg() with the file fixed to
// BRW_GENERAL_REGISTER_FILE; nr and subnr are passed through unchanged.
405 static __inline
struct brw_reg
brw_vec1_grf( unsigned nr
,
408 return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
// brw_vec8_grf: shorthand for brw_vec8_reg() with the file fixed to
// BRW_GENERAL_REGISTER_FILE; nr and subnr are passed through unchanged.
411 static __inline
struct brw_reg
brw_vec8_grf( unsigned nr
,
414 return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
// brw_vec4_grf: shorthand for brw_vec4_reg() with the file fixed to
// BRW_GENERAL_REGISTER_FILE; nr and subnr are passed through unchanged.
417 static __inline
struct brw_reg
brw_vec4_grf( unsigned nr
,
420 return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
// brw_vec2_grf: shorthand for brw_vec2_reg() with the file fixed to
// BRW_GENERAL_REGISTER_FILE; nr and subnr are passed through unchanged.
424 static __inline
struct brw_reg
brw_vec2_grf( unsigned nr
,
427 return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
// brw_uw8_grf: shorthand for brw_uw8_reg() with the file fixed to
// BRW_GENERAL_REGISTER_FILE; nr and subnr are passed through unchanged.
430 static __inline
struct brw_reg
brw_uw8_grf( unsigned nr
,
433 return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
436 static __inline
struct brw_reg
brw_null_reg( void )
438 return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
443 static __inline
struct brw_reg
brw_address_reg( unsigned subnr
)
445 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
450 /* If/else instructions break in align16 mode if writemask & swizzle
451 * aren't xyzw. This goes against the convention for other scalar
// brw_ip_reg: builds a register directly through brw_reg() in
// BRW_ARCHITECTURE_REGISTER_FILE, typed BRW_REGISTER_TYPE_UD, with
// BRW_VERTICAL_STRIDE_4 and BRW_HORIZONTAL_STRIDE_0, and with swizzle and
// writemask explicitly set to XYZW (flagged "NOTE!" by the original author).
// NOTE(review): the register number, sub-register and width arguments are
// not visible in this view.
454 static __inline
struct brw_reg
brw_ip_reg( void )
456 return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
459 BRW_REGISTER_TYPE_UD
,
460 BRW_VERTICAL_STRIDE_4
, /* ? */
462 BRW_HORIZONTAL_STRIDE_0
,
463 BRW_SWIZZLE_XYZW
, /* NOTE! */
464 TGSI_WRITEMASK_XYZW
); /* NOTE! */
467 static __inline
struct brw_reg
brw_acc_reg( void )
469 return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
475 static __inline
struct brw_reg
brw_flag_reg( void )
477 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
483 static __inline
struct brw_reg
brw_mask_reg( unsigned subnr
)
485 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
490 static __inline
struct brw_reg
brw_message_reg( unsigned nr
)
492 return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE
,
500 /* This is almost always called with a numeric constant argument, so
501 * make things easy to evaluate at compile time:
503 static __inline
unsigned cvt( unsigned val
)
// stride: overwrites the region fields of reg from plain numeric arguments.
// Each argument is translated through cvt(); width additionally has 1
// subtracted after translation:
//   reg.vstride = cvt(vstride)
//   reg.width   = cvt(width) - 1
//   reg.hstride = cvt(hstride)
// NOTE(review): the tail of the parameter list and the return statement are
// not visible in this view; cvt()'s mapping is defined elsewhere in the file.
517 static __inline
struct brw_reg
stride( struct brw_reg reg
,
523 reg
.vstride
= cvt(vstride
);
524 reg
.width
= cvt(width
) - 1;
525 reg
.hstride
= cvt(hstride
);
// vec16: returns reg with its region replaced via stride(reg, 16, 16, 1).
529 static __inline
struct brw_reg
vec16( struct brw_reg reg
)
531 return stride(reg
, 16,16,1);
// vec8: returns reg with its region replaced via stride(reg, 8, 8, 1).
534 static __inline
struct brw_reg
vec8( struct brw_reg reg
)
536 return stride(reg
, 8,8,1);
// vec4: returns reg with its region replaced via stride(reg, 4, 4, 1).
539 static __inline
struct brw_reg
vec4( struct brw_reg reg
)
541 return stride(reg
, 4,4,1);
// vec2: returns reg with its region replaced via stride(reg, 2, 2, 1).
544 static __inline
struct brw_reg
vec2( struct brw_reg reg
)
546 return stride(reg
, 2,2,1);
// vec1: returns reg with its region replaced via stride(reg, 0, 1, 0).
549 static __inline
struct brw_reg
vec1( struct brw_reg reg
)
551 return stride(reg
, 0,1,0);
// get_element: selects one element of reg. suboffset() advances by elt
// (scaled by reg's current type), then vec1() gives the result the
// stride(reg, 0, 1, 0) region.
554 static __inline
struct brw_reg
get_element( struct brw_reg reg
, unsigned elt
)
556 return vec1(suboffset(reg
, elt
));
// get_element_ud: like get_element(), but reg is first retyped to
// BRW_REGISTER_TYPE_UD, so the elt step in suboffset() is scaled by that type.
559 static __inline
struct brw_reg
get_element_ud( struct brw_reg reg
, unsigned elt
)
561 return vec1(suboffset(retype(reg
, BRW_REGISTER_TYPE_UD
), elt
));
// brw_swizzle: composes a new swizzle with the one reg already carries.
// Each of x, y, z, w is used as an index into reg's current swizzle via
// BRW_GET_SWZ, and the four looked-up selectors are repacked with
// BRW_SWIZZLE4 into reg.dw1.bits.swizzle.
// NOTE(review): the tail of the parameter list and the return statement are
// not visible in this view.
565 static __inline
struct brw_reg
brw_swizzle( struct brw_reg reg
,
571 reg
.dw1
.bits
.swizzle
= BRW_SWIZZLE4(BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, x
),
572 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, y
),
573 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, z
),
574 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, w
));
// brw_swizzle1: replicates a single channel by calling brw_swizzle() with
// the same selector x in all four positions.
579 static __inline
struct brw_reg
brw_swizzle1( struct brw_reg reg
,
582 return brw_swizzle(reg
, x
, x
, x
, x
);
// brw_writemask: narrows reg's writemask by AND-ing mask into
// reg.dw1.bits.writemask; bits already clear stay clear.
// Contrast with brw_set_writemask(), which assigns the mask outright.
585 static __inline
struct brw_reg
brw_writemask( struct brw_reg reg
,
588 reg
.dw1
.bits
.writemask
&= mask
;
// brw_set_writemask: replaces reg.dw1.bits.writemask with mask, discarding
// whatever writemask reg previously carried.
592 static __inline
struct brw_reg
brw_set_writemask( struct brw_reg reg
,
595 reg
.dw1
.bits
.writemask
= mask
;
599 static __inline
struct brw_reg
negate( struct brw_reg reg
)
605 static __inline
struct brw_reg
brw_abs( struct brw_reg reg
)
611 /***********************************************************************
// brw_vec4_indirect: starts from brw_vec4_grf(0, 0), switches address_mode
// to BRW_ADDRESS_REGISTER_INDIRECT_REGISTER and stores offset in
// dw1.bits.indirect_offset (a signed 10-bit field in struct brw_reg).
// NOTE(review): how the subnr argument is applied, and the return statement,
// are not visible in this view.
613 static __inline
struct brw_reg
brw_vec4_indirect( unsigned subnr
,
616 struct brw_reg reg
= brw_vec4_grf(0, 0);
618 reg
.address_mode
= BRW_ADDRESS_REGISTER_INDIRECT_REGISTER
;
619 reg
.dw1
.bits
.indirect_offset
= offset
;
// brw_vec1_indirect: starts from brw_vec1_grf(0, 0), switches address_mode
// to BRW_ADDRESS_REGISTER_INDIRECT_REGISTER and stores offset in
// dw1.bits.indirect_offset (a signed 10-bit field in struct brw_reg).
// NOTE(review): how the subnr argument is applied, and the return statement,
// are not visible in this view.
623 static __inline
struct brw_reg
brw_vec1_indirect( unsigned subnr
,
626 struct brw_reg reg
= brw_vec1_grf(0, 0);
628 reg
.address_mode
= BRW_ADDRESS_REGISTER_INDIRECT_REGISTER
;
629 reg
.dw1
.bits
.indirect_offset
= offset
;
// deref_4f: builds a vec4 indirect register from ptr, passing
// ptr.addr_subnr and ptr.addr_offset + offset to brw_vec4_indirect().
633 static __inline
struct brw_reg
deref_4f(struct brw_indirect ptr
, int offset
)
635 return brw_vec4_indirect(ptr
.addr_subnr
, ptr
.addr_offset
+ offset
);
// deref_1f: builds a vec1 indirect register from ptr, passing
// ptr.addr_subnr and ptr.addr_offset + offset to brw_vec1_indirect().
638 static __inline
struct brw_reg
deref_1f(struct brw_indirect ptr
, int offset
)
640 return brw_vec1_indirect(ptr
.addr_subnr
, ptr
.addr_offset
+ offset
);
// deref_4b: deref_4f(ptr, offset) retyped to BRW_REGISTER_TYPE_B.
643 static __inline
struct brw_reg
deref_4b(struct brw_indirect ptr
, int offset
)
645 return retype(deref_4f(ptr
, offset
), BRW_REGISTER_TYPE_B
);
// deref_1uw: deref_1f(ptr, offset) retyped to BRW_REGISTER_TYPE_UW.
648 static __inline
struct brw_reg
deref_1uw(struct brw_indirect ptr
, int offset
)
650 return retype(deref_1f(ptr
, offset
), BRW_REGISTER_TYPE_UW
);
// deref_1ud: deref_1f(ptr, offset) retyped to BRW_REGISTER_TYPE_UD.
653 static __inline
struct brw_reg
deref_1ud(struct brw_indirect ptr
, int offset
)
655 return retype(deref_1f(ptr
, offset
), BRW_REGISTER_TYPE_UD
);
// get_addr_reg: returns brw_address_reg(ptr.addr_subnr); ptr.addr_offset is
// not used here.
658 static __inline
struct brw_reg
get_addr_reg(struct brw_indirect ptr
)
660 return brw_address_reg(ptr
.addr_subnr
);
// brw_indirect_offset: adds offset to ptr.addr_offset. ptr is passed by
// value, so the caller's copy is untouched.
// NOTE(review): the return statement is not visible in this view.
663 static __inline
struct brw_indirect
brw_indirect_offset( struct brw_indirect ptr
, int offset
)
665 ptr
.addr_offset
+= offset
;
// brw_indirect: fills a local struct brw_indirect with the given addr_subnr
// and addr_offset (from offset).
// NOTE(review): any further field assignments and the return statement are
// not visible in this view.
669 static __inline
struct brw_indirect
brw_indirect( unsigned addr_subnr
, int offset
)
671 struct brw_indirect ptr
;
672 ptr
.addr_subnr
= addr_subnr
;
673 ptr
.addr_offset
= offset
;
// current_insn: returns the address of p->store[p->nr_insn], i.e. the store
// slot indexed by the compile's nr_insn counter. No bounds check against
// BRW_EU_MAX_INSN is performed here.
678 static __inline
struct brw_instruction
*current_insn( struct brw_compile
*p
)
680 return &p
->store
[p
->nr_insn
];
// Declarations only; the definitions are not in this view.
// Every function takes the struct brw_compile being operated on as p.
// brw_push_insn_state / brw_pop_insn_state pair with the instruction-state
// stack[] declared in struct brw_compile ("Allow clients to push/pop
// instruction state"). The brw_set_* functions each take one value.
// NOTE(review): presumably they update the compile's current default
// instruction state; confirm against the implementation.
683 void brw_pop_insn_state( struct brw_compile
*p
);
684 void brw_push_insn_state( struct brw_compile
*p
);
685 void brw_set_mask_control( struct brw_compile
*p
, unsigned value
);
686 void brw_set_saturate( struct brw_compile
*p
, unsigned value
);
687 void brw_set_access_mode( struct brw_compile
*p
, unsigned access_mode
);
688 void brw_set_compression_control( struct brw_compile
*p
, boolean control
);
689 void brw_set_predicate_control_flag_value( struct brw_compile
*p
, unsigned value
);
690 void brw_set_predicate_control( struct brw_compile
*p
, unsigned pc
);
691 void brw_set_conditionalmod( struct brw_compile
*p
, unsigned conditional
);
// brw_get_program returns a const unsigned pointer and takes an unsigned *sz.
// NOTE(review): sz looks like an out-parameter for the program size; its
// units are not visible here, so confirm against the implementation.
693 void brw_init_compile( struct brw_compile
*p
);
694 const unsigned *brw_get_program( struct brw_compile
*p
, unsigned *sz
);
697 struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
700 struct brw_reg src
);
702 struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
706 struct brw_reg src1
);
708 /* Helpers for regular instructions:
711 struct brw_instruction *brw_##OP(struct brw_compile *p, \
712 struct brw_reg dest, \
713 struct brw_reg src0);
716 struct brw_instruction *brw_##OP(struct brw_compile *p, \
717 struct brw_reg dest, \
718 struct brw_reg src0, \
719 struct brw_reg src1);
751 /* Helpers for SEND instruction:
753 void brw_urb_WRITE(struct brw_compile
*p
,
760 unsigned response_length
,
762 boolean writes_complete
,
766 void brw_fb_WRITE(struct brw_compile
*p
,
770 unsigned binding_table_index
,
772 unsigned response_length
,
775 void brw_SAMPLE(struct brw_compile
*p
,
779 unsigned binding_table_index
,
783 unsigned response_length
,
787 void brw_math_16( struct brw_compile
*p
,
793 unsigned precision
);
795 void brw_math( struct brw_compile
*p
,
802 unsigned precision
);
804 void brw_dp_READ_16( struct brw_compile
*p
,
807 unsigned scratch_offset
);
809 void brw_dp_WRITE_16( struct brw_compile
*p
,
812 unsigned scratch_offset
);
814 /* If/else/endif. Works by manipulating the execution flags on each
// Control-flow emitters; declarations only, definitions are not in this view.
// brw_IF, brw_ELSE, brw_DO, brw_WHILE, brw_BREAK and brw_CONT each return a
// struct brw_instruction pointer. Going by the parameter names, brw_ELSE
// takes the instruction returned for the IF, brw_ENDIF takes the IF or ELSE
// instruction, brw_WHILE takes an instruction to patch, and
// brw_land_fwd_jump takes a previously emitted jump instruction.
// NOTE(review): the patching semantics are inferred from parameter names
// only; confirm against the implementation.
817 struct brw_instruction
*brw_IF(struct brw_compile
*p
,
818 unsigned execute_size
);
820 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
821 struct brw_instruction
*if_insn
);
823 void brw_ENDIF(struct brw_compile
*p
,
824 struct brw_instruction
*if_or_else_insn
);
829 struct brw_instruction
*brw_DO(struct brw_compile
*p
,
830 unsigned execute_size
);
832 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
833 struct brw_instruction
*patch_insn
);
835 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
);
836 struct brw_instruction
*brw_CONT(struct brw_compile
*p
);
839 void brw_land_fwd_jump(struct brw_compile
*p
,
840 struct brw_instruction
*jmp_insn
);
844 void brw_NOP(struct brw_compile
*p
);
846 /* Special case: there is never a destination, execution size will be
849 void brw_CMP(struct brw_compile
*p
,
851 unsigned conditional
,
853 struct brw_reg src1
);
855 void brw_print_reg( struct brw_reg reg
);
858 /***********************************************************************
862 void brw_copy_indirect_to_indirect(struct brw_compile
*p
,
863 struct brw_indirect dst_ptr
,
864 struct brw_indirect src_ptr
,
867 void brw_copy_from_indirect(struct brw_compile
*p
,
869 struct brw_indirect ptr
,
872 void brw_copy4(struct brw_compile
*p
,
877 void brw_copy8(struct brw_compile
*p
,
882 void brw_math_invert( struct brw_compile
*p
,
886 void brw_set_src1( struct brw_instruction
*insn
,
887 struct brw_reg reg
);