2 /* FF is big and ugly so feel free to write lines as long as you like.
5 * Let me make that clearer:
6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!
10 #include "basetexture9.h"
11 #include "vertexdeclaration9.h"
12 #include "vertexshader9.h"
13 #include "pixelshader9.h"
15 #include "nine_defines.h"
16 #include "nine_helpers.h"
17 #include "nine_pipe.h"
18 #include "nine_dump.h"
20 #include "pipe/p_context.h"
21 #include "tgsi/tgsi_ureg.h"
22 #include "tgsi/tgsi_dump.h"
23 #include "util/u_box.h"
24 #include "util/u_hash_table.h"
26 #define NINE_TGSI_LAZY_DEVS 1
28 #define DBG_CHANNEL DBG_FF
30 #define NINE_FF_NUM_VS_CONST 256
31 #define NINE_FF_NUM_PS_CONST 24
33 #define NINED3DTSS_TCI_DISABLE 0
34 #define NINED3DTSS_TCI_PASSTHRU 1
35 #define NINED3DTSS_TCI_CAMERASPACENORMAL 2
36 #define NINED3DTSS_TCI_CAMERASPACEPOSITION 3
37 #define NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR 4
38 #define NINED3DTSS_TCI_SPHEREMAP 5
49 uint32_t position_t
: 1;
50 uint32_t lighting
: 1;
51 uint32_t darkness
: 1; /* lighting enabled but no active lights */
52 uint32_t localviewer
: 1;
53 uint32_t vertexpointsize
: 1;
54 uint32_t pointscale
: 1;
55 uint32_t vertexblend
: 3;
56 uint32_t vertexblend_indexed
: 1;
57 uint32_t vertextween
: 1;
58 uint32_t mtl_diffuse
: 2; /* 0 = material, 1 = color1, 2 = color2 */
59 uint32_t mtl_ambient
: 2;
60 uint32_t mtl_specular
: 2;
61 uint32_t mtl_emissive
: 2;
62 uint32_t fog_mode
: 2;
63 uint32_t fog_range
: 1;
64 uint32_t color0in_one
: 1;
65 uint32_t color1in_one
: 1;
67 uint32_t tc_gen
: 24; /* 8 * 3 bits */
71 uint32_t tc_dim
: 24; /* 8 * 3 bits */
74 uint64_t value64
[2]; /* don't forget to resize VertexShader9.ff_key */
79 /* Texture stage state:
81 * COLOROP D3DTOP 5 bit
82 * ALPHAOP D3DTOP 5 bit
83 * COLORARG0 D3DTA 3 bit
84 * COLORARG1 D3DTA 3 bit
85 * COLORARG2 D3DTA 3 bit
86 * ALPHAARG0 D3DTA 3 bit
87 * ALPHAARG1 D3DTA 3 bit
88 * ALPHAARG2 D3DTA 3 bit
89 * RESULTARG D3DTA 1 bit (CURRENT:0 or TEMP:1)
90 * TEXCOORDINDEX 0 - 7 3 bit
91 * ===========================
100 uint32_t alphaop
: 5;
101 uint32_t colorarg0
: 3;
102 uint32_t colorarg1
: 3;
103 uint32_t colorarg2
: 3;
104 uint32_t alphaarg0
: 3;
105 uint32_t alphaarg1
: 3;
106 uint32_t alphaarg2
: 3;
107 uint32_t resultarg
: 1; /* CURRENT:0 or TEMP:1 */
108 uint32_t textarget
: 2; /* 1D/2D/3D/CUBE */
109 uint32_t projected
: 1;
110 /* that's 32 bit exactly */
112 uint32_t fog
: 1; /* for vFog with programmable VS */
113 uint32_t fog_mode
: 2;
114 uint32_t specular
: 1; /* 9 32-bit words with this */
115 uint8_t colorarg_b4
[3];
116 uint8_t colorarg_b5
[3];
117 uint8_t alphaarg_b4
[3]; /* 11 32-bit words plus a byte */
119 uint64_t value64
[6]; /* don't forget to resize PixelShader9.ff_key */
120 uint32_t value32
[12];
124 static unsigned nine_ff_vs_key_hash(void *key
)
126 struct nine_ff_vs_key
*vs
= key
;
128 uint32_t hash
= vs
->value32
[0];
129 for (i
= 1; i
< Elements(vs
->value32
); ++i
)
130 hash
^= vs
->value32
[i
];
133 static int nine_ff_vs_key_comp(void *key1
, void *key2
)
135 struct nine_ff_vs_key
*a
= (struct nine_ff_vs_key
*)key1
;
136 struct nine_ff_vs_key
*b
= (struct nine_ff_vs_key
*)key2
;
138 return memcmp(a
->value64
, b
->value64
, sizeof(a
->value64
));
140 static unsigned nine_ff_ps_key_hash(void *key
)
142 struct nine_ff_ps_key
*ps
= key
;
144 uint32_t hash
= ps
->value32
[0];
145 for (i
= 1; i
< Elements(ps
->value32
); ++i
)
146 hash
^= ps
->value32
[i
];
149 static int nine_ff_ps_key_comp(void *key1
, void *key2
)
151 struct nine_ff_ps_key
*a
= (struct nine_ff_ps_key
*)key1
;
152 struct nine_ff_ps_key
*b
= (struct nine_ff_ps_key
*)key2
;
154 return memcmp(a
->value64
, b
->value64
, sizeof(a
->value64
));
156 static unsigned nine_ff_fvf_key_hash(void *key
)
158 return *(DWORD
*)key
;
160 static int nine_ff_fvf_key_comp(void *key1
, void *key2
)
162 return *(DWORD
*)key1
!= *(DWORD
*)key2
;
165 static void nine_ff_prune_vs(struct NineDevice9
*);
166 static void nine_ff_prune_ps(struct NineDevice9
*);
168 static void nine_ureg_tgsi_dump(struct ureg_program
*ureg
, boolean override
)
170 if (debug_get_bool_option("NINE_FF_DUMP", FALSE
) || override
) {
172 const struct tgsi_token
*toks
= ureg_get_tokens(ureg
, &count
);
174 ureg_free_tokens(toks
);
/* Single-component selectors for a register that must first be converted
 * with ureg_src(): each picks the named component via ureg_scalar(). */
#define _X(r)    ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X)
#define _Y(r)    ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y)
#define _Z(r)    ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z)
#define _W(r)    ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W)

/* Same selectors for an operand that is handed to ureg_scalar() as-is. */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)
190 /* AL should contain the base address of the lights table. */
/* Constant-file accessors. All of them expand to expressions that use a
 * variable named `ureg` from the expansion site; LIGHT_CONST additionally
 * uses `AL` as the indirect-address register. */
#define LIGHT_CONST(i) \
    ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL))

/* Material constants start at CONST[19]; `i` is the offset from there. */
#define MATERIAL_CONST(i) \
    ureg_DECL_constant(ureg, 19 + (i))

#define _CONST(n) ureg_DECL_constant(ureg, n)
199 /* VS FF constants layout:
201 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
202 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
203 * CONST[ 8..11] D3DTS_VIEW * D3DTS_PROJECTION
204 * CONST[12..15] D3DTS_VIEW
205 * CONST[16..18] Normal matrix
207 * CONST[19] MATERIAL.Emissive + Material.Ambient * RS.Ambient
208 * CONST[20] MATERIAL.Diffuse
209 * CONST[21] MATERIAL.Ambient
210 * CONST[22] MATERIAL.Specular
211 * CONST[23].x___ MATERIAL.Power
212 * CONST[24] MATERIAL.Emissive
213 * CONST[25] RS.Ambient
215 * CONST[26].x___ RS.PointSizeMin
216 * CONST[26]._y__ RS.PointSizeMax
217 * CONST[26].__z_ RS.PointSize
218 * CONST[26].___w RS.PointScaleA
219 * CONST[27].x___ RS.PointScaleB
220 * CONST[27]._y__ RS.PointScaleC
222 * CONST[28].x___ RS.FogEnd
223 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
224 * CONST[28].__z_ RS.FogDensity
225 * CONST[29] RS.FogColor
227 * CONST[30].x___ TWEENFACTOR
229 * CONST[32].x___ LIGHT[0].Type
230 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
231 * CONST[33] LIGHT[0].Diffuse
232 * CONST[34] LIGHT[0].Specular
233 * CONST[35] LIGHT[0].Ambient
234 * CONST[36].xyz_ LIGHT[0].Position
235 * CONST[36].___w LIGHT[0].Range
236 * CONST[37].xyz_ LIGHT[0].Direction
237 * CONST[37].___w LIGHT[0].Falloff
238 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
239 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
240 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(LIGHT[0].Phi / 2))
241 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
242 * CONST[39].___w 1 if this is the last active light, 0 if not
250 * NOTE: no lighting code is generated if there are no active lights
252 * CONST[100].x___ Viewport 2/width
253 * CONST[100]._y__ Viewport 2/height
254 * CONST[100].__z_ Viewport 1/(zmax - zmin)
255 * CONST[101].x___ Viewport x0
256 * CONST[101]._y__ Viewport y0
257 * CONST[101].__z_ Viewport z0
259 * CONST[128..131] D3DTS_TEXTURE0
260 * CONST[132..135] D3DTS_TEXTURE1
261 * CONST[136..139] D3DTS_TEXTURE2
262 * CONST[140..143] D3DTS_TEXTURE3
263 * CONST[144..147] D3DTS_TEXTURE4
264 * CONST[148..151] D3DTS_TEXTURE5
265 * CONST[152..155] D3DTS_TEXTURE6
266 * CONST[156..159] D3DTS_TEXTURE7
268 * CONST[224] D3DTS_WORLDMATRIX[0]
269 * CONST[228] D3DTS_WORLDMATRIX[1]
271 * CONST[252] D3DTS_WORLDMATRIX[7]
275 struct ureg_program
*ureg
;
276 const struct nine_ff_vs_key
*key
;
278 uint16_t input
[PIPE_MAX_ATTRIBS
];
281 struct ureg_src aVtx
;
282 struct ureg_src aNrm
;
283 struct ureg_src aCol
[2];
284 struct ureg_src aTex
[8];
285 struct ureg_src aPsz
;
286 struct ureg_src aInd
;
287 struct ureg_src aWgt
;
289 struct ureg_src aVtx1
; /* tweening */
290 struct ureg_src aNrm1
;
292 struct ureg_src mtlA
;
293 struct ureg_src mtlD
;
294 struct ureg_src mtlS
;
295 struct ureg_src mtlE
;
298 static INLINE
unsigned
299 get_texcoord_sn(struct pipe_screen
*screen
)
301 if (screen
->get_param(screen
, PIPE_CAP_TGSI_TEXCOORD
))
302 return TGSI_SEMANTIC_TEXCOORD
;
303 return TGSI_SEMANTIC_GENERIC
;
306 static INLINE
struct ureg_src
307 build_vs_add_input(struct vs_build_ctx
*vs
, uint16_t ndecl
)
309 const unsigned i
= vs
->num_inputs
++;
310 assert(i
< PIPE_MAX_ATTRIBS
);
311 vs
->input
[i
] = ndecl
;
312 return ureg_DECL_vs_input(vs
->ureg
, i
);
315 /* NOTE: dst may alias src */
317 ureg_normalize3(struct ureg_program
*ureg
,
318 struct ureg_dst dst
, struct ureg_src src
,
321 #ifdef NINE_TGSI_LAZY_DEVS
322 struct ureg_dst tmp_x
= ureg_writemask(tmp
, TGSI_WRITEMASK_X
);
324 ureg_DP3(ureg
, tmp_x
, src
, src
);
325 ureg_RSQ(ureg
, tmp_x
, _X(tmp
));
326 ureg_MUL(ureg
, dst
, src
, _X(tmp
));
328 ureg_NRM(ureg
, dst
, src
);
333 nine_ff_build_vs(struct NineDevice9
*device
, struct vs_build_ctx
*vs
)
335 const struct nine_ff_vs_key
*key
= vs
->key
;
336 struct ureg_program
*ureg
= ureg_create(TGSI_PROCESSOR_VERTEX
);
337 struct ureg_dst oPos
, oCol
[2], oTex
[8], oPsz
, oFog
;
338 struct ureg_dst rCol
[2]; /* oCol if no fog, TEMP otherwise */
339 struct ureg_dst rVtx
, rNrm
;
340 struct ureg_dst r
[8];
342 struct ureg_dst tmp
, tmp_x
, tmp_z
;
344 unsigned label
[32], l
= 0;
346 boolean need_rNrm
= key
->lighting
|| key
->pointscale
;
347 boolean need_rVtx
= key
->lighting
|| key
->fog_mode
;
348 const unsigned texcoord_sn
= get_texcoord_sn(device
->screen
);
352 /* Check which inputs we should transform. */
353 for (i
= 0; i
< 8 * 3; i
+= 3) {
354 switch ((key
->tc_gen
>> i
) & 0x3) {
355 case NINED3DTSS_TCI_CAMERASPACENORMAL
:
358 case NINED3DTSS_TCI_CAMERASPACEPOSITION
:
361 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR
:
362 need_rVtx
= need_rNrm
= TRUE
;
369 /* Declare and record used inputs (needed for linkage with vertex format):
370 * (texture coordinates handled later)
372 vs
->aVtx
= build_vs_add_input(vs
,
373 key
->position_t
? NINE_DECLUSAGE_POSITIONT
: NINE_DECLUSAGE_POSITION
);
376 vs
->aNrm
= build_vs_add_input(vs
, NINE_DECLUSAGE_NORMAL
);
378 vs
->aCol
[0] = ureg_imm1f(ureg
, 1.0f
);
379 vs
->aCol
[1] = ureg_imm1f(ureg
, 1.0f
);
381 if (key
->lighting
|| key
->darkness
) {
382 const unsigned mask
= key
->mtl_diffuse
| key
->mtl_specular
|
383 key
->mtl_ambient
| key
->mtl_emissive
;
384 if ((mask
& 0x1) && !key
->color0in_one
)
385 vs
->aCol
[0] = build_vs_add_input(vs
, NINE_DECLUSAGE_i(COLOR
, 0));
386 if ((mask
& 0x2) && !key
->color1in_one
)
387 vs
->aCol
[1] = build_vs_add_input(vs
, NINE_DECLUSAGE_i(COLOR
, 1));
389 vs
->mtlD
= MATERIAL_CONST(1);
390 vs
->mtlA
= MATERIAL_CONST(2);
391 vs
->mtlS
= MATERIAL_CONST(3);
392 vs
->mtlE
= MATERIAL_CONST(5);
393 if (key
->mtl_diffuse
== 1) vs
->mtlD
= vs
->aCol
[0]; else
394 if (key
->mtl_diffuse
== 2) vs
->mtlD
= vs
->aCol
[1];
395 if (key
->mtl_ambient
== 1) vs
->mtlA
= vs
->aCol
[0]; else
396 if (key
->mtl_ambient
== 2) vs
->mtlA
= vs
->aCol
[1];
397 if (key
->mtl_specular
== 1) vs
->mtlS
= vs
->aCol
[0]; else
398 if (key
->mtl_specular
== 2) vs
->mtlS
= vs
->aCol
[1];
399 if (key
->mtl_emissive
== 1) vs
->mtlE
= vs
->aCol
[0]; else
400 if (key
->mtl_emissive
== 2) vs
->mtlE
= vs
->aCol
[1];
402 if (!key
->color0in_one
) vs
->aCol
[0] = build_vs_add_input(vs
, NINE_DECLUSAGE_i(COLOR
, 0));
403 if (!key
->color1in_one
) vs
->aCol
[1] = build_vs_add_input(vs
, NINE_DECLUSAGE_i(COLOR
, 1));
406 if (key
->vertexpointsize
)
407 vs
->aPsz
= build_vs_add_input(vs
, NINE_DECLUSAGE_PSIZE
);
409 if (key
->vertexblend_indexed
)
410 vs
->aInd
= build_vs_add_input(vs
, NINE_DECLUSAGE_BLENDINDICES
);
411 if (key
->vertexblend
)
412 vs
->aWgt
= build_vs_add_input(vs
, NINE_DECLUSAGE_BLENDWEIGHT
);
413 if (key
->vertextween
) {
414 vs
->aVtx1
= build_vs_add_input(vs
, NINE_DECLUSAGE_i(POSITION
,1));
415 vs
->aNrm1
= build_vs_add_input(vs
, NINE_DECLUSAGE_i(NORMAL
,1));
420 oPos
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_POSITION
, 0); /* HPOS */
421 oCol
[0] = ureg_saturate(ureg_DECL_output(ureg
, TGSI_SEMANTIC_COLOR
, 0));
422 oCol
[1] = ureg_saturate(ureg_DECL_output(ureg
, TGSI_SEMANTIC_COLOR
, 1));
424 if (key
->vertexpointsize
|| key
->pointscale
) {
425 oPsz
= ureg_DECL_output_masked(ureg
, TGSI_SEMANTIC_PSIZE
, 0, TGSI_WRITEMASK_X
);
426 oPsz
= ureg_writemask(oPsz
, TGSI_WRITEMASK_X
);
429 /* We apply fog to the vertex colors, oFog is for programmable shaders only ?
431 oFog
= ureg_DECL_output_masked(ureg
, TGSI_SEMANTIC_FOG
, 0, TGSI_WRITEMASK_X
);
432 oFog
= ureg_writemask(oFog
, TGSI_WRITEMASK_X
);
437 for (i
= 0; i
< num_r
; ++i
)
438 r
[i
] = ureg_DECL_local_temporary(ureg
);
440 tmp_x
= ureg_writemask(tmp
, TGSI_WRITEMASK_X
);
441 tmp_z
= ureg_writemask(tmp
, TGSI_WRITEMASK_Z
);
442 if (key
->lighting
|| key
->vertexblend
)
443 AR
= ureg_DECL_address(ureg
);
453 rVtx
= ureg_writemask(r
[1], TGSI_WRITEMASK_XYZ
);
454 rNrm
= ureg_writemask(r
[2], TGSI_WRITEMASK_XYZ
);
456 /* === Vertex transformation / vertex blending:
458 if (key
->vertextween
) {
459 assert(!key
->vertexblend
);
460 ureg_LRP(ureg
, r
[2], _XXXX(_CONST(30)), vs
->aVtx
, vs
->aVtx1
);
462 ureg_LRP(ureg
, r
[3], _XXXX(_CONST(30)), vs
->aNrm
, vs
->aNrm1
);
463 vs
->aVtx
= ureg_src(r
[2]);
464 vs
->aNrm
= ureg_src(r
[3]);
467 if (key
->vertexblend
) {
468 struct ureg_src cWM
[4];
470 for (i
= 224; i
<= 255; ++i
)
471 ureg_DECL_constant(ureg
, i
);
473 /* translate world matrix index to constant file index */
474 if (key
->vertexblend_indexed
) {
475 ureg_MAD(ureg
, tmp
, vs
->aInd
, ureg_imm1f(ureg
, 4.0f
), ureg_imm1f(ureg
, 224.0f
));
476 ureg_ARL(ureg
, AR
, ureg_src(tmp
));
478 for (i
= 0; i
< key
->vertexblend
; ++i
) {
479 for (c
= 0; c
< 4; ++c
) {
480 cWM
[c
] = ureg_src_register(TGSI_FILE_CONSTANT
, (224 + i
* 4) * !key
->vertexblend_indexed
+ c
);
481 if (key
->vertexblend_indexed
)
482 cWM
[c
] = ureg_src_indirect(cWM
[c
], ureg_scalar(ureg_src(AR
), i
));
484 /* multiply by WORLD(index) */
485 ureg_MUL(ureg
, r
[0], _XXXX(vs
->aVtx
), cWM
[0]);
486 ureg_MAD(ureg
, r
[0], _YYYY(vs
->aVtx
), cWM
[1], ureg_src(r
[0]));
487 ureg_MAD(ureg
, r
[0], _ZZZZ(vs
->aVtx
), cWM
[2], ureg_src(r
[0]));
488 ureg_MAD(ureg
, r
[0], _WWWW(vs
->aVtx
), cWM
[3], ureg_src(r
[0]));
490 /* accumulate weighted position value */
492 ureg_MAD(ureg
, r
[2], ureg_src(r
[0]), ureg_scalar(vs
->aWgt
, i
), ureg_src(r
[2]));
494 ureg_MUL(ureg
, r
[2], ureg_src(r
[0]), ureg_scalar(vs
->aWgt
, 0));
496 /* multiply by VIEW_PROJ */
497 ureg_MUL(ureg
, r
[0], _X(r
[2]), _CONST(8));
498 ureg_MAD(ureg
, r
[0], _Y(r
[2]), _CONST(9), ureg_src(r
[0]));
499 ureg_MAD(ureg
, r
[0], _Z(r
[2]), _CONST(10), ureg_src(r
[0]));
500 ureg_MAD(ureg
, oPos
, _W(r
[2]), _CONST(11), ureg_src(r
[0]));
503 vs
->aVtx
= ureg_src(r
[2]);
505 if (key
->position_t
&& device
->driver_caps
.window_space_position_support
) {
506 ureg_MOV(ureg
, oPos
, vs
->aVtx
);
507 } else if (key
->position_t
) {
508 /* vs->aVtx holds the position in viewport (window) coordinates.
509 * Later in the pipeline, clipping, the viewport transform and the
510 * division by w (rhw = 1/w) will be applied, so apply the inverse
511 * of those transformations here (except clipping) to end up with
512 * the intended position. */
513 ureg_MOV(ureg
, tmp
, vs
->aVtx
);
514 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
515 ureg_SUB(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_XYZ
), ureg_src(tmp
), _CONST(101));
516 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_XYZ
), ureg_src(tmp
), _CONST(100));
517 ureg_SUB(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_XY
), ureg_src(tmp
), ureg_imm1f(ureg
, 1.0f
));
518 /* Y needs to be reversed */
519 ureg_MOV(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_negate(ureg_src(tmp
)));
521 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_W
), _W(tmp
));
522 /* multiply X, Y, Z by w */
523 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_XYZ
), ureg_src(tmp
), _W(tmp
));
524 ureg_MOV(ureg
, oPos
, ureg_src(tmp
));
526 /* position = vertex * WORLD_VIEW_PROJ */
527 ureg_MUL(ureg
, r
[0], _XXXX(vs
->aVtx
), _CONST(0));
528 ureg_MAD(ureg
, r
[0], _YYYY(vs
->aVtx
), _CONST(1), ureg_src(r
[0]));
529 ureg_MAD(ureg
, r
[0], _ZZZZ(vs
->aVtx
), _CONST(2), ureg_src(r
[0]));
530 ureg_MAD(ureg
, oPos
, _WWWW(vs
->aVtx
), _CONST(3), ureg_src(r
[0]));
534 ureg_MUL(ureg
, rVtx
, _XXXX(vs
->aVtx
), _CONST(4));
535 ureg_MAD(ureg
, rVtx
, _YYYY(vs
->aVtx
), _CONST(5), ureg_src(rVtx
));
536 ureg_MAD(ureg
, rVtx
, _ZZZZ(vs
->aVtx
), _CONST(6), ureg_src(rVtx
));
537 ureg_MAD(ureg
, rVtx
, _WWWW(vs
->aVtx
), _CONST(7), ureg_src(rVtx
));
540 ureg_MUL(ureg
, rNrm
, _XXXX(vs
->aNrm
), _CONST(16));
541 ureg_MAD(ureg
, rNrm
, _YYYY(vs
->aNrm
), _CONST(17), ureg_src(rNrm
));
542 ureg_MAD(ureg
, rNrm
, _ZZZZ(vs
->aNrm
), _CONST(18), ureg_src(rNrm
));
543 ureg_normalize3(ureg
, rNrm
, ureg_src(rNrm
), tmp
);
545 /* NOTE: don't use vs->aVtx, vs->aNrm after this line */
547 /* === Process point size:
549 if (key
->vertexpointsize
) {
550 struct ureg_src cPsz1
= ureg_DECL_constant(ureg
, 26);
551 #ifdef NINE_TGSI_LAZY_DEVS
552 struct ureg_dst tmp_clamp
= ureg_DECL_temporary(ureg
);
554 ureg_MAX(ureg
, tmp_clamp
, vs
->aPsz
, _XXXX(cPsz1
));
555 ureg_MIN(ureg
, oPsz
, ureg_src(tmp_clamp
), _YYYY(cPsz1
));
556 ureg_release_temporary(ureg
, tmp_clamp
);
558 ureg_CLAMP(ureg
, oPsz
, vs
->aPsz
, _XXXX(cPsz1
), _YYYY(cPsz1
));
560 } else if (key
->pointscale
) {
561 struct ureg_dst tmp_x
= ureg_writemask(tmp
, TGSI_WRITEMASK_X
);
562 struct ureg_dst tmp_y
= ureg_writemask(tmp
, TGSI_WRITEMASK_Y
);
563 struct ureg_src cPsz1
= ureg_DECL_constant(ureg
, 26);
564 struct ureg_src cPsz2
= ureg_DECL_constant(ureg
, 27);
566 ureg_DP3(ureg
, tmp_x
, ureg_src(r
[1]), ureg_src(r
[1]));
567 ureg_SQRT(ureg
, tmp_y
, _X(tmp
));
568 ureg_MAD(ureg
, tmp_x
, _Y(tmp
), _YYYY(cPsz2
), _XXXX(cPsz2
));
569 ureg_MAD(ureg
, tmp_x
, _Y(tmp
), _X(tmp
), _WWWW(cPsz1
));
570 ureg_RCP(ureg
, tmp_x
, ureg_src(tmp
));
571 ureg_MUL(ureg
, tmp_x
, ureg_src(tmp
), _ZZZZ(cPsz1
));
572 #ifdef NINE_TGSI_LAZY_DEVS
573 struct ureg_dst tmp_clamp
= ureg_DECL_temporary(ureg
);
575 ureg_MAX(ureg
, tmp_clamp
, _X(tmp
), _XXXX(cPsz1
));
576 ureg_MIN(ureg
, oPsz
, ureg_src(tmp_clamp
), _YYYY(cPsz1
));
577 ureg_release_temporary(ureg
, tmp_clamp
);
579 ureg_CLAMP(ureg
, oPsz
, _X(tmp
), _XXXX(cPsz1
), _YYYY(cPsz1
));
583 /* Texture coordinate generation:
584 * XXX: D3DTTFF_PROJECTED, transform matrix
586 for (i
= 0; i
< 8; ++i
) {
587 struct ureg_dst dst
[5];
590 const unsigned tci
= (key
->tc_gen
>> (i
* 3)) & 0x7;
591 const unsigned idx
= (key
->tc_idx
>> (i
* 3)) & 0x7;
592 const unsigned dim
= (key
->tc_dim
>> (i
* 3)) & 0x7;
594 if (tci
== NINED3DTSS_TCI_DISABLE
)
596 oTex
[i
] = ureg_DECL_output(ureg
, texcoord_sn
, i
);
598 if (tci
== NINED3DTSS_TCI_PASSTHRU
)
599 vs
->aTex
[idx
] = build_vs_add_input(vs
, NINE_DECLUSAGE_i(TEXCOORD
,idx
));
602 dst
[c
= 4] = oTex
[i
];
605 src
= ureg_src(dst
[4]);
606 for (c
= 0; c
< (dim
- 1); ++c
)
607 dst
[c
] = ureg_writemask(tmp
, (1 << dim
) - 1);
608 dst
[c
] = ureg_writemask(oTex
[i
], (1 << dim
) - 1);
612 case NINED3DTSS_TCI_PASSTHRU
:
613 ureg_MOV(ureg
, dst
[4], vs
->aTex
[idx
]);
615 case NINED3DTSS_TCI_CAMERASPACENORMAL
:
617 ureg_MOV(ureg
, ureg_writemask(dst
[4], TGSI_WRITEMASK_XYZ
), ureg_src(rNrm
));
618 ureg_MOV(ureg
, ureg_writemask(dst
[4], TGSI_WRITEMASK_W
), ureg_imm1f(ureg
, 1.0f
));
620 case NINED3DTSS_TCI_CAMERASPACEPOSITION
:
621 ureg_MOV(ureg
, ureg_writemask(dst
[4], TGSI_WRITEMASK_XYZ
), ureg_src(rVtx
));
622 ureg_MOV(ureg
, ureg_writemask(dst
[4], TGSI_WRITEMASK_W
), ureg_imm1f(ureg
, 1.0f
));
624 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR
:
625 tmp
.WriteMask
= TGSI_WRITEMASK_XYZ
;
626 ureg_DP3(ureg
, tmp_x
, ureg_src(rVtx
), ureg_src(rNrm
));
627 ureg_MUL(ureg
, tmp
, ureg_src(rNrm
), _X(tmp
));
628 ureg_ADD(ureg
, tmp
, ureg_src(tmp
), ureg_src(tmp
));
629 ureg_SUB(ureg
, ureg_writemask(dst
[4], TGSI_WRITEMASK_XYZ
), ureg_src(rVtx
), ureg_src(tmp
));
630 ureg_MOV(ureg
, ureg_writemask(dst
[4], TGSI_WRITEMASK_W
), ureg_imm1f(ureg
, 1.0f
));
631 tmp
.WriteMask
= TGSI_WRITEMASK_XYZW
;
633 case NINED3DTSS_TCI_SPHEREMAP
:
641 dst
[c
].WriteMask
= ~dst
[c
].WriteMask
;
642 if (dst
[c
].WriteMask
)
643 ureg_MOV(ureg
, dst
[c
], src
); /* store untransformed components */
644 dst
[c
].WriteMask
= ~dst
[c
].WriteMask
;
645 if (dim
> 0) ureg_MUL(ureg
, dst
[0], _XXXX(src
), _CONST(128 + i
* 4));
646 if (dim
> 1) ureg_MAD(ureg
, dst
[1], _YYYY(src
), _CONST(129 + i
* 4), ureg_src(tmp
));
647 if (dim
> 2) ureg_MAD(ureg
, dst
[2], _ZZZZ(src
), _CONST(130 + i
* 4), ureg_src(tmp
));
648 if (dim
> 3) ureg_MAD(ureg
, dst
[3], _WWWW(src
), _CONST(131 + i
* 4), ureg_src(tmp
));
653 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
654 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
655 * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
657 * vec3 normal = normalize(in.Normal * NormalMatrix);
658 * vec3 hitDir = light.direction;
661 * if (light.type != DIRECTIONAL)
663 * vec3 hitVec = light.position - eyeVertex;
664 * float d = length(hitVec);
665 * hitDir = hitVec / d;
666 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
669 * if (light.type == SPOTLIGHT)
671 * float rho = dp3(-hitDir, light.direction);
672 * if (rho < cos(light.phi / 2))
674 * if (rho < cos(light.theta / 2))
675 * atten *= pow(some_func(rho), light.falloff);
678 * float nDotHit = dp3_sat(normal, hitDir);
679 * float powFact = 0.0;
683 * vec3 midVec = normalize(hitDir + eye);
684 * float nDotMid = dp3_sat(normal, midVec);
685 * powFact = pow(nDotMid, material.power);
688 * ambient += light.ambient * atten;
689 * diffuse += light.diffuse * atten * nDotHit;
690 * specular += light.specular * atten * powFact;
693 struct ureg_dst tmp_y
= ureg_writemask(tmp
, TGSI_WRITEMASK_Y
);
695 struct ureg_dst rAtt
= ureg_writemask(r
[1], TGSI_WRITEMASK_W
);
696 struct ureg_dst rHit
= ureg_writemask(r
[3], TGSI_WRITEMASK_XYZ
);
697 struct ureg_dst rMid
= ureg_writemask(r
[4], TGSI_WRITEMASK_XYZ
);
699 struct ureg_dst rCtr
= ureg_writemask(r
[2], TGSI_WRITEMASK_W
);
701 struct ureg_dst AL
= ureg_writemask(AR
, TGSI_WRITEMASK_X
);
703 /* Light.*.Alpha is not used. */
704 struct ureg_dst rD
= ureg_writemask(r
[5], TGSI_WRITEMASK_XYZ
);
705 struct ureg_dst rA
= ureg_writemask(r
[6], TGSI_WRITEMASK_XYZ
);
706 struct ureg_dst rS
= ureg_writemask(r
[7], TGSI_WRITEMASK_XYZ
);
708 struct ureg_src mtlP
= _XXXX(MATERIAL_CONST(4));
710 struct ureg_src cLKind
= _XXXX(LIGHT_CONST(0));
711 struct ureg_src cLAtt0
= _YYYY(LIGHT_CONST(0));
712 struct ureg_src cLAtt1
= _ZZZZ(LIGHT_CONST(0));
713 struct ureg_src cLAtt2
= _WWWW(LIGHT_CONST(0));
714 struct ureg_src cLColD
= _XYZW(LIGHT_CONST(1));
715 struct ureg_src cLColS
= _XYZW(LIGHT_CONST(2));
716 struct ureg_src cLColA
= _XYZW(LIGHT_CONST(3));
717 struct ureg_src cLPos
= _XYZW(LIGHT_CONST(4));
718 struct ureg_src cLRng
= _WWWW(LIGHT_CONST(4));
719 struct ureg_src cLDir
= _XYZW(LIGHT_CONST(5));
720 struct ureg_src cLFOff
= _WWWW(LIGHT_CONST(5));
721 struct ureg_src cLTht
= _XXXX(LIGHT_CONST(6));
722 struct ureg_src cLPhi
= _YYYY(LIGHT_CONST(6));
723 struct ureg_src cLSDiv
= _ZZZZ(LIGHT_CONST(6));
724 struct ureg_src cLLast
= _WWWW(LIGHT_CONST(7));
726 const unsigned loop_label
= l
++;
728 ureg_MOV(ureg
, rCtr
, ureg_imm1f(ureg
, 32.0f
)); /* &lightconst(0) */
729 ureg_MOV(ureg
, rD
, ureg_imm1f(ureg
, 0.0f
));
730 ureg_MOV(ureg
, rA
, ureg_imm1f(ureg
, 0.0f
));
731 ureg_MOV(ureg
, rS
, ureg_imm1f(ureg
, 0.0f
));
732 rD
= ureg_saturate(rD
);
733 rA
= ureg_saturate(rA
);
734 rS
= ureg_saturate(rS
);
737 /* loop management */
738 ureg_BGNLOOP(ureg
, &label
[loop_label
]);
739 ureg_ARL(ureg
, AL
, _W(rCtr
));
741 /* if (not DIRECTIONAL light): */
742 ureg_SNE(ureg
, tmp_x
, cLKind
, ureg_imm1f(ureg
, D3DLIGHT_DIRECTIONAL
));
743 ureg_MOV(ureg
, rHit
, ureg_negate(cLDir
));
744 ureg_MOV(ureg
, rAtt
, ureg_imm1f(ureg
, 1.0f
));
745 ureg_IF(ureg
, _X(tmp
), &label
[l
++]);
747 /* hitDir = light.position - eyeVtx
751 ureg_SUB(ureg
, rHit
, cLPos
, ureg_src(rVtx
));
752 ureg_DP3(ureg
, tmp_x
, ureg_src(rHit
), ureg_src(rHit
));
753 ureg_RSQ(ureg
, tmp_y
, _X(tmp
));
754 ureg_MUL(ureg
, rHit
, ureg_src(rHit
), _Y(tmp
)); /* normalize */
755 ureg_MUL(ureg
, tmp_x
, _X(tmp
), _Y(tmp
)); /* length */
757 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
758 ureg_MAD(ureg
, rAtt
, _X(tmp
), cLAtt2
, cLAtt1
);
759 ureg_MAD(ureg
, rAtt
, _X(tmp
), _W(rAtt
), cLAtt0
);
760 ureg_RCP(ureg
, rAtt
, _W(rAtt
));
761 /* cut-off if distance exceeds Light.Range */
762 ureg_SLT(ureg
, tmp_x
, _X(tmp
), cLRng
);
763 ureg_MUL(ureg
, rAtt
, _W(rAtt
), _X(tmp
));
765 ureg_fixup_label(ureg
, label
[l
-1], ureg_get_instruction_number(ureg
));
768 /* if (SPOT light) */
769 ureg_SEQ(ureg
, tmp_x
, cLKind
, ureg_imm1f(ureg
, D3DLIGHT_SPOT
));
770 ureg_IF(ureg
, _X(tmp
), &label
[l
++]);
772 /* rho = dp3(-hitDir, light.spotDir)
774 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
777 * if (rho <= light.cphi2)
780 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
782 ureg_DP3(ureg
, tmp_y
, ureg_negate(ureg_src(rHit
)), cLDir
); /* rho */
783 ureg_SUB(ureg
, tmp_x
, _Y(tmp
), cLPhi
);
784 ureg_MUL(ureg
, tmp_x
, _X(tmp
), cLSDiv
);
785 ureg_POW(ureg
, tmp_x
, _X(tmp
), cLFOff
); /* spotAtten */
786 ureg_SGE(ureg
, tmp_z
, _Y(tmp
), cLTht
); /* if inside theta && phi */
787 ureg_SGE(ureg
, tmp_y
, _Y(tmp
), cLPhi
); /* if inside phi */
788 ureg_MAD(ureg
, ureg_saturate(tmp_x
), _X(tmp
), _Y(tmp
), _Z(tmp
));
789 ureg_MUL(ureg
, rAtt
, _W(rAtt
), _X(tmp
));
791 ureg_fixup_label(ureg
, label
[l
-1], ureg_get_instruction_number(ureg
));
794 /* directional factors, let's not use LIT because of clarity */
795 ureg_DP3(ureg
, ureg_saturate(tmp_x
), ureg_src(rNrm
), ureg_src(rHit
));
796 ureg_MOV(ureg
, tmp_y
, ureg_imm1f(ureg
, 0.0f
));
797 ureg_IF(ureg
, _X(tmp
), &label
[l
++]);
799 /* midVec = normalize(hitDir + eyeDir) */
800 if (key
->localviewer
) {
801 ureg_normalize3(ureg
, rMid
, ureg_src(rVtx
), tmp
);
802 ureg_ADD(ureg
, rMid
, ureg_src(rHit
), ureg_negate(ureg_src(rMid
)));
804 ureg_ADD(ureg
, rMid
, ureg_src(rHit
), ureg_imm3f(ureg
, 0.0f
, 0.0f
, 1.0f
));
806 ureg_normalize3(ureg
, rMid
, ureg_src(rMid
), tmp
);
807 ureg_DP3(ureg
, ureg_saturate(tmp_y
), ureg_src(rNrm
), ureg_src(rMid
));
808 ureg_POW(ureg
, tmp_y
, _Y(tmp
), mtlP
);
810 ureg_MUL(ureg
, tmp_x
, _W(rAtt
), _X(tmp
)); /* dp3(normal,hitDir) * att */
811 ureg_MUL(ureg
, tmp_y
, _W(rAtt
), _Y(tmp
)); /* power factor * att */
812 ureg_MAD(ureg
, rD
, cLColD
, _X(tmp
), ureg_src(rD
)); /* accumulate diffuse */
813 ureg_MAD(ureg
, rS
, cLColS
, _Y(tmp
), ureg_src(rS
)); /* accumulate specular */
815 ureg_fixup_label(ureg
, label
[l
-1], ureg_get_instruction_number(ureg
));
818 ureg_MAD(ureg
, rA
, cLColA
, _W(rAtt
), ureg_src(rA
)); /* accumulate ambient */
820 /* break if this was the last light */
821 ureg_IF(ureg
, cLLast
, &label
[l
++]);
824 ureg_fixup_label(ureg
, label
[l
-1], ureg_get_instruction_number(ureg
));
826 ureg_ADD(ureg
, rCtr
, _W(rCtr
), ureg_imm1f(ureg
, 8.0f
));
827 ureg_fixup_label(ureg
, label
[loop_label
], ureg_get_instruction_number(ureg
));
828 ureg_ENDLOOP(ureg
, &label
[loop_label
]);
830 /* Set alpha factors of illumination to 1.0 for the multiplications. */
831 rD
.WriteMask
= TGSI_WRITEMASK_W
; rD
.Saturate
= 0;
832 rS
.WriteMask
= TGSI_WRITEMASK_W
; rS
.Saturate
= 0;
833 rA
.WriteMask
= TGSI_WRITEMASK_W
; rA
.Saturate
= 0;
834 ureg_MOV(ureg
, rD
, ureg_imm1f(ureg
, 1.0f
));
835 ureg_MOV(ureg
, rS
, ureg_imm1f(ureg
, 1.0f
));
837 /* Apply to material:
839 * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
840 * material.ambient * ambient +
841 * material.diffuse * diffuse +
842 * oCol[1] = material.specular * specular;
844 if (key
->mtl_emissive
== 0 && key
->mtl_ambient
== 0) {
845 ureg_MOV(ureg
, rA
, ureg_imm1f(ureg
, 1.0f
));
846 ureg_MAD(ureg
, tmp
, ureg_src(rA
), vs
->mtlA
, _CONST(19));
848 ureg_ADD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_XYZ
), ureg_src(rA
), _CONST(25));
849 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_XYZ
), vs
->mtlA
, ureg_src(tmp
), vs
->mtlE
);
850 ureg_ADD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_W
), vs
->mtlA
, vs
->mtlE
);
852 ureg_MAD(ureg
, rCol
[0], ureg_src(rD
), vs
->mtlD
, ureg_src(tmp
));
853 ureg_MUL(ureg
, rCol
[1], ureg_src(rS
), vs
->mtlS
);
857 if (key
->mtl_emissive
== 0 && key
->mtl_ambient
== 0) {
858 ureg_MAD(ureg
, rCol
[0], vs
->mtlD
, ureg_imm4f(ureg
, 0.0f
, 0.0f
, 0.0f
, 1.0f
), _CONST(19));
860 ureg_MAD(ureg
, ureg_writemask(rCol
[0], TGSI_WRITEMASK_XYZ
), vs
->mtlA
, _CONST(25), vs
->mtlE
);
861 ureg_ADD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_W
), vs
->mtlA
, vs
->mtlE
);
862 ureg_ADD(ureg
, ureg_writemask(rCol
[0], TGSI_WRITEMASK_W
), vs
->mtlD
, _W(tmp
));
864 ureg_MUL(ureg
, rCol
[1], ureg_imm4f(ureg
, 0.0f
, 0.0f
, 0.0f
, 1.0f
), vs
->mtlS
);
866 ureg_MOV(ureg
, rCol
[0], vs
->aCol
[0]);
867 ureg_MOV(ureg
, rCol
[1], vs
->aCol
[1]);
872 * exp(x) = ex2(log2(e) * x)
875 /* Fog doesn't affect alpha, TODO: combine with light code output */
876 ureg_MOV(ureg
, ureg_writemask(oCol
[0], TGSI_WRITEMASK_W
), _W(rCol
[0]));
877 ureg_MOV(ureg
, ureg_writemask(oCol
[1], TGSI_WRITEMASK_W
), _W(rCol
[1]));
879 if (key
->position_t
) {
880 ureg_MOV(ureg
, ureg_saturate(tmp_x
), ureg_scalar(vs
->aCol
[1], TGSI_SWIZZLE_W
));
882 if (key
->fog_range
) {
883 ureg_DP3(ureg
, tmp_x
, ureg_src(rVtx
), ureg_src(rVtx
));
884 ureg_RSQ(ureg
, tmp_z
, _X(tmp
));
885 ureg_MUL(ureg
, tmp_z
, _Z(tmp
), _X(tmp
));
887 ureg_MOV(ureg
, tmp_z
, ureg_abs(_Z(rVtx
)));
890 if (key
->fog_mode
== D3DFOG_EXP
) {
891 ureg_MUL(ureg
, tmp_x
, _Z(tmp
), _ZZZZ(_CONST(28)));
892 ureg_MUL(ureg
, tmp_x
, _X(tmp
), ureg_imm1f(ureg
, -1.442695f
));
893 ureg_EX2(ureg
, tmp_x
, _X(tmp
));
895 if (key
->fog_mode
== D3DFOG_EXP2
) {
896 ureg_MUL(ureg
, tmp_x
, _Z(tmp
), _ZZZZ(_CONST(28)));
897 ureg_MUL(ureg
, tmp_x
, _X(tmp
), _X(tmp
));
898 ureg_MUL(ureg
, tmp_x
, _X(tmp
), ureg_imm1f(ureg
, -1.442695f
));
899 ureg_EX2(ureg
, tmp_x
, _X(tmp
));
901 if (key
->fog_mode
== D3DFOG_LINEAR
&& !key
->position_t
) {
902 ureg_SUB(ureg
, tmp_x
, _XXXX(_CONST(28)), _Z(tmp
));
903 ureg_MUL(ureg
, ureg_saturate(tmp_x
), _X(tmp
), _YYYY(_CONST(28)));
905 ureg_MOV(ureg
, oFog
, _X(tmp
));
906 ureg_LRP(ureg
, ureg_writemask(oCol
[0], TGSI_WRITEMASK_XYZ
), _X(tmp
), ureg_src(rCol
[0]), _CONST(29));
907 ureg_LRP(ureg
, ureg_writemask(oCol
[1], TGSI_WRITEMASK_XYZ
), _X(tmp
), ureg_src(rCol
[1]), _CONST(29));
910 if (key
->position_t
&& device
->driver_caps
.window_space_position_support
)
911 ureg_property(ureg
, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION
, TRUE
);
914 nine_ureg_tgsi_dump(ureg
, FALSE
);
915 return ureg_create_shader_and_destroy(ureg
, device
->pipe
);
918 /* PS FF constants layout:
920 * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT
921 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
922 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
923 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
924 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
925 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE
926 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET
928 * CONST[20] D3DRS_TEXTUREFACTOR
929 * CONST[21] D3DRS_FOGCOLOR
930 * CONST[22].x___ RS.FogEnd
931 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
932 * CONST[22].__z_ RS.FogDensity
936 struct ureg_program
*ureg
;
938 struct ureg_src vC
[2]; /* DIFFUSE, SPECULAR */
939 struct ureg_src vT
[8]; /* TEXCOORD[i] */
940 struct ureg_dst r
[6]; /* TEMPs */
941 struct ureg_dst rCur
; /* D3DTA_CURRENT */
942 struct ureg_dst rMod
;
943 struct ureg_src rCurSrc
;
944 struct ureg_dst rTmp
; /* D3DTA_TEMP */
945 struct ureg_src rTmpSrc
;
946 struct ureg_dst rTex
;
947 struct ureg_src rTexSrc
;
948 struct ureg_src cBEM
[8];
949 struct ureg_src s
[8];
953 unsigned index_pre_mod
;
958 static struct ureg_src
959 ps_get_ts_arg(struct ps_build_ctx
*ps
, unsigned ta
)
963 switch (ta
& D3DTA_SELECTMASK
) {
965 reg
= ureg_DECL_constant(ps
->ureg
, ps
->stage
.index
);
968 reg
= (ps
->stage
.index
== ps
->stage
.index_pre_mod
) ? ureg_src(ps
->rMod
) : ps
->rCurSrc
;
971 reg
= ureg_DECL_fs_input(ps
->ureg
, TGSI_SEMANTIC_COLOR
, 0, TGSI_INTERPOLATE_PERSPECTIVE
);
974 reg
= ureg_DECL_fs_input(ps
->ureg
, TGSI_SEMANTIC_COLOR
, 1, TGSI_INTERPOLATE_PERSPECTIVE
);
983 reg
= ureg_DECL_constant(ps
->ureg
, 20);
987 reg
= ureg_src_undef();
990 if (ta
& D3DTA_COMPLEMENT
) {
991 struct ureg_dst dst
= ps
->r
[ps
->stage
.num_regs
++];
992 ureg_SUB(ps
->ureg
, dst
, ureg_imm1f(ps
->ureg
, 1.0f
), reg
);
995 if (ta
& D3DTA_ALPHAREPLICATE
)
1000 static struct ureg_dst
1001 ps_get_ts_dst(struct ps_build_ctx
*ps
, unsigned ta
)
1003 assert(!(ta
& (D3DTA_COMPLEMENT
| D3DTA_ALPHAREPLICATE
)));
1005 switch (ta
& D3DTA_SELECTMASK
) {
1012 return ureg_dst_undef();
1016 static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top
)
1019 case D3DTOP_DISABLE
:
1021 case D3DTOP_SELECTARG1
:
1022 case D3DTOP_PREMODULATE
:
1024 case D3DTOP_SELECTARG2
:
1026 case D3DTOP_MULTIPLYADD
:
1034 static INLINE boolean
1035 is_MOV_no_op(struct ureg_dst dst
, struct ureg_src src
)
1037 return !dst
.WriteMask
||
1038 (dst
.File
== src
.File
&&
1039 dst
.Index
== src
.Index
&&
1045 (!(dst
.WriteMask
& TGSI_WRITEMASK_X
) || (src
.SwizzleX
== TGSI_SWIZZLE_X
)) &&
1046 (!(dst
.WriteMask
& TGSI_WRITEMASK_Y
) || (src
.SwizzleY
== TGSI_SWIZZLE_Y
)) &&
1047 (!(dst
.WriteMask
& TGSI_WRITEMASK_Z
) || (src
.SwizzleZ
== TGSI_SWIZZLE_Z
)) &&
1048 (!(dst
.WriteMask
& TGSI_WRITEMASK_W
) || (src
.SwizzleW
== TGSI_SWIZZLE_W
)));
1053 ps_do_ts_op(struct ps_build_ctx
*ps
, unsigned top
, struct ureg_dst dst
, struct ureg_src
*arg
)
1055 struct ureg_program
*ureg
= ps
->ureg
;
1056 struct ureg_dst tmp
= ps
->r
[ps
->stage
.num_regs
];
1057 struct ureg_dst tmp2
= ps
->r
[ps
->stage
.num_regs
+1];
1058 struct ureg_dst tmp_x
= ureg_writemask(tmp
, TGSI_WRITEMASK_X
);
1060 tmp
.WriteMask
= dst
.WriteMask
;
1062 if (top
!= D3DTOP_SELECTARG1
&& top
!= D3DTOP_SELECTARG2
&&
1063 top
!= D3DTOP_MODULATE
&& top
!= D3DTOP_PREMODULATE
&&
1064 top
!= D3DTOP_BLENDDIFFUSEALPHA
&& top
!= D3DTOP_BLENDTEXTUREALPHA
&&
1065 top
!= D3DTOP_BLENDFACTORALPHA
&& top
!= D3DTOP_BLENDCURRENTALPHA
&&
1066 top
!= D3DTOP_BUMPENVMAP
&& top
!= D3DTOP_BUMPENVMAPLUMINANCE
&&
1068 dst
= ureg_saturate(dst
);
1071 case D3DTOP_SELECTARG1
:
1072 if (!is_MOV_no_op(dst
, arg
[1]))
1073 ureg_MOV(ureg
, dst
, arg
[1]);
1075 case D3DTOP_SELECTARG2
:
1076 if (!is_MOV_no_op(dst
, arg
[2]))
1077 ureg_MOV(ureg
, dst
, arg
[2]);
1079 case D3DTOP_MODULATE
:
1080 ureg_MUL(ureg
, dst
, arg
[1], arg
[2]);
1082 case D3DTOP_MODULATE2X
:
1083 ureg_MUL(ureg
, tmp
, arg
[1], arg
[2]);
1084 ureg_ADD(ureg
, dst
, ureg_src(tmp
), ureg_src(tmp
));
1086 case D3DTOP_MODULATE4X
:
1087 ureg_MUL(ureg
, tmp
, arg
[1], arg
[2]);
1088 ureg_MUL(ureg
, dst
, ureg_src(tmp
), ureg_imm1f(ureg
, 4.0f
));
1091 ureg_ADD(ureg
, dst
, arg
[1], arg
[2]);
1093 case D3DTOP_ADDSIGNED
:
1094 ureg_ADD(ureg
, tmp
, arg
[1], arg
[2]);
1095 ureg_SUB(ureg
, dst
, ureg_src(tmp
), ureg_imm1f(ureg
, 0.5f
));
1097 case D3DTOP_ADDSIGNED2X
:
1098 ureg_ADD(ureg
, tmp
, arg
[1], arg
[2]);
1099 ureg_MAD(ureg
, dst
, ureg_src(tmp
), ureg_imm1f(ureg
, 2.0f
), ureg_imm1f(ureg
, -1.0f
));
1101 case D3DTOP_SUBTRACT
:
1102 ureg_SUB(ureg
, dst
, arg
[1], arg
[2]);
1104 case D3DTOP_ADDSMOOTH
:
1105 ureg_SUB(ureg
, tmp
, ureg_imm1f(ureg
, 1.0f
), arg
[1]);
1106 ureg_MAD(ureg
, dst
, ureg_src(tmp
), arg
[2], arg
[1]);
1108 case D3DTOP_BLENDDIFFUSEALPHA
:
1109 ureg_LRP(ureg
, dst
, _WWWW(ps
->vC
[0]), arg
[1], arg
[2]);
1111 case D3DTOP_BLENDTEXTUREALPHA
:
1112 /* XXX: alpha taken from previous stage, texture or result ? */
1113 ureg_LRP(ureg
, dst
, _W(ps
->rTex
), arg
[1], arg
[2]);
1115 case D3DTOP_BLENDFACTORALPHA
:
1116 ureg_LRP(ureg
, dst
, _WWWW(_CONST(20)), arg
[1], arg
[2]);
1118 case D3DTOP_BLENDTEXTUREALPHAPM
:
1119 ureg_SUB(ureg
, tmp_x
, ureg_imm1f(ureg
, 1.0f
), _W(ps
->rTex
));
1120 ureg_MAD(ureg
, dst
, arg
[2], _X(tmp
), arg
[1]);
1122 case D3DTOP_BLENDCURRENTALPHA
:
1123 ureg_LRP(ureg
, dst
, _WWWW(ps
->rCurSrc
), arg
[1], arg
[2]);
1125 case D3DTOP_PREMODULATE
:
1126 ureg_MOV(ureg
, dst
, arg
[1]);
1127 ps
->stage
.index_pre_mod
= ps
->stage
.index
+ 1;
1129 case D3DTOP_MODULATEALPHA_ADDCOLOR
:
1130 ureg_MAD(ureg
, dst
, _WWWW(arg
[1]), arg
[2], arg
[1]);
1132 case D3DTOP_MODULATECOLOR_ADDALPHA
:
1133 ureg_MAD(ureg
, dst
, arg
[1], arg
[2], _WWWW(arg
[1]));
1135 case D3DTOP_MODULATEINVALPHA_ADDCOLOR
:
1136 ureg_SUB(ureg
, tmp_x
, ureg_imm1f(ureg
, 1.0f
), _WWWW(arg
[1]));
1137 ureg_MAD(ureg
, dst
, _X(tmp
), arg
[2], arg
[1]);
1139 case D3DTOP_MODULATEINVCOLOR_ADDALPHA
:
1140 ureg_SUB(ureg
, tmp
, ureg_imm1f(ureg
, 1.0f
), arg
[1]);
1141 ureg_MAD(ureg
, dst
, ureg_src(tmp
), arg
[2], _WWWW(arg
[1]));
1143 case D3DTOP_BUMPENVMAP
:
1145 case D3DTOP_BUMPENVMAPLUMINANCE
:
1147 case D3DTOP_DOTPRODUCT3
:
1148 ureg_SUB(ureg
, tmp
, arg
[1], ureg_imm4f(ureg
,0.5,0.5,0.5,0.5));
1149 ureg_SUB(ureg
, tmp2
, arg
[2] , ureg_imm4f(ureg
,0.5,0.5,0.5,0.5));
1150 ureg_DP3(ureg
, tmp
, ureg_src(tmp
), ureg_src(tmp2
));
1151 ureg_MUL(ureg
, ureg_saturate(dst
), ureg_src(tmp
), ureg_imm4f(ureg
,4.0,4.0,4.0,4.0));
1153 case D3DTOP_MULTIPLYADD
:
1154 ureg_MAD(ureg
, dst
, arg
[1], arg
[2], arg
[0]);
1157 ureg_LRP(ureg
, dst
, arg
[0], arg
[1], arg
[2]);
1159 case D3DTOP_DISABLE
:
1163 assert(!"invalid D3DTOP");
1169 nine_ff_build_ps(struct NineDevice9
*device
, struct nine_ff_ps_key
*key
)
1171 struct ps_build_ctx ps
;
1172 struct ureg_program
*ureg
= ureg_create(TGSI_PROCESSOR_FRAGMENT
);
1173 struct ureg_dst oCol
;
1175 const unsigned texcoord_sn
= get_texcoord_sn(device
->screen
);
1177 memset(&ps
, 0, sizeof(ps
));
1179 ps
.stage
.index_pre_mod
= -1;
1181 ps
.vC
[0] = ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_COLOR
, 0, TGSI_INTERPOLATE_PERSPECTIVE
);
1183 /* Declare all TEMPs we might need, serious drivers have a register allocator. */
1184 for (i
= 0; i
< Elements(ps
.r
); ++i
)
1185 ps
.r
[i
] = ureg_DECL_local_temporary(ureg
);
1189 ps
.rCurSrc
= ureg_src(ps
.rCur
);
1190 ps
.rTmpSrc
= ureg_src(ps
.rTmp
);
1191 ps
.rTexSrc
= ureg_src(ps
.rTex
);
1193 for (s
= 0; s
< 8; ++s
) {
1194 ps
.s
[s
] = ureg_src_undef();
1196 if (key
->ts
[s
].colorop
!= D3DTOP_DISABLE
) {
1197 if (key
->ts
[s
].colorarg0
== D3DTA_SPECULAR
||
1198 key
->ts
[s
].colorarg1
== D3DTA_SPECULAR
||
1199 key
->ts
[s
].colorarg2
== D3DTA_SPECULAR
)
1200 ps
.vC
[1] = ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_COLOR
, 1, TGSI_INTERPOLATE_PERSPECTIVE
);
1202 if (key
->ts
[s
].colorarg0
== D3DTA_TEXTURE
||
1203 key
->ts
[s
].colorarg1
== D3DTA_TEXTURE
||
1204 key
->ts
[s
].colorarg2
== D3DTA_TEXTURE
) {
1205 ps
.s
[s
] = ureg_DECL_sampler(ureg
, s
);
1206 ps
.vT
[s
] = ureg_DECL_fs_input(ureg
, texcoord_sn
, s
, TGSI_INTERPOLATE_PERSPECTIVE
);
1208 if (s
&& (key
->ts
[s
- 1].colorop
== D3DTOP_PREMODULATE
||
1209 key
->ts
[s
- 1].alphaop
== D3DTOP_PREMODULATE
))
1210 ps
.s
[s
] = ureg_DECL_sampler(ureg
, s
);
1213 if (key
->ts
[s
].alphaop
!= D3DTOP_DISABLE
) {
1214 if (key
->ts
[s
].alphaarg0
== D3DTA_SPECULAR
||
1215 key
->ts
[s
].alphaarg1
== D3DTA_SPECULAR
||
1216 key
->ts
[s
].alphaarg2
== D3DTA_SPECULAR
)
1217 ps
.vC
[1] = ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_COLOR
, 1, TGSI_INTERPOLATE_PERSPECTIVE
);
1219 if (key
->ts
[s
].alphaarg0
== D3DTA_TEXTURE
||
1220 key
->ts
[s
].alphaarg1
== D3DTA_TEXTURE
||
1221 key
->ts
[s
].alphaarg2
== D3DTA_TEXTURE
) {
1222 ps
.s
[s
] = ureg_DECL_sampler(ureg
, s
);
1223 ps
.vT
[s
] = ureg_DECL_fs_input(ureg
, texcoord_sn
, s
, TGSI_INTERPOLATE_PERSPECTIVE
);
1228 ps
.vC
[1] = ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_COLOR
, 1, TGSI_INTERPOLATE_PERSPECTIVE
);
1230 oCol
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_COLOR
, 0);
1232 if (key
->ts
[0].colorop
== D3DTOP_DISABLE
&&
1233 key
->ts
[0].alphaop
== D3DTOP_DISABLE
)
1234 ureg_MOV(ureg
, ps
.rCur
, ps
.vC
[0]);
1235 /* Or is it undefined then ? */
1239 for (s
= 0; s
< 8; ++s
) {
1240 unsigned colorarg
[3];
1241 unsigned alphaarg
[3];
1242 const uint8_t used_c
= ps_d3dtop_args_mask(key
->ts
[s
].colorop
);
1243 const uint8_t used_a
= ps_d3dtop_args_mask(key
->ts
[s
].alphaop
);
1244 struct ureg_dst dst
;
1245 struct ureg_src arg
[3];
1247 if (key
->ts
[s
].colorop
== D3DTOP_DISABLE
&&
1248 key
->ts
[s
].alphaop
== D3DTOP_DISABLE
)
1251 ps
.stage
.num_regs
= 3;
1253 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s
,
1254 nine_D3DTOP_to_str(key
->ts
[s
].colorop
),
1255 nine_D3DTOP_to_str(key
->ts
[s
].alphaop
));
1257 if (!ureg_src_is_undef(ps
.s
[s
])) {
1259 switch (key
->ts
[s
].textarget
) {
1260 case 0: target
= TGSI_TEXTURE_1D
; break;
1261 case 1: target
= TGSI_TEXTURE_2D
; break;
1262 case 2: target
= TGSI_TEXTURE_3D
; break;
1263 case 3: target
= TGSI_TEXTURE_CUBE
; break;
1264 /* this is a 2 bit bitfield, do I really need a default case ? */
1267 /* sample the texture */
1268 if (key
->ts
[s
].colorop
== D3DTOP_BUMPENVMAP
||
1269 key
->ts
[s
].colorop
== D3DTOP_BUMPENVMAPLUMINANCE
) {
1271 if (key
->ts
[s
].projected
)
1272 ureg_TXP(ureg
, ps
.rTex
, target
, ps
.vT
[s
], ps
.s
[s
]);
1274 ureg_TEX(ureg
, ps
.rTex
, target
, ps
.vT
[s
], ps
.s
[s
]);
1278 (key
->ts
[0].resultarg
!= 0 /* not current */ ||
1279 key
->ts
[0].colorop
== D3DTOP_DISABLE
||
1280 key
->ts
[0].alphaop
== D3DTOP_DISABLE
||
1281 key
->ts
[0].colorop
== D3DTOP_BLENDCURRENTALPHA
||
1282 key
->ts
[0].alphaop
== D3DTOP_BLENDCURRENTALPHA
||
1283 key
->ts
[0].colorarg0
== D3DTA_CURRENT
||
1284 key
->ts
[0].colorarg1
== D3DTA_CURRENT
||
1285 key
->ts
[0].colorarg2
== D3DTA_CURRENT
||
1286 key
->ts
[0].alphaarg0
== D3DTA_CURRENT
||
1287 key
->ts
[0].alphaarg1
== D3DTA_CURRENT
||
1288 key
->ts
[0].alphaarg2
== D3DTA_CURRENT
)
1290 /* Initialize D3DTA_CURRENT.
1291 * (Yes we can do this before the loop but not until
1292 * NVE4 has an instruction scheduling pass.)
1294 ureg_MOV(ureg
, ps
.rCur
, ps
.vC
[0]);
1297 dst
= ps_get_ts_dst(&ps
, key
->ts
[s
].resultarg
? D3DTA_TEMP
: D3DTA_CURRENT
);
1299 if (ps
.stage
.index_pre_mod
== ps
.stage
.index
) {
1300 ps
.rMod
= ps
.r
[ps
.stage
.num_regs
++];
1301 ureg_MUL(ureg
, ps
.rMod
, ps
.rCurSrc
, ps
.rTexSrc
);
1304 colorarg
[0] = (key
->ts
[s
].colorarg0
| ((key
->colorarg_b4
[0] >> s
) << 4) | ((key
->colorarg_b5
[0] >> s
) << 5)) & 0x3f;
1305 colorarg
[1] = (key
->ts
[s
].colorarg1
| ((key
->colorarg_b4
[1] >> s
) << 4) | ((key
->colorarg_b5
[1] >> s
) << 5)) & 0x3f;
1306 colorarg
[2] = (key
->ts
[s
].colorarg2
| ((key
->colorarg_b4
[2] >> s
) << 4) | ((key
->colorarg_b5
[2] >> s
) << 5)) & 0x3f;
1307 alphaarg
[0] = (key
->ts
[s
].alphaarg0
| ((key
->alphaarg_b4
[0] >> s
) << 4)) & 0x1f;
1308 alphaarg
[1] = (key
->ts
[s
].alphaarg1
| ((key
->alphaarg_b4
[1] >> s
) << 4)) & 0x1f;
1309 alphaarg
[2] = (key
->ts
[s
].alphaarg2
| ((key
->alphaarg_b4
[2] >> s
) << 4)) & 0x1f;
1311 if (key
->ts
[s
].colorop
!= key
->ts
[s
].alphaop
||
1312 colorarg
[0] != alphaarg
[0] ||
1313 colorarg
[1] != alphaarg
[1] ||
1314 colorarg
[2] != alphaarg
[2])
1315 dst
.WriteMask
= TGSI_WRITEMASK_XYZ
;
1317 if (used_c
& 0x1) arg
[0] = ps_get_ts_arg(&ps
, colorarg
[0]);
1318 if (used_c
& 0x2) arg
[1] = ps_get_ts_arg(&ps
, colorarg
[1]);
1319 if (used_c
& 0x4) arg
[2] = ps_get_ts_arg(&ps
, colorarg
[2]);
1320 ps_do_ts_op(&ps
, key
->ts
[s
].colorop
, dst
, arg
);
1322 if (dst
.WriteMask
!= TGSI_WRITEMASK_XYZW
) {
1323 dst
.WriteMask
= TGSI_WRITEMASK_W
;
1325 if (used_a
& 0x1) arg
[0] = ps_get_ts_arg(&ps
, alphaarg
[0]);
1326 if (used_a
& 0x2) arg
[1] = ps_get_ts_arg(&ps
, alphaarg
[1]);
1327 if (used_a
& 0x4) arg
[2] = ps_get_ts_arg(&ps
, alphaarg
[2]);
1328 ps_do_ts_op(&ps
, key
->ts
[s
].alphaop
, dst
, arg
);
1333 ureg_ADD(ureg
, ps
.rCur
, ps
.rCurSrc
, ps
.vC
[1]);
1337 if (key
->fog_mode
) {
1338 struct ureg_src vPos
= ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_POSITION
, 0, TGSI_INTERPOLATE_LINEAR
);
1339 struct ureg_dst rFog
= ureg_writemask(ps
.rTmp
, TGSI_WRITEMASK_X
);
1340 if (key
->fog_mode
== D3DFOG_EXP
) {
1341 ureg_MUL(ureg
, rFog
, _ZZZZ(vPos
), _ZZZZ(_CONST(22)));
1342 ureg_MUL(ureg
, rFog
, _X(rFog
), ureg_imm1f(ureg
, -1.442695f
));
1343 ureg_EX2(ureg
, rFog
, _X(rFog
));
1345 if (key
->fog_mode
== D3DFOG_EXP2
) {
1346 ureg_MUL(ureg
, rFog
, _ZZZZ(vPos
), _ZZZZ(_CONST(22)));
1347 ureg_MUL(ureg
, rFog
, _X(rFog
), _X(rFog
));
1348 ureg_MUL(ureg
, rFog
, _X(rFog
), ureg_imm1f(ureg
, -1.442695f
));
1349 ureg_EX2(ureg
, rFog
, _X(rFog
));
1351 if (key
->fog_mode
== D3DFOG_LINEAR
) {
1352 ureg_SUB(ureg
, rFog
, _XXXX(_CONST(22)), _ZZZZ(vPos
));
1353 ureg_MUL(ureg
, ureg_saturate(rFog
), _X(rFog
), _YYYY(_CONST(22)));
1355 ureg_LRP(ureg
, ureg_writemask(oCol
, TGSI_WRITEMASK_XYZ
), _X(rFog
), ps
.rCurSrc
, _CONST(21));
1356 ureg_MOV(ureg
, ureg_writemask(oCol
, TGSI_WRITEMASK_W
), ps
.rCurSrc
);
1359 struct ureg_src vFog
= ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_FOG
, 0, TGSI_INTERPOLATE_PERSPECTIVE
);
1360 ureg_LRP(ureg
, ureg_writemask(oCol
, TGSI_WRITEMASK_XYZ
), _XXXX(vFog
), ps
.rCurSrc
, _CONST(21));
1361 ureg_MOV(ureg
, ureg_writemask(oCol
, TGSI_WRITEMASK_W
), ps
.rCurSrc
);
1363 ureg_MOV(ureg
, oCol
, ps
.rCurSrc
);
1367 nine_ureg_tgsi_dump(ureg
, FALSE
);
1368 return ureg_create_shader_and_destroy(ureg
, device
->pipe
);
1371 static struct NineVertexShader9
*
1372 nine_ff_get_vs(struct NineDevice9
*device
)
1374 const struct nine_state
*state
= &device
->state
;
1375 struct NineVertexShader9
*vs
;
1376 enum pipe_error err
;
1377 struct vs_build_ctx bld
;
1378 struct nine_ff_vs_key key
;
1380 char input_texture_coord
[8];
1382 assert(sizeof(key
) <= sizeof(key
.value32
));
1384 memset(&key
, 0, sizeof(key
));
1385 memset(&bld
, 0, sizeof(bld
));
1386 memset(&input_texture_coord
, 0, sizeof(input_texture_coord
));
1390 /* FIXME: this shouldn't be NULL, but it is on init */
1392 key
.color0in_one
= 1;
1393 key
.color1in_one
= 1;
1394 for (i
= 0; i
< state
->vdecl
->nelems
; i
++) {
1395 uint16_t usage
= state
->vdecl
->usage_map
[i
];
1396 if (usage
== NINE_DECLUSAGE_POSITIONT
)
1398 else if (usage
== NINE_DECLUSAGE_i(COLOR
, 0))
1399 key
.color0in_one
= 0;
1400 else if (usage
== NINE_DECLUSAGE_i(COLOR
, 1))
1401 key
.color1in_one
= 0;
1402 else if (usage
== NINE_DECLUSAGE_PSIZE
)
1403 key
.vertexpointsize
= 1;
1404 else if (usage
% NINE_DECLUSAGE_COUNT
== NINE_DECLUSAGE_TEXCOORD
) {
1405 s
= usage
/ NINE_DECLUSAGE_COUNT
;
1407 input_texture_coord
[s
] = 1;
1409 DBG("FF given texture coordinate >= 8. Ignoring\n");
1413 if (!key
.vertexpointsize
)
1414 key
.pointscale
= !!state
->rs
[D3DRS_POINTSCALEENABLE
];
1416 key
.lighting
= !!state
->rs
[D3DRS_LIGHTING
] && state
->ff
.num_lights_active
;
1417 key
.darkness
= !!state
->rs
[D3DRS_LIGHTING
] && !state
->ff
.num_lights_active
;
1418 if (key
.position_t
) {
1419 key
.darkness
= 0; /* |= key.lighting; */ /* XXX ? */
1422 if ((key
.lighting
| key
.darkness
) && state
->rs
[D3DRS_COLORVERTEX
]) {
1423 key
.mtl_diffuse
= state
->rs
[D3DRS_DIFFUSEMATERIALSOURCE
];
1424 key
.mtl_ambient
= state
->rs
[D3DRS_AMBIENTMATERIALSOURCE
];
1425 key
.mtl_specular
= state
->rs
[D3DRS_SPECULARMATERIALSOURCE
];
1426 key
.mtl_emissive
= state
->rs
[D3DRS_EMISSIVEMATERIALSOURCE
];
1428 key
.fog_mode
= state
->rs
[D3DRS_FOGENABLE
] ? state
->rs
[D3DRS_FOGVERTEXMODE
] : 0;
1430 key
.fog_range
= !key
.position_t
&& state
->rs
[D3DRS_RANGEFOGENABLE
];
1432 if (state
->rs
[D3DRS_VERTEXBLEND
] != D3DVBF_DISABLE
) {
1433 key
.vertexblend_indexed
= !!state
->rs
[D3DRS_INDEXEDVERTEXBLENDENABLE
];
1435 switch (state
->rs
[D3DRS_VERTEXBLEND
]) {
1436 case D3DVBF_0WEIGHTS
: key
.vertexblend
= key
.vertexblend_indexed
; break;
1437 case D3DVBF_1WEIGHTS
: key
.vertexblend
= 2; break;
1438 case D3DVBF_2WEIGHTS
: key
.vertexblend
= 3; break;
1439 case D3DVBF_3WEIGHTS
: key
.vertexblend
= 4; break;
1440 case D3DVBF_TWEENING
: key
.vertextween
= 1; break;
1442 assert(!"invalid D3DVBF");
1447 for (s
= 0; s
< 8; ++s
) {
1448 unsigned gen
= (state
->ff
.tex_stage
[s
][D3DTSS_TEXCOORDINDEX
] >> 16) + 1;
1449 unsigned dim
= MIN2(state
->ff
.tex_stage
[s
][D3DTSS_TEXTURETRANSFORMFLAGS
] & 0x7, 4);
1451 if (key
.position_t
&& gen
> NINED3DTSS_TCI_PASSTHRU
)
1452 gen
= NINED3DTSS_TCI_PASSTHRU
;
1454 if (!input_texture_coord
[s
] && gen
== NINED3DTSS_TCI_PASSTHRU
)
1455 gen
= NINED3DTSS_TCI_DISABLE
;
1457 key
.tc_gen
|= gen
<< (s
* 3);
1458 key
.tc_idx
|= (state
->ff
.tex_stage
[s
][D3DTSS_TEXCOORDINDEX
] & 7) << (s
* 3);
1459 key
.tc_dim
|= dim
<< (s
* 3);
1462 vs
= util_hash_table_get(device
->ff
.ht_vs
, &key
);
1465 NineVertexShader9_new(device
, &vs
, NULL
, nine_ff_build_vs(device
, &bld
));
1467 nine_ff_prune_vs(device
);
1471 memcpy(&vs
->ff_key
, &key
, sizeof(vs
->ff_key
));
1473 err
= util_hash_table_set(device
->ff
.ht_vs
, &vs
->ff_key
, vs
);
1474 assert(err
== PIPE_OK
);
1475 device
->ff
.num_vs
++;
1476 NineUnknown_ConvertRefToBind(NineUnknown(vs
));
1478 vs
->num_inputs
= bld
.num_inputs
;
1479 for (n
= 0; n
< bld
.num_inputs
; ++n
)
1480 vs
->input_map
[n
].ndecl
= bld
.input
[n
];
1482 vs
->position_t
= key
.position_t
;
1483 vs
->point_size
= key
.vertexpointsize
| key
.pointscale
;
1488 static struct NinePixelShader9
*
1489 nine_ff_get_ps(struct NineDevice9
*device
)
1491 struct nine_state
*state
= &device
->state
;
1492 struct NinePixelShader9
*ps
;
1493 enum pipe_error err
;
1494 struct nine_ff_ps_key key
;
1496 uint8_t sampler_mask
= 0;
1498 assert(sizeof(key
) <= sizeof(key
.value32
));
1500 memset(&key
, 0, sizeof(key
));
1501 for (s
= 0; s
< 8; ++s
) {
1502 key
.ts
[s
].colorop
= state
->ff
.tex_stage
[s
][D3DTSS_COLOROP
];
1503 key
.ts
[s
].alphaop
= state
->ff
.tex_stage
[s
][D3DTSS_ALPHAOP
];
1504 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages. */
1505 /* ALPHAOP cannot be disabled if COLOROP is enabled. */
1506 if (key
.ts
[s
].colorop
== D3DTOP_DISABLE
) {
1507 key
.ts
[s
].alphaop
= D3DTOP_DISABLE
; /* DISABLE == 1, avoid degenerate keys */
1511 if (!state
->texture
[s
] &&
1512 state
->ff
.tex_stage
[s
][D3DTSS_COLORARG1
] == D3DTA_TEXTURE
) {
1513 /* This should also disable the stage. */
1514 key
.ts
[s
].colorop
= key
.ts
[s
].alphaop
= D3DTOP_DISABLE
;
1518 if (state
->ff
.tex_stage
[s
][D3DTSS_COLORARG1
] == D3DTA_TEXTURE
)
1519 sampler_mask
|= (1 << s
);
1521 if (key
.ts
[s
].colorop
!= D3DTOP_DISABLE
) {
1522 uint8_t used_c
= ps_d3dtop_args_mask(key
.ts
[s
].colorop
);
1523 if (used_c
& 0x1) key
.ts
[s
].colorarg0
= state
->ff
.tex_stage
[s
][D3DTSS_COLORARG0
];
1524 if (used_c
& 0x2) key
.ts
[s
].colorarg1
= state
->ff
.tex_stage
[s
][D3DTSS_COLORARG1
];
1525 if (used_c
& 0x4) key
.ts
[s
].colorarg2
= state
->ff
.tex_stage
[s
][D3DTSS_COLORARG2
];
1526 if (used_c
& 0x1) key
.colorarg_b4
[0] |= (state
->ff
.tex_stage
[s
][D3DTSS_COLORARG0
] >> 4) << s
;
1527 if (used_c
& 0x1) key
.colorarg_b5
[0] |= (state
->ff
.tex_stage
[s
][D3DTSS_COLORARG0
] >> 5) << s
;
1528 if (used_c
& 0x2) key
.colorarg_b4
[1] |= (state
->ff
.tex_stage
[s
][D3DTSS_COLORARG1
] >> 4) << s
;
1529 if (used_c
& 0x2) key
.colorarg_b5
[1] |= (state
->ff
.tex_stage
[s
][D3DTSS_COLORARG1
] >> 5) << s
;
1530 if (used_c
& 0x4) key
.colorarg_b4
[2] |= (state
->ff
.tex_stage
[s
][D3DTSS_COLORARG2
] >> 4) << s
;
1531 if (used_c
& 0x4) key
.colorarg_b5
[2] |= (state
->ff
.tex_stage
[s
][D3DTSS_COLORARG2
] >> 5) << s
;
1533 if (key
.ts
[s
].alphaop
!= D3DTOP_DISABLE
) {
1534 uint8_t used_a
= ps_d3dtop_args_mask(key
.ts
[s
].alphaop
);
1535 if (used_a
& 0x1) key
.ts
[s
].alphaarg0
= state
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG0
];
1536 if (used_a
& 0x2) key
.ts
[s
].alphaarg1
= state
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG1
];
1537 if (used_a
& 0x4) key
.ts
[s
].alphaarg2
= state
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG2
];
1538 if (used_a
& 0x1) key
.alphaarg_b4
[0] |= (state
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG0
] >> 4) << s
;
1539 if (used_a
& 0x2) key
.alphaarg_b4
[1] |= (state
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG1
] >> 4) << s
;
1540 if (used_a
& 0x4) key
.alphaarg_b4
[2] |= (state
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG2
] >> 4) << s
;
1542 key
.ts
[s
].resultarg
= state
->ff
.tex_stage
[s
][D3DTSS_RESULTARG
] == D3DTA_TEMP
;
1544 key
.ts
[s
].projected
= !!(state
->ff
.tex_stage
[s
][D3DTSS_TEXTURETRANSFORMFLAGS
] & D3DTTFF_PROJECTED
);
1546 if (state
->texture
[s
]) {
1547 switch (state
->texture
[s
]->base
.type
) {
1548 case D3DRTYPE_TEXTURE
: key
.ts
[s
].textarget
= 1; break;
1549 case D3DRTYPE_VOLUMETEXTURE
: key
.ts
[s
].textarget
= 2; break;
1550 case D3DRTYPE_CUBETEXTURE
: key
.ts
[s
].textarget
= 3; break;
1552 assert(!"unexpected texture type");
1556 key
.ts
[s
].textarget
= 1;
1560 key
.ts
[s
].colorop
= key
.ts
[s
].alphaop
= D3DTOP_DISABLE
;
1561 if (state
->rs
[D3DRS_FOGENABLE
])
1562 key
.fog_mode
= state
->rs
[D3DRS_FOGTABLEMODE
];
1564 ps
= util_hash_table_get(device
->ff
.ht_ps
, &key
);
1567 NinePixelShader9_new(device
, &ps
, NULL
, nine_ff_build_ps(device
, &key
));
1569 nine_ff_prune_ps(device
);
1571 memcpy(&ps
->ff_key
, &key
, sizeof(ps
->ff_key
));
1573 err
= util_hash_table_set(device
->ff
.ht_ps
, &ps
->ff_key
, ps
);
1574 assert(err
== PIPE_OK
);
1575 device
->ff
.num_ps
++;
1576 NineUnknown_ConvertRefToBind(NineUnknown(ps
));
1579 ps
->sampler_mask
= sampler_mask
;
1584 #define GET_D3DTS(n) nine_state_access_transform(state, D3DTS_##n, FALSE)
1585 #define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32)))
1587 nine_ff_load_vs_transforms(struct NineDevice9
*device
)
1589 struct nine_state
*state
= &device
->state
;
1591 D3DMATRIX
*M
= (D3DMATRIX
*)device
->ff
.vs_const
;
1594 /* TODO: make this nicer, and only upload the ones we need */
1595 /* TODO: use ff.vs_const as storage of W, V, P matrices */
1597 if (IS_D3DTS_DIRTY(state
, WORLD
) ||
1598 IS_D3DTS_DIRTY(state
, VIEW
) ||
1599 IS_D3DTS_DIRTY(state
, PROJECTION
)) {
1600 /* WVP, WV matrices */
1601 nine_d3d_matrix_matrix_mul(&M
[1], GET_D3DTS(WORLD
), GET_D3DTS(VIEW
));
1602 nine_d3d_matrix_matrix_mul(&M
[0], &M
[1], GET_D3DTS(PROJECTION
));
1604 /* normal matrix == transpose(inverse(WV)) */
1605 nine_d3d_matrix_inverse_3x3(&T
, &M
[1]);
1606 nine_d3d_matrix_transpose(&M
[4], &T
);
1609 nine_d3d_matrix_matrix_mul(&M
[2], GET_D3DTS(VIEW
), GET_D3DTS(PROJECTION
));
1611 /* V and W matrix */
1612 M
[3] = *GET_D3DTS(VIEW
);
1613 M
[56] = *GET_D3DTS(WORLD
);
1616 if (state
->rs
[D3DRS_VERTEXBLEND
] != D3DVBF_DISABLE
) {
1617 /* load other world matrices */
1618 for (i
= 1; i
<= 7; ++i
)
1619 M
[56 + i
] = *GET_D3DTS(WORLDMATRIX(i
));
1622 device
->ff
.vs_const
[30 * 4] = asfloat(state
->rs
[D3DRS_TWEENFACTOR
]);
1626 nine_ff_load_lights(struct NineDevice9
*device
)
1628 struct nine_state
*state
= &device
->state
;
1629 struct fvec4
*dst
= (struct fvec4
*)device
->ff
.vs_const
;
1632 if (state
->changed
.group
& NINE_STATE_FF_MATERIAL
) {
1633 const D3DMATERIAL9
*mtl
= &state
->ff
.material
;
1635 memcpy(&dst
[20], &mtl
->Diffuse
, 4 * sizeof(float));
1636 memcpy(&dst
[21], &mtl
->Ambient
, 4 * sizeof(float));
1637 memcpy(&dst
[22], &mtl
->Specular
, 4 * sizeof(float));
1638 dst
[23].x
= mtl
->Power
;
1639 memcpy(&dst
[24], &mtl
->Emissive
, 4 * sizeof(float));
1640 d3dcolor_to_rgba(&dst
[25].x
, state
->rs
[D3DRS_AMBIENT
]);
1641 dst
[19].x
= dst
[25].x
* mtl
->Ambient
.r
+ mtl
->Emissive
.r
;
1642 dst
[19].y
= dst
[25].y
* mtl
->Ambient
.g
+ mtl
->Emissive
.g
;
1643 dst
[19].z
= dst
[25].z
* mtl
->Ambient
.b
+ mtl
->Emissive
.b
;
1644 dst
[19].w
= mtl
->Ambient
.a
+ mtl
->Emissive
.a
;
1647 if (!(state
->changed
.group
& NINE_STATE_FF_LIGHTING
))
1650 for (l
= 0; l
< state
->ff
.num_lights_active
; ++l
) {
1651 const D3DLIGHT9
*light
= &state
->ff
.light
[state
->ff
.active_light
[l
]];
1653 dst
[32 + l
* 8].x
= light
->Type
;
1654 dst
[32 + l
* 8].y
= light
->Attenuation0
;
1655 dst
[32 + l
* 8].z
= light
->Attenuation1
;
1656 dst
[32 + l
* 8].w
= light
->Attenuation2
;
1657 memcpy(&dst
[33 + l
* 8].x
, &light
->Diffuse
, sizeof(light
->Diffuse
));
1658 memcpy(&dst
[34 + l
* 8].x
, &light
->Specular
, sizeof(light
->Specular
));
1659 memcpy(&dst
[35 + l
* 8].x
, &light
->Ambient
, sizeof(light
->Ambient
));
1660 nine_d3d_vector4_matrix_mul((D3DVECTOR
*)&dst
[36 + l
* 8].x
, &light
->Position
, GET_D3DTS(VIEW
));
1661 nine_d3d_vector3_matrix_mul((D3DVECTOR
*)&dst
[37 + l
* 8].x
, &light
->Direction
, GET_D3DTS(VIEW
));
1662 dst
[36 + l
* 8].w
= light
->Type
== D3DLIGHT_DIRECTIONAL
? 1e9f
: light
->Range
;
1663 dst
[37 + l
* 8].w
= light
->Falloff
;
1664 dst
[38 + l
* 8].x
= cosf(light
->Theta
* 0.5f
);
1665 dst
[38 + l
* 8].y
= cosf(light
->Phi
* 0.5f
);
1666 dst
[38 + l
* 8].z
= 1.0f
/ (dst
[38 + l
* 8].x
- dst
[38 + l
* 8].y
);
1667 dst
[39 + l
* 8].w
= (l
+ 1) == state
->ff
.num_lights_active
;
1672 nine_ff_load_point_and_fog_params(struct NineDevice9
*device
)
1674 const struct nine_state
*state
= &device
->state
;
1675 struct fvec4
*dst
= (struct fvec4
*)device
->ff
.vs_const
;
1677 if (!(state
->changed
.group
& NINE_STATE_FF_OTHER
))
1679 dst
[26].x
= asfloat(state
->rs
[D3DRS_POINTSIZE_MIN
]);
1680 dst
[26].y
= asfloat(state
->rs
[D3DRS_POINTSIZE_MAX
]);
1681 dst
[26].z
= asfloat(state
->rs
[D3DRS_POINTSIZE
]);
1682 dst
[26].w
= asfloat(state
->rs
[D3DRS_POINTSCALE_A
]);
1683 dst
[27].x
= asfloat(state
->rs
[D3DRS_POINTSCALE_B
]);
1684 dst
[27].y
= asfloat(state
->rs
[D3DRS_POINTSCALE_C
]);
1685 dst
[28].x
= asfloat(state
->rs
[D3DRS_FOGEND
]);
1686 dst
[28].y
= 1.0f
/ (asfloat(state
->rs
[D3DRS_FOGEND
]) - asfloat(state
->rs
[D3DRS_FOGSTART
]));
1687 if (isinf(dst
[28].y
))
1689 dst
[28].z
= asfloat(state
->rs
[D3DRS_FOGDENSITY
]);
1690 d3dcolor_to_rgba(&dst
[29].x
, state
->rs
[D3DRS_FOGCOLOR
]);
1694 nine_ff_load_tex_matrices(struct NineDevice9
*device
)
1696 struct nine_state
*state
= &device
->state
;
1697 D3DMATRIX
*M
= (D3DMATRIX
*)device
->ff
.vs_const
;
1700 if (!(state
->ff
.changed
.transform
[0] & 0xff0000))
1702 for (s
= 0; s
< 8; ++s
) {
1703 if (IS_D3DTS_DIRTY(state
, TEXTURE0
+ s
))
1704 M
[32 + s
] = *nine_state_access_transform(state
, D3DTS_TEXTURE0
+ s
, FALSE
);
1709 nine_ff_load_ps_params(struct NineDevice9
*device
)
1711 const struct nine_state
*state
= &device
->state
;
1712 struct fvec4
*dst
= (struct fvec4
*)device
->ff
.ps_const
;
1715 if (!(state
->changed
.group
& (NINE_STATE_FF_PSSTAGES
| NINE_STATE_FF_OTHER
)))
1718 for (s
= 0; s
< 8; ++s
)
1719 d3dcolor_to_rgba(&dst
[s
].x
, state
->ff
.tex_stage
[s
][D3DTSS_CONSTANT
]);
1721 for (s
= 0; s
< 8; ++s
) {
1722 dst
[8 + s
].x
= asfloat(state
->ff
.tex_stage
[s
][D3DTSS_BUMPENVMAT00
]);
1723 dst
[8 + s
].y
= asfloat(state
->ff
.tex_stage
[s
][D3DTSS_BUMPENVMAT01
]);
1724 dst
[8 + s
].z
= asfloat(state
->ff
.tex_stage
[s
][D3DTSS_BUMPENVMAT10
]);
1725 dst
[8 + s
].w
= asfloat(state
->ff
.tex_stage
[s
][D3DTSS_BUMPENVMAT11
]);
1727 dst
[8 + s
/ 2].z
= asfloat(state
->ff
.tex_stage
[s
][D3DTSS_BUMPENVLSCALE
]);
1728 dst
[8 + s
/ 2].w
= asfloat(state
->ff
.tex_stage
[s
][D3DTSS_BUMPENVLOFFSET
]);
1730 dst
[8 + s
/ 2].x
= asfloat(state
->ff
.tex_stage
[s
][D3DTSS_BUMPENVLSCALE
]);
1731 dst
[8 + s
/ 2].y
= asfloat(state
->ff
.tex_stage
[s
][D3DTSS_BUMPENVLOFFSET
]);
1735 d3dcolor_to_rgba(&dst
[20].x
, state
->rs
[D3DRS_TEXTUREFACTOR
]);
1736 d3dcolor_to_rgba(&dst
[21].x
, state
->rs
[D3DRS_FOGCOLOR
]);
1737 dst
[22].x
= asfloat(state
->rs
[D3DRS_FOGEND
]);
1738 dst
[22].y
= 1.0f
/ (asfloat(state
->rs
[D3DRS_FOGEND
]) - asfloat(state
->rs
[D3DRS_FOGSTART
]));
1739 dst
[22].z
= asfloat(state
->rs
[D3DRS_FOGDENSITY
]);
1743 nine_ff_load_viewport_info(struct NineDevice9
*device
)
1745 D3DVIEWPORT9
*viewport
= &device
->state
.viewport
;
1746 struct fvec4
*dst
= (struct fvec4
*)device
->ff
.vs_const
;
1747 float diffZ
= viewport
->MaxZ
- viewport
->MinZ
;
1749 /* Note: the other functions avoids to fill the const again if nothing changed.
1750 * But we don't have much to fill, and adding code to allow that may be complex
1751 * so just fill it always */
1752 dst
[100].x
= 2.0f
/ (float)(viewport
->Width
);
1753 dst
[100].y
= 2.0f
/ (float)(viewport
->Height
);
1754 dst
[100].z
= (diffZ
== 0.0f
) ? 0.0f
: (1.0f
/ diffZ
);
1755 dst
[101].x
= (float)(viewport
->X
);
1756 dst
[101].y
= (float)(viewport
->Y
);
1757 dst
[101].z
= (float)(viewport
->MinZ
);
1761 nine_ff_update(struct NineDevice9
*device
)
1763 struct pipe_context
*pipe
= device
->pipe
;
1764 struct nine_state
*state
= &device
->state
;
1766 DBG("vs=%p ps=%p\n", device
->state
.vs
, device
->state
.ps
);
1768 /* NOTE: the only reference belongs to the hash table */
1769 if (!device
->state
.vs
)
1770 device
->ff
.vs
= nine_ff_get_vs(device
);
1771 if (!device
->state
.ps
)
1772 device
->ff
.ps
= nine_ff_get_ps(device
);
1774 if (!device
->state
.vs
) {
1775 if (device
->state
.ff
.clobber
.vs_const
) {
1776 device
->state
.ff
.clobber
.vs_const
= FALSE
;
1777 device
->state
.changed
.group
|=
1778 NINE_STATE_FF_VSTRANSF
|
1779 NINE_STATE_FF_MATERIAL
|
1780 NINE_STATE_FF_LIGHTING
|
1781 NINE_STATE_FF_OTHER
;
1782 device
->state
.ff
.changed
.transform
[0] |= 0xff000c;
1783 device
->state
.ff
.changed
.transform
[8] |= 0xff;
1785 nine_ff_load_vs_transforms(device
);
1786 nine_ff_load_tex_matrices(device
);
1787 nine_ff_load_lights(device
);
1788 nine_ff_load_point_and_fog_params(device
);
1789 nine_ff_load_viewport_info(device
);
1791 memset(state
->ff
.changed
.transform
, 0, sizeof(state
->ff
.changed
.transform
));
1793 device
->state
.changed
.group
|= NINE_STATE_VS
;
1794 device
->state
.changed
.group
|= NINE_STATE_VS_CONST
;
1796 if (device
->prefer_user_constbuf
) {
1797 struct pipe_context
*pipe
= device
->pipe
;
1798 struct pipe_constant_buffer cb
;
1799 cb
.buffer_offset
= 0;
1801 cb
.user_buffer
= device
->ff
.vs_const
;
1802 cb
.buffer_size
= NINE_FF_NUM_VS_CONST
* 4 * sizeof(float);
1803 pipe
->set_constant_buffer(pipe
, PIPE_SHADER_VERTEX
, 0, &cb
);
1805 struct pipe_box box
;
1806 u_box_1d(0, NINE_FF_NUM_VS_CONST
* 4 * sizeof(float), &box
);
1807 pipe
->transfer_inline_write(pipe
, device
->constbuf_vs
, 0,
1809 device
->ff
.vs_const
, 0, 0);
1810 nine_ranges_insert(&device
->state
.changed
.vs_const_f
, 0, NINE_FF_NUM_VS_CONST
,
1811 &device
->range_pool
);
1815 if (!device
->state
.ps
) {
1816 if (device
->state
.ff
.clobber
.ps_const
) {
1817 device
->state
.ff
.clobber
.ps_const
= FALSE
;
1818 device
->state
.changed
.group
|=
1819 NINE_STATE_FF_PSSTAGES
|
1820 NINE_STATE_FF_OTHER
;
1822 nine_ff_load_ps_params(device
);
1824 device
->state
.changed
.group
|= NINE_STATE_PS
;
1825 device
->state
.changed
.group
|= NINE_STATE_PS_CONST
;
1827 if (device
->prefer_user_constbuf
) {
1828 struct pipe_context
*pipe
= device
->pipe
;
1829 struct pipe_constant_buffer cb
;
1830 cb
.buffer_offset
= 0;
1832 cb
.user_buffer
= device
->ff
.ps_const
;
1833 cb
.buffer_size
= NINE_FF_NUM_PS_CONST
* 4 * sizeof(float);
1834 pipe
->set_constant_buffer(pipe
, PIPE_SHADER_FRAGMENT
, 0, &cb
);
1836 struct pipe_box box
;
1837 u_box_1d(0, NINE_FF_NUM_PS_CONST
* 4 * sizeof(float), &box
);
1838 pipe
->transfer_inline_write(pipe
, device
->constbuf_ps
, 0,
1840 device
->ff
.ps_const
, 0, 0);
1841 nine_ranges_insert(&device
->state
.changed
.ps_const_f
, 0, NINE_FF_NUM_PS_CONST
,
1842 &device
->range_pool
);
1846 device
->state
.changed
.group
&= ~NINE_STATE_FF
;
1851 nine_ff_init(struct NineDevice9
*device
)
1853 device
->ff
.ht_vs
= util_hash_table_create(nine_ff_vs_key_hash
,
1854 nine_ff_vs_key_comp
);
1855 device
->ff
.ht_ps
= util_hash_table_create(nine_ff_ps_key_hash
,
1856 nine_ff_ps_key_comp
);
1858 device
->ff
.ht_fvf
= util_hash_table_create(nine_ff_fvf_key_hash
,
1859 nine_ff_fvf_key_comp
);
1861 device
->ff
.vs_const
= CALLOC(NINE_FF_NUM_VS_CONST
, 4 * sizeof(float));
1862 device
->ff
.ps_const
= CALLOC(NINE_FF_NUM_PS_CONST
, 4 * sizeof(float));
1864 return device
->ff
.ht_vs
&& device
->ff
.ht_ps
&&
1865 device
->ff
.ht_fvf
&&
1866 device
->ff
.vs_const
&& device
->ff
.ps_const
;
1869 static enum pipe_error
nine_ff_ht_delete_cb(void *key
, void *value
, void *data
)
1871 NineUnknown_Unbind(NineUnknown(value
));
1876 nine_ff_fini(struct NineDevice9
*device
)
1878 if (device
->ff
.ht_vs
) {
1879 util_hash_table_foreach(device
->ff
.ht_vs
, nine_ff_ht_delete_cb
, NULL
);
1880 util_hash_table_destroy(device
->ff
.ht_vs
);
1882 if (device
->ff
.ht_ps
) {
1883 util_hash_table_foreach(device
->ff
.ht_ps
, nine_ff_ht_delete_cb
, NULL
);
1884 util_hash_table_destroy(device
->ff
.ht_ps
);
1886 if (device
->ff
.ht_fvf
) {
1887 util_hash_table_foreach(device
->ff
.ht_fvf
, nine_ff_ht_delete_cb
, NULL
);
1888 util_hash_table_destroy(device
->ff
.ht_fvf
);
1890 device
->ff
.vs
= NULL
; /* destroyed by unbinding from hash table */
1891 device
->ff
.ps
= NULL
;
1893 FREE(device
->ff
.vs_const
);
1894 FREE(device
->ff
.ps_const
);
1898 nine_ff_prune_vs(struct NineDevice9
*device
)
1900 if (device
->ff
.num_vs
> 100) {
1901 /* could destroy the bound one here, so unbind */
1902 device
->pipe
->bind_vs_state(device
->pipe
, NULL
);
1903 util_hash_table_foreach(device
->ff
.ht_vs
, nine_ff_ht_delete_cb
, NULL
);
1904 util_hash_table_clear(device
->ff
.ht_vs
);
1905 device
->ff
.num_vs
= 0;
1906 device
->state
.changed
.group
|= NINE_STATE_VS
;
1910 nine_ff_prune_ps(struct NineDevice9
*device
)
1912 if (device
->ff
.num_ps
> 100) {
1913 /* could destroy the bound one here, so unbind */
1914 device
->pipe
->bind_fs_state(device
->pipe
, NULL
);
1915 util_hash_table_foreach(device
->ff
.ht_ps
, nine_ff_ht_delete_cb
, NULL
);
1916 util_hash_table_clear(device
->ff
.ht_ps
);
1917 device
->ff
.num_ps
= 0;
1918 device
->state
.changed
.group
|= NINE_STATE_PS
;
1922 /* ========================================================================== */
1924 /* Matrix multiplication:
1926 * in memory: 0 1 2 3 (row major)
1932 * r0 = (r0 * cA) (r0 * cB) . .
1933 * r1 = (r1 * cA) (r1 * cB)
1937 * r: (11) (12) (13) (14)
1938 * (21) (22) (23) (24)
1939 * (31) (32) (33) (34)
1940 * (41) (42) (43) (44)
1948 * t.xyzw = MUL(v.xxxx, r[0]);
1949 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
1950 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
1951 * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
1953 * v.x = DP4(v, c[0]);
1954 * v.y = DP4(v, c[1]);
1955 * v.z = DP4(v, c[2]);
1956 * v.w = DP4(v, c[3]) = 1
/* Debug helper: passes all sixteen elements of M to DBG(), ordered
 * m[0][0] .. m[0][3], m[1][0] .. m[3][3].
 *
 * NOTE(review): only the first line of the format string is visible in this
 * excerpt; the remaining rows of the format presumably repeat the same
 * "(%f %f %f %f)\n" pattern -- confirm.
 */
1961 nine_D3DMATRIX_print(const D3DMATRIX *M)
1963 DBG("\n(%f %f %f %f)\n"
1967 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
1968 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
1969 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
1970 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
1975 nine_DP4_row_col(const D3DMATRIX
*A
, int r
, const D3DMATRIX
*B
, int c
)
1977 return A
->m
[r
][0] * B
->m
[0][c
] +
1978 A
->m
[r
][1] * B
->m
[1][c
] +
1979 A
->m
[r
][2] * B
->m
[2][c
] +
1980 A
->m
[r
][3] * B
->m
[3][c
];
/* Dot product of vector v with column c of M; the visible first term is
 * v->x * M->m[0][c].
 *
 * NOTE(review): the expression is cut off after its first term in this
 * excerpt. Judging by the name, three more terms follow (y, z and a fourth
 * against M->m[3][c]) -- confirm how the fourth component is formed.
 */
1984 nine_DP4_vec_col(const D3DVECTOR
*v
, const D3DMATRIX
*M
, int c
)
1986 return v
->x
* M
->m
[0][c
] +
/* Dot product of vector v with column c of M; the visible first term is
 * v->x * M->m[0][c].
 *
 * NOTE(review): the expression is cut off after its first term in this
 * excerpt. Judging by the name, only the y and z terms follow (rows 1 and 2
 * of the column) -- confirm.
 */
1993 nine_DP3_vec_col(const D3DVECTOR
*v
, const D3DMATRIX
*M
, int c
)
1995 return v
->x
* M
->m
[0][c
] +
2001 nine_d3d_matrix_matrix_mul(D3DMATRIX
*D
, const D3DMATRIX
*L
, const D3DMATRIX
*R
)
2003 D
->_11
= nine_DP4_row_col(L
, 0, R
, 0);
2004 D
->_12
= nine_DP4_row_col(L
, 0, R
, 1);
2005 D
->_13
= nine_DP4_row_col(L
, 0, R
, 2);
2006 D
->_14
= nine_DP4_row_col(L
, 0, R
, 3);
2008 D
->_21
= nine_DP4_row_col(L
, 1, R
, 0);
2009 D
->_22
= nine_DP4_row_col(L
, 1, R
, 1);
2010 D
->_23
= nine_DP4_row_col(L
, 1, R
, 2);
2011 D
->_24
= nine_DP4_row_col(L
, 1, R
, 3);
2013 D
->_31
= nine_DP4_row_col(L
, 2, R
, 0);
2014 D
->_32
= nine_DP4_row_col(L
, 2, R
, 1);
2015 D
->_33
= nine_DP4_row_col(L
, 2, R
, 2);
2016 D
->_34
= nine_DP4_row_col(L
, 2, R
, 3);
2018 D
->_41
= nine_DP4_row_col(L
, 3, R
, 0);
2019 D
->_42
= nine_DP4_row_col(L
, 3, R
, 1);
2020 D
->_43
= nine_DP4_row_col(L
, 3, R
, 2);
2021 D
->_44
= nine_DP4_row_col(L
, 3, R
, 3);
2025 nine_d3d_vector4_matrix_mul(D3DVECTOR
*d
, const D3DVECTOR
*v
, const D3DMATRIX
*M
)
2027 d
->x
= nine_DP4_vec_col(v
, M
, 0);
2028 d
->y
= nine_DP4_vec_col(v
, M
, 1);
2029 d
->z
= nine_DP4_vec_col(v
, M
, 2);
2033 nine_d3d_vector3_matrix_mul(D3DVECTOR
*d
, const D3DVECTOR
*v
, const D3DMATRIX
*M
)
2035 d
->x
= nine_DP3_vec_col(v
, M
, 0);
2036 d
->y
= nine_DP3_vec_col(v
, M
, 1);
2037 d
->z
= nine_DP3_vec_col(v
, M
, 2);
2041 nine_d3d_matrix_transpose(D3DMATRIX
*D
, const D3DMATRIX
*M
)
2044 for (i
= 0; i
< 4; ++i
)
2045 for (j
= 0; j
< 4; ++j
)
2046 D
->m
[i
][j
] = M
->m
[j
][i
];
/* Helper macros for nine_d3d_matrix_det().
 *
 * Both form the product t = M->_1<i> * M->_2<j> * M->_3<k> * M->_4<l> and
 * fold it into one of two accumulators taken from the calling scope:
 *   ADD: t is added to `pos` when t > 0, otherwise to `neg`.
 *   SUB: t is subtracted from `neg` when t > 0, otherwise from `pos`.
 * Either way `pos` only receives positive contributions and `neg` only
 * non-positive ones. The macros expect `M`, `pos` and `neg` to be in scope.
 */
2049 #define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2050 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2051 if (t > 0.0f) pos += t; else neg += t; } while(0)
2053 #define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2054 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2055 if (t > 0.0f) neg -= t; else pos -= t; } while(0)
/* Determinant of the 4x4 matrix M, expanded over all 24 permutations of the
 * column indices: the 12 even permutations are added, the 12 odd ones
 * subtracted.
 *
 * NOTE(review): the declarations of `pos`/`neg` and the return statement are
 * not visible in this excerpt; presumably both start at zero and their sum
 * is returned -- confirm.
 */
2057 nine_d3d_matrix_det(const D3DMATRIX
*M
)
/* even permutations of (1, 2, 3, 4) */
2062 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2063 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2064 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2066 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2067 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2068 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2070 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2071 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2072 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2074 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2075 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2076 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
/* odd permutations of (1, 2, 3, 4) */
2078 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2079 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2080 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2082 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2083 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2084 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2086 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2087 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2088 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2090 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2091 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2092 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2097 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2098 * I have no idea where this code came from.
/* Derives a matrix D from M by cofactor expansion (the inverse of M, going
 * by the function name).
 *
 * Steps visible in this excerpt:
 *  - sixteen six-term signed sums of triple products of M's elements, each
 *    built only from elements outside one row and one column of M (a signed
 *    3x3 minor);
 *  - a four-term sum pairing M's first column with D's first row;
 *  - a double loop over all sixteen (i, k) positions;
 *  - a self-check that multiplies D by M into I and reports through DBG()
 *    when any entry is more than 1e-3 away from the identity matrix.
 *
 * NOTE(review): the assignment targets of the sums, the determinant handling
 * and the loop body are not visible here; presumably the sums fill D->m[][],
 * the four-term sum is the determinant and the loop scales D by its
 * reciprocal -- confirm. No guard against a zero determinant is visible.
 */
2101 nine_d3d_matrix_inverse(D3DMATRIX
*D
, const D3DMATRIX
*M
)
/* Signed minors: each group below skips one row and one column of M. */
2107 M
->m
[1][1] * M
->m
[2][2] * M
->m
[3][3] -
2108 M
->m
[1][1] * M
->m
[3][2] * M
->m
[2][3] -
2109 M
->m
[1][2] * M
->m
[2][1] * M
->m
[3][3] +
2110 M
->m
[1][2] * M
->m
[3][1] * M
->m
[2][3] +
2111 M
->m
[1][3] * M
->m
[2][1] * M
->m
[3][2] -
2112 M
->m
[1][3] * M
->m
[3][1] * M
->m
[2][2];
2115 -M
->m
[0][1] * M
->m
[2][2] * M
->m
[3][3] +
2116 M
->m
[0][1] * M
->m
[3][2] * M
->m
[2][3] +
2117 M
->m
[0][2] * M
->m
[2][1] * M
->m
[3][3] -
2118 M
->m
[0][2] * M
->m
[3][1] * M
->m
[2][3] -
2119 M
->m
[0][3] * M
->m
[2][1] * M
->m
[3][2] +
2120 M
->m
[0][3] * M
->m
[3][1] * M
->m
[2][2];
2123 M
->m
[0][1] * M
->m
[1][2] * M
->m
[3][3] -
2124 M
->m
[0][1] * M
->m
[3][2] * M
->m
[1][3] -
2125 M
->m
[0][2] * M
->m
[1][1] * M
->m
[3][3] +
2126 M
->m
[0][2] * M
->m
[3][1] * M
->m
[1][3] +
2127 M
->m
[0][3] * M
->m
[1][1] * M
->m
[3][2] -
2128 M
->m
[0][3] * M
->m
[3][1] * M
->m
[1][2];
2131 -M
->m
[0][1] * M
->m
[1][2] * M
->m
[2][3] +
2132 M
->m
[0][1] * M
->m
[2][2] * M
->m
[1][3] +
2133 M
->m
[0][2] * M
->m
[1][1] * M
->m
[2][3] -
2134 M
->m
[0][2] * M
->m
[2][1] * M
->m
[1][3] -
2135 M
->m
[0][3] * M
->m
[1][1] * M
->m
[2][2] +
2136 M
->m
[0][3] * M
->m
[2][1] * M
->m
[1][2];
2139 -M
->m
[1][0] * M
->m
[2][2] * M
->m
[3][3] +
2140 M
->m
[1][0] * M
->m
[3][2] * M
->m
[2][3] +
2141 M
->m
[1][2] * M
->m
[2][0] * M
->m
[3][3] -
2142 M
->m
[1][2] * M
->m
[3][0] * M
->m
[2][3] -
2143 M
->m
[1][3] * M
->m
[2][0] * M
->m
[3][2] +
2144 M
->m
[1][3] * M
->m
[3][0] * M
->m
[2][2];
2147 M
->m
[0][0] * M
->m
[2][2] * M
->m
[3][3] -
2148 M
->m
[0][0] * M
->m
[3][2] * M
->m
[2][3] -
2149 M
->m
[0][2] * M
->m
[2][0] * M
->m
[3][3] +
2150 M
->m
[0][2] * M
->m
[3][0] * M
->m
[2][3] +
2151 M
->m
[0][3] * M
->m
[2][0] * M
->m
[3][2] -
2152 M
->m
[0][3] * M
->m
[3][0] * M
->m
[2][2];
2155 -M
->m
[0][0] * M
->m
[1][2] * M
->m
[3][3] +
2156 M
->m
[0][0] * M
->m
[3][2] * M
->m
[1][3] +
2157 M
->m
[0][2] * M
->m
[1][0] * M
->m
[3][3] -
2158 M
->m
[0][2] * M
->m
[3][0] * M
->m
[1][3] -
2159 M
->m
[0][3] * M
->m
[1][0] * M
->m
[3][2] +
2160 M
->m
[0][3] * M
->m
[3][0] * M
->m
[1][2];
2163 M
->m
[0][0] * M
->m
[1][2] * M
->m
[2][3] -
2164 M
->m
[0][0] * M
->m
[2][2] * M
->m
[1][3] -
2165 M
->m
[0][2] * M
->m
[1][0] * M
->m
[2][3] +
2166 M
->m
[0][2] * M
->m
[2][0] * M
->m
[1][3] +
2167 M
->m
[0][3] * M
->m
[1][0] * M
->m
[2][2] -
2168 M
->m
[0][3] * M
->m
[2][0] * M
->m
[1][2];
2171 M
->m
[1][0] * M
->m
[2][1] * M
->m
[3][3] -
2172 M
->m
[1][0] * M
->m
[3][1] * M
->m
[2][3] -
2173 M
->m
[1][1] * M
->m
[2][0] * M
->m
[3][3] +
2174 M
->m
[1][1] * M
->m
[3][0] * M
->m
[2][3] +
2175 M
->m
[1][3] * M
->m
[2][0] * M
->m
[3][1] -
2176 M
->m
[1][3] * M
->m
[3][0] * M
->m
[2][1];
2179 -M
->m
[0][0] * M
->m
[2][1] * M
->m
[3][3] +
2180 M
->m
[0][0] * M
->m
[3][1] * M
->m
[2][3] +
2181 M
->m
[0][1] * M
->m
[2][0] * M
->m
[3][3] -
2182 M
->m
[0][1] * M
->m
[3][0] * M
->m
[2][3] -
2183 M
->m
[0][3] * M
->m
[2][0] * M
->m
[3][1] +
2184 M
->m
[0][3] * M
->m
[3][0] * M
->m
[2][1];
2187 M
->m
[0][0] * M
->m
[1][1] * M
->m
[3][3] -
2188 M
->m
[0][0] * M
->m
[3][1] * M
->m
[1][3] -
2189 M
->m
[0][1] * M
->m
[1][0] * M
->m
[3][3] +
2190 M
->m
[0][1] * M
->m
[3][0] * M
->m
[1][3] +
2191 M
->m
[0][3] * M
->m
[1][0] * M
->m
[3][1] -
2192 M
->m
[0][3] * M
->m
[3][0] * M
->m
[1][1];
2195 -M
->m
[0][0] * M
->m
[1][1] * M
->m
[2][3] +
2196 M
->m
[0][0] * M
->m
[2][1] * M
->m
[1][3] +
2197 M
->m
[0][1] * M
->m
[1][0] * M
->m
[2][3] -
2198 M
->m
[0][1] * M
->m
[2][0] * M
->m
[1][3] -
2199 M
->m
[0][3] * M
->m
[1][0] * M
->m
[2][1] +
2200 M
->m
[0][3] * M
->m
[2][0] * M
->m
[1][1];
2203 -M
->m
[1][0] * M
->m
[2][1] * M
->m
[3][2] +
2204 M
->m
[1][0] * M
->m
[3][1] * M
->m
[2][2] +
2205 M
->m
[1][1] * M
->m
[2][0] * M
->m
[3][2] -
2206 M
->m
[1][1] * M
->m
[3][0] * M
->m
[2][2] -
2207 M
->m
[1][2] * M
->m
[2][0] * M
->m
[3][1] +
2208 M
->m
[1][2] * M
->m
[3][0] * M
->m
[2][1];
2211 M
->m
[0][0] * M
->m
[2][1] * M
->m
[3][2] -
2212 M
->m
[0][0] * M
->m
[3][1] * M
->m
[2][2] -
2213 M
->m
[0][1] * M
->m
[2][0] * M
->m
[3][2] +
2214 M
->m
[0][1] * M
->m
[3][0] * M
->m
[2][2] +
2215 M
->m
[0][2] * M
->m
[2][0] * M
->m
[3][1] -
2216 M
->m
[0][2] * M
->m
[3][0] * M
->m
[2][1];
2219 -M
->m
[0][0] * M
->m
[1][1] * M
->m
[3][2] +
2220 M
->m
[0][0] * M
->m
[3][1] * M
->m
[1][2] +
2221 M
->m
[0][1] * M
->m
[1][0] * M
->m
[3][2] -
2222 M
->m
[0][1] * M
->m
[3][0] * M
->m
[1][2] -
2223 M
->m
[0][2] * M
->m
[1][0] * M
->m
[3][1] +
2224 M
->m
[0][2] * M
->m
[3][0] * M
->m
[1][1];
2227 M
->m
[0][0] * M
->m
[1][1] * M
->m
[2][2] -
2228 M
->m
[0][0] * M
->m
[2][1] * M
->m
[1][2] -
2229 M
->m
[0][1] * M
->m
[1][0] * M
->m
[2][2] +
2230 M
->m
[0][1] * M
->m
[2][0] * M
->m
[1][2] +
2231 M
->m
[0][2] * M
->m
[1][0] * M
->m
[2][1] -
2232 M
->m
[0][2] * M
->m
[2][0] * M
->m
[1][1];
/* Pairs M's first column with D's first row; presumably the determinant
 * (TODO confirm -- the assignment target is not visible). */
2235 M
->m
[0][0] * D
->m
[0][0] +
2236 M
->m
[1][0] * D
->m
[0][1] +
2237 M
->m
[2][0] * D
->m
[0][2] +
2238 M
->m
[3][0] * D
->m
[0][3];
/* Visits every (i, k) position; the loop body is not visible here. */
2242 for (i
= 0; i
< 4; i
++)
2243 for (k
= 0; k
< 4; k
++)
/* Self-check: I = D * M should be the identity matrix within 1e-3. */
2250 nine_d3d_matrix_matrix_mul(&I
, D
, M
);
2252 for (i
= 0; i
< 4; ++i
)
2253 for (k
= 0; k
< 4; ++k
)
2254 if (fabsf(I
.m
[i
][k
] - (float)(i
== k
)) > 1e-3)
2255 DBG("Matrix inversion check FAILED !\n");
2260 /* TODO: don't use 4x4 inverse, unless this gets all nicely inlined ? */
2262 nine_d3d_matrix_inverse_3x3(D3DMATRIX
*D
, const D3DMATRIX
*M
)
2267 for (i
= 0; i
< 3; ++i
)
2268 for (j
= 0; j
< 3; ++j
)
2269 T
.m
[i
][j
] = M
->m
[i
][j
];
2270 for (i
= 0; i
< 3; ++i
) {
2276 nine_d3d_matrix_inverse(D
, &T
);