/*
 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "vertexdeclaration9.h"
24 #include "vertexbuffer9.h"
26 #include "nine_helpers.h"
27 #include "nine_shader.h"
29 #include "pipe/p_format.h"
30 #include "pipe/p_context.h"
31 #include "util/u_math.h"
32 #include "util/u_format.h"
33 #include "translate/translate.h"
35 #define DBG_CHANNEL DBG_VERTEXDECLARATION
37 static inline enum pipe_format
decltype_format(BYTE type
)
40 case D3DDECLTYPE_FLOAT1
: return PIPE_FORMAT_R32_FLOAT
;
41 case D3DDECLTYPE_FLOAT2
: return PIPE_FORMAT_R32G32_FLOAT
;
42 case D3DDECLTYPE_FLOAT3
: return PIPE_FORMAT_R32G32B32_FLOAT
;
43 case D3DDECLTYPE_FLOAT4
: return PIPE_FORMAT_R32G32B32A32_FLOAT
;
44 case D3DDECLTYPE_D3DCOLOR
: return PIPE_FORMAT_B8G8R8A8_UNORM
;
45 case D3DDECLTYPE_UBYTE4
: return PIPE_FORMAT_R8G8B8A8_USCALED
;
46 case D3DDECLTYPE_SHORT2
: return PIPE_FORMAT_R16G16_SSCALED
;
47 case D3DDECLTYPE_SHORT4
: return PIPE_FORMAT_R16G16B16A16_SSCALED
;
48 case D3DDECLTYPE_UBYTE4N
: return PIPE_FORMAT_R8G8B8A8_UNORM
;
49 case D3DDECLTYPE_SHORT2N
: return PIPE_FORMAT_R16G16_SNORM
;
50 case D3DDECLTYPE_SHORT4N
: return PIPE_FORMAT_R16G16B16A16_SNORM
;
51 case D3DDECLTYPE_USHORT2N
: return PIPE_FORMAT_R16G16_UNORM
;
52 case D3DDECLTYPE_USHORT4N
: return PIPE_FORMAT_R16G16B16A16_UNORM
;
53 case D3DDECLTYPE_UDEC3
: return PIPE_FORMAT_R10G10B10X2_USCALED
;
54 case D3DDECLTYPE_DEC3N
: return PIPE_FORMAT_R10G10B10X2_SNORM
;
55 case D3DDECLTYPE_FLOAT16_2
: return PIPE_FORMAT_R16G16_FLOAT
;
56 case D3DDECLTYPE_FLOAT16_4
: return PIPE_FORMAT_R16G16B16A16_FLOAT
;
58 assert(!"Implementation error !");
60 return PIPE_FORMAT_NONE
;
63 static inline unsigned decltype_size(BYTE type
)
66 case D3DDECLTYPE_FLOAT1
: return 1 * sizeof(float);
67 case D3DDECLTYPE_FLOAT2
: return 2 * sizeof(float);
68 case D3DDECLTYPE_FLOAT3
: return 3 * sizeof(float);
69 case D3DDECLTYPE_FLOAT4
: return 4 * sizeof(float);
70 case D3DDECLTYPE_D3DCOLOR
: return 1 * sizeof(DWORD
);
71 case D3DDECLTYPE_UBYTE4
: return 4 * sizeof(BYTE
);
72 case D3DDECLTYPE_SHORT2
: return 2 * sizeof(short);
73 case D3DDECLTYPE_SHORT4
: return 4 * sizeof(short);
74 case D3DDECLTYPE_UBYTE4N
: return 4 * sizeof(BYTE
);
75 case D3DDECLTYPE_SHORT2N
: return 2 * sizeof(short);
76 case D3DDECLTYPE_SHORT4N
: return 4 * sizeof(short);
77 case D3DDECLTYPE_USHORT2N
: return 2 * sizeof(short);
78 case D3DDECLTYPE_USHORT4N
: return 4 * sizeof(short);
79 case D3DDECLTYPE_UDEC3
: return 4;
80 case D3DDECLTYPE_DEC3N
: return 4;
81 case D3DDECLTYPE_FLOAT16_2
: return 2 * 2;
82 case D3DDECLTYPE_FLOAT16_4
: return 4 * 2;
84 assert(!"Implementation error !");
89 /* Actually, arbitrary usage index values are permitted, but a
90 * simple lookup table won't work in that case. Let's just wait
91 * with making this more generic until we need it.
94 nine_d3ddeclusage_check(unsigned usage
, unsigned usage_idx
)
97 case D3DDECLUSAGE_POSITIONT
:
98 case D3DDECLUSAGE_TESSFACTOR
:
99 case D3DDECLUSAGE_DEPTH
:
100 case D3DDECLUSAGE_NORMAL
:
101 case D3DDECLUSAGE_TANGENT
:
102 case D3DDECLUSAGE_BINORMAL
:
103 case D3DDECLUSAGE_POSITION
:
104 case D3DDECLUSAGE_BLENDWEIGHT
:
105 case D3DDECLUSAGE_BLENDINDICES
:
106 case D3DDECLUSAGE_COLOR
:
108 case D3DDECLUSAGE_PSIZE
:
109 case D3DDECLUSAGE_FOG
:
110 case D3DDECLUSAGE_SAMPLE
:
111 return usage_idx
<= 0;
112 case D3DDECLUSAGE_TEXCOORD
:
113 return usage_idx
<= 15;
119 #define NINE_DECLUSAGE_CASE0(n) case D3DDECLUSAGE_##n: return NINE_DECLUSAGE_##n
120 #define NINE_DECLUSAGE_CASEi(n) case D3DDECLUSAGE_##n: return NINE_DECLUSAGE_i(n, usage_idx)
122 nine_d3d9_to_nine_declusage(unsigned usage
, unsigned usage_idx
)
124 if (!nine_d3ddeclusage_check(usage
, usage_idx
))
125 ERR("D3DDECLUSAGE_%u[%u]\n",usage
,usage_idx
);
126 assert(nine_d3ddeclusage_check(usage
, usage_idx
));
128 NINE_DECLUSAGE_CASEi(POSITION
);
129 NINE_DECLUSAGE_CASEi(BLENDWEIGHT
);
130 NINE_DECLUSAGE_CASEi(BLENDINDICES
);
131 NINE_DECLUSAGE_CASEi(NORMAL
);
132 NINE_DECLUSAGE_CASE0(PSIZE
);
133 NINE_DECLUSAGE_CASEi(TEXCOORD
);
134 NINE_DECLUSAGE_CASEi(TANGENT
);
135 NINE_DECLUSAGE_CASEi(BINORMAL
);
136 NINE_DECLUSAGE_CASE0(TESSFACTOR
);
137 NINE_DECLUSAGE_CASEi(POSITIONT
);
138 NINE_DECLUSAGE_CASEi(COLOR
);
139 NINE_DECLUSAGE_CASE0(DEPTH
);
140 NINE_DECLUSAGE_CASE0(FOG
);
141 NINE_DECLUSAGE_CASE0(SAMPLE
);
143 assert(!"Invalid DECLUSAGE.");
144 return NINE_DECLUSAGE_NONE
;
148 static const char *nine_declusage_names
[] =
150 [NINE_DECLUSAGE_POSITION
] = "POSITION",
151 [NINE_DECLUSAGE_BLENDWEIGHT
] = "BLENDWEIGHT",
152 [NINE_DECLUSAGE_BLENDINDICES
] = "BLENDINDICES",
153 [NINE_DECLUSAGE_NORMAL
] = "NORMAL",
154 [NINE_DECLUSAGE_PSIZE
] = "PSIZE",
155 [NINE_DECLUSAGE_TEXCOORD
] = "TEXCOORD",
156 [NINE_DECLUSAGE_TANGENT
] = "TANGENT",
157 [NINE_DECLUSAGE_BINORMAL
] = "BINORMAL",
158 [NINE_DECLUSAGE_TESSFACTOR
] = "TESSFACTOR",
159 [NINE_DECLUSAGE_POSITIONT
] = "POSITIONT",
160 [NINE_DECLUSAGE_COLOR
] = "DIFFUSE",
161 [NINE_DECLUSAGE_DEPTH
] = "DEPTH",
162 [NINE_DECLUSAGE_FOG
] = "FOG",
163 [NINE_DECLUSAGE_NONE
] = "(NONE)",
165 static inline const char *
166 nine_declusage_name(unsigned ndcl
)
168 return nine_declusage_names
[ndcl
% NINE_DECLUSAGE_COUNT
];
172 NineVertexDeclaration9_ctor( struct NineVertexDeclaration9
*This
,
173 struct NineUnknownParams
*pParams
,
174 const D3DVERTEXELEMENT9
*pElements
)
176 const D3DCAPS9
*caps
;
178 DBG("This=%p pParams=%p pElements=%p\n", This
, pParams
, pElements
);
182 pElements
[nelems
].Stream
!= 0xFF;
184 user_assert(pElements
[nelems
].Type
!= D3DDECLTYPE_UNUSED
, E_FAIL
);
185 user_assert(!(pElements
[nelems
].Offset
& 3), E_FAIL
);
188 caps
= NineDevice9_GetCaps(pParams
->device
);
189 user_assert(nelems
<= caps
->MaxStreams
, D3DERR_INVALIDCALL
);
191 HRESULT hr
= NineUnknown_ctor(&This
->base
, pParams
);
192 if (FAILED(hr
)) { return hr
; }
194 This
->nelems
= nelems
;
195 This
->decls
= CALLOC(This
->nelems
+1, sizeof(D3DVERTEXELEMENT9
));
196 This
->elems
= CALLOC(This
->nelems
, sizeof(struct pipe_vertex_element
));
197 This
->usage_map
= CALLOC(This
->nelems
, sizeof(uint16_t));
198 if (!This
->decls
|| !This
->elems
|| !This
->usage_map
) { return E_OUTOFMEMORY
; }
199 memcpy(This
->decls
, pElements
, sizeof(D3DVERTEXELEMENT9
)*(This
->nelems
+1));
201 for (i
= 0; i
< This
->nelems
; ++i
) {
202 uint16_t usage
= nine_d3d9_to_nine_declusage(This
->decls
[i
].Usage
,
203 This
->decls
[i
].UsageIndex
);
204 This
->usage_map
[i
] = usage
;
206 if (This
->decls
[i
].Usage
== D3DDECLUSAGE_POSITIONT
)
207 This
->position_t
= TRUE
;
209 This
->elems
[i
].src_offset
= This
->decls
[i
].Offset
;
210 This
->elems
[i
].instance_divisor
= 0;
211 This
->elems
[i
].vertex_buffer_index
= This
->decls
[i
].Stream
;
212 This
->elems
[i
].src_format
= decltype_format(This
->decls
[i
].Type
);
213 /* XXX Remember Method (tesselation), Usage, UsageIndex */
215 DBG("VERTEXELEMENT[%u]: Stream=%u Offset=%u Type=%s DeclUsage=%s%d\n", i
,
216 This
->decls
[i
].Stream
,
217 This
->decls
[i
].Offset
,
218 util_format_name(This
->elems
[i
].src_format
),
219 nine_declusage_name(usage
),
220 usage
/ NINE_DECLUSAGE_COUNT
);
227 NineVertexDeclaration9_dtor( struct NineVertexDeclaration9
*This
)
229 DBG("This=%p\n", This
);
233 FREE(This
->usage_map
);
235 NineUnknown_dtor(&This
->base
);
239 NineVertexDeclaration9_GetDeclaration( struct NineVertexDeclaration9
*This
,
240 D3DVERTEXELEMENT9
*pElement
,
244 user_assert(pNumElements
, D3DERR_INVALIDCALL
);
245 *pNumElements
= This
->nelems
+1;
248 if (pNumElements
) { *pNumElements
= This
->nelems
+1; }
249 memcpy(pElement
, This
->decls
, sizeof(D3DVERTEXELEMENT9
)*(This
->nelems
+1));
253 IDirect3DVertexDeclaration9Vtbl NineVertexDeclaration9_vtable
= {
254 (void *)NineUnknown_QueryInterface
,
255 (void *)NineUnknown_AddRef
,
256 (void *)NineUnknown_Release
,
257 (void *)NineUnknown_GetDevice
, /* actually part of VertexDecl9 iface */
258 (void *)NineVertexDeclaration9_GetDeclaration
261 static const GUID
*NineVertexDeclaration9_IIDs
[] = {
262 &IID_IDirect3DVertexDeclaration9
,
268 NineVertexDeclaration9_new( struct NineDevice9
*pDevice
,
269 const D3DVERTEXELEMENT9
*pElements
,
270 struct NineVertexDeclaration9
**ppOut
)
272 NINE_DEVICE_CHILD_NEW(VertexDeclaration9
, ppOut
, /* args */ pDevice
, pElements
);
276 NineVertexDeclaration9_new_from_fvf( struct NineDevice9
*pDevice
,
278 struct NineVertexDeclaration9
**ppOut
)
280 D3DVERTEXELEMENT9 elems
[16], decl_end
= D3DDECL_END();
281 unsigned texcount
, i
, betas
, nelems
= 0;
282 BYTE beta_index
= 0xFF;
284 switch (FVF
& D3DFVF_POSITION_MASK
) {
285 case D3DFVF_XYZ
: /* simple XYZ */
290 case D3DFVF_XYZB5
: /* XYZ with beta values */
291 elems
[nelems
].Type
= D3DDECLTYPE_FLOAT3
;
292 elems
[nelems
].Usage
= D3DDECLUSAGE_POSITION
;
293 elems
[nelems
].UsageIndex
= 0;
295 /* simple XYZ has no beta values. break. */
296 if ((FVF
& D3DFVF_POSITION_MASK
) == D3DFVF_XYZ
) { break; }
298 betas
= (((FVF
& D3DFVF_XYZB5
)-D3DFVF_XYZB1
)>>1)+1;
299 if (FVF
& D3DFVF_LASTBETA_D3DCOLOR
) {
300 beta_index
= D3DDECLTYPE_D3DCOLOR
;
301 } else if (FVF
& D3DFVF_LASTBETA_UBYTE4
) {
302 beta_index
= D3DDECLTYPE_UBYTE4
;
303 } else if ((FVF
& D3DFVF_XYZB5
) == D3DFVF_XYZB5
) {
304 beta_index
= D3DDECLTYPE_FLOAT1
;
306 if (beta_index
!= 0xFF) { --betas
; }
310 case 1: elems
[nelems
].Type
= D3DDECLTYPE_FLOAT1
; break;
311 case 2: elems
[nelems
].Type
= D3DDECLTYPE_FLOAT2
; break;
312 case 3: elems
[nelems
].Type
= D3DDECLTYPE_FLOAT3
; break;
313 case 4: elems
[nelems
].Type
= D3DDECLTYPE_FLOAT4
; break;
315 assert(!"Implementation error!");
317 elems
[nelems
].Usage
= D3DDECLUSAGE_BLENDWEIGHT
;
318 elems
[nelems
].UsageIndex
= 0;
322 if (beta_index
!= 0xFF) {
323 elems
[nelems
].Type
= beta_index
;
324 elems
[nelems
].Usage
= D3DDECLUSAGE_BLENDINDICES
;
325 elems
[nelems
].UsageIndex
= 0;
330 case D3DFVF_XYZW
: /* simple XYZW */
331 case D3DFVF_XYZRHW
: /* pretransformed XYZW */
332 elems
[nelems
].Type
= D3DDECLTYPE_FLOAT4
;
333 elems
[nelems
].Usage
=
334 ((FVF
& D3DFVF_POSITION_MASK
) == D3DFVF_XYZW
) ?
335 D3DDECLUSAGE_POSITION
: D3DDECLUSAGE_POSITIONT
;
336 elems
[nelems
].UsageIndex
= 0;
341 (void)user_error(!"Position doesn't match any known combination");
344 /* normals, psize and colors */
345 if (FVF
& D3DFVF_NORMAL
) {
346 elems
[nelems
].Type
= D3DDECLTYPE_FLOAT3
;
347 elems
[nelems
].Usage
= D3DDECLUSAGE_NORMAL
;
348 elems
[nelems
].UsageIndex
= 0;
351 if (FVF
& D3DFVF_PSIZE
) {
352 elems
[nelems
].Type
= D3DDECLTYPE_FLOAT1
;
353 elems
[nelems
].Usage
= D3DDECLUSAGE_PSIZE
;
354 elems
[nelems
].UsageIndex
= 0;
357 if (FVF
& D3DFVF_DIFFUSE
) {
358 elems
[nelems
].Type
= D3DDECLTYPE_D3DCOLOR
;
359 elems
[nelems
].Usage
= D3DDECLUSAGE_COLOR
;
360 elems
[nelems
].UsageIndex
= 0;
363 if (FVF
& D3DFVF_SPECULAR
) {
364 elems
[nelems
].Type
= D3DDECLTYPE_D3DCOLOR
;
365 elems
[nelems
].Usage
= D3DDECLUSAGE_COLOR
;
366 elems
[nelems
].UsageIndex
= 1;
371 texcount
= (FVF
& D3DFVF_TEXCOUNT_MASK
) >> D3DFVF_TEXCOUNT_SHIFT
;
372 if (user_error(texcount
<= 8)) { texcount
= 8; }
374 for (i
= 0; i
< texcount
; ++i
) {
375 switch ((FVF
>> (16+i
*2)) & 0x3) {
376 case D3DFVF_TEXTUREFORMAT1
:
377 elems
[nelems
].Type
= D3DDECLTYPE_FLOAT1
;
380 case D3DFVF_TEXTUREFORMAT2
:
381 elems
[nelems
].Type
= D3DDECLTYPE_FLOAT2
;
384 case D3DFVF_TEXTUREFORMAT3
:
385 elems
[nelems
].Type
= D3DDECLTYPE_FLOAT3
;
388 case D3DFVF_TEXTUREFORMAT4
:
389 elems
[nelems
].Type
= D3DDECLTYPE_FLOAT4
;
393 assert(!"Implementation error!");
395 elems
[nelems
].Usage
= D3DDECLUSAGE_TEXCOORD
;
396 elems
[nelems
].UsageIndex
= i
;
400 /* fill out remaining data */
401 for (i
= 0; i
< nelems
; ++i
) {
403 elems
[i
].Offset
= (i
== 0) ? 0 : (elems
[i
-1].Offset
+
404 decltype_size(elems
[i
-1].Type
));
405 elems
[i
].Method
= D3DDECLMETHOD_DEFAULT
;
407 elems
[nelems
++] = decl_end
;
409 NINE_DEVICE_CHILD_NEW(VertexDeclaration9
, ppOut
, /* args */ pDevice
, elems
);
413 NineVertexDeclaration9_FillStreamOutputInfo(
414 struct NineVertexDeclaration9
*This
,
415 struct nine_vs_output_info
*ShaderOutputsInfo
,
417 struct pipe_stream_output_info
*so
)
419 unsigned so_outputs
= 0;
422 memset(so
, 0, sizeof(struct pipe_stream_output_info
));
424 for (i
= 0; i
< numOutputs
; i
++) {
425 BYTE output_semantic
= ShaderOutputsInfo
[i
].output_semantic
;
426 unsigned output_semantic_index
= ShaderOutputsInfo
[i
].output_semantic_index
;
428 for (j
= 0; j
< This
->nelems
; j
++) {
429 if ((This
->decls
[j
].Usage
== output_semantic
||
430 (output_semantic
== D3DDECLUSAGE_POSITION
&&
431 This
->decls
[j
].Usage
== D3DDECLUSAGE_POSITIONT
)) &&
432 This
->decls
[j
].UsageIndex
== output_semantic_index
) {
433 DBG("Matching %s %d: o%d -> %d\n",
434 nine_declusage_name(nine_d3d9_to_nine_declusage(This
->decls
[j
].Usage
, 0)),
435 This
->decls
[j
].UsageIndex
, i
, j
);
436 so
->output
[so_outputs
].register_index
= ShaderOutputsInfo
[i
].output_index
;
437 so
->output
[so_outputs
].start_component
= 0;
438 if (ShaderOutputsInfo
[i
].mask
& 8)
439 so
->output
[so_outputs
].num_components
= 4;
440 else if (ShaderOutputsInfo
[i
].mask
& 4)
441 so
->output
[so_outputs
].num_components
= 3;
442 else if (ShaderOutputsInfo
[i
].mask
& 2)
443 so
->output
[so_outputs
].num_components
= 2;
445 so
->output
[so_outputs
].num_components
= 1;
446 so
->output
[so_outputs
].output_buffer
= 0;
447 so
->output
[so_outputs
].dst_offset
= so_outputs
* sizeof(float[4])/4;
448 so
->output
[so_outputs
].stream
= 0;
455 so
->num_outputs
= so_outputs
;
456 so
->stride
[0] = so_outputs
* sizeof(float[4])/4;
459 /* ProcessVertices runs stream output into a temporary buffer to capture
461 * Now we have to convert them to the format and order set by the vertex
462 * declaration, for which we use u_translate.
463 * This is necessary if the vertex declaration contains elements using a
464 * non float32 format, because stream output only supports f32/u32/s32.
467 NineVertexDeclaration9_ConvertStreamOutput(
468 struct NineVertexDeclaration9
*This
,
469 struct NineVertexBuffer9
*pDstBuf
,
473 const struct pipe_stream_output_info
*so
)
475 struct translate
*translate
;
476 struct translate_key transkey
;
481 DBG("This=%p pDstBuf=%p DestIndex=%u VertexCount=%u pSrcBuf=%p so=%p\n",
482 This
, pDstBuf
, DestIndex
, VertexCount
, pSrcBuf
, so
);
484 transkey
.output_stride
= 0;
485 for (i
= 0; i
< This
->nelems
; ++i
) {
486 enum pipe_format format
;
488 switch (so
->output
[i
].num_components
) {
489 case 1: format
= PIPE_FORMAT_R32_FLOAT
; break;
490 case 2: format
= PIPE_FORMAT_R32G32_FLOAT
; break;
491 case 3: format
= PIPE_FORMAT_R32G32B32_FLOAT
; break;
493 assert(so
->output
[i
].num_components
== 4);
494 format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
497 transkey
.element
[i
].type
= TRANSLATE_ELEMENT_NORMAL
;
498 transkey
.element
[i
].input_format
= format
;
499 transkey
.element
[i
].input_buffer
= 0;
500 transkey
.element
[i
].input_offset
= so
->output
[i
].dst_offset
* 4;
501 transkey
.element
[i
].instance_divisor
= 0;
503 transkey
.element
[i
].output_format
= This
->elems
[i
].src_format
;
504 transkey
.element
[i
].output_offset
= This
->elems
[i
].src_offset
;
505 transkey
.output_stride
+=
506 util_format_get_blocksize(This
->elems
[i
].src_format
);
508 assert(!(transkey
.output_stride
& 3));
510 transkey
.nr_elements
= This
->nelems
;
512 translate
= translate_create(&transkey
);
514 return E_OUTOFMEMORY
;
516 hr
= NineVertexBuffer9_Lock(pDstBuf
,
517 transkey
.output_stride
* DestIndex
,
518 transkey
.output_stride
* VertexCount
,
519 &dst_map
, D3DLOCK_DISCARD
);
523 translate
->set_buffer(translate
, 0, pSrcBuf
, so
->stride
[0] * 4, ~0);
525 translate
->run(translate
, 0, VertexCount
, 0, 0, dst_map
);
527 NineVertexBuffer9_Unlock(pDstBuf
);
529 translate
->release(translate
); /* TODO: cache these */