2 * Copyright 2017 Jacob Lifshay
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in all
12 * copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "spirv_to_llvm/spirv_to_llvm.h"
25 #include "llvm_wrapper/llvm_wrapper.h"
26 #include "llvm_wrapper/orc_compile_stack.h"
27 #include "vulkan/util.h"
28 #include "util/soft_float.h"
29 #include "json/json.h"
43 void Api_object_deleter
<Pipeline_cache
>::operator()(Pipeline_cache
*pipeline_cache
) const noexcept
45 delete pipeline_cache
;
52 void Api_object_deleter
<Render_pass
>::operator()(Render_pass
*render_pass
) const noexcept
58 Render_pass_handle
Render_pass_handle::make(const VkRenderPassCreateInfo
&render_pass_create_info
)
60 #warning finish implementing Render_pass_handle::make
61 return Render_pass_handle(new Render_pass());
68 void Api_object_deleter
<Pipeline_layout
>::operator()(Pipeline_layout
*pipeline_layout
) const
71 delete pipeline_layout
;
75 Pipeline_layout_handle
Pipeline_layout_handle::make(
76 const VkPipelineLayoutCreateInfo
&pipeline_layout_create_info
)
78 #warning finish implementing Pipeline_layout_handle::make
79 return Pipeline_layout_handle(new Pipeline_layout());
// Pipeline::optimize_module: runs LLVM optimization passes over `module` for the given target
// machine. Three pass managers are built in sequence: a function pass manager run over every
// function, a module pass manager, and a second function pass manager. The module is then
// dumped to stderr.
// NOTE(review): the switch on the code-gen optimization level lists four cases, but the
// brace/break lines are not visible here, so which cases the passes belong to (and the final
// return statement) must be confirmed against the full file.
82 llvm_wrapper::Module
Pipeline::optimize_module(llvm_wrapper::Module module
,
83 ::LLVMTargetMachineRef target_machine
)
85 switch(llvm_wrapper::Target_machine::get_code_gen_opt_level(target_machine
))
87 case ::LLVMCodeGenLevelNone
:
88 case ::LLVMCodeGenLevelLess
:
90 case ::LLVMCodeGenLevelDefault
:
91 case ::LLVMCodeGenLevelAggressive
:
93 #warning finish implementing module optimizations
// First function-level pipeline: mem2reg, SROA, scalarizer, CSE, SCCP, DCE, LICM, etc.
95 auto manager
= llvm_wrapper::Pass_manager::create_function_pass_manager(module
.get());
96 ::LLVMAddAnalysisPasses(target_machine
, manager
.get());
97 ::LLVMAddPromoteMemoryToRegisterPass(manager
.get());
98 ::LLVMAddScalarReplAggregatesPass(manager
.get());
99 ::LLVMAddScalarizerPass(manager
.get());
100 ::LLVMAddEarlyCSEMemSSAPass(manager
.get());
101 ::LLVMAddSCCPPass(manager
.get());
102 ::LLVMAddAggressiveDCEPass(manager
.get());
103 ::LLVMAddLICMPass(manager
.get());
104 ::LLVMAddIndVarSimplifyPass(manager
.get());
105 ::LLVMAddCFGSimplificationPass(manager
.get());
106 ::LLVMAddReassociatePass(manager
.get());
107 ::LLVMAddInstructionCombiningPass(manager
.get());
108 ::LLVMAddNewGVNPass(manager
.get());
109 ::LLVMAddCorrelatedValuePropagationPass(manager
.get());
110 ::LLVMInitializeFunctionPassManager(manager
.get());
// Run the function pass manager on each function in the module.
111 for(auto fn
= ::LLVMGetFirstFunction(module
.get()); fn
; fn
= ::LLVMGetNextFunction(fn
))
112 ::LLVMRunFunctionPassManager(manager
.get(), fn
);
113 ::LLVMFinalizeFunctionPassManager(manager
.get());
// Module-level pipeline: IPSCCP, inlining, dead-argument elimination, global DCE.
116 auto manager
= llvm_wrapper::Pass_manager::create_module_pass_manager();
117 ::LLVMAddAnalysisPasses(target_machine
, manager
.get());
118 ::LLVMAddIPSCCPPass(manager
.get());
119 ::LLVMAddFunctionInliningPass(manager
.get());
120 ::LLVMAddDeadArgEliminationPass(manager
.get());
121 ::LLVMAddGlobalDCEPass(manager
.get());
122 ::LLVMRunPassManager(manager
.get(), module
.get());
// Second function-level pipeline, run after the module-level passes; adds loop unrolling and
// SLP vectorization.
125 auto manager
= llvm_wrapper::Pass_manager::create_function_pass_manager(module
.get());
126 ::LLVMAddAnalysisPasses(target_machine
, manager
.get());
127 ::LLVMAddCFGSimplificationPass(manager
.get());
128 ::LLVMAddPromoteMemoryToRegisterPass(manager
.get());
129 ::LLVMAddScalarReplAggregatesPass(manager
.get());
130 ::LLVMAddLICMPass(manager
.get());
131 ::LLVMAddIndVarSimplifyPass(manager
.get());
132 ::LLVMAddReassociatePass(manager
.get());
133 ::LLVMAddInstructionCombiningPass(manager
.get());
134 ::LLVMAddLoopUnrollPass(manager
.get());
135 ::LLVMAddSLPVectorizePass(manager
.get());
136 ::LLVMAddAggressiveDCEPass(manager
.get());
137 ::LLVMInitializeFunctionPassManager(manager
.get());
138 for(auto fn
= ::LLVMGetFirstFunction(module
.get()); fn
; fn
= ::LLVMGetNextFunction(fn
))
139 ::LLVMRunFunctionPassManager(manager
.get(), fn
);
140 ::LLVMFinalizeFunctionPassManager(manager
.get());
// Debug output: print the optimized IR to stderr.
142 std::cerr
<< "optimized module:" << std::endl
;
143 ::LLVMDumpModule(module
.get());
// Private state of a Graphics_pipeline: the LLVM context, the JIT symbol resolver and ORC
// compile stack, the target data layout, the converted shader modules, and the type descriptor
// of the vertex shader's output struct.
150 struct Graphics_pipeline::Implementation
152 llvm_wrapper::Context llvm_context
= llvm_wrapper::Context::create();
153 spirv_to_llvm::Jit_symbol_resolver jit_symbol_resolver
;
154 llvm_wrapper::Orc_compile_stack jit_stack
;
155 llvm_wrapper::Target_data data_layout
;
156 std::vector
<spirv_to_llvm::Converted_module
> compiled_shaders
;
157 std::shared_ptr
<spirv_to_llvm::Struct_type_descriptor
> vertex_shader_output_struct
;
// append_value_to_string: appends a textual rendering of the object at `value`, interpreted
// according to `type`, to `str`. Dispatch is done by a local Type_visitor; aggregate types
// recurse into this function for each element. Recursive callers assign the result back to
// their string, so the extended string is returned (the return statement itself is not visible
// in this listing).
158 std::string
append_value_to_string(std::string str
,
159 spirv_to_llvm::Type_descriptor
&type
,
160 const void *value
) const
// Local visitor used by append_value_to_string. It holds a back-pointer to the enclosing
// Implementation and is constructed from the output string and the raw value pointer.
// NOTE(review): the member declarations for `str` and `value` and the constructor body are
// not visible in this listing.
162 struct Visitor
: public spirv_to_llvm::Type_descriptor::Type_visitor
164 const Implementation
*this_
;
167 Visitor(const Implementation
*this_
, std::string
&str
, const void *value
) noexcept
// Formats a scalar value, selected by the LLVM type kind of the descriptor's type.
// Floating-point kinds are appended as a double and are followed by a NaN test and a base-0x10
// dump of the raw bits; integer kinds are switched on bit width and read as
// uint8/uint16/uint32/uint64. Falls through to an assert plus std::runtime_error
// ("unhandled type").
// NOTE(review): the conditions selecting between the hex / unsigned / signed integer forms,
// the `case` labels for each width, and the break/brace lines are not visible in this listing.
173 virtual void visit(spirv_to_llvm::Simple_type_descriptor
&type
) override
175 auto llvm_type
= type
.get_or_make_type().type
;
176 switch(::LLVMGetTypeKind(llvm_type
))
// Type kinds with no dedicated formatting in this function.
178 case ::LLVMVoidTypeKind
:
179 case ::LLVMX86_FP80TypeKind
:
180 case ::LLVMFP128TypeKind
:
181 case ::LLVMPPC_FP128TypeKind
:
182 case ::LLVMLabelTypeKind
:
183 case ::LLVMFunctionTypeKind
:
184 case ::LLVMStructTypeKind
:
185 case ::LLVMArrayTypeKind
:
186 case ::LLVMPointerTypeKind
:
187 case ::LLVMVectorTypeKind
:
188 case ::LLVMMetadataTypeKind
:
189 case ::LLVMX86_MMXTypeKind
:
190 case ::LLVMTokenTypeKind
:
// 16-bit float: read the raw bits as uint16 and convert through util::soft_float.
192 case ::LLVMHalfTypeKind
:
194 auto integer_value
= *static_cast<const std::uint16_t *>(value
);
196 util::soft_float::ExtendedFloat::fromHalfPrecision(integer_value
);
197 str
= json::ast::Number_value::append_double_to_string(
198 static_cast<double>(float_value
), std::move(str
));
199 if(float_value
.isNaN())
202 str
= json::ast::Number_value::append_unsigned_integer_to_string(
203 integer_value
, std::move(str
), 0x10);
// 32-bit float: the static_assert checks that uint32_t and float share size and alignment.
208 case ::LLVMFloatTypeKind
:
210 static_assert(sizeof(std::uint32_t) == sizeof(float)
211 && alignof(std::uint32_t) == alignof(float),
215 std::uint32_t integer_value
;
218 integer_value
= *static_cast<const std::uint32_t *>(value
);
219 str
= json::ast::Number_value::append_double_to_string(float_value
,
221 if(std::isnan(float_value
))
224 str
= json::ast::Number_value::append_unsigned_integer_to_string(
225 integer_value
, std::move(str
), 0x10);
// 64-bit float: same scheme as the 32-bit case, using uint64_t for the raw bits.
230 case ::LLVMDoubleTypeKind
:
232 static_assert(sizeof(std::uint64_t) == sizeof(double)
233 && alignof(std::uint64_t) == alignof(double),
237 std::uint64_t integer_value
;
240 integer_value
= *static_cast<const std::uint64_t *>(value
);
241 str
= json::ast::Number_value::append_double_to_string(float_value
,
243 if(std::isnan(float_value
))
246 str
= json::ast::Number_value::append_unsigned_integer_to_string(
247 integer_value
, std::move(str
), 0x10);
// Integers: each width offers a base-0x10, an unsigned decimal and a signed rendering.
252 case ::LLVMIntegerTypeKind
:
254 switch(::LLVMGetIntTypeWidth(llvm_type
))
258 auto integer_value
= *static_cast<const std::uint8_t *>(value
);
260 str
= json::ast::Number_value::append_unsigned_integer_to_string(
261 integer_value
, std::move(str
), 0x10);
263 str
= json::ast::Number_value::append_unsigned_integer_to_string(
264 integer_value
, std::move(str
));
266 str
= json::ast::Number_value::append_signed_integer_to_string(
267 static_cast<std::int8_t>(integer_value
), std::move(str
));
272 auto integer_value
= *static_cast<const std::uint16_t *>(value
);
274 str
= json::ast::Number_value::append_unsigned_integer_to_string(
275 integer_value
, std::move(str
), 0x10);
277 str
= json::ast::Number_value::append_unsigned_integer_to_string(
278 integer_value
, std::move(str
));
280 str
= json::ast::Number_value::append_signed_integer_to_string(
281 static_cast<std::int16_t>(integer_value
), std::move(str
));
286 auto integer_value
= *static_cast<const std::uint32_t *>(value
);
288 str
= json::ast::Number_value::append_unsigned_integer_to_string(
289 integer_value
, std::move(str
), 0x10);
291 str
= json::ast::Number_value::append_unsigned_integer_to_string(
292 integer_value
, std::move(str
));
294 str
= json::ast::Number_value::append_signed_integer_to_string(
295 static_cast<std::int32_t>(integer_value
), std::move(str
));
300 auto integer_value
= *static_cast<const std::uint64_t *>(value
);
302 str
= json::ast::Number_value::append_unsigned_integer_to_string(
303 integer_value
, std::move(str
), 0x10);
305 str
= json::ast::Number_value::append_unsigned_integer_to_string(
306 integer_value
, std::move(str
));
308 str
= json::ast::Number_value::append_signed_integer_to_string(
309 static_cast<std::int64_t>(integer_value
), std::move(str
));
// Anything not handled above is a programming error.
316 assert(!"unhandled type");
317 throw std::runtime_error("unhandled type");
// Formats a vector by formatting each element in turn. The element stride is the ABI size of
// the element's LLVM type under the pipeline's data layout; element i is read at byte offset
// i * element_size from `value`.
// NOTE(review): the separator/bracket text emitted around the elements is not visible in this
// listing.
319 virtual void visit(spirv_to_llvm::Vector_type_descriptor
&type
) override
321 auto llvm_element_type
= type
.get_element_type()->get_or_make_type().type
;
322 std::size_t element_size
=
323 ::LLVMABISizeOfType(this_
->data_layout
.get(), llvm_element_type
);
324 std::size_t element_count
= type
.get_element_count();
327 for(std::size_t i
= 0; i
< element_count
; i
++)
331 str
= this_
->append_value_to_string(
333 *type
.get_element_type(),
334 static_cast<const char *>(value
) + i
* element_size
);
// Matrices cannot be dumped yet: asserts in debug builds and otherwise throws
// std::runtime_error("dumping matrix not implemented").
338 virtual void visit(spirv_to_llvm::Matrix_type_descriptor
&type
) override
340 assert(!"dumping matrix not implemented");
341 throw std::runtime_error("dumping matrix not implemented");
342 #warning dumping matrix not implemented
// Formats an array by formatting each element in turn, using the same scheme as the vector
// overload: the stride is the element type's ABI size under the pipeline's data layout, and
// element i is read at byte offset i * element_size from `value`.
// NOTE(review): the separator/bracket text emitted around the elements is not visible in this
// listing.
344 virtual void visit(spirv_to_llvm::Array_type_descriptor
&type
) override
346 auto llvm_element_type
= type
.get_element_type()->get_or_make_type().type
;
347 std::size_t element_size
=
348 ::LLVMABISizeOfType(this_
->data_layout
.get(), llvm_element_type
);
349 std::size_t element_count
= type
.get_element_count();
352 for(std::size_t i
= 0; i
< element_count
; i
++)
356 str
= this_
->append_value_to_string(
358 *type
.get_element_type(),
359 static_cast<const char *>(value
) + i
* element_size
);
// Formats a pointer: loads the pointer stored at `value` and appends its address as an
// unsigned 64-bit integer.
// NOTE(review): the prefix text and the remaining arguments of the append call (e.g. the
// radix) are not visible in this listing.
363 virtual void visit(spirv_to_llvm::Pointer_type_descriptor
&type
) override
366 str
= json::ast::Number_value::append_unsigned_integer_to_string(
367 reinterpret_cast<std::uint64_t>(*static_cast<const void *const *>(value
)),
// Formats a function pointer: appends the literal "function:0x", then the address stored at
// `value` as an unsigned 64-bit integer.
// NOTE(review): the remaining arguments of the append call are not visible in this listing;
// the "0x" prefix suggests a hexadecimal radix — confirm.
371 virtual void visit(spirv_to_llvm::Function_type_descriptor
&type
) override
373 str
+= "function:0x";
374 str
= json::ast::Number_value::append_unsigned_integer_to_string(
375 reinterpret_cast<std::uint64_t>(*static_cast<const void *const *>(value
)),
// Formats a struct by formatting each member in turn. Each member is read at the byte offset
// LLVMOffsetOfElement reports for its llvm_member_index in the struct's LLVM type under the
// pipeline's data layout.
// NOTE(review): the member-name/separator text and the type argument of the recursive call are
// not visible in this listing.
379 virtual void visit(spirv_to_llvm::Struct_type_descriptor
&type
) override
381 auto &&members
= type
.get_members(true);
382 auto llvm_type
= type
.get_or_make_type().type
;
385 for(auto &member
: members
)
389 str
= this_
->append_value_to_string(
392 static_cast<const char *>(value
)
393 + ::LLVMOffsetOfElement(
394 this_
->data_layout
.get(), llvm_type
, member
.llvm_member_index
));
// Body of append_value_to_string: dispatch on the dynamic type of `type` through a temporary
// Visitor bound to this Implementation, the output string and the value pointer.
399 type
.visit(Visitor(this, str
, value
));
// Writes "output: " followed by a textual dump of one vertex shader output struct to stderr.
// `output_struct` is formatted by Implementation::append_value_to_string according to the
// stored vertex_shader_output_struct descriptor, starting from an empty string.
// NOTE(review): the end of the stream expression is not visible in this listing.
404 void Graphics_pipeline::dump_vertex_shader_output_struct(const void *output_struct
) const
406 std::cerr
<< "output: "
407 << implementation
->append_value_to_string(
408 {}, *implementation
->vertex_shader_output_struct
, output_struct
)
// Graphics_pipeline::run: runs the vertex shader over the vertex index range
// [vertex_start_index, vertex_end_index) in chunks, assembles triangles from the shader's
// position output, clips them, and rasterizes them into `color_attachment`.
// The color attachment must use linear tiling (asserted). Pixels are addressed as
// std::uint32_t values via the attachment's memory stride and pixel size.
412 void Graphics_pipeline::run(std::uint32_t vertex_start_index
,
413 std::uint32_t vertex_end_index
,
414 std::uint32_t instance_id
,
415 const image::Image
&color_attachment
,
416 void *const *bindings
)
418 typedef std::uint32_t Pixel_type
;
419 assert(color_attachment
.descriptor
.tiling
== VK_IMAGE_TILING_LINEAR
);
420 std::size_t color_attachment_stride
= color_attachment
.descriptor
.get_memory_stride();
421 std::size_t color_attachment_pixel_size
= color_attachment
.descriptor
.get_memory_pixel_size();
422 unsigned char *color_attachment_memory
= color_attachment
.memory
.get();
// Viewport transform coefficients; assigned immediately below.
423 float viewport_x_scale
, viewport_x_offset
, viewport_y_scale
, viewport_y_offset
,
424 viewport_z_scale
, viewport_z_offset
;
// Viewport transform setup. x and y are scaled by half the viewport width/height and offset to
// the viewport centre; z is scaled by (maxDepth - minDepth) and offset by minDepth.
426 float px
= viewport
.width
;
427 float ox
= viewport
.x
+ 0.5f
* viewport
.width
;
428 float py
= viewport
.height
;
429 float oy
= viewport
.y
+ 0.5f
* viewport
.height
;
430 float pz
= viewport
.maxDepth
- viewport
.minDepth
;
431 float oz
= viewport
.minDepth
;
432 viewport_x_scale
= px
* 0.5f
;
433 viewport_x_offset
= ox
;
434 viewport_y_scale
= py
* 0.5f
;
435 viewport_y_offset
= oy
;
436 viewport_z_scale
= pz
;
437 viewport_z_offset
= oz
;
// Alignment for the 4-component vector types: four times the scalar's alignment, capped at
// alignof(std::max_align_t).
439 constexpr std::size_t vec4_native_alignment
= alignof(float) * 4;
440 constexpr std::size_t max_alignment
= alignof(std::max_align_t
);
441 constexpr std::size_t vec4_alignment
=
442 vec4_native_alignment
> max_alignment
? max_alignment
: vec4_native_alignment
;
443 constexpr std::size_t ivec4_native_alignment
= alignof(std::int32_t) * 4;
444 constexpr std::size_t ivec4_alignment
=
445 ivec4_native_alignment
> max_alignment
? max_alignment
: ivec4_native_alignment
;
// Four-component float vector (x, y, z, w), aligned to vec4_alignment. The default constructor
// value-initializes every component; the explicit constructor takes the four components.
// NOTE(review): the member declarations and the rest of the second constructor's initializer
// list are not visible in this listing.
446 struct alignas(vec4_alignment
) Vec4
452 constexpr Vec4() noexcept
: x(), y(), z(), w()
455 constexpr explicit Vec4(float x
, float y
, float z
, float w
) noexcept
: x(x
),
// Four-component std::int32_t vector (x, y, z, w), aligned to ivec4_alignment. The default
// constructor value-initializes every component.
// NOTE(review): the member declarations and most of the explicit constructor are not visible in
// this listing.
462 struct alignas(ivec4_alignment
) Ivec4
468 constexpr Ivec4() noexcept
: x(), y(), z(), w()
471 constexpr explicit Ivec4(std::int32_t x
,
474 std::int32_t w
) noexcept
: x(x
),
// Linear interpolation between v0 and v1: returns v0 when t == 0 and v1 when t == 1.
481 auto interpolate_float
= [](float t
, float v0
, float v1
) noexcept
->float
483 return t
* v1
+ (1.0f
- t
) * v0
;
// Component-wise linear interpolation between two Vec4 values, built on interpolate_float.
485 auto interpolate_vec4
= [interpolate_float
](
486 float t
, const Vec4
&v0
, const Vec4
&v1
) noexcept
->Vec4
488 return Vec4(interpolate_float(t
, v0
.x
, v1
.x
),
489 interpolate_float(t
, v0
.y
, v1
.y
),
490 interpolate_float(t
, v0
.z
, v1
.z
),
491 interpolate_float(t
, v0
.w
, v1
.w
));
// A triangle is three Vec4 vertexes. The default constructor value-initializes them; the other
// constructor takes the three vertexes in order.
// NOTE(review): the `struct Triangle` line itself is not visible in this listing.
493 static constexpr std::size_t triangle_vertex_count
= 3;
496 Vec4 vertexes
[triangle_vertex_count
];
497 constexpr Triangle() noexcept
: vertexes
{}
500 constexpr Triangle(const Vec4
&v0
, const Vec4
&v1
, const Vec4
&v2
) noexcept
501 : vertexes
{v0
, v1
, v2
}
// Returns the interpolation parameter t at which the line from v0 (t == 0) to v1 (t == 1)
// crosses zero. Callers use it only when v0 and v1 lie on opposite sides of zero, so the
// divisor is non-zero there.
505 auto solve_for_t
= [](float v0
, float v1
) noexcept
->float
507 // solves interpolate_float(t, v0, v1) == 0
508 return v0
/ (v0
- v1
);
// Clips one polygon edge (start_vertex -> end_vertex) against a single clip plane and appends
// the resulting vertexes to output_vertexes, advancing output_vertex_count.
// `eval_vertex` maps a vertex to its signed distance from the plane (non-negative == inside).
// Per edge: outside->outside emits nothing; outside->inside emits the intersection then the end
// vertex; inside->outside emits only the intersection; inside->inside emits the end vertex.
// Returns false when the start vertex's distance is NaN. Only the start vertex is tested
// here; the caller visits every vertex as a start vertex in turn.
// NOTE(review): the trailing arguments of the interpolate_vec4 calls and the final return are
// not visible in this listing.
510 auto clip_edge
= [solve_for_t
, interpolate_vec4
](const Vec4
&start_vertex
,
511 const Vec4
&end_vertex
,
512 Vec4
*output_vertexes
,
513 std::size_t &output_vertex_count
,
514 auto eval_vertex
) -> bool
516 // eval_vertex returns a non-negative number if the vertex is inside the clip volume
517 float start_vertex_signed_distance
= eval_vertex(start_vertex
);
518 float end_vertex_signed_distance
= eval_vertex(end_vertex
);
// x != x is true only for NaN.
519 if(start_vertex_signed_distance
!= start_vertex_signed_distance
)
520 return false; // triangle has a NaN coordinate; skip it
521 if(start_vertex_signed_distance
< 0)
523 // start_vertex is outside
524 if(end_vertex_signed_distance
< 0)
526 // end_vertex is outside; do nothing
530 // end_vertex is inside
531 output_vertexes
[output_vertex_count
++] = interpolate_vec4(
532 solve_for_t(start_vertex_signed_distance
, end_vertex_signed_distance
),
535 output_vertexes
[output_vertex_count
++] = end_vertex
;
540 // start_vertex is inside
541 if(end_vertex_signed_distance
< 0)
543 // end_vertex is outside
544 output_vertexes
[output_vertex_count
++] = interpolate_vec4(
545 solve_for_t(start_vertex_signed_distance
, end_vertex_signed_distance
),
551 // end_vertex is inside
552 output_vertexes
[output_vertex_count
++] = end_vertex
;
// Clips every triangle in `triangles` against one plane described by `eval_vertex`.
// Results are collected in `temp_triangles`, which is cleared first and swapped into
// `triangles` at the end, so `triangles` holds the clipped set on return.
// Each triangle's three edges go through clip_edge, yielding at most four vertexes. Three
// vertexes become one triangle; four are split into two triangles sharing vertex 0. Any other
// count reaching the assert is treated as an algorithm failure.
// NOTE(review): the `case` labels of the switch and the handling of skip_triangle are not
// visible in this listing.
557 auto clip_triangles
= [clip_edge
](
558 std::vector
<Triangle
> &triangles
, std::vector
<Triangle
> &temp_triangles
, auto eval_vertex
)
560 temp_triangles
.clear();
561 for(auto &input_ref
: triangles
)
563 Triangle input
= input_ref
; // copy to enable compiler optimizations
564 constexpr std::size_t max_clipped_output_vertex_count
= 4;
565 Vec4 output_vertexes
[max_clipped_output_vertex_count
];
566 std::size_t output_vertex_count
= 0;
567 bool skip_triangle
= false;
// end_vertex_index is always the vertex after start_vertex_index, wrapping to 0.
568 std::size_t end_vertex_index
= 1;
569 for(std::size_t start_vertex_index
= 0; start_vertex_index
< triangle_vertex_count
;
570 start_vertex_index
++)
572 if(!clip_edge(input
.vertexes
[start_vertex_index
],
573 input
.vertexes
[end_vertex_index
],
578 skip_triangle
= true;
581 if(++end_vertex_index
>= triangle_vertex_count
)
582 end_vertex_index
= 0;
586 switch(output_vertex_count
)
593 temp_triangles
.push_back(
594 Triangle(output_vertexes
[0], output_vertexes
[1], output_vertexes
[2]));
597 temp_triangles
.push_back(
598 Triangle(output_vertexes
[0], output_vertexes
[1], output_vertexes
[2]));
599 temp_triangles
.push_back(
600 Triangle(output_vertexes
[0], output_vertexes
[2], output_vertexes
[3]));
603 assert(!"clipping algorithm failed");
605 temp_triangles
.swap(triangles
);
// Vertex processing loop. Vertexes are shaded in chunks of at most chunk_max_size (96, a
// multiple of three so a chunk never splits a triangle). The chunk buffer holds
// chunk_max_size vertex shader output structs.
// After each run_vertex_shader call, triangles are assembled from consecutive groups of three
// vertexes. Each vertex position is read as a Vec4 at vertex_shader_position_output_offset
// inside its output struct, stepping by vertex_shader_output_struct_size bytes per vertex.
// NOTE(review): the remaining run_vertex_shader arguments, the `Triangle triangle;`
// declaration, and any per-chunk reset of `triangles` are not visible in this listing.
607 std::vector
<Triangle
> triangles
;
608 std::vector
<Triangle
> temp_triangles
;
609 constexpr std::size_t chunk_max_size
= 96;
610 static_assert(chunk_max_size
% triangle_vertex_count
== 0, "");
611 std::unique_ptr
<unsigned char[]> chunk_vertex_buffer(
612 new unsigned char[get_vertex_shader_output_struct_size() * chunk_max_size
]);
613 while(vertex_start_index
< vertex_end_index
)
615 std::uint32_t chunk_size
= vertex_end_index
- vertex_start_index
;
616 if(chunk_size
> chunk_max_size
)
617 chunk_size
= chunk_max_size
;
618 auto current_vertex_start_index
= vertex_start_index
;
619 vertex_start_index
+= chunk_size
;
620 run_vertex_shader(current_vertex_start_index
,
621 current_vertex_start_index
+ chunk_size
,
623 chunk_vertex_buffer
.get(),
625 const unsigned char *current_vertex
=
626 chunk_vertex_buffer
.get() + vertex_shader_position_output_offset
;
// Trailing vertexes that do not form a complete triangle are ignored.
628 for(std::uint32_t i
= 0; i
+ triangle_vertex_count
<= chunk_size
;
629 i
+= triangle_vertex_count
)
632 for(std::size_t j
= 0; j
< triangle_vertex_count
; j
++)
634 triangle
.vertexes
[j
] = *reinterpret_cast<const Vec4
*>(current_vertex
);
635 current_vertex
+= vertex_shader_output_struct_size
;
637 triangles
.push_back(triangle
);
// Clip the assembled triangles against the six planes of the clip volume, one plane per
// clip_triangles call: 0 <= z <= w, -w <= x <= w, -w <= y <= w. Each lambda returns the signed
// distance to its plane (non-negative == inside).
// NOTE(review): the temp_triangles argument of each call and the body of the first lambda are
// not visible in this listing.
639 // clip to 0 <= vertex.z
640 clip_triangles(triangles
,
642 [](const Vec4
&vertex
) noexcept
->float
646 // clip to vertex.z <= vertex.w
647 clip_triangles(triangles
,
649 [](const Vec4
&vertex
) noexcept
->float
651 return vertex
.w
- vertex
.z
;
653 // clip to -vertex.w <= vertex.x
654 clip_triangles(triangles
,
656 [](const Vec4
&vertex
) noexcept
->float
658 return vertex
.x
+ vertex
.w
;
660 // clip to vertex.x <= vertex.w
661 clip_triangles(triangles
,
663 [](const Vec4
&vertex
) noexcept
->float
665 return vertex
.w
- vertex
.x
;
667 // clip to -vertex.w <= vertex.y
668 clip_triangles(triangles
,
670 [](const Vec4
&vertex
) noexcept
->float
672 return vertex
.y
+ vertex
.w
;
674 // clip to vertex.y <= vertex.w
675 clip_triangles(triangles
,
677 [](const Vec4
&vertex
) noexcept
->float
679 return vertex
.w
- vertex
.y
;
// Scissor rectangle as a half-open pixel range [min, end): min is the scissor offset and end
// is offset + extent. The minimum corner is clamped to be non-negative here.
681 VkOffset2D clipped_scissor_rect_min
= scissor_rect
.offset
;
682 VkOffset2D clipped_scissor_rect_end
= {
683 .x
= scissor_rect
.offset
.x
+ static_cast<std::int32_t>(scissor_rect
.extent
.width
),
684 .y
= scissor_rect
.offset
.y
+ static_cast<std::int32_t>(scissor_rect
.extent
.height
),
686 if(clipped_scissor_rect_min
.x
< 0)
687 clipped_scissor_rect_min
.x
= 0;
688 if(clipped_scissor_rect_min
.y
< 0)
689 clipped_scissor_rect_min
.y
= 0;
690 if(clipped_scissor_rect_end
.x
> color_attachment
.descriptor
.extent
.width
)
691 clipped_scissor_rect_end
.x
= color_attachment
.descriptor
.extent
.width
;
692 if(clipped_scissor_rect_end
.y
< color_attachment
.descriptor
.extent
.height
)
693 clipped_scissor_rect_end
.y
= color_attachment
.descriptor
.extent
.height
;
// Detect an empty clipped scissor rectangle (end <= min on either axis).
// NOTE(review): the statements executed when these conditions hold are not visible in this
// listing — presumably an early exit; confirm.
694 if(clipped_scissor_rect_end
.x
<= clipped_scissor_rect_min
.x
)
696 if(clipped_scissor_rect_end
.y
<= clipped_scissor_rect_min
.y
)
// Per-triangle rasterization loop. For each vertex: perform the perspective divide (storing
// 1/w in .w and x/w, y/w, z/w in .x/.y/.z), then apply the viewport transform to obtain
// framebuffer coordinates.
// NOTE(review): the fourth argument of the framebuffer_coordinates Vec4 is not visible in this
// listing.
698 for(std::size_t triangle_index
= 0; triangle_index
< triangles
.size(); triangle_index
++)
700 Triangle triangle
= triangles
[triangle_index
];
701 Vec4 projected_triangle_and_inv_w
[triangle_vertex_count
];
702 Vec4 framebuffer_coordinates
[triangle_vertex_count
];
703 for(std::size_t i
= 0; i
< triangle_vertex_count
; i
++)
705 projected_triangle_and_inv_w
[i
].w
= 1.0f
/ triangle
.vertexes
[i
].w
;
706 projected_triangle_and_inv_w
[i
].x
=
707 triangle
.vertexes
[i
].x
* projected_triangle_and_inv_w
[i
].w
;
708 projected_triangle_and_inv_w
[i
].y
=
709 triangle
.vertexes
[i
].y
* projected_triangle_and_inv_w
[i
].w
;
710 projected_triangle_and_inv_w
[i
].z
=
711 triangle
.vertexes
[i
].z
* projected_triangle_and_inv_w
[i
].w
;
712 framebuffer_coordinates
[i
] =
713 Vec4(projected_triangle_and_inv_w
[i
].x
* viewport_x_scale
+ viewport_x_offset
,
714 projected_triangle_and_inv_w
[i
].y
* viewport_y_scale
+ viewport_y_offset
,
715 projected_triangle_and_inv_w
[i
].z
* viewport_z_scale
+ viewport_z_offset
,
// Winding test: accumulates x2 * y1 - x1 * y2 over the three edges (a signed-area style sum in
// framebuffer space). A value that is neither < 0 nor > 0 means zero area or a NaN coordinate.
// NOTE(review): the statement executed for such triangles is not visible in this listing —
// presumably the triangle is skipped; confirm.
718 float orientation
= 0;
719 for(std::size_t start_vertex_index
= 0, end_vertex_index
= 1;
720 start_vertex_index
< triangle_vertex_count
;
721 start_vertex_index
++)
723 float x1
= framebuffer_coordinates
[start_vertex_index
].x
;
724 float y1
= framebuffer_coordinates
[start_vertex_index
].y
;
725 float x2
= framebuffer_coordinates
[end_vertex_index
].x
;
726 float y2
= framebuffer_coordinates
[end_vertex_index
].y
;
727 orientation
+= x2
* y1
- x1
* y2
;
728 if(++end_vertex_index
>= triangle_vertex_count
)
729 end_vertex_index
= 0;
731 if(!(orientation
< 0)
732 && !(orientation
> 0)) // zero area triangle or triangle coordinate is NaN
// Integer bounding box of the triangle as half-open ranges [min_x, end_x) x [min_y, end_y).
// Each vertex contributes the pixel containing it (truncated coordinate) and that pixel + 1
// as the end. The box is then intersected with the clipped scissor rectangle.
// NOTE(review): the declaration and update of `first` are not visible in this listing.
734 // orientation > 0 for counter-clockwise triangle
735 // orientation < 0 for clockwise triangle
736 std::int32_t min_x
, end_x
, min_y
, end_y
;
738 for(std::size_t i
= 0; i
< triangle_vertex_count
; i
++)
740 // x and y will be >= 0 so we can use truncate instead of floor for speed
741 auto current_min_x
= static_cast<std::int32_t>(framebuffer_coordinates
[i
].x
);
742 auto current_min_y
= static_cast<std::int32_t>(framebuffer_coordinates
[i
].y
);
743 std::int32_t current_end_x
= current_min_x
+ 1;
744 std::int32_t current_end_y
= current_min_y
+ 1;
745 if(first
|| current_min_x
< min_x
)
746 min_x
= current_min_x
;
747 if(first
|| current_end_x
> end_x
)
748 end_x
= current_end_x
;
749 if(first
|| current_min_y
< min_y
)
750 min_y
= current_min_y
;
751 if(first
|| current_end_y
> end_y
)
752 end_y
= current_end_y
;
// Intersect the bounding box with the scissor rectangle.
755 if(min_x
< clipped_scissor_rect_min
.x
)
756 min_x
= clipped_scissor_rect_min
.x
;
757 if(end_x
> clipped_scissor_rect_end
.x
)
758 end_x
= clipped_scissor_rect_end
.x
;
759 if(min_y
< clipped_scissor_rect_min
.y
)
760 min_y
= clipped_scissor_rect_min
.y
;
761 if(end_y
> clipped_scissor_rect_end
.y
)
762 end_y
= clipped_scissor_rect_end
.y
;
// Fixed-point setup for edge functions: coordinates are scaled by 2^16 and held in 64-bit
// integers. An Edge_equation stores coefficients (a, b, c) plus a padding word; a pixel
// (x, y) is inside the edge when a * x + b * y + c >= 0.
// NOTE(review): the `struct Edge_equation` line and the rest of the three-argument
// constructor's initializer list are not visible in this listing.
763 constexpr int log2_scale
= 16;
764 constexpr auto scale
= 1LL << log2_scale
;
765 typedef std::int64_t Edge_equation_integer_type
;
768 Edge_equation_integer_type a
;
769 Edge_equation_integer_type b
;
770 Edge_equation_integer_type c
;
771 Edge_equation_integer_type padding
;
772 constexpr Edge_equation() noexcept
: a(), b(), c(), padding()
775 constexpr Edge_equation(Edge_equation_integer_type a
,
776 Edge_equation_integer_type b
,
777 Edge_equation_integer_type c
) noexcept
: a(a
),
783 constexpr bool inside(std::int32_t x
, std::int32_t y
) const noexcept
785 return a
* x
+ b
* y
+ c
>= 0;
// Builds one Edge_equation per triangle edge. For each edge (start -> end, with `other` being
// the remaining vertex) the endpoints are converted to fixed point. If both endpoints coincide
// after rounding, the triangle is flagged to be skipped. Otherwise the line coefficients are
// derived from the endpoints: a from the y difference, b from the x difference, c from the
// cross term, with c shifted by (a + b) / 2 so the test samples the pixel centre.
// NOTE(review): the statements that derive the final a, b, c from the *_fixed values, the
// orientation handling, and the body of the top-left fix-up are not visible in this listing.
788 Edge_equation edge_equations
[triangle_vertex_count
];
789 bool skip_triangle
= false;
790 for(std::size_t start_vertex_index
= 0, end_vertex_index
= 1, other_vertex_index
= 2;
791 start_vertex_index
< triangle_vertex_count
;
792 start_vertex_index
++)
794 float x1_float
= framebuffer_coordinates
[start_vertex_index
].x
;
795 float y1_float
= framebuffer_coordinates
[start_vertex_index
].y
;
796 float x2_float
= framebuffer_coordinates
[end_vertex_index
].x
;
797 float y2_float
= framebuffer_coordinates
[end_vertex_index
].y
;
798 [[gnu::unused
]] float x3_float
= framebuffer_coordinates
[other_vertex_index
].x
;
799 [[gnu::unused
]] float y3_float
= framebuffer_coordinates
[other_vertex_index
].y
;
// Convert to fixed point by scaling and truncating.
800 auto x1_fixed
= static_cast<Edge_equation_integer_type
>(x1_float
* scale
);
801 auto y1_fixed
= static_cast<Edge_equation_integer_type
>(y1_float
* scale
);
802 auto x2_fixed
= static_cast<Edge_equation_integer_type
>(x2_float
* scale
);
803 auto y2_fixed
= static_cast<Edge_equation_integer_type
>(y2_float
* scale
);
804 [[gnu::unused
]] auto x3_fixed
=
805 static_cast<Edge_equation_integer_type
>(x3_float
* scale
);
806 [[gnu::unused
]] auto y3_fixed
=
807 static_cast<Edge_equation_integer_type
>(y3_float
* scale
);
808 Edge_equation_integer_type a
;
809 Edge_equation_integer_type b
;
810 Edge_equation_integer_type c
;
812 // solve a * x1 + b * y1 + c == 0 &&
813 // a * x2 + b * y2 + c == 0 &&
814 // a * x3 + b * y3 + c >= 0
815 if(x1_fixed
== x2_fixed
&& y1_fixed
== y2_fixed
)
817 // rounded to a zero-area triangle
818 skip_triangle
= true;
821 Edge_equation_integer_type a_fixed
= (y1_fixed
- y2_fixed
) * scale
;
822 Edge_equation_integer_type b_fixed
= (x2_fixed
- x1_fixed
) * scale
;
823 Edge_equation_integer_type c_fixed
=
824 (x1_fixed
* y2_fixed
- x2_fixed
* y1_fixed
);
826 // offset to end up checking at pixel center instead of top-left pixel corner
827 c_fixed
+= (a_fixed
+ b_fixed
) / 2;
840 // handle top-left fill rule
841 if(a
< 0 || (a
== 0 && b
< 0))
843 // not a top-left edge, fixup c
844 // effectively changes the '>=' to '>' in Edge_equation::inside
848 edge_equations
[start_vertex_index
] = Edge_equation(a
, b
, c
);
// Advance the end/other vertex indexes cyclically.
849 if(++end_vertex_index
>= triangle_vertex_count
)
850 end_vertex_index
= 0;
851 if(++other_vertex_index
>= triangle_vertex_count
)
852 other_vertex_index
= 0;
// Pixel loop over the triangle's bounding box. A pixel is covered when it is inside all three
// edge equations. The pixel's address is the attachment base plus x * pixel size plus
// y * row stride.
// NOTE(review): the declaration of `inside`, the coverage test, and what is written to `pixel`
// (the fragment shader call through `fs`) are not visible in this listing.
856 auto fs
= this->fragment_shader_function
;
857 for(std::int32_t y
= min_y
; y
< end_y
; y
++)
859 for(std::int32_t x
= min_x
; x
< end_x
; x
++)
862 for(auto &edge_equation
: edge_equations
)
864 inside
&= edge_equation
.inside(x
, y
);
868 auto *pixel
= reinterpret_cast<Pixel_type
*>(
869 color_attachment_memory
870 + (static_cast<std::size_t>(x
) * color_attachment_pixel_size
871 + static_cast<std::size_t>(y
) * color_attachment_stride
));
// Graphics_pipeline::make: builds a graphics pipeline from a VkGraphicsPipelineCreateInfo by
// translating each shader stage from SPIR-V to LLVM IR, JIT-compiling it, and locating the
// vertex shader's Position output.
// Throws std::runtime_error for unsupported or invalid input. Derived pipelines
// (VK_PIPELINE_CREATE_DERIVATIVE_BIT) are rejected. Code generation uses the default
// optimization level unless VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT is set.
// `pipeline_cache` is not referenced in the code visible here.
880 std::unique_ptr
<Graphics_pipeline
> Graphics_pipeline::make(
881 Pipeline_cache
*pipeline_cache
, const VkGraphicsPipelineCreateInfo
&create_info
)
883 assert(create_info
.sType
== VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO
);
884 auto *render_pass
= Render_pass_handle::from_handle(create_info
.renderPass
);
886 auto *pipeline_layout
= Pipeline_layout_handle::from_handle(create_info
.layout
);
887 assert(pipeline_layout
);
888 if(create_info
.flags
& VK_PIPELINE_CREATE_DERIVATIVE_BIT
)
890 #warning implement creating derived pipelines
891 throw std::runtime_error("creating derived pipelines is not implemented");
893 auto implementation
= std::make_shared
<Implementation
>();
894 auto optimization_level
= ::LLVMCodeGenLevelDefault
;
895 if(create_info
.flags
& VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT
)
896 optimization_level
= ::LLVMCodeGenLevelNone
;
897 auto llvm_target_machine
=
898 llvm_wrapper::Target_machine::create_native_target_machine(optimization_level
);
// Reserve one slot per stage so compiled_shaders does not reallocate while being filled.
899 implementation
->compiled_shaders
.reserve(create_info
.stageCount
);
900 util::Enum_set
<spirv::Execution_model
> found_shader_stages
;
// Per-stage translation. Each stage must map to exactly one execution model (asserted), and a
// model seen twice raises "duplicate shader stage". The SPIR-V is first parsed with
// Dump_callbacks and the dump printed to stderr, then converted to an LLVM module, dumped,
// verified with LLVMVerifyModule, and appended to compiled_shaders.
// NOTE(review): the try block around spirv::parse, the handling inside the catch, some
// spirv_to_llvm arguments, and the condition guarding the verification throw are not visible
// in this listing.
901 for(std::size_t i
= 0; i
< create_info
.stageCount
; i
++)
903 auto &stage_info
= create_info
.pStages
[i
];
904 auto execution_models
=
905 vulkan::get_execution_models_from_shader_stage_flags(stage_info
.stage
);
906 assert(execution_models
.size() == 1);
907 auto execution_model
= *execution_models
.begin();
908 bool added_to_found_shader_stages
=
909 std::get
<1>(found_shader_stages
.insert(execution_model
));
910 if(!added_to_found_shader_stages
)
911 throw std::runtime_error("duplicate shader stage");
912 auto *shader_module
= Shader_module_handle::from_handle(stage_info
.module
);
913 assert(shader_module
);
// Debug dump of the SPIR-V module.
915 spirv::Dump_callbacks dump_callbacks
;
918 spirv::parse(dump_callbacks
, shader_module
->words(), shader_module
->word_count());
920 catch(spirv::Parser_error
&e
)
922 std::cerr
<< dump_callbacks
.ss
.str() << std::endl
;
925 std::cerr
<< dump_callbacks
.ss
.str() << std::endl
;
927 assert(create_info
.pVertexInputState
);
928 assert(create_info
.pVertexInputState
->sType
== VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO
);
// The current number of compiled shaders is passed as an argument to the conversion.
929 auto compiled_shader
= spirv_to_llvm::spirv_to_llvm(implementation
->llvm_context
.get(),
930 llvm_target_machine
.get(),
931 shader_module
->words(),
932 shader_module
->word_count(),
933 implementation
->compiled_shaders
.size(),
936 create_info
.pVertexInputState
);
937 std::cerr
<< "Translation to LLVM succeeded." << std::endl
;
938 ::LLVMDumpModule(compiled_shader
.module
.get());
940 ::LLVMVerifyModule(compiled_shader
.module
.get(), ::LLVMPrintMessageAction
, nullptr);
942 throw std::runtime_error("LLVM module verification failed");
943 implementation
->compiled_shaders
.push_back(std::move(compiled_shader
));
// JIT setup. The data layout is taken from the target machine before the machine is moved
// into the ORC compile stack, which uses optimize_module as its optimization callback.
// The locals below collect the results of scanning the compiled shaders.
945 implementation
->data_layout
= llvm_target_machine
.create_target_data_layout();
946 implementation
->jit_stack
=
947 llvm_wrapper::Orc_compile_stack::create(std::move(llvm_target_machine
), optimize_module
);
948 Vertex_shader_function vertex_shader_function
= nullptr;
949 std::size_t vertex_shader_output_struct_size
= 0;
950 util::optional
<std::size_t> vertex_shader_position_output_offset
;
951 Fragment_shader_function fragment_shader_function
= nullptr;
// Compiles each converted module with the JIT and records its entry point by execution model.
// The module is handed to the JIT eagerly with Jit_symbol_resolver::resolve as the symbol
// resolver, and the entry function's address is looked up, logged and asserted non-null.
// Vertex shaders also record the output struct's ABI size and the byte offset of the member
// decorated with the Position built-in. That member is searched for directly in the output
// struct and one level down inside struct-typed members (adding the enclosing member's offset).
// A missing Position output raises std::runtime_error. Geometry, tessellation and compute
// stages are rejected with std::runtime_error.
// NOTE(review): break/continue/brace lines are not visible in this listing, so the exact
// control flow (in particular around the fragment case and its two #warning lines) must be
// confirmed against the full file.
952 for(auto &compiled_shader
: implementation
->compiled_shaders
)
954 implementation
->jit_stack
.add_eagerly_compiled_ir(
955 std::move(compiled_shader
.module
),
956 &spirv_to_llvm::Jit_symbol_resolver::resolve
,
957 static_cast<void *>(&implementation
->jit_symbol_resolver
));
958 auto shader_entry_point_address
= implementation
->jit_stack
.get_symbol_address(
959 compiled_shader
.entry_function_name
.c_str());
960 std::cerr
<< "shader entry: " << compiled_shader
.entry_function_name
<< ": "
961 << reinterpret_cast<void *>(shader_entry_point_address
) << std::endl
;
962 assert(shader_entry_point_address
);
963 switch(compiled_shader
.execution_model
)
965 case spirv::Execution_model::fragment
:
966 fragment_shader_function
=
967 reinterpret_cast<Fragment_shader_function
>(shader_entry_point_address
);
968 #warning finish implementing Graphics_pipeline::make
970 #warning finish implementing Graphics_pipeline::make
971 throw std::runtime_error("creating fragment shaders is not implemented");
972 case spirv::Execution_model::geometry
:
973 #warning finish implementing Graphics_pipeline::make
974 throw std::runtime_error("creating geometry shaders is not implemented");
975 case spirv::Execution_model::gl_compute
:
976 case spirv::Execution_model::kernel
:
977 throw std::runtime_error("can't create compute shaders from Graphics_pipeline::make");
978 case spirv::Execution_model::tessellation_control
:
979 case spirv::Execution_model::tessellation_evaluation
:
980 #warning finish implementing Graphics_pipeline::make
981 throw std::runtime_error("creating tessellation shaders is not implemented");
982 case spirv::Execution_model::vertex
:
984 vertex_shader_function
=
985 reinterpret_cast<Vertex_shader_function
>(shader_entry_point_address
);
986 implementation
->vertex_shader_output_struct
= compiled_shader
.outputs_struct
;
987 auto llvm_vertex_shader_output_struct
=
988 implementation
->vertex_shader_output_struct
->get_or_make_type().type
;
989 vertex_shader_output_struct_size
= ::LLVMABISizeOfType(
990 implementation
->data_layout
.get(), llvm_vertex_shader_output_struct
);
// Search the top-level members for the Position built-in.
991 for(auto &member
: implementation
->vertex_shader_output_struct
->get_members(true))
993 for(auto &decoration
: member
.decorations
)
995 if(decoration
.value
== spirv::Decoration::built_in
)
998 util::get
<spirv::Decoration_built_in_parameters
>(decoration
.parameters
);
999 if(builtin
.built_in
== spirv::Built_in::position
)
1001 vertex_shader_position_output_offset
=
1002 ::LLVMOffsetOfElement(implementation
->data_layout
.get(),
1003 llvm_vertex_shader_output_struct
,
1004 member
.llvm_member_index
);
1009 if(vertex_shader_position_output_offset
)
// Otherwise look one level down, inside struct-typed members.
1011 if(auto *struct_type
=
1012 dynamic_cast<spirv_to_llvm::Struct_type_descriptor
*>(member
.type
.get()))
1014 std::size_t struct_offset
=
1015 ::LLVMOffsetOfElement(implementation
->data_layout
.get(),
1016 llvm_vertex_shader_output_struct
,
1017 member
.llvm_member_index
);
1018 auto llvm_struct_type
= struct_type
->get_or_make_type().type
;
1019 for(auto &submember
: struct_type
->get_members(true))
1021 for(auto &decoration
: submember
.decorations
)
1023 if(decoration
.value
== spirv::Decoration::built_in
)
1025 auto &builtin
= util::get
<spirv::Decoration_built_in_parameters
>(
1026 decoration
.parameters
);
1027 if(builtin
.built_in
== spirv::Built_in::position
)
1029 vertex_shader_position_output_offset
=
1031 + ::LLVMOffsetOfElement(implementation
->data_layout
.get(),
1033 submember
.llvm_member_index
);
1038 if(vertex_shader_position_output_offset
)
1042 if(vertex_shader_position_output_offset
)
1045 if(!vertex_shader_position_output_offset
)
1046 throw std::runtime_error("can't find vertex shader Position output");
1047 #warning finish implementing Graphics_pipeline::make
1051 throw std::runtime_error("unknown shader kind");
1053 #warning finish implementing Graphics_pipeline::make
// Final validation and construction. A vertex shader is mandatory, and exactly one viewport is
// supported; the viewport and scissor lists must both be present. The pipeline takes ownership
// of the Implementation and copies the first viewport and the first scissor rectangle.
1054 if(!vertex_shader_function
)
1055 throw std::runtime_error("graphics pipeline doesn't have vertex shader");
1056 if(!create_info
.pViewportState
)
1057 throw std::runtime_error("missing viewport state");
1058 if(create_info
.pViewportState
->viewportCount
!= 1)
1059 throw std::runtime_error("unimplemented viewport count");
1060 if(!create_info
.pViewportState
->pViewports
)
1061 throw std::runtime_error("missing viewport list");
1062 if(!create_info
.pViewportState
->pScissors
)
1063 throw std::runtime_error("missing scissor rectangle list");
// The offset is dereferenced below, so it must have been found.
1064 assert(vertex_shader_position_output_offset
);
1065 return std::unique_ptr
<Graphics_pipeline
>(
1066 new Graphics_pipeline(std::move(implementation
),
1067 vertex_shader_function
,
1068 vertex_shader_output_struct_size
,
1069 *vertex_shader_position_output_offset
,
1070 fragment_shader_function
,
1071 create_info
.pViewportState
->pViewports
[0],
1072 create_info
.pViewportState
->pScissors
[0]));