2 * Copyright 2017 Jacob Lifshay
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in all
12 * copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "spirv_to_llvm/spirv_to_llvm.h"
25 #include "llvm_wrapper/llvm_wrapper.h"
26 #include "llvm_wrapper/orc_compile_stack.h"
27 #include "vulkan/util.h"
28 #include "util/soft_float.h"
29 #include "json/json.h"
39 llvm_wrapper::Module
Pipeline::optimize_module(llvm_wrapper::Module module
,
40 ::LLVMTargetMachineRef target_machine
)
42 switch(llvm_wrapper::Target_machine::get_code_gen_opt_level(target_machine
))
44 case ::LLVMCodeGenLevelNone
:
45 case ::LLVMCodeGenLevelLess
:
47 case ::LLVMCodeGenLevelDefault
:
48 case ::LLVMCodeGenLevelAggressive
:
50 #warning finish implementing module optimizations
52 auto manager
= llvm_wrapper::Pass_manager::create_function_pass_manager(module
.get());
53 ::LLVMAddAnalysisPasses(target_machine
, manager
.get());
54 ::LLVMAddPromoteMemoryToRegisterPass(manager
.get());
55 ::LLVMAddScalarReplAggregatesPass(manager
.get());
56 ::LLVMAddScalarizerPass(manager
.get());
57 ::LLVMAddEarlyCSEMemSSAPass(manager
.get());
58 ::LLVMAddSCCPPass(manager
.get());
59 ::LLVMAddAggressiveDCEPass(manager
.get());
60 ::LLVMAddLICMPass(manager
.get());
61 ::LLVMAddIndVarSimplifyPass(manager
.get());
62 ::LLVMAddCFGSimplificationPass(manager
.get());
63 ::LLVMAddReassociatePass(manager
.get());
64 ::LLVMAddInstructionCombiningPass(manager
.get());
65 ::LLVMAddNewGVNPass(manager
.get());
66 ::LLVMAddCorrelatedValuePropagationPass(manager
.get());
67 ::LLVMInitializeFunctionPassManager(manager
.get());
68 for(auto fn
= ::LLVMGetFirstFunction(module
.get()); fn
; fn
= ::LLVMGetNextFunction(fn
))
69 ::LLVMRunFunctionPassManager(manager
.get(), fn
);
70 ::LLVMFinalizeFunctionPassManager(manager
.get());
73 auto manager
= llvm_wrapper::Pass_manager::create_module_pass_manager();
74 ::LLVMAddAnalysisPasses(target_machine
, manager
.get());
75 ::LLVMAddIPSCCPPass(manager
.get());
76 ::LLVMAddFunctionInliningPass(manager
.get());
77 ::LLVMAddDeadArgEliminationPass(manager
.get());
78 ::LLVMAddGlobalDCEPass(manager
.get());
79 ::LLVMRunPassManager(manager
.get(), module
.get());
82 auto manager
= llvm_wrapper::Pass_manager::create_function_pass_manager(module
.get());
83 ::LLVMAddAnalysisPasses(target_machine
, manager
.get());
84 ::LLVMAddCFGSimplificationPass(manager
.get());
85 ::LLVMAddPromoteMemoryToRegisterPass(manager
.get());
86 ::LLVMAddScalarReplAggregatesPass(manager
.get());
87 ::LLVMAddLICMPass(manager
.get());
88 ::LLVMAddIndVarSimplifyPass(manager
.get());
89 ::LLVMAddReassociatePass(manager
.get());
90 ::LLVMAddInstructionCombiningPass(manager
.get());
91 ::LLVMAddLoopUnrollPass(manager
.get());
92 ::LLVMAddSLPVectorizePass(manager
.get());
93 ::LLVMAddAggressiveDCEPass(manager
.get());
94 ::LLVMInitializeFunctionPassManager(manager
.get());
95 for(auto fn
= ::LLVMGetFirstFunction(module
.get()); fn
; fn
= ::LLVMGetNextFunction(fn
))
96 ::LLVMRunFunctionPassManager(manager
.get(), fn
);
97 ::LLVMFinalizeFunctionPassManager(manager
.get());
99 std::cerr
<< "optimized module:" << std::endl
;
100 ::LLVMDumpModule(module
.get());
// Private implementation state of a Graphics_pipeline; Graphics_pipeline::make
// creates one instance with std::make_shared and fills it in.
107 struct Graphics_pipeline::Implementation
// LLVM context; make() passes it to spirv_to_llvm when translating each shader stage.
109 llvm_wrapper::Context llvm_context
= llvm_wrapper::Context::create();
// make() passes this object's address to the JIT stack together with
// Jit_symbol_resolver::resolve when adding compiled IR.
110 spirv_to_llvm::Jit_symbol_resolver jit_symbol_resolver
;
// JIT compile stack; assigned in make() from Orc_compile_stack::create and then
// queried for the shader entry point addresses.
111 llvm_wrapper::Orc_compile_stack jit_stack
;
// Target data layout taken from the target machine in make(); used for ABI sizes
// and struct member offsets.
112 llvm_wrapper::Target_data data_layout
;
// One converted module per shader stage listed in the pipeline create info.
113 std::vector
<spirv_to_llvm::Converted_module
> compiled_shaders
;
// Type descriptor of the vertex shader's outputs struct; set in make() from the
// vertex stage's `outputs_struct`.
114 std::shared_ptr
<spirv_to_llvm::Struct_type_descriptor
> vertex_shader_output_struct
;
115 std::string
append_value_to_string(std::string str
,
116 spirv_to_llvm::Type_descriptor
&type
,
117 const void *value
) const
119 struct Visitor
: public spirv_to_llvm::Type_descriptor::Type_visitor
121 const Implementation
*this_
;
124 Visitor(const Implementation
*this_
, std::string
&str
, const void *value
) noexcept
130 virtual void visit(spirv_to_llvm::Simple_type_descriptor
&type
) override
132 auto llvm_type
= type
.get_or_make_type().type
;
133 switch(::LLVMGetTypeKind(llvm_type
))
135 case ::LLVMVoidTypeKind
:
136 case ::LLVMX86_FP80TypeKind
:
137 case ::LLVMFP128TypeKind
:
138 case ::LLVMPPC_FP128TypeKind
:
139 case ::LLVMLabelTypeKind
:
140 case ::LLVMFunctionTypeKind
:
141 case ::LLVMStructTypeKind
:
142 case ::LLVMArrayTypeKind
:
143 case ::LLVMPointerTypeKind
:
144 case ::LLVMVectorTypeKind
:
145 case ::LLVMMetadataTypeKind
:
146 case ::LLVMX86_MMXTypeKind
:
147 case ::LLVMTokenTypeKind
:
149 case ::LLVMHalfTypeKind
:
151 auto integer_value
= *static_cast<const std::uint16_t *>(value
);
153 util::soft_float::ExtendedFloat::fromHalfPrecision(integer_value
);
154 str
= json::ast::Number_value::append_double_to_string(
155 static_cast<double>(float_value
), std::move(str
));
156 if(float_value
.isNaN())
159 str
= json::ast::Number_value::append_unsigned_integer_to_string(
160 integer_value
, std::move(str
), 0x10);
165 case ::LLVMFloatTypeKind
:
167 static_assert(sizeof(std::uint32_t) == sizeof(float)
168 && alignof(std::uint32_t) == alignof(float),
172 std::uint32_t integer_value
;
175 integer_value
= *static_cast<const std::uint32_t *>(value
);
176 str
= json::ast::Number_value::append_double_to_string(float_value
,
178 if(std::isnan(float_value
))
181 str
= json::ast::Number_value::append_unsigned_integer_to_string(
182 integer_value
, std::move(str
), 0x10);
187 case ::LLVMDoubleTypeKind
:
189 static_assert(sizeof(std::uint64_t) == sizeof(double)
190 && alignof(std::uint64_t) == alignof(double),
194 std::uint64_t integer_value
;
197 integer_value
= *static_cast<const std::uint64_t *>(value
);
198 str
= json::ast::Number_value::append_double_to_string(float_value
,
200 if(std::isnan(float_value
))
203 str
= json::ast::Number_value::append_unsigned_integer_to_string(
204 integer_value
, std::move(str
), 0x10);
209 case ::LLVMIntegerTypeKind
:
211 switch(::LLVMGetIntTypeWidth(llvm_type
))
215 auto integer_value
= *static_cast<const std::uint8_t *>(value
);
217 str
= json::ast::Number_value::append_unsigned_integer_to_string(
218 integer_value
, std::move(str
), 0x10);
220 str
= json::ast::Number_value::append_unsigned_integer_to_string(
221 integer_value
, std::move(str
));
223 str
= json::ast::Number_value::append_signed_integer_to_string(
224 static_cast<std::int8_t>(integer_value
), std::move(str
));
229 auto integer_value
= *static_cast<const std::uint16_t *>(value
);
231 str
= json::ast::Number_value::append_unsigned_integer_to_string(
232 integer_value
, std::move(str
), 0x10);
234 str
= json::ast::Number_value::append_unsigned_integer_to_string(
235 integer_value
, std::move(str
));
237 str
= json::ast::Number_value::append_signed_integer_to_string(
238 static_cast<std::int16_t>(integer_value
), std::move(str
));
243 auto integer_value
= *static_cast<const std::uint32_t *>(value
);
245 str
= json::ast::Number_value::append_unsigned_integer_to_string(
246 integer_value
, std::move(str
), 0x10);
248 str
= json::ast::Number_value::append_unsigned_integer_to_string(
249 integer_value
, std::move(str
));
251 str
= json::ast::Number_value::append_signed_integer_to_string(
252 static_cast<std::int32_t>(integer_value
), std::move(str
));
257 auto integer_value
= *static_cast<const std::uint64_t *>(value
);
259 str
= json::ast::Number_value::append_unsigned_integer_to_string(
260 integer_value
, std::move(str
), 0x10);
262 str
= json::ast::Number_value::append_unsigned_integer_to_string(
263 integer_value
, std::move(str
));
265 str
= json::ast::Number_value::append_signed_integer_to_string(
266 static_cast<std::int64_t>(integer_value
), std::move(str
));
273 assert(!"unhandled type");
274 throw std::runtime_error("unhandled type");
// Appends the elements of a vector value to `str`.
// The element stride is the ABI size of the element's LLVM type under the
// implementation's data layout; element `i` is read from `value + i * element_size`
// and formatted by recursing into append_value_to_string with the element type.
276 virtual void visit(spirv_to_llvm::Vector_type_descriptor
&type
) override
278 auto llvm_element_type
= type
.get_element_type()->get_or_make_type().type
;
279 std::size_t element_size
=
280 ::LLVMABISizeOfType(this_
->data_layout
.get(), llvm_element_type
);
281 std::size_t element_count
= type
.get_element_count();
284 for(std::size_t i
= 0; i
< element_count
; i
++)
288 str
= this_
->append_value_to_string(
290 *type
.get_element_type(),
// byte-wise pointer arithmetic: step `i` elements into the vector's storage
291 static_cast<const char *>(value
) + i
* element_size
);
// Matrix values cannot be dumped yet: asserts in debug builds and otherwise
// throws std::runtime_error. The #warning keeps the gap visible at compile time.
295 virtual void visit(spirv_to_llvm::Matrix_type_descriptor
&type
) override
297 assert(!"dumping matrix not implemented");
298 throw std::runtime_error("dumping matrix not implemented");
299 #warning dumping matrix not implemented
// Appends the elements of an array value to `str`.
// Mirrors the vector case: the stride is the ABI size of the element's LLVM type,
// and each element at `value + i * element_size` is formatted by recursing into
// append_value_to_string with the element type.
301 virtual void visit(spirv_to_llvm::Array_type_descriptor
&type
) override
303 auto llvm_element_type
= type
.get_element_type()->get_or_make_type().type
;
304 std::size_t element_size
=
305 ::LLVMABISizeOfType(this_
->data_layout
.get(), llvm_element_type
);
306 std::size_t element_count
= type
.get_element_count();
309 for(std::size_t i
= 0; i
< element_count
; i
++)
313 str
= this_
->append_value_to_string(
315 *type
.get_element_type(),
// byte-wise pointer arithmetic: step `i` elements into the array's storage
316 static_cast<const char *>(value
) + i
* element_size
);
// Appends a pointer value to `str`: `value` is treated as the address of a stored
// pointer, which is loaded and converted to a 64-bit unsigned integer for
// formatting.
// NOTE(review): the remaining arguments of the formatting call (including any
// radix) are not visible in this view — confirm against the full source.
320 virtual void visit(spirv_to_llvm::Pointer_type_descriptor
&type
) override
323 str
= json::ast::Number_value::append_unsigned_integer_to_string(
324 reinterpret_cast<std::uint64_t>(*static_cast<const void *const *>(value
)),
// Appends a function pointer value to `str`: writes the "function:0x" prefix, then
// the pointer stored at `value` converted to a 64-bit unsigned integer.
// NOTE(review): the prefix suggests hexadecimal output, but the radix argument of
// the formatting call is not visible in this view — confirm.
328 virtual void visit(spirv_to_llvm::Function_type_descriptor
&type
) override
330 str
+= "function:0x";
331 str
= json::ast::Number_value::append_unsigned_integer_to_string(
332 reinterpret_cast<std::uint64_t>(*static_cast<const void *const *>(value
)),
// Appends the members of a struct value to `str`.
// Each member is located at `value` plus the byte offset that the data layout
// reports for the member's LLVM element index, and is formatted by recursing into
// append_value_to_string.
336 virtual void visit(spirv_to_llvm::Struct_type_descriptor
&type
) override
338 auto &&members
= type
.get_members(true);
339 auto llvm_type
= type
.get_or_make_type().type
;
342 for(auto &member
: members
)
346 str
= this_
->append_value_to_string(
// address of this member inside the struct's storage
349 static_cast<const char *>(value
)
350 + ::LLVMOffsetOfElement(
351 this_
->data_layout
.get(), llvm_type
, member
.llvm_member_index
));
356 type
.visit(Visitor(this, str
, value
));
// Debugging aid: writes "output: " to std::cerr followed by a textual rendering of
// the vertex shader output struct stored at `output_struct`. The rendering starts
// from an empty string and is produced by Implementation::append_value_to_string
// using the pipeline's vertex shader output struct descriptor.
361 void Graphics_pipeline::dump_vertex_shader_output_struct(const void *output_struct
) const
363 std::cerr
<< "output: "
364 << implementation
->append_value_to_string(
365 {}, *implementation
->vertex_shader_output_struct
, output_struct
)
// Draws the vertices in [vertex_start_index, vertex_end_index) into
// `color_attachment`.
//
// Vertices are processed in chunks: the vertex shader is run for a chunk, each
// consecutive group of three position outputs is assembled into a triangle, and
// the triangles are clipped against the planes 0 <= z, z <= w, -w <= x, x <= w,
// -w <= y and y <= w. Surviving triangles are projected, mapped through the
// viewport transform, and tested pixel by pixel with fixed-point edge equations
// inside the scissor rectangle clamped to the attachment.
//
// color_attachment: must use VK_IMAGE_TILING_LINEAR (asserted).
// instance_id, bindings: NOTE(review) their use is on lines not visible in this
// view — presumably forwarded to the shader invocations; confirm.
369 void Graphics_pipeline::run(std::uint32_t vertex_start_index
,
370 std::uint32_t vertex_end_index
,
371 std::uint32_t instance_id
,
372 const vulkan::Vulkan_image
&color_attachment
,
373 void *const *bindings
)
375 typedef std::uint32_t Pixel_type
;
376 assert(color_attachment
.descriptor
.tiling
== VK_IMAGE_TILING_LINEAR
);
377 auto color_attachment_memory_properties
= color_attachment
.descriptor
.get_memory_properties();
378 auto color_attachment_memory_properties_color_component
=
379 color_attachment_memory_properties
.get_color_component();
380 std::size_t color_attachment_stride
= color_attachment_memory_properties_color_component
.stride
;
381 std::size_t color_attachment_pixel_size
=
382 color_attachment_memory_properties_color_component
.pixel_size
;
383 assert(color_attachment_memory_properties_color_component
.offset_from_array_layer_start
== 0);
384 void *color_attachment_memory
= color_attachment
.memory
.get();
// Viewport transform coefficients. Later in this function each projected
// coordinate is mapped to framebuffer space as `coordinate * scale + offset`.
385 float viewport_x_scale
, viewport_x_offset
, viewport_y_scale
, viewport_y_offset
,
386 viewport_z_scale
, viewport_z_offset
;
// px/py/pz are the viewport's extents in x, y and depth; ox/oy are the viewport
// center in x and y, and oz is the minimum depth.
388 float px
= viewport
.width
;
389 float ox
= viewport
.x
+ 0.5f
* viewport
.width
;
390 float py
= viewport
.height
;
391 float oy
= viewport
.y
+ 0.5f
* viewport
.height
;
392 float pz
= viewport
.maxDepth
- viewport
.minDepth
;
393 float oz
= viewport
.minDepth
;
// x and y use half the extent as the scale and are centered on (ox, oy)
394 viewport_x_scale
= px
* 0.5f
;
395 viewport_x_offset
= ox
;
396 viewport_y_scale
= py
* 0.5f
;
397 viewport_y_offset
= oy
;
// depth uses the full depth extent as the scale, offset by minDepth
398 viewport_z_scale
= pz
;
399 viewport_z_offset
= oz
;
401 constexpr std::size_t vec4_native_alignment
= alignof(float) * 4;
402 constexpr std::size_t max_alignment
= alignof(std::max_align_t
);
403 constexpr std::size_t vec4_alignment
=
404 vec4_native_alignment
> max_alignment
? max_alignment
: vec4_native_alignment
;
405 constexpr std::size_t ivec4_native_alignment
= alignof(std::int32_t) * 4;
406 constexpr std::size_t ivec4_alignment
=
407 ivec4_native_alignment
> max_alignment
? max_alignment
: ivec4_native_alignment
;
408 struct alignas(vec4_alignment
) Vec4
414 constexpr Vec4() noexcept
: x(), y(), z(), w()
417 constexpr explicit Vec4(float x
, float y
, float z
, float w
) noexcept
: x(x
),
424 struct alignas(ivec4_alignment
) Ivec4
430 constexpr Ivec4() noexcept
: x(), y(), z(), w()
433 constexpr explicit Ivec4(std::int32_t x
,
436 std::int32_t w
) noexcept
: x(x
),
// Linear interpolation between two floats: returns v0 when t == 0 and v1 when
// t == 1. Captures nothing and is noexcept.
443 auto interpolate_float
= [](float t
, float v0
, float v1
) noexcept
->float
445 return t
* v1
+ (1.0f
- t
) * v0
;
447 auto interpolate_vec4
= [interpolate_float
](
448 float t
, const Vec4
&v0
, const Vec4
&v1
) noexcept
->Vec4
450 return Vec4(interpolate_float(t
, v0
.x
, v1
.x
),
451 interpolate_float(t
, v0
.y
, v1
.y
),
452 interpolate_float(t
, v0
.z
, v1
.z
),
453 interpolate_float(t
, v0
.w
, v1
.w
));
455 static constexpr std::size_t triangle_vertex_count
= 3;
458 Vec4 vertexes
[triangle_vertex_count
];
459 constexpr Triangle() noexcept
: vertexes
{}
462 constexpr Triangle(const Vec4
&v0
, const Vec4
&v1
, const Vec4
&v2
) noexcept
463 : vertexes
{v0
, v1
, v2
}
// Returns the interpolation parameter t at which the linear interpolation from v0
// to v1 crosses zero: t * v1 + (1 - t) * v0 == 0  =>  t == v0 / (v0 - v1).
// The result is not finite when v0 == v1; clip_edge only calls this when exactly
// one of the two signed distances is negative, so the two values differ.
467 auto solve_for_t
= [](float v0
, float v1
) noexcept
->float
469 // solves interpolate_float(t, v0, v1) == 0
470 return v0
/ (v0
- v1
);
472 auto clip_edge
= [solve_for_t
, interpolate_vec4
](const Vec4
&start_vertex
,
473 const Vec4
&end_vertex
,
474 Vec4
*output_vertexes
,
475 std::size_t &output_vertex_count
,
476 auto eval_vertex
) -> bool
478 // eval_vertex returns a non-negative number if the vertex is inside the clip volume
479 float start_vertex_signed_distance
= eval_vertex(start_vertex
);
480 float end_vertex_signed_distance
= eval_vertex(end_vertex
);
481 if(start_vertex_signed_distance
!= start_vertex_signed_distance
)
482 return false; // triangle has a NaN coordinate; skip it
483 if(start_vertex_signed_distance
< 0)
485 // start_vertex is outside
486 if(end_vertex_signed_distance
< 0)
488 // end_vertex is outside; do nothing
492 // end_vertex is inside
493 output_vertexes
[output_vertex_count
++] = interpolate_vec4(
494 solve_for_t(start_vertex_signed_distance
, end_vertex_signed_distance
),
497 output_vertexes
[output_vertex_count
++] = end_vertex
;
502 // start_vertex is inside
503 if(end_vertex_signed_distance
< 0)
505 // end_vertex is outside
506 output_vertexes
[output_vertex_count
++] = interpolate_vec4(
507 solve_for_t(start_vertex_signed_distance
, end_vertex_signed_distance
),
513 // end_vertex is inside
514 output_vertexes
[output_vertex_count
++] = end_vertex
;
519 auto clip_triangles
= [clip_edge
](
520 std::vector
<Triangle
> &triangles
, std::vector
<Triangle
> &temp_triangles
, auto eval_vertex
)
522 temp_triangles
.clear();
523 for(auto &input_ref
: triangles
)
525 Triangle input
= input_ref
; // copy to enable compiler optimizations
526 constexpr std::size_t max_clipped_output_vertex_count
= 4;
527 Vec4 output_vertexes
[max_clipped_output_vertex_count
];
528 std::size_t output_vertex_count
= 0;
529 bool skip_triangle
= false;
530 std::size_t end_vertex_index
= 1;
531 for(std::size_t start_vertex_index
= 0; start_vertex_index
< triangle_vertex_count
;
532 start_vertex_index
++)
534 if(!clip_edge(input
.vertexes
[start_vertex_index
],
535 input
.vertexes
[end_vertex_index
],
540 skip_triangle
= true;
543 if(++end_vertex_index
>= triangle_vertex_count
)
544 end_vertex_index
= 0;
548 switch(output_vertex_count
)
555 temp_triangles
.push_back(
556 Triangle(output_vertexes
[0], output_vertexes
[1], output_vertexes
[2]));
559 temp_triangles
.push_back(
560 Triangle(output_vertexes
[0], output_vertexes
[1], output_vertexes
[2]));
561 temp_triangles
.push_back(
562 Triangle(output_vertexes
[0], output_vertexes
[2], output_vertexes
[3]));
565 assert(!"clipping algorithm failed");
567 temp_triangles
.swap(triangles
);
569 std::vector
<Triangle
> triangles
;
570 std::vector
<Triangle
> temp_triangles
;
571 constexpr std::size_t chunk_max_size
= 96;
572 static_assert(chunk_max_size
% triangle_vertex_count
== 0, "");
573 std::unique_ptr
<unsigned char[]> chunk_vertex_buffer(
574 new unsigned char[get_vertex_shader_output_struct_size() * chunk_max_size
]);
575 while(vertex_start_index
< vertex_end_index
)
577 std::uint32_t chunk_size
= vertex_end_index
- vertex_start_index
;
578 if(chunk_size
> chunk_max_size
)
579 chunk_size
= chunk_max_size
;
580 auto current_vertex_start_index
= vertex_start_index
;
581 vertex_start_index
+= chunk_size
;
582 run_vertex_shader(current_vertex_start_index
,
583 current_vertex_start_index
+ chunk_size
,
585 chunk_vertex_buffer
.get(),
587 const unsigned char *current_vertex
=
588 chunk_vertex_buffer
.get() + vertex_shader_position_output_offset
;
590 for(std::uint32_t i
= 0; i
+ triangle_vertex_count
<= chunk_size
;
591 i
+= triangle_vertex_count
)
594 for(std::size_t j
= 0; j
< triangle_vertex_count
; j
++)
596 triangle
.vertexes
[j
] = *reinterpret_cast<const Vec4
*>(current_vertex
);
597 current_vertex
+= vertex_shader_output_struct_size
;
599 triangles
.push_back(triangle
);
601 // clip to 0 <= vertex.z
602 clip_triangles(triangles
,
604 [](const Vec4
&vertex
) noexcept
->float
608 // clip to vertex.z <= vertex.w
609 clip_triangles(triangles
,
611 [](const Vec4
&vertex
) noexcept
->float
613 return vertex
.w
- vertex
.z
;
615 // clip to -vertex.w <= vertex.x
616 clip_triangles(triangles
,
618 [](const Vec4
&vertex
) noexcept
->float
620 return vertex
.x
+ vertex
.w
;
622 // clip to vertex.x <= vertex.w
623 clip_triangles(triangles
,
625 [](const Vec4
&vertex
) noexcept
->float
627 return vertex
.w
- vertex
.x
;
629 // clip to -vertex.w <= vertex.y
630 clip_triangles(triangles
,
632 [](const Vec4
&vertex
) noexcept
->float
634 return vertex
.y
+ vertex
.w
;
636 // clip to vertex.y <= vertex.w
637 clip_triangles(triangles
,
639 [](const Vec4
&vertex
) noexcept
->float
641 return vertex
.w
- vertex
.y
;
643 VkOffset2D clipped_scissor_rect_min
= scissor_rect
.offset
;
644 VkOffset2D clipped_scissor_rect_end
= {
645 .x
= scissor_rect
.offset
.x
+ static_cast<std::int32_t>(scissor_rect
.extent
.width
),
646 .y
= scissor_rect
.offset
.y
+ static_cast<std::int32_t>(scissor_rect
.extent
.height
),
648 if(clipped_scissor_rect_min
.x
< 0)
649 clipped_scissor_rect_min
.x
= 0;
650 if(clipped_scissor_rect_min
.y
< 0)
651 clipped_scissor_rect_min
.y
= 0;
652 if(clipped_scissor_rect_end
.x
> color_attachment
.descriptor
.extent
.width
)
653 clipped_scissor_rect_end
.x
= color_attachment
.descriptor
.extent
.width
;
654 if(clipped_scissor_rect_end
.y
< color_attachment
.descriptor
.extent
.height
)
655 clipped_scissor_rect_end
.y
= color_attachment
.descriptor
.extent
.height
;
656 if(clipped_scissor_rect_end
.x
<= clipped_scissor_rect_min
.x
)
658 if(clipped_scissor_rect_end
.y
<= clipped_scissor_rect_min
.y
)
660 for(std::size_t triangle_index
= 0; triangle_index
< triangles
.size(); triangle_index
++)
662 Triangle triangle
= triangles
[triangle_index
];
663 Vec4 projected_triangle_and_inv_w
[triangle_vertex_count
];
664 Vec4 framebuffer_coordinates
[triangle_vertex_count
];
665 for(std::size_t i
= 0; i
< triangle_vertex_count
; i
++)
667 projected_triangle_and_inv_w
[i
].w
= 1.0f
/ triangle
.vertexes
[i
].w
;
668 projected_triangle_and_inv_w
[i
].x
=
669 triangle
.vertexes
[i
].x
* projected_triangle_and_inv_w
[i
].w
;
670 projected_triangle_and_inv_w
[i
].y
=
671 triangle
.vertexes
[i
].y
* projected_triangle_and_inv_w
[i
].w
;
672 projected_triangle_and_inv_w
[i
].z
=
673 triangle
.vertexes
[i
].z
* projected_triangle_and_inv_w
[i
].w
;
674 framebuffer_coordinates
[i
] =
675 Vec4(projected_triangle_and_inv_w
[i
].x
* viewport_x_scale
+ viewport_x_offset
,
676 projected_triangle_and_inv_w
[i
].y
* viewport_y_scale
+ viewport_y_offset
,
677 projected_triangle_and_inv_w
[i
].z
* viewport_z_scale
+ viewport_z_offset
,
680 float orientation
= 0;
681 for(std::size_t start_vertex_index
= 0, end_vertex_index
= 1;
682 start_vertex_index
< triangle_vertex_count
;
683 start_vertex_index
++)
685 float x1
= framebuffer_coordinates
[start_vertex_index
].x
;
686 float y1
= framebuffer_coordinates
[start_vertex_index
].y
;
687 float x2
= framebuffer_coordinates
[end_vertex_index
].x
;
688 float y2
= framebuffer_coordinates
[end_vertex_index
].y
;
689 orientation
+= x2
* y1
- x1
* y2
;
690 if(++end_vertex_index
>= triangle_vertex_count
)
691 end_vertex_index
= 0;
693 if(!(orientation
< 0)
694 && !(orientation
> 0)) // zero area triangle or triangle coordinate is NaN
696 // orientation > 0 for counter-clockwise triangle
697 // orientation < 0 for clockwise triangle
698 std::int32_t min_x
, end_x
, min_y
, end_y
;
700 for(std::size_t i
= 0; i
< triangle_vertex_count
; i
++)
702 // x and y will be >= 0 so we can use truncate instead of floor for speed
703 auto current_min_x
= static_cast<std::int32_t>(framebuffer_coordinates
[i
].x
);
704 auto current_min_y
= static_cast<std::int32_t>(framebuffer_coordinates
[i
].y
);
705 std::int32_t current_end_x
= current_min_x
+ 1;
706 std::int32_t current_end_y
= current_min_y
+ 1;
707 if(first
|| current_min_x
< min_x
)
708 min_x
= current_min_x
;
709 if(first
|| current_end_x
> end_x
)
710 end_x
= current_end_x
;
711 if(first
|| current_min_y
< min_y
)
712 min_y
= current_min_y
;
713 if(first
|| current_end_y
> end_y
)
714 end_y
= current_end_y
;
717 if(min_x
< clipped_scissor_rect_min
.x
)
718 min_x
= clipped_scissor_rect_min
.x
;
719 if(end_x
> clipped_scissor_rect_end
.x
)
720 end_x
= clipped_scissor_rect_end
.x
;
721 if(min_y
< clipped_scissor_rect_min
.y
)
722 min_y
= clipped_scissor_rect_min
.y
;
723 if(end_y
> clipped_scissor_rect_end
.y
)
724 end_y
= clipped_scissor_rect_end
.y
;
725 constexpr int log2_scale
= 16;
726 constexpr auto scale
= 1LL << log2_scale
;
727 typedef std::int64_t Edge_equation_integer_type
;
730 Edge_equation_integer_type a
;
731 Edge_equation_integer_type b
;
732 Edge_equation_integer_type c
;
733 Edge_equation_integer_type padding
;
734 constexpr Edge_equation() noexcept
: a(), b(), c(), padding()
737 constexpr Edge_equation(Edge_equation_integer_type a
,
738 Edge_equation_integer_type b
,
739 Edge_equation_integer_type c
) noexcept
: a(a
),
745 constexpr bool inside(std::int32_t x
, std::int32_t y
) const noexcept
747 return a
* x
+ b
* y
+ c
>= 0;
750 Edge_equation edge_equations
[triangle_vertex_count
];
751 bool skip_triangle
= false;
752 for(std::size_t start_vertex_index
= 0, end_vertex_index
= 1, other_vertex_index
= 2;
753 start_vertex_index
< triangle_vertex_count
;
754 start_vertex_index
++)
756 float x1_float
= framebuffer_coordinates
[start_vertex_index
].x
;
757 float y1_float
= framebuffer_coordinates
[start_vertex_index
].y
;
758 float x2_float
= framebuffer_coordinates
[end_vertex_index
].x
;
759 float y2_float
= framebuffer_coordinates
[end_vertex_index
].y
;
760 [[gnu::unused
]] float x3_float
= framebuffer_coordinates
[other_vertex_index
].x
;
761 [[gnu::unused
]] float y3_float
= framebuffer_coordinates
[other_vertex_index
].y
;
762 auto x1_fixed
= static_cast<Edge_equation_integer_type
>(x1_float
* scale
);
763 auto y1_fixed
= static_cast<Edge_equation_integer_type
>(y1_float
* scale
);
764 auto x2_fixed
= static_cast<Edge_equation_integer_type
>(x2_float
* scale
);
765 auto y2_fixed
= static_cast<Edge_equation_integer_type
>(y2_float
* scale
);
766 [[gnu::unused
]] auto x3_fixed
=
767 static_cast<Edge_equation_integer_type
>(x3_float
* scale
);
768 [[gnu::unused
]] auto y3_fixed
=
769 static_cast<Edge_equation_integer_type
>(y3_float
* scale
);
770 Edge_equation_integer_type a
;
771 Edge_equation_integer_type b
;
772 Edge_equation_integer_type c
;
774 // solve a * x1 + b * y1 + c == 0 &&
775 // a * x2 + b * y2 + c == 0 &&
776 // a * x3 + b * y3 + c >= 0
777 if(x1_fixed
== x2_fixed
&& y1_fixed
== y2_fixed
)
779 // rounded to a zero-area triangle
780 skip_triangle
= true;
783 Edge_equation_integer_type a_fixed
= (y1_fixed
- y2_fixed
) * scale
;
784 Edge_equation_integer_type b_fixed
= (x2_fixed
- x1_fixed
) * scale
;
785 Edge_equation_integer_type c_fixed
=
786 (x1_fixed
* y2_fixed
- x2_fixed
* y1_fixed
);
788 // offset to end up checking at pixel center instead of top-left pixel corner
789 c_fixed
+= (a_fixed
+ b_fixed
) / 2;
802 // handle top-left fill rule
803 if(a
< 0 || (a
== 0 && b
< 0))
805 // not a top-left edge, fixup c
806 // effectively changes the '>=' to '>' in Edge_equation::inside
810 edge_equations
[start_vertex_index
] = Edge_equation(a
, b
, c
);
811 if(++end_vertex_index
>= triangle_vertex_count
)
812 end_vertex_index
= 0;
813 if(++other_vertex_index
>= triangle_vertex_count
)
814 other_vertex_index
= 0;
818 auto fs
= this->fragment_shader_function
;
819 for(std::int32_t y
= min_y
; y
< end_y
; y
++)
821 for(std::int32_t x
= min_x
; x
< end_x
; x
++)
824 for(auto &edge_equation
: edge_equations
)
826 inside
&= edge_equation
.inside(x
, y
);
830 auto *pixel
= reinterpret_cast<Pixel_type
*>(
831 static_cast<unsigned char *>(color_attachment_memory
)
832 + (static_cast<std::size_t>(x
) * color_attachment_pixel_size
833 + static_cast<std::size_t>(y
) * color_attachment_stride
));
// Builds a Graphics_pipeline from a Vulkan graphics pipeline create info.
//
// Each shader stage is translated from SPIR-V to LLVM IR, verified, and added to
// a JIT compile stack; the entry point addresses are then looked up. For the
// vertex stage the size of the output struct and the byte offset of the Position
// built-in are recorded. The first viewport and the first scissor rectangle of
// the create info are passed to the new pipeline.
//
// Throws std::runtime_error for derivative pipelines, duplicate shader stages,
// LLVM verification failure, unsupported shader kinds, a missing vertex shader, a
// missing Position output, missing viewport state, a viewport count other than 1,
// or missing viewport/scissor lists.
//
// pipeline_cache: NOTE(review) no use is visible in this view — confirm whether
// it is consulted.
842 std::unique_ptr
<Graphics_pipeline
> Graphics_pipeline::make(
843 Pipeline_cache
*pipeline_cache
, const VkGraphicsPipelineCreateInfo
&create_info
)
845 assert(create_info
.sType
== VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO
);
846 auto *render_pass
= Render_pass::from_handle(create_info
.renderPass
);
848 auto *pipeline_layout
= Pipeline_layout::from_handle(create_info
.layout
);
849 assert(pipeline_layout
);
850 if(create_info
.flags
& VK_PIPELINE_CREATE_DERIVATIVE_BIT
)
852 #warning implement creating derived pipelines
853 throw std::runtime_error("creating derived pipelines is not implemented");
855 auto implementation
= std::make_shared
<Implementation
>();
856 auto optimization_level
= ::LLVMCodeGenLevelDefault
;
857 if(create_info
.flags
& VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT
)
858 optimization_level
= ::LLVMCodeGenLevelNone
;
859 auto llvm_target_machine
=
860 llvm_wrapper::Target_machine::create_native_target_machine(optimization_level
);
861 implementation
->compiled_shaders
.reserve(create_info
.stageCount
);
862 util::Enum_set
<spirv::Execution_model
> found_shader_stages
;
863 for(std::size_t i
= 0; i
< create_info
.stageCount
; i
++)
865 auto &stage_info
= create_info
.pStages
[i
];
866 auto execution_models
=
867 vulkan::get_execution_models_from_shader_stage_flags(stage_info
.stage
);
868 assert(execution_models
.size() == 1);
869 auto execution_model
= *execution_models
.begin();
870 bool added_to_found_shader_stages
=
871 std::get
<1>(found_shader_stages
.insert(execution_model
));
872 if(!added_to_found_shader_stages
)
873 throw std::runtime_error("duplicate shader stage");
874 auto *shader_module
= Shader_module::from_handle(stage_info
.module
);
875 assert(shader_module
);
877 spirv::Dump_callbacks dump_callbacks
;
880 spirv::parse(dump_callbacks
, shader_module
->words(), shader_module
->word_count());
882 catch(spirv::Parser_error
&e
)
884 std::cerr
<< dump_callbacks
.ss
.str() << std::endl
;
887 std::cerr
<< dump_callbacks
.ss
.str() << std::endl
;
889 assert(create_info
.pVertexInputState
);
890 assert(create_info
.pVertexInputState
->sType
891 == VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO
);
892 auto compiled_shader
= spirv_to_llvm::spirv_to_llvm(implementation
->llvm_context
.get(),
893 llvm_target_machine
.get(),
894 shader_module
->words(),
895 shader_module
->word_count(),
896 implementation
->compiled_shaders
.size(),
899 create_info
.pVertexInputState
);
900 std::cerr
<< "Translation to LLVM succeeded." << std::endl
;
901 ::LLVMDumpModule(compiled_shader
.module
.get());
903 ::LLVMVerifyModule(compiled_shader
.module
.get(), ::LLVMPrintMessageAction
, nullptr);
905 throw std::runtime_error("LLVM module verification failed");
906 implementation
->compiled_shaders
.push_back(std::move(compiled_shader
));
908 implementation
->data_layout
= llvm_target_machine
.create_target_data_layout();
909 implementation
->jit_stack
=
910 llvm_wrapper::Orc_compile_stack::create(std::move(llvm_target_machine
), optimize_module
);
911 Vertex_shader_function vertex_shader_function
= nullptr;
912 std::size_t vertex_shader_output_struct_size
= 0;
913 util::optional
<std::size_t> vertex_shader_position_output_offset
;
914 Fragment_shader_function fragment_shader_function
= nullptr;
915 for(auto &compiled_shader
: implementation
->compiled_shaders
)
917 implementation
->jit_stack
.add_eagerly_compiled_ir(
918 std::move(compiled_shader
.module
),
919 &spirv_to_llvm::Jit_symbol_resolver::resolve
,
920 static_cast<void *>(&implementation
->jit_symbol_resolver
));
921 auto shader_entry_point_address
= implementation
->jit_stack
.get_symbol_address(
922 compiled_shader
.entry_function_name
.c_str());
923 std::cerr
<< "shader entry: " << compiled_shader
.entry_function_name
<< ": "
924 << reinterpret_cast<void *>(shader_entry_point_address
) << std::endl
;
925 assert(shader_entry_point_address
);
926 switch(compiled_shader
.execution_model
)
928 case spirv::Execution_model::fragment
:
929 fragment_shader_function
=
930 reinterpret_cast<Fragment_shader_function
>(shader_entry_point_address
);
931 #warning finish implementing Graphics_pipeline::make
933 #warning finish implementing Graphics_pipeline::make
934 throw std::runtime_error("creating fragment shaders is not implemented");
935 case spirv::Execution_model::geometry
:
936 #warning finish implementing Graphics_pipeline::make
937 throw std::runtime_error("creating geometry shaders is not implemented");
938 case spirv::Execution_model::gl_compute
:
939 case spirv::Execution_model::kernel
:
940 throw std::runtime_error("can't create compute shaders from Graphics_pipeline::make");
941 case spirv::Execution_model::tessellation_control
:
942 case spirv::Execution_model::tessellation_evaluation
:
943 #warning finish implementing Graphics_pipeline::make
944 throw std::runtime_error("creating tessellation shaders is not implemented");
945 case spirv::Execution_model::vertex
:
947 vertex_shader_function
=
948 reinterpret_cast<Vertex_shader_function
>(shader_entry_point_address
);
949 implementation
->vertex_shader_output_struct
= compiled_shader
.outputs_struct
;
950 auto llvm_vertex_shader_output_struct
=
951 implementation
->vertex_shader_output_struct
->get_or_make_type().type
;
952 vertex_shader_output_struct_size
= ::LLVMABISizeOfType(
953 implementation
->data_layout
.get(), llvm_vertex_shader_output_struct
);
954 for(auto &member
: implementation
->vertex_shader_output_struct
->get_members(true))
956 for(auto &decoration
: member
.decorations
)
958 if(decoration
.value
== spirv::Decoration::built_in
)
961 util::get
<spirv::Decoration_built_in_parameters
>(decoration
.parameters
);
962 if(builtin
.built_in
== spirv::Built_in::position
)
964 vertex_shader_position_output_offset
=
965 ::LLVMOffsetOfElement(implementation
->data_layout
.get(),
966 llvm_vertex_shader_output_struct
,
967 member
.llvm_member_index
);
972 if(vertex_shader_position_output_offset
)
974 if(auto *struct_type
=
975 dynamic_cast<spirv_to_llvm::Struct_type_descriptor
*>(member
.type
.get()))
977 std::size_t struct_offset
=
978 ::LLVMOffsetOfElement(implementation
->data_layout
.get(),
979 llvm_vertex_shader_output_struct
,
980 member
.llvm_member_index
);
981 auto llvm_struct_type
= struct_type
->get_or_make_type().type
;
982 for(auto &submember
: struct_type
->get_members(true))
984 for(auto &decoration
: submember
.decorations
)
986 if(decoration
.value
== spirv::Decoration::built_in
)
988 auto &builtin
= util::get
<spirv::Decoration_built_in_parameters
>(
989 decoration
.parameters
);
990 if(builtin
.built_in
== spirv::Built_in::position
)
992 vertex_shader_position_output_offset
=
994 + ::LLVMOffsetOfElement(implementation
->data_layout
.get(),
996 submember
.llvm_member_index
);
1001 if(vertex_shader_position_output_offset
)
1005 if(vertex_shader_position_output_offset
)
1008 if(!vertex_shader_position_output_offset
)
1009 throw std::runtime_error("can't find vertex shader Position output");
1010 #warning finish implementing Graphics_pipeline::make
1014 throw std::runtime_error("unknown shader kind");
1016 #warning finish implementing Graphics_pipeline::make
1017 if(!vertex_shader_function
)
1018 throw std::runtime_error("graphics pipeline doesn't have vertex shader");
1019 if(!create_info
.pViewportState
)
1020 throw std::runtime_error("missing viewport state");
1021 if(create_info
.pViewportState
->viewportCount
!= 1)
1022 throw std::runtime_error("unimplemented viewport count");
1023 if(!create_info
.pViewportState
->pViewports
)
1024 throw std::runtime_error("missing viewport list");
1025 if(!create_info
.pViewportState
->pScissors
)
1026 throw std::runtime_error("missing scissor rectangle list");
1027 assert(vertex_shader_position_output_offset
);
1028 return std::unique_ptr
<Graphics_pipeline
>(
1029 new Graphics_pipeline(std::move(implementation
),
1030 vertex_shader_function
,
1031 vertex_shader_output_struct_size
,
1032 *vertex_shader_position_output_offset
,
1033 fragment_shader_function
,
1034 create_info
.pViewportState
->pViewports
[0],
1035 create_info
.pViewportState
->pScissors
[0]));