implemented vertex shader inputs
[kazan.git] / src / pipeline / pipeline.cpp
1 /*
2 * Copyright 2017 Jacob Lifshay
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in all
12 * copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 *
22 */
23 #include "pipeline.h"
24 #include "spirv_to_llvm/spirv_to_llvm.h"
25 #include "llvm_wrapper/llvm_wrapper.h"
26 #include "llvm_wrapper/orc_compile_stack.h"
27 #include "vulkan/util.h"
28 #include "util/soft_float.h"
29 #include "json/json.h"
30 #include <stdexcept>
31 #include <cassert>
32 #include <vector>
33 #include <iostream>
34
35 namespace vulkan_cpu
36 {
37 namespace pipeline
38 {
// Object type behind a pipeline cache handle.
// It is currently an empty placeholder and carries no state.
class Pipeline_cache
{
};
42
43 void Api_object_deleter<Pipeline_cache>::operator()(Pipeline_cache *pipeline_cache) const noexcept
44 {
45 delete pipeline_cache;
46 }
47
// Object type behind a render pass handle.
// It is currently an empty placeholder and carries no state.
class Render_pass
{
};
51
52 void Api_object_deleter<Render_pass>::operator()(Render_pass *render_pass) const noexcept
53 {
54 delete render_pass;
55 }
56
57 template <>
58 Render_pass_handle Render_pass_handle::make(const VkRenderPassCreateInfo &render_pass_create_info)
59 {
60 #warning finish implementing Render_pass_handle::make
61 return Render_pass_handle(new Render_pass());
62 }
63
// Object type behind a pipeline layout handle.
// It is currently an empty placeholder and carries no state.
class Pipeline_layout
{
};
67
68 void Api_object_deleter<Pipeline_layout>::operator()(Pipeline_layout *pipeline_layout) const
69 noexcept
70 {
71 delete pipeline_layout;
72 }
73
74 template <>
75 Pipeline_layout_handle Pipeline_layout_handle::make(
76 const VkPipelineLayoutCreateInfo &pipeline_layout_create_info)
77 {
78 #warning finish implementing Pipeline_layout_handle::make
79 return Pipeline_layout_handle(new Pipeline_layout());
80 }
81
82 llvm_wrapper::Module Pipeline::optimize_module(llvm_wrapper::Module module,
83 ::LLVMTargetMachineRef target_machine)
84 {
85 switch(llvm_wrapper::Target_machine::get_code_gen_opt_level(target_machine))
86 {
87 case ::LLVMCodeGenLevelNone:
88 case ::LLVMCodeGenLevelLess:
89 break;
90 case ::LLVMCodeGenLevelDefault:
91 case ::LLVMCodeGenLevelAggressive:
92 {
93 #warning finish implementing module optimizations
94 {
95 auto manager = llvm_wrapper::Pass_manager::create_function_pass_manager(module.get());
96 ::LLVMAddAnalysisPasses(target_machine, manager.get());
97 ::LLVMAddPromoteMemoryToRegisterPass(manager.get());
98 ::LLVMAddScalarReplAggregatesPass(manager.get());
99 ::LLVMAddScalarizerPass(manager.get());
100 ::LLVMAddEarlyCSEMemSSAPass(manager.get());
101 ::LLVMAddSCCPPass(manager.get());
102 ::LLVMAddAggressiveDCEPass(manager.get());
103 ::LLVMAddLICMPass(manager.get());
104 ::LLVMAddIndVarSimplifyPass(manager.get());
105 ::LLVMAddCFGSimplificationPass(manager.get());
106 ::LLVMAddReassociatePass(manager.get());
107 ::LLVMAddInstructionCombiningPass(manager.get());
108 ::LLVMAddNewGVNPass(manager.get());
109 ::LLVMAddCorrelatedValuePropagationPass(manager.get());
110 ::LLVMInitializeFunctionPassManager(manager.get());
111 for(auto fn = ::LLVMGetFirstFunction(module.get()); fn; fn = ::LLVMGetNextFunction(fn))
112 ::LLVMRunFunctionPassManager(manager.get(), fn);
113 ::LLVMFinalizeFunctionPassManager(manager.get());
114 }
115 {
116 auto manager = llvm_wrapper::Pass_manager::create_module_pass_manager();
117 ::LLVMAddAnalysisPasses(target_machine, manager.get());
118 ::LLVMAddIPSCCPPass(manager.get());
119 ::LLVMAddFunctionInliningPass(manager.get());
120 ::LLVMAddDeadArgEliminationPass(manager.get());
121 ::LLVMAddGlobalDCEPass(manager.get());
122 ::LLVMRunPassManager(manager.get(), module.get());
123 }
124 {
125 auto manager = llvm_wrapper::Pass_manager::create_function_pass_manager(module.get());
126 ::LLVMAddAnalysisPasses(target_machine, manager.get());
127 ::LLVMAddCFGSimplificationPass(manager.get());
128 ::LLVMAddPromoteMemoryToRegisterPass(manager.get());
129 ::LLVMAddScalarReplAggregatesPass(manager.get());
130 ::LLVMAddLICMPass(manager.get());
131 ::LLVMAddIndVarSimplifyPass(manager.get());
132 ::LLVMAddReassociatePass(manager.get());
133 ::LLVMAddInstructionCombiningPass(manager.get());
134 ::LLVMAddLoopUnrollPass(manager.get());
135 ::LLVMAddSLPVectorizePass(manager.get());
136 ::LLVMAddAggressiveDCEPass(manager.get());
137 ::LLVMInitializeFunctionPassManager(manager.get());
138 for(auto fn = ::LLVMGetFirstFunction(module.get()); fn; fn = ::LLVMGetNextFunction(fn))
139 ::LLVMRunFunctionPassManager(manager.get(), fn);
140 ::LLVMFinalizeFunctionPassManager(manager.get());
141 }
142 std::cerr << "optimized module:" << std::endl;
143 ::LLVMDumpModule(module.get());
144 break;
145 }
146 }
147 return module;
148 }
149
150 struct Graphics_pipeline::Implementation
151 {
152 llvm_wrapper::Context llvm_context = llvm_wrapper::Context::create();
153 spirv_to_llvm::Jit_symbol_resolver jit_symbol_resolver;
154 llvm_wrapper::Orc_compile_stack jit_stack;
155 llvm_wrapper::Target_data data_layout;
156 std::vector<spirv_to_llvm::Converted_module> compiled_shaders;
157 std::shared_ptr<spirv_to_llvm::Struct_type_descriptor> vertex_shader_output_struct;
158 std::string append_value_to_string(std::string str,
159 spirv_to_llvm::Type_descriptor &type,
160 const void *value) const
161 {
162 struct Visitor : public spirv_to_llvm::Type_descriptor::Type_visitor
163 {
164 const Implementation *this_;
165 std::string &str;
166 const void *value;
167 Visitor(const Implementation *this_, std::string &str, const void *value) noexcept
168 : this_(this_),
169 str(str),
170 value(value)
171 {
172 }
173 virtual void visit(spirv_to_llvm::Simple_type_descriptor &type) override
174 {
175 auto llvm_type = type.get_or_make_type().type;
176 switch(::LLVMGetTypeKind(llvm_type))
177 {
178 case ::LLVMVoidTypeKind:
179 case ::LLVMX86_FP80TypeKind:
180 case ::LLVMFP128TypeKind:
181 case ::LLVMPPC_FP128TypeKind:
182 case ::LLVMLabelTypeKind:
183 case ::LLVMFunctionTypeKind:
184 case ::LLVMStructTypeKind:
185 case ::LLVMArrayTypeKind:
186 case ::LLVMPointerTypeKind:
187 case ::LLVMVectorTypeKind:
188 case ::LLVMMetadataTypeKind:
189 case ::LLVMX86_MMXTypeKind:
190 case ::LLVMTokenTypeKind:
191 break;
192 case ::LLVMHalfTypeKind:
193 {
194 auto integer_value = *static_cast<const std::uint16_t *>(value);
195 auto float_value =
196 util::soft_float::ExtendedFloat::fromHalfPrecision(integer_value);
197 str = json::ast::Number_value::append_double_to_string(
198 static_cast<double>(float_value), std::move(str));
199 if(float_value.isNaN())
200 {
201 str += " (0x";
202 str = json::ast::Number_value::append_unsigned_integer_to_string(
203 integer_value, std::move(str), 0x10);
204 str += ")";
205 }
206 return;
207 }
208 case ::LLVMFloatTypeKind:
209 {
210 static_assert(sizeof(std::uint32_t) == sizeof(float)
211 && alignof(std::uint32_t) == alignof(float),
212 "");
213 union
214 {
215 std::uint32_t integer_value;
216 float float_value;
217 };
218 integer_value = *static_cast<const std::uint32_t *>(value);
219 str = json::ast::Number_value::append_double_to_string(float_value,
220 std::move(str));
221 if(std::isnan(float_value))
222 {
223 str += " (0x";
224 str = json::ast::Number_value::append_unsigned_integer_to_string(
225 integer_value, std::move(str), 0x10);
226 str += ")";
227 }
228 return;
229 }
230 case ::LLVMDoubleTypeKind:
231 {
232 static_assert(sizeof(std::uint64_t) == sizeof(double)
233 && alignof(std::uint64_t) == alignof(double),
234 "");
235 union
236 {
237 std::uint64_t integer_value;
238 double float_value;
239 };
240 integer_value = *static_cast<const std::uint64_t *>(value);
241 str = json::ast::Number_value::append_double_to_string(float_value,
242 std::move(str));
243 if(std::isnan(float_value))
244 {
245 str += " (0x";
246 str = json::ast::Number_value::append_unsigned_integer_to_string(
247 integer_value, std::move(str), 0x10);
248 str += ")";
249 }
250 return;
251 }
252 case ::LLVMIntegerTypeKind:
253 {
254 switch(::LLVMGetIntTypeWidth(llvm_type))
255 {
256 case 8:
257 {
258 auto integer_value = *static_cast<const std::uint8_t *>(value);
259 str += "0x";
260 str = json::ast::Number_value::append_unsigned_integer_to_string(
261 integer_value, std::move(str), 0x10);
262 str += " ";
263 str = json::ast::Number_value::append_unsigned_integer_to_string(
264 integer_value, std::move(str));
265 str += " ";
266 str = json::ast::Number_value::append_signed_integer_to_string(
267 static_cast<std::int8_t>(integer_value), std::move(str));
268 return;
269 }
270 case 16:
271 {
272 auto integer_value = *static_cast<const std::uint16_t *>(value);
273 str += "0x";
274 str = json::ast::Number_value::append_unsigned_integer_to_string(
275 integer_value, std::move(str), 0x10);
276 str += " ";
277 str = json::ast::Number_value::append_unsigned_integer_to_string(
278 integer_value, std::move(str));
279 str += " ";
280 str = json::ast::Number_value::append_signed_integer_to_string(
281 static_cast<std::int16_t>(integer_value), std::move(str));
282 return;
283 }
284 case 32:
285 {
286 auto integer_value = *static_cast<const std::uint32_t *>(value);
287 str += "0x";
288 str = json::ast::Number_value::append_unsigned_integer_to_string(
289 integer_value, std::move(str), 0x10);
290 str += " ";
291 str = json::ast::Number_value::append_unsigned_integer_to_string(
292 integer_value, std::move(str));
293 str += " ";
294 str = json::ast::Number_value::append_signed_integer_to_string(
295 static_cast<std::int32_t>(integer_value), std::move(str));
296 return;
297 }
298 case 64:
299 {
300 auto integer_value = *static_cast<const std::uint64_t *>(value);
301 str += "0x";
302 str = json::ast::Number_value::append_unsigned_integer_to_string(
303 integer_value, std::move(str), 0x10);
304 str += " ";
305 str = json::ast::Number_value::append_unsigned_integer_to_string(
306 integer_value, std::move(str));
307 str += " ";
308 str = json::ast::Number_value::append_signed_integer_to_string(
309 static_cast<std::int64_t>(integer_value), std::move(str));
310 return;
311 }
312 }
313 break;
314 }
315 }
316 assert(!"unhandled type");
317 throw std::runtime_error("unhandled type");
318 }
319 virtual void visit(spirv_to_llvm::Vector_type_descriptor &type) override
320 {
321 auto llvm_element_type = type.get_element_type()->get_or_make_type().type;
322 std::size_t element_size =
323 ::LLVMABISizeOfType(this_->data_layout.get(), llvm_element_type);
324 std::size_t element_count = type.get_element_count();
325 str += "<";
326 auto separator = "";
327 for(std::size_t i = 0; i < element_count; i++)
328 {
329 str += separator;
330 separator = ", ";
331 str = this_->append_value_to_string(
332 std::move(str),
333 *type.get_element_type(),
334 static_cast<const char *>(value) + i * element_size);
335 }
336 str += ">";
337 }
338 virtual void visit(spirv_to_llvm::Matrix_type_descriptor &type) override
339 {
340 assert(!"dumping matrix not implemented");
341 throw std::runtime_error("dumping matrix not implemented");
342 #warning dumping matrix not implemented
343 }
344 virtual void visit(spirv_to_llvm::Array_type_descriptor &type) override
345 {
346 auto llvm_element_type = type.get_element_type()->get_or_make_type().type;
347 std::size_t element_size =
348 ::LLVMABISizeOfType(this_->data_layout.get(), llvm_element_type);
349 std::size_t element_count = type.get_element_count();
350 str += "[";
351 auto separator = "";
352 for(std::size_t i = 0; i < element_count; i++)
353 {
354 str += separator;
355 separator = ", ";
356 str = this_->append_value_to_string(
357 std::move(str),
358 *type.get_element_type(),
359 static_cast<const char *>(value) + i * element_size);
360 }
361 str += "]";
362 }
363 virtual void visit(spirv_to_llvm::Pointer_type_descriptor &type) override
364 {
365 str += "pointer:0x";
366 str = json::ast::Number_value::append_unsigned_integer_to_string(
367 reinterpret_cast<std::uint64_t>(*static_cast<const void *const *>(value)),
368 std::move(str),
369 0x10);
370 }
371 virtual void visit(spirv_to_llvm::Function_type_descriptor &type) override
372 {
373 str += "function:0x";
374 str = json::ast::Number_value::append_unsigned_integer_to_string(
375 reinterpret_cast<std::uint64_t>(*static_cast<const void *const *>(value)),
376 std::move(str),
377 0x10);
378 }
379 virtual void visit(spirv_to_llvm::Struct_type_descriptor &type) override
380 {
381 auto &&members = type.get_members(true);
382 auto llvm_type = type.get_or_make_type().type;
383 str += "{";
384 auto separator = "";
385 for(auto &member : members)
386 {
387 str += separator;
388 separator = ", ";
389 str = this_->append_value_to_string(
390 std::move(str),
391 *member.type,
392 static_cast<const char *>(value)
393 + ::LLVMOffsetOfElement(
394 this_->data_layout.get(), llvm_type, member.llvm_member_index));
395 }
396 str += "}";
397 }
398 };
399 type.visit(Visitor(this, str, value));
400 return str;
401 }
402 };
403
404 void Graphics_pipeline::dump_vertex_shader_output_struct(const void *output_struct) const
405 {
406 std::cerr << "output: "
407 << implementation->append_value_to_string(
408 {}, *implementation->vertex_shader_output_struct, output_struct)
409 << std::endl;
410 }
411
412 void Graphics_pipeline::run(std::uint32_t vertex_start_index,
413 std::uint32_t vertex_end_index,
414 std::uint32_t instance_id,
415 const image::Image &color_attachment,
416 void *const *bindings)
417 {
418 typedef std::uint32_t Pixel_type;
419 assert(color_attachment.descriptor.tiling == VK_IMAGE_TILING_LINEAR);
420 std::size_t color_attachment_stride = color_attachment.descriptor.get_memory_stride();
421 std::size_t color_attachment_pixel_size = color_attachment.descriptor.get_memory_pixel_size();
422 unsigned char *color_attachment_memory = color_attachment.memory.get();
423 float viewport_x_scale, viewport_x_offset, viewport_y_scale, viewport_y_offset,
424 viewport_z_scale, viewport_z_offset;
425 {
426 float px = viewport.width;
427 float ox = viewport.x + 0.5f * viewport.width;
428 float py = viewport.height;
429 float oy = viewport.y + 0.5f * viewport.height;
430 float pz = viewport.maxDepth - viewport.minDepth;
431 float oz = viewport.minDepth;
432 viewport_x_scale = px * 0.5f;
433 viewport_x_offset = ox;
434 viewport_y_scale = py * 0.5f;
435 viewport_y_offset = oy;
436 viewport_z_scale = pz;
437 viewport_z_offset = oz;
438 }
439 constexpr std::size_t vec4_native_alignment = alignof(float) * 4;
440 constexpr std::size_t max_alignment = alignof(std::max_align_t);
441 constexpr std::size_t vec4_alignment =
442 vec4_native_alignment > max_alignment ? max_alignment : vec4_native_alignment;
443 constexpr std::size_t ivec4_native_alignment = alignof(std::int32_t) * 4;
444 constexpr std::size_t ivec4_alignment =
445 ivec4_native_alignment > max_alignment ? max_alignment : ivec4_native_alignment;
446 struct alignas(vec4_alignment) Vec4
447 {
448 float x;
449 float y;
450 float z;
451 float w;
452 constexpr Vec4() noexcept : x(), y(), z(), w()
453 {
454 }
455 constexpr explicit Vec4(float x, float y, float z, float w) noexcept : x(x),
456 y(y),
457 z(z),
458 w(w)
459 {
460 }
461 };
462 struct alignas(ivec4_alignment) Ivec4
463 {
464 std::int32_t x;
465 std::int32_t y;
466 std::int32_t z;
467 std::int32_t w;
468 constexpr Ivec4() noexcept : x(), y(), z(), w()
469 {
470 }
471 constexpr explicit Ivec4(std::int32_t x,
472 std::int32_t y,
473 std::int32_t z,
474 std::int32_t w) noexcept : x(x),
475 y(y),
476 z(z),
477 w(w)
478 {
479 }
480 };
481 auto interpolate_float = [](float t, float v0, float v1) noexcept->float
482 {
483 return t * v1 + (1.0f - t) * v0;
484 };
485 auto interpolate_vec4 = [interpolate_float](
486 float t, const Vec4 &v0, const Vec4 &v1) noexcept->Vec4
487 {
488 return Vec4(interpolate_float(t, v0.x, v1.x),
489 interpolate_float(t, v0.y, v1.y),
490 interpolate_float(t, v0.z, v1.z),
491 interpolate_float(t, v0.w, v1.w));
492 };
493 static constexpr std::size_t triangle_vertex_count = 3;
494 struct Triangle
495 {
496 Vec4 vertexes[triangle_vertex_count];
497 constexpr Triangle() noexcept : vertexes{}
498 {
499 }
500 constexpr Triangle(const Vec4 &v0, const Vec4 &v1, const Vec4 &v2) noexcept
501 : vertexes{v0, v1, v2}
502 {
503 }
504 };
505 auto solve_for_t = [](float v0, float v1) noexcept->float
506 {
507 // solves interpolate_float(t, v0, v1) == 0
508 return v0 / (v0 - v1);
509 };
510 auto clip_edge = [solve_for_t, interpolate_vec4](const Vec4 &start_vertex,
511 const Vec4 &end_vertex,
512 Vec4 *output_vertexes,
513 std::size_t &output_vertex_count,
514 auto eval_vertex) -> bool
515 {
516 // eval_vertex returns a non-negative number if the vertex is inside the clip volume
517 float start_vertex_signed_distance = eval_vertex(start_vertex);
518 float end_vertex_signed_distance = eval_vertex(end_vertex);
519 if(start_vertex_signed_distance != start_vertex_signed_distance)
520 return false; // triangle has a NaN coordinate; skip it
521 if(start_vertex_signed_distance < 0)
522 {
523 // start_vertex is outside
524 if(end_vertex_signed_distance < 0)
525 {
526 // end_vertex is outside; do nothing
527 }
528 else
529 {
530 // end_vertex is inside
531 output_vertexes[output_vertex_count++] = interpolate_vec4(
532 solve_for_t(start_vertex_signed_distance, end_vertex_signed_distance),
533 start_vertex,
534 end_vertex);
535 output_vertexes[output_vertex_count++] = end_vertex;
536 }
537 }
538 else
539 {
540 // start_vertex is inside
541 if(end_vertex_signed_distance < 0)
542 {
543 // end_vertex is outside
544 output_vertexes[output_vertex_count++] = interpolate_vec4(
545 solve_for_t(start_vertex_signed_distance, end_vertex_signed_distance),
546 start_vertex,
547 end_vertex);
548 }
549 else
550 {
551 // end_vertex is inside
552 output_vertexes[output_vertex_count++] = end_vertex;
553 }
554 }
555 return true;
556 };
557 auto clip_triangles = [clip_edge](
558 std::vector<Triangle> &triangles, std::vector<Triangle> &temp_triangles, auto eval_vertex)
559 {
560 temp_triangles.clear();
561 for(auto &input_ref : triangles)
562 {
563 Triangle input = input_ref; // copy to enable compiler optimizations
564 constexpr std::size_t max_clipped_output_vertex_count = 4;
565 Vec4 output_vertexes[max_clipped_output_vertex_count];
566 std::size_t output_vertex_count = 0;
567 bool skip_triangle = false;
568 std::size_t end_vertex_index = 1;
569 for(std::size_t start_vertex_index = 0; start_vertex_index < triangle_vertex_count;
570 start_vertex_index++)
571 {
572 if(!clip_edge(input.vertexes[start_vertex_index],
573 input.vertexes[end_vertex_index],
574 output_vertexes,
575 output_vertex_count,
576 eval_vertex))
577 {
578 skip_triangle = true;
579 break;
580 }
581 if(++end_vertex_index >= triangle_vertex_count)
582 end_vertex_index = 0;
583 }
584 if(skip_triangle)
585 continue;
586 switch(output_vertex_count)
587 {
588 case 0:
589 case 1:
590 case 2:
591 continue;
592 case 3:
593 temp_triangles.push_back(
594 Triangle(output_vertexes[0], output_vertexes[1], output_vertexes[2]));
595 continue;
596 case 4:
597 temp_triangles.push_back(
598 Triangle(output_vertexes[0], output_vertexes[1], output_vertexes[2]));
599 temp_triangles.push_back(
600 Triangle(output_vertexes[0], output_vertexes[2], output_vertexes[3]));
601 continue;
602 }
603 assert(!"clipping algorithm failed");
604 }
605 temp_triangles.swap(triangles);
606 };
607 std::vector<Triangle> triangles;
608 std::vector<Triangle> temp_triangles;
609 constexpr std::size_t chunk_max_size = 96;
610 static_assert(chunk_max_size % triangle_vertex_count == 0, "");
611 std::unique_ptr<unsigned char[]> chunk_vertex_buffer(
612 new unsigned char[get_vertex_shader_output_struct_size() * chunk_max_size]);
613 while(vertex_start_index < vertex_end_index)
614 {
615 std::uint32_t chunk_size = vertex_end_index - vertex_start_index;
616 if(chunk_size > chunk_max_size)
617 chunk_size = chunk_max_size;
618 auto current_vertex_start_index = vertex_start_index;
619 vertex_start_index += chunk_size;
620 run_vertex_shader(current_vertex_start_index,
621 current_vertex_start_index + chunk_size,
622 instance_id,
623 chunk_vertex_buffer.get(),
624 bindings);
625 const unsigned char *current_vertex =
626 chunk_vertex_buffer.get() + vertex_shader_position_output_offset;
627 triangles.clear();
628 for(std::uint32_t i = 0; i + triangle_vertex_count <= chunk_size;
629 i += triangle_vertex_count)
630 {
631 Triangle triangle;
632 for(std::size_t j = 0; j < triangle_vertex_count; j++)
633 {
634 triangle.vertexes[j] = *reinterpret_cast<const Vec4 *>(current_vertex);
635 current_vertex += vertex_shader_output_struct_size;
636 }
637 triangles.push_back(triangle);
638 }
639 // clip to 0 <= vertex.z
640 clip_triangles(triangles,
641 temp_triangles,
642 [](const Vec4 &vertex) noexcept->float
643 {
644 return vertex.z;
645 });
646 // clip to vertex.z <= vertex.w
647 clip_triangles(triangles,
648 temp_triangles,
649 [](const Vec4 &vertex) noexcept->float
650 {
651 return vertex.w - vertex.z;
652 });
653 // clip to -vertex.w <= vertex.x
654 clip_triangles(triangles,
655 temp_triangles,
656 [](const Vec4 &vertex) noexcept->float
657 {
658 return vertex.x + vertex.w;
659 });
660 // clip to vertex.x <= vertex.w
661 clip_triangles(triangles,
662 temp_triangles,
663 [](const Vec4 &vertex) noexcept->float
664 {
665 return vertex.w - vertex.x;
666 });
667 // clip to -vertex.w <= vertex.y
668 clip_triangles(triangles,
669 temp_triangles,
670 [](const Vec4 &vertex) noexcept->float
671 {
672 return vertex.y + vertex.w;
673 });
674 // clip to vertex.y <= vertex.w
675 clip_triangles(triangles,
676 temp_triangles,
677 [](const Vec4 &vertex) noexcept->float
678 {
679 return vertex.w - vertex.y;
680 });
681 VkOffset2D clipped_scissor_rect_min = scissor_rect.offset;
682 VkOffset2D clipped_scissor_rect_end = {
683 .x = scissor_rect.offset.x + static_cast<std::int32_t>(scissor_rect.extent.width),
684 .y = scissor_rect.offset.y + static_cast<std::int32_t>(scissor_rect.extent.height),
685 };
686 if(clipped_scissor_rect_min.x < 0)
687 clipped_scissor_rect_min.x = 0;
688 if(clipped_scissor_rect_min.y < 0)
689 clipped_scissor_rect_min.y = 0;
690 if(clipped_scissor_rect_end.x > color_attachment.descriptor.extent.width)
691 clipped_scissor_rect_end.x = color_attachment.descriptor.extent.width;
692 if(clipped_scissor_rect_end.y < color_attachment.descriptor.extent.height)
693 clipped_scissor_rect_end.y = color_attachment.descriptor.extent.height;
694 if(clipped_scissor_rect_end.x <= clipped_scissor_rect_min.x)
695 continue;
696 if(clipped_scissor_rect_end.y <= clipped_scissor_rect_min.y)
697 continue;
698 for(std::size_t triangle_index = 0; triangle_index < triangles.size(); triangle_index++)
699 {
700 Triangle triangle = triangles[triangle_index];
701 Vec4 projected_triangle_and_inv_w[triangle_vertex_count];
702 Vec4 framebuffer_coordinates[triangle_vertex_count];
703 for(std::size_t i = 0; i < triangle_vertex_count; i++)
704 {
705 projected_triangle_and_inv_w[i].w = 1.0f / triangle.vertexes[i].w;
706 projected_triangle_and_inv_w[i].x =
707 triangle.vertexes[i].x * projected_triangle_and_inv_w[i].w;
708 projected_triangle_and_inv_w[i].y =
709 triangle.vertexes[i].y * projected_triangle_and_inv_w[i].w;
710 projected_triangle_and_inv_w[i].z =
711 triangle.vertexes[i].z * projected_triangle_and_inv_w[i].w;
712 framebuffer_coordinates[i] =
713 Vec4(projected_triangle_and_inv_w[i].x * viewport_x_scale + viewport_x_offset,
714 projected_triangle_and_inv_w[i].y * viewport_y_scale + viewport_y_offset,
715 projected_triangle_and_inv_w[i].z * viewport_z_scale + viewport_z_offset,
716 0);
717 }
718 float orientation = 0;
719 for(std::size_t start_vertex_index = 0, end_vertex_index = 1;
720 start_vertex_index < triangle_vertex_count;
721 start_vertex_index++)
722 {
723 float x1 = framebuffer_coordinates[start_vertex_index].x;
724 float y1 = framebuffer_coordinates[start_vertex_index].y;
725 float x2 = framebuffer_coordinates[end_vertex_index].x;
726 float y2 = framebuffer_coordinates[end_vertex_index].y;
727 orientation += x2 * y1 - x1 * y2;
728 if(++end_vertex_index >= triangle_vertex_count)
729 end_vertex_index = 0;
730 }
731 if(!(orientation < 0)
732 && !(orientation > 0)) // zero area triangle or triangle coordinate is NaN
733 continue;
734 // orientation > 0 for counter-clockwise triangle
735 // orientation < 0 for clockwise triangle
736 std::int32_t min_x, end_x, min_y, end_y;
737 bool first = true;
738 for(std::size_t i = 0; i < triangle_vertex_count; i++)
739 {
740 // x and y will be >= 0 so we can use truncate instead of floor for speed
741 auto current_min_x = static_cast<std::int32_t>(framebuffer_coordinates[i].x);
742 auto current_min_y = static_cast<std::int32_t>(framebuffer_coordinates[i].y);
743 std::int32_t current_end_x = current_min_x + 1;
744 std::int32_t current_end_y = current_min_y + 1;
745 if(first || current_min_x < min_x)
746 min_x = current_min_x;
747 if(first || current_end_x > end_x)
748 end_x = current_end_x;
749 if(first || current_min_y < min_y)
750 min_y = current_min_y;
751 if(first || current_end_y > end_y)
752 end_y = current_end_y;
753 first = false;
754 }
755 if(min_x < clipped_scissor_rect_min.x)
756 min_x = clipped_scissor_rect_min.x;
757 if(end_x > clipped_scissor_rect_end.x)
758 end_x = clipped_scissor_rect_end.x;
759 if(min_y < clipped_scissor_rect_min.y)
760 min_y = clipped_scissor_rect_min.y;
761 if(end_y > clipped_scissor_rect_end.y)
762 end_y = clipped_scissor_rect_end.y;
763 constexpr int log2_scale = 16;
764 constexpr auto scale = 1LL << log2_scale;
765 typedef std::int64_t Edge_equation_integer_type;
766 struct Edge_equation
767 {
768 Edge_equation_integer_type a;
769 Edge_equation_integer_type b;
770 Edge_equation_integer_type c;
771 Edge_equation_integer_type padding;
772 constexpr Edge_equation() noexcept : a(), b(), c(), padding()
773 {
774 }
775 constexpr Edge_equation(Edge_equation_integer_type a,
776 Edge_equation_integer_type b,
777 Edge_equation_integer_type c) noexcept : a(a),
778 b(b),
779 c(c),
780 padding()
781 {
782 }
783 constexpr bool inside(std::int32_t x, std::int32_t y) const noexcept
784 {
785 return a * x + b * y + c >= 0;
786 }
787 };
788 Edge_equation edge_equations[triangle_vertex_count];
789 bool skip_triangle = false;
790 for(std::size_t start_vertex_index = 0, end_vertex_index = 1, other_vertex_index = 2;
791 start_vertex_index < triangle_vertex_count;
792 start_vertex_index++)
793 {
794 float x1_float = framebuffer_coordinates[start_vertex_index].x;
795 float y1_float = framebuffer_coordinates[start_vertex_index].y;
796 float x2_float = framebuffer_coordinates[end_vertex_index].x;
797 float y2_float = framebuffer_coordinates[end_vertex_index].y;
798 [[gnu::unused]] float x3_float = framebuffer_coordinates[other_vertex_index].x;
799 [[gnu::unused]] float y3_float = framebuffer_coordinates[other_vertex_index].y;
800 auto x1_fixed = static_cast<Edge_equation_integer_type>(x1_float * scale);
801 auto y1_fixed = static_cast<Edge_equation_integer_type>(y1_float * scale);
802 auto x2_fixed = static_cast<Edge_equation_integer_type>(x2_float * scale);
803 auto y2_fixed = static_cast<Edge_equation_integer_type>(y2_float * scale);
804 [[gnu::unused]] auto x3_fixed =
805 static_cast<Edge_equation_integer_type>(x3_float * scale);
806 [[gnu::unused]] auto y3_fixed =
807 static_cast<Edge_equation_integer_type>(y3_float * scale);
808 Edge_equation_integer_type a;
809 Edge_equation_integer_type b;
810 Edge_equation_integer_type c;
811 {
812 // solve a * x1 + b * y1 + c == 0 &&
813 // a * x2 + b * y2 + c == 0 &&
814 // a * x3 + b * y3 + c >= 0
815 if(x1_fixed == x2_fixed && y1_fixed == y2_fixed)
816 {
817 // rounded to a zero-area triangle
818 skip_triangle = true;
819 break;
820 }
821 Edge_equation_integer_type a_fixed = (y1_fixed - y2_fixed) * scale;
822 Edge_equation_integer_type b_fixed = (x2_fixed - x1_fixed) * scale;
823 Edge_equation_integer_type c_fixed =
824 (x1_fixed * y2_fixed - x2_fixed * y1_fixed);
825
826 // offset to end up checking at pixel center instead of top-left pixel corner
827 c_fixed += (a_fixed + b_fixed) / 2;
828
829 a = a_fixed;
830 b = b_fixed;
831 c = c_fixed;
832 if(orientation > 0)
833 {
834 // fix sign
835 a = -a;
836 b = -b;
837 c = -c;
838 }
839 }
840 // handle top-left fill rule
841 if(a < 0 || (a == 0 && b < 0))
842 {
843 // not a top-left edge, fixup c
844 // effectively changes the '>=' to '>' in Edge_equation::inside
845 c--;
846 }
847
848 edge_equations[start_vertex_index] = Edge_equation(a, b, c);
849 if(++end_vertex_index >= triangle_vertex_count)
850 end_vertex_index = 0;
851 if(++other_vertex_index >= triangle_vertex_count)
852 other_vertex_index = 0;
853 }
854 if(skip_triangle)
855 continue;
856 auto fs = this->fragment_shader_function;
857 for(std::int32_t y = min_y; y < end_y; y++)
858 {
859 for(std::int32_t x = min_x; x < end_x; x++)
860 {
861 bool inside = true;
862 for(auto &edge_equation : edge_equations)
863 {
864 inside &= edge_equation.inside(x, y);
865 }
866 if(inside)
867 {
868 auto *pixel = reinterpret_cast<Pixel_type *>(
869 color_attachment_memory
870 + (static_cast<std::size_t>(x) * color_attachment_pixel_size
871 + static_cast<std::size_t>(y) * color_attachment_stride));
872 fs(pixel);
873 }
874 }
875 }
876 };
877 }
878 }
879
880 std::unique_ptr<Graphics_pipeline> Graphics_pipeline::make(
881 Pipeline_cache *pipeline_cache, const VkGraphicsPipelineCreateInfo &create_info)
882 {
883 assert(create_info.sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
884 auto *render_pass = Render_pass_handle::from_handle(create_info.renderPass);
885 assert(render_pass);
886 auto *pipeline_layout = Pipeline_layout_handle::from_handle(create_info.layout);
887 assert(pipeline_layout);
888 if(create_info.flags & VK_PIPELINE_CREATE_DERIVATIVE_BIT)
889 {
890 #warning implement creating derived pipelines
891 throw std::runtime_error("creating derived pipelines is not implemented");
892 }
893 auto implementation = std::make_shared<Implementation>();
894 auto optimization_level = ::LLVMCodeGenLevelDefault;
895 if(create_info.flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
896 optimization_level = ::LLVMCodeGenLevelNone;
897 auto llvm_target_machine =
898 llvm_wrapper::Target_machine::create_native_target_machine(optimization_level);
899 implementation->compiled_shaders.reserve(create_info.stageCount);
900 util::Enum_set<spirv::Execution_model> found_shader_stages;
901 for(std::size_t i = 0; i < create_info.stageCount; i++)
902 {
903 auto &stage_info = create_info.pStages[i];
904 auto execution_models =
905 vulkan::get_execution_models_from_shader_stage_flags(stage_info.stage);
906 assert(execution_models.size() == 1);
907 auto execution_model = *execution_models.begin();
908 bool added_to_found_shader_stages =
909 std::get<1>(found_shader_stages.insert(execution_model));
910 if(!added_to_found_shader_stages)
911 throw std::runtime_error("duplicate shader stage");
912 auto *shader_module = Shader_module_handle::from_handle(stage_info.module);
913 assert(shader_module);
914 {
915 spirv::Dump_callbacks dump_callbacks;
916 try
917 {
918 spirv::parse(dump_callbacks, shader_module->words(), shader_module->word_count());
919 }
920 catch(spirv::Parser_error &e)
921 {
922 std::cerr << dump_callbacks.ss.str() << std::endl;
923 throw;
924 }
925 std::cerr << dump_callbacks.ss.str() << std::endl;
926 }
927 assert(create_info.pVertexInputState);
928 assert(create_info.pVertexInputState->sType == VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO);
929 auto compiled_shader = spirv_to_llvm::spirv_to_llvm(implementation->llvm_context.get(),
930 llvm_target_machine.get(),
931 shader_module->words(),
932 shader_module->word_count(),
933 implementation->compiled_shaders.size(),
934 execution_model,
935 stage_info.pName,
936 create_info.pVertexInputState);
937 std::cerr << "Translation to LLVM succeeded." << std::endl;
938 ::LLVMDumpModule(compiled_shader.module.get());
939 bool failed =
940 ::LLVMVerifyModule(compiled_shader.module.get(), ::LLVMPrintMessageAction, nullptr);
941 if(failed)
942 throw std::runtime_error("LLVM module verification failed");
943 implementation->compiled_shaders.push_back(std::move(compiled_shader));
944 }
945 implementation->data_layout = llvm_target_machine.create_target_data_layout();
946 implementation->jit_stack =
947 llvm_wrapper::Orc_compile_stack::create(std::move(llvm_target_machine), optimize_module);
948 Vertex_shader_function vertex_shader_function = nullptr;
949 std::size_t vertex_shader_output_struct_size = 0;
950 util::optional<std::size_t> vertex_shader_position_output_offset;
951 Fragment_shader_function fragment_shader_function = nullptr;
952 for(auto &compiled_shader : implementation->compiled_shaders)
953 {
954 implementation->jit_stack.add_eagerly_compiled_ir(
955 std::move(compiled_shader.module),
956 &spirv_to_llvm::Jit_symbol_resolver::resolve,
957 static_cast<void *>(&implementation->jit_symbol_resolver));
958 auto shader_entry_point_address = implementation->jit_stack.get_symbol_address(
959 compiled_shader.entry_function_name.c_str());
960 std::cerr << "shader entry: " << compiled_shader.entry_function_name << ": "
961 << reinterpret_cast<void *>(shader_entry_point_address) << std::endl;
962 assert(shader_entry_point_address);
963 switch(compiled_shader.execution_model)
964 {
965 case spirv::Execution_model::fragment:
966 fragment_shader_function =
967 reinterpret_cast<Fragment_shader_function>(shader_entry_point_address);
968 #warning finish implementing Graphics_pipeline::make
969 continue;
970 #warning finish implementing Graphics_pipeline::make
971 throw std::runtime_error("creating fragment shaders is not implemented");
972 case spirv::Execution_model::geometry:
973 #warning finish implementing Graphics_pipeline::make
974 throw std::runtime_error("creating geometry shaders is not implemented");
975 case spirv::Execution_model::gl_compute:
976 case spirv::Execution_model::kernel:
977 throw std::runtime_error("can't create compute shaders from Graphics_pipeline::make");
978 case spirv::Execution_model::tessellation_control:
979 case spirv::Execution_model::tessellation_evaluation:
980 #warning finish implementing Graphics_pipeline::make
981 throw std::runtime_error("creating tessellation shaders is not implemented");
982 case spirv::Execution_model::vertex:
983 {
984 vertex_shader_function =
985 reinterpret_cast<Vertex_shader_function>(shader_entry_point_address);
986 implementation->vertex_shader_output_struct = compiled_shader.outputs_struct;
987 auto llvm_vertex_shader_output_struct =
988 implementation->vertex_shader_output_struct->get_or_make_type().type;
989 vertex_shader_output_struct_size = ::LLVMABISizeOfType(
990 implementation->data_layout.get(), llvm_vertex_shader_output_struct);
991 for(auto &member : implementation->vertex_shader_output_struct->get_members(true))
992 {
993 for(auto &decoration : member.decorations)
994 {
995 if(decoration.value == spirv::Decoration::built_in)
996 {
997 auto &builtin =
998 util::get<spirv::Decoration_built_in_parameters>(decoration.parameters);
999 if(builtin.built_in == spirv::Built_in::position)
1000 {
1001 vertex_shader_position_output_offset =
1002 ::LLVMOffsetOfElement(implementation->data_layout.get(),
1003 llvm_vertex_shader_output_struct,
1004 member.llvm_member_index);
1005 break;
1006 }
1007 }
1008 }
1009 if(vertex_shader_position_output_offset)
1010 break;
1011 if(auto *struct_type =
1012 dynamic_cast<spirv_to_llvm::Struct_type_descriptor *>(member.type.get()))
1013 {
1014 std::size_t struct_offset =
1015 ::LLVMOffsetOfElement(implementation->data_layout.get(),
1016 llvm_vertex_shader_output_struct,
1017 member.llvm_member_index);
1018 auto llvm_struct_type = struct_type->get_or_make_type().type;
1019 for(auto &submember : struct_type->get_members(true))
1020 {
1021 for(auto &decoration : submember.decorations)
1022 {
1023 if(decoration.value == spirv::Decoration::built_in)
1024 {
1025 auto &builtin = util::get<spirv::Decoration_built_in_parameters>(
1026 decoration.parameters);
1027 if(builtin.built_in == spirv::Built_in::position)
1028 {
1029 vertex_shader_position_output_offset =
1030 struct_offset
1031 + ::LLVMOffsetOfElement(implementation->data_layout.get(),
1032 llvm_struct_type,
1033 submember.llvm_member_index);
1034 break;
1035 }
1036 }
1037 }
1038 if(vertex_shader_position_output_offset)
1039 break;
1040 }
1041 }
1042 if(vertex_shader_position_output_offset)
1043 break;
1044 }
1045 if(!vertex_shader_position_output_offset)
1046 throw std::runtime_error("can't find vertex shader Position output");
1047 #warning finish implementing Graphics_pipeline::make
1048 continue;
1049 }
1050 }
1051 throw std::runtime_error("unknown shader kind");
1052 }
1053 #warning finish implementing Graphics_pipeline::make
1054 if(!vertex_shader_function)
1055 throw std::runtime_error("graphics pipeline doesn't have vertex shader");
1056 if(!create_info.pViewportState)
1057 throw std::runtime_error("missing viewport state");
1058 if(create_info.pViewportState->viewportCount != 1)
1059 throw std::runtime_error("unimplemented viewport count");
1060 if(!create_info.pViewportState->pViewports)
1061 throw std::runtime_error("missing viewport list");
1062 if(!create_info.pViewportState->pScissors)
1063 throw std::runtime_error("missing scissor rectangle list");
1064 assert(vertex_shader_position_output_offset);
1065 return std::unique_ptr<Graphics_pipeline>(
1066 new Graphics_pipeline(std::move(implementation),
1067 vertex_shader_function,
1068 vertex_shader_output_struct_size,
1069 *vertex_shader_position_output_offset,
1070 fragment_shader_function,
1071 create_info.pViewportState->pViewports[0],
1072 create_info.pViewportState->pScissors[0]));
1073 }
1074 }
1075 }