Last active
October 23, 2018 10:12
-
-
Save DiamondLovesYou/ed624b20fe4777766a8629347484d3e7 to your computer and use it in GitHub Desktop.
LLVM module for a JIT-ed Rust function (with addrspacecast optimizations)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = 'jit-methods.7rcbfp3g-cgu.0' | |
source_filename = "jit-methods.7rcbfp3g-cgu.0" | |
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" | |
target triple = "amdgcn-amd-amdhsa-amdgiz" | |
%"ndarray::ArrayBase<ndarray::ViewRepr<&mut f32>, ndarray::dimension::dim::Dim<[usize; 1]>>" = type { [0 x i8], %"ndarray::ViewRepr<&mut f32>", [0 x i8], float*, [0 x i64], %"ndarray::dimension::dim::Dim<[usize; 1]>", [0 x i64], %"ndarray::dimension::dim::Dim<[usize; 1]>", [0 x i64] } | |
%"ndarray::ViewRepr<&mut f32>" = type { [0 x i8], %"core::marker::PhantomData<&mut f32>", [0 x i8] } | |
%"core::marker::PhantomData<&mut f32>" = type {} | |
%"ndarray::dimension::dim::Dim<[usize; 1]>" = type { [0 x i64], [1 x i64], [0 x i64] } | |
%"hsa_rt_sys::hsa_kernel_dispatch_packet_s" = type { [0 x i16], i16, [0 x i16], i16, [0 x i16], i16, [0 x i16], i16, [0 x i16], i16, [0 x i16], i16, [0 x i16], i32, [0 x i32], i32, [0 x i32], i32, [0 x i32], i32, [0 x i32], i32, [0 x i32], i64, [0 x i64], i8*, [0 x i64], i64, [0 x i64], %"hsa_rt_sys::hsa_signal_s", [0 x i64] } | |
%"hsa_rt_sys::hsa_signal_s" = type { [0 x i64], i64, [0 x i64] } | |
%"unwind::libunwind::_Unwind_Exception" = type { [0 x i64], i64, [0 x i64], void (i32, %"unwind::libunwind::_Unwind_Exception"*)*, [0 x i64], [6 x i64], [0 x i64] } | |
%"unwind::libunwind::_Unwind_Context" = type { [0 x i8] } | |
; ndarray::vector_foreach | |
; Function Attrs: nounwind nonlazybind | |
define amdgpu_kernel void @_ZN7ndarray14vector_foreach17h800258d818eaa10aE(%"ndarray::ArrayBase<ndarray::ViewRepr<&mut f32>, ndarray::dimension::dim::Dim<[usize; 1]>>", float %value) unnamed_addr #0 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality { | |
start: | |
%1 = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #4 | |
%2 = icmp eq i8 addrspace(4)* %1, null | |
br i1 %2, label %bb2.i.i, label %_ZN14legionella_std15dispatch_packet17ha1cc34938f13f9f1E.exit | |
bb2.i.i: ; preds = %start | |
tail call void @llvm.trap() #4 | |
unreachable | |
_ZN14legionella_std15dispatch_packet17ha1cc34938f13f9f1E.exit: ; preds = %start | |
%.fca.3.extract = extractvalue %"ndarray::ArrayBase<ndarray::ViewRepr<&mut f32>, ndarray::dimension::dim::Dim<[usize; 1]>>" %0, 3 | |
%.fca.7.1.0.extract = extractvalue %"ndarray::ArrayBase<ndarray::ViewRepr<&mut f32>, ndarray::dimension::dim::Dim<[usize; 1]>>" %0, 7, 1, 0 | |
%.fca.5.1.0.extract = extractvalue %"ndarray::ArrayBase<ndarray::ViewRepr<&mut f32>, ndarray::dimension::dim::Dim<[usize; 1]>>" %0, 5, 1, 0 | |
%3 = ptrtoint i8 addrspace(4)* %1 to i64 | |
%4 = tail call i32 @llvm.amdgcn.workitem.id.x() #4 | |
%5 = zext i32 %4 to i64 | |
%6 = tail call i32 @llvm.amdgcn.workgroup.id.x() #4 | |
%7 = zext i32 %6 to i64 | |
%8 = inttoptr i64 %3 to %"hsa_rt_sys::hsa_kernel_dispatch_packet_s"* | |
%9 = getelementptr inbounds %"hsa_rt_sys::hsa_kernel_dispatch_packet_s", %"hsa_rt_sys::hsa_kernel_dispatch_packet_s"* %8, i64 0, i32 5 | |
%10 = load i16, i16* %9, align 4, !noalias !2 | |
%11 = zext i16 %10 to i64 | |
%12 = mul nuw nsw i64 %11, %7 | |
%13 = add nuw nsw i64 %12, %5 | |
%14 = icmp ule i64 %.fca.5.1.0.extract, %13 | |
%15 = mul i64 %.fca.7.1.0.extract, %13 | |
%16 = getelementptr inbounds float, float* %.fca.3.extract, i64 %15 | |
%17 = icmp eq float* %16, null | |
%18 = or i1 %14, %17 | |
br i1 %18, label %bb5, label %bb11 | |
bb5: ; preds = %_ZN14legionella_std15dispatch_packet17ha1cc34938f13f9f1E.exit, %bb11 | |
ret void | |
bb11: ; preds = %_ZN14legionella_std15dispatch_packet17ha1cc34938f13f9f1E.exit | |
%19 = load float, float* %16, align 4 | |
%20 = fadd float %19, 1.000000e+00 | |
%21 = fmul float %20, %value | |
%22 = fadd float %21, 1.000000e+00 | |
%23 = fmul float %22, %value | |
%24 = fadd float %23, 1.000000e+00 | |
%25 = fmul float %24, %value | |
%26 = fadd float %25, 1.000000e+00 | |
%27 = fmul float %26, %value | |
%28 = fadd float %27, 1.000000e+00 | |
%29 = fmul float %28, %value | |
%30 = fadd float %29, 1.000000e+00 | |
%31 = fmul float %30, %value | |
%32 = fadd float %31, 1.000000e+00 | |
%33 = fmul float %32, %value | |
%34 = fadd float %33, 1.000000e+00 | |
%35 = fmul float %34, %value | |
%36 = fadd float %35, 1.000000e+00 | |
%37 = fmul float %36, %value | |
%38 = fadd float %37, 1.000000e+00 | |
%39 = fmul float %38, %value | |
%40 = fadd float %39, 1.000000e+00 | |
%41 = fmul float %40, %value | |
%42 = fadd float %41, 1.000000e+00 | |
%43 = fmul float %42, %value | |
%44 = fadd float %43, 1.000000e+00 | |
%45 = fmul float %44, %value | |
%46 = fadd float %45, 1.000000e+00 | |
%47 = fmul float %46, %value | |
%48 = fadd float %47, 1.000000e+00 | |
%49 = fmul float %48, %value | |
%50 = fadd float %49, 1.000000e+00 | |
%51 = fmul float %50, %value | |
store float %51, float* %16, align 4 | |
br label %bb5 | |
} | |
; Function Attrs: nounwind nonlazybind | |
declare i32 @rust_eh_personality(i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*) unnamed_addr #1 | |
; Function Attrs: nounwind readnone speculatable | |
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() unnamed_addr #2 | |
; Function Attrs: nounwind readnone speculatable | |
declare i32 @llvm.amdgcn.workitem.id.x() unnamed_addr #2 | |
; Function Attrs: nounwind readnone speculatable | |
declare i32 @llvm.amdgcn.workgroup.id.x() unnamed_addr #2 | |
; Function Attrs: noreturn nounwind | |
declare void @llvm.trap() #3 | |
attributes #0 = { nounwind nonlazybind "probe-stack"="__rust_probestack" "target-features"="+dpp,+s-memrealtime,+trap-handler,+16-bit-insts" } | |
attributes #1 = { nounwind nonlazybind "probe-stack"="__rust_probestack" "target-cpu"="gfx803" "target-features"="+dpp,+s-memrealtime,+trap-handler,+16-bit-insts" } | |
attributes #2 = { nounwind readnone speculatable } | |
attributes #3 = { noreturn nounwind } | |
attributes #4 = { nounwind } | |
!llvm.module.flags = !{!0, !1} | |
!0 = !{i32 7, !"PIE Level", i32 2} | |
!1 = !{i32 2, !"RtLibUseGOT", i32 1} | |
!2 = !{!3, !5, !7} | |
!3 = distinct !{!3, !4, !"_ZN94_$LT$legionella_std..workitem..AxisDimX$u20$as$u20$legionella_std..workitem..WorkGroupAxis$GT$14workgroup_size17h2eda80386d22ece3E: %p"} | |
!4 = distinct !{!4, !"_ZN94_$LT$legionella_std..workitem..AxisDimX$u20$as$u20$legionella_std..workitem..WorkGroupAxis$GT$14workgroup_size17h2eda80386d22ece3E"} | |
!5 = distinct !{!5, !6, !"_ZN14legionella_std8workitem48_$LT$impl$u20$legionella_std..DispatchPacket$GT$9global_id17hab4fbc456f370bafE: %self"} | |
!6 = distinct !{!6, !"_ZN14legionella_std8workitem48_$LT$impl$u20$legionella_std..DispatchPacket$GT$9global_id17hab4fbc456f370bafE"} | |
!7 = distinct !{!7, !8, !"_ZN14legionella_std8workitem48_$LT$impl$u20$legionella_std..DispatchPacket$GT$11global_id_x17h0d11ea891cad49ecE: %self"} | |
!8 = distinct !{!8, !"_ZN14legionella_std8workitem48_$LT$impl$u20$legionella_std..DispatchPacket$GT$11global_id_x17h0d11ea891cad49ecE"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.text | |
.hsa_code_object_version 2,1 | |
.hsa_code_object_isa 8,0,3,"AMD","AMDGPU" | |
.section .text._ZN7ndarray14vector_foreach17h800258d818eaa10aE,#alloc,#execinstr | |
.globl _ZN7ndarray14vector_foreach17h800258d818eaa10aE | |
.p2align 8 | |
.type _ZN7ndarray14vector_foreach17h800258d818eaa10aE,@function | |
.amdgpu_hsa_kernel _ZN7ndarray14vector_foreach17h800258d818eaa10aE | |
_ZN7ndarray14vector_foreach17h800258d818eaa10aE: | |
.amd_kernel_code_t | |
amd_code_version_major = 1 | |
amd_code_version_minor = 2 | |
amd_machine_kind = 1 | |
amd_machine_version_major = 8 | |
amd_machine_version_minor = 0 | |
amd_machine_version_stepping = 3 | |
kernel_code_entry_byte_offset = 256 | |
kernel_code_prefetch_byte_size = 0 | |
granulated_workitem_vgpr_count = 1 | |
granulated_wavefront_sgpr_count = 1 | |
priority = 0 | |
float_mode = 192 | |
priv = 0 | |
enable_dx10_clamp = 1 | |
debug_mode = 0 | |
enable_ieee_mode = 1 | |
enable_sgpr_private_segment_wave_byte_offset = 0 | |
user_sgpr_count = 10 | |
enable_trap_handler = 0 | |
enable_sgpr_workgroup_id_x = 1 | |
enable_sgpr_workgroup_id_y = 0 | |
enable_sgpr_workgroup_id_z = 0 | |
enable_sgpr_workgroup_info = 0 | |
enable_vgpr_workitem_id = 0 | |
enable_exception_msb = 0 | |
granulated_lds_size = 0 | |
enable_exception = 0 | |
enable_sgpr_private_segment_buffer = 1 | |
enable_sgpr_dispatch_ptr = 1 | |
enable_sgpr_queue_ptr = 1 | |
enable_sgpr_kernarg_segment_ptr = 1 | |
enable_sgpr_dispatch_id = 0 | |
enable_sgpr_flat_scratch_init = 0 | |
enable_sgpr_private_segment_size = 0 | |
enable_sgpr_grid_workgroup_count_x = 0 | |
enable_sgpr_grid_workgroup_count_y = 0 | |
enable_sgpr_grid_workgroup_count_z = 0 | |
enable_ordered_append_gds = 0 | |
private_element_size = 1 | |
is_ptr64 = 1 | |
is_dynamic_callstack = 0 | |
is_debug_enabled = 0 | |
is_xnack_enabled = 0 | |
workitem_private_segment_byte_size = 0 | |
workgroup_group_segment_byte_size = 0 | |
gds_segment_byte_size = 0 | |
kernarg_segment_byte_size = 28 | |
workgroup_fbarrier_count = 0 | |
wavefront_sgpr_count = 13 | |
workitem_vgpr_count = 5 | |
reserved_vgpr_first = 0 | |
reserved_vgpr_count = 0 | |
reserved_sgpr_first = 0 | |
reserved_sgpr_count = 0 | |
debug_wavefront_private_segment_offset_sgpr = 0 | |
debug_private_segment_buffer_sgpr = 0 | |
kernarg_segment_alignment = 4 | |
group_segment_alignment = 4 | |
private_segment_alignment = 4 | |
wavefront_size = 6 | |
call_convention = -1 | |
runtime_loader_kernel_symbol = 0 | |
.end_amd_kernel_code_t | |
s_cmp_lg_u64 s[4:5], 0 | |
s_cbranch_scc0 BB0_4 | |
s_add_u32 s4, s4, 4 | |
s_addc_u32 s5, s5, 0 | |
v_mov_b32_e32 v1, s4 | |
v_mov_b32_e32 v2, s5 | |
flat_load_ushort v2, v[1:2] | |
v_mov_b32_e32 v1, 0 | |
s_load_dwordx2 s[2:3], s[8:9], 0x0 | |
s_load_dwordx2 s[0:1], s[8:9], 0x8 | |
s_load_dwordx2 s[6:7], s[8:9], 0x10 | |
s_waitcnt lgkmcnt(0) | |
v_mov_b32_e32 v3, s3 | |
s_waitcnt vmcnt(0) | |
v_mad_u64_u32 v[0:1], s[4:5], v2, s10, v[0:1] | |
v_cmp_gt_u64_e64 s[0:1], s[0:1], v[0:1] | |
v_mul_lo_i32 v1, s6, v1 | |
v_mul_hi_u32 v2, s6, v0 | |
v_mul_lo_i32 v4, s7, v0 | |
v_mul_lo_i32 v0, s6, v0 | |
v_add_u32_e32 v1, vcc, v2, v1 | |
v_add_u32_e32 v1, vcc, v4, v1 | |
v_lshlrev_b64 v[0:1], 2, v[0:1] | |
v_add_u32_e32 v0, vcc, s2, v0 | |
v_addc_u32_e32 v1, vcc, v3, v1, vcc | |
v_cmp_ne_u64_e32 vcc, 0, v[0:1] | |
s_and_b64 s[0:1], s[0:1], vcc | |
s_and_saveexec_b64 s[2:3], s[0:1] | |
s_cbranch_execz BB0_3 | |
BB0_2: | |
flat_load_dword v2, v[0:1] | |
s_load_dword s0, s[8:9], 0x18 | |
s_waitcnt vmcnt(0) lgkmcnt(0) | |
v_add_f32_e32 v2, 1.0, v2 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mad_f32 v2, v2, s0, 1.0 | |
v_mul_f32_e32 v2, s0, v2 | |
flat_store_dword v[0:1], v2 | |
BB0_3: | |
s_endpgm | |
BB0_4: | |
s_mov_b64 s[0:1], s[6:7] | |
s_trap 2 | |
s_trap 2 | |
.Lfunc_end0: | |
.size _ZN7ndarray14vector_foreach17h800258d818eaa10aE, .Lfunc_end0-_ZN7ndarray14vector_foreach17h800258d818eaa10aE | |
.section ".note.GNU-stack" | |
.amd_amdgpu_isa "amdgcn-amd-amdhsa-amdgiz-gfx803" | |
.amd_amdgpu_hsa_metadata | |
--- | |
Version: [ 1, 0 ] | |
Kernels: | |
- Name: _ZN7ndarray14vector_foreach17h800258d818eaa10aE | |
SymbolName: '_ZN7ndarray14vector_foreach17h800258d818eaa10aE@kd' | |
Args: | |
- Size: 24 | |
Align: 8 | |
ValueKind: ByValue | |
ValueType: Struct | |
- Name: value | |
Size: 4 | |
Align: 4 | |
ValueKind: ByValue | |
ValueType: F32 | |
CodeProps: | |
KernargSegmentSize: 28 | |
GroupSegmentFixedSize: 0 | |
PrivateSegmentFixedSize: 0 | |
KernargSegmentAlign: 8 | |
WavefrontSize: 64 | |
NumSGPRs: 13 | |
NumVGPRs: 5 | |
MaxFlatWorkGroupSize: 256 | |
... | |
.end_amd_amdgpu_hsa_metadata |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment