diff --git a/Project.toml b/Project.toml index a3abcba9c..ef95ef580 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "KomaMRI" uuid = "6a340f8b-2cdf-4c04-99be-4953d9b66d0a" authors = ["Carlos Castillo Passi "] -version = "0.7.1" +version = "0.7.2" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" @@ -12,7 +12,6 @@ Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" -Hwloc = "0e44f5e4-bd66-52a0-8798-143a42290a1d" Interact = "c601a237-2ae4-5e1e-952c-7a85b0c7eef1" Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" @@ -37,7 +36,6 @@ CUDA = "3" FileIO = "1" Functors = "0.4" HDF5 = "0.16" -Hwloc = "2" Interact = "0.10" Interpolations = "0.13, 0.14" JLD2 = "0.4" diff --git a/examples/3.koma_paper/comparison_speed/Koma_rtx2080ti_gtx1650ti b/examples/3.koma_paper/comparison_speed/Koma_rtx2080ti_gtx1650ti index ae344cc54..7232d95ee 100644 --- a/examples/3.koma_paper/comparison_speed/Koma_rtx2080ti_gtx1650ti +++ b/examples/3.koma_paper/comparison_speed/Koma_rtx2080ti_gtx1650ti @@ -1,63 +1,63 @@ ### KOMA Multi-Shot Spiral CPU ### - 4.468904 seconds (348.87 k allocations: 7.061 GiB, 3.55% gc time, 0.74% compilation time) - 4.445230 seconds (304.62 k allocations: 7.059 GiB, 3.22% gc time) - 4.444298 seconds (304.62 k allocations: 7.059 GiB, 3.22% gc time) - 4.438783 seconds (304.62 k allocations: 7.059 GiB, 3.23% gc time) - 4.438602 seconds (304.62 k allocations: 7.059 GiB, 3.16% gc time) - 4.421823 seconds (304.62 k allocations: 7.059 GiB, 3.17% gc time) - 4.422771 seconds (304.49 k allocations: 7.059 GiB, 3.13% gc time) - 4.418151 seconds (304.62 k allocations: 7.059 GiB, 3.05% gc time) - 4.419588 seconds (304.62 k allocations: 7.059 GiB, 3.12% gc time) - 4.423932 seconds (304.62 k allocations: 7.059 GiB, 3.05% gc time) - 4.438833 seconds (304.62 k allocations: 7.059 GiB, 3.07% gc time) - 4.438832 seconds (304.62 k allocations: 7.059 GiB, 3.04% gc time) - 4.486883 seconds (304.62 k allocations: 7.059 GiB, 3.07% gc time) - 4.700309 seconds (304.49 k allocations: 7.059 GiB, 2.98% gc time) - 4.691741 seconds (304.62 k allocations: 7.059 GiB, 2.94% gc time) - 4.752007 seconds (304.36 k allocations: 7.059 GiB, 2.83% gc time) - 4.466893 seconds (304.49 k allocations: 7.059 GiB, 3.01% gc time) - 4.464153 seconds (304.49 k allocations: 7.059 GiB, 3.03% gc time) - 4.493246 seconds (304.62 k allocations: 7.059 GiB, 3.19% gc time) - 4.457662 seconds (304.62 k allocations: 7.059 GiB, 3.00% gc time) + 1.791288 seconds (348.88 k allocations: 7.061 GiB, 6.10% gc time, 2.04% compilation time) + 1.788641 seconds (305.13 k allocations: 7.059 GiB, 3.22% gc time) + 1.797306 seconds (305.01 k allocations: 7.059 GiB, 3.97% gc time) + 1.760078 seconds (305.16 k allocations: 7.059 GiB, 3.21% gc time) + 1.780291 seconds (304.88 k allocations: 7.059 GiB, 4.14% gc time) + 1.803833 seconds (304.74 k allocations: 7.059 GiB, 4.24% gc time) + 1.783240 seconds (305.01 k allocations: 7.059 GiB, 3.56% gc time) + 1.779908 seconds (305.00 k allocations: 7.059 GiB, 3.28% gc time) + 1.765165 seconds (304.60 k allocations: 7.059 GiB, 3.57% gc time) + 1.797075 seconds (305.39 k allocations: 7.059 GiB, 3.16% gc time) + 1.761110 seconds (305.29 k allocations: 7.059 GiB, 2.69% gc time) + 1.771107 seconds (305.01 k allocations: 7.059 GiB, 2.82% gc time) + 1.790191 seconds (304.60 k allocations: 7.059 GiB, 3.04% gc time) + 1.743203 seconds (305.00 k allocations: 7.059 GiB, 2.63% gc time) + 1.811720 seconds (305.14 k allocations: 7.059 GiB, 2.85% gc time) + 1.775254 seconds (305.00 k allocations: 7.059 GiB, 3.58% gc time) + 1.768405 seconds (304.86 k allocations: 7.059 GiB, 2.96% gc time) + 2.550956 seconds (304.59 k allocations: 7.059 GiB, 35.09% gc time) + 1.737120 seconds (305.14 k allocations: 7.059 GiB, 2.59% gc time) + 1.774394 seconds (305.01 k allocations: 7.059 GiB, 3.46% gc time) ### KOMA Multi-Shot Spiral GPU0 ### - 0.191547 seconds (377.14 k allocations: 23.680 MiB, 17.10% compilation time) - 0.150679 seconds (332.70 k allocations: 21.300 MiB) - 0.151408 seconds (332.65 k allocations: 21.297 MiB) - 0.150065 seconds (332.85 k allocations: 21.309 MiB) - 0.151362 seconds (332.89 k allocations: 21.311 MiB) - 0.147650 seconds (332.80 k allocations: 21.305 MiB) - 0.151219 seconds (332.80 k allocations: 21.306 MiB) - 0.150325 seconds (332.70 k allocations: 21.300 MiB) - 0.148065 seconds (332.84 k allocations: 21.308 MiB) - 0.152075 seconds (332.94 k allocations: 21.314 MiB) - 0.150261 seconds (332.70 k allocations: 21.300 MiB) - 0.150011 seconds (332.94 k allocations: 21.314 MiB) - 0.149172 seconds (332.80 k allocations: 21.306 MiB) - 0.148890 seconds (332.75 k allocations: 21.302 MiB) - 0.148253 seconds (332.94 k allocations: 21.315 MiB) - 0.152094 seconds (332.75 k allocations: 21.303 MiB) - 0.147646 seconds (332.75 k allocations: 21.302 MiB) - 0.151022 seconds (332.94 k allocations: 21.315 MiB) - 0.149571 seconds (332.75 k allocations: 21.303 MiB) - 0.147972 seconds (332.80 k allocations: 21.306 MiB) + 0.222617 seconds (410.00 k allocations: 24.362 MiB, 10.73% gc time, 26.09% compilation time) + 0.149812 seconds (355.18 k allocations: 21.673 MiB) + 0.150912 seconds (355.30 k allocations: 21.673 MiB) + 0.154048 seconds (355.06 k allocations: 21.669 MiB) + 0.153823 seconds (355.26 k allocations: 21.673 MiB) + 0.150777 seconds (355.44 k allocations: 21.681 MiB) + 0.151338 seconds (355.24 k allocations: 21.677 MiB) + 0.155108 seconds (355.14 k allocations: 21.671 MiB) + 0.152609 seconds (355.07 k allocations: 21.667 MiB) + 0.152995 seconds (355.29 k allocations: 21.678 MiB) + 0.149100 seconds (355.31 k allocations: 21.676 MiB) + 0.148364 seconds (355.13 k allocations: 21.671 MiB) + 0.147747 seconds (355.24 k allocations: 21.675 MiB) + 0.152601 seconds (355.35 k allocations: 21.675 MiB) + 0.148840 seconds (355.51 k allocations: 21.683 MiB) + 0.150223 seconds (355.15 k allocations: 21.673 MiB) + 0.151326 seconds (355.41 k allocations: 21.681 MiB) + 0.149905 seconds (355.60 k allocations: 21.687 MiB) + 0.150076 seconds (355.47 k allocations: 21.685 MiB) + 0.152087 seconds (354.98 k allocations: 21.662 MiB) ### KOMA Multi-Shot Spiral GPU1 ### - 0.383762 seconds (387.52 k allocations: 24.026 MiB, 3.61% gc time, 8.37% compilation time) - 0.326810 seconds (343.31 k allocations: 21.657 MiB, 2.48% gc time) - 0.317470 seconds (343.31 k allocations: 21.658 MiB, 2.54% gc time) - 0.315657 seconds (343.32 k allocations: 21.659 MiB, 2.59% gc time) - 0.312953 seconds (343.41 k allocations: 21.663 MiB, 2.58% gc time) - 0.317549 seconds (343.50 k allocations: 21.669 MiB, 2.56% gc time) - 0.316872 seconds (343.41 k allocations: 21.663 MiB, 2.57% gc time) - 0.318535 seconds (343.50 k allocations: 21.669 MiB, 2.54% gc time) - 0.317094 seconds (343.45 k allocations: 21.666 MiB, 2.54% gc time) - 0.317818 seconds (343.45 k allocations: 21.666 MiB, 2.57% gc time) - 0.319701 seconds (343.45 k allocations: 21.666 MiB, 2.51% gc time) - 0.320600 seconds (343.36 k allocations: 21.660 MiB, 2.50% gc time) - 0.316938 seconds (343.36 k allocations: 21.660 MiB, 2.56% gc time) - 0.318238 seconds (343.31 k allocations: 21.657 MiB, 2.51% gc time) - 0.316747 seconds (343.45 k allocations: 21.666 MiB, 2.58% gc time) - 0.323734 seconds (343.40 k allocations: 21.663 MiB, 2.49% gc time) - 0.320136 seconds (343.40 k allocations: 21.663 MiB, 2.52% gc time) - 0.316931 seconds (343.50 k allocations: 21.669 MiB, 2.55% gc time) - 0.316066 seconds (343.40 k allocations: 21.663 MiB, 2.53% gc time) - 0.315290 seconds (343.37 k allocations: 21.663 MiB, 2.57% gc time) + 0.375906 seconds (410.02 k allocations: 24.403 MiB, 6.18% gc time, 8.52% compilation time) + 0.328961 seconds (365.91 k allocations: 22.043 MiB, 2.27% gc time) + 0.324912 seconds (365.93 k allocations: 22.044 MiB, 2.21% gc time) + 0.317966 seconds (365.93 k allocations: 22.043 MiB, 2.31% gc time) + 0.317574 seconds (365.99 k allocations: 22.047 MiB, 2.41% gc time) + 0.316693 seconds (365.93 k allocations: 22.043 MiB, 2.25% gc time) + 0.318666 seconds (365.97 k allocations: 22.047 MiB, 2.37% gc time) + 0.316730 seconds (365.83 k allocations: 22.038 MiB, 2.29% gc time) + 0.317443 seconds (365.69 k allocations: 22.031 MiB, 2.33% gc time) + 0.318769 seconds (365.94 k allocations: 22.044 MiB, 2.36% gc time) + 0.320683 seconds (365.92 k allocations: 22.043 MiB, 2.22% gc time) + 0.319358 seconds (365.88 k allocations: 22.041 MiB, 2.38% gc time) + 0.318528 seconds (365.78 k allocations: 22.035 MiB, 2.33% gc time) + 0.322693 seconds (365.73 k allocations: 22.034 MiB, 2.31% gc time) + 0.323850 seconds (365.83 k allocations: 22.038 MiB, 2.66% gc time) + 0.320972 seconds (365.85 k allocations: 22.040 MiB, 2.41% gc time) + 0.324835 seconds (365.77 k allocations: 22.035 MiB, 2.22% gc time) + 0.322921 seconds (365.93 k allocations: 22.044 MiB, 2.19% gc time) + 0.320311 seconds (365.80 k allocations: 22.037 MiB, 2.24% gc time) + 0.324080 seconds (365.67 k allocations: 22.031 MiB, 2.23% gc time) diff --git a/examples/3.koma_paper/comparison_speed/MRiLab_speed.jl b/examples/3.koma_paper/comparison_speed/MRiLab_speed.jl index ca2e8bc23..7792eb0cf 100644 --- a/examples/3.koma_paper/comparison_speed/MRiLab_speed.jl +++ b/examples/3.koma_paper/comparison_speed/MRiLab_speed.jl @@ -39,22 +39,20 @@ if (ARGS == String[]) #No arguments, use defaults simParams = Dict{String,Any}( "Nblocks" => 20, "gpu" => true, - "gpu_device" => 0, - "Nthreads" => 1 + "gpu_device" => 0 ) else simParams = Dict{String,Any}( "Nblocks" => 20, "gpu" => ARGS[1] == "gpu" ? true : false, - "gpu_device" => parse(Int64, ARGS[2]), - "Nthreads" => parse(Int64, ARGS[3]) + "gpu_device" => parse(Int64, ARGS[2]) ) end Nexp = 20 raw = @suppress simulate(phantom, seq, sys; simParams) #warmup for i = 1:Nexp - raw = simulate(phantom, seq, sys; simParams) + local raw = simulate(phantom, seq, sys; simParams) end # plot_signal(raw; range=[50.5, 54]) #; show_sim_blocks=true) diff --git a/examples/3.koma_paper/comparison_speed/run_test.sh b/examples/3.koma_paper/comparison_speed/run_test.sh index 6142cc653..52e9268d3 100644 --- a/examples/3.koma_paper/comparison_speed/run_test.sh +++ b/examples/3.koma_paper/comparison_speed/run_test.sh @@ -1,7 +1,7 @@ #!/bin/bash echo "### KOMA Multi-Shot Spiral CPU ###" | tee .out -julia $1 ./MRiLab_speed.jl cpu 0 8 | tee -a .out #CPU +julia -t 8 $1 ./MRiLab_speed.jl cpu 0 | tee -a .out #CPU echo "### KOMA Multi-Shot Spiral GPU0 ###" | tee -a .out -julia $1 ./MRiLab_speed.jl gpu 0 1 | tee -a .out #GPU0 +julia -t 8 $1 ./MRiLab_speed.jl gpu 0 | tee -a .out #GPU0 echo "### KOMA Multi-Shot Spiral GPU1 ###" | tee -a .out -julia $1 ./MRiLab_speed.jl gpu 1 1 | tee -a .out #GPU1 +julia -t 8 $1 ./MRiLab_speed.jl gpu 1 | tee -a .out #GPU1 diff --git a/src/KomaMRI.jl b/src/KomaMRI.jl index 0b4e3fc56..1b0e6235d 100644 --- a/src/KomaMRI.jl +++ b/src/KomaMRI.jl @@ -11,7 +11,7 @@ using Scanf, ProgressMeter #Datatypes using Parameters #Simulation -using CUDA, Interpolations, Hwloc +using CUDA, Interpolations #Reconstruction using MRIReco, MRIFiles @reexport using MRIReco: RawAcquisitionData, AcquisitionData, reconstruction diff --git a/src/simulation/SimulatorCore.jl b/src/simulation/SimulatorCore.jl index 0910ec097..c11901a14 100644 --- a/src/simulation/SimulatorCore.jl +++ b/src/simulation/SimulatorCore.jl @@ -1,5 +1,3 @@ -const Nphyscores = Hwloc.num_physical_cores() - abstract type SimulationMethod end #get all available types by using subtypes(KomaMRI.SimulationMethod) abstract type SpinStateRepresentation{T<:Real} end #get all available types by using subtypes(KomaMRI.SpinStateRepresentation) @@ -28,7 +26,7 @@ separating the spins of the phantom `obj` in `Nthreads`. """ function run_spin_precession_parallel!(obj::Phantom{T}, seq::DiscreteSequence{T}, sig::AbstractArray{Complex{T}}, Xt::SpinStateRepresentation{T}, sim_method::SimulationMethod; - Nthreads=Nphyscores) where {T<:Real} + Nthreads=Threads.nthreads()) where {T<:Real} parts = kfoldperm(length(obj), Nthreads, type="ordered") @@ -60,7 +58,7 @@ different number threads to excecute the process. """ function run_spin_excitation_parallel!(obj::Phantom{T}, seq::DiscreteSequence{T}, Xt::SpinStateRepresentation{T}, sim_method::SimulationMethod; - Nthreads=Nphyscores) where {T<:Real} + Nthreads=Threads.nthreads()) where {T<:Real} parts = kfoldperm(length(obj), Nthreads; type="ordered") @@ -98,7 +96,7 @@ take advantage of CPU parallel processing. """ function run_sim_time_iter!(obj::Phantom, seq::DiscreteSequence, sig::AbstractArray{Complex{T}}, Xt::SpinStateRepresentation{T}, sim_method::SimulationMethod; - Nblocks=1, Nthreads=Nphyscores, parts=[1:length(seq)], w=nothing) where {T<:Real} + Nblocks=1, Nthreads=Threads.nthreads(), parts=[1:length(seq)], w=nothing) where {T<:Real} # Simulation rfs = 0 samples = 1 @@ -168,16 +166,15 @@ julia> plot_signal(ismrmrd) """ function simulate(obj::Phantom, seq::Sequence, sys::Scanner; simParams=Dict{String,Any}(), w=nothing) #Simulation parameter parsing, and setting defaults - enable_gpu = get(simParams, "gpu", true) + enable_gpu = get(simParams, "gpu", true); if enable_gpu check_use_cuda(); enable_gpu &= use_cuda[] end gpu_device = get(simParams, "gpu_device", 0) - Nthreads = get(simParams, "Nthreads", enable_gpu ? 1 : Nphyscores) + Nthreads = get(simParams, "Nthreads", enable_gpu ? 1 : Threads.nthreads()) Nblocks = get(simParams, "Nblocks", 20) Δt = get(simParams, "Δt", 1e-3) Δt_rf = get(simParams, "Δt_rf", 5e-5) sim_method = get(simParams, "sim_method", Bloch()) precision = get(simParams, "precision", "f32") - return_type = get(simParams, "return_type", "raw") - if enable_gpu check_use_cuda(); enable_gpu &= use_cuda[] end + return_type = get(simParams, "return_type", "raw") # Simulation init t, Δt = get_uniform_times(seq, Δt; Δt_rf) t = [t; t[end]+Δt[end]]