diff --git a/Project.toml b/Project.toml
index a3abcba9c..ef95ef580 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "KomaMRI"
 uuid = "6a340f8b-2cdf-4c04-99be-4953d9b66d0a"
 authors = ["Carlos Castillo Passi <cncastillo@uc.cl>"]
-version = "0.7.1"
+version = "0.7.2"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -12,7 +12,6 @@ Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
 Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
 HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
-Hwloc = "0e44f5e4-bd66-52a0-8798-143a42290a1d"
 Interact = "c601a237-2ae4-5e1e-952c-7a85b0c7eef1"
 Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
@@ -37,7 +36,6 @@ CUDA = "3"
 FileIO = "1"
 Functors = "0.4"
 HDF5 = "0.16"
-Hwloc = "2"
 Interact = "0.10"
 Interpolations = "0.13, 0.14"
 JLD2 = "0.4"
diff --git a/examples/3.koma_paper/comparison_speed/Koma_rtx2080ti_gtx1650ti b/examples/3.koma_paper/comparison_speed/Koma_rtx2080ti_gtx1650ti
index ae344cc54..7232d95ee 100644
--- a/examples/3.koma_paper/comparison_speed/Koma_rtx2080ti_gtx1650ti
+++ b/examples/3.koma_paper/comparison_speed/Koma_rtx2080ti_gtx1650ti
@@ -1,63 +1,63 @@
 ### KOMA Multi-Shot Spiral CPU ###
-  4.468904 seconds (348.87 k allocations: 7.061 GiB, 3.55% gc time, 0.74% compilation time)
-  4.445230 seconds (304.62 k allocations: 7.059 GiB, 3.22% gc time)
-  4.444298 seconds (304.62 k allocations: 7.059 GiB, 3.22% gc time)
-  4.438783 seconds (304.62 k allocations: 7.059 GiB, 3.23% gc time)
-  4.438602 seconds (304.62 k allocations: 7.059 GiB, 3.16% gc time)
-  4.421823 seconds (304.62 k allocations: 7.059 GiB, 3.17% gc time)
-  4.422771 seconds (304.49 k allocations: 7.059 GiB, 3.13% gc time)
-  4.418151 seconds (304.62 k allocations: 7.059 GiB, 3.05% gc time)
-  4.419588 seconds (304.62 k allocations: 7.059 GiB, 3.12% gc time)
-  4.423932 seconds (304.62 k allocations: 7.059 GiB, 3.05% gc time)
-  4.438833 seconds (304.62 k allocations: 7.059 GiB, 3.07% gc time)
-  4.438832 seconds (304.62 k allocations: 7.059 GiB, 3.04% gc time)
-  4.486883 seconds (304.62 k allocations: 7.059 GiB, 3.07% gc time)
-  4.700309 seconds (304.49 k allocations: 7.059 GiB, 2.98% gc time)
-  4.691741 seconds (304.62 k allocations: 7.059 GiB, 2.94% gc time)
-  4.752007 seconds (304.36 k allocations: 7.059 GiB, 2.83% gc time)
-  4.466893 seconds (304.49 k allocations: 7.059 GiB, 3.01% gc time)
-  4.464153 seconds (304.49 k allocations: 7.059 GiB, 3.03% gc time)
-  4.493246 seconds (304.62 k allocations: 7.059 GiB, 3.19% gc time)
-  4.457662 seconds (304.62 k allocations: 7.059 GiB, 3.00% gc time)
+  1.791288 seconds (348.88 k allocations: 7.061 GiB, 6.10% gc time, 2.04% compilation time)
+  1.788641 seconds (305.13 k allocations: 7.059 GiB, 3.22% gc time)
+  1.797306 seconds (305.01 k allocations: 7.059 GiB, 3.97% gc time)
+  1.760078 seconds (305.16 k allocations: 7.059 GiB, 3.21% gc time)
+  1.780291 seconds (304.88 k allocations: 7.059 GiB, 4.14% gc time)
+  1.803833 seconds (304.74 k allocations: 7.059 GiB, 4.24% gc time)
+  1.783240 seconds (305.01 k allocations: 7.059 GiB, 3.56% gc time)
+  1.779908 seconds (305.00 k allocations: 7.059 GiB, 3.28% gc time)
+  1.765165 seconds (304.60 k allocations: 7.059 GiB, 3.57% gc time)
+  1.797075 seconds (305.39 k allocations: 7.059 GiB, 3.16% gc time)
+  1.761110 seconds (305.29 k allocations: 7.059 GiB, 2.69% gc time)
+  1.771107 seconds (305.01 k allocations: 7.059 GiB, 2.82% gc time)
+  1.790191 seconds (304.60 k allocations: 7.059 GiB, 3.04% gc time)
+  1.743203 seconds (305.00 k allocations: 7.059 GiB, 2.63% gc time)
+  1.811720 seconds (305.14 k allocations: 7.059 GiB, 2.85% gc time)
+  1.775254 seconds (305.00 k allocations: 7.059 GiB, 3.58% gc time)
+  1.768405 seconds (304.86 k allocations: 7.059 GiB, 2.96% gc time)
+  2.550956 seconds (304.59 k allocations: 7.059 GiB, 35.09% gc time)
+  1.737120 seconds (305.14 k allocations: 7.059 GiB, 2.59% gc time)
+  1.774394 seconds (305.01 k allocations: 7.059 GiB, 3.46% gc time)
 ### KOMA Multi-Shot Spiral GPU0 ###
-  0.191547 seconds (377.14 k allocations: 23.680 MiB, 17.10% compilation time)
-  0.150679 seconds (332.70 k allocations: 21.300 MiB)
-  0.151408 seconds (332.65 k allocations: 21.297 MiB)
-  0.150065 seconds (332.85 k allocations: 21.309 MiB)
-  0.151362 seconds (332.89 k allocations: 21.311 MiB)
-  0.147650 seconds (332.80 k allocations: 21.305 MiB)
-  0.151219 seconds (332.80 k allocations: 21.306 MiB)
-  0.150325 seconds (332.70 k allocations: 21.300 MiB)
-  0.148065 seconds (332.84 k allocations: 21.308 MiB)
-  0.152075 seconds (332.94 k allocations: 21.314 MiB)
-  0.150261 seconds (332.70 k allocations: 21.300 MiB)
-  0.150011 seconds (332.94 k allocations: 21.314 MiB)
-  0.149172 seconds (332.80 k allocations: 21.306 MiB)
-  0.148890 seconds (332.75 k allocations: 21.302 MiB)
-  0.148253 seconds (332.94 k allocations: 21.315 MiB)
-  0.152094 seconds (332.75 k allocations: 21.303 MiB)
-  0.147646 seconds (332.75 k allocations: 21.302 MiB)
-  0.151022 seconds (332.94 k allocations: 21.315 MiB)
-  0.149571 seconds (332.75 k allocations: 21.303 MiB)
-  0.147972 seconds (332.80 k allocations: 21.306 MiB)
+  0.222617 seconds (410.00 k allocations: 24.362 MiB, 10.73% gc time, 26.09% compilation time)
+  0.149812 seconds (355.18 k allocations: 21.673 MiB)
+  0.150912 seconds (355.30 k allocations: 21.673 MiB)
+  0.154048 seconds (355.06 k allocations: 21.669 MiB)
+  0.153823 seconds (355.26 k allocations: 21.673 MiB)
+  0.150777 seconds (355.44 k allocations: 21.681 MiB)
+  0.151338 seconds (355.24 k allocations: 21.677 MiB)
+  0.155108 seconds (355.14 k allocations: 21.671 MiB)
+  0.152609 seconds (355.07 k allocations: 21.667 MiB)
+  0.152995 seconds (355.29 k allocations: 21.678 MiB)
+  0.149100 seconds (355.31 k allocations: 21.676 MiB)
+  0.148364 seconds (355.13 k allocations: 21.671 MiB)
+  0.147747 seconds (355.24 k allocations: 21.675 MiB)
+  0.152601 seconds (355.35 k allocations: 21.675 MiB)
+  0.148840 seconds (355.51 k allocations: 21.683 MiB)
+  0.150223 seconds (355.15 k allocations: 21.673 MiB)
+  0.151326 seconds (355.41 k allocations: 21.681 MiB)
+  0.149905 seconds (355.60 k allocations: 21.687 MiB)
+  0.150076 seconds (355.47 k allocations: 21.685 MiB)
+  0.152087 seconds (354.98 k allocations: 21.662 MiB)
 ### KOMA Multi-Shot Spiral GPU1 ###
-  0.383762 seconds (387.52 k allocations: 24.026 MiB, 3.61% gc time, 8.37% compilation time)
-  0.326810 seconds (343.31 k allocations: 21.657 MiB, 2.48% gc time)
-  0.317470 seconds (343.31 k allocations: 21.658 MiB, 2.54% gc time)
-  0.315657 seconds (343.32 k allocations: 21.659 MiB, 2.59% gc time)
-  0.312953 seconds (343.41 k allocations: 21.663 MiB, 2.58% gc time)
-  0.317549 seconds (343.50 k allocations: 21.669 MiB, 2.56% gc time)
-  0.316872 seconds (343.41 k allocations: 21.663 MiB, 2.57% gc time)
-  0.318535 seconds (343.50 k allocations: 21.669 MiB, 2.54% gc time)
-  0.317094 seconds (343.45 k allocations: 21.666 MiB, 2.54% gc time)
-  0.317818 seconds (343.45 k allocations: 21.666 MiB, 2.57% gc time)
-  0.319701 seconds (343.45 k allocations: 21.666 MiB, 2.51% gc time)
-  0.320600 seconds (343.36 k allocations: 21.660 MiB, 2.50% gc time)
-  0.316938 seconds (343.36 k allocations: 21.660 MiB, 2.56% gc time)
-  0.318238 seconds (343.31 k allocations: 21.657 MiB, 2.51% gc time)
-  0.316747 seconds (343.45 k allocations: 21.666 MiB, 2.58% gc time)
-  0.323734 seconds (343.40 k allocations: 21.663 MiB, 2.49% gc time)
-  0.320136 seconds (343.40 k allocations: 21.663 MiB, 2.52% gc time)
-  0.316931 seconds (343.50 k allocations: 21.669 MiB, 2.55% gc time)
-  0.316066 seconds (343.40 k allocations: 21.663 MiB, 2.53% gc time)
-  0.315290 seconds (343.37 k allocations: 21.663 MiB, 2.57% gc time)
+  0.375906 seconds (410.02 k allocations: 24.403 MiB, 6.18% gc time, 8.52% compilation time)
+  0.328961 seconds (365.91 k allocations: 22.043 MiB, 2.27% gc time)
+  0.324912 seconds (365.93 k allocations: 22.044 MiB, 2.21% gc time)
+  0.317966 seconds (365.93 k allocations: 22.043 MiB, 2.31% gc time)
+  0.317574 seconds (365.99 k allocations: 22.047 MiB, 2.41% gc time)
+  0.316693 seconds (365.93 k allocations: 22.043 MiB, 2.25% gc time)
+  0.318666 seconds (365.97 k allocations: 22.047 MiB, 2.37% gc time)
+  0.316730 seconds (365.83 k allocations: 22.038 MiB, 2.29% gc time)
+  0.317443 seconds (365.69 k allocations: 22.031 MiB, 2.33% gc time)
+  0.318769 seconds (365.94 k allocations: 22.044 MiB, 2.36% gc time)
+  0.320683 seconds (365.92 k allocations: 22.043 MiB, 2.22% gc time)
+  0.319358 seconds (365.88 k allocations: 22.041 MiB, 2.38% gc time)
+  0.318528 seconds (365.78 k allocations: 22.035 MiB, 2.33% gc time)
+  0.322693 seconds (365.73 k allocations: 22.034 MiB, 2.31% gc time)
+  0.323850 seconds (365.83 k allocations: 22.038 MiB, 2.66% gc time)
+  0.320972 seconds (365.85 k allocations: 22.040 MiB, 2.41% gc time)
+  0.324835 seconds (365.77 k allocations: 22.035 MiB, 2.22% gc time)
+  0.322921 seconds (365.93 k allocations: 22.044 MiB, 2.19% gc time)
+  0.320311 seconds (365.80 k allocations: 22.037 MiB, 2.24% gc time)
+  0.324080 seconds (365.67 k allocations: 22.031 MiB, 2.23% gc time)
diff --git a/examples/3.koma_paper/comparison_speed/MRiLab_speed.jl b/examples/3.koma_paper/comparison_speed/MRiLab_speed.jl
index ca2e8bc23..7792eb0cf 100644
--- a/examples/3.koma_paper/comparison_speed/MRiLab_speed.jl
+++ b/examples/3.koma_paper/comparison_speed/MRiLab_speed.jl
@@ -39,22 +39,20 @@ if (ARGS == String[]) #No arguments, use defaults
     simParams = Dict{String,Any}(
         "Nblocks" => 20,
         "gpu" => true,
-        "gpu_device" => 0,
-        "Nthreads" => 1
+        "gpu_device" => 0
     )
 else
     simParams = Dict{String,Any}(
         "Nblocks" => 20,
         "gpu" => ARGS[1] == "gpu" ? true : false,
-        "gpu_device" => parse(Int64, ARGS[2]),
-        "Nthreads" => parse(Int64, ARGS[3])
+        "gpu_device" => parse(Int64, ARGS[2])
     )
 end
 
 Nexp = 20
 raw = @suppress simulate(phantom, seq, sys; simParams) #warmup
 for i = 1:Nexp
-    raw = simulate(phantom, seq, sys; simParams)
+    local raw = simulate(phantom, seq, sys; simParams)
 end
 # plot_signal(raw; range=[50.5, 54]) #; show_sim_blocks=true)
 
diff --git a/examples/3.koma_paper/comparison_speed/run_test.sh b/examples/3.koma_paper/comparison_speed/run_test.sh
index 6142cc653..52e9268d3 100644
--- a/examples/3.koma_paper/comparison_speed/run_test.sh
+++ b/examples/3.koma_paper/comparison_speed/run_test.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 echo "### KOMA Multi-Shot Spiral CPU ###" 	| tee .out
-julia $1 ./MRiLab_speed.jl cpu 0 8		| tee -a .out #CPU
+julia -t 8 $1 ./MRiLab_speed.jl cpu 0	| tee -a .out #CPU
 echo "### KOMA Multi-Shot Spiral GPU0 ###" 	| tee -a .out
-julia $1 ./MRiLab_speed.jl gpu 0 1		| tee -a .out #GPU0
+julia -t 8 $1 ./MRiLab_speed.jl gpu 0	| tee -a .out #GPU0
 echo "### KOMA Multi-Shot Spiral GPU1 ###" 	| tee -a .out
-julia $1 ./MRiLab_speed.jl gpu 1 1		| tee -a .out #GPU1
+julia -t 8 $1 ./MRiLab_speed.jl gpu 1	| tee -a .out #GPU1
diff --git a/src/KomaMRI.jl b/src/KomaMRI.jl
index 0b4e3fc56..1b0e6235d 100644
--- a/src/KomaMRI.jl
+++ b/src/KomaMRI.jl
@@ -11,7 +11,7 @@ using Scanf, ProgressMeter
 #Datatypes
 using Parameters
 #Simulation
-using CUDA, Interpolations, Hwloc
+using CUDA, Interpolations
 #Reconstruction
 using MRIReco, MRIFiles
 @reexport using MRIReco: RawAcquisitionData, AcquisitionData, reconstruction
diff --git a/src/simulation/SimulatorCore.jl b/src/simulation/SimulatorCore.jl
index 0910ec097..c11901a14 100644
--- a/src/simulation/SimulatorCore.jl
+++ b/src/simulation/SimulatorCore.jl
@@ -1,5 +1,3 @@
-const Nphyscores = Hwloc.num_physical_cores()
-
 abstract type SimulationMethod end #get all available types by using subtypes(KomaMRI.SimulationMethod)
 abstract type SpinStateRepresentation{T<:Real} end #get all available types by using subtypes(KomaMRI.SpinStateRepresentation)
 
@@ -28,7 +26,7 @@ separating the spins of the phantom `obj` in `Nthreads`.
 """
 function run_spin_precession_parallel!(obj::Phantom{T}, seq::DiscreteSequence{T}, sig::AbstractArray{Complex{T}},
     Xt::SpinStateRepresentation{T}, sim_method::SimulationMethod;
-    Nthreads=Nphyscores) where {T<:Real}
+    Nthreads=Threads.nthreads()) where {T<:Real}
 
     parts = kfoldperm(length(obj), Nthreads, type="ordered")
 
@@ -60,7 +58,7 @@ different number threads to excecute the process.
 """
 function run_spin_excitation_parallel!(obj::Phantom{T}, seq::DiscreteSequence{T}, 
     Xt::SpinStateRepresentation{T}, sim_method::SimulationMethod;
-    Nthreads=Nphyscores) where {T<:Real}
+    Nthreads=Threads.nthreads()) where {T<:Real}
 
     parts = kfoldperm(length(obj), Nthreads; type="ordered")
 
@@ -98,7 +96,7 @@ take advantage of CPU parallel processing.
 """
 function run_sim_time_iter!(obj::Phantom, seq::DiscreteSequence, sig::AbstractArray{Complex{T}}, 
     Xt::SpinStateRepresentation{T}, sim_method::SimulationMethod;
-    Nblocks=1, Nthreads=Nphyscores, parts=[1:length(seq)], w=nothing) where {T<:Real}
+    Nblocks=1, Nthreads=Threads.nthreads(), parts=[1:length(seq)], w=nothing) where {T<:Real}
     # Simulation
     rfs = 0
     samples = 1
@@ -168,16 +166,18 @@ julia> plot_signal(ismrmrd)
 """
 function simulate(obj::Phantom, seq::Sequence, sys::Scanner; simParams=Dict{String,Any}(), w=nothing)
     #Simulation parameter parsing, and setting defaults
     enable_gpu  = get(simParams, "gpu", true)
+    # Finalize enable_gpu (ANDed with use_cuda[]) before the Nthreads default below
+    # reads it; otherwise the default is chosen from the requested, not the actual, device.
+    if enable_gpu check_use_cuda(); enable_gpu &= use_cuda[] end
     gpu_device  = get(simParams, "gpu_device", 0)
-    Nthreads    = get(simParams, "Nthreads", enable_gpu ? 1 : Nphyscores)
+    Nthreads    = get(simParams, "Nthreads", enable_gpu ? 1 : Threads.nthreads())
     Nblocks     = get(simParams, "Nblocks", 20)
     Δt          = get(simParams, "Δt", 1e-3)
     Δt_rf       = get(simParams, "Δt_rf", 5e-5)
     sim_method  = get(simParams, "sim_method", Bloch())
     precision   = get(simParams, "precision", "f32")
     return_type = get(simParams, "return_type", "raw")
-    if enable_gpu check_use_cuda(); enable_gpu &= use_cuda[] end
     # Simulation init
     t, Δt = get_uniform_times(seq, Δt; Δt_rf)
     t = [t; t[end]+Δt[end]]