Merge pull request #250 from roboflow/feature/add_benchmarking_capabilities

Inference benchmarking
paulguerrie authored Feb 12, 2024
2 parents 979546d + 0d038ca commit ac39653
Showing 28 changed files with 1,407 additions and 88 deletions.
1 change: 1 addition & 0 deletions Makefile
@@ -46,6 +46,7 @@ create_wheels_for_gpu_notebook:
python .release/pypi/inference.core.setup.py bdist_wheel
python .release/pypi/inference.gpu.setup.py bdist_wheel
python .release/pypi/inference.sdk.setup.py bdist_wheel
python .release/pypi/inference.cli.setup.py bdist_wheel

upload_wheels:
twine upload dist/*.whl
35 changes: 35 additions & 0 deletions development/benchmark_scripts/benchmark_yolov8_in_api.sh
@@ -0,0 +1,35 @@
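# Usage: benchmark_yolov8_in_api.sh <output_dir> <batch_size> <clients> <requests>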
output_dir=$1
batch_size=$2
clients=$3
requests=$4


python -m inference_cli.main benchmark api-speed -m yolov8n-640 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8n_640_bs_${batch_size}_clients_${clients}_via_http.json
python -m inference_cli.main benchmark api-speed -m yolov8n-1280 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8n_1280_bs_${batch_size}_clients_${clients}_via_http.json

python -m inference_cli.main benchmark api-speed -m yolov8s-640 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8s_640_bs_${batch_size}_clients_${clients}_via_http.json
python -m inference_cli.main benchmark api-speed -m yolov8s-1280 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8s_1280_bs_${batch_size}_clients_${clients}_via_http.json

python -m inference_cli.main benchmark api-speed -m yolov8m-640 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8m_640_bs_${batch_size}_clients_${clients}_via_http.json
python -m inference_cli.main benchmark api-speed -m yolov8m-1280 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8m_1280_bs_${batch_size}_clients_${clients}_via_http.json

python -m inference_cli.main benchmark api-speed -m yolov8l-640 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8l_640_bs_${batch_size}_clients_${clients}_via_http.json
python -m inference_cli.main benchmark api-speed -m yolov8l-1280 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8l_1280_bs_${batch_size}_clients_${clients}_via_http.json

python -m inference_cli.main benchmark api-speed -m yolov8x-640 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8x_640_bs_${batch_size}_clients_${clients}_via_http.json
python -m inference_cli.main benchmark api-speed -m yolov8x-1280 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8x_1280_bs_${batch_size}_clients_${clients}_via_http.json

python -m inference_cli.main benchmark api-speed -m yolov8n-seg-640 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8n_seg_640_bs_${batch_size}_clients_${clients}_via_http.json
python -m inference_cli.main benchmark api-speed -m yolov8n-seg-1280 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8n_seg_1280_bs_${batch_size}_clients_${clients}_via_http.json

python -m inference_cli.main benchmark api-speed -m yolov8s-seg-640 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8s_seg_640_bs_${batch_size}_clients_${clients}_via_http.json
python -m inference_cli.main benchmark api-speed -m yolov8s-seg-1280 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8s_seg_1280_bs_${batch_size}_clients_${clients}_via_http.json

python -m inference_cli.main benchmark api-speed -m yolov8m-seg-640 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8m_seg_640_bs_${batch_size}_clients_${clients}_via_http.json
python -m inference_cli.main benchmark api-speed -m yolov8m-seg-1280 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8m_seg_1280_bs_${batch_size}_clients_${clients}_via_http.json

python -m inference_cli.main benchmark api-speed -m yolov8l-seg-640 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8l_seg_640_bs_${batch_size}_clients_${clients}_via_http.json
python -m inference_cli.main benchmark api-speed -m yolov8l-seg-1280 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8l_seg_1280_bs_${batch_size}_clients_${clients}_via_http.json

python -m inference_cli.main benchmark api-speed -m yolov8x-seg-640 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8x_seg_640_bs_${batch_size}_clients_${clients}_via_http.json
python -m inference_cli.main benchmark api-speed -m yolov8x-seg-1280 -c $clients -br $requests -bs $batch_size -o ${output_dir}/yolov8x_seg_1280_bs_${batch_size}_clients_${clients}_via_http.json
@@ -0,0 +1,33 @@
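# Positional arguments: <output_dir> <batch_size> <inferences>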
output_dir=$1
batch_size=$2
inferences=$3

python -m inference_cli.main benchmark python-package-speed -m yolov8n-640 -bi $inferences -bs $batch_size -o $output_dir/yolov8n_640_bs_$batch_size.json
python -m inference_cli.main benchmark python-package-speed -m yolov8n-1280 -bi $inferences -bs $batch_size -o $output_dir/yolov8n_1280_bs_$batch_size.json

python -m inference_cli.main benchmark python-package-speed -m yolov8s-640 -bi $inferences -bs $batch_size -o $output_dir/yolov8s_640_bs_$batch_size.json
python -m inference_cli.main benchmark python-package-speed -m yolov8s-1280 -bi $inferences -bs $batch_size -o $output_dir/yolov8s_1280_bs_$batch_size.json

python -m inference_cli.main benchmark python-package-speed -m yolov8m-640 -bi $inferences -bs $batch_size -o $output_dir/yolov8m_640_bs_$batch_size.json
python -m inference_cli.main benchmark python-package-speed -m yolov8m-1280 -bi $inferences -bs $batch_size -o $output_dir/yolov8m_1280_bs_$batch_size.json

python -m inference_cli.main benchmark python-package-speed -m yolov8l-640 -bi $inferences -bs $batch_size -o $output_dir/yolov8l_640_bs_$batch_size.json
python -m inference_cli.main benchmark python-package-speed -m yolov8l-1280 -bi $inferences -bs $batch_size -o $output_dir/yolov8l_1280_bs_$batch_size.json

python -m inference_cli.main benchmark python-package-speed -m yolov8x-640 -bi $inferences -bs $batch_size -o $output_dir/yolov8x_640_bs_$batch_size.json
python -m inference_cli.main benchmark python-package-speed -m yolov8x-1280 -bi $inferences -bs $batch_size -o $output_dir/yolov8x_1280_bs_$batch_size.json

python -m inference_cli.main benchmark python-package-speed -m yolov8n-seg-640 -bi $inferences -bs $batch_size -o $output_dir/yolov8n_seg_640_bs_$batch_size.json
python -m inference_cli.main benchmark python-package-speed -m yolov8n-seg-1280 -bi $inferences -bs $batch_size -o $output_dir/yolov8n_seg_1280_bs_$batch_size.json

python -m inference_cli.main benchmark python-package-speed -m yolov8s-seg-640 -bi $inferences -bs $batch_size -o $output_dir/yolov8s_seg_640_bs_$batch_size.json
python -m inference_cli.main benchmark python-package-speed -m yolov8s-seg-1280 -bi $inferences -bs $batch_size -o $output_dir/yolov8s_seg_1280_bs_$batch_size.json

python -m inference_cli.main benchmark python-package-speed -m yolov8m-seg-640 -bi $inferences -bs $batch_size -o $output_dir/yolov8m_seg_640_bs_$batch_size.json
python -m inference_cli.main benchmark python-package-speed -m yolov8m-seg-1280 -bi $inferences -bs $batch_size -o $output_dir/yolov8m_seg_1280_bs_$batch_size.json

python -m inference_cli.main benchmark python-package-speed -m yolov8l-seg-640 -bi $inferences -bs $batch_size -o $output_dir/yolov8l_seg_640_bs_$batch_size.json
python -m inference_cli.main benchmark python-package-speed -m yolov8l-seg-1280 -bi $inferences -bs $batch_size -o $output_dir/yolov8l_seg_1280_bs_$batch_size.json

python -m inference_cli.main benchmark python-package-speed -m yolov8x-seg-640 -bi $inferences -bs $batch_size -o $output_dir/yolov8x_seg_640_bs_$batch_size.json
python -m inference_cli.main benchmark python-package-speed -m yolov8x-seg-1280 -bi $inferences -bs $batch_size -o $output_dir/yolov8x_seg_1280_bs_$batch_size.json
61 changes: 61 additions & 0 deletions docs/inference_helpers/inference_cli.md
@@ -278,6 +278,67 @@ from `inference_sdk` library. See [sdk docs](./inference_sdk.md) to discover
which options can be configured via a `*.yml` file - configuration keys must match
the names of fields in the `InferenceConfiguration` object.

### inference benchmark

!!! note

This command was introduced in `inference_cli>=0.9.10`.

`inference benchmark` is a set of commands for benchmarking `inference`. Two types of benchmark are
available: `inference benchmark api-speed` tests the `inference` HTTP server, and `inference benchmark python-package-speed`
measures the performance of the `inference` Python package.

!!! tip

Use `inference benchmark api-speed --help` / `inference benchmark python-package-speed --help` to
display all options of the benchmark commands.

!!! tip

The Roboflow API key can be provided via the `ROBOFLOW_API_KEY` environment variable, for example:
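
```bash
export ROBOFLOW_API_KEY="<your-api-key>"
```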

#### Running benchmark of Python package

A basic benchmark can be run using the following command:

```bash
inference benchmark python-package-speed \
-m {your_model_id} \
-d {pre-configured dataset name or path to directory with images} \
-o {output_directory}
```
The command runs the specified number of inferences using the selected model and saves statistics (including benchmark
parameters, throughput, latency, errors and platform details) in the specified output directory.
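
For example, the number of inferences (`-bi`) and the batch size (`-bs`) can be set explicitly - the values below are illustrative:

```bash
inference benchmark python-package-speed \
    -m yolov8n-640 \
    -bi 1000 \
    -bs 4 \
    -o {output_directory}
```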

#### Running benchmark of `inference server`

!!! note

Before running the API benchmark, make sure the server is up and running:
```bash
inference server start
```
A basic benchmark can be run using the following command:

```bash
inference benchmark api-speed \
-m {your_model_id} \
-d {pre-configured dataset name or path to directory with images} \
-o {output_directory}
```
The command runs the specified number of inferences using the selected model and saves statistics (including benchmark
parameters, throughput, latency, errors and platform details) in the specified output directory.

This benchmark has more configuration options to support different ways of profiling the HTTP API. In the default mode,
a single client is spawned and sends requests sequentially, one after another. This may be suboptimal
in specific cases, so the number of concurrent clients can be specified with the `-c {number_of_clients}` option.
Each client sends its next request once the previous one is handled. This option still does not cover all test
scenarios. For instance, one may want to send `x` requests each second (which is closer to a
production environment where multiple clients send requests concurrently). In this scenario, the `--rps {value}`
option can be used (and `-c` is ignored). The value provided in `--rps` specifies how many requests
are spawned **each second** without waiting for previous requests to be handled. In I/O-intensive benchmark
scenarios, we suggest running the command from multiple separate processes and possibly multiple hosts.
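
For example (the model id and flag values are illustrative):

```bash
# 8 concurrent clients, 500 benchmark requests in total
inference benchmark api-speed -m yolov8n-640 -c 8 -br 500 -o {output_directory}

# fixed rate of 20 requests per second; the -c option is ignored in this mode
inference benchmark api-speed -m yolov8n-640 --rps 20 -o {output_directory}
```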

## Supported Devices

Roboflow Inference CLI currently supports the following device targets:
3 changes: 2 additions & 1 deletion inference/core/cache/model_artifacts.py
@@ -89,7 +89,8 @@ def get_cache_file_path(file: str, model_id: Optional[str] = None) -> str:

def clear_cache(model_id: Optional[str] = None) -> None:
cache_dir = get_cache_dir(model_id=model_id)
shutil.rmtree(cache_dir)
if os.path.exists(cache_dir):
shutil.rmtree(cache_dir)


def get_cache_dir(model_id: Optional[str] = None) -> str:
18 changes: 13 additions & 5 deletions inference/core/interfaces/http/http_api.py
@@ -131,6 +131,7 @@
RuntimePayloadError,
WorkflowsCompilerError,
)
from inference.models.aliases import resolve_roboflow_model_alias

if LAMBDA:
from inference.core.usage import trackUsage
@@ -307,11 +308,12 @@ async def process_inference_request(
Returns:
InferenceResponse: The response containing the inference results.
"""
self.model_manager.add_model(
inference_request.model_id, inference_request.api_key
de_aliased_model_id = resolve_roboflow_model_alias(
model_id=inference_request.model_id
)
self.model_manager.add_model(de_aliased_model_id, inference_request.api_key)
resp = await self.model_manager.infer_from_request(
inference_request.model_id, inference_request, **kwargs
de_aliased_model_id, inference_request, **kwargs
)
return orjson_response(resp)

@@ -473,7 +475,10 @@ async def model_add(request: AddModelRequest):
ModelsDescriptions: The object containing models descriptions
"""
logger.debug(f"Reached /model/add")
self.model_manager.add_model(request.model_id, request.api_key)
de_aliased_model_id = resolve_roboflow_model_alias(
model_id=request.model_id
)
self.model_manager.add_model(de_aliased_model_id, request.api_key)
models_descriptions = self.model_manager.describe_models()
return ModelsDescriptions.from_models_descriptions(
models_descriptions=models_descriptions
@@ -496,7 +501,10 @@ async def model_remove(request: ClearModelRequest):
ModelsDescriptions: The object containing models descriptions
"""
logger.debug(f"Reached /model/remove")
self.model_manager.remove(request.model_id)
de_aliased_model_id = resolve_roboflow_model_alias(
model_id=request.model_id
)
self.model_manager.remove(de_aliased_model_id)
models_descriptions = self.model_manager.describe_models()
return ModelsDescriptions.from_models_descriptions(
models_descriptions=models_descriptions
2 changes: 1 addition & 1 deletion inference/core/interfaces/stream/sinks.py
@@ -178,7 +178,7 @@ def send_predictions(
from inference.core.interfaces.stream.inference_pipeline import InferencePipeline
from inference.core.interfaces.stream.sinks import UDPSink
udp_sink = UDPSink(ip_address="127.0.0.1", port=9090)
udp_sink = UDPSink.init(ip_address="127.0.0.1", port=9090)
pipeline = InferencePipeline.init(
model_id="your-model/3",
6 changes: 5 additions & 1 deletion inference/core/managers/decorators/fixed_size_cache.py
@@ -43,7 +43,11 @@ def add_model(
self.remove(to_remove_model_id)

self._key_queue.append(queue_id)
return super().add_model(model_id, api_key, model_id_alias=model_id_alias)
try:
return super().add_model(model_id, api_key, model_id_alias=model_id_alias)
except Exception as error:
self._key_queue.remove(model_id)
raise error

def clear(self) -> None:
"""Removes all models from the manager."""
2 changes: 1 addition & 1 deletion inference/core/version.py
@@ -1,4 +1,4 @@
__version__ = "0.9.10rc2"
__version__ = "0.9.10rc3"


if __name__ == "__main__":