diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index 8a58dc27df1f83..78a364c18ca4e6 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -64,7 +64,7 @@ implemented in your solutions. Click the buttons below to see the chosen benchma :outline: :expand: - :material-regular:`bar_chart;1.4em` OVMS for GenAI (coming soon) + :material-regular:`bar_chart;1.4em` OVMS for GenAI diff --git a/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ovms-genai.json b/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ovms-genai.json index f96fb11e6b029d..0d53c3813542d2 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ovms-genai.json +++ b/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ovms-genai.json @@ -1,45 +1,330 @@ [ + { + "Platform": "Intel® Xeon® Platinum 8380", + "Model": "meta-llama/Llama-2-7b-chat-hf", + "featured_SKU": false, + "whats_new_model": false, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { + "Precisions": [ + { + "Throughput": { + "0.2": 94.97, + "0.4": 187.12, + "0.6": 271.85, + "0.8": 290.81, + "1.0": 291.39, + "2.0": 291.45, + "inf": 291.59 + }, + "Latency": { + "0.2": 74.35, + "0.4": 122.25, + "0.6": 467.49, + "0.8": 749.39, + "1.0": 771.39, + "2.0": 773.31, + "inf": 783.63 + } + } + ] + }, + "vLLM with OpenVINO backend": { + "Precisions": [ + { + "Throughput": { + "0.2": 94.83, + "0.4": 187.83, + "0.6": 272.32, + "0.8": 284.07, + "1.0": 291.88, + "2.0": 291.91, + "inf": 288.62 + }, + "Latency": { + "0.2": 82.31, + "0.4": 134.38, + "0.6": 495.99, + "0.8": 794.41, + "1.0": 798.39, + "2.0": 800.33, + "inf": 809.56 + } + } + ] + } + } + }, + { + "Platform": "Intel® Xeon® Platinum 8480+", + "Model": "meta-llama/Llama-2-7b-chat-hf", + "featured_SKU": true, + "whats_new_model": false, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { + "Precisions": [ + { + "Throughput": { + "0.2": 95.15, + "0.4": 188.31, + "0.6": 279.3, + "0.8": 366.78, + "1.0": 454.27, + "2.0": 788.9, + "inf": 825.97 + }, + "Latency": { + "0.2": 60.88, + "0.4": 71.96, + "0.6": 83.45, + "0.8": 103.77, + "1.0": 128.12, + "2.0": 237.62, + "inf": 253.59 + } + } + ] + }, + "vLLM with OpenVINO backend": { + "Precisions": [ + { + "Throughput": { + "0.2": 95.06, + "0.4": 188.47, + "0.6": 280.54, + "0.8": 367.47, + "1.0": 450.81, + "2.0": 774.57, + "inf": 793.78 + }, + "Latency": { + "0.2": 63.84, + "0.4": 76.22, + "0.6": 87.21, + "0.8": 104.75, + "1.0": 136.77, + "2.0": 259.2, + "inf": 273.58 + } + } + ] + } + } + }, { "Platform": "Intel® Xeon® Platinum 8580", - "Model": "mistralai/Mistral-7B-v0.1", - "PlatformType": "None", + "Model": "meta-llama/Llama-2-7b-chat-hf", + "featured_SKU": true, + "whats_new_model": false, + "PlatformType": "Server Platforms (Intel® Xeon®)", "Parameters": { - "Vllm": { + "OpenVINO Model Server": { "Precisions": [ { "Throughput": { - "0.2": "350.06", - "0.6": "486.89", - "0.8": "575.92", - "2.0": "778.07" + "0.2": 95.29, + "0.4": 188.33, + "0.6": 280.09, + "0.8": 367.29, + "1.0": 453.21, + "2.0": 780.05, + "inf": 751.34 + }, + "Latency": { + "0.2": 52.44, + "0.4": 70.06, + "0.6": 84.54, + "0.8": 108.91, + "1.0": 136.45, + "2.0": 253.55, + "inf": 281.85 } - }, + } + ] + }, + "vLLM with OpenVINO backend": { + "Precisions": [ + { + "Throughput": { + "0.2": 95.0, + "0.4": 188.26, + "0.6": 279.78, + "0.8": 366.69, + "1.0": 450.26, + "2.0": 770.74, + "inf": 794.39 + }, + "Latency": { + "0.2": 58.07, + "0.4": 77.65, + "0.6": 91.14, + "0.8": 113.61, + "1.0": 144.21, + "2.0": 269.13, + "inf": 273.27 + } + } + ] + } + } + }, + { + "Platform": "Intel® Xeon® Platinum 8380", + "Model": "meta-llama/Meta-Llama-3-8B-Instruct", + "featured_SKU": false, + "whats_new_model": true, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { + "Precisions": [ { + "Throughput": { + "0.2": 82.46, + "0.4": 162.73, + "0.6": 240.08, + "0.8": 273.75, + "1.0": 275.85, + "2.0": 276.3, + "inf": 275.15 + }, "Latency": { - "0.2": "60.93", - "0.6": "91.63", - "0.8": "113.61", - "2.0": "240.25" + "0.2": 76.49, + "0.4": 122.1, + "0.6": 318.14, + "0.8": 785.8, + "1.0": 805.58, + "2.0": 809.37, + "inf": 816.2 } } ] }, - "Ovms": { + "vLLM with OpenVINO backend": { "Precisions": [ { "Throughput": { - "0.2": "90.98", - "0.6": "266.24", - "0.8": "351.63", - "2.0": "195.16" + "0.2": 82.32, + "0.4": 162.98, + "0.6": 239.28, + "2.0": 270.37 + }, + "Latency": { + "0.2": 87.92, + "0.4": 142.3, + "0.6": 343.36, + "2.0": 873.0 } - }, + } + ] + } + } + }, + { + "Platform": "Intel® Xeon® Platinum 8480+", + "Model": "meta-llama/Meta-Llama-3-8B-Instruct", + "featured_SKU": true, + "whats_new_model": true, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { + "Precisions": [ { + "Throughput": { + "0.2": 82.61, + "0.4": 164.44, + "0.6": 244.92, + "0.8": 323.34, + "1.0": 400.78, + "2.0": 731.9, + "inf": 848.45 + }, "Latency": { - "0.2": "54.9", - "0.6": "78.78", - "0.8": "95.78", - "2.0": "352.23" + "0.2": 60.77, + "0.4": 69.1, + "0.6": 74.36, + "0.8": 81.41, + "1.0": 100.17, + "2.0": 206.5, + "inf": 246.56 + } + } + ] + }, + "vLLM with OpenVINO backend": { + "Precisions": [ + { + "Throughput": { + "0.2": 82.54, + "0.4": 163.66, + "0.6": 243.88, + "0.8": 322.75, + "1.0": 400.46, + "2.0": 727.1 + }, + "Latency": { + "0.2": 65.37, + "0.4": 75.87, + "0.6": 81.14, + "0.8": 93.91, + "1.0": 107.13, + "2.0": 229.57 + } + } + ] + } + } + }, + { + "Platform": "Intel® Xeon® Platinum 8580", + "Model": "meta-llama/Meta-Llama-3-8B-Instruct", + "featured_SKU": true, + "whats_new_model": true, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { + "Precisions": [ + { + "Throughput": { + "0.2": 82.55, + "0.4": 164.52, + "0.6": 243.96, + "0.8": 323.07, + "1.0": 399.68, + "2.0": 727.18, + "inf": 856.72 + }, + "Latency": { + "0.2": 54.57, + "0.4": 69.17, + "0.6": 80.32, + "0.8": 92.94, + "1.0": 111.06, + "2.0": 215.46, + "inf": 245.72 + } + } + ] + }, + "vLLM with OpenVINO backend": { + "Precisions": [ + { + "Throughput": { + "0.2": 82.64, + "0.6": 243.81, + "0.8": 321.8, + "1.0": 398.78, + "2.0": 722.48, + "inf": 792.34 + }, + "Latency": { + "0.2": 61.49, + "0.6": 90.54, + "0.8": 106.25, + "1.0": 123.6, + "2.0": 245.91, + "inf": 279.21 } } ] @@ -47,46 +332,168 @@ } }, { - "Platform": "Intel® Xeon® Platinum 8530", + "Platform": "Intel® Xeon® Platinum 8380", "Model": "mistralai/Mistral-7B-v0.1", - "PlatformType": "None", + "featured_SKU": false, + "whats_new_model": false, + "PlatformType": "Server Platforms (Intel® Xeon®)", "Parameters": { - "Vllm": { + "OpenVINO Model Server": { + "Precisions": [ + { + "Throughput": { + "0.2": 91.74, + "0.4": 180.4, + "0.6": 262.97, + "0.8": 287.36, + "1.0": 289.08, + "2.0": 289.06, + "inf": 290.69 + }, + "Latency": { + "0.2": 74.84, + "0.4": 115.4, + "0.6": 345.64, + "0.8": 757.42, + "1.0": 776.6, + "2.0": 778.29, + "inf": 784.42 + } + } + ] + }, + "vLLM with OpenVINO backend": { "Precisions": [ { "Throughput": { - "0.2": "350.06", - "0.6": "486.89", - "0.8": "575.92", - "2.0": "778.07" + "0.2": 97.21, + "0.4": 192.46, + "0.6": 265.82, + "0.8": 273.24, + "1.0": 272.65, + "inf": 274.0 + }, + "Latency": { + "0.2": 166.77, + "0.4": 161.76, + "0.6": 666.89, + "0.8": 802.15, + "1.0": 810.26, + "inf": 807.71 } - }, + } + ] + } + } + }, + { + "Platform": "Intel® Xeon® Platinum 8480+", + "Model": "mistralai/Mistral-7B-v0.1", + "featured_SKU": true, + "whats_new_model": false, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { + "Precisions": [ { + "Throughput": { + "0.2": 90.95, + "0.4": 181.06, + "0.6": 267.29, + "0.8": 351.62, + "1.0": 431.45, + "2.0": 751.85, + "inf": 596.0 + }, "Latency": { - "0.2": "60.93", - "0.6": "91.63", - "0.8": "113.61", - "2.0": "240.25" + "0.2": 59.95, + "0.4": 63.41, + "0.6": 73.42, + "0.8": 85.99, + "1.0": 98.67, + "2.0": 205.2, + "inf": 205.97 } } ] }, - "Ovms": { + "vLLM with OpenVINO backend": { + "Precisions": [ + { + "Throughput": { + "0.2": 98.18, + "0.4": 194.35, + "0.6": 287.28, + "0.8": 376.31, + "1.0": 460.32, + "2.0": 771.81, + "inf": 789.38 + }, + "Latency": { + "0.2": 64.88, + "0.4": 73.3, + "0.6": 84.37, + "0.8": 100.8, + "1.0": 133.98, + "2.0": 240.99, + "inf": 251.55 + } + } + ] + } + } + }, + { + "Platform": "Intel® Xeon® Platinum 8580", + "Model": "mistralai/Mistral-7B-v0.1", + "featured_SKU": true, + "whats_new_model": false, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { "Precisions": [ { "Throughput": { - "0.2": "90.98", - "0.6": "266.24", - "0.8": "351.63", - "2.0": "195.16" + "0.2": 91.2, + "0.4": 180.14, + "0.6": 267.75, + "0.8": 351.12, + "1.0": 428.31, + "2.0": 744.99, + "inf": 852.05 + }, + "Latency": { + "0.2": 54.31, + "0.4": 67.14, + "0.6": 77.59, + "0.8": 92.17, + "1.0": 112.75, + "2.0": 225.48, + "inf": 241.49 } - }, + } + ] + }, + "vLLM with OpenVINO backend": { + "Precisions": [ { + "Throughput": { + "0.2": 98.1, + "0.4": 194.47, + "0.6": 286.97, + "0.8": 375.84, + "1.0": 460.21, + "2.0": 764.54, + "inf": 787.97 + }, "Latency": { - "0.2": "54.9", - "0.6": "78.78", - "0.8": "95.78", - "2.0": "352.23" + "0.2": 62.26, + "0.4": 78.08, + "0.6": 91.61, + "0.8": 116.71, + "1.0": 141.76, + "2.0": 250.38, + "inf": 254.25 } } ]