From 49816bed7c3186c5cacee76c52298ce820c8100b Mon Sep 17 00:00:00 2001
From: Yury Gaydaychuk <yury.gaydaychuk@intel.com>
Date: Thu, 12 Dec 2024 12:37:27 +0100
Subject: [PATCH 1/2] commit_slider: add LLMBenchMode (llmBench mode)

---
 .../tools/commit_slider/utils/cfg.json        |   1 +
 .../tools/commit_slider/utils/modes.py        | 180 ++++++++++++++++++
 2 files changed, 181 insertions(+)

diff --git a/src/plugins/intel_cpu/tools/commit_slider/utils/cfg.json b/src/plugins/intel_cpu/tools/commit_slider/utils/cfg.json
index 3e70fbd9f98df1..18f2db370fb65b 100644
--- a/src/plugins/intel_cpu/tools/commit_slider/utils/cfg.json
+++ b/src/plugins/intel_cpu/tools/commit_slider/utils/cfg.json
@@ -4,6 +4,7 @@
         "bmPerf" : "BenchmarkAppPerformanceMode",
         "compareBlobs" : "CompareBlobsMode",
         "ac" : "AccuracyCheckerMode",
+        "llmBench" : "LLMBenchMode",
         "nop" : "NopMode"
     },
     "traversalMap" : {
diff --git a/src/plugins/intel_cpu/tools/commit_slider/utils/modes.py b/src/plugins/intel_cpu/tools/commit_slider/utils/modes.py
index 6c1024ef1234a9..dfd829824f9798 100644
--- a/src/plugins/intel_cpu/tools/commit_slider/utils/modes.py
+++ b/src/plugins/intel_cpu/tools/commit_slider/utils/modes.py
@@ -284,6 +284,186 @@ def getCommitInfo(self, commit):
                 ci=super().getCommitInfo(commit),
                 d=commit.perfRel)
 
+
+class LLMBenchMode(Mode):
+    def __init__(self, cfg):
+        super().__init__(cfg)
+        self.perfRel = 0  # overwritten in compareCommits when a commit is bad
+        self.createCash()  # not defined in this class; presumably from Mode
+
+    def isPerformanceBased(self):
+        return True  # unconditional for this mode
+
+    def prepareRun(self, list, cfg):
+        """Measure the first commit (or take its value from cash), keep
+        it in self.sampleThroughput and return list unchanged."""
+        super().prepareRun(list, cfg)
+        sampleCommit = list[0].replace('"', "")
+        self.commonLogger.info(
+            "Prepare sample commit - {commit}".format(commit=sampleCommit)
+        )
+        commitLogger = getCommitLogger(cfg, sampleCommit)
+        isCommitCashed, cashedValue = self.getCommitIfCashed(sampleCommit)
+        if isCommitCashed:
+            logMsg = "Cashed commit - {commit}".format(commit=sampleCommit)
+            self.commonLogger.info(logMsg)
+            commitLogger.info(logMsg)
+            sampleValue = cashedValue
+        else:
+            handleCommit(sampleCommit, cfg)
+            output = fetchAppOutput(cfg, sampleCommit)
+            commitLogger.info(output)
+            # self.outPattern is assigned in checkCfg
+            matched = re.search(self.outPattern, output, flags=re.MULTILINE)
+            sampleValue = matched.group(1)
+            self.setCommitCash(sampleCommit, float(sampleValue))
+        self.sampleThroughput = float(sampleValue)
+        return list
+
+    def checkCfg(self, cfg):
+        """Validate runConfig; store the deviation and the metric regex."""
+        super().checkCfg(cfg)
+        runCfg = cfg["runConfig"]
+        if "perfAppropriateDeviation" not in runCfg:
+            raise CfgError("Appropriate deviation is not configured")
+        self.apprDev = runCfg["perfAppropriateDeviation"]
+        if "metric" in runCfg:
+            self.outPattern = self.specifyMetric(runCfg["metric"])
+        else:
+            self.outPattern = self.specifyMetric()
+
+    def specifyMetric(self, metric: str = "First token latency"):
+        """Return the regex whose group(1) captures the metric value."""
+        if metric == "First token latency":
+            num = r"([0-9]+[.]?[0-9]*|[.][0-9]+)"  # fraction is optional
+            return r"First token latency:\s*" + num + r"\s*ms/token"
+        raise CfgError("Metric {} is not supported".format(metric))
+
+    def preliminaryCheck(self, list, cfg):
+        # # model path checking - todo is necessary ?
+        # if cfg["preliminaryCheckCfg"]["checkBenchmarkModelPath"]:
+        #     cmdStr = cfg["appCmd"]
+        #     matcher = re.search(
+        #         "benchmark.*-m[\s*]([^\S]*)",
+        #         cmdStr,
+        #         flags=re.MULTILINE
+        #         )
+        #     if matcher is not None:
+        #         # pass if app is not openvino benchmark_app
+        #         try:
+        #             modelPath = extractModelPath(cmdStr)
+        #             if not os.path.isfile(modelPath):
+        #                 raise PreliminaryAnalysisError(
+        #                     "path {modelPath} does not exist, check config".format(
+        #                         modelPath=modelPath
+        #                     ),
+        #                     PreliminaryAnalysisError.PreliminaryErrType.WRONG_COMMANDLINE
+        #                 )
+        #         except (IndexError, ValueError):
+        #             raise PreliminaryAnalysisError(
+        #                 "commandline '{cmdStr}' is not correct, check config".format(
+        #                     cmdStr=cmdStr
+        #                 ),
+        #                 PreliminaryAnalysisError.PreliminaryErrType.WRONG_COMMANDLINE
+        #             )
+
+        # common if-degradation-exists check
+        super().preliminaryCheck(list, cfg)
+
+        # performance-specific: a border is checked only if enabled in cfg
+        isLeftStable = not cfg["preliminaryCheckCfg"]["leftCheck"] or\
+            self.preliminaryStabilityCheck(list[0], cfg)
+        isRightStable = not cfg["preliminaryCheckCfg"]["rightCheck"] or\
+            self.preliminaryStabilityCheck(list[-1], cfg)
+        if (not isLeftStable or not isRightStable):
+            raise PreliminaryAnalysisError(
+                "{lCommit} is {lStable}, {rCommit} is {rStable}".format(
+                    lCommit=list[0],
+                    rCommit=list[-1],
+                    lStable="stable" if isLeftStable else "unstable",
+                    rStable="stable" if isRightStable else "unstable"
+                ),
+                PreliminaryAnalysisError.PreliminaryErrType.UNSTABLE_APPLICATION
+                )
+
+    def compareCommits(self, lCommit: str, rCommit: str, cfg: dict):
+        """Return numericComparator's isBad verdict for the two commits."""
+        leftMetric = self.getPseudoMetric(lCommit, cfg)
+        rightMetric = self.getPseudoMetric(rCommit, cfg)
+        isBad, curRel = self.traversal.numericComparator(
+            leftMetric, rightMetric, self.apprDev)
+        if isBad:
+            # remembered so setOutputInfo can attach it to path commits
+            self.perfRel = curRel
+        commitLogger = getCommitLogger(cfg, rCommit.replace('"', ""))
+        commitLogger.info("Performance relation is {rel}".format(rel=curRel))
+        commitLogger.info(
+            "Commit is {status}".format(status="bad" if isBad else "good")
+        )
+        return isBad
+
+    def getPseudoMetric(self, commit, cfg):
+        """Return the metric value of commit, measuring it unless cashed.
+
+        The commit is handed to self.commitPath.accept in both cases; a
+        fresh measurement is stored with setCommitCash.
+        """
+        commit = commit.replace('"', "")
+        commitLogger = getCommitLogger(cfg, commit)
+        isCommitCashed, cashedMetric = self.getCommitIfCashed(commit)
+        # accept() is defined outside this class; called before measuring
+        pathCommit = Mode.CommitPath.PathCommit(
+            commit, Mode.CommitPath.CommitState.DEFAULT)
+        self.setOutputInfo(pathCommit)
+        self.commitPath.accept(self.traversal, pathCommit)
+        if isCommitCashed:
+            logMsg = "Cashed commit - {commit}".format(commit=commit)
+            self.commonLogger.info(logMsg)
+            commitLogger.info(logMsg)
+            return cashedMetric
+        # not cashed: handle the commit, run the app, parse and store
+        self.commonLogger.info("New commit: {commit}".format(commit=commit))
+        handleCommit(commit, cfg)
+        output = fetchAppOutput(cfg, commit)
+        commitLogger.info(output)
+        curMetric = float(
+            re.search(self.outPattern, output, flags=re.MULTILINE).group(1)
+        )
+        self.setCommitCash(commit, curMetric)
+        return curMetric
+
+    def preliminaryStabilityCheck(self, commit, cfg):
+        """Run the app tryCount times and return checkStability's verdict.
+
+        If the verdict is truthy, the last measured value is cashed.
+        """
+        commit = commit.replace('"', "")
+        self.commonLogger.info(
+            "Preliminary check of commit: {commit}".format(commit=commit)
+        )
+        handleCommit(commit, cfg)
+        dev = self.apprDev = cfg["runConfig"]["perfAppropriateDeviation"]
+        lastValue = 0  # stays 0 if tryCount is 0
+        samples = []
+        for _ in range(cfg["preliminaryCheckCfg"]["tryCount"]):
+            output = fetchAppOutput(cfg, commit)
+            matched = re.search(self.outPattern, output, flags=re.MULTILINE)
+            lastValue = float(matched.group(1))
+            samples.append(lastValue)
+        resStable = checkStability(samples, dev)
+        if resStable:
+            self.setCommitCash(commit, lastValue)
+        return resStable
+
+    def setOutputInfo(self, pathCommit):
+        pathCommit.perfRel = self.perfRel  # presumably read by getCommitInfo
+
+    def getCommitInfo(self, commit):
+        """Return the base commit info extended with commit.perfRel."""
+        baseInfo = super().getCommitInfo(commit)
+        return "{ci}, perf. ratio = {d}".format(ci=baseInfo, d=commit.perfRel)
+
+
 class AccuracyCheckerMode(Mode):
     def __init__(self, cfg):
         super().__init__(cfg)

From 2a8ce4f965eed8ea735984ff30b544c088c230cb Mon Sep 17 00:00:00 2001
From: Yury Gaydaychuk <yury.gaydaychuk@intel.com>
Date: Sun, 15 Dec 2024 19:23:48 +0100
Subject: [PATCH 2/2] commit_slider: add llm_bench cfg sample, drop dead comments

---
 .../utils/cfg_samples/llm_bench.json          | 72 +++++++++++++++++++
 .../tools/commit_slider/utils/modes.py        | 26 -------
 2 files changed, 72 insertions(+), 26 deletions(-)
 create mode 100644 src/plugins/intel_cpu/tools/commit_slider/utils/cfg_samples/llm_bench.json

diff --git a/src/plugins/intel_cpu/tools/commit_slider/utils/cfg_samples/llm_bench.json b/src/plugins/intel_cpu/tools/commit_slider/utils/cfg_samples/llm_bench.json
new file mode 100644
index 00000000000000..5ad25c48ec2ad9
--- /dev/null
+++ b/src/plugins/intel_cpu/tools/commit_slider/utils/cfg_samples/llm_bench.json
@@ -0,0 +1,72 @@
+{
+    "appCmd":"source {venvName}/bin/activate && cd {appPath} && python3.10 -m pip install --upgrade pip && python3.10 -m pip install openvino=={wheelVersion} openvino_genai=={wheelVersion} openvino_tokenizers=={wheelVersion} --find-links={precommitPath}wheels/ && python3.10 -m pip install -r requirements.txt && {cmd}",
+    "appPath" : "{appPath}",
+    "venvCfg":{
+       "venvEnabled":true,
+       "venvDir":"{workPath}/venv/",
+       "venvName":"tempVenv"
+    },
+    "commandList":[
+ 
+    ],
+    "runConfig":{
+       "mode":"llmBench",
+       "traversal":"firstFailedVersion",
+       "perfAppropriateDeviation" : 0.05,
+       "commitList" : {
+         "getCommitListCmd" : "git log {c1}..{c2} --boundary --pretty=\"%h\""
+     }
+    },
+    "dlbConfig":{
+       "launchedAsJob":false,
+       "toolName":"",
+       "wheelVersionsMap":{
+ 
+       },
+       "commonPath":"{commitPath}",
+       "subPath":"{subPath}",
+       "appPath":"",
+       "appCmd":""
+    },
+    "cachedPathConfig":{
+       "enabled":true,
+       "scheme":"mandatory",
+       "passCmdList":false,
+       "changeAppPath":false,
+       "commonPath":"{commitPath}",
+       "subPath":"{subPath}",
+       "cashMap":{
+ 
+       }
+    },
+    "substitutionRules":[
+       {
+          "name":"precommitPath",
+          "enabled":true,
+          "type":"map",
+          "placeholder":"precommitPath",
+          "from":"$.cachedPathConfig.cashMap",
+          "to":"$.appCmd"
+       },
+       {
+          "name":"wheelVersion",
+          "enabled":true,
+          "type":"map",
+          "placeholder":"wheelVersion",
+          "from":"$.dlbConfig.wheelVersionsMap",
+          "to":"$.appCmd"
+       }
+    ],
+    "subscriptions":[
+       {
+          "name":"wheelPathsMap",
+          "enabled":true
+       },
+       {
+          "name":"wheelVersionsMap",
+          "enabled":true
+       }
+    ],
+    "verboseOutput" : true,
+    "checkIfBordersDiffer" : true
+ }
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/tools/commit_slider/utils/modes.py b/src/plugins/intel_cpu/tools/commit_slider/utils/modes.py
index dfd829824f9798..9286bf2d33bc07 100644
--- a/src/plugins/intel_cpu/tools/commit_slider/utils/modes.py
+++ b/src/plugins/intel_cpu/tools/commit_slider/utils/modes.py
@@ -341,32 +341,6 @@ def specifyMetric(self, metric: str = "First token latency"):
 
     def preliminaryCheck(self, list, cfg):
         # # model path checking - todo is necessary ?
-        # if cfg["preliminaryCheckCfg"]["checkBenchmarkModelPath"]:
-        #     cmdStr = cfg["appCmd"]
-        #     matcher = re.search(
-        #         "benchmark.*-m[\s*]([^\S]*)",
-        #         cmdStr,
-        #         flags=re.MULTILINE
-        #         )
-        #     if matcher is not None:
-        #         # pass if app is not openvino benchmark_app
-        #         try:
-        #             modelPath = extractModelPath(cmdStr)
-        #             if not os.path.isfile(modelPath):
-        #                 raise PreliminaryAnalysisError(
-        #                     "path {modelPath} does not exist, check config".format(
-        #                         modelPath=modelPath
-        #                     ),
-        #                     PreliminaryAnalysisError.PreliminaryErrType.WRONG_COMMANDLINE
-        #                 )
-        #         except (IndexError, ValueError):
-        #             raise PreliminaryAnalysisError(
-        #                 "commandline '{cmdStr}' is not correct, check config".format(
-        #                     cmdStr=cmdStr
-        #                 ),
-        #                 PreliminaryAnalysisError.PreliminaryErrType.WRONG_COMMANDLINE
-        #             )
-
         # common if-degradation-exists check
         super().preliminaryCheck(list, cfg)