diff --git a/src/aks-preview/azext_aks_preview/debug/im/__init__.py b/src/aks-preview/azext_aks_preview/debug/im/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/aks-preview/azext_aks_preview/debug/im/data collection.md b/src/aks-preview/azext_aks_preview/debug/im/data collection.md new file mode 100644 index 0000000000..3d2e3f84b9 --- /dev/null +++ b/src/aks-preview/azext_aks_preview/debug/im/data collection.md @@ -0,0 +1,58 @@ +# Data Collection + +## vmss run-command in azure-cli + +### use case: invoke + +```bash +az vmss run-command invoke -g MC_azcli-aks-dev_dev100_westus2 -n aks-nodepool1-28844989-vmss --command-id RunShellScript --instance-id 0 --scripts 'for i in $(seq $1 $2); do echo $i; done' --parameters 1 100000 +``` + +- synchronous operation, need to wait until the operation is completed +- the output will be truncated and cannot be automatically exported to external storage + +### use case: CRUD + +```bash +az vmss run-command list -g MC_azcli-aks-dev_dev100_westus2 --vmss-name aks-nodepool1-28844989-vmss --instance-id "0" +az vmss run-command show -g MC_azcli-aks-dev_dev100_westus2 -n aks-nodepool1-28844989-vmss --instance-id 0 --name + +# run command for the first time +az vmss run-command create -g MC_azcli-aks-dev_dev100_westus2 --vmss-name aks-nodepool1-28844989-vmss --instance-id "0" --run-command-name "t1" --script 'for i in $(seq $abc $xyz); do echo $i; done' --parameters abc=1 xyz=100000 +az vmss run-command show -g MC_azcli-aks-dev_dev100_westus2 --vmss-name aks-nodepool1-28844989-vmss --instance-id "0" --run-command-name "t1" --instance-view # show command result + +# run command for the second time +az vmss run-command update -g MC_azcli-aks-dev_dev100_westus2 --vmss-name aks-nodepool1-28844989-vmss --instance-id "0" --run-command-name "t1" --script 'for i in $(seq $abc $xyz); do echo $i; done' --parameters abc=1 xyz=1000000 --output-blob-uri "https://aksclidebug.blob.core.windows.net/aksclidebug/abc?xxx" + 
+# generate storage account container sas token +end=`date -u -d "30 minutes" '+%Y-%m-%dT%H:%MZ'` +az storage container generate-sas --account-name aksclidebug -n aksclidebug --permissions acrw --expiry $end --https-only + +# run command for the third time +az vmss run-command update -g MC_azcli-aks-dev_dev100_westus2 --vmss-name aks-nodepool1-28844989-vmss --instance-id "0" --run-command-name "t1" --script 'for i in $(seq $abc $xyz); do echo $i; done' --parameters abc=1 xyz=1000000 --output-blob-uri "https://aksclidebug.blob.core.windows.net/aksclidebug/xyz?xxx" +``` + +- asynchronous operation, could be executed multiple times via update command +- the output will be truncated, but it can be automatically exported to external storage in its entirety + +## kubectl debug + +### use busybox to debug + +```bash +node_name=$(kubectl get no -o json | jq -r '.items[0].metadata.name') +kubectl debug no/${node_name} -i --image=mcr.microsoft.com/cbl-mariner/busybox:2.0 +busybox_pod_name=$(kubectl get po -o json | jq '.items[]|select(.status.phase=="Running")|select(.spec.containers[0].image=="mcr.microsoft.com/cbl-mariner/busybox:2.0")|.metadata.name') +kubectl exec ${busybox_pod_name} -- nslookup google.com +``` + +### get journal log + +```bash +node_name=$(kubectl get no -o json | jq -r '.items[0].metadata.name') +kubectl debug no/${node_name} -i --image=mcr.microsoft.com/cbl-mariner/base/core:2.0 +debug_pod_name=$(kubectl get po -o json | jq '.items[]|select(.status.phase=="Running")|select(.spec.containers[0].image=="mcr.microsoft.com/cbl-mariner/base/core:2.0")|.metadata.name') +kubectl exec ${debug_pod_name} -- tdnf install systemd tar -y +kubectl exec ${debug_pod_name} -- chroot /host sh -c "journalctl > journal.log" +kubectl cp ${debug_pod_name}:/host/journal.log journal.log +``` diff --git a/src/aks-preview/azext_aks_preview/debug/im/data_collector.py b/src/aks-preview/azext_aks_preview/debug/im/data_collector.py new file mode 100644 index 0000000000..ca106ab810 
class DataCollector:
    """Base class for a lazily executed, cached source of diagnostic data.

    Subclasses override :meth:`run` to store their result on ``self.data``;
    callers read it through :meth:`get_data`, which invokes ``run`` on demand.
    """

    def __init__(self) -> None:
        # None doubles as the "nothing collected yet" marker tested in get_data().
        self.data = None

    def run(self) -> None:
        """Collect the data and store it on ``self.data``.

        The base implementation collects nothing and leaves ``self.data`` as is.
        """

    def get_data(self, refresh_cached_data: bool = False):
        """Return the collected data, running the collector when required.

        Args:
            refresh_cached_data: When true, ``run`` is called again even if a
                previous result is already cached.

        Returns:
            The current value of ``self.data``. Because ``None`` marks "not
            collected", a collector whose ``run`` leaves ``None`` in place is
            re-run on every call.
        """
        if self.data is None or refresh_cached_data:
            self.run()
        return self.data

    def gc(self) -> None:
        """Clean up the resources used by the data collector (not implemented yet)."""
        # Declared with ``self`` so it can be called on an instance; the original
        # zero-argument signature raised TypeError for ``collector.gc()``.

    def export(self) -> None:
        """Export the data to a file/remote storage (not implemented yet)."""
        # Declared with ``self`` for the same reason as gc().


class DataCollectorCoreDNSConfigMap(DataCollector):
    """Placeholder collector registered under the ``core_dns_config_map`` key."""

    def __init__(self) -> None:
        super().__init__()

    def run(self) -> None:
        """Store ``"a"`` or ``"b"``, picked at random, in ``self.data``."""
        # Stand-in for real collection logic: randint(0, 1) selects the branch.
        self.data = "a" if randint(0, 1) else "b"


class DataCollectorIGDNS(DataCollector):
    """Placeholder collector registered under the ``ig_dns`` key."""

    def __init__(self) -> None:
        super().__init__()

    def run(self) -> None:
        """Store ``"c"`` or ``"d"``, picked at random, in ``self.data``."""
        # Stand-in for real collection logic: randint(0, 1) selects the branch.
        self.data = "c" if randint(0, 1) else "d"


class SharedDataCollector():
    """Registry holding one instance of each collector, keyed by name.

    Each accessor goes through ``DataCollector.get_data``, so a collector's
    result is computed on first access and the cached value is returned afterwards.
    """

    def __init__(self) -> None:
        self.data_collectors: Dict[str, DataCollector] = {
            "core_dns_config_map": DataCollectorCoreDNSConfigMap(),
            "ig_dns": DataCollectorIGDNS(),
        }

    def get_core_dns_config_map_data(self):
        """Return the (cached) data of the ``core_dns_config_map`` collector."""
        return self.data_collectors["core_dns_config_map"].get_data()

    def get_ig_dns_data(self):
        """Return the (cached) data of the ``ig_dns`` collector."""
        return self.data_collectors["ig_dns"].get_data()
class ActionStepA(ActionStep):
    """Action step "A"; running it only prints the step object."""

    # No __init__ override: construction is inherited unchanged from ActionStep.

    def run(self) -> Step:
        """Print this step; nothing is returned explicitly."""
        print(self)
        return None


class ActionStepB(ActionStep):
    """Action step "B"; running it only prints the step object."""

    # No __init__ override: construction is inherited unchanged from ActionStep.

    def run(self) -> Step:
        """Print this step; nothing is returned explicitly."""
        print(self)
        return None


class ActionStepC(ActionStep):
    """Action step "C"; running it only prints the step object."""

    # No __init__ override: construction is inherited unchanged from ActionStep.

    def run(self) -> Step:
        """Print this step; nothing is returned explicitly."""
        print(self)
        return None
class Orchestrator():
    """Runs the debug steps of a scenario in rounds.

    Every step of the current round is run before any of the follow-up steps
    those runs report; the follow-ups then form the next round.
    """

    def __init__(self) -> None:
        self.steps = []
        self.shared_data_collector = SharedDataCollector()
        self.knowledge_base = KnowledgeBase()

    def run(self, scenario: str) -> None:
        """Execute all steps tagged with ``scenario`` until no follow-ups remain.

        Args:
            scenario: Tag used to select the initial debug steps from the
                knowledge base.
        """
        self.steps = self.knowledge_base.get_debug_steps_by_scenario(scenario)
        pending = self.steps
        round_cnt = 0
        while pending:
            round_cnt += 1
            print(f"Round {round_cnt}")
            # Follow-ups go into a fresh list so the list being iterated
            # (initially self.steps) is never mutated.
            discovered = []
            for step in pending:
                # Only debug steps consume collected data; give them the shared
                # collector right before they run.
                if isinstance(step, DebugStep):
                    step.attch_shared_data_collector(self.shared_data_collector)
                step.run()
                discovered.extend(step.get_next_steps())
            pending = discovered
class Step:
    """Base unit of work: something that can be run and can name follow-up steps.

    Attributes are plain lists so subclasses can append to them directly.
    """

    def __init__(self) -> None:
        # Scenario labels used to select steps (e.g. by tag lookup).
        self.tags: List[str] = []
        # Steps to execute after this one; filled in by run() implementations.
        self.next_steps: List[Step] = []

    def run(self) -> Step:
        """Execute the step. The base implementation does nothing and returns None."""
        # NOTE(review): annotated ``-> Step`` but nothing is returned here;
        # annotation kept as-is so overriding signatures stay compatible.

    def get_next_steps(self) -> List[Step]:
        """Return the follow-up steps recorded on this step (the live list, not a copy)."""
        return self.next_steps


class DebugStep(Step):
    """A step that inspects data obtained through a shared data collector."""

    def __init__(self) -> None:
        super().__init__()
        # Defined up front so the attribute always exists; it stays None until a
        # collector is attached. Previously it was created only on attachment,
        # so reading it earlier raised AttributeError.
        self.shared_data_collector = None

    def attach_shared_data_collector(self, shared_data_collector: SharedDataCollector) -> None:
        """Store the collector this step should read its data from.

        Args:
            shared_data_collector: Collector registry made available to ``run``.
        """
        self.shared_data_collector = shared_data_collector

    def attch_shared_data_collector(self, shared_data_collector: SharedDataCollector) -> None:
        """Misspelled original name, kept for existing callers.

        Delegates to :meth:`attach_shared_data_collector`.
        """
        self.attach_shared_data_collector(shared_data_collector)


class ActionStep(Step):
    """Marker base class for steps that perform an action; adds no behavior to Step."""

    def __init__(self) -> None:
        super().__init__()


class NoActionStep(Step):
    """Terminal step: prints a marker when run and never reports follow-ups."""

    def __init__(self) -> None:
        super().__init__()

    def run(self) -> Step:
        """Print ``NoActionStep``; nothing is returned."""
        print("NoActionStep")

    def get_next_steps(self) -> List[Step]:
        """Always return a new empty list, ignoring anything in ``next_steps``."""
        return []