Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adjust comments about deployment_config and semgrep ci HTTP requests #338

Merged
merged 3 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 86 additions & 86 deletions semgrep_output_v1.atd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
(* Prelude *)
(*****************************************************************************)
(* Specification of the Semgrep CLI JSON output formats using ATD
* (see https://atd.readthedocs.io/en/latest/ for more information on ATD).
* (see https://atd.readthedocs.io/en/latest/ for information on ATD).
*
* This file specifies mainly the JSON formats of:
* - the output of the 'semgrep scan --json' command
Expand Down Expand Up @@ -1120,16 +1120,13 @@ type killing_parent = {
* types used to communicate with the Semgrep backend and are not meant
* to be consumed directly by Semgrep users or tools wrapping Semgrep.
*
* The sequence of HTTP requests for 'semgrep ci' is mostly:
* - /api/agent/deployments/current with token
* and response with deployment name in a deployment config
* (TODO? get rid of this one? useful?)
* The sequence of HTTP requests for 'semgrep ci' is:
* - /api/cli/scans when starting a scan, with information about the project
* and response with scan_id and scan_config including the rules to use
* - /results to send the findings to the backend
* and response with scan_id and scan_response including the rules to use
* - /api/agent/scans/<scan_id>/results to send the findings to the backend
* and response with errors and task_id
* - /complete when done, with the exit code and a few more information
* and response with app_block_override and reason
* - /api/agent/scans/<scan_id>/complete when done, with the exit code and
* some additional information, and response with app_block_override and reason
*
* alt: we could move all of this in a separate semgrep_posts_v1.atd file
* or semgrep_webapp_v1.atd
Expand All @@ -1139,14 +1136,6 @@ type killing_parent = {
(* Features *)
(* ----------------------------- *)

(* Flags telling whether a certain feature is available for a deployment.
* Every field uses the ATD '~' prefix (optional field with a default), so
* a flag may be omitted from the JSON.
*)
type has_features = {
~has_autofix : bool;
~has_deepsemgrep : bool;
~has_triage_via_comment : bool;
~has_dependency_query : bool;
}

type features = {
~autofix: bool;
~deepsemgrep: bool;
Expand Down Expand Up @@ -1185,64 +1174,8 @@ type action = [
]

(* ----------------------------- *)
(* CI configurations *)
(* CI scan response *)
(* ----------------------------- *)

(* Response by the backend to the CLI to the POST deployments/current.
* Only 'id' and 'name' are mandatory; the '~' fields are optional with a
* default. The ocaml annotation requests a 'deriving show' attribute on the
* generated OCaml type.
*)
type deployment_config <ocaml attr="deriving show"> = {
id : int;
(* the important piece, the deployment name (e.g., "returntocorp") *)
name : string;
~organization_id : int;
(* All three below seem similar to 'name' mostly (e.g., "returntocorp") *)
~display_name : string;
~scm_name : string;
~slug : string;
(* ex: "github" *)
~source_type : string;
(* ex: "member" *)
~default_user_role : string;
(* The has_* feature flags are included via inherit.
* TODO? factorize in ci_config_from_cloud instead of here and scan_config?
*)
inherit has_features;
}

(* Content of a possible .semgrepconfig.yml in the repository.
*
* This config allows configuring Semgrep per repo, e.g., to store
* a category/tag like "webapp" in a repo so that the Semgrep WebApp can
* return a set of relevant rules automatically for this repo in scan_config
* later when given this ci_config_from_repo in the scan_request.
*)
type ci_config_from_repo = {
(* version of the .semgrepconfig.yml format. "v1" right now (useful?).
* The python and ts annotations give the default used for this field in
* the Python and TypeScript targets.
*)
~version <python default="Version('v1')"> <ts default="'v1'">: version;
(* optional list of tags; the field may be absent *)
?tags: tag list option;
}
(* A tag is a plain string, as used in the 'tags' field of
* ci_config_from_repo. ex: "webapp"
*)
type tag = string

(* Response by the backend to the CLI to the POST deployments/scans/config
* DEPRECATED? seems to be used only by semgrep lsp.
*)
type scan_config = {
deployment_id: int;
deployment_name: string;
(* ex: "audit", "comment", "block" TODO use enum? TODO: seems dead *)
policy_names: string list;
(* rules raw content in JSON format (but still sent as a string) *)
rule_config: string;
(* fields of the features and triage_ignored records are included here *)
inherit features;
inherit triage_ignored;
(* glob patterns; optional with a default *)
~ignored_files: string list;
(* since 1.37.0 *)
?enabled_products: product list option;
(* since 1.64.0 *)
~actions: action list;
(* since 1.47.0 but not created by the backend (nor used by the CLI) *)
?ci_config_from_cloud: ci_config_from_cloud option;
}

(* Response from the backend to the CLI to the POST /api/cli/scans *)
type scan_response = {
info: scan_info;
Expand All @@ -1255,16 +1188,15 @@ type scan_response = {
type scan_info = {
?id: int option; (* the scan id, null for dry-runs *)
enabled_products: product list;
(* redundant with deployment_config?
* TODO? remove the intermediate call to get the deployment and
* start a scan to /api/cli/scans/ without first accessing
* api/agent/deployments/current?
(* These fields duplicate ones in deployment_config; they are repeated
* here so that 'semgrep ci' does not need an extra HTTP request to the
* deployment endpoint to get this info.
*)
deployment_id: int;
deployment_name: string;
}

(* config specific to the scan, eg *)
(* config specific to the scan *)
type scan_configuration = {
rules: raw_json; (* can we type this better *)
inherit triage_ignored;
Expand Down Expand Up @@ -1303,13 +1235,6 @@ type historical_configuration = {
?lookback_days: int option;
}

(* ----------------------------- *)
(* CI Deployment response *)
(* ----------------------------- *)
(* Wrapper record with a single 'deployment' field that holds the
* deployment_config.
*)
type deployment_response = {
deployment: deployment_config;
}

(* ----------------------------- *)
(* CI Scan request *)
(* ----------------------------- *)
Expand Down Expand Up @@ -1423,6 +1348,21 @@ type scan_metadata = {
?sms_scan_id: string option;
}

(* Content of a possible .semgrepconfig.yml in the repository.
*
* This config allows configuring Semgrep per repo, e.g., to store
* a category/tag like "webapp" in a repo so that the Semgrep WebApp can
* return a set of relevant rules automatically for this repo in scan_config
* later when given this ci_config_from_repo in the scan_request.
*)
type ci_config_from_repo = {
(* version of the .semgrepconfig.yml format. "v1" right now (useful?).
* The python and ts annotations give the default used for this field in
* the Python and TypeScript targets.
*)
~version <python default="Version('v1')"> <ts default="'v1'">: version;
(* optional list of tags; the field may be absent *)
?tags: tag list option;
}
(* A tag is a plain string, as used in the 'tags' field of
* ci_config_from_repo. ex: "webapp"
*)
type tag = string

(* ----------------------------- *)
(* Findings *)
(* ----------------------------- *)
Expand Down Expand Up @@ -1735,6 +1675,66 @@ type ci_scan_failure = {
stderr: string;
}

(* ----------------------------- *)
(* Other comms *)
(* ----------------------------- *)

(* Response by the backend to the CLI to the POST api/agent/deployments/current
* DEPRECATED: this is now used only in 'semgrep login' and in
* 'semgrep show whoami' to just check whether the token is valid.
* Some of the information in deployment_config is now returned
* directly in scan_response (e.g., the deployment_name).
*
* Only 'id' and 'name' are mandatory; the '~' fields are optional with a
* default. The ocaml annotation requests a 'deriving show' attribute on the
* generated OCaml type.
*)
type deployment_config <ocaml attr="deriving show"> = {
id : int;
(* the important piece, the deployment name (e.g., "returntocorp") *)
name : string;
~organization_id : int;
(* All three below seem similar to 'name' mostly (e.g., "returntocorp") *)
~display_name : string;
~scm_name : string;
~slug : string;
(* ex: "github" *)
~source_type : string;
(* ex: "member" *)
~default_user_role : string;
(* the has_* feature flags are included via inherit *)
inherit has_features;
}

(* Flags telling whether a certain feature is available for a deployment.
* Every field uses the ATD '~' prefix (optional field with a default), so
* a flag may be omitted from the JSON.
*)
type has_features = {
~has_autofix : bool;
~has_deepsemgrep : bool;
~has_triage_via_comment : bool;
~has_dependency_query : bool;
}

(* Wrapper record with a single 'deployment' field that holds the
* deployment_config.
* NOTE(review): presumably the JSON envelope returned by the
* deployments/current endpoint - confirm against the backend.
*)
type deployment_response = {
deployment: deployment_config;
}

(* Response by the backend to the CLI to the POST deployments/scans/config
* DEPRECATED? seems to be used only by semgrep lsp. Similar to scan_response.
*)
type scan_config = {
deployment_id: int;
deployment_name: string;
(* ex: "audit", "comment", "block" TODO use enum? TODO: seems dead *)
policy_names: string list;
(* rules raw content in JSON format (but still sent as a string) *)
rule_config: string;
(* fields of the features and triage_ignored records are included here *)
inherit features;
inherit triage_ignored;
(* glob patterns; optional with a default *)
~ignored_files: string list;
(* since 1.37.0 *)
?enabled_products: product list option;
(* since 1.64.0 *)
~actions: action list;
(* since 1.47.0 but not created by the backend (nor used by the CLI) *)
?ci_config_from_cloud: ci_config_from_cloud option;
}

(* ----------------------------- *)
(* TODO a better CI config from cloud *)
(* ----------------------------- *)
Expand Down
Loading
Loading