Skip to content

Commit

Permalink
NEOS-1703: add support for entity specific anonymization (#3123)
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzelei authored Jan 10, 2025
1 parent a8689a7 commit 04a26cc
Show file tree
Hide file tree
Showing 13 changed files with 1,038 additions and 776 deletions.
1,239 changes: 633 additions & 606 deletions backend/gen/go/protos/mgmt/v1alpha1/transformer.pb.go

Large diffs are not rendered by default.

8 changes: 7 additions & 1 deletion backend/protos/mgmt/v1alpha1/transformer.proto
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,12 @@ message TransformPiiText {
// If not provided, will use a default language if specified on the server.
// Depending on the server configuration, may attempt to autodetect from input.
optional string language = 6;

// A map of entity names to anonymizer configurations.
// The key in this map corresponds to a recognized entity (e.g. PERSON, PHONE_NUMBER) which can be found via the GetPiiTextEntities() RPC method.
// The value in this map corresponds to the anonymizer configuration to use for that entity.
// If an entity has no entry in this map, the default_anonymizer will be used for that entity.
map<string, PiiAnonymizer> entity_anonymizers = 7;
}

message PiiDenyRecognizer {
Expand All @@ -280,7 +286,7 @@ message PiiAnonymizer {
}

message Replace {
// The value to replace. If not provided, a template token of the anonymizer is used.
// The value to replace. If not provided, a template token of the anonymizer is used (e.g. A PERSON entity is replaced with: <PERSON>)
optional string value = 1;
}
message Redact {}
Expand Down
26 changes: 25 additions & 1 deletion docs/openapi/mgmt/v1alpha1/anonymization.openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,7 @@ components:
value:
type: string
title: value
description: The value to replace. If not provided, a template token of the anonymizer is used.
description: 'The value to replace. If not provided, a template token of the anonymizer is used (e.g. A PERSON entity is replaced with: <PERSON>)'
title: Replace
additionalProperties: false
mgmt.v1alpha1.PiiDenyRecognizer:
Expand Down Expand Up @@ -1237,9 +1237,33 @@ components:
Two characters for the desired language in ISO_639-1 format.
If not provided, will use a default language if specified on the server.
Depending on the server configuration, may attempt to autodetect from input.
entityAnonymizers:
type: object
title: entity_anonymizers
additionalProperties:
allOf:
- title: value
- $ref: '#/components/schemas/mgmt.v1alpha1.PiiAnonymizer'
description: |-
A map of entity names to anonymizer configurations.
The key in this map corresponds to a recognized entity (e.g. PERSON, PHONE_NUMBER) which can be found via the GetPiiTextEntities() RPC method.
The value in this map corresponds to the anonymizer configuration to use for that entity.
If not provided, the default_anonymizer will be used.
title: TransformPiiText
additionalProperties: false
description: NeosyncCloud/Enterprise only transformer for anonymizing PII Text
mgmt.v1alpha1.TransformPiiText.EntityAnonymizersEntry:
type: object
properties:
key:
type: string
title: key
value:
allOf:
- title: value
- $ref: '#/components/schemas/mgmt.v1alpha1.PiiAnonymizer'
title: EntityAnonymizersEntry
additionalProperties: false
mgmt.v1alpha1.TransformString:
type: object
allOf:
Expand Down
26 changes: 25 additions & 1 deletion docs/openapi/mgmt/v1alpha1/job.openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4284,7 +4284,7 @@ components:
value:
type: string
title: value
description: The value to replace. If not provided, a template token of the anonymizer is used.
description: 'The value to replace. If not provided, a template token of the anonymizer is used (e.g. A PERSON entity is replaced with: <PERSON>)'
title: Replace
additionalProperties: false
mgmt.v1alpha1.PiiDenyRecognizer:
Expand Down Expand Up @@ -5033,9 +5033,33 @@ components:
Two characters for the desired language in ISO_639-1 format.
If not provided, will use a default language if specified on the server.
Depending on the server configuration, may attempt to autodetect from input.
entityAnonymizers:
type: object
title: entity_anonymizers
additionalProperties:
allOf:
- title: value
- $ref: '#/components/schemas/mgmt.v1alpha1.PiiAnonymizer'
description: |-
A map of entity names to anonymizer configurations.
The key in this map corresponds to a recognized entity (e.g. PERSON, PHONE_NUMBER) which can be found via the GetPiiTextEntities() RPC method.
The value in this map corresponds to the anonymizer configuration to use for that entity.
If not provided, the default_anonymizer will be used.
title: TransformPiiText
additionalProperties: false
description: NeosyncCloud/Enterprise only transformer for anonymizing PII Text
mgmt.v1alpha1.TransformPiiText.EntityAnonymizersEntry:
type: object
properties:
key:
type: string
title: key
value:
allOf:
- title: value
- $ref: '#/components/schemas/mgmt.v1alpha1.PiiAnonymizer'
title: EntityAnonymizersEntry
additionalProperties: false
mgmt.v1alpha1.TransformString:
type: object
allOf:
Expand Down
26 changes: 25 additions & 1 deletion docs/openapi/mgmt/v1alpha1/transformer.openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1357,7 +1357,7 @@ components:
value:
type: string
title: value
description: The value to replace. If not provided, a template token of the anonymizer is used.
description: 'The value to replace. If not provided, a template token of the anonymizer is used (e.g. A PERSON entity is replaced with: <PERSON>)'
title: Replace
additionalProperties: false
mgmt.v1alpha1.PiiDenyRecognizer:
Expand Down Expand Up @@ -1754,9 +1754,33 @@ components:
Two characters for the desired language in ISO_639-1 format.
If not provided, will use a default language if specified on the server.
Depending on the server configuration, may attempt to autodetect from input.
entityAnonymizers:
type: object
title: entity_anonymizers
additionalProperties:
allOf:
- title: value
- $ref: '#/components/schemas/mgmt.v1alpha1.PiiAnonymizer'
description: |-
A map of entity names to anonymizer configurations.
The key in this map corresponds to a recognized entity (e.g. PERSON, PHONE_NUMBER) which can be found via the GetPiiTextEntities() RPC method.
The value in this map corresponds to the anonymizer configuration to use for that entity.
If not provided, the default_anonymizer will be used.
title: TransformPiiText
additionalProperties: false
description: NeosyncCloud/Enterprise only transformer for anonymizing PII Text
mgmt.v1alpha1.TransformPiiText.EntityAnonymizersEntry:
type: object
properties:
key:
type: string
title: key
value:
allOf:
- title: value
- $ref: '#/components/schemas/mgmt.v1alpha1.PiiAnonymizer'
title: EntityAnonymizersEntry
additionalProperties: false
mgmt.v1alpha1.TransformString:
type: object
allOf:
Expand Down
26 changes: 25 additions & 1 deletion docs/openapi/neosync.mgmt.v1alpha1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4706,7 +4706,7 @@ components:
title: value
description: >-
The value to replace. If not provided, a template token of the
anonymizer is used.
anonymizer is used (e.g. A PERSON entity is replaced with: <PERSON>)
title: Replace
additionalProperties: false
mgmt.v1alpha1.PiiDenyRecognizer:
Expand Down Expand Up @@ -5085,9 +5085,33 @@ components:
Two characters for the desired language in ISO_639-1 format.
If not provided, will use a default language if specified on the server.
Depending on the server configuration, may attempt to autodetect from input.
entityAnonymizers:
type: object
title: entity_anonymizers
additionalProperties:
allOf:
- title: value
- $ref: '#/components/schemas/mgmt.v1alpha1.PiiAnonymizer'
description: |-
A map of entity names to anonymizer configurations.
The key in this map corresponds to a recognized entity (e.g. PERSON, PHONE_NUMBER) which can be found via the GetPiiTextEntities() RPC method.
The value in this map corresponds to the anonymizer configuration to use for that entity.
If not provided, the default_anonymizer will be used.
title: TransformPiiText
additionalProperties: false
description: NeosyncCloud/Enterprise only transformer for anonymizing PII Text
mgmt.v1alpha1.TransformPiiText.EntityAnonymizersEntry:
type: object
properties:
key:
type: string
title: key
value:
allOf:
- title: value
- $ref: '#/components/schemas/mgmt.v1alpha1.PiiAnonymizer'
title: EntityAnonymizersEntry
additionalProperties: false
mgmt.v1alpha1.TransformString:
type: object
allOf:
Expand Down
30 changes: 17 additions & 13 deletions docs/protos/mgmt/v1alpha1/transformer.proto.mdx

Large diffs are not rendered by default.

50 changes: 49 additions & 1 deletion docs/protos/proto_docs.json
Original file line number Diff line number Diff line change
Expand Up @@ -1686,7 +1686,7 @@
"fields": [
{
"name": "value",
"description": "The value to replace. If not provided, a template token of the anonymizer is used.",
"description": "The value to replace. If not provided, a template token of the anonymizer is used (e.g. A PERSON entity is replaced with: \u003cPERSON\u003e)",
"label": "optional",
"type": "string",
"longType": "string",
Expand Down Expand Up @@ -2247,6 +2247,54 @@
"isoneof": true,
"oneofdecl": "_language",
"defaultValue": ""
},
{
"name": "entity_anonymizers",
"description": "A map of entity names to anonymizer configurations.\nThe key in this map corresponds to a recognized entity (e.g. PERSON, PHONE_NUMBER) which can be found via the GetPiiTextEntities() RPC method.\nThe value in this map corresponds to the anonymizer configuration to use for that entity.\nIf not provided, the default_anonymizer will be used.",
"label": "repeated",
"type": "EntityAnonymizersEntry",
"longType": "TransformPiiText.EntityAnonymizersEntry",
"fullType": "mgmt.v1alpha1.TransformPiiText.EntityAnonymizersEntry",
"ismap": true,
"isoneof": false,
"oneofdecl": "",
"defaultValue": ""
}
]
},
{
"name": "EntityAnonymizersEntry",
"longName": "TransformPiiText.EntityAnonymizersEntry",
"fullName": "mgmt.v1alpha1.TransformPiiText.EntityAnonymizersEntry",
"description": "",
"hasExtensions": false,
"hasFields": true,
"hasOneofs": false,
"extensions": [],
"fields": [
{
"name": "key",
"description": "",
"label": "",
"type": "string",
"longType": "string",
"fullType": "string",
"ismap": false,
"isoneof": false,
"oneofdecl": "",
"defaultValue": ""
},
{
"name": "value",
"description": "",
"label": "",
"type": "PiiAnonymizer",
"longType": "PiiAnonymizer",
"fullType": "mgmt.v1alpha1.PiiAnonymizer",
"ismap": false,
"isoneof": false,
"oneofdecl": "",
"defaultValue": ""
}
]
},
Expand Down
14 changes: 12 additions & 2 deletions frontend/packages/sdk/src/client/mgmt/v1alpha1/transformer_pb.ts

Large diffs are not rendered by default.

33 changes: 26 additions & 7 deletions internal/ee/transformers/functions/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,11 @@ func TransformPiiText(

analysisResults := removeAllowedPhrases(*analyzeResp.JSON200, value, config.GetAllowedPhrases())

defaultAnon, ok, err := getDefaultAnonymizer(config.GetDefaultAnonymizer())
anonymizers, err := buildAnonymizers(config)
if err != nil {
return "", fmt.Errorf("unable to build default anonymizer: %w", err)
}
anonymizers := map[string]presidioapi.AnonymizeRequest_Anonymizers_AdditionalProperties{}
if ok {
anonymizers["DEFAULT"] = *defaultAnon
return "", fmt.Errorf("unable to build anonymizers: %w", err)
}

anonResp, err := anonymizeClient.PostAnonymizeWithResponse(ctx, presidioapi.AnonymizeRequest{
AnalyzerResults: presidioapi.ToAnonymizeRecognizerResults(analysisResults),
Text: value,
Expand All @@ -66,6 +63,28 @@ func TransformPiiText(
return *anonResp.JSON200.Text, nil
}

// buildAnonymizers assembles the anonymizer map for an anonymize request from
// the transformer configuration.
//
// The configured default anonymizer, when toPresidioAnonymizerConfig reports
// one, is stored under the "DEFAULT" key. Every entry of the entity anonymizer
// map is then converted and stored under its entity name. Entity entries are
// written after the default, so an entity literally named "DEFAULT" replaces
// the default entry.
//
// Returns the populated (possibly empty) map, or a nil map and a wrapped error
// as soon as any conversion fails.
func buildAnonymizers(config *mgmtv1alpha1.TransformPiiText) (map[string]presidioapi.AnonymizeRequest_Anonymizers_AdditionalProperties, error) {
	entityConfigs := config.GetEntityAnonymizers()
	anonymizers := make(map[string]presidioapi.AnonymizeRequest_Anonymizers_AdditionalProperties, len(entityConfigs)+1)

	// Register the fallback first; it is optional and skipped when absent.
	fallback, hasFallback, err := toPresidioAnonymizerConfig(config.GetDefaultAnonymizer())
	if err != nil {
		return nil, fmt.Errorf("unable to build default anonymizer: %w", err)
	}
	if hasFallback {
		anonymizers["DEFAULT"] = *fallback
	}

	// Convert each per-entity configuration, skipping entries that yield no config.
	for name, dto := range entityConfigs {
		converted, present, convErr := toPresidioAnonymizerConfig(dto)
		switch {
		case convErr != nil:
			return nil, fmt.Errorf("unable to build entity %s anonymizer: %w", name, convErr)
		case present:
			anonymizers[name] = *converted
		}
	}

	return anonymizers, nil
}

func removeAllowedPhrases(
results []presidioapi.RecognizerResultWithAnaysisExplanation,
text string,
Expand Down Expand Up @@ -103,7 +122,7 @@ func buildAdhocRecognizers(dtos []*mgmtv1alpha1.PiiDenyRecognizer) []presidioapi
return output
}

func getDefaultAnonymizer(dto *mgmtv1alpha1.PiiAnonymizer) (*presidioapi.AnonymizeRequest_Anonymizers_AdditionalProperties, bool, error) {
func toPresidioAnonymizerConfig(dto *mgmtv1alpha1.PiiAnonymizer) (*presidioapi.AnonymizeRequest_Anonymizers_AdditionalProperties, bool, error) {
switch cfg := dto.GetConfig().(type) {
case *mgmtv1alpha1.PiiAnonymizer_Redact_:
ap := &presidioapi.AnonymizeRequest_Anonymizers_AdditionalProperties{}
Expand Down
Loading

0 comments on commit 04a26cc

Please sign in to comment.