// trial_datastore.proto

// Copyright 2021 AI Redefined Inc. <dev+cogment@ai-r.com>
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package cogmentAPI;

import "cogment/api/common.proto";

// API for the trial datastore, which stores trial samples.
service TrialDatastoreSP {
  // -- Online operations (e.g. during training)

  // Retrieve the trials matching the given request.
  rpc RetrieveTrials(RetrieveTrialsRequest) returns (RetrieveTrialsReply);

  // Retrieve, as a stream, the samples from the trials matching the given
  // request; the matching trials can still be ongoing.
  rpc RetrieveSamples(RetrieveSamplesRequest) returns (stream RetrieveSampleReply);

  // -- Offline trial data management operations

  // Add a trial to the datastore; once a trial is added, its samples can be
  // retrieved.
  // Expected header metadata:
  // - trial-id
  rpc AddTrial(AddTrialRequest) returns (AddTrialReply);

  // Add a stream of samples to a trial.
  // Expected header metadata:
  // - trial-id
  rpc AddSample(stream AddSampleRequest) returns (AddSamplesReply);

  // Delete the trials matching the given request; on failure no trial is
  // deleted.
  rpc DeleteTrials(DeleteTrialsRequest) returns (DeleteTrialsReply);
}

// --- Reply/Request Messages

// Request of `TrialDatastoreSP.RetrieveTrials`.
// The matching trials can be retrieved over several requests, see
// `trials_count` and `trial_handle`.
message RetrieveTrialsRequest {
  repeated string trial_ids = 1; // List of desired trial ids, if empty all trials are returned

  uint32 timeout = 2; // Wait for trials that might be created within this duration (in ms).

  uint32 trials_count = 3; // Desired number of trials in the reply, 0 means no limit
  string trial_handle = 4; // Leave empty for the initial request; to access the next trials,
                           // send the otherwise same request with this field set to the
                           // previously received `RetrieveTrialsReply.next_trial_handle`
}

// Reply of `TrialDatastoreSP.RetrieveTrials`.
message RetrieveTrialsReply {
  // Information about the retrieved trials, one entry per trial
  repeated StoredTrialInfo trial_infos = 1;

  // Handle to provide as `RetrieveTrialsRequest.trial_handle` to access the next trials
  string next_trial_handle = 2;
}

// Request of `TrialDatastoreSP.RetrieveSamples`.
message RetrieveSamplesRequest {
  // Defines a list of filters that the returned samples will all match
  repeated string trial_ids = 1;     // List of desired trial ids, if empty no data will be returned
  repeated string actor_names = 2;   // List of desired actor names, if empty all actor samples will be returned
  repeated string actor_classes = 3; // List of desired actor classes, if empty all actor samples will be returned
  repeated string actor_implementations = 4; // List of desired actor implementations, if empty all actor samples will be returned
  repeated StoredTrialSampleField selected_sample_fields = 5; // Which fields of `StoredTrialActorSample` should be returned, if empty all fields are returned
}

// One element of the reply stream of `TrialDatastoreSP.RetrieveSamples`.
message RetrieveSampleReply {
  StoredTrialSample trial_sample = 1; // The retrieved sample
}

// Request of `TrialDatastoreSP.DeleteTrials`.
message DeleteTrialsRequest {
  // Ids of the trials to delete
  // NOTE(review): the behavior for an empty list is not specified in this file - confirm
  repeated string trial_ids = 1;
}

// Reply of `TrialDatastoreSP.DeleteTrials`, currently carries no data.
message DeleteTrialsReply {}

// Request of `TrialDatastoreSP.AddTrial`.
// The id of the added trial is not part of this message: the RPC expects it
// in the `trial-id` header metadata.
message AddTrialRequest {
  // NOTE(review): presumably the id of the user the trial belongs to - confirm
  string user_id = 1;
  // Parameters of the added trial
  // (`TrialParams` is not defined in this file, presumably it comes from the imported common.proto)
  TrialParams trial_params = 2;
}

// Reply of `TrialDatastoreSP.AddTrial`, currently carries no data.
message AddTrialReply {}

// One element of the request stream of `TrialDatastoreSP.AddSample`.
message AddSampleRequest {
  StoredTrialSample trial_sample = 1; // The sample to add
}

// Reply of `TrialDatastoreSP.AddSample`, currently carries no data.
message AddSamplesReply {}

// --- TrialDatastore main messages

// Information about a stored trial, as returned in `RetrieveTrialsReply.trial_infos`.
message StoredTrialInfo {
  string trial_id = 1; // Id of the trial
  TrialState last_state = 2; // Last known trial state
  string user_id = 3; // NOTE(review): presumably the `user_id` provided in `AddTrialRequest` - confirm
  uint32 samples_count = 4; // NOTE(review): presumably the number of samples stored for this trial - confirm
  TrialParams params = 5; // Parameters of the trial
}

message StoredTrialSample {
  // Represents a sample generated by a trial at a given tick.
  // Data in this sample can be filtered (as specified in `RetrieveSamplesRequest`)
  string user_id = 1;
  string trial_id = 2; // Id of the trial that generated this sample
  uint64 tick_id = 3; // Id of the tick this sample was generated at
  fixed64 timestamp = 4; // When the tick occurred, as a nanosecond unix timestamp
  TrialState state = 5; // NOTE(review): presumably the state of the trial at this tick - confirm

  // Per-actor data of this sample, each entry references its actor by index (see `StoredTrialActorSample.actor`)
  repeated StoredTrialActorSample actor_samples = 6;

  repeated bytes payloads = 7; // Grouped payloads of the observations, actions, rewards and messages.
                               // Entries of `actor_samples` reference these payloads by their index in this field.
}

// Identifies a field of the stored samples, used in
// `RetrieveSamplesRequest.selected_sample_fields` to select what is returned.
// Values other than UNKNOWN are named after the fields of `StoredTrialActorSample`.
enum StoredTrialSampleField {
  STORED_TRIAL_SAMPLE_FIELD_UNKNOWN = 0; // Zero value, i.e. the proto3 default
  STORED_TRIAL_SAMPLE_FIELD_OBSERVATION = 1;
  STORED_TRIAL_SAMPLE_FIELD_ACTION = 2;
  STORED_TRIAL_SAMPLE_FIELD_REWARD = 3;
  STORED_TRIAL_SAMPLE_FIELD_RECEIVED_REWARDS = 4;
  STORED_TRIAL_SAMPLE_FIELD_SENT_REWARDS = 5;
  STORED_TRIAL_SAMPLE_FIELD_RECEIVED_MESSAGES = 6;
  STORED_TRIAL_SAMPLE_FIELD_SENT_MESSAGES = 7;
}

message StoredTrialActorSample {
  // Represents a sample generated by an actor in a trial at a given tick.
  // Only makes sense as a part of StoredTrialSample.
  // Actors are referenced by their index in the trial's params `TrialParams.actors` field.
  // Where it makes sense, the actor index can be set to -1 to reference the trial's environment;
  // this applies to the signed `sender` / `receiver` fields of StoredTrialActorSampleReward and
  // StoredTrialActorSampleMessage, the `actor` field below is unsigned and can't hold -1.
  //
  // Payloads (i.e. observations data, actions data, reward user data and messages payloads)
  // are grouped in the `payloads` field of the parent StoredTrialSample and referenced by
  // their index in this field.

  uint32 actor = 1; // Index of the actor

  // Observation received by the actor at t
  optional uint32 observation = 2; // Index in the `payloads` of the parent StoredTrialSample

  // Action performed by the actor at t
  optional uint32 action = 3; // Index in the `payloads` of the parent StoredTrialSample

  // Reward received by the actor for t
  optional float reward = 4; // Aggregated reward value
  // NOTE(review): field number 5 is not used in this message; if it belonged to a removed field,
  // consider reserving it - confirm
  repeated StoredTrialActorSampleReward received_rewards = 6;

  // Rewards sent by the actor for t
  repeated StoredTrialActorSampleReward sent_rewards = 7;

  // Messages received by the actor before t+1
  repeated StoredTrialActorSampleMessage received_messages = 8;

  // Messages sent by the actor before t+1
  repeated StoredTrialActorSampleMessage sent_messages = 9;
}

message StoredTrialActorSampleReward {
  // Represents a reward sent or received by an actor.
  // Only makes sense as a part of StoredTrialActorSample, where it appears in
  // either `received_rewards` or `sent_rewards`.
  int32 sender = 1; // Index of the actor, -1 for the environment, ignored for sent rewards
  int32 receiver = 2; // Index of the actor, -1 for the environment, ignored for received rewards
  // NOTE(review): field number 3 is not used in this message; if it belonged to a removed field,
  // consider reserving it - confirm
  float reward = 4; // Value of the reward
  float confidence = 5; // NOTE(review): presumably the weight given to `reward` when rewards are aggregated - confirm
  optional uint32 user_data = 6; // Index in the `payloads` of the parent StoredTrialSample
}

message StoredTrialActorSampleMessage {
  // Represents a message sent or received by an actor.
  // Only makes sense as a part of StoredTrialActorSample, where it appears in
  // either `received_messages` or `sent_messages`.
  int32 sender = 1; // Index of the actor, -1 for the environment, ignored for sent messages
  int32 receiver = 2; // Index of the actor, -1 for the environment, ignored for received messages
  uint32 payload = 3; // Index in the `payloads` of the parent StoredTrialSample
}