diff --git a/Dockerfile.release b/Dockerfile.release index 35d2a659c..efd5d2f44 100644 --- a/Dockerfile.release +++ b/Dockerfile.release @@ -8,7 +8,6 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends ca-certificates && \ rm -rf /var/lib/apt/lists/* - # Use `1025` G/UID so users can switch between this and `heighliner` image without a need to chown the files. RUN groupadd -g 1025 pocket && useradd -u 1025 -g pocket -m -s /sbin/nologin pocket diff --git a/api/poktroll/application/event.pulsar.go b/api/poktroll/application/event.pulsar.go index 043aa2264..290bbde55 100644 --- a/api/poktroll/application/event.pulsar.go +++ b/api/poktroll/application/event.pulsar.go @@ -3,11 +3,11 @@ package application import ( _ "cosmossdk.io/api/cosmos/base/v1beta1" + _ "github.com/pokt-network/poktroll/api/poktroll/shared" fmt "fmt" _ "github.com/cosmos/cosmos-proto" runtime "github.com/cosmos/cosmos-proto/runtime" _ "github.com/cosmos/gogoproto/gogoproto" - _ "github.com/pokt-network/poktroll/api/poktroll/shared" protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoiface "google.golang.org/protobuf/runtime/protoiface" protoimpl "google.golang.org/protobuf/runtime/protoimpl" diff --git a/app/upgrades.go b/app/upgrades.go index ab88cc07d..59816f97c 100644 --- a/app/upgrades.go +++ b/app/upgrades.go @@ -12,6 +12,7 @@ import ( // so `cosmovisor` can automatically pull the binary from GitHub. var allUpgrades = []upgrades.Upgrade{ upgrades.Upgrade_0_0_4, + upgrades.Upgrade_0_0_9, } // setUpgrades sets upgrade handlers for all upgrades and executes KVStore migration if an upgrade plan file exists. 
diff --git a/app/upgrades/historical.go b/app/upgrades/historical.go index 0c9d83861..1801d865c 100644 --- a/app/upgrades/historical.go +++ b/app/upgrades/historical.go @@ -11,6 +11,7 @@ package upgrades import ( "context" + "fmt" storetypes "cosmossdk.io/store/types" upgradetypes "cosmossdk.io/x/upgrade/types" @@ -29,6 +30,7 @@ func defaultUpgradeHandler( configurator module.Configurator, ) upgradetypes.UpgradeHandler { return func(ctx context.Context, plan upgradetypes.Plan, vm module.VersionMap) (module.VersionMap, error) { + fmt.Println("Starting the migration in defaultUpgradeHandler.") return mm.RunMigrations(ctx, configurator, vm) } } @@ -87,3 +89,10 @@ var Upgrade_0_0_4 = Upgrade{ // No changes to the KVStore in this upgrade. StoreUpgrades: storetypes.StoreUpgrades{}, } + +// Upgrade_0_0_9 is a small upgrade on TestNet. +var Upgrade_0_0_9 = Upgrade{ + PlanName: "v0.0.9", + CreateUpgradeHandler: defaultUpgradeHandler, + StoreUpgrades: storetypes.StoreUpgrades{}, +} diff --git a/docusaurus/docs/develop/developer_guide/chain_halt_troubleshooting.md b/docusaurus/docs/develop/developer_guide/chain_halt_troubleshooting.md index 72da1f4f3..4f5796793 100644 --- a/docusaurus/docs/develop/developer_guide/chain_halt_troubleshooting.md +++ b/docusaurus/docs/develop/developer_guide/chain_halt_troubleshooting.md @@ -8,13 +8,15 @@ title: Chain Halt Troubleshooting - [Understanding Chain Halts](#understanding-chain-halts) - [Definition and Causes](#definition-and-causes) - [Impact on Network](#impact-on-network) -- [Troubleshooting Process](#troubleshooting-process) +- [Troubleshooting `wrong Block.Header.AppHash`](#troubleshooting-wrong-blockheaderapphash) - [Step 1: Identifying the Issue](#step-1-identifying-the-issue) - [Step 2: Collecting Node Data](#step-2-collecting-node-data) - [Step 3: Analyzing Discrepancies](#step-3-analyzing-discrepancies) - [Step 4: Decoding and Interpreting Data](#step-4-decoding-and-interpreting-data) - [Step 5: Comparing 
Records](#step-5-comparing-records) - [Step 6: Investigation and Resolution](#step-6-investigation-and-resolution) +- [Troubleshooting `wrong Block.Header.LastResultsHash`](#troubleshooting-wrong-blockheaderlastresultshash) +- [Syncing from genesis](#syncing-from-genesis) ## Understanding Chain Halts @@ -40,7 +42,7 @@ Chain halts can have severe consequences for the network: Given these impacts, swift and effective troubleshooting is crucial to maintain network health and user trust. -## Troubleshooting Process +## Troubleshooting `wrong Block.Header.AppHash` ### Step 1: Identifying the Issue @@ -94,3 +96,20 @@ Based on the identified discrepancies: 2. Develop a fix or patch to address the issue. 3. If necessary, initiate discussions with the validator community to reach social consensus on how to proceed. 4. Implement the agreed-upon solution and monitor the network closely during and after the fix. + +## Troubleshooting `wrong Block.Header.LastResultsHash` + +Errors like the following can occur from using the incorrect binary version at a certain height. + +```bash +reactor validation error: wrong Block.Header.LastResultsHash. +``` + +The solution is to use the correct binary version to sync the full node at the correct height. + +Tools like [cosmovisor](https://docs.cosmos.network/v0.45/run-node/cosmovisor.html) make it easier +to sync a node from genesis, using the appropriate binary for each range of block heights. + +## Syncing from genesis + +If you're encountering any of the errors mentioned above while trying to sync the historical blocks - make sure you're running the correct version of the binary in accordance with this table [Upgrade List](../../protocol/upgrades/upgrade_list.md). 
diff --git a/docusaurus/docs/develop/developer_guide/recovery_from_chain_halt.md b/docusaurus/docs/develop/developer_guide/recovery_from_chain_halt.md new file mode 100644 index 000000000..03a399052 --- /dev/null +++ b/docusaurus/docs/develop/developer_guide/recovery_from_chain_halt.md @@ -0,0 +1,173 @@ +--- +sidebar_position: 7 +title: Chain Halt Recovery +--- + +## Chain Halt Recovery + +This document describes how to recover from a chain halt. It assumes the cause of +the chain halt has been identified, the new release has been created, and verified to +function correctly. + +:::tip +See [Chain Halt Troubleshooting](./chain_halt_troubleshooting.md) for more information on identifying the cause of a chain halt. +::: + +- [Background](#background) +- [Resolving halts during a network upgrade](#resolving-halts-during-a-network-upgrade) + - [Manual binary replacement (preferred)](#manual-binary-replacement-preferred) + - [Rollback, fork and upgrade](#rollback-fork-and-upgrade) + - [Step 5: Data rollback - retrieving snapshot at a specific height](#step-5-data-rollback---retrieving-snapshot-at-a-specific-height) + - [Step 6: Validator Isolation - risks](#step-6-validator-isolation---risks) + +## Background + +Pocket Network is built on top of `cosmos-sdk`, which utilizes the CometBFT consensus engine. +Byzantine Fault Tolerant (BFT) consensus algorithm requires that **more than** 2/3 of Validators +are online and voting for the same block to reach a consensus. In order to maintain liveness +and avoid a chain-halt, we need the majority (> 2/3) of Validators to participate +and use the same version of the software. + +## Resolving halts during a network upgrade + +If the halt is caused by the network upgrade, it is possible the solution can be as simple as +skipping an upgrade (i.e. `unsafe-skip-upgrade`) and creating a new (fixed) upgrade. + +Read more about [upgrade contingency plans](../../protocol/upgrades/contigency_plans.md). 
+ +### Manual binary replacement (preferred) + +:::note + +This is the preferred way of resolving consensus-breaking issues. + +**Significant side effect**: this breaks the ability to sync from genesis **without manual intervention**. +For example, when a consensus-breaking issue occurs on a node that is syncing from the first block, node operators need +to manually replace the binary with the new one. There are efforts underway to mitigate this issue, including +configuration for `cosmovisor` that could automate the process. + + + +::: + +Since the chain is not moving, **it is impossible** to issue an automatic upgrade with an upgrade plan. Instead, +we need **social consensus** to manually replace the binary and get the chain moving. + +1. Prepare and verify a new binary that addresses the consensus-breaking issue. +2. Reach out to the community and validators so they can upgrade the binary manually. + :::warning UNKNOWN, NEED TO INVESTIGATE + + We might need to coordinate the timing of when the nodes should be started. In the Tendermint version of Pocket Network + (Morse), this was necessary to sync consensus rounds and steps, getting the chain moving. It might not be a + requirement anymore, but we need to double-check. [More information](https://docs.cometbft.com/v1.0/spec/consensus/consensus). +3. Update [the documentation](../../protocol/upgrades/upgrade_list.md) to include the range of heights at which the binary needs + to be replaced. Consider a configuration change for `cosmovisor` so it would automatically replace the binary when + syncing from genesis. 
+ + +```mermaid +sequenceDiagram + participant DevTeam + participant Community + participant Validators + participant Documentation + participant Network + + DevTeam->>DevTeam: Prepare and verify new binary + DevTeam->>Community: Announce new binary and instructions + DevTeam->>Validators: Notify validators to upgrade manually + Validators->>Validators: Manually replace the binary + Validators->>Network: Restart nodes with new binary + DevTeam->>Documentation: Update upgrade documentation + Validators->>Network: Network resumes operation + +``` + +### Rollback, fork and upgrade + +:::info + +These instructions are only relevant to Pocket Network's Shannon release. + +We do not currently use `x/gov` and on-chain voting for upgrades. + +Instead, our DAO votes on upgrades off-chain and the Foundation executes +transactions on their behalf. + +::: + +**Performing a rollback is analogous to forking the network at the older height.** + +This should be avoided unless absolutely necessary. + +However, if necessary, the instructions to follow are: + +1. Prepare & verify a new binary that addresses the consensus-breaking issue. +2. [Create a release](../../protocol/upgrades/release_process.md). +3. [Prepare an upgrade transaction](../../protocol/upgrades/upgrade_procedure.md#writing-an-upgrade-transaction) to the new version. +4. Get the Validator set off the network **3 blocks** prior to the height of the chain halt. For example: + - Assume an issue at height `103` + - Get the validator set at height `100` + - Submit an upgrade transaction at `101` + - Upgrade the chain at height `102` + - Avoid the issue at height `103` +5. Ensure all validators rolled back to the same height and use the same snapshot - ([how to get the snapshot](#step-5-data-rollback---retrieving-snapshot-at-a-specific-height)) + - The snapshot should be imported into each Validator's data directory + - This is necessary to ensure data continuity and prevent forks. +6. 
Isolate the validator set from full nodes - ([why this is necessary](#step-6-validator-isolation---risks)) + - This is necessary to prevent full nodes from gossiping blocks that have been rolled back. + - This may require using a firewall or a private network + - Validators should only gossip blocks amongst themselves. +7. Start the network and perform the upgrade. For example, reiterating the process above: + - Start all Validators at height `100` + - On block `101`, submit the `MsgSoftwareUpgrade` transaction with a `Plan.height` set to `102`. + - `x/upgrade` will perform the upgrade in the `EndBlocker` of block `102` + - If using `cosmovisor`, the node will wait to replace the binary +8. Wait for the network to reach the height of the previous ledger (`104`+) +9. Allow validators to open their network to full nodes again. + - Note that full nodes will need to perform the rollback or use a snapshot as well. + +```mermaid +sequenceDiagram + participant DevTeam + participant Foundation + participant Validators + participant FullNodes + participant Network + + DevTeam->>DevTeam: Prepare & verify new binary + DevTeam->>DevTeam: Create a release + Validators->>Validators: Roll back to height before issue or import snapshot + Validators->>Validators: Isolate from Full Nodes + Foundation->>Validators: Distribute upgrade transaction + Validators->>Network: Start network and perform upgrade + Validators->>Network: Wait until over consensus-breaking height + Validators->>FullNodes: Open network connections + FullNodes->>Network: Sync with updated network + Validators->>Network: Network resumes operation + +``` + +#### Step 5: Data rollback - retrieving snapshot at a specific height + +There are two ways to get a snapshot from a prior height: + +1. Execute + ```bash + poktrolld rollback --hard + ``` + repeatedly until the command responds with the desired block number. +2. 
Use a snapshot and start the node with `--halt-height=100` parameter so it only syncs up to certain height and then + gracefully shuts down. Add this argument to `poktrolld start` like this: + ```bash + poktrolld start --halt-height=100 + ``` + + +#### Step 6: Validator Isolation - risks + +Having at least one node that has knowledge of the forking ledger can jeopardize the whole process. In particular, the +following errors in logs are the sign of the nodes populating existing blocks: + - `found conflicting vote from ourselves; did you unsafe_reset a validator?` + - `conflicting votes from validator` + diff --git a/docusaurus/docs/protocol/upgrades/contigency_plans.md b/docusaurus/docs/protocol/upgrades/contigency_plans.md new file mode 100644 index 000000000..32b254cad --- /dev/null +++ b/docusaurus/docs/protocol/upgrades/contigency_plans.md @@ -0,0 +1,79 @@ +--- +title: Failed upgrade contingency plan +sidebar_position: 5 +--- + +:::tip + +This documentation covers failed upgrade contingency for `poktroll` - a `cosmos-sdk` based chain. + +While this can be helpful for other blockchain networks, it is not guaranteed to work for other chains. + +::: + +## Contingency plans + +There's always a chance the upgrade will fail. + +This document is intended to help you recover without significant downtime. 
+ +- [Option 0: The bug is discovered before the upgrade height is reached](#option-0-the-bug-is-discovered-before-the-upgrade-height-is-reached) +- [Option 1: The upgrade height is reached and the migration didn't start](#option-1-the-upgrade-height-is-reached-and-the-migration-didnt-start) +- [Option 2: The migration is stuck](#option-2-the-migration-is-stuck) + - [Documentation and scripts to update](#documentation-and-scripts-to-update) +- [Option 3: The network is stuck at the future height after the upgrade](#option-3-the-network-is-stuck-at-the-future-height-after-the-upgrade) + +### Option 0: The bug is discovered before the upgrade height is reached + +**Cancel the upgrade plan!** + +See the instructions on [how to do that here](./upgrade_procedure.md#cancelling-the-upgrade-plan). + +### Option 1: The upgrade height is reached and the migration didn't start + +If the nodes on the network stopped at the upgrade height and the migration did not +start yet (i.e. there are no logs indicating the upgrade handler and store migrations are being executed), +we must gather social consensus to restart validators with the `--unsafe-skip-upgrade=$upgradeHeightNumber` flag. + +This will skip the upgrade process, allowing the chain to continue and the protocol team to plan another release. + +`--unsafe-skip-upgrade` simply skips the upgrade handler and store migrations. +The chain continues as if the upgrade plan was never set. +The upgrade needs to be fixed, and then a new plan needs to be submitted to the network. + +:::caution + +`--unsafe-skip-upgrade` needs to be documented and added to the scripts so the next time somebody tries to sync the network from genesis - they will automatically skip the failed upgrade. + + + +::: + +### Option 2: The migration is stuck + +If the migration is stuck, there's always a chance the state has been mutated for +the upgrade but the migration didn't complete. + +In such a case, we need to: + +- Roll back validators to the backup. 
A snapshot is taken by `cosmovisor` automatically prior to upgrade, + if `UNSAFE_SKIP_BACKUP` is set to `false` (which is the default and recommended value - + [more information](https://docs.cosmos.network/main/build/tooling/cosmovisor#command-line-arguments-and-environment-variables)). +- All full nodes and validators on the network: skip the upgrade handler and store migrations by adding `--unsafe-skip-upgrade=$upgradeHeightNumber` + argument to your `poktrolld start` command. Like this: + ```bash + poktrolld start --unsafe-skip-upgrade=$upgradeHeightNumber + ``` +- Protocol team: document and add `--unsafe-skip-upgrade=$upgradeHeightNumber` to the scripts so the next time somebody + tries to sync the network from genesis they will automatically skip the failed upgrade. [Documentation and scripts to update](#documentation-and-scripts-to-update) +- Resolve the issue with an upgrade and schedule another plan. + +#### Documentation and scripts to update + +- The [upgrade list](./upgrade_list.md) should reflect a failed upgrade and provide a range of heights that are served by each version. +- Systemd service should include the `--unsafe-skip-upgrade=$upgradeHeightNumber` argument in its start command [here](https://github.com/pokt-network/poktroll/blob/main/tools/installer/full-node.sh). +- [Helm chart](https://github.com/pokt-network/helm-charts/blob/main/charts/poktrolld/templates/StatefulSet.yaml) (consider exposing via a `values.yaml` file) + +### Option 3: The network is stuck at the future height after the upgrade + +This should be treated as a consensus or non-determinism bug that is unrelated to the upgrade. See [Recovery From Chain Halt](../../develop/developer_guide/recovery_from_chain_halt.md) for more information on how to handle such issues. 
diff --git a/docusaurus/docs/protocol/upgrades/release_process.md b/docusaurus/docs/protocol/upgrades/release_process.md index 2845f4c84..398d56c05 100644 --- a/docusaurus/docs/protocol/upgrades/release_process.md +++ b/docusaurus/docs/protocol/upgrades/release_process.md @@ -16,13 +16,6 @@ sidebar_position: 4 This document is for the Pocket Network protocol team's internal use only. ::: -- [1. Determine if the Release is Consensus-Breaking](#1-determine-if-the-release-is-consensus-breaking) -- [2. Create a GitHub Release](#2-create-a-github-release) - - [Legend](#legend) -- [3. Write an Upgrade Plan](#3-write-an-upgrade-plan) -- [4. Issue Upgrade on TestNet](#4-issue-upgrade-on-testnet) -- [5. Issue Upgrade on MainNet](#5-issue-upgrade-on-mainnet) - ### 1. Determine if the Release is Consensus-Breaking :::note @@ -59,12 +52,18 @@ You can find an example [here](https://github.com/pokt-network/poktroll/releases ```text ## Protocol Upgrades + + - **Planned Upgrade:** ❌ Not applicable for this release. - **Breaking Change:** ❌ Not applicable for this release. - **Manual Intervention Required:** ✅ Yes, but only for Alpha TestNet participants. If you are participating, please follow the [instructions provided here](https://dev.poktroll.com/operate/quickstart/docker_compose_walkthrough#restarting-a-full-node-after-re-genesis-) for restarting your full node after re-genesis. - **Upgrade Height:** ❌ Not applicable for this release. ## What's Changed + ``` diff --git a/docusaurus/docs/protocol/upgrades/upgrade_list.md b/docusaurus/docs/protocol/upgrades/upgrade_list.md index f4b2c8d19..1df1dd52a 100644 --- a/docusaurus/docs/protocol/upgrades/upgrade_list.md +++ b/docusaurus/docs/protocol/upgrades/upgrade_list.md @@ -8,7 +8,7 @@ sidebar_position: 1 The tables below provide a list of past and upcoming protocol upgrades. 
For more detailed information about what upgrades are, how they work, and what changes they bring to the protocol, please refer to our [upgrade overview page](./protocol_upgrades.md). - [Legend](#legend) -- [TestNet](#testnet) +- [Alpha TestNet](#alpha-testnet) - [MainNet](#mainnet) ## Legend @@ -18,20 +18,15 @@ The tables below provide a list of past and upcoming protocol upgrades. For more - ❓ - Unknown/To Be Determined - ⚠️ - Warning/Caution Required -## TestNet - -:::warning -This table is currently incomplete and does not include all protocol upgrades. Our recent TestNet upgrades, which were performed via a regenesis, are not listed here. -::: +## Alpha TestNet -| Version | Planned | Breaking | Requires Manual Intervention | Upgrade Height | -| ------------------------------------------------------------------------ | :-----: | :------: | :---------------------------------: | -------------- | -| [`v0.0.7`](https://github.com/pokt-network/poktroll/releases/tag/v0.0.7) | ❓ | ❓ | ✅ (Alpha TestNet Participants Only) | ❓ | -| [`v0.0.6`](https://github.com/pokt-network/poktroll/releases/tag/v0.0.6) | ❓ | ❓ | ✅ (Alpha TestNet Participants Only) | ❓ | -| [`v0.0.5`](https://github.com/pokt-network/poktroll/releases/tag/v0.0.5) | ❓ | ❓ | ✅ (Alpha TestNet Participants Only) | ❓ | -| [`v0.0.4`](https://github.com/pokt-network/poktroll/releases/tag/v0.0.4) | ❓ | ❓ | ✅ (Alpha TestNet Participants Only) | ❓ | +| Version | Planned | Breaking | Requires Manual Intervention | Upgrade Height | +| ---------------------------------------------------------------------------- | :-----: | :------: | :-------------------------------: | -------------- | +| [`v0.0.9-3`](https://github.com/pokt-network/poktroll/releases/tag/v0.0.9-3) | ❌ | ✅ | ⚠️ Alpha TestNet Participants Only | `17102` | +| [`v0.0.9`](https://github.com/pokt-network/poktroll/releases/tag/v0.0.9) | ❓ | ❓ | N/A: genesis version | ❓ | + ## MainNet diff --git a/docusaurus/docs/protocol/upgrades/upgrade_procedure.md 
b/docusaurus/docs/protocol/upgrades/upgrade_procedure.md index b1098f509..76ec0a824 100644 --- a/docusaurus/docs/protocol/upgrades/upgrade_procedure.md +++ b/docusaurus/docs/protocol/upgrades/upgrade_procedure.md @@ -12,9 +12,13 @@ This page describes the protocol upgrade process, which is internal to the proto - [When is an Upgrade Warranted?](#when-is-an-upgrade-warranted) - [Implementing the Upgrade](#implementing-the-upgrade) - [Writing an Upgrade Transaction](#writing-an-upgrade-transaction) + - [Validate the URLs (live network only)](#validate-the-urls-live-network-only) - [Submitting the upgrade on-chain](#submitting-the-upgrade-on-chain) +- [Cancelling the upgrade plan](#cancelling-the-upgrade-plan) - [Testing the Upgrade](#testing-the-upgrade) - [LocalNet](#localnet) + - [LocalNet Upgrade tl;dr](#localnet-upgrade-tldr) + - [LocalNet Upgrade Full Example Walkthrough](#localnet-upgrade-full-example-walkthrough) - [DevNet](#devnet) - [TestNet](#testnet) - [Mainnet](#mainnet) @@ -39,7 +43,7 @@ An upgrade is necessary whenever there's an API, State Machine, or other Consens 1. When a new version includes a consensus-breaking change, plan for the next protocol upgrade: - If there's a change to a specific module, bump that module's consensus version. - Note any potential parameter changes to include in the upgrade. -2. Create a new upgrade in `app/upgrades`: +2. Create a new upgrade in `app/upgrades`. **THIS MUST BE DONE** even if there are no state changes. - Refer to `historical.go` for past upgrades and examples. - Consult Cosmos-sdk documentation on upgrades for additional guidance [here](https://docs.cosmos.network/main/build/building-apps/app-upgrade) and [here](https://docs.cosmos.network/main/build/modules/upgrade). @@ -67,22 +71,68 @@ An upgrade transaction includes a [Plan](https://github.com/cosmos/cosmos-sdk/bl - `name`: Name of the upgrade. It should match the `VersionName` of `upgrades.Upgrade`. 
- `height`: The height at which an upgrade should be executed and the node will be restarted. -- `info`: While this field can theoretically contain any information about the upgrade, in practice, `cosmovisor`uses it to obtain information about the binaries. When`cosmovisor` is configured to automatically download binaries, it will pull the binary from the link provided in this field and perform a hash verification (which is optional). +- `info`: Can be empty. **Only needed for live networks where we want cosmovisor to upgrade nodes automatically**. + +:::tip + +When `cosmovisor` is configured to automatically download binaries, it will pull the binary from the link provided in this field and perform a hash verification (which is also optional). We only know the hashes **AFTER** the release has been cut and CI created artifacts for this version. + +::: + +### Validate the URLs (live network only) + +The URLs of the binaries contain checksums. It is critical to ensure they are correct. +Otherwise Cosmovisor won't be able to download the binaries and go through the upgrade. + +The command below (using a tool built by the authors of Cosmovisor) can be used to achieve the above: + +```bash +jq -r '.body.messages[0].plan.info | fromjson | .binaries[]' $PATH_TO_UPGRADE_TRANSACTION_JSON | while IFS= read -r url; do + go-getter "$url" . +done +``` + +The output should look like this: + +```text +2024/09/24 12:40:40 success! +2024/09/24 12:40:42 success! +2024/09/24 12:40:44 success! +2024/09/24 12:40:46 success! 
+``` + +:::tip + +`go-getter` can be installed using the following command: + +```bash +go install github.com/hashicorp/go-getter/cmd/go-getter@latest +``` + +::: ## Submitting the upgrade on-chain The `MsgSoftwareUpgrade` can be submitted using the following command: ```bash -poktrolld tx authz exec PATH_TO_TRANSACTION_JSON --from pnf +poktrolld tx authz exec $PATH_TO_UPGRADE_TRANSACTION_JSON --from=pnf ``` -If the transaction has been accepted, upgrade plan can be viewed with this command: +If the transaction has been accepted, the upgrade plan can be viewed with this command: ```bash poktrolld query upgrade plan ``` +## Cancelling the upgrade plan + +It is possible to cancel the upgrade before the upgrade plan height is reached. To do so, execute the following transaction: + +```bash +poktrolld tx authz exec tools/scripts/upgrades/authz_cancel_upgrade_tx.json --gas=auto --from=pnf +``` + ## Testing the Upgrade :::warning @@ -91,18 +141,90 @@ Note that for local testing, `cosmovisor` won't pull the binary from the info fi ### LocalNet -LocalNet currently does not support `cosmovisor` and automatic upgrades. However, we have provided scripts to facilitate local testing in the `tools/scripts/upgrades` directory: +LocalNet **DOES NOT** support `cosmovisor` and automatic upgrades at the moment. + +However, **IT IS NOT NEEDED** to simulate and test the upgrade procedure. + +#### LocalNet Upgrade tl;dr + +1. Pull git repo with old version (separate directory) +2. Download release binary of the old version +3. Wipe LocalNet data and generate genesis using OLD version +4. Start the node using an OLD binary +5. Write and submit an upgrade transaction on-chain +6. When the Upgrade Plan height is reached, stop the old node and run the new binary +7. Observe the behavior + +#### LocalNet Upgrade Full Example Walkthrough + +Testing an upgrade requires a network running on an old version. 
+ +Ensure LocalNet is running using a binary from the [previous release you wish to upgrade **FROM**](https://github.com/pokt-network/poktroll/releases). We also want to provision the network using this version, which requires us to pull the specific git tag. + +1. Make a note of the version you want to test an upgrade **FROM**. This will be the **OLD** version. For example, let's imagine we're upgrading from `v0.0.9`. +2. Pull a new `poktroll` repo (will be used as an "old" version): + + ```bash + git clone https://github.com/pokt-network/poktroll.git poktroll-upgrade-old + cd poktroll-upgrade-old + git checkout v0.0.9 + + # Download the v0.0.9 binary: https://github.com/pokt-network/poktroll/releases + # CHANGE POKTROLLD_VERSION and ARCH + curl -L "https://github.com/pokt-network/poktroll/releases/download/${POKTROLLD_VERSION}/poktroll_linux_${ARCH}.tar.gz" | tar -zxvf - -C . + + # Validate the version + ./poktrolld version + 0.0.9 + ``` + +3. Stop LocalNet + + ```bash + make localnet_down + ``` + +4. Reset the data + + ```bash + ./poktrolld comet unsafe-reset-all + ``` + +5. Create new genesis using old version (from `poktroll-upgrade-old` dir) + + ```bash + make localnet_regenesis + ``` + +6. Start the network + + ```bash + ./poktrolld start + ``` + +7. [Write](#writing-an-upgrade-transaction) and [Submit](#submitting-the-upgrade-on-chain) a transaction. For example: + + ```bash + poktrolld tx authz exec tools/scripts/upgrades/local_test_v0.0.9-2.json --from=pnf + ``` -1. Modify `tools/scripts/upgrades/authz_upgrade_tx_example_v0.0.4_height_30.json` to reflect the name of the upgrade and the height at which it should be scheduled. +8. Verify the plan is active -2. Check and update the `tools/scripts/upgrades/cosmovisor-start-node.sh` to point to the correct binaries: + ```bash + poktrolld query upgrade plan + ``` - - The old binary should be compiled to work before the upgrade. 
- - The new binary should contain the upgrade logic to be executed immediately after the node is started using the new binary. +9. Wait until the height is reached and the old node dies due to the error: `ERR UPGRADE "v0.0.9-2" NEEDED at height`, which is expected. +10. At this point, switch to the repo with the **NEW** version - the code you wish to upgrade the network **TO**. It might be a + `poktroll` repo you are working on or a release tag. +11. In the **NEW VERSION GIT REPO** you can build binaries using `go_develop`, `ignite_release` and `ignite_release_extract_binaries` make targets. +12. Start the new version from the **NEW VERSION REPO**: -3. Run `bash tools/scripts/upgrades/cosmovisor-start-node.sh` to wipe the `~/.poktroll` directory and place binaries in the correct locations. + ```bash + ./release_binaries/poktroll_darwin_arm64 start + ``` -4. Execute the transaction as shown in [Submitting the upgrade on-chain](#submitting-the-upgrade-on-chain) section above. +13. Observe the output. Your node should go through the upgrade process and start using the new version. 
### DevNet diff --git a/docusaurus/yarn.lock b/docusaurus/yarn.lock index 93cee387e..5c2dcf935 100644 --- a/docusaurus/yarn.lock +++ b/docusaurus/yarn.lock @@ -1810,15 +1810,10 @@ dependencies: "@types/mdx" "^2.0.0" -"@node-rs/jieba-linux-x64-gnu@1.10.0": +"@node-rs/jieba-darwin-arm64@1.10.0": version "1.10.0" - resolved "https://registry.npmjs.org/@node-rs/jieba-linux-x64-gnu/-/jieba-linux-x64-gnu-1.10.0.tgz" - integrity sha512-rS5Shs8JITxJjFIjoIZ5a9O+GO21TJgKu03g2qwFE3QaN5ZOvXtz+/AqqyfT4GmmMhCujD83AGqfOGXDmItF9w== - -"@node-rs/jieba-linux-x64-musl@1.10.0": - version "1.10.0" - resolved "https://registry.npmjs.org/@node-rs/jieba-linux-x64-musl/-/jieba-linux-x64-musl-1.10.0.tgz" - integrity sha512-BvSiF2rR8Birh2oEVHcYwq0WGC1cegkEdddWsPrrSmpKmukJE2zyjcxaOOggq2apb8fIRsjyeeUh6X3R5AgjvA== + resolved "https://registry.npmjs.org/@node-rs/jieba-darwin-arm64/-/jieba-darwin-arm64-1.10.0.tgz" + integrity sha512-IhR5r+XxFcfhVsF93zQ3uCJy8ndotRntXzoW/JCyKqOahUo/ITQRT6vTKHKMyD9xNmjl222OZonBSo2+mlI2fQ== "@node-rs/jieba@^1.6.0": version "1.10.0" @@ -4619,6 +4614,11 @@ fs.realpath@^1.0.0: resolved "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz" integrity sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw== +fsevents@~2.3.2: + version "2.3.3" + resolved "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz" + integrity sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw== + function-bind@^1.1.2: version "1.1.2" resolved "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz" diff --git a/go.mod b/go.mod index 8e7700b69..91de15f1d 100644 --- a/go.mod +++ b/go.mod @@ -82,7 +82,6 @@ require ( require ( cosmossdk.io/x/tx v0.13.4 github.com/jhump/protoreflect v1.16.0 - go.uber.org/mock v0.4.0 ) require ( diff --git a/go.sum b/go.sum index b35917516..62dc61bb0 100644 --- a/go.sum +++ b/go.sum @@ -1213,8 +1213,6 @@ go.uber.org/atomic v1.11.0/go.mod 
h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0 go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU= -go.uber.org/mock v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= diff --git a/tools/installer/full-node.sh b/tools/installer/full-node.sh index 6faca639b..610df7a83 100644 --- a/tools/installer/full-node.sh +++ b/tools/installer/full-node.sh @@ -94,7 +94,7 @@ setup_env_vars() { echo "export DAEMON_HOME=\$HOME/.poktroll" >> \$HOME/.profile echo "export DAEMON_RESTART_AFTER_UPGRADE=true" >> \$HOME/.profile echo "export DAEMON_ALLOW_DOWNLOAD_BINARIES=true" >> \$HOME/.profile - echo "export UNSAFE_SKIP_BACKUP=true" >> \$HOME/.profile + echo "export UNSAFE_SKIP_BACKUP=false" >> \$HOME/.profile source \$HOME/.profile EOF print_color $GREEN "Environment variables set up successfully." @@ -138,12 +138,16 @@ setup_poktrolld() { exit 1 fi - # Use the direct download link for the latest release - LATEST_RELEASE_URL="https://github.com/pokt-network/poktroll/releases/latest/download/poktroll_linux_${ARCH}.tar.gz" + # Get the version genesis started from. We can't just use `latest` as the new binary won't sync from genesis. + # We need to start syncing from scratch using the version that was used when the network started. 
+ POKTROLLD_VERSION=$(curl -s https://raw.githubusercontent.com/pokt-network/pocket-network-genesis/master/poktrolld/testnet-validated.init-version) + + # Use the direct download link for the correct release + RELEASE_URL="https://github.com/pokt-network/poktroll/releases/download/${POKTROLLD_VERSION}/poktroll_linux_${ARCH}.tar.gz" sudo -u "$POKTROLL_USER" bash << EOF mkdir -p \$HOME/.poktroll/cosmovisor/genesis/bin - curl -L "$LATEST_RELEASE_URL" | tar -zxvf - -C \$HOME/.poktroll/cosmovisor/genesis/bin + curl -L "$RELEASE_URL" | tar -zxvf - -C \$HOME/.poktroll/cosmovisor/genesis/bin chmod +x \$HOME/.poktroll/cosmovisor/genesis/bin/poktrolld ln -sf \$HOME/.poktroll/cosmovisor/genesis/bin/poktrolld \$HOME/bin/poktrolld source \$HOME/.profile diff --git a/tools/scripts/upgrades/authz_cancel_upgrade_tx.json b/tools/scripts/upgrades/authz_cancel_upgrade_tx.json new file mode 100644 index 000000000..014eaac60 --- /dev/null +++ b/tools/scripts/upgrades/authz_cancel_upgrade_tx.json @@ -0,0 +1,10 @@ +{ + "body": { + "messages": [ + { + "@type": "/cosmos.upgrade.v1beta1.MsgCancelUpgrade", + "authority": "pokt10d07y265gmmuvt4z0w9aw880jnsr700j8yv32t" + } + ] + } +} \ No newline at end of file diff --git a/tools/scripts/upgrades/upgrade_tx_v0.0.9.json b/tools/scripts/upgrades/upgrade_tx_v0.0.9.json new file mode 100644 index 000000000..c945229d9 --- /dev/null +++ b/tools/scripts/upgrades/upgrade_tx_v0.0.9.json @@ -0,0 +1,15 @@ +{ + "body": { + "messages": [ + { + "@type": "/cosmos.upgrade.v1beta1.MsgSoftwareUpgrade", + "authority": "pokt10d07y265gmmuvt4z0w9aw880jnsr700j8yv32t", + "plan": { + "name": "v0.0.9", + "height": "15510", + "info": 
"{\"binaries\":{\"linux/amd64\":\"https://github.com/pokt-network/poktroll/releases/download/v0.0.9/poktroll_linux_amd64.tar.gz?checksum=sha256:ab5b99ca0bc4bfbdd7031378d5a01c2a9f040ff310b745866a4dee7e62321c94\",\"linux/arm64\":\"https://github.com/pokt-network/poktroll/releases/download/v0.0.9/poktroll_linux_arm64.tar.gz?checksum=sha256:4b68c2ad326da055d43af1ad1a580158cec0f229d2ec6d9e18280d065260b622\",\"darwin/amd64\":\"https://github.com/pokt-network/poktroll/releases/download/v0.0.9/poktroll_darwin_amd64.tar.gz?checksum=sha256:c81aabddeb190044b979412e5a518bbf5c88305272f72a47e32e13aa765c3330\",\"darwin/arm64\":\"https://github.com/pokt-network/poktroll/releases/download/v0.0.9/poktroll_darwin_arm64.tar.gz?checksum=sha256:e683c55ac13902d107d7a726ed4a5c5affb2af1be3c67dd131ec2072a2cfbcb2\"}}" + } + } + ] + } +} \ No newline at end of file diff --git a/x/tokenomics/types/tx.pb.go b/x/tokenomics/types/tx.pb.go index e4fec264c..9f18a148c 100644 --- a/x/tokenomics/types/tx.pb.go +++ b/x/tokenomics/types/tx.pb.go @@ -125,7 +125,6 @@ type MsgUpdateParam struct { // specified in the `Params` message in `proof/params.proto.` Name string `protobuf:"bytes,2,opt,name=name,proto3" json:"name,omitempty"` // Types that are valid to be assigned to AsType: - // // *MsgUpdateParam_AsString // *MsgUpdateParam_AsInt64 // *MsgUpdateParam_AsBytes